@speechos/core 0.2.0 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,130 @@
+ /**
+ * Audio capture module for SpeechOS WebSocket integration.
+ *
+ * Provides MediaRecorder-based audio capture with:
+ * - Format detection for cross-browser compatibility
+ * - Buffering for instant start (audio captured before connection is ready)
+ * - Atomic buffer swap pattern to prevent chunk reordering
+ */
+ /**
+ * Supported audio formats with their MIME types and whether
+ * Deepgram needs explicit encoding parameters.
+ */
+ export interface AudioFormat {
+ /** MIME type for MediaRecorder */
+ mimeType: string;
+ /** Short identifier for the format */
+ format: 'webm' | 'mp4' | 'pcm';
+ /** Whether Deepgram needs encoding/sample_rate params */
+ needsEncodingParams: boolean;
+ }
+ /**
+ * Detect the best supported audio format for the current browser.
+ *
+ * IMPORTANT: Safari must use MP4/AAC. Its WebM/Opus implementation is buggy
+ * and produces truncated/incomplete audio.
+ */
+ export declare function getSupportedAudioFormat(): AudioFormat;
+ /**
+ * Callback for receiving audio chunks.
+ */
+ export type AudioChunkCallback = (chunk: Blob) => void;
+ /**
+ * Audio capture manager with buffering support.
+ *
+ * Usage:
+ * 1. Create instance with onChunk callback
+ * 2. Call start() - immediately begins capturing
+ * 3. Call setReady() when connection is established - flushes buffer
+ * 4. Call stop() when done
+ */
+ export declare class AudioCapture {
+ private mediaStream;
+ private recorder;
+ private buffer;
+ private isReady;
+ private isRecording;
+ private onChunk;
+ private audioFormat;
+ private deviceId;
+ /**
+ * Time slice for MediaRecorder in milliseconds.
+ *
+ * Safari requires a larger timeslice (1000ms) to properly flush its internal
+ * audio buffers. Smaller values cause Safari to drop or truncate audio data.
+ * See: https://community.openai.com/t/whisper-problem-with-audio-mp4-blobs-from-safari/
+ *
+ * Other browsers (Chrome, Firefox, Edge) work well with smaller timeslices
+ * which provide lower latency for real-time transcription.
+ */
+ private static readonly TIME_SLICE_MS;
+ private static readonly SAFARI_TIME_SLICE_MS;
+ /**
+ * @param onChunk - Callback for receiving audio chunks
+ * @param deviceId - Optional audio device ID (empty string or undefined for system default)
+ */
+ constructor(onChunk: AudioChunkCallback, deviceId?: string);
+ /**
+ * Get the appropriate timeslice for the current browser.
+ * Safari needs a larger timeslice to avoid dropping audio data.
+ */
+ private getTimeSlice;
+ /**
+ * Get the timeslice being used (in milliseconds).
+ * Useful for callers that need to wait for audio processing.
+ */
+ getTimeSliceMs(): number;
+ /**
+ * Get the audio format being used.
+ */
+ getFormat(): AudioFormat;
+ /**
+ * Start capturing audio immediately.
+ *
+ * Audio chunks will be buffered until setReady() is called.
+ */
+ start(): Promise<void>;
+ /**
+ * Handle an audio chunk with atomic buffer swap pattern.
+ *
+ * If not ready: buffer the chunk.
+ * If ready: send directly via callback.
+ */
+ private handleChunk;
+ /**
+ * Mark the capture as ready (connection established).
+ *
+ * This flushes any buffered chunks and switches to direct mode.
+ * Uses atomic swap to prevent chunk reordering.
+ */
+ setReady(): void;
+ /**
+ * Stop capturing audio and wait for final chunk.
+ *
+ * Uses requestData() before stop() to force the MediaRecorder to flush
+ * any buffered audio immediately. This is critical for Safari which
+ * may hold audio data in internal buffers.
+ *
+ * Safari requires an additional delay after stopping to ensure all audio
+ * from its internal encoding pipeline has been fully processed and emitted.
+ */
+ stop(): Promise<void>;
+ /**
+ * Check if currently recording.
+ */
+ get recording(): boolean;
+ /**
+ * Check if ready (connection established, direct mode active).
+ */
+ get ready(): boolean;
+ /**
+ * Get the number of buffered chunks waiting to be sent.
+ */
+ get bufferedChunks(): number;
+ }
+ /**
+ * Factory function to create an AudioCapture instance.
+ * @param onChunk - Callback for receiving audio chunks
+ * @param deviceId - Optional audio device ID (empty string or undefined for system default)
+ */
+ export declare function createAudioCapture(onChunk: AudioChunkCallback, deviceId?: string): AudioCapture;
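The AudioCapture declarations above spell out the intended call order: start(), then setReady() once the connection is up, then stop(). A minimal usage sketch follows; it assumes createAudioCapture is re-exported from the package entry point, and the transport pieces are placeholders rather than part of the package.

// Usage sketch (not from the package): `connectSocket` and `sendAudio` are
// placeholders for your own transport; the AudioCapture calls follow the
// declarations above. The root import path is an assumption.
import { createAudioCapture } from '@speechos/core';

async function dictate(
  connectSocket: () => Promise<void>,
  sendAudio: (chunk: Blob) => void,
): Promise<void> {
  // Chunks emitted before setReady() are buffered internally, so capture can
  // start before the transcription connection is ready ("instant start").
  const capture = createAudioCapture((chunk) => sendAudio(chunk));

  await capture.start();   // begins recording immediately, buffering chunks
  await connectSocket();   // e.g. open the SpeechOS/Deepgram WebSocket
  capture.setReady();      // flush buffered chunks, then stream directly

  // ... later, when the user finishes speaking:
  await capture.stop();    // requests the final chunk and waits for it
}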
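The declarations mention an "atomic buffer swap" in handleChunk()/setReady() but do not show it. A rough sketch of that pattern, not the package's actual source, just to illustrate why swapping the buffer out before flushing keeps chunks in arrival order:

// Illustrative sketch only: the buffer is swapped out in one step so the set
// of chunks being flushed is fixed, flushed in arrival order, and only then
// is direct mode enabled.
type ChunkCallback = (chunk: Blob) => void;

class ChunkRelay {
  private buffer: Blob[] = [];
  private isReady = false;

  constructor(private readonly onChunk: ChunkCallback) {}

  handleChunk(chunk: Blob): void {
    if (this.isReady) {
      this.onChunk(chunk);     // direct mode: forward immediately
    } else {
      this.buffer.push(chunk); // connection not ready: keep in arrival order
    }
  }

  setReady(): void {
    const pending = this.buffer; // atomic swap: take the old buffer...
    this.buffer = [];            // ...and replace it in the same tick
    for (const chunk of pending) {
      this.onChunk(chunk);       // flush everything captured so far, in order
    }
    this.isReady = true;         // from now on chunks bypass the buffer
  }
}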
@@ -0,0 +1,41 @@
+ /**
+ * Backend abstraction for voice sessions.
+ *
+ * Provides a unified interface for voice backends.
+ * Currently always uses WebSocket backend.
+ */
+ import type { CommandDefinition, CommandResult, VoiceSessionOptions } from './types.js';
+ /**
+ * Voice backend interface - common methods between backends
+ */
+ export interface VoiceBackend {
+ startVoiceSession(options?: VoiceSessionOptions): Promise<void>;
+ stopVoiceSession(): Promise<string>;
+ requestEditText(originalText: string): Promise<string>;
+ requestCommand(commands: CommandDefinition[]): Promise<CommandResult | null>;
+ disconnect(): Promise<void>;
+ isConnected(): boolean;
+ /** Get the last input text (transcript) from a command result */
+ getLastInputText?(): string | undefined;
+ prefetchToken?(): Promise<unknown>;
+ startAutoRefresh?(): void;
+ stopAutoRefresh?(): void;
+ invalidateTokenCache?(): void;
+ }
+ /**
+ * Get the active voice backend.
+ * Always returns WebSocket backend (LiveKit is legacy).
+ *
+ * @returns The websocket backend
+ */
+ export declare function getBackend(): VoiceBackend;
+ /**
+ * Check if the current backend is LiveKit.
+ * @deprecated Always returns false - LiveKit is legacy
+ */
+ export declare function isLiveKitBackend(): boolean;
+ /**
+ * Check if the current backend is WebSocket.
+ * @deprecated Always returns true - WebSocket is the only backend
+ */
+ export declare function isWebSocketBackend(): boolean;
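For the backend module, a short dictation round trip against the VoiceBackend interface above; only declared members are used, and the root import path is an assumption rather than something confirmed by this diff.

// Sketch only: assumes getBackend is re-exported from the package entry point.
import { getBackend } from '@speechos/core';

async function dictateOnce(): Promise<string> {
  const backend = getBackend();      // always the WebSocket backend per the docs

  // Optional token warm-up, only if this backend implements it.
  await backend.prefetchToken?.();

  await backend.startVoiceSession(); // begin streaming microphone audio
  // ... user speaks ...
  const transcript = await backend.stopVoiceSession(); // resolves with the text

  await backend.disconnect();
  return transcript;
}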
package/dist/config.d.cts CHANGED
@@ -1,30 +1,35 @@
  /**
- * Configuration management for SpeechOS SDK
+ * Configuration management for SpeechOS Core SDK
  */
- import type { SpeechOSConfig } from "./types.js";
+ import type { SpeechOSCoreConfig } from "./types.js";
  /**
  * Default host - can be overridden by SPEECHOS_HOST env var at build time
  */
  export declare const DEFAULT_HOST: string;
  /**
- * Default configuration values
+ * Configuration with defaults applied (all fields required internally)
  */
- export declare const defaultConfig: Required<SpeechOSConfig>;
+ interface ResolvedConfig {
+ apiKey: string;
+ userId: string;
+ host: string;
+ debug: boolean;
+ }
  /**
  * Validates and merges user config with defaults
  * @param userConfig - User-provided configuration
  * @returns Validated and merged configuration
  */
- export declare function validateConfig(userConfig?: SpeechOSConfig): Required<SpeechOSConfig>;
+ export declare function validateConfig(userConfig: SpeechOSCoreConfig): ResolvedConfig;
  /**
  * Get the current configuration
  */
- export declare function getConfig(): Required<SpeechOSConfig>;
+ export declare function getConfig(): ResolvedConfig;
  /**
  * Set the current configuration
  * @param config - Configuration to set
  */
- export declare function setConfig(config: SpeechOSConfig): void;
+ export declare function setConfig(config: SpeechOSCoreConfig): void;
  /**
  * Reset configuration to defaults
  */
@@ -34,3 +39,4 @@ export declare function resetConfig(): void;
  * @param userId - The user identifier to set
  */
  export declare function updateUserId(userId: string): void;
+ export {};
package/dist/config.d.ts CHANGED
@@ -1,30 +1,35 @@
  /**
- * Configuration management for SpeechOS SDK
+ * Configuration management for SpeechOS Core SDK
  */
- import type { SpeechOSConfig } from "./types.js";
+ import type { SpeechOSCoreConfig } from "./types.js";
  /**
  * Default host - can be overridden by SPEECHOS_HOST env var at build time
  */
  export declare const DEFAULT_HOST: string;
  /**
- * Default configuration values
+ * Configuration with defaults applied (all fields required internally)
  */
- export declare const defaultConfig: Required<SpeechOSConfig>;
+ interface ResolvedConfig {
+ apiKey: string;
+ userId: string;
+ host: string;
+ debug: boolean;
+ }
  /**
  * Validates and merges user config with defaults
  * @param userConfig - User-provided configuration
  * @returns Validated and merged configuration
  */
- export declare function validateConfig(userConfig?: SpeechOSConfig): Required<SpeechOSConfig>;
+ export declare function validateConfig(userConfig: SpeechOSCoreConfig): ResolvedConfig;
  /**
  * Get the current configuration
  */
- export declare function getConfig(): Required<SpeechOSConfig>;
+ export declare function getConfig(): ResolvedConfig;
  /**
  * Set the current configuration
  * @param config - Configuration to set
  */
- export declare function setConfig(config: SpeechOSConfig): void;
+ export declare function setConfig(config: SpeechOSCoreConfig): void;
  /**
  * Reset configuration to defaults
  */
@@ -34,3 +39,4 @@ export declare function resetConfig(): void;
  * @param userId - The user identifier to set
  */
  export declare function updateUserId(userId: string): void;
+ export {};
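The config change narrows validateConfig/getConfig/setConfig from Required<SpeechOSConfig> to an internal ResolvedConfig with a SpeechOSCoreConfig input. A hedged sketch of how the new shape is likely used; the optional fields of SpeechOSCoreConfig are inferred from ResolvedConfig, and the root import path is an assumption, neither confirmed by this diff.

// Sketch only: assumes setConfig/getConfig/updateUserId are re-exported from
// the package entry point, and that host/debug are optional on
// SpeechOSCoreConfig (guessed from ResolvedConfig above).
import { setConfig, getConfig, updateUserId } from '@speechos/core';

setConfig({
  apiKey: 'sk_live_example',   // placeholder key
  userId: 'user-123',
  debug: true,                 // host presumably falls back to DEFAULT_HOST when omitted
});

updateUserId('user-456');      // switch the active user on the existing config

const config = getConfig();    // ResolvedConfig: every field present after defaults
console.log(config.host, config.debug);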