@speechos/core 0.2.9 → 0.2.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -10,9 +10,7 @@ import { events } from "./events.js";
10
10
  /**
11
11
  * SpeechOS Core SDK
12
12
  *
13
- * Provides two API layers:
14
- * 1. Low-level API: Granular control over LiveKit connection lifecycle
15
- * 2. High-level API: One-shot methods for common voice tasks
13
+ * Provides a high-level API for common voice tasks.
16
14
  */
17
15
  declare class SpeechOSCore {
18
16
  private initialized;
@@ -26,36 +24,7 @@ declare class SpeechOSCore {
26
24
  */
27
25
  isInitialized(): boolean;
28
26
  /**
29
- * Connect to LiveKit (fetches token, establishes connection)
30
- * Call this before other low-level methods
31
- */
32
- connect(): Promise<void>;
33
- /**
34
- * Wait until the agent is ready to receive audio
35
- * Resolves when the agent subscribes to our audio track
36
- */
37
- waitUntilReady(): Promise<void>;
38
- /**
39
- * Enable microphone (user is now being recorded)
40
- */
41
- enableMicrophone(): Promise<void>;
42
- /**
43
- * Stop recording and get the transcript
44
- * @returns The transcribed text
45
- */
46
- stopAndGetTranscript(): Promise<string>;
47
- /**
48
- * Stop recording and get edited text
49
- * @param originalText - The original text to edit based on voice instructions
50
- * @returns The edited text
51
- */
52
- stopAndEdit(originalText: string): Promise<string>;
53
- /**
54
- * Disconnect from LiveKit
55
- */
56
- disconnect(): Promise<void>;
57
- /**
58
- * One-shot dictation: connect, wait for agent, record, and get transcript
27
+ * One-shot dictation: connect, record, and get transcript
59
28
  * Automatically handles the full voice session lifecycle
60
29
  *
61
30
  * @returns The transcribed text
@@ -69,7 +38,7 @@ declare class SpeechOSCore {
69
38
  */
70
39
  stopDictation(): Promise<string>;
71
40
  /**
72
- * One-shot edit: connect, wait for agent, record voice instructions, apply to text
41
+ * One-shot edit: connect, record voice instructions, apply to text
73
42
  * Automatically handles the full voice session lifecycle
74
43
  *
75
44
  * @param originalText - The text to edit
@@ -85,21 +54,23 @@ declare class SpeechOSCore {
85
54
  */
86
55
  stopEdit(): Promise<string>;
87
56
  /**
88
- * One-shot command: connect, wait for agent, record voice, match against commands
57
+ * One-shot command: connect, record voice, match against commands
89
58
  * Automatically handles the full voice session lifecycle
90
59
  *
91
60
  * @param commands - Array of command definitions to match against
92
- * @returns The matched command result or null if no match
61
+ * @returns Array of matched commands (empty array if no matches)
93
62
  */
94
- command(commands: CommandDefinition[]): Promise<CommandResult | null>;
63
+ command(commands: CommandDefinition[]): Promise<CommandResult[]>;
95
64
  private _commandCommands?;
96
65
  private _commandResolve?;
97
66
  private _commandReject?;
98
67
  /**
99
- * Stop command recording and get the matched command
68
+ * Stop command recording and get the matched commands
100
69
  * Call this after command() when user stops speaking
70
+ *
71
+ * @returns Array of matched commands (empty array if no matches)
101
72
  */
102
- stopCommand(): Promise<CommandResult | null>;
73
+ stopCommand(): Promise<CommandResult[]>;
103
74
  /**
104
75
  * Cancel the current operation
105
76
  */
@@ -10,9 +10,7 @@ import { events } from "./events.js";
10
10
  /**
11
11
  * SpeechOS Core SDK
12
12
  *
13
- * Provides two API layers:
14
- * 1. Low-level API: Granular control over LiveKit connection lifecycle
15
- * 2. High-level API: One-shot methods for common voice tasks
13
+ * Provides a high-level API for common voice tasks.
16
14
  */
17
15
  declare class SpeechOSCore {
18
16
  private initialized;
@@ -26,36 +24,7 @@ declare class SpeechOSCore {
26
24
  */
27
25
  isInitialized(): boolean;
28
26
  /**
29
- * Connect to LiveKit (fetches token, establishes connection)
30
- * Call this before other low-level methods
31
- */
32
- connect(): Promise<void>;
33
- /**
34
- * Wait until the agent is ready to receive audio
35
- * Resolves when the agent subscribes to our audio track
36
- */
37
- waitUntilReady(): Promise<void>;
38
- /**
39
- * Enable microphone (user is now being recorded)
40
- */
41
- enableMicrophone(): Promise<void>;
42
- /**
43
- * Stop recording and get the transcript
44
- * @returns The transcribed text
45
- */
46
- stopAndGetTranscript(): Promise<string>;
47
- /**
48
- * Stop recording and get edited text
49
- * @param originalText - The original text to edit based on voice instructions
50
- * @returns The edited text
51
- */
52
- stopAndEdit(originalText: string): Promise<string>;
53
- /**
54
- * Disconnect from LiveKit
55
- */
56
- disconnect(): Promise<void>;
57
- /**
58
- * One-shot dictation: connect, wait for agent, record, and get transcript
27
+ * One-shot dictation: connect, record, and get transcript
59
28
  * Automatically handles the full voice session lifecycle
60
29
  *
61
30
  * @returns The transcribed text
@@ -69,7 +38,7 @@ declare class SpeechOSCore {
69
38
  */
70
39
  stopDictation(): Promise<string>;
71
40
  /**
72
- * One-shot edit: connect, wait for agent, record voice instructions, apply to text
41
+ * One-shot edit: connect, record voice instructions, apply to text
73
42
  * Automatically handles the full voice session lifecycle
74
43
  *
75
44
  * @param originalText - The text to edit
@@ -85,21 +54,23 @@ declare class SpeechOSCore {
85
54
  */
86
55
  stopEdit(): Promise<string>;
87
56
  /**
88
- * One-shot command: connect, wait for agent, record voice, match against commands
57
+ * One-shot command: connect, record voice, match against commands
89
58
  * Automatically handles the full voice session lifecycle
90
59
  *
91
60
  * @param commands - Array of command definitions to match against
92
- * @returns The matched command result or null if no match
61
+ * @returns Array of matched commands (empty array if no matches)
93
62
  */
94
- command(commands: CommandDefinition[]): Promise<CommandResult | null>;
63
+ command(commands: CommandDefinition[]): Promise<CommandResult[]>;
95
64
  private _commandCommands?;
96
65
  private _commandResolve?;
97
66
  private _commandReject?;
98
67
  /**
99
- * Stop command recording and get the matched command
68
+ * Stop command recording and get the matched commands
100
69
  * Call this after command() when user stops speaking
70
+ *
71
+ * @returns Array of matched commands (empty array if no matches)
101
72
  */
102
- stopCommand(): Promise<CommandResult | null>;
73
+ stopCommand(): Promise<CommandResult[]>;
103
74
  /**
104
75
  * Cancel the current operation
105
76
  */
package/dist/state.d.cts CHANGED
@@ -49,6 +49,16 @@ declare class StateManager {
49
49
  * @param element - The form element that has focus
50
50
  */
51
51
  setFocusedElement(element: HTMLElement | null): void;
52
+ /**
53
+ * Set the current text selection
54
+ * @param text - Selected text (null to clear)
55
+ * @param element - Element associated with selection
56
+ */
57
+ setSelection(text: string | null, element: HTMLElement | null): void;
58
+ /**
59
+ * Clear the current text selection
60
+ */
61
+ clearSelection(): void;
52
62
  /**
53
63
  * Set the active action
54
64
  * @param action - The action to set as active
@@ -61,7 +71,7 @@ declare class StateManager {
61
71
  setRecordingState(recordingState: SpeechOSState["recordingState"]): void;
62
72
  /**
63
73
  * Set the connection state
64
- * @param isConnected - Whether connected to LiveKit
74
+ * @param isConnected - Whether connected to the backend
65
75
  */
66
76
  setConnected(isConnected: boolean): void;
67
77
  /**
package/dist/state.d.ts CHANGED
@@ -49,6 +49,16 @@ declare class StateManager {
49
49
  * @param element - The form element that has focus
50
50
  */
51
51
  setFocusedElement(element: HTMLElement | null): void;
52
+ /**
53
+ * Set the current text selection
54
+ * @param text - Selected text (null to clear)
55
+ * @param element - Element associated with selection
56
+ */
57
+ setSelection(text: string | null, element: HTMLElement | null): void;
58
+ /**
59
+ * Clear the current text selection
60
+ */
61
+ clearSelection(): void;
52
62
  /**
53
63
  * Set the active action
54
64
  * @param action - The action to set as active
@@ -61,7 +71,7 @@ declare class StateManager {
61
71
  setRecordingState(recordingState: SpeechOSState["recordingState"]): void;
62
72
  /**
63
73
  * Set the connection state
64
- * @param isConnected - Whether connected to LiveKit
74
+ * @param isConnected - Whether connected to the backend
65
75
  */
66
76
  setConnected(isConnected: boolean): void;
67
77
  /**
package/dist/tts.d.cts ADDED
@@ -0,0 +1,74 @@
1
+ /**
2
+ * TTS (Text-to-Speech) client for SpeechOS SDK
3
+ *
4
+ * Provides methods to synthesize speech from text using the SpeechOS TTS API.
5
+ * This is a headless module - audio playback is handled by @speechos/client.
6
+ */
7
+ /**
8
+ * Default TTS voice ID (matches server default).
9
+ * The server validates voice IDs - pass any valid voice ID or omit to use default.
10
+ */
11
+ export declare const DEFAULT_TTS_VOICE_ID = "21m00Tcm4TlvDq8ikWAM";
12
+ /**
13
+ * Options for TTS synthesis
14
+ */
15
+ export interface TTSOptions {
16
+ /** Voice ID. Server uses its default if not specified. */
17
+ voiceId?: string;
18
+ /** Language code (e.g., 'en', 'es', 'fr'). Defaults to 'en'. */
19
+ language?: string;
20
+ /** Optional abort signal for cancelling the request. */
21
+ signal?: AbortSignal;
22
+ }
23
+ /**
24
+ * Result of TTS synthesis
25
+ */
26
+ export interface TTSResult {
27
+ /** Audio data as ArrayBuffer (MP3 format) */
28
+ audio: ArrayBuffer;
29
+ /** Content type of the audio (e.g., 'audio/mpeg') */
30
+ contentType: string;
31
+ }
32
+ /**
33
+ * TTS error codes
34
+ */
35
+ export type TTSErrorCode = "invalid_request" | "usage_limit_exceeded" | "authentication_failed" | "network_error" | "unknown_error";
36
+ /**
37
+ * TTS Client for synthesizing speech from text
38
+ */
39
+ export declare class TTSClient {
40
+ /**
41
+ * Synthesize text to speech and return audio bytes
42
+ *
43
+ * @param text - Text to synthesize (max 1000 chars)
44
+ * @param options - Optional synthesis options
45
+ * @returns Audio data and content type
46
+ *
47
+ * @example
48
+ * ```typescript
49
+ * const result = await tts.synthesize('Hello world');
50
+ * console.log(result.audio); // ArrayBuffer
51
+ * console.log(result.contentType); // 'audio/mpeg'
52
+ * ```
53
+ */
54
+ synthesize(text: string, options?: TTSOptions): Promise<TTSResult>;
55
+ /**
56
+ * Stream TTS audio chunks as they arrive from the server
57
+ *
58
+ * Useful for progressive playback or processing large texts.
59
+ *
60
+ * @param text - Text to synthesize (max 1000 chars)
61
+ * @param options - Optional synthesis options
62
+ * @yields Audio chunks as Uint8Array
63
+ *
64
+ * @example
65
+ * ```typescript
66
+ * const chunks: Uint8Array[] = [];
67
+ * for await (const chunk of tts.stream('Hello world')) {
68
+ * chunks.push(chunk);
69
+ * }
70
+ * ```
71
+ */
72
+ stream(text: string, options?: TTSOptions): AsyncGenerator<Uint8Array>;
73
+ }
74
+ export declare const tts: TTSClient;
package/dist/tts.d.ts ADDED
@@ -0,0 +1,74 @@
1
+ /**
2
+ * TTS (Text-to-Speech) client for SpeechOS SDK
3
+ *
4
+ * Provides methods to synthesize speech from text using the SpeechOS TTS API.
5
+ * This is a headless module - audio playback is handled by @speechos/client.
6
+ */
7
+ /**
8
+ * Default TTS voice ID (matches server default).
9
+ * The server validates voice IDs - pass any valid voice ID or omit to use default.
10
+ */
11
+ export declare const DEFAULT_TTS_VOICE_ID = "21m00Tcm4TlvDq8ikWAM";
12
+ /**
13
+ * Options for TTS synthesis
14
+ */
15
+ export interface TTSOptions {
16
+ /** Voice ID. Server uses its default if not specified. */
17
+ voiceId?: string;
18
+ /** Language code (e.g., 'en', 'es', 'fr'). Defaults to 'en'. */
19
+ language?: string;
20
+ /** Optional abort signal for cancelling the request. */
21
+ signal?: AbortSignal;
22
+ }
23
+ /**
24
+ * Result of TTS synthesis
25
+ */
26
+ export interface TTSResult {
27
+ /** Audio data as ArrayBuffer (MP3 format) */
28
+ audio: ArrayBuffer;
29
+ /** Content type of the audio (e.g., 'audio/mpeg') */
30
+ contentType: string;
31
+ }
32
+ /**
33
+ * TTS error codes
34
+ */
35
+ export type TTSErrorCode = "invalid_request" | "usage_limit_exceeded" | "authentication_failed" | "network_error" | "unknown_error";
36
+ /**
37
+ * TTS Client for synthesizing speech from text
38
+ */
39
+ export declare class TTSClient {
40
+ /**
41
+ * Synthesize text to speech and return audio bytes
42
+ *
43
+ * @param text - Text to synthesize (max 1000 chars)
44
+ * @param options - Optional synthesis options
45
+ * @returns Audio data and content type
46
+ *
47
+ * @example
48
+ * ```typescript
49
+ * const result = await tts.synthesize('Hello world');
50
+ * console.log(result.audio); // ArrayBuffer
51
+ * console.log(result.contentType); // 'audio/mpeg'
52
+ * ```
53
+ */
54
+ synthesize(text: string, options?: TTSOptions): Promise<TTSResult>;
55
+ /**
56
+ * Stream TTS audio chunks as they arrive from the server
57
+ *
58
+ * Useful for progressive playback or processing large texts.
59
+ *
60
+ * @param text - Text to synthesize (max 1000 chars)
61
+ * @param options - Optional synthesis options
62
+ * @yields Audio chunks as Uint8Array
63
+ *
64
+ * @example
65
+ * ```typescript
66
+ * const chunks: Uint8Array[] = [];
67
+ * for await (const chunk of tts.stream('Hello world')) {
68
+ * chunks.push(chunk);
69
+ * }
70
+ * ```
71
+ */
72
+ stream(text: string, options?: TTSOptions): AsyncGenerator<Uint8Array>;
73
+ }
74
+ export declare const tts: TTSClient;
package/dist/types.d.cts CHANGED
@@ -2,7 +2,7 @@
2
2
  * Shared TypeScript types for SpeechOS Core SDK
3
3
  */
4
4
  /**
5
- * Server error message structure received via LiveKit data channel
5
+ * Server error message structure received via WebSocket
6
6
  */
7
7
  export interface ServerErrorMessage {
8
8
  type: "error";
@@ -17,12 +17,6 @@ export interface ServerErrorMessage {
17
17
  * Error source indicating where the error originated
18
18
  */
19
19
  export type ErrorSource = "init" | "connection" | "timeout" | "server";
20
- /**
21
- * Backend type for voice sessions
22
- * - 'websocket': Direct WebSocket connection (lower latency, recommended)
23
- * - 'livekit': LiveKit WebRTC connection (legacy)
24
- */
25
- export type VoiceBackend = "websocket" | "livekit";
26
20
  /**
27
21
  * Configuration options for initializing SpeechOS Core
28
22
  */
@@ -85,15 +79,6 @@ export interface VoiceSessionOptions {
85
79
  /** User settings for this session */
86
80
  settings?: SessionSettings;
87
81
  }
88
- /**
89
- * LiveKit token response from the backend
90
- */
91
- export interface LiveKitTokenResponse {
92
- token: string;
93
- ws_url: string;
94
- room: string;
95
- identity: string;
96
- }
97
82
  /**
98
83
  * User vocabulary data sent with transcription/edit requests
99
84
  * Includes custom vocabulary terms for improved transcription accuracy
@@ -113,7 +98,7 @@ export interface UserVocabularyData {
113
98
  /**
114
99
  * Available actions that can be triggered from the widget
115
100
  */
116
- export type SpeechOSAction = "dictate" | "edit" | "command";
101
+ export type SpeechOSAction = "dictate" | "edit" | "command" | "read";
117
102
  /**
118
103
  * Definition of a command argument
119
104
  */
@@ -159,7 +144,7 @@ export interface SpeechOSState {
159
144
  isVisible: boolean;
160
145
  /** Whether the action bubbles are expanded */
161
146
  isExpanded: boolean;
162
- /** Whether connected to LiveKit room */
147
+ /** Whether connected to the backend */
163
148
  isConnected: boolean;
164
149
  /** Whether microphone is enabled and publishing */
165
150
  isMicEnabled: boolean;
@@ -167,6 +152,10 @@ export interface SpeechOSState {
167
152
  activeAction: SpeechOSAction | null;
168
153
  /** The form field element that currently has focus (set by client) */
169
154
  focusedElement: HTMLElement | null;
155
+ /** Currently selected text (if any) */
156
+ selectionText: string | null;
157
+ /** Element associated with the current selection (if any) */
158
+ selectionElement: HTMLElement | null;
170
159
  /** Current recording state */
171
160
  recordingState: RecordingState;
172
161
  /** Error message to display (if any) */
@@ -192,10 +181,15 @@ export interface SpeechOSEventMap {
192
181
  "widget:show": void;
193
182
  /** Emitted when the widget is hidden */
194
183
  "widget:hide": void;
195
- /** Emitted when user selects an action (dictate/edit) */
184
+ /** Emitted when user selects an action */
196
185
  "action:select": {
197
186
  action: SpeechOSAction;
198
187
  };
188
+ /** Emitted when selected text changes (empty string when cleared) */
189
+ "selection:change": {
190
+ text: string;
191
+ element: HTMLElement | null;
192
+ };
199
193
  /** Emitted when internal state changes */
200
194
  "state:change": {
201
195
  state: SpeechOSState;
@@ -214,9 +208,9 @@ export interface SpeechOSEventMap {
214
208
  text: string;
215
209
  originalText: string;
216
210
  };
217
- /** Emitted when command matching completes (null if no command matched) */
211
+ /** Emitted when command matching completes (empty array if no commands matched) */
218
212
  "command:complete": {
219
- command: CommandResult | null;
213
+ commands: CommandResult[];
220
214
  };
221
215
  /** Emitted when transcribed text is inserted into a form field */
222
216
  "transcription:inserted": {
@@ -232,7 +226,7 @@ export interface SpeechOSEventMap {
232
226
  /** Emitted when user settings change (language, snippets, vocabulary, smartFormat, history) */
233
227
  "settings:changed": {
234
228
  /** Type of setting that changed */
235
- setting: "language" | "snippets" | "vocabulary" | "smartFormat" | "history";
229
+ setting: "language" | "snippets" | "vocabulary" | "smartFormat" | "history" | "voice";
236
230
  };
237
231
  /** Emitted when settings are loaded from the server */
238
232
  "settings:loaded": void;
@@ -244,6 +238,32 @@ export interface SpeechOSEventMap {
244
238
  };
245
239
  /** Emitted when the settings token expires (user should request a new one) */
246
240
  "settings:tokenExpired": void;
241
+ /** Emitted when a TTS synthesis request begins */
242
+ "tts:synthesize:start": {
243
+ text: string;
244
+ };
245
+ /** Emitted when audio bytes are fully received from the server */
246
+ "tts:synthesize:complete": {
247
+ text: string;
248
+ };
249
+ /** Emitted when audio playback begins */
250
+ "tts:playback:start": {
251
+ text: string;
252
+ };
253
+ /** Emitted when audio playback finishes */
254
+ "tts:playback:complete": {
255
+ text: string;
256
+ };
257
+ /** Emitted when audio playback is stopped */
258
+ "tts:playback:stop": {
259
+ text: string | null;
260
+ };
261
+ /** Emitted when an error occurs during TTS synthesis or playback */
262
+ "tts:error": {
263
+ code: string;
264
+ message: string;
265
+ phase: "synthesize" | "playback";
266
+ };
247
267
  /** Emitted when an error occurs */
248
268
  error: {
249
269
  code: string;
package/dist/types.d.ts CHANGED
@@ -2,7 +2,7 @@
2
2
  * Shared TypeScript types for SpeechOS Core SDK
3
3
  */
4
4
  /**
5
- * Server error message structure received via LiveKit data channel
5
+ * Server error message structure received via WebSocket
6
6
  */
7
7
  export interface ServerErrorMessage {
8
8
  type: "error";
@@ -17,12 +17,6 @@ export interface ServerErrorMessage {
17
17
  * Error source indicating where the error originated
18
18
  */
19
19
  export type ErrorSource = "init" | "connection" | "timeout" | "server";
20
- /**
21
- * Backend type for voice sessions
22
- * - 'websocket': Direct WebSocket connection (lower latency, recommended)
23
- * - 'livekit': LiveKit WebRTC connection (legacy)
24
- */
25
- export type VoiceBackend = "websocket" | "livekit";
26
20
  /**
27
21
  * Configuration options for initializing SpeechOS Core
28
22
  */
@@ -85,15 +79,6 @@ export interface VoiceSessionOptions {
85
79
  /** User settings for this session */
86
80
  settings?: SessionSettings;
87
81
  }
88
- /**
89
- * LiveKit token response from the backend
90
- */
91
- export interface LiveKitTokenResponse {
92
- token: string;
93
- ws_url: string;
94
- room: string;
95
- identity: string;
96
- }
97
82
  /**
98
83
  * User vocabulary data sent with transcription/edit requests
99
84
  * Includes custom vocabulary terms for improved transcription accuracy
@@ -113,7 +98,7 @@ export interface UserVocabularyData {
113
98
  /**
114
99
  * Available actions that can be triggered from the widget
115
100
  */
116
- export type SpeechOSAction = "dictate" | "edit" | "command";
101
+ export type SpeechOSAction = "dictate" | "edit" | "command" | "read";
117
102
  /**
118
103
  * Definition of a command argument
119
104
  */
@@ -159,7 +144,7 @@ export interface SpeechOSState {
159
144
  isVisible: boolean;
160
145
  /** Whether the action bubbles are expanded */
161
146
  isExpanded: boolean;
162
- /** Whether connected to LiveKit room */
147
+ /** Whether connected to the backend */
163
148
  isConnected: boolean;
164
149
  /** Whether microphone is enabled and publishing */
165
150
  isMicEnabled: boolean;
@@ -167,6 +152,10 @@ export interface SpeechOSState {
167
152
  activeAction: SpeechOSAction | null;
168
153
  /** The form field element that currently has focus (set by client) */
169
154
  focusedElement: HTMLElement | null;
155
+ /** Currently selected text (if any) */
156
+ selectionText: string | null;
157
+ /** Element associated with the current selection (if any) */
158
+ selectionElement: HTMLElement | null;
170
159
  /** Current recording state */
171
160
  recordingState: RecordingState;
172
161
  /** Error message to display (if any) */
@@ -192,10 +181,15 @@ export interface SpeechOSEventMap {
192
181
  "widget:show": void;
193
182
  /** Emitted when the widget is hidden */
194
183
  "widget:hide": void;
195
- /** Emitted when user selects an action (dictate/edit) */
184
+ /** Emitted when user selects an action */
196
185
  "action:select": {
197
186
  action: SpeechOSAction;
198
187
  };
188
+ /** Emitted when selected text changes (empty string when cleared) */
189
+ "selection:change": {
190
+ text: string;
191
+ element: HTMLElement | null;
192
+ };
199
193
  /** Emitted when internal state changes */
200
194
  "state:change": {
201
195
  state: SpeechOSState;
@@ -214,9 +208,9 @@ export interface SpeechOSEventMap {
214
208
  text: string;
215
209
  originalText: string;
216
210
  };
217
- /** Emitted when command matching completes (null if no command matched) */
211
+ /** Emitted when command matching completes (empty array if no commands matched) */
218
212
  "command:complete": {
219
- command: CommandResult | null;
213
+ commands: CommandResult[];
220
214
  };
221
215
  /** Emitted when transcribed text is inserted into a form field */
222
216
  "transcription:inserted": {
@@ -232,7 +226,7 @@ export interface SpeechOSEventMap {
232
226
  /** Emitted when user settings change (language, snippets, vocabulary, smartFormat, history) */
233
227
  "settings:changed": {
234
228
  /** Type of setting that changed */
235
- setting: "language" | "snippets" | "vocabulary" | "smartFormat" | "history";
229
+ setting: "language" | "snippets" | "vocabulary" | "smartFormat" | "history" | "voice";
236
230
  };
237
231
  /** Emitted when settings are loaded from the server */
238
232
  "settings:loaded": void;
@@ -244,6 +238,32 @@ export interface SpeechOSEventMap {
244
238
  };
245
239
  /** Emitted when the settings token expires (user should request a new one) */
246
240
  "settings:tokenExpired": void;
241
+ /** Emitted when a TTS synthesis request begins */
242
+ "tts:synthesize:start": {
243
+ text: string;
244
+ };
245
+ /** Emitted when audio bytes are fully received from the server */
246
+ "tts:synthesize:complete": {
247
+ text: string;
248
+ };
249
+ /** Emitted when audio playback begins */
250
+ "tts:playback:start": {
251
+ text: string;
252
+ };
253
+ /** Emitted when audio playback finishes */
254
+ "tts:playback:complete": {
255
+ text: string;
256
+ };
257
+ /** Emitted when audio playback is stopped */
258
+ "tts:playback:stop": {
259
+ text: string | null;
260
+ };
261
+ /** Emitted when an error occurs during TTS synthesis or playback */
262
+ "tts:error": {
263
+ code: string;
264
+ message: string;
265
+ phase: "synthesize" | "playback";
266
+ };
247
267
  /** Emitted when an error occurs */
248
268
  error: {
249
269
  code: string;
@@ -1,8 +1,8 @@
1
1
  /**
2
2
  * WebSocket integration for SpeechOS SDK.
3
3
  *
4
- * Provides a direct WebSocket connection to the backend for voice sessions,
5
- * bypassing LiveKit for lower latency. Uses audio buffering to capture
4
+ * Provides a direct WebSocket connection to the backend for voice sessions.
5
+ * Uses audio buffering to capture
6
6
  * audio immediately while the connection is being established.
7
7
  */
8
8
  import type { CommandDefinition, CommandResult, ErrorSource, VoiceSessionOptions } from './types.js';
@@ -92,8 +92,9 @@ declare class WebSocketManager {
92
92
  /**
93
93
  * Request command matching using the transcript as input.
94
94
  * Note: The command definitions were already sent in the auth message via startVoiceSession.
95
+ * Returns an array of matched commands (empty array if no matches).
95
96
  */
96
- requestCommand(_commands: CommandDefinition[]): Promise<CommandResult | null>;
97
+ requestCommand(_commands: CommandDefinition[]): Promise<CommandResult[]>;
97
98
  /**
98
99
  * Stop audio capture and wait for all data to be sent.
99
100
  *
@@ -108,7 +109,7 @@ declare class WebSocketManager {
108
109
  * Wait for the WebSocket send buffer to drain.
109
110
  *
110
111
  * This ensures all audio data has been transmitted before we request
111
- * the transcript. Uses the same pattern as LiveKit's ReadableStream approach.
112
+ * the transcript.
112
113
  */
113
114
  private waitForBufferDrain;
114
115
  /**