@speechos/core 0.2.0 → 0.2.3

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
package/dist/types.d.ts CHANGED
@@ -18,22 +18,60 @@ export interface ServerErrorMessage {
18
18
  */
19
19
  export type ErrorSource = "init" | "connection" | "timeout" | "server";
20
20
  /**
21
- * Configuration options for initializing SpeechOS
21
+ * Backend type for voice sessions
22
+ * - 'websocket': Direct WebSocket connection (lower latency, recommended)
23
+ * - 'livekit': LiveKit WebRTC connection (legacy)
22
24
  */
23
- export interface SpeechOSConfig {
24
- /** API key for authentication with SpeechOS backend */
25
- apiKey?: string;
25
+ export type VoiceBackend = "websocket" | "livekit";
26
+ /**
27
+ * Configuration options for initializing SpeechOS Core
28
+ */
29
+ export interface SpeechOSCoreConfig {
30
+ /** API key for authentication with SpeechOS backend (required) */
31
+ apiKey: string;
26
32
  /** Optional user identifier for tracking which end user is using the SDK */
27
33
  userId?: string;
28
34
  /** Backend host URL for API calls (default: https://app.speechos.ai) */
29
35
  host?: string;
30
- /** Position of the widget on screen (used by client package) */
31
- position?: "bottom-center" | "bottom-right" | "bottom-left";
32
- /** Custom z-index for widget overlay (used by client package) */
33
- zIndex?: number;
34
36
  /** Enable debug logging */
35
37
  debug?: boolean;
36
38
  }
39
+ /**
40
+ * Session settings passed when starting a voice session
41
+ * Contains user preferences for transcription and processing
42
+ */
43
+ export interface SessionSettings {
44
+ /** Input language code for speech recognition (e.g., "en-US", "es", "fr") */
45
+ inputLanguageCode?: string;
46
+ /** Output language code for transcription formatting */
47
+ outputLanguageCode?: string;
48
+ /** Whether to apply AI formatting (removes filler words, adds punctuation) */
49
+ smartFormat?: boolean;
50
+ /** Custom vocabulary terms to improve transcription accuracy */
51
+ vocabulary?: string[];
52
+ /** Text snippets with trigger phrases that expand to full text */
53
+ snippets?: Array<{
54
+ trigger: string;
55
+ expansion: string;
56
+ }>;
57
+ /** Audio input device ID (empty string for system default) */
58
+ audioDeviceId?: string;
59
+ }
60
+ /**
61
+ * Options for starting a voice session
62
+ */
63
+ export interface VoiceSessionOptions {
64
+ /** Callback when microphone is ready and capturing */
65
+ onMicReady?: () => void;
66
+ /** Action type for this session */
67
+ action?: SpeechOSAction;
68
+ /** Text to edit (for edit action) */
69
+ inputText?: string;
70
+ /** Command definitions (for command action) */
71
+ commands?: CommandDefinition[];
72
+ /** User settings for this session */
73
+ settings?: SessionSettings;
74
+ }
37
75
  /**
38
76
  * LiveKit token response from the backend
39
77
  */
@@ -43,10 +81,59 @@ export interface LiveKitTokenResponse {
43
81
  room: string;
44
82
  identity: string;
45
83
  }
84
+ /**
85
+ * User vocabulary data sent with transcription/edit requests
86
+ * Includes custom vocabulary terms for improved transcription accuracy
87
+ * and text snippets that can be expanded from trigger phrases
88
+ */
89
+ export interface UserVocabularyData {
90
+ /** Custom vocabulary terms to improve transcription of domain-specific words */
91
+ vocabulary: string[];
92
+ /** Text snippets with trigger phrases that expand to full text */
93
+ snippets: Array<{
94
+ /** Short trigger phrase the user speaks */
95
+ trigger: string;
96
+ /** Full text to expand the trigger into */
97
+ expansion: string;
98
+ }>;
99
+ }
46
100
  /**
47
101
  * Available actions that can be triggered from the widget
48
102
  */
49
- export type SpeechOSAction = "dictate" | "edit";
103
+ export type SpeechOSAction = "dictate" | "edit" | "command";
104
+ /**
105
+ * Definition of a command argument
106
+ */
107
+ export interface CommandArgument {
108
+ /** Name of the argument (used as key in the result) */
109
+ name: string;
110
+ /** Description of what this argument represents */
111
+ description: string;
112
+ /** Type of the argument value */
113
+ type?: "string" | "number" | "integer" | "boolean";
114
+ /** Whether this argument is required (default: true) */
115
+ required?: boolean;
116
+ }
117
+ /**
118
+ * Definition of a command that can be matched
119
+ */
120
+ export interface CommandDefinition {
121
+ /** Unique name/identifier for the command */
122
+ name: string;
123
+ /** Description of what this command does (helps LLM match intent) */
124
+ description: string;
125
+ /** Arguments that can be extracted from the user's speech */
126
+ arguments?: CommandArgument[];
127
+ }
128
+ /**
129
+ * Result of a successful command match
130
+ */
131
+ export interface CommandResult {
132
+ /** Name of the matched command */
133
+ name: string;
134
+ /** Extracted argument values */
135
+ arguments: Record<string, unknown>;
136
+ }
50
137
  /**
51
138
  * Recording/dictation states
52
139
  */
@@ -109,6 +196,10 @@ export interface SpeechOSEventMap {
109
196
  text: string;
110
197
  originalText: string;
111
198
  };
199
+ /** Emitted when command matching completes (null if no command matched) */
200
+ "command:complete": {
201
+ command: CommandResult | null;
202
+ };
112
203
  /** Emitted when transcribed text is inserted into a form field */
113
204
  "transcription:inserted": {
114
205
  text: string;
@@ -120,6 +211,11 @@ export interface SpeechOSEventMap {
120
211
  editedContent: string;
121
212
  element: HTMLElement;
122
213
  };
214
+ /** Emitted when user settings change (language, snippets, vocabulary, smartFormat) */
215
+ "settings:changed": {
216
+ /** Type of setting that changed */
217
+ setting: "language" | "snippets" | "vocabulary" | "smartFormat";
218
+ };
123
219
  /** Emitted when an error occurs */
124
220
  error: {
125
221
  code: string;
@@ -0,0 +1,133 @@
1
+ /**
2
+ * WebSocket integration for SpeechOS SDK.
3
+ *
4
+ * Provides a direct WebSocket connection to the backend for voice sessions,
5
+ * bypassing LiveKit for lower latency. Uses audio buffering to capture
6
+ * audio immediately while the connection is being established.
7
+ */
8
+ import type { CommandDefinition, CommandResult, ErrorSource, VoiceSessionOptions } from './types.js';
9
+ /**
10
+ * A deferred promise with timeout support.
11
+ */
12
+ export declare class Deferred<T> {
13
+ readonly promise: Promise<T>;
14
+ private _resolve;
15
+ private _reject;
16
+ private _timeoutId;
17
+ private _settled;
18
+ constructor();
19
+ setTimeout(ms: number, errorMessage: string, errorCode: string, errorSource: ErrorSource): void;
20
+ resolve(value: T): void;
21
+ reject(error: Error): void;
22
+ private clearTimeout;
23
+ get isSettled(): boolean;
24
+ }
25
+ /**
26
+ * WebSocket connection manager for voice sessions.
27
+ */
28
+ declare class WebSocketManager {
29
+ private ws;
30
+ private audioCapture;
31
+ private sessionId;
32
+ private pendingAuth;
33
+ private pendingTranscript;
34
+ private pendingEditText;
35
+ private pendingCommand;
36
+ private pendingAudioSends;
37
+ private editOriginalText;
38
+ private lastInputText;
39
+ private sessionAction;
40
+ private sessionInputText;
41
+ private sessionCommands;
42
+ private sessionSettings;
43
+ /**
44
+ * Get the WebSocket URL for voice sessions.
45
+ */
46
+ private getWebSocketUrl;
47
+ /**
48
+ * Start a voice session with the WebSocket backend.
49
+ *
50
+ * This method:
51
+ * 1. Starts audio capture immediately (buffering)
52
+ * 2. Opens WebSocket connection
53
+ * 3. Authenticates with API key and action parameters
54
+ * 4. Flushes buffered audio and continues streaming
55
+ *
56
+ * @param options - Session options including action type and parameters
57
+ */
58
+ startVoiceSession(options?: VoiceSessionOptions): Promise<void>;
59
+ /**
60
+ * Send authentication message with action parameters.
61
+ * All session parameters are now sent upfront in the auth message.
62
+ */
63
+ private authenticate;
64
+ /**
65
+ * Send an audio chunk over the WebSocket.
66
+ * Tracks the promise so we can wait for all sends to complete.
67
+ */
68
+ private sendAudioChunk;
69
+ /**
70
+ * Actually send the audio chunk (async operation).
71
+ */
72
+ private doSendAudioChunk;
73
+ /**
74
+ * Handle incoming WebSocket messages.
75
+ */
76
+ private handleMessage;
77
+ private handleReady;
78
+ private handleIntermediateTranscription;
79
+ private handleFinalTranscript;
80
+ private handleEditedText;
81
+ private handleCommandResult;
82
+ private handleError;
83
+ /**
84
+ * Stop the voice session and request the transcript.
85
+ */
86
+ stopVoiceSession(): Promise<string>;
87
+ /**
88
+ * Request text editing using the transcript as instructions.
89
+ * Note: The input text was already sent in the auth message via startVoiceSession.
90
+ */
91
+ requestEditText(_originalText: string): Promise<string>;
92
+ /**
93
+ * Request command matching using the transcript as input.
94
+ * Note: The command definitions were already sent in the auth message via startVoiceSession.
95
+ */
96
+ requestCommand(_commands: CommandDefinition[]): Promise<CommandResult | null>;
97
+ /**
98
+ * Stop audio capture and wait for all data to be sent.
99
+ *
100
+ * Waits for:
101
+ * 1. All pending sendAudioChunk calls to complete (arrayBuffer conversion)
102
+ * 2. WebSocket buffer to drain (all data transmitted)
103
+ *
104
+ * WebSocket message ordering ensures server receives all audio before transcript request.
105
+ */
106
+ private stopAudioCapture;
107
+ /**
108
+ * Wait for the WebSocket send buffer to drain.
109
+ *
110
+ * This ensures all audio data has been transmitted before we request
111
+ * the transcript. Uses the same pattern as LiveKit's ReadableStream approach.
112
+ */
113
+ private waitForBufferDrain;
114
+ /**
115
+ * Send a JSON message over the WebSocket.
116
+ */
117
+ private sendMessage;
118
+ /**
119
+ * Disconnect from the WebSocket.
120
+ */
121
+ disconnect(): Promise<void>;
122
+ /**
123
+ * Check if connected to WebSocket.
124
+ */
125
+ isConnected(): boolean;
126
+ /**
127
+ * Get the last input text from a command result.
128
+ * This is the raw transcript of what the user said.
129
+ */
130
+ getLastInputText(): string | undefined;
131
+ }
132
+ export declare const websocket: WebSocketManager;
133
+ export {};
@@ -0,0 +1,133 @@
1
+ /**
2
+ * WebSocket integration for SpeechOS SDK.
3
+ *
4
+ * Provides a direct WebSocket connection to the backend for voice sessions,
5
+ * bypassing LiveKit for lower latency. Uses audio buffering to capture
6
+ * audio immediately while the connection is being established.
7
+ */
8
+ import type { CommandDefinition, CommandResult, ErrorSource, VoiceSessionOptions } from './types.js';
9
+ /**
10
+ * A deferred promise with timeout support.
11
+ */
12
+ export declare class Deferred<T> {
13
+ readonly promise: Promise<T>;
14
+ private _resolve;
15
+ private _reject;
16
+ private _timeoutId;
17
+ private _settled;
18
+ constructor();
19
+ setTimeout(ms: number, errorMessage: string, errorCode: string, errorSource: ErrorSource): void;
20
+ resolve(value: T): void;
21
+ reject(error: Error): void;
22
+ private clearTimeout;
23
+ get isSettled(): boolean;
24
+ }
25
+ /**
26
+ * WebSocket connection manager for voice sessions.
27
+ */
28
+ declare class WebSocketManager {
29
+ private ws;
30
+ private audioCapture;
31
+ private sessionId;
32
+ private pendingAuth;
33
+ private pendingTranscript;
34
+ private pendingEditText;
35
+ private pendingCommand;
36
+ private pendingAudioSends;
37
+ private editOriginalText;
38
+ private lastInputText;
39
+ private sessionAction;
40
+ private sessionInputText;
41
+ private sessionCommands;
42
+ private sessionSettings;
43
+ /**
44
+ * Get the WebSocket URL for voice sessions.
45
+ */
46
+ private getWebSocketUrl;
47
+ /**
48
+ * Start a voice session with the WebSocket backend.
49
+ *
50
+ * This method:
51
+ * 1. Starts audio capture immediately (buffering)
52
+ * 2. Opens WebSocket connection
53
+ * 3. Authenticates with API key and action parameters
54
+ * 4. Flushes buffered audio and continues streaming
55
+ *
56
+ * @param options - Session options including action type and parameters
57
+ */
58
+ startVoiceSession(options?: VoiceSessionOptions): Promise<void>;
59
+ /**
60
+ * Send authentication message with action parameters.
61
+ * All session parameters are now sent upfront in the auth message.
62
+ */
63
+ private authenticate;
64
+ /**
65
+ * Send an audio chunk over the WebSocket.
66
+ * Tracks the promise so we can wait for all sends to complete.
67
+ */
68
+ private sendAudioChunk;
69
+ /**
70
+ * Actually send the audio chunk (async operation).
71
+ */
72
+ private doSendAudioChunk;
73
+ /**
74
+ * Handle incoming WebSocket messages.
75
+ */
76
+ private handleMessage;
77
+ private handleReady;
78
+ private handleIntermediateTranscription;
79
+ private handleFinalTranscript;
80
+ private handleEditedText;
81
+ private handleCommandResult;
82
+ private handleError;
83
+ /**
84
+ * Stop the voice session and request the transcript.
85
+ */
86
+ stopVoiceSession(): Promise<string>;
87
+ /**
88
+ * Request text editing using the transcript as instructions.
89
+ * Note: The input text was already sent in the auth message via startVoiceSession.
90
+ */
91
+ requestEditText(_originalText: string): Promise<string>;
92
+ /**
93
+ * Request command matching using the transcript as input.
94
+ * Note: The command definitions were already sent in the auth message via startVoiceSession.
95
+ */
96
+ requestCommand(_commands: CommandDefinition[]): Promise<CommandResult | null>;
97
+ /**
98
+ * Stop audio capture and wait for all data to be sent.
99
+ *
100
+ * Waits for:
101
+ * 1. All pending sendAudioChunk calls to complete (arrayBuffer conversion)
102
+ * 2. WebSocket buffer to drain (all data transmitted)
103
+ *
104
+ * WebSocket message ordering ensures server receives all audio before transcript request.
105
+ */
106
+ private stopAudioCapture;
107
+ /**
108
+ * Wait for the WebSocket send buffer to drain.
109
+ *
110
+ * This ensures all audio data has been transmitted before we request
111
+ * the transcript. Uses the same pattern as LiveKit's ReadableStream approach.
112
+ */
113
+ private waitForBufferDrain;
114
+ /**
115
+ * Send a JSON message over the WebSocket.
116
+ */
117
+ private sendMessage;
118
+ /**
119
+ * Disconnect from the WebSocket.
120
+ */
121
+ disconnect(): Promise<void>;
122
+ /**
123
+ * Check if connected to WebSocket.
124
+ */
125
+ isConnected(): boolean;
126
+ /**
127
+ * Get the last input text from a command result.
128
+ * This is the raw transcript of what the user said.
129
+ */
130
+ getLastInputText(): string | undefined;
131
+ }
132
+ export declare const websocket: WebSocketManager;
133
+ export {};
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@speechos/core",
3
- "version": "0.2.0",
3
+ "version": "0.2.3",
4
4
  "description": "Headless core SDK for SpeechOS - state, events, LiveKit integration",
5
5
  "type": "module",
6
6
  "main": "./dist/index.cjs",
@@ -22,14 +22,15 @@
22
22
  },
23
23
  "repository": {
24
24
  "type": "git",
25
- "url": "https://github.com/speechos-org/speechos.git",
26
- "directory": "speechos-client/packages/core"
25
+ "url": "git+ssh://git@github.com/speechos-org/speechos-client.git",
26
+ "directory": "packages/core"
27
27
  },
28
28
  "homepage": "https://speechos.ai",
29
29
  "bugs": {
30
- "url": "https://github.com/speechos-org/speechos/issues"
30
+ "url": "https://github.com/speechos-org/speechos-client/issues"
31
31
  },
32
32
  "scripts": {
33
+ "prepare": "npm run build",
33
34
  "build": "tsdown",
34
35
  "dev": "tsdown --watch",
35
36
  "type-check": "tsc --noEmit",
@@ -1,35 +0,0 @@
1
- /**
2
- * Transcript history store
3
- * Persists transcripts to localStorage for viewing in the settings modal
4
- */
5
- export type TranscriptAction = "dictate" | "edit";
6
- export interface TranscriptEntry {
7
- id: string;
8
- text: string;
9
- timestamp: number;
10
- action: TranscriptAction;
11
- /** Original text before edit (only for edit actions) */
12
- originalText?: string;
13
- }
14
- /**
15
- * Get all transcripts from localStorage
16
- */
17
- export declare function getTranscripts(): TranscriptEntry[];
18
- /**
19
- * Save a new transcript entry
20
- */
21
- export declare function saveTranscript(text: string, action: TranscriptAction, originalText?: string): TranscriptEntry;
22
- /**
23
- * Clear all transcript history
24
- */
25
- export declare function clearTranscripts(): void;
26
- /**
27
- * Delete a single transcript by ID
28
- */
29
- export declare function deleteTranscript(id: string): void;
30
- export declare const transcriptStore: {
31
- getTranscripts: typeof getTranscripts;
32
- saveTranscript: typeof saveTranscript;
33
- clearTranscripts: typeof clearTranscripts;
34
- deleteTranscript: typeof deleteTranscript;
35
- };
@@ -1,35 +0,0 @@
1
- /**
2
- * Transcript history store
3
- * Persists transcripts to localStorage for viewing in the settings modal
4
- */
5
- export type TranscriptAction = "dictate" | "edit";
6
- export interface TranscriptEntry {
7
- id: string;
8
- text: string;
9
- timestamp: number;
10
- action: TranscriptAction;
11
- /** Original text before edit (only for edit actions) */
12
- originalText?: string;
13
- }
14
- /**
15
- * Get all transcripts from localStorage
16
- */
17
- export declare function getTranscripts(): TranscriptEntry[];
18
- /**
19
- * Save a new transcript entry
20
- */
21
- export declare function saveTranscript(text: string, action: TranscriptAction, originalText?: string): TranscriptEntry;
22
- /**
23
- * Clear all transcript history
24
- */
25
- export declare function clearTranscripts(): void;
26
- /**
27
- * Delete a single transcript by ID
28
- */
29
- export declare function deleteTranscript(id: string): void;
30
- export declare const transcriptStore: {
31
- getTranscripts: typeof getTranscripts;
32
- saveTranscript: typeof saveTranscript;
33
- clearTranscripts: typeof clearTranscripts;
34
- deleteTranscript: typeof deleteTranscript;
35
- };