@speechos/core 0.2.9 → 0.2.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/backend.d.cts +1 -16
- package/dist/backend.d.ts +1 -16
- package/dist/index.cjs +236 -853
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +4 -3
- package/dist/index.d.ts +4 -3
- package/dist/index.js +234 -831
- package/dist/index.js.map +1 -1
- package/dist/speechos.d.cts +10 -39
- package/dist/speechos.d.ts +10 -39
- package/dist/state.d.cts +11 -1
- package/dist/state.d.ts +11 -1
- package/dist/tts.d.cts +74 -0
- package/dist/tts.d.ts +74 -0
- package/dist/types.d.cts +42 -22
- package/dist/types.d.ts +42 -22
- package/dist/websocket.d.cts +5 -4
- package/dist/websocket.d.ts +5 -4
- package/package.json +2 -5
- package/dist/livekit.d.cts +0 -199
- package/dist/livekit.d.ts +0 -199
package/dist/speechos.d.cts
CHANGED
|
@@ -10,9 +10,7 @@ import { events } from "./events.js";
|
|
|
10
10
|
/**
|
|
11
11
|
* SpeechOS Core SDK
|
|
12
12
|
*
|
|
13
|
-
* Provides
|
|
14
|
-
* 1. Low-level API: Granular control over LiveKit connection lifecycle
|
|
15
|
-
* 2. High-level API: One-shot methods for common voice tasks
|
|
13
|
+
* Provides a high-level API for common voice tasks.
|
|
16
14
|
*/
|
|
17
15
|
declare class SpeechOSCore {
|
|
18
16
|
private initialized;
|
|
@@ -26,36 +24,7 @@ declare class SpeechOSCore {
|
|
|
26
24
|
*/
|
|
27
25
|
isInitialized(): boolean;
|
|
28
26
|
/**
|
|
29
|
-
*
|
|
30
|
-
* Call this before other low-level methods
|
|
31
|
-
*/
|
|
32
|
-
connect(): Promise<void>;
|
|
33
|
-
/**
|
|
34
|
-
* Wait until the agent is ready to receive audio
|
|
35
|
-
* Resolves when the agent subscribes to our audio track
|
|
36
|
-
*/
|
|
37
|
-
waitUntilReady(): Promise<void>;
|
|
38
|
-
/**
|
|
39
|
-
* Enable microphone (user is now being recorded)
|
|
40
|
-
*/
|
|
41
|
-
enableMicrophone(): Promise<void>;
|
|
42
|
-
/**
|
|
43
|
-
* Stop recording and get the transcript
|
|
44
|
-
* @returns The transcribed text
|
|
45
|
-
*/
|
|
46
|
-
stopAndGetTranscript(): Promise<string>;
|
|
47
|
-
/**
|
|
48
|
-
* Stop recording and get edited text
|
|
49
|
-
* @param originalText - The original text to edit based on voice instructions
|
|
50
|
-
* @returns The edited text
|
|
51
|
-
*/
|
|
52
|
-
stopAndEdit(originalText: string): Promise<string>;
|
|
53
|
-
/**
|
|
54
|
-
* Disconnect from LiveKit
|
|
55
|
-
*/
|
|
56
|
-
disconnect(): Promise<void>;
|
|
57
|
-
/**
|
|
58
|
-
* One-shot dictation: connect, wait for agent, record, and get transcript
|
|
27
|
+
* One-shot dictation: connect, record, and get transcript
|
|
59
28
|
* Automatically handles the full voice session lifecycle
|
|
60
29
|
*
|
|
61
30
|
* @returns The transcribed text
|
|
@@ -69,7 +38,7 @@ declare class SpeechOSCore {
|
|
|
69
38
|
*/
|
|
70
39
|
stopDictation(): Promise<string>;
|
|
71
40
|
/**
|
|
72
|
-
* One-shot edit: connect,
|
|
41
|
+
* One-shot edit: connect, record voice instructions, apply to text
|
|
73
42
|
* Automatically handles the full voice session lifecycle
|
|
74
43
|
*
|
|
75
44
|
* @param originalText - The text to edit
|
|
@@ -85,21 +54,23 @@ declare class SpeechOSCore {
|
|
|
85
54
|
*/
|
|
86
55
|
stopEdit(): Promise<string>;
|
|
87
56
|
/**
|
|
88
|
-
* One-shot command: connect,
|
|
57
|
+
* One-shot command: connect, record voice, match against commands
|
|
89
58
|
* Automatically handles the full voice session lifecycle
|
|
90
59
|
*
|
|
91
60
|
* @param commands - Array of command definitions to match against
|
|
92
|
-
* @returns
|
|
61
|
+
* @returns Array of matched commands (empty array if no matches)
|
|
93
62
|
*/
|
|
94
|
-
command(commands: CommandDefinition[]): Promise<CommandResult
|
|
63
|
+
command(commands: CommandDefinition[]): Promise<CommandResult[]>;
|
|
95
64
|
private _commandCommands?;
|
|
96
65
|
private _commandResolve?;
|
|
97
66
|
private _commandReject?;
|
|
98
67
|
/**
|
|
99
|
-
* Stop command recording and get the matched
|
|
68
|
+
* Stop command recording and get the matched commands
|
|
100
69
|
* Call this after command() when user stops speaking
|
|
70
|
+
*
|
|
71
|
+
* @returns Array of matched commands (empty array if no matches)
|
|
101
72
|
*/
|
|
102
|
-
stopCommand(): Promise<CommandResult
|
|
73
|
+
stopCommand(): Promise<CommandResult[]>;
|
|
103
74
|
/**
|
|
104
75
|
* Cancel the current operation
|
|
105
76
|
*/
|
package/dist/speechos.d.ts
CHANGED
|
@@ -10,9 +10,7 @@ import { events } from "./events.js";
|
|
|
10
10
|
/**
|
|
11
11
|
* SpeechOS Core SDK
|
|
12
12
|
*
|
|
13
|
-
* Provides
|
|
14
|
-
* 1. Low-level API: Granular control over LiveKit connection lifecycle
|
|
15
|
-
* 2. High-level API: One-shot methods for common voice tasks
|
|
13
|
+
* Provides a high-level API for common voice tasks.
|
|
16
14
|
*/
|
|
17
15
|
declare class SpeechOSCore {
|
|
18
16
|
private initialized;
|
|
@@ -26,36 +24,7 @@ declare class SpeechOSCore {
|
|
|
26
24
|
*/
|
|
27
25
|
isInitialized(): boolean;
|
|
28
26
|
/**
|
|
29
|
-
*
|
|
30
|
-
* Call this before other low-level methods
|
|
31
|
-
*/
|
|
32
|
-
connect(): Promise<void>;
|
|
33
|
-
/**
|
|
34
|
-
* Wait until the agent is ready to receive audio
|
|
35
|
-
* Resolves when the agent subscribes to our audio track
|
|
36
|
-
*/
|
|
37
|
-
waitUntilReady(): Promise<void>;
|
|
38
|
-
/**
|
|
39
|
-
* Enable microphone (user is now being recorded)
|
|
40
|
-
*/
|
|
41
|
-
enableMicrophone(): Promise<void>;
|
|
42
|
-
/**
|
|
43
|
-
* Stop recording and get the transcript
|
|
44
|
-
* @returns The transcribed text
|
|
45
|
-
*/
|
|
46
|
-
stopAndGetTranscript(): Promise<string>;
|
|
47
|
-
/**
|
|
48
|
-
* Stop recording and get edited text
|
|
49
|
-
* @param originalText - The original text to edit based on voice instructions
|
|
50
|
-
* @returns The edited text
|
|
51
|
-
*/
|
|
52
|
-
stopAndEdit(originalText: string): Promise<string>;
|
|
53
|
-
/**
|
|
54
|
-
* Disconnect from LiveKit
|
|
55
|
-
*/
|
|
56
|
-
disconnect(): Promise<void>;
|
|
57
|
-
/**
|
|
58
|
-
* One-shot dictation: connect, wait for agent, record, and get transcript
|
|
27
|
+
* One-shot dictation: connect, record, and get transcript
|
|
59
28
|
* Automatically handles the full voice session lifecycle
|
|
60
29
|
*
|
|
61
30
|
* @returns The transcribed text
|
|
@@ -69,7 +38,7 @@ declare class SpeechOSCore {
|
|
|
69
38
|
*/
|
|
70
39
|
stopDictation(): Promise<string>;
|
|
71
40
|
/**
|
|
72
|
-
* One-shot edit: connect,
|
|
41
|
+
* One-shot edit: connect, record voice instructions, apply to text
|
|
73
42
|
* Automatically handles the full voice session lifecycle
|
|
74
43
|
*
|
|
75
44
|
* @param originalText - The text to edit
|
|
@@ -85,21 +54,23 @@ declare class SpeechOSCore {
|
|
|
85
54
|
*/
|
|
86
55
|
stopEdit(): Promise<string>;
|
|
87
56
|
/**
|
|
88
|
-
* One-shot command: connect,
|
|
57
|
+
* One-shot command: connect, record voice, match against commands
|
|
89
58
|
* Automatically handles the full voice session lifecycle
|
|
90
59
|
*
|
|
91
60
|
* @param commands - Array of command definitions to match against
|
|
92
|
-
* @returns
|
|
61
|
+
* @returns Array of matched commands (empty array if no matches)
|
|
93
62
|
*/
|
|
94
|
-
command(commands: CommandDefinition[]): Promise<CommandResult
|
|
63
|
+
command(commands: CommandDefinition[]): Promise<CommandResult[]>;
|
|
95
64
|
private _commandCommands?;
|
|
96
65
|
private _commandResolve?;
|
|
97
66
|
private _commandReject?;
|
|
98
67
|
/**
|
|
99
|
-
* Stop command recording and get the matched
|
|
68
|
+
* Stop command recording and get the matched commands
|
|
100
69
|
* Call this after command() when user stops speaking
|
|
70
|
+
*
|
|
71
|
+
* @returns Array of matched commands (empty array if no matches)
|
|
101
72
|
*/
|
|
102
|
-
stopCommand(): Promise<CommandResult
|
|
73
|
+
stopCommand(): Promise<CommandResult[]>;
|
|
103
74
|
/**
|
|
104
75
|
* Cancel the current operation
|
|
105
76
|
*/
|
package/dist/state.d.cts
CHANGED
|
@@ -49,6 +49,16 @@ declare class StateManager {
|
|
|
49
49
|
* @param element - The form element that has focus
|
|
50
50
|
*/
|
|
51
51
|
setFocusedElement(element: HTMLElement | null): void;
|
|
52
|
+
/**
|
|
53
|
+
* Set the current text selection
|
|
54
|
+
* @param text - Selected text (null to clear)
|
|
55
|
+
* @param element - Element associated with selection
|
|
56
|
+
*/
|
|
57
|
+
setSelection(text: string | null, element: HTMLElement | null): void;
|
|
58
|
+
/**
|
|
59
|
+
* Clear the current text selection
|
|
60
|
+
*/
|
|
61
|
+
clearSelection(): void;
|
|
52
62
|
/**
|
|
53
63
|
* Set the active action
|
|
54
64
|
* @param action - The action to set as active
|
|
@@ -61,7 +71,7 @@ declare class StateManager {
|
|
|
61
71
|
setRecordingState(recordingState: SpeechOSState["recordingState"]): void;
|
|
62
72
|
/**
|
|
63
73
|
* Set the connection state
|
|
64
|
-
* @param isConnected - Whether connected to
|
|
74
|
+
* @param isConnected - Whether connected to the backend
|
|
65
75
|
*/
|
|
66
76
|
setConnected(isConnected: boolean): void;
|
|
67
77
|
/**
|
package/dist/state.d.ts
CHANGED
|
@@ -49,6 +49,16 @@ declare class StateManager {
|
|
|
49
49
|
* @param element - The form element that has focus
|
|
50
50
|
*/
|
|
51
51
|
setFocusedElement(element: HTMLElement | null): void;
|
|
52
|
+
/**
|
|
53
|
+
* Set the current text selection
|
|
54
|
+
* @param text - Selected text (null to clear)
|
|
55
|
+
* @param element - Element associated with selection
|
|
56
|
+
*/
|
|
57
|
+
setSelection(text: string | null, element: HTMLElement | null): void;
|
|
58
|
+
/**
|
|
59
|
+
* Clear the current text selection
|
|
60
|
+
*/
|
|
61
|
+
clearSelection(): void;
|
|
52
62
|
/**
|
|
53
63
|
* Set the active action
|
|
54
64
|
* @param action - The action to set as active
|
|
@@ -61,7 +71,7 @@ declare class StateManager {
|
|
|
61
71
|
setRecordingState(recordingState: SpeechOSState["recordingState"]): void;
|
|
62
72
|
/**
|
|
63
73
|
* Set the connection state
|
|
64
|
-
* @param isConnected - Whether connected to
|
|
74
|
+
* @param isConnected - Whether connected to the backend
|
|
65
75
|
*/
|
|
66
76
|
setConnected(isConnected: boolean): void;
|
|
67
77
|
/**
|
package/dist/tts.d.cts
ADDED
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* TTS (Text-to-Speech) client for SpeechOS SDK
|
|
3
|
+
*
|
|
4
|
+
* Provides methods to synthesize speech from text using the SpeechOS TTS API.
|
|
5
|
+
* This is a headless module - audio playback is handled by @speechos/client.
|
|
6
|
+
*/
|
|
7
|
+
/**
|
|
8
|
+
* Default TTS voice ID (matches server default).
|
|
9
|
+
* The server validates voice IDs - pass any valid voice ID or omit to use default.
|
|
10
|
+
*/
|
|
11
|
+
export declare const DEFAULT_TTS_VOICE_ID = "21m00Tcm4TlvDq8ikWAM";
|
|
12
|
+
/**
|
|
13
|
+
* Options for TTS synthesis
|
|
14
|
+
*/
|
|
15
|
+
export interface TTSOptions {
|
|
16
|
+
/** Voice ID. Server uses its default if not specified. */
|
|
17
|
+
voiceId?: string;
|
|
18
|
+
/** Language code (e.g., 'en', 'es', 'fr'). Defaults to 'en'. */
|
|
19
|
+
language?: string;
|
|
20
|
+
/** Optional abort signal for cancelling the request. */
|
|
21
|
+
signal?: AbortSignal;
|
|
22
|
+
}
|
|
23
|
+
/**
|
|
24
|
+
* Result of TTS synthesis
|
|
25
|
+
*/
|
|
26
|
+
export interface TTSResult {
|
|
27
|
+
/** Audio data as ArrayBuffer (MP3 format) */
|
|
28
|
+
audio: ArrayBuffer;
|
|
29
|
+
/** Content type of the audio (e.g., 'audio/mpeg') */
|
|
30
|
+
contentType: string;
|
|
31
|
+
}
|
|
32
|
+
/**
|
|
33
|
+
* TTS error codes
|
|
34
|
+
*/
|
|
35
|
+
export type TTSErrorCode = "invalid_request" | "usage_limit_exceeded" | "authentication_failed" | "network_error" | "unknown_error";
|
|
36
|
+
/**
|
|
37
|
+
* TTS Client for synthesizing speech from text
|
|
38
|
+
*/
|
|
39
|
+
export declare class TTSClient {
|
|
40
|
+
/**
|
|
41
|
+
* Synthesize text to speech and return audio bytes
|
|
42
|
+
*
|
|
43
|
+
* @param text - Text to synthesize (max 1000 chars)
|
|
44
|
+
* @param options - Optional synthesis options
|
|
45
|
+
* @returns Audio data and content type
|
|
46
|
+
*
|
|
47
|
+
* @example
|
|
48
|
+
* ```typescript
|
|
49
|
+
* const result = await tts.synthesize('Hello world');
|
|
50
|
+
* console.log(result.audio); // ArrayBuffer
|
|
51
|
+
* console.log(result.contentType); // 'audio/mpeg'
|
|
52
|
+
* ```
|
|
53
|
+
*/
|
|
54
|
+
synthesize(text: string, options?: TTSOptions): Promise<TTSResult>;
|
|
55
|
+
/**
|
|
56
|
+
* Stream TTS audio chunks as they arrive from the server
|
|
57
|
+
*
|
|
58
|
+
* Useful for progressive playback or processing large texts.
|
|
59
|
+
*
|
|
60
|
+
* @param text - Text to synthesize (max 1000 chars)
|
|
61
|
+
* @param options - Optional synthesis options
|
|
62
|
+
* @yields Audio chunks as Uint8Array
|
|
63
|
+
*
|
|
64
|
+
* @example
|
|
65
|
+
* ```typescript
|
|
66
|
+
* const chunks: Uint8Array[] = [];
|
|
67
|
+
* for await (const chunk of tts.stream('Hello world')) {
|
|
68
|
+
* chunks.push(chunk);
|
|
69
|
+
* }
|
|
70
|
+
* ```
|
|
71
|
+
*/
|
|
72
|
+
stream(text: string, options?: TTSOptions): AsyncGenerator<Uint8Array>;
|
|
73
|
+
}
|
|
74
|
+
export declare const tts: TTSClient;
|
package/dist/tts.d.ts
ADDED
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* TTS (Text-to-Speech) client for SpeechOS SDK
|
|
3
|
+
*
|
|
4
|
+
* Provides methods to synthesize speech from text using the SpeechOS TTS API.
|
|
5
|
+
* This is a headless module - audio playback is handled by @speechos/client.
|
|
6
|
+
*/
|
|
7
|
+
/**
|
|
8
|
+
* Default TTS voice ID (matches server default).
|
|
9
|
+
* The server validates voice IDs - pass any valid voice ID or omit to use default.
|
|
10
|
+
*/
|
|
11
|
+
export declare const DEFAULT_TTS_VOICE_ID = "21m00Tcm4TlvDq8ikWAM";
|
|
12
|
+
/**
|
|
13
|
+
* Options for TTS synthesis
|
|
14
|
+
*/
|
|
15
|
+
export interface TTSOptions {
|
|
16
|
+
/** Voice ID. Server uses its default if not specified. */
|
|
17
|
+
voiceId?: string;
|
|
18
|
+
/** Language code (e.g., 'en', 'es', 'fr'). Defaults to 'en'. */
|
|
19
|
+
language?: string;
|
|
20
|
+
/** Optional abort signal for cancelling the request. */
|
|
21
|
+
signal?: AbortSignal;
|
|
22
|
+
}
|
|
23
|
+
/**
|
|
24
|
+
* Result of TTS synthesis
|
|
25
|
+
*/
|
|
26
|
+
export interface TTSResult {
|
|
27
|
+
/** Audio data as ArrayBuffer (MP3 format) */
|
|
28
|
+
audio: ArrayBuffer;
|
|
29
|
+
/** Content type of the audio (e.g., 'audio/mpeg') */
|
|
30
|
+
contentType: string;
|
|
31
|
+
}
|
|
32
|
+
/**
|
|
33
|
+
* TTS error codes
|
|
34
|
+
*/
|
|
35
|
+
export type TTSErrorCode = "invalid_request" | "usage_limit_exceeded" | "authentication_failed" | "network_error" | "unknown_error";
|
|
36
|
+
/**
|
|
37
|
+
* TTS Client for synthesizing speech from text
|
|
38
|
+
*/
|
|
39
|
+
export declare class TTSClient {
|
|
40
|
+
/**
|
|
41
|
+
* Synthesize text to speech and return audio bytes
|
|
42
|
+
*
|
|
43
|
+
* @param text - Text to synthesize (max 1000 chars)
|
|
44
|
+
* @param options - Optional synthesis options
|
|
45
|
+
* @returns Audio data and content type
|
|
46
|
+
*
|
|
47
|
+
* @example
|
|
48
|
+
* ```typescript
|
|
49
|
+
* const result = await tts.synthesize('Hello world');
|
|
50
|
+
* console.log(result.audio); // ArrayBuffer
|
|
51
|
+
* console.log(result.contentType); // 'audio/mpeg'
|
|
52
|
+
* ```
|
|
53
|
+
*/
|
|
54
|
+
synthesize(text: string, options?: TTSOptions): Promise<TTSResult>;
|
|
55
|
+
/**
|
|
56
|
+
* Stream TTS audio chunks as they arrive from the server
|
|
57
|
+
*
|
|
58
|
+
* Useful for progressive playback or processing large texts.
|
|
59
|
+
*
|
|
60
|
+
* @param text - Text to synthesize (max 1000 chars)
|
|
61
|
+
* @param options - Optional synthesis options
|
|
62
|
+
* @yields Audio chunks as Uint8Array
|
|
63
|
+
*
|
|
64
|
+
* @example
|
|
65
|
+
* ```typescript
|
|
66
|
+
* const chunks: Uint8Array[] = [];
|
|
67
|
+
* for await (const chunk of tts.stream('Hello world')) {
|
|
68
|
+
* chunks.push(chunk);
|
|
69
|
+
* }
|
|
70
|
+
* ```
|
|
71
|
+
*/
|
|
72
|
+
stream(text: string, options?: TTSOptions): AsyncGenerator<Uint8Array>;
|
|
73
|
+
}
|
|
74
|
+
export declare const tts: TTSClient;
|
package/dist/types.d.cts
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
* Shared TypeScript types for SpeechOS Core SDK
|
|
3
3
|
*/
|
|
4
4
|
/**
|
|
5
|
-
* Server error message structure received via
|
|
5
|
+
* Server error message structure received via WebSocket
|
|
6
6
|
*/
|
|
7
7
|
export interface ServerErrorMessage {
|
|
8
8
|
type: "error";
|
|
@@ -17,12 +17,6 @@ export interface ServerErrorMessage {
|
|
|
17
17
|
* Error source indicating where the error originated
|
|
18
18
|
*/
|
|
19
19
|
export type ErrorSource = "init" | "connection" | "timeout" | "server";
|
|
20
|
-
/**
|
|
21
|
-
* Backend type for voice sessions
|
|
22
|
-
* - 'websocket': Direct WebSocket connection (lower latency, recommended)
|
|
23
|
-
* - 'livekit': LiveKit WebRTC connection (legacy)
|
|
24
|
-
*/
|
|
25
|
-
export type VoiceBackend = "websocket" | "livekit";
|
|
26
20
|
/**
|
|
27
21
|
* Configuration options for initializing SpeechOS Core
|
|
28
22
|
*/
|
|
@@ -85,15 +79,6 @@ export interface VoiceSessionOptions {
|
|
|
85
79
|
/** User settings for this session */
|
|
86
80
|
settings?: SessionSettings;
|
|
87
81
|
}
|
|
88
|
-
/**
|
|
89
|
-
* LiveKit token response from the backend
|
|
90
|
-
*/
|
|
91
|
-
export interface LiveKitTokenResponse {
|
|
92
|
-
token: string;
|
|
93
|
-
ws_url: string;
|
|
94
|
-
room: string;
|
|
95
|
-
identity: string;
|
|
96
|
-
}
|
|
97
82
|
/**
|
|
98
83
|
* User vocabulary data sent with transcription/edit requests
|
|
99
84
|
* Includes custom vocabulary terms for improved transcription accuracy
|
|
@@ -113,7 +98,7 @@ export interface UserVocabularyData {
|
|
|
113
98
|
/**
|
|
114
99
|
* Available actions that can be triggered from the widget
|
|
115
100
|
*/
|
|
116
|
-
export type SpeechOSAction = "dictate" | "edit" | "command";
|
|
101
|
+
export type SpeechOSAction = "dictate" | "edit" | "command" | "read";
|
|
117
102
|
/**
|
|
118
103
|
* Definition of a command argument
|
|
119
104
|
*/
|
|
@@ -159,7 +144,7 @@ export interface SpeechOSState {
|
|
|
159
144
|
isVisible: boolean;
|
|
160
145
|
/** Whether the action bubbles are expanded */
|
|
161
146
|
isExpanded: boolean;
|
|
162
|
-
/** Whether connected to
|
|
147
|
+
/** Whether connected to the backend */
|
|
163
148
|
isConnected: boolean;
|
|
164
149
|
/** Whether microphone is enabled and publishing */
|
|
165
150
|
isMicEnabled: boolean;
|
|
@@ -167,6 +152,10 @@ export interface SpeechOSState {
|
|
|
167
152
|
activeAction: SpeechOSAction | null;
|
|
168
153
|
/** The form field element that currently has focus (set by client) */
|
|
169
154
|
focusedElement: HTMLElement | null;
|
|
155
|
+
/** Currently selected text (if any) */
|
|
156
|
+
selectionText: string | null;
|
|
157
|
+
/** Element associated with the current selection (if any) */
|
|
158
|
+
selectionElement: HTMLElement | null;
|
|
170
159
|
/** Current recording state */
|
|
171
160
|
recordingState: RecordingState;
|
|
172
161
|
/** Error message to display (if any) */
|
|
@@ -192,10 +181,15 @@ export interface SpeechOSEventMap {
|
|
|
192
181
|
"widget:show": void;
|
|
193
182
|
/** Emitted when the widget is hidden */
|
|
194
183
|
"widget:hide": void;
|
|
195
|
-
/** Emitted when user selects an action
|
|
184
|
+
/** Emitted when user selects an action */
|
|
196
185
|
"action:select": {
|
|
197
186
|
action: SpeechOSAction;
|
|
198
187
|
};
|
|
188
|
+
/** Emitted when selected text changes (empty string when cleared) */
|
|
189
|
+
"selection:change": {
|
|
190
|
+
text: string;
|
|
191
|
+
element: HTMLElement | null;
|
|
192
|
+
};
|
|
199
193
|
/** Emitted when internal state changes */
|
|
200
194
|
"state:change": {
|
|
201
195
|
state: SpeechOSState;
|
|
@@ -214,9 +208,9 @@ export interface SpeechOSEventMap {
|
|
|
214
208
|
text: string;
|
|
215
209
|
originalText: string;
|
|
216
210
|
};
|
|
217
|
-
/** Emitted when command matching completes (
|
|
211
|
+
/** Emitted when command matching completes (empty array if no commands matched) */
|
|
218
212
|
"command:complete": {
|
|
219
|
-
|
|
213
|
+
commands: CommandResult[];
|
|
220
214
|
};
|
|
221
215
|
/** Emitted when transcribed text is inserted into a form field */
|
|
222
216
|
"transcription:inserted": {
|
|
@@ -232,7 +226,7 @@ export interface SpeechOSEventMap {
|
|
|
232
226
|
/** Emitted when user settings change (language, snippets, vocabulary, smartFormat, history) */
|
|
233
227
|
"settings:changed": {
|
|
234
228
|
/** Type of setting that changed */
|
|
235
|
-
setting: "language" | "snippets" | "vocabulary" | "smartFormat" | "history";
|
|
229
|
+
setting: "language" | "snippets" | "vocabulary" | "smartFormat" | "history" | "voice";
|
|
236
230
|
};
|
|
237
231
|
/** Emitted when settings are loaded from the server */
|
|
238
232
|
"settings:loaded": void;
|
|
@@ -244,6 +238,32 @@ export interface SpeechOSEventMap {
|
|
|
244
238
|
};
|
|
245
239
|
/** Emitted when the settings token expires (user should request a new one) */
|
|
246
240
|
"settings:tokenExpired": void;
|
|
241
|
+
/** Emitted when a TTS synthesis request begins */
|
|
242
|
+
"tts:synthesize:start": {
|
|
243
|
+
text: string;
|
|
244
|
+
};
|
|
245
|
+
/** Emitted when audio bytes are fully received from the server */
|
|
246
|
+
"tts:synthesize:complete": {
|
|
247
|
+
text: string;
|
|
248
|
+
};
|
|
249
|
+
/** Emitted when audio playback begins */
|
|
250
|
+
"tts:playback:start": {
|
|
251
|
+
text: string;
|
|
252
|
+
};
|
|
253
|
+
/** Emitted when audio playback finishes */
|
|
254
|
+
"tts:playback:complete": {
|
|
255
|
+
text: string;
|
|
256
|
+
};
|
|
257
|
+
/** Emitted when audio playback is stopped */
|
|
258
|
+
"tts:playback:stop": {
|
|
259
|
+
text: string | null;
|
|
260
|
+
};
|
|
261
|
+
/** Emitted when an error occurs during TTS synthesis or playback */
|
|
262
|
+
"tts:error": {
|
|
263
|
+
code: string;
|
|
264
|
+
message: string;
|
|
265
|
+
phase: "synthesize" | "playback";
|
|
266
|
+
};
|
|
247
267
|
/** Emitted when an error occurs */
|
|
248
268
|
error: {
|
|
249
269
|
code: string;
|
package/dist/types.d.ts
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
* Shared TypeScript types for SpeechOS Core SDK
|
|
3
3
|
*/
|
|
4
4
|
/**
|
|
5
|
-
* Server error message structure received via
|
|
5
|
+
* Server error message structure received via WebSocket
|
|
6
6
|
*/
|
|
7
7
|
export interface ServerErrorMessage {
|
|
8
8
|
type: "error";
|
|
@@ -17,12 +17,6 @@ export interface ServerErrorMessage {
|
|
|
17
17
|
* Error source indicating where the error originated
|
|
18
18
|
*/
|
|
19
19
|
export type ErrorSource = "init" | "connection" | "timeout" | "server";
|
|
20
|
-
/**
|
|
21
|
-
* Backend type for voice sessions
|
|
22
|
-
* - 'websocket': Direct WebSocket connection (lower latency, recommended)
|
|
23
|
-
* - 'livekit': LiveKit WebRTC connection (legacy)
|
|
24
|
-
*/
|
|
25
|
-
export type VoiceBackend = "websocket" | "livekit";
|
|
26
20
|
/**
|
|
27
21
|
* Configuration options for initializing SpeechOS Core
|
|
28
22
|
*/
|
|
@@ -85,15 +79,6 @@ export interface VoiceSessionOptions {
|
|
|
85
79
|
/** User settings for this session */
|
|
86
80
|
settings?: SessionSettings;
|
|
87
81
|
}
|
|
88
|
-
/**
|
|
89
|
-
* LiveKit token response from the backend
|
|
90
|
-
*/
|
|
91
|
-
export interface LiveKitTokenResponse {
|
|
92
|
-
token: string;
|
|
93
|
-
ws_url: string;
|
|
94
|
-
room: string;
|
|
95
|
-
identity: string;
|
|
96
|
-
}
|
|
97
82
|
/**
|
|
98
83
|
* User vocabulary data sent with transcription/edit requests
|
|
99
84
|
* Includes custom vocabulary terms for improved transcription accuracy
|
|
@@ -113,7 +98,7 @@ export interface UserVocabularyData {
|
|
|
113
98
|
/**
|
|
114
99
|
* Available actions that can be triggered from the widget
|
|
115
100
|
*/
|
|
116
|
-
export type SpeechOSAction = "dictate" | "edit" | "command";
|
|
101
|
+
export type SpeechOSAction = "dictate" | "edit" | "command" | "read";
|
|
117
102
|
/**
|
|
118
103
|
* Definition of a command argument
|
|
119
104
|
*/
|
|
@@ -159,7 +144,7 @@ export interface SpeechOSState {
|
|
|
159
144
|
isVisible: boolean;
|
|
160
145
|
/** Whether the action bubbles are expanded */
|
|
161
146
|
isExpanded: boolean;
|
|
162
|
-
/** Whether connected to
|
|
147
|
+
/** Whether connected to the backend */
|
|
163
148
|
isConnected: boolean;
|
|
164
149
|
/** Whether microphone is enabled and publishing */
|
|
165
150
|
isMicEnabled: boolean;
|
|
@@ -167,6 +152,10 @@ export interface SpeechOSState {
|
|
|
167
152
|
activeAction: SpeechOSAction | null;
|
|
168
153
|
/** The form field element that currently has focus (set by client) */
|
|
169
154
|
focusedElement: HTMLElement | null;
|
|
155
|
+
/** Currently selected text (if any) */
|
|
156
|
+
selectionText: string | null;
|
|
157
|
+
/** Element associated with the current selection (if any) */
|
|
158
|
+
selectionElement: HTMLElement | null;
|
|
170
159
|
/** Current recording state */
|
|
171
160
|
recordingState: RecordingState;
|
|
172
161
|
/** Error message to display (if any) */
|
|
@@ -192,10 +181,15 @@ export interface SpeechOSEventMap {
|
|
|
192
181
|
"widget:show": void;
|
|
193
182
|
/** Emitted when the widget is hidden */
|
|
194
183
|
"widget:hide": void;
|
|
195
|
-
/** Emitted when user selects an action
|
|
184
|
+
/** Emitted when user selects an action */
|
|
196
185
|
"action:select": {
|
|
197
186
|
action: SpeechOSAction;
|
|
198
187
|
};
|
|
188
|
+
/** Emitted when selected text changes (empty string when cleared) */
|
|
189
|
+
"selection:change": {
|
|
190
|
+
text: string;
|
|
191
|
+
element: HTMLElement | null;
|
|
192
|
+
};
|
|
199
193
|
/** Emitted when internal state changes */
|
|
200
194
|
"state:change": {
|
|
201
195
|
state: SpeechOSState;
|
|
@@ -214,9 +208,9 @@ export interface SpeechOSEventMap {
|
|
|
214
208
|
text: string;
|
|
215
209
|
originalText: string;
|
|
216
210
|
};
|
|
217
|
-
/** Emitted when command matching completes (
|
|
211
|
+
/** Emitted when command matching completes (empty array if no commands matched) */
|
|
218
212
|
"command:complete": {
|
|
219
|
-
|
|
213
|
+
commands: CommandResult[];
|
|
220
214
|
};
|
|
221
215
|
/** Emitted when transcribed text is inserted into a form field */
|
|
222
216
|
"transcription:inserted": {
|
|
@@ -232,7 +226,7 @@ export interface SpeechOSEventMap {
|
|
|
232
226
|
/** Emitted when user settings change (language, snippets, vocabulary, smartFormat, history) */
|
|
233
227
|
"settings:changed": {
|
|
234
228
|
/** Type of setting that changed */
|
|
235
|
-
setting: "language" | "snippets" | "vocabulary" | "smartFormat" | "history";
|
|
229
|
+
setting: "language" | "snippets" | "vocabulary" | "smartFormat" | "history" | "voice";
|
|
236
230
|
};
|
|
237
231
|
/** Emitted when settings are loaded from the server */
|
|
238
232
|
"settings:loaded": void;
|
|
@@ -244,6 +238,32 @@ export interface SpeechOSEventMap {
|
|
|
244
238
|
};
|
|
245
239
|
/** Emitted when the settings token expires (user should request a new one) */
|
|
246
240
|
"settings:tokenExpired": void;
|
|
241
|
+
/** Emitted when a TTS synthesis request begins */
|
|
242
|
+
"tts:synthesize:start": {
|
|
243
|
+
text: string;
|
|
244
|
+
};
|
|
245
|
+
/** Emitted when audio bytes are fully received from the server */
|
|
246
|
+
"tts:synthesize:complete": {
|
|
247
|
+
text: string;
|
|
248
|
+
};
|
|
249
|
+
/** Emitted when audio playback begins */
|
|
250
|
+
"tts:playback:start": {
|
|
251
|
+
text: string;
|
|
252
|
+
};
|
|
253
|
+
/** Emitted when audio playback finishes */
|
|
254
|
+
"tts:playback:complete": {
|
|
255
|
+
text: string;
|
|
256
|
+
};
|
|
257
|
+
/** Emitted when audio playback is stopped */
|
|
258
|
+
"tts:playback:stop": {
|
|
259
|
+
text: string | null;
|
|
260
|
+
};
|
|
261
|
+
/** Emitted when an error occurs during TTS synthesis or playback */
|
|
262
|
+
"tts:error": {
|
|
263
|
+
code: string;
|
|
264
|
+
message: string;
|
|
265
|
+
phase: "synthesize" | "playback";
|
|
266
|
+
};
|
|
247
267
|
/** Emitted when an error occurs */
|
|
248
268
|
error: {
|
|
249
269
|
code: string;
|
package/dist/websocket.d.cts
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* WebSocket integration for SpeechOS SDK.
|
|
3
3
|
*
|
|
4
|
-
* Provides a direct WebSocket connection to the backend for voice sessions
|
|
5
|
-
*
|
|
4
|
+
* Provides a direct WebSocket connection to the backend for voice sessions.
|
|
5
|
+
* Uses audio buffering to capture
|
|
6
6
|
* audio immediately while the connection is being established.
|
|
7
7
|
*/
|
|
8
8
|
import type { CommandDefinition, CommandResult, ErrorSource, VoiceSessionOptions } from './types.js';
|
|
@@ -92,8 +92,9 @@ declare class WebSocketManager {
|
|
|
92
92
|
/**
|
|
93
93
|
* Request command matching using the transcript as input.
|
|
94
94
|
* Note: The command definitions were already sent in the auth message via startVoiceSession.
|
|
95
|
+
* Returns an array of matched commands (empty array if no matches).
|
|
95
96
|
*/
|
|
96
|
-
requestCommand(_commands: CommandDefinition[]): Promise<CommandResult
|
|
97
|
+
requestCommand(_commands: CommandDefinition[]): Promise<CommandResult[]>;
|
|
97
98
|
/**
|
|
98
99
|
* Stop audio capture and wait for all data to be sent.
|
|
99
100
|
*
|
|
@@ -108,7 +109,7 @@ declare class WebSocketManager {
|
|
|
108
109
|
* Wait for the WebSocket send buffer to drain.
|
|
109
110
|
*
|
|
110
111
|
* This ensures all audio data has been transmitted before we request
|
|
111
|
-
* the transcript.
|
|
112
|
+
* the transcript.
|
|
112
113
|
*/
|
|
113
114
|
private waitForBufferDrain;
|
|
114
115
|
/**
|