@realtimex/sdk 1.1.4 → 1.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.mts +121 -1
- package/dist/index.d.ts +121 -1
- package/dist/index.js +185 -0
- package/dist/index.mjs +184 -0
- package/package.json +2 -2
package/dist/index.d.mts
CHANGED
|
@@ -79,6 +79,79 @@ interface Task {
|
|
|
79
79
|
updated_at: string;
|
|
80
80
|
runs: TaskRun[];
|
|
81
81
|
}
|
|
82
|
+
interface TTSOptions {
|
|
83
|
+
/** Voice ID (provider-specific) */
|
|
84
|
+
voice?: string;
|
|
85
|
+
/** Model ID (provider-specific) */
|
|
86
|
+
model?: string;
|
|
87
|
+
/** Speech speed (0.5-2.0) */
|
|
88
|
+
speed?: number;
|
|
89
|
+
/** TTS provider ID */
|
|
90
|
+
provider?: string;
|
|
91
|
+
/** Language code (e.g., 'en', 'es', 'fr') - for Supertonic */
|
|
92
|
+
language?: string;
|
|
93
|
+
/** Quality level (1-20) - for Supertonic num_inference_steps */
|
|
94
|
+
num_inference_steps?: number;
|
|
95
|
+
}
|
|
96
|
+
interface TTSProviderConfig {
|
|
97
|
+
/** Available voice/speaker IDs */
|
|
98
|
+
voices: string[];
|
|
99
|
+
/** Supported languages (for multilingual providers) */
|
|
100
|
+
languages?: string[];
|
|
101
|
+
/** Speed range */
|
|
102
|
+
speed?: {
|
|
103
|
+
min: number;
|
|
104
|
+
max: number;
|
|
105
|
+
default: number;
|
|
106
|
+
};
|
|
107
|
+
/** Quality range (for providers that support it) */
|
|
108
|
+
quality?: {
|
|
109
|
+
min: number;
|
|
110
|
+
max: number;
|
|
111
|
+
default: number;
|
|
112
|
+
description?: string;
|
|
113
|
+
};
|
|
114
|
+
}
|
|
115
|
+
interface TTSProvider {
|
|
116
|
+
/** Provider ID (e.g., 'elevenlabs', 'supertonic_local') */
|
|
117
|
+
id: string;
|
|
118
|
+
/** Display name */
|
|
119
|
+
name: string;
|
|
120
|
+
/** Provider type: 'server' (remote API) or 'client' (local) */
|
|
121
|
+
type: 'server' | 'client';
|
|
122
|
+
/** Whether provider is configured and ready */
|
|
123
|
+
configured: boolean;
|
|
124
|
+
/** Whether streaming is supported */
|
|
125
|
+
supportsStreaming: boolean;
|
|
126
|
+
/** Optional note about provider requirements */
|
|
127
|
+
note?: string;
|
|
128
|
+
/** Configuration options */
|
|
129
|
+
config?: TTSProviderConfig;
|
|
130
|
+
}
|
|
131
|
+
interface TTSProvidersResponse {
|
|
132
|
+
success: boolean;
|
|
133
|
+
providers: TTSProvider[];
|
|
134
|
+
default: string;
|
|
135
|
+
error?: string;
|
|
136
|
+
}
|
|
137
|
+
interface TTSChunk {
|
|
138
|
+
/** Chunk index (0-based) */
|
|
139
|
+
index: number;
|
|
140
|
+
/** Total number of chunks */
|
|
141
|
+
total: number;
|
|
142
|
+
/** Decoded audio data (ArrayBuffer) - ready for playback */
|
|
143
|
+
audio: ArrayBuffer;
|
|
144
|
+
/** Audio MIME type */
|
|
145
|
+
mimeType: string;
|
|
146
|
+
}
|
|
147
|
+
interface TTSChunkEvent {
|
|
148
|
+
type: 'info' | 'chunk' | 'error' | 'done';
|
|
149
|
+
data: TTSChunk | {
|
|
150
|
+
totalChunks: number;
|
|
151
|
+
} | {
|
|
152
|
+
error: string;
|
|
153
|
+
};
|
|
154
|
+
}
|
|
82
155
|
|
|
83
156
|
/**
|
|
84
157
|
* Activities Module - HTTP Proxy to RealtimeX Main App
|
|
@@ -636,6 +709,52 @@ declare class LLMModule {
|
|
|
636
709
|
search(query: string, options?: VectorQueryOptions): Promise<VectorQueryResult[]>;
|
|
637
710
|
}
|
|
638
711
|
|
|
712
|
+
declare class TTSModule {
|
|
713
|
+
private baseUrl;
|
|
714
|
+
private appId;
|
|
715
|
+
private appName;
|
|
716
|
+
private apiKey?;
|
|
717
|
+
constructor(realtimexUrl: string, appId: string, appName?: string, apiKey?: string);
|
|
718
|
+
private get headers();
|
|
719
|
+
/**
|
|
720
|
+
* Request a single permission from Electron via internal API
|
|
721
|
+
*/
|
|
722
|
+
private requestPermission;
|
|
723
|
+
/**
|
|
724
|
+
* Internal request wrapper that handles automatic permission prompts
|
|
725
|
+
*/
|
|
726
|
+
private request;
|
|
727
|
+
/**
|
|
728
|
+
* Generate speech from text (returns full buffer)
|
|
729
|
+
*
|
|
730
|
+
* @example
|
|
731
|
+
* ```ts
|
|
732
|
+
* const buffer = await sdk.tts.speak("Hello world");
|
|
733
|
+
* // Play buffer...
|
|
734
|
+
* ```
|
|
735
|
+
*/
|
|
736
|
+
speak(text: string, options?: TTSOptions): Promise<ArrayBuffer>;
|
|
737
|
+
/**
|
|
738
|
+
* Generate speech from text with streaming (yields decoded audio chunks)
|
|
739
|
+
* Uses SSE internally but returns decoded ArrayBuffer chunks for easy playback.
|
|
740
|
+
*
|
|
741
|
+
* @example
|
|
742
|
+
* ```ts
|
|
743
|
+
* for await (const chunk of sdk.tts.speakStream("Hello world")) {
|
|
744
|
+
* // chunk.audio is ArrayBuffer (already decoded!)
|
|
745
|
+
* const blob = new Blob([chunk.audio], { type: chunk.mimeType });
|
|
746
|
+
* const audio = new Audio(URL.createObjectURL(blob));
|
|
747
|
+
* await audio.play();
|
|
748
|
+
* }
|
|
749
|
+
* ```
|
|
750
|
+
*/
|
|
751
|
+
speakStream(text: string, options?: TTSOptions): AsyncGenerator<TTSChunk>;
|
|
752
|
+
/**
|
|
753
|
+
* List available TTS providers with configuration options
|
|
754
|
+
*/
|
|
755
|
+
listProviders(): Promise<TTSProvider[]>;
|
|
756
|
+
}
|
|
757
|
+
|
|
639
758
|
/**
|
|
640
759
|
* RealtimeX Local App SDK
|
|
641
760
|
*
|
|
@@ -650,6 +769,7 @@ declare class RealtimeXSDK {
|
|
|
650
769
|
task: TaskModule;
|
|
651
770
|
port: PortModule;
|
|
652
771
|
llm: LLMModule;
|
|
772
|
+
tts: TTSModule;
|
|
653
773
|
readonly appId: string;
|
|
654
774
|
readonly appName: string | undefined;
|
|
655
775
|
readonly apiKey: string | undefined;
|
|
@@ -683,4 +803,4 @@ declare class RealtimeXSDK {
|
|
|
683
803
|
getAppDataDir(): Promise<string>;
|
|
684
804
|
}
|
|
685
805
|
|
|
686
|
-
export { ActivitiesModule, type Activity, type Agent, ApiModule, type ChatMessage, type ChatOptions, type ChatResponse, type EmbedOptions, type EmbedResponse, LLMModule, LLMPermissionError, LLMProviderError, PermissionDeniedError, PermissionRequiredError, PortModule, type Provider, type ProvidersResponse, RealtimeXSDK, type SDKConfig, type StreamChunk, type Task, TaskModule, type TaskRun, type Thread, type TriggerAgentPayload, type TriggerAgentResponse, type VectorDeleteOptions, type VectorDeleteResponse, type VectorQueryOptions, type VectorQueryResponse, type VectorQueryResult, type VectorRecord, VectorStore, type VectorUpsertOptions, type VectorUpsertResponse, WebhookModule, type Workspace };
|
|
806
|
+
export { ActivitiesModule, type Activity, type Agent, ApiModule, type ChatMessage, type ChatOptions, type ChatResponse, type EmbedOptions, type EmbedResponse, LLMModule, LLMPermissionError, LLMProviderError, PermissionDeniedError, PermissionRequiredError, PortModule, type Provider, type ProvidersResponse, RealtimeXSDK, type SDKConfig, type StreamChunk, type TTSChunk, type TTSChunkEvent, TTSModule, type TTSOptions, type TTSProvider, type TTSProviderConfig, type TTSProvidersResponse, type Task, TaskModule, type TaskRun, type Thread, type TriggerAgentPayload, type TriggerAgentResponse, type VectorDeleteOptions, type VectorDeleteResponse, type VectorQueryOptions, type VectorQueryResponse, type VectorQueryResult, type VectorRecord, VectorStore, type VectorUpsertOptions, type VectorUpsertResponse, WebhookModule, type Workspace };
|
package/dist/index.d.ts
CHANGED
|
@@ -79,6 +79,79 @@ interface Task {
|
|
|
79
79
|
updated_at: string;
|
|
80
80
|
runs: TaskRun[];
|
|
81
81
|
}
|
|
82
|
+
interface TTSOptions {
|
|
83
|
+
/** Voice ID (provider-specific) */
|
|
84
|
+
voice?: string;
|
|
85
|
+
/** Model ID (provider-specific) */
|
|
86
|
+
model?: string;
|
|
87
|
+
/** Speech speed (0.5-2.0) */
|
|
88
|
+
speed?: number;
|
|
89
|
+
/** TTS provider ID */
|
|
90
|
+
provider?: string;
|
|
91
|
+
/** Language code (e.g., 'en', 'es', 'fr') - for Supertonic */
|
|
92
|
+
language?: string;
|
|
93
|
+
/** Quality level (1-20) - for Supertonic num_inference_steps */
|
|
94
|
+
num_inference_steps?: number;
|
|
95
|
+
}
|
|
96
|
+
interface TTSProviderConfig {
|
|
97
|
+
/** Available voice/speaker IDs */
|
|
98
|
+
voices: string[];
|
|
99
|
+
/** Supported languages (for multilingual providers) */
|
|
100
|
+
languages?: string[];
|
|
101
|
+
/** Speed range */
|
|
102
|
+
speed?: {
|
|
103
|
+
min: number;
|
|
104
|
+
max: number;
|
|
105
|
+
default: number;
|
|
106
|
+
};
|
|
107
|
+
/** Quality range (for providers that support it) */
|
|
108
|
+
quality?: {
|
|
109
|
+
min: number;
|
|
110
|
+
max: number;
|
|
111
|
+
default: number;
|
|
112
|
+
description?: string;
|
|
113
|
+
};
|
|
114
|
+
}
|
|
115
|
+
interface TTSProvider {
|
|
116
|
+
/** Provider ID (e.g., 'elevenlabs', 'supertonic_local') */
|
|
117
|
+
id: string;
|
|
118
|
+
/** Display name */
|
|
119
|
+
name: string;
|
|
120
|
+
/** Provider type: 'server' (remote API) or 'client' (local) */
|
|
121
|
+
type: 'server' | 'client';
|
|
122
|
+
/** Whether provider is configured and ready */
|
|
123
|
+
configured: boolean;
|
|
124
|
+
/** Whether streaming is supported */
|
|
125
|
+
supportsStreaming: boolean;
|
|
126
|
+
/** Optional note about provider requirements */
|
|
127
|
+
note?: string;
|
|
128
|
+
/** Configuration options */
|
|
129
|
+
config?: TTSProviderConfig;
|
|
130
|
+
}
|
|
131
|
+
interface TTSProvidersResponse {
|
|
132
|
+
success: boolean;
|
|
133
|
+
providers: TTSProvider[];
|
|
134
|
+
default: string;
|
|
135
|
+
error?: string;
|
|
136
|
+
}
|
|
137
|
+
interface TTSChunk {
|
|
138
|
+
/** Chunk index (0-based) */
|
|
139
|
+
index: number;
|
|
140
|
+
/** Total number of chunks */
|
|
141
|
+
total: number;
|
|
142
|
+
/** Decoded audio data (ArrayBuffer) - ready for playback */
|
|
143
|
+
audio: ArrayBuffer;
|
|
144
|
+
/** Audio MIME type */
|
|
145
|
+
mimeType: string;
|
|
146
|
+
}
|
|
147
|
+
interface TTSChunkEvent {
|
|
148
|
+
type: 'info' | 'chunk' | 'error' | 'done';
|
|
149
|
+
data: TTSChunk | {
|
|
150
|
+
totalChunks: number;
|
|
151
|
+
} | {
|
|
152
|
+
error: string;
|
|
153
|
+
};
|
|
154
|
+
}
|
|
82
155
|
|
|
83
156
|
/**
|
|
84
157
|
* Activities Module - HTTP Proxy to RealtimeX Main App
|
|
@@ -636,6 +709,52 @@ declare class LLMModule {
|
|
|
636
709
|
search(query: string, options?: VectorQueryOptions): Promise<VectorQueryResult[]>;
|
|
637
710
|
}
|
|
638
711
|
|
|
712
|
+
declare class TTSModule {
|
|
713
|
+
private baseUrl;
|
|
714
|
+
private appId;
|
|
715
|
+
private appName;
|
|
716
|
+
private apiKey?;
|
|
717
|
+
constructor(realtimexUrl: string, appId: string, appName?: string, apiKey?: string);
|
|
718
|
+
private get headers();
|
|
719
|
+
/**
|
|
720
|
+
* Request a single permission from Electron via internal API
|
|
721
|
+
*/
|
|
722
|
+
private requestPermission;
|
|
723
|
+
/**
|
|
724
|
+
* Internal request wrapper that handles automatic permission prompts
|
|
725
|
+
*/
|
|
726
|
+
private request;
|
|
727
|
+
/**
|
|
728
|
+
* Generate speech from text (returns full buffer)
|
|
729
|
+
*
|
|
730
|
+
* @example
|
|
731
|
+
* ```ts
|
|
732
|
+
* const buffer = await sdk.tts.speak("Hello world");
|
|
733
|
+
* // Play buffer...
|
|
734
|
+
* ```
|
|
735
|
+
*/
|
|
736
|
+
speak(text: string, options?: TTSOptions): Promise<ArrayBuffer>;
|
|
737
|
+
/**
|
|
738
|
+
* Generate speech from text with streaming (yields decoded audio chunks)
|
|
739
|
+
* Uses SSE internally but returns decoded ArrayBuffer chunks for easy playback.
|
|
740
|
+
*
|
|
741
|
+
* @example
|
|
742
|
+
* ```ts
|
|
743
|
+
* for await (const chunk of sdk.tts.speakStream("Hello world")) {
|
|
744
|
+
* // chunk.audio is ArrayBuffer (already decoded!)
|
|
745
|
+
* const blob = new Blob([chunk.audio], { type: chunk.mimeType });
|
|
746
|
+
* const audio = new Audio(URL.createObjectURL(blob));
|
|
747
|
+
* await audio.play();
|
|
748
|
+
* }
|
|
749
|
+
* ```
|
|
750
|
+
*/
|
|
751
|
+
speakStream(text: string, options?: TTSOptions): AsyncGenerator<TTSChunk>;
|
|
752
|
+
/**
|
|
753
|
+
* List available TTS providers with configuration options
|
|
754
|
+
*/
|
|
755
|
+
listProviders(): Promise<TTSProvider[]>;
|
|
756
|
+
}
|
|
757
|
+
|
|
639
758
|
/**
|
|
640
759
|
* RealtimeX Local App SDK
|
|
641
760
|
*
|
|
@@ -650,6 +769,7 @@ declare class RealtimeXSDK {
|
|
|
650
769
|
task: TaskModule;
|
|
651
770
|
port: PortModule;
|
|
652
771
|
llm: LLMModule;
|
|
772
|
+
tts: TTSModule;
|
|
653
773
|
readonly appId: string;
|
|
654
774
|
readonly appName: string | undefined;
|
|
655
775
|
readonly apiKey: string | undefined;
|
|
@@ -683,4 +803,4 @@ declare class RealtimeXSDK {
|
|
|
683
803
|
getAppDataDir(): Promise<string>;
|
|
684
804
|
}
|
|
685
805
|
|
|
686
|
-
export { ActivitiesModule, type Activity, type Agent, ApiModule, type ChatMessage, type ChatOptions, type ChatResponse, type EmbedOptions, type EmbedResponse, LLMModule, LLMPermissionError, LLMProviderError, PermissionDeniedError, PermissionRequiredError, PortModule, type Provider, type ProvidersResponse, RealtimeXSDK, type SDKConfig, type StreamChunk, type Task, TaskModule, type TaskRun, type Thread, type TriggerAgentPayload, type TriggerAgentResponse, type VectorDeleteOptions, type VectorDeleteResponse, type VectorQueryOptions, type VectorQueryResponse, type VectorQueryResult, type VectorRecord, VectorStore, type VectorUpsertOptions, type VectorUpsertResponse, WebhookModule, type Workspace };
|
|
806
|
+
export { ActivitiesModule, type Activity, type Agent, ApiModule, type ChatMessage, type ChatOptions, type ChatResponse, type EmbedOptions, type EmbedResponse, LLMModule, LLMPermissionError, LLMProviderError, PermissionDeniedError, PermissionRequiredError, PortModule, type Provider, type ProvidersResponse, RealtimeXSDK, type SDKConfig, type StreamChunk, type TTSChunk, type TTSChunkEvent, TTSModule, type TTSOptions, type TTSProvider, type TTSProviderConfig, type TTSProvidersResponse, type Task, TaskModule, type TaskRun, type Thread, type TriggerAgentPayload, type TriggerAgentResponse, type VectorDeleteOptions, type VectorDeleteResponse, type VectorQueryOptions, type VectorQueryResponse, type VectorQueryResult, type VectorRecord, VectorStore, type VectorUpsertOptions, type VectorUpsertResponse, WebhookModule, type Workspace };
|
package/dist/index.js
CHANGED
|
@@ -39,6 +39,7 @@ __export(index_exports, {
|
|
|
39
39
|
PermissionRequiredError: () => PermissionRequiredError,
|
|
40
40
|
PortModule: () => PortModule,
|
|
41
41
|
RealtimeXSDK: () => RealtimeXSDK,
|
|
42
|
+
TTSModule: () => TTSModule,
|
|
42
43
|
TaskModule: () => TaskModule,
|
|
43
44
|
VectorStore: () => VectorStore,
|
|
44
45
|
WebhookModule: () => WebhookModule
|
|
@@ -980,6 +981,188 @@ var LLMModule = class {
|
|
|
980
981
|
}
|
|
981
982
|
};
|
|
982
983
|
|
|
984
|
+
// src/modules/tts.ts
|
|
985
|
+
var TTSModule = class {
|
|
986
|
+
constructor(realtimexUrl, appId, appName, apiKey) {
|
|
987
|
+
this.baseUrl = realtimexUrl.replace(/\/$/, "");
|
|
988
|
+
this.appId = appId;
|
|
989
|
+
this.appName = appName || process.env.RTX_APP_NAME || "Local App";
|
|
990
|
+
this.apiKey = apiKey;
|
|
991
|
+
}
|
|
992
|
+
get headers() {
|
|
993
|
+
if (this.apiKey) {
|
|
994
|
+
return {
|
|
995
|
+
"Content-Type": "application/json",
|
|
996
|
+
"Authorization": `Bearer ${this.apiKey}`
|
|
997
|
+
};
|
|
998
|
+
}
|
|
999
|
+
return {
|
|
1000
|
+
"Content-Type": "application/json",
|
|
1001
|
+
"x-app-id": this.appId
|
|
1002
|
+
};
|
|
1003
|
+
}
|
|
1004
|
+
/**
|
|
1005
|
+
* Request a single permission from Electron via internal API
|
|
1006
|
+
*/
|
|
1007
|
+
async requestPermission(permission) {
|
|
1008
|
+
try {
|
|
1009
|
+
const response = await fetch(`${this.baseUrl}/api/local-apps/request-permission`, {
|
|
1010
|
+
method: "POST",
|
|
1011
|
+
headers: { "Content-Type": "application/json" },
|
|
1012
|
+
body: JSON.stringify({
|
|
1013
|
+
app_id: this.appId,
|
|
1014
|
+
app_name: this.appName,
|
|
1015
|
+
permission
|
|
1016
|
+
})
|
|
1017
|
+
});
|
|
1018
|
+
const data = await response.json();
|
|
1019
|
+
return data.granted === true;
|
|
1020
|
+
} catch (error) {
|
|
1021
|
+
console.error("[SDK] Permission request failed:", error);
|
|
1022
|
+
return false;
|
|
1023
|
+
}
|
|
1024
|
+
}
|
|
1025
|
+
/**
|
|
1026
|
+
* Internal request wrapper that handles automatic permission prompts
|
|
1027
|
+
*/
|
|
1028
|
+
async request(method, endpoint, body, isStream = false) {
|
|
1029
|
+
const response = await fetch(`${this.baseUrl}${endpoint}`, {
|
|
1030
|
+
method,
|
|
1031
|
+
headers: this.headers,
|
|
1032
|
+
body: body ? JSON.stringify(body) : void 0
|
|
1033
|
+
});
|
|
1034
|
+
if (!response.ok) {
|
|
1035
|
+
const data = await response.json();
|
|
1036
|
+
if (data.code === "PERMISSION_REQUIRED") {
|
|
1037
|
+
const permission = data.permission || "tts.speak";
|
|
1038
|
+
const granted = await this.requestPermission(permission);
|
|
1039
|
+
if (granted) {
|
|
1040
|
+
return this.request(method, endpoint, body, isStream);
|
|
1041
|
+
}
|
|
1042
|
+
throw new PermissionDeniedError(permission);
|
|
1043
|
+
}
|
|
1044
|
+
throw new Error(data.error || `Request failed: ${response.status}`);
|
|
1045
|
+
}
|
|
1046
|
+
if (isStream) {
|
|
1047
|
+
return response.body;
|
|
1048
|
+
}
|
|
1049
|
+
const contentType = response.headers.get("content-type");
|
|
1050
|
+
if (contentType && contentType.includes("application/json")) {
|
|
1051
|
+
return response.json();
|
|
1052
|
+
}
|
|
1053
|
+
return response.arrayBuffer();
|
|
1054
|
+
}
|
|
1055
|
+
/**
|
|
1056
|
+
* Generate speech from text (returns full buffer)
|
|
1057
|
+
*
|
|
1058
|
+
* @example
|
|
1059
|
+
* ```ts
|
|
1060
|
+
* const buffer = await sdk.tts.speak("Hello world");
|
|
1061
|
+
* // Play buffer...
|
|
1062
|
+
* ```
|
|
1063
|
+
*/
|
|
1064
|
+
async speak(text, options = {}) {
|
|
1065
|
+
return this.request("POST", "/sdk/tts", {
|
|
1066
|
+
text,
|
|
1067
|
+
...options
|
|
1068
|
+
});
|
|
1069
|
+
}
|
|
1070
|
+
/**
|
|
1071
|
+
* Generate speech from text with streaming (yields decoded audio chunks)
|
|
1072
|
+
* Uses SSE internally but returns decoded ArrayBuffer chunks for easy playback.
|
|
1073
|
+
*
|
|
1074
|
+
* @example
|
|
1075
|
+
* ```ts
|
|
1076
|
+
* for await (const chunk of sdk.tts.speakStream("Hello world")) {
|
|
1077
|
+
* // chunk.audio is ArrayBuffer (already decoded!)
|
|
1078
|
+
* const blob = new Blob([chunk.audio], { type: chunk.mimeType });
|
|
1079
|
+
* const audio = new Audio(URL.createObjectURL(blob));
|
|
1080
|
+
* await audio.play();
|
|
1081
|
+
* }
|
|
1082
|
+
* ```
|
|
1083
|
+
*/
|
|
1084
|
+
async *speakStream(text, options = {}) {
|
|
1085
|
+
const response = await fetch(`${this.baseUrl}/sdk/tts/stream`, {
|
|
1086
|
+
method: "POST",
|
|
1087
|
+
headers: this.headers,
|
|
1088
|
+
body: JSON.stringify({ text, ...options })
|
|
1089
|
+
});
|
|
1090
|
+
if (!response.ok) {
|
|
1091
|
+
const data = await response.json();
|
|
1092
|
+
if (data.code === "PERMISSION_REQUIRED") {
|
|
1093
|
+
const permission = data.permission || "tts.generate";
|
|
1094
|
+
const granted = await this.requestPermission(permission);
|
|
1095
|
+
if (granted) {
|
|
1096
|
+
yield* this.speakStream(text, options);
|
|
1097
|
+
return;
|
|
1098
|
+
}
|
|
1099
|
+
throw new PermissionDeniedError(permission);
|
|
1100
|
+
}
|
|
1101
|
+
throw new Error(data.error || `Streaming failed: ${response.status}`);
|
|
1102
|
+
}
|
|
1103
|
+
const reader = response.body?.getReader();
|
|
1104
|
+
if (!reader) throw new Error("No response body");
|
|
1105
|
+
const decoder = new TextDecoder();
|
|
1106
|
+
let buffer = "";
|
|
1107
|
+
let eventType = "";
|
|
1108
|
+
try {
|
|
1109
|
+
while (true) {
|
|
1110
|
+
const { done, value } = await reader.read();
|
|
1111
|
+
if (done) break;
|
|
1112
|
+
buffer += decoder.decode(value, { stream: true });
|
|
1113
|
+
const lines = buffer.split("\n");
|
|
1114
|
+
buffer = lines.pop() || "";
|
|
1115
|
+
for (const line of lines) {
|
|
1116
|
+
const trimmedLine = line.trim();
|
|
1117
|
+
if (!trimmedLine) continue;
|
|
1118
|
+
if (trimmedLine.startsWith("event:")) {
|
|
1119
|
+
eventType = trimmedLine.slice(6).trim();
|
|
1120
|
+
} else if (trimmedLine.startsWith("data:")) {
|
|
1121
|
+
const eventData = trimmedLine.slice(5).trim();
|
|
1122
|
+
if (eventType === "chunk" && eventData) {
|
|
1123
|
+
try {
|
|
1124
|
+
const parsed = JSON.parse(eventData);
|
|
1125
|
+
const binaryString = atob(parsed.audio);
|
|
1126
|
+
const bytes = new Uint8Array(binaryString.length);
|
|
1127
|
+
for (let i = 0; i < binaryString.length; i++) {
|
|
1128
|
+
bytes[i] = binaryString.charCodeAt(i);
|
|
1129
|
+
}
|
|
1130
|
+
yield {
|
|
1131
|
+
index: parsed.index,
|
|
1132
|
+
total: parsed.total,
|
|
1133
|
+
audio: bytes.buffer,
|
|
1134
|
+
mimeType: parsed.mimeType
|
|
1135
|
+
};
|
|
1136
|
+
} catch (e) {
|
|
1137
|
+
console.warn("[TTS SDK] Failed to parse chunk:", e);
|
|
1138
|
+
}
|
|
1139
|
+
} else if (eventType === "error" && eventData) {
|
|
1140
|
+
try {
|
|
1141
|
+
const err = JSON.parse(eventData);
|
|
1142
|
+
throw new Error(err.error || "TTS streaming error");
|
|
1143
|
+
} catch (e) {
|
|
1144
|
+
if (e instanceof Error && e.message !== "TTS streaming error") {
|
|
1145
|
+
throw e;
|
|
1146
|
+
}
|
|
1147
|
+
}
|
|
1148
|
+
}
|
|
1149
|
+
eventType = "";
|
|
1150
|
+
}
|
|
1151
|
+
}
|
|
1152
|
+
}
|
|
1153
|
+
} finally {
|
|
1154
|
+
reader.releaseLock();
|
|
1155
|
+
}
|
|
1156
|
+
}
|
|
1157
|
+
/**
|
|
1158
|
+
* List available TTS providers with configuration options
|
|
1159
|
+
*/
|
|
1160
|
+
async listProviders() {
|
|
1161
|
+
const data = await this.request("GET", "/sdk/tts/providers");
|
|
1162
|
+
return data.providers || [];
|
|
1163
|
+
}
|
|
1164
|
+
};
|
|
1165
|
+
|
|
983
1166
|
// src/index.ts
|
|
984
1167
|
var _RealtimeXSDK = class _RealtimeXSDK {
|
|
985
1168
|
constructor(config = {}) {
|
|
@@ -997,6 +1180,7 @@ var _RealtimeXSDK = class _RealtimeXSDK {
|
|
|
997
1180
|
this.task = new TaskModule(this.realtimexUrl, this.appName, this.appId, this.apiKey);
|
|
998
1181
|
this.port = new PortModule(config.defaultPort);
|
|
999
1182
|
this.llm = new LLMModule(this.realtimexUrl, this.appId, this.appName, this.apiKey);
|
|
1183
|
+
this.tts = new TTSModule(this.realtimexUrl, this.appId, this.appName, this.apiKey);
|
|
1000
1184
|
if (this.permissions.length > 0 && this.appId && !this.apiKey) {
|
|
1001
1185
|
this.register().catch((err) => {
|
|
1002
1186
|
console.error("[RealtimeX SDK] Auto-registration failed:", err.message);
|
|
@@ -1105,6 +1289,7 @@ var RealtimeXSDK = _RealtimeXSDK;
|
|
|
1105
1289
|
PermissionRequiredError,
|
|
1106
1290
|
PortModule,
|
|
1107
1291
|
RealtimeXSDK,
|
|
1292
|
+
TTSModule,
|
|
1108
1293
|
TaskModule,
|
|
1109
1294
|
VectorStore,
|
|
1110
1295
|
WebhookModule
|
package/dist/index.mjs
CHANGED
|
@@ -933,6 +933,188 @@ var LLMModule = class {
|
|
|
933
933
|
}
|
|
934
934
|
};
|
|
935
935
|
|
|
936
|
+
// src/modules/tts.ts
|
|
937
|
+
var TTSModule = class {
|
|
938
|
+
constructor(realtimexUrl, appId, appName, apiKey) {
|
|
939
|
+
this.baseUrl = realtimexUrl.replace(/\/$/, "");
|
|
940
|
+
this.appId = appId;
|
|
941
|
+
this.appName = appName || process.env.RTX_APP_NAME || "Local App";
|
|
942
|
+
this.apiKey = apiKey;
|
|
943
|
+
}
|
|
944
|
+
get headers() {
|
|
945
|
+
if (this.apiKey) {
|
|
946
|
+
return {
|
|
947
|
+
"Content-Type": "application/json",
|
|
948
|
+
"Authorization": `Bearer ${this.apiKey}`
|
|
949
|
+
};
|
|
950
|
+
}
|
|
951
|
+
return {
|
|
952
|
+
"Content-Type": "application/json",
|
|
953
|
+
"x-app-id": this.appId
|
|
954
|
+
};
|
|
955
|
+
}
|
|
956
|
+
/**
|
|
957
|
+
* Request a single permission from Electron via internal API
|
|
958
|
+
*/
|
|
959
|
+
async requestPermission(permission) {
|
|
960
|
+
try {
|
|
961
|
+
const response = await fetch(`${this.baseUrl}/api/local-apps/request-permission`, {
|
|
962
|
+
method: "POST",
|
|
963
|
+
headers: { "Content-Type": "application/json" },
|
|
964
|
+
body: JSON.stringify({
|
|
965
|
+
app_id: this.appId,
|
|
966
|
+
app_name: this.appName,
|
|
967
|
+
permission
|
|
968
|
+
})
|
|
969
|
+
});
|
|
970
|
+
const data = await response.json();
|
|
971
|
+
return data.granted === true;
|
|
972
|
+
} catch (error) {
|
|
973
|
+
console.error("[SDK] Permission request failed:", error);
|
|
974
|
+
return false;
|
|
975
|
+
}
|
|
976
|
+
}
|
|
977
|
+
/**
|
|
978
|
+
* Internal request wrapper that handles automatic permission prompts
|
|
979
|
+
*/
|
|
980
|
+
async request(method, endpoint, body, isStream = false) {
|
|
981
|
+
const response = await fetch(`${this.baseUrl}${endpoint}`, {
|
|
982
|
+
method,
|
|
983
|
+
headers: this.headers,
|
|
984
|
+
body: body ? JSON.stringify(body) : void 0
|
|
985
|
+
});
|
|
986
|
+
if (!response.ok) {
|
|
987
|
+
const data = await response.json();
|
|
988
|
+
if (data.code === "PERMISSION_REQUIRED") {
|
|
989
|
+
const permission = data.permission || "tts.speak";
|
|
990
|
+
const granted = await this.requestPermission(permission);
|
|
991
|
+
if (granted) {
|
|
992
|
+
return this.request(method, endpoint, body, isStream);
|
|
993
|
+
}
|
|
994
|
+
throw new PermissionDeniedError(permission);
|
|
995
|
+
}
|
|
996
|
+
throw new Error(data.error || `Request failed: ${response.status}`);
|
|
997
|
+
}
|
|
998
|
+
if (isStream) {
|
|
999
|
+
return response.body;
|
|
1000
|
+
}
|
|
1001
|
+
const contentType = response.headers.get("content-type");
|
|
1002
|
+
if (contentType && contentType.includes("application/json")) {
|
|
1003
|
+
return response.json();
|
|
1004
|
+
}
|
|
1005
|
+
return response.arrayBuffer();
|
|
1006
|
+
}
|
|
1007
|
+
/**
|
|
1008
|
+
* Generate speech from text (returns full buffer)
|
|
1009
|
+
*
|
|
1010
|
+
* @example
|
|
1011
|
+
* ```ts
|
|
1012
|
+
* const buffer = await sdk.tts.speak("Hello world");
|
|
1013
|
+
* // Play buffer...
|
|
1014
|
+
* ```
|
|
1015
|
+
*/
|
|
1016
|
+
async speak(text, options = {}) {
|
|
1017
|
+
return this.request("POST", "/sdk/tts", {
|
|
1018
|
+
text,
|
|
1019
|
+
...options
|
|
1020
|
+
});
|
|
1021
|
+
}
|
|
1022
|
+
/**
|
|
1023
|
+
* Generate speech from text with streaming (yields decoded audio chunks)
|
|
1024
|
+
* Uses SSE internally but returns decoded ArrayBuffer chunks for easy playback.
|
|
1025
|
+
*
|
|
1026
|
+
* @example
|
|
1027
|
+
* ```ts
|
|
1028
|
+
* for await (const chunk of sdk.tts.speakStream("Hello world")) {
|
|
1029
|
+
* // chunk.audio is ArrayBuffer (already decoded!)
|
|
1030
|
+
* const blob = new Blob([chunk.audio], { type: chunk.mimeType });
|
|
1031
|
+
* const audio = new Audio(URL.createObjectURL(blob));
|
|
1032
|
+
* await audio.play();
|
|
1033
|
+
* }
|
|
1034
|
+
* ```
|
|
1035
|
+
*/
|
|
1036
|
+
async *speakStream(text, options = {}) {
|
|
1037
|
+
const response = await fetch(`${this.baseUrl}/sdk/tts/stream`, {
|
|
1038
|
+
method: "POST",
|
|
1039
|
+
headers: this.headers,
|
|
1040
|
+
body: JSON.stringify({ text, ...options })
|
|
1041
|
+
});
|
|
1042
|
+
if (!response.ok) {
|
|
1043
|
+
const data = await response.json();
|
|
1044
|
+
if (data.code === "PERMISSION_REQUIRED") {
|
|
1045
|
+
const permission = data.permission || "tts.generate";
|
|
1046
|
+
const granted = await this.requestPermission(permission);
|
|
1047
|
+
if (granted) {
|
|
1048
|
+
yield* this.speakStream(text, options);
|
|
1049
|
+
return;
|
|
1050
|
+
}
|
|
1051
|
+
throw new PermissionDeniedError(permission);
|
|
1052
|
+
}
|
|
1053
|
+
throw new Error(data.error || `Streaming failed: ${response.status}`);
|
|
1054
|
+
}
|
|
1055
|
+
const reader = response.body?.getReader();
|
|
1056
|
+
if (!reader) throw new Error("No response body");
|
|
1057
|
+
const decoder = new TextDecoder();
|
|
1058
|
+
let buffer = "";
|
|
1059
|
+
let eventType = "";
|
|
1060
|
+
try {
|
|
1061
|
+
while (true) {
|
|
1062
|
+
const { done, value } = await reader.read();
|
|
1063
|
+
if (done) break;
|
|
1064
|
+
buffer += decoder.decode(value, { stream: true });
|
|
1065
|
+
const lines = buffer.split("\n");
|
|
1066
|
+
buffer = lines.pop() || "";
|
|
1067
|
+
for (const line of lines) {
|
|
1068
|
+
const trimmedLine = line.trim();
|
|
1069
|
+
if (!trimmedLine) continue;
|
|
1070
|
+
if (trimmedLine.startsWith("event:")) {
|
|
1071
|
+
eventType = trimmedLine.slice(6).trim();
|
|
1072
|
+
} else if (trimmedLine.startsWith("data:")) {
|
|
1073
|
+
const eventData = trimmedLine.slice(5).trim();
|
|
1074
|
+
if (eventType === "chunk" && eventData) {
|
|
1075
|
+
try {
|
|
1076
|
+
const parsed = JSON.parse(eventData);
|
|
1077
|
+
const binaryString = atob(parsed.audio);
|
|
1078
|
+
const bytes = new Uint8Array(binaryString.length);
|
|
1079
|
+
for (let i = 0; i < binaryString.length; i++) {
|
|
1080
|
+
bytes[i] = binaryString.charCodeAt(i);
|
|
1081
|
+
}
|
|
1082
|
+
yield {
|
|
1083
|
+
index: parsed.index,
|
|
1084
|
+
total: parsed.total,
|
|
1085
|
+
audio: bytes.buffer,
|
|
1086
|
+
mimeType: parsed.mimeType
|
|
1087
|
+
};
|
|
1088
|
+
} catch (e) {
|
|
1089
|
+
console.warn("[TTS SDK] Failed to parse chunk:", e);
|
|
1090
|
+
}
|
|
1091
|
+
} else if (eventType === "error" && eventData) {
|
|
1092
|
+
try {
|
|
1093
|
+
const err = JSON.parse(eventData);
|
|
1094
|
+
throw new Error(err.error || "TTS streaming error");
|
|
1095
|
+
} catch (e) {
|
|
1096
|
+
if (e instanceof Error && e.message !== "TTS streaming error") {
|
|
1097
|
+
throw e;
|
|
1098
|
+
}
|
|
1099
|
+
}
|
|
1100
|
+
}
|
|
1101
|
+
eventType = "";
|
|
1102
|
+
}
|
|
1103
|
+
}
|
|
1104
|
+
}
|
|
1105
|
+
} finally {
|
|
1106
|
+
reader.releaseLock();
|
|
1107
|
+
}
|
|
1108
|
+
}
|
|
1109
|
+
/**
|
|
1110
|
+
* List available TTS providers with configuration options
|
|
1111
|
+
*/
|
|
1112
|
+
async listProviders() {
|
|
1113
|
+
const data = await this.request("GET", "/sdk/tts/providers");
|
|
1114
|
+
return data.providers || [];
|
|
1115
|
+
}
|
|
1116
|
+
};
|
|
1117
|
+
|
|
936
1118
|
// src/index.ts
|
|
937
1119
|
var _RealtimeXSDK = class _RealtimeXSDK {
|
|
938
1120
|
constructor(config = {}) {
|
|
@@ -950,6 +1132,7 @@ var _RealtimeXSDK = class _RealtimeXSDK {
|
|
|
950
1132
|
this.task = new TaskModule(this.realtimexUrl, this.appName, this.appId, this.apiKey);
|
|
951
1133
|
this.port = new PortModule(config.defaultPort);
|
|
952
1134
|
this.llm = new LLMModule(this.realtimexUrl, this.appId, this.appName, this.apiKey);
|
|
1135
|
+
this.tts = new TTSModule(this.realtimexUrl, this.appId, this.appName, this.apiKey);
|
|
953
1136
|
if (this.permissions.length > 0 && this.appId && !this.apiKey) {
|
|
954
1137
|
this.register().catch((err) => {
|
|
955
1138
|
console.error("[RealtimeX SDK] Auto-registration failed:", err.message);
|
|
@@ -1057,6 +1240,7 @@ export {
|
|
|
1057
1240
|
PermissionRequiredError,
|
|
1058
1241
|
PortModule,
|
|
1059
1242
|
RealtimeXSDK,
|
|
1243
|
+
TTSModule,
|
|
1060
1244
|
TaskModule,
|
|
1061
1245
|
VectorStore,
|
|
1062
1246
|
WebhookModule
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@realtimex/sdk",
|
|
3
|
-
"version": "1.1.4",
|
|
3
|
+
"version": "1.2.1",
|
|
4
4
|
"description": "SDK for building Local Apps that integrate with RealtimeX",
|
|
5
5
|
"main": "dist/index.js",
|
|
6
6
|
"module": "dist/index.mjs",
|
|
@@ -40,4 +40,4 @@
|
|
|
40
40
|
"engines": {
|
|
41
41
|
"node": ">=18.0.0"
|
|
42
42
|
}
|
|
43
|
-
}
|
|
43
|
+
}
|