@realtimex/sdk 1.1.4 → 1.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.mts CHANGED
@@ -79,6 +79,79 @@ interface Task {
79
79
  updated_at: string;
80
80
  runs: TaskRun[];
81
81
  }
82
/**
 * Optional settings accepted by `TTSModule.speak` and `TTSModule.speakStream`.
 * Every field may be omitted.
 */
interface TTSOptions {
    /** Provider-specific voice identifier. */
    voice?: string;
    /** Provider-specific model identifier. */
    model?: string;
    /** Speech speed; the documented range is 0.5 to 2.0. */
    speed?: number;
    /** ID of the TTS provider to use. */
    provider?: string;
    /** Language code such as 'en', 'es' or 'fr' (used by Supertonic). */
    language?: string;
    /** Supertonic quality level (`num_inference_steps`); the documented range is 1 to 20. */
    num_inference_steps?: number;
}
96
/**
 * Tunable options advertised by a single TTS provider.
 */
interface TTSProviderConfig {
    /** IDs of the voices/speakers the provider offers. */
    voices: string[];
    /** Languages the provider supports (present for multilingual providers). */
    languages?: string[];
    /** Allowed speed range together with its default value. */
    speed?: {
        min: number;
        max: number;
        default: number;
    };
    /** Allowed quality range, for providers that expose a quality setting. */
    quality?: {
        min: number;
        max: number;
        default: number;
        description?: string;
    };
}
115
/**
 * Description of one TTS provider, as returned by `TTSModule.listProviders`.
 */
interface TTSProvider {
    /** Provider identifier, e.g. 'elevenlabs' or 'supertonic_local'. */
    id: string;
    /** Human-readable provider name. */
    name: string;
    /** 'server' for a remote API provider, 'client' for a local one. */
    type: 'server' | 'client';
    /** True when the provider is configured and ready to use. */
    configured: boolean;
    /** True when the provider supports streaming. */
    supportsStreaming: boolean;
    /** Optional note describing provider requirements. */
    note?: string;
    /** Configuration options the provider exposes, if any. */
    config?: TTSProviderConfig;
}
131
/**
 * Response envelope for the TTS provider listing.
 * `TTSModule.listProviders` returns only its `providers` array.
 */
interface TTSProvidersResponse {
    success: boolean;
    providers: TTSProvider[];
    /** NOTE(review): presumably the ID of the default provider - confirm against the server. */
    default: string;
    error?: string;
}
137
/**
 * One decoded audio chunk yielded by `TTSModule.speakStream`.
 */
interface TTSChunk {
    /** Zero-based position of this chunk. */
    index: number;
    /** Total number of chunks in the stream. */
    total: number;
    /** Audio bytes, already decoded into an ArrayBuffer and ready for playback. */
    audio: ArrayBuffer;
    /** MIME type of the audio data. */
    mimeType: string;
}
147
/**
 * A typed streaming event.
 *
 * NOTE(review): presumably `data` is a `TTSChunk` for 'chunk', `{ totalChunks }`
 * for 'info' and `{ error }` for 'error'; the type does not enforce that
 * pairing, so narrow `data` explicitly before use - confirm with the server
 * protocol.
 */
interface TTSChunkEvent {
    type: 'info' | 'chunk' | 'error' | 'done';
    data: TTSChunk | {
        totalChunks: number;
    } | {
        error: string;
    };
}
82
155
 
83
156
  /**
84
157
  * Activities Module - HTTP Proxy to RealtimeX Main App
@@ -636,6 +709,52 @@ declare class LLMModule {
636
709
  search(query: string, options?: VectorQueryOptions): Promise<VectorQueryResult[]>;
637
710
  }
638
711
 
712
/**
 * Text-to-speech module, available on the SDK instance as `sdk.tts`.
 */
declare class TTSModule {
    private baseUrl;
    private appId;
    private appName;
    private apiKey?;
    constructor(realtimexUrl: string, appId: string, appName?: string, apiKey?: string);
    /** Headers attached to TTS requests. */
    private get headers();
    /**
     * Asks Electron, through the internal API, to grant one permission.
     */
    private requestPermission;
    /**
     * Shared request helper that triggers a permission prompt automatically
     * when the server reports that one is required.
     */
    private request;
    /**
     * Synthesizes `text` and resolves with the complete audio buffer.
     *
     * @param text Text to convert to speech.
     * @param options Optional voice/provider settings.
     * @returns The full audio as a single buffer.
     *
     * @example
     * ```ts
     * const buffer = await sdk.tts.speak("Hello world");
     * // Play buffer...
     * ```
     */
    speak(text: string, options?: TTSOptions): Promise<ArrayBuffer>;
    /**
     * Synthesizes `text` as a stream, yielding audio chunks as they arrive.
     * The transport is SSE, but each yielded chunk already carries a decoded
     * ArrayBuffer, so it can be played without further processing.
     *
     * @param text Text to convert to speech.
     * @param options Optional voice/provider settings.
     *
     * @example
     * ```ts
     * for await (const chunk of sdk.tts.speakStream("Hello world")) {
     *   // chunk.audio is ArrayBuffer (already decoded!)
     *   const blob = new Blob([chunk.audio], { type: chunk.mimeType });
     *   const audio = new Audio(URL.createObjectURL(blob));
     *   await audio.play();
     * }
     * ```
     */
    speakStream(text: string, options?: TTSOptions): AsyncGenerator<TTSChunk>;
    /**
     * Lists the available TTS providers along with their configuration options.
     */
    listProviders(): Promise<TTSProvider[]>;
}
757
+
639
758
  /**
640
759
  * RealtimeX Local App SDK
641
760
  *
@@ -650,6 +769,7 @@ declare class RealtimeXSDK {
650
769
  task: TaskModule;
651
770
  port: PortModule;
652
771
  llm: LLMModule;
772
+ tts: TTSModule;
653
773
  readonly appId: string;
654
774
  readonly appName: string | undefined;
655
775
  readonly apiKey: string | undefined;
@@ -683,4 +803,4 @@ declare class RealtimeXSDK {
683
803
  getAppDataDir(): Promise<string>;
684
804
  }
685
805
 
686
- export { ActivitiesModule, type Activity, type Agent, ApiModule, type ChatMessage, type ChatOptions, type ChatResponse, type EmbedOptions, type EmbedResponse, LLMModule, LLMPermissionError, LLMProviderError, PermissionDeniedError, PermissionRequiredError, PortModule, type Provider, type ProvidersResponse, RealtimeXSDK, type SDKConfig, type StreamChunk, type Task, TaskModule, type TaskRun, type Thread, type TriggerAgentPayload, type TriggerAgentResponse, type VectorDeleteOptions, type VectorDeleteResponse, type VectorQueryOptions, type VectorQueryResponse, type VectorQueryResult, type VectorRecord, VectorStore, type VectorUpsertOptions, type VectorUpsertResponse, WebhookModule, type Workspace };
806
+ export { ActivitiesModule, type Activity, type Agent, ApiModule, type ChatMessage, type ChatOptions, type ChatResponse, type EmbedOptions, type EmbedResponse, LLMModule, LLMPermissionError, LLMProviderError, PermissionDeniedError, PermissionRequiredError, PortModule, type Provider, type ProvidersResponse, RealtimeXSDK, type SDKConfig, type StreamChunk, type TTSChunk, type TTSChunkEvent, TTSModule, type TTSOptions, type TTSProvider, type TTSProviderConfig, type TTSProvidersResponse, type Task, TaskModule, type TaskRun, type Thread, type TriggerAgentPayload, type TriggerAgentResponse, type VectorDeleteOptions, type VectorDeleteResponse, type VectorQueryOptions, type VectorQueryResponse, type VectorQueryResult, type VectorRecord, VectorStore, type VectorUpsertOptions, type VectorUpsertResponse, WebhookModule, type Workspace };
package/dist/index.d.ts CHANGED
@@ -79,6 +79,79 @@ interface Task {
79
79
  updated_at: string;
80
80
  runs: TaskRun[];
81
81
  }
82
/**
 * Optional settings accepted by `TTSModule.speak` and `TTSModule.speakStream`.
 * Every field may be omitted.
 */
interface TTSOptions {
    /** Provider-specific voice identifier. */
    voice?: string;
    /** Provider-specific model identifier. */
    model?: string;
    /** Speech speed; the documented range is 0.5 to 2.0. */
    speed?: number;
    /** ID of the TTS provider to use. */
    provider?: string;
    /** Language code such as 'en', 'es' or 'fr' (used by Supertonic). */
    language?: string;
    /** Supertonic quality level (`num_inference_steps`); the documented range is 1 to 20. */
    num_inference_steps?: number;
}
96
/**
 * Tunable options advertised by a single TTS provider.
 */
interface TTSProviderConfig {
    /** IDs of the voices/speakers the provider offers. */
    voices: string[];
    /** Languages the provider supports (present for multilingual providers). */
    languages?: string[];
    /** Allowed speed range together with its default value. */
    speed?: {
        min: number;
        max: number;
        default: number;
    };
    /** Allowed quality range, for providers that expose a quality setting. */
    quality?: {
        min: number;
        max: number;
        default: number;
        description?: string;
    };
}
115
/**
 * Description of one TTS provider, as returned by `TTSModule.listProviders`.
 */
interface TTSProvider {
    /** Provider identifier, e.g. 'elevenlabs' or 'supertonic_local'. */
    id: string;
    /** Human-readable provider name. */
    name: string;
    /** 'server' for a remote API provider, 'client' for a local one. */
    type: 'server' | 'client';
    /** True when the provider is configured and ready to use. */
    configured: boolean;
    /** True when the provider supports streaming. */
    supportsStreaming: boolean;
    /** Optional note describing provider requirements. */
    note?: string;
    /** Configuration options the provider exposes, if any. */
    config?: TTSProviderConfig;
}
131
/**
 * Response envelope for the TTS provider listing.
 * `TTSModule.listProviders` returns only its `providers` array.
 */
interface TTSProvidersResponse {
    success: boolean;
    providers: TTSProvider[];
    /** NOTE(review): presumably the ID of the default provider - confirm against the server. */
    default: string;
    error?: string;
}
137
/**
 * One decoded audio chunk yielded by `TTSModule.speakStream`.
 */
interface TTSChunk {
    /** Zero-based position of this chunk. */
    index: number;
    /** Total number of chunks in the stream. */
    total: number;
    /** Audio bytes, already decoded into an ArrayBuffer and ready for playback. */
    audio: ArrayBuffer;
    /** MIME type of the audio data. */
    mimeType: string;
}
147
/**
 * A typed streaming event.
 *
 * NOTE(review): presumably `data` is a `TTSChunk` for 'chunk', `{ totalChunks }`
 * for 'info' and `{ error }` for 'error'; the type does not enforce that
 * pairing, so narrow `data` explicitly before use - confirm with the server
 * protocol.
 */
interface TTSChunkEvent {
    type: 'info' | 'chunk' | 'error' | 'done';
    data: TTSChunk | {
        totalChunks: number;
    } | {
        error: string;
    };
}
82
155
 
83
156
  /**
84
157
  * Activities Module - HTTP Proxy to RealtimeX Main App
@@ -636,6 +709,52 @@ declare class LLMModule {
636
709
  search(query: string, options?: VectorQueryOptions): Promise<VectorQueryResult[]>;
637
710
  }
638
711
 
712
/**
 * Text-to-speech module, available on the SDK instance as `sdk.tts`.
 */
declare class TTSModule {
    private baseUrl;
    private appId;
    private appName;
    private apiKey?;
    constructor(realtimexUrl: string, appId: string, appName?: string, apiKey?: string);
    /** Headers attached to TTS requests. */
    private get headers();
    /**
     * Asks Electron, through the internal API, to grant one permission.
     */
    private requestPermission;
    /**
     * Shared request helper that triggers a permission prompt automatically
     * when the server reports that one is required.
     */
    private request;
    /**
     * Synthesizes `text` and resolves with the complete audio buffer.
     *
     * @param text Text to convert to speech.
     * @param options Optional voice/provider settings.
     * @returns The full audio as a single buffer.
     *
     * @example
     * ```ts
     * const buffer = await sdk.tts.speak("Hello world");
     * // Play buffer...
     * ```
     */
    speak(text: string, options?: TTSOptions): Promise<ArrayBuffer>;
    /**
     * Synthesizes `text` as a stream, yielding audio chunks as they arrive.
     * The transport is SSE, but each yielded chunk already carries a decoded
     * ArrayBuffer, so it can be played without further processing.
     *
     * @param text Text to convert to speech.
     * @param options Optional voice/provider settings.
     *
     * @example
     * ```ts
     * for await (const chunk of sdk.tts.speakStream("Hello world")) {
     *   // chunk.audio is ArrayBuffer (already decoded!)
     *   const blob = new Blob([chunk.audio], { type: chunk.mimeType });
     *   const audio = new Audio(URL.createObjectURL(blob));
     *   await audio.play();
     * }
     * ```
     */
    speakStream(text: string, options?: TTSOptions): AsyncGenerator<TTSChunk>;
    /**
     * Lists the available TTS providers along with their configuration options.
     */
    listProviders(): Promise<TTSProvider[]>;
}
757
+
639
758
  /**
640
759
  * RealtimeX Local App SDK
641
760
  *
@@ -650,6 +769,7 @@ declare class RealtimeXSDK {
650
769
  task: TaskModule;
651
770
  port: PortModule;
652
771
  llm: LLMModule;
772
+ tts: TTSModule;
653
773
  readonly appId: string;
654
774
  readonly appName: string | undefined;
655
775
  readonly apiKey: string | undefined;
@@ -683,4 +803,4 @@ declare class RealtimeXSDK {
683
803
  getAppDataDir(): Promise<string>;
684
804
  }
685
805
 
686
- export { ActivitiesModule, type Activity, type Agent, ApiModule, type ChatMessage, type ChatOptions, type ChatResponse, type EmbedOptions, type EmbedResponse, LLMModule, LLMPermissionError, LLMProviderError, PermissionDeniedError, PermissionRequiredError, PortModule, type Provider, type ProvidersResponse, RealtimeXSDK, type SDKConfig, type StreamChunk, type Task, TaskModule, type TaskRun, type Thread, type TriggerAgentPayload, type TriggerAgentResponse, type VectorDeleteOptions, type VectorDeleteResponse, type VectorQueryOptions, type VectorQueryResponse, type VectorQueryResult, type VectorRecord, VectorStore, type VectorUpsertOptions, type VectorUpsertResponse, WebhookModule, type Workspace };
806
+ export { ActivitiesModule, type Activity, type Agent, ApiModule, type ChatMessage, type ChatOptions, type ChatResponse, type EmbedOptions, type EmbedResponse, LLMModule, LLMPermissionError, LLMProviderError, PermissionDeniedError, PermissionRequiredError, PortModule, type Provider, type ProvidersResponse, RealtimeXSDK, type SDKConfig, type StreamChunk, type TTSChunk, type TTSChunkEvent, TTSModule, type TTSOptions, type TTSProvider, type TTSProviderConfig, type TTSProvidersResponse, type Task, TaskModule, type TaskRun, type Thread, type TriggerAgentPayload, type TriggerAgentResponse, type VectorDeleteOptions, type VectorDeleteResponse, type VectorQueryOptions, type VectorQueryResponse, type VectorQueryResult, type VectorRecord, VectorStore, type VectorUpsertOptions, type VectorUpsertResponse, WebhookModule, type Workspace };
package/dist/index.js CHANGED
@@ -39,6 +39,7 @@ __export(index_exports, {
39
39
  PermissionRequiredError: () => PermissionRequiredError,
40
40
  PortModule: () => PortModule,
41
41
  RealtimeXSDK: () => RealtimeXSDK,
42
+ TTSModule: () => TTSModule,
42
43
  TaskModule: () => TaskModule,
43
44
  VectorStore: () => VectorStore,
44
45
  WebhookModule: () => WebhookModule
@@ -980,6 +981,188 @@ var LLMModule = class {
980
981
  }
981
982
  };
982
983
 
984
+ // src/modules/tts.ts
985
/**
 * Text-to-speech client for the RealtimeX `/sdk/tts` HTTP endpoints.
 *
 * Requests carry `Authorization: Bearer <apiKey>` when an API key is set and
 * `x-app-id` otherwise. When the server answers `PERMISSION_REQUIRED`, the
 * permission is requested from the host app and the call is retried; a
 * permission that was already granted but is demanded again is treated as
 * denied so the retry cannot loop forever.
 */
var TTSModule = class {
  /**
   * @param realtimexUrl Base URL of the RealtimeX app; one trailing slash is stripped.
   * @param appId App identifier, sent as `x-app-id` and in permission requests.
   * @param appName Name sent with permission requests; falls back to the
   *   RTX_APP_NAME environment variable, then to "Local App".
   * @param apiKey Optional API key; when set, bearer auth replaces `x-app-id`.
   */
  constructor(realtimexUrl, appId, appName, apiKey) {
    this.baseUrl = realtimexUrl.replace(/\/$/, "");
    this.appId = appId;
    // `process` is undefined outside Node, so guard the environment lookup.
    const envAppName = typeof process !== "undefined" && process.env ? process.env.RTX_APP_NAME : void 0;
    this.appName = appName || envAppName || "Local App";
    this.apiKey = apiKey;
  }
  /** JSON content type plus either bearer auth (API key) or the app ID header. */
  get headers() {
    if (this.apiKey) {
      return {
        "Content-Type": "application/json",
        "Authorization": `Bearer ${this.apiKey}`
      };
    }
    return {
      "Content-Type": "application/json",
      "x-app-id": this.appId
    };
  }
  /**
   * Request a single permission from Electron via internal API.
   * Best effort: any failure is logged and reported as "not granted".
   *
   * @returns true only when the response body has `granted === true`.
   */
  async requestPermission(permission) {
    try {
      const response = await fetch(`${this.baseUrl}/api/local-apps/request-permission`, {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify({
          app_id: this.appId,
          app_name: this.appName,
          permission
        })
      });
      const data = await response.json();
      return data.granted === true;
    } catch (error) {
      console.error("[SDK] Permission request failed:", error);
      return false;
    }
  }
  /**
   * Read a failed response's body as a JSON object.
   * Returns {} when the body is not JSON (e.g. an HTML error page from a
   * proxy) so callers can still report the HTTP status.
   */
  async readErrorBody(response) {
    try {
      const data = await response.json();
      return data && typeof data === "object" ? data : {};
    } catch {
      return {};
    }
  }
  /**
   * Send a request and resolve with the successful Response.
   *
   * @param defaultPermission Permission to request when the server's
   *   PERMISSION_REQUIRED answer does not name one.
   * @param failureLabel Prefix of the fallback error message ("<label>: <status>").
   * @throws PermissionDeniedError when a permission is refused, or is demanded
   *   again after having been granted.
   * @throws Error for every other non-2xx response.
   */
  async fetchWithPermission(method, endpoint, body, defaultPermission, failureLabel) {
    const granted = new Set();
    for (;;) {
      const response = await fetch(`${this.baseUrl}${endpoint}`, {
        method,
        headers: this.headers,
        body: body ? JSON.stringify(body) : void 0
      });
      if (response.ok) {
        return response;
      }
      const data = await this.readErrorBody(response);
      if (data.code !== "PERMISSION_REQUIRED") {
        throw new Error(data.error || `${failureLabel}: ${response.status}`);
      }
      const permission = data.permission || defaultPermission;
      // Re-requesting a permission the server still rejects would never end.
      if (granted.has(permission) || !await this.requestPermission(permission)) {
        throw new PermissionDeniedError(permission);
      }
      granted.add(permission);
    }
  }
  /**
   * Internal request wrapper that handles automatic permission prompts.
   *
   * @returns The raw body stream when `isStream` is true, parsed JSON for
   *   `application/json` responses, otherwise the body as an ArrayBuffer.
   */
  async request(method, endpoint, body, isStream = false) {
    // NOTE(review): the fallback permission here is "tts.speak" while
    // speakStream falls back to "tts.generate" - confirm which one is intended.
    const response = await this.fetchWithPermission(method, endpoint, body, "tts.speak", "Request failed");
    if (isStream) {
      return response.body;
    }
    const contentType = response.headers.get("content-type");
    if (contentType && contentType.includes("application/json")) {
      return response.json();
    }
    return response.arrayBuffer();
  }
  /**
   * Generate speech from text (returns full buffer)
   *
   * @example
   * ```ts
   * const buffer = await sdk.tts.speak("Hello world");
   * // Play buffer...
   * ```
   */
  async speak(text, options = {}) {
    return this.request("POST", "/sdk/tts", {
      text,
      ...options
    });
  }
  /**
   * Decode the JSON payload of a "chunk" event into a TTSChunk.
   * Malformed payloads are logged and reported as null so one bad chunk does
   * not abort the whole stream.
   */
  decodeChunk(eventData) {
    try {
      const parsed = JSON.parse(eventData);
      const binaryString = atob(parsed.audio);
      const bytes = new Uint8Array(binaryString.length);
      for (let i = 0; i < binaryString.length; i++) {
        bytes[i] = binaryString.charCodeAt(i);
      }
      return {
        index: parsed.index,
        total: parsed.total,
        audio: bytes.buffer,
        mimeType: parsed.mimeType
      };
    } catch (e) {
      console.warn("[TTS SDK] Failed to parse chunk:", e);
      return null;
    }
  }
  /**
   * Generate speech from text with streaming (yields decoded audio chunks)
   * Uses SSE internally but returns decoded ArrayBuffer chunks for easy playback.
   *
   * @throws PermissionDeniedError when the required permission is not granted.
   * @throws Error when the request fails or the server emits an "error" event.
   *
   * @example
   * ```ts
   * for await (const chunk of sdk.tts.speakStream("Hello world")) {
   *   // chunk.audio is ArrayBuffer (already decoded!)
   *   const blob = new Blob([chunk.audio], { type: chunk.mimeType });
   *   const audio = new Audio(URL.createObjectURL(blob));
   *   await audio.play();
   * }
   * ```
   */
  async *speakStream(text, options = {}) {
    const response = await this.fetchWithPermission(
      "POST",
      "/sdk/tts/stream",
      { text, ...options },
      "tts.generate",
      "Streaming failed"
    );
    const reader = response.body?.getReader();
    if (!reader) throw new Error("No response body");
    const decoder = new TextDecoder();
    let pending = "";
    let eventType = "";
    try {
      let streamEnded = false;
      while (!streamEnded) {
        const { done, value } = await reader.read();
        streamEnded = done;
        // On end of stream, flush the decoder and treat the leftover text as a
        // complete line so a final event without a trailing newline is kept.
        pending += done ? decoder.decode() : decoder.decode(value, { stream: true });
        const lines = pending.split("\n");
        pending = done ? "" : lines.pop() || "";
        for (const line of lines) {
          const trimmedLine = line.trim();
          if (!trimmedLine) continue;
          if (trimmedLine.startsWith("event:")) {
            eventType = trimmedLine.slice(6).trim();
            continue;
          }
          if (!trimmedLine.startsWith("data:")) continue;
          const eventData = trimmedLine.slice(5).trim();
          // An event type applies to a single data line only.
          const currentType = eventType;
          eventType = "";
          if (!eventData) continue;
          if (currentType === "chunk") {
            const chunk = this.decodeChunk(eventData);
            // Yield outside any try/catch so errors injected by the consumer
            // are not mistaken for parse failures.
            if (chunk) yield chunk;
          } else if (currentType === "error") {
            let message = "TTS streaming error";
            try {
              const err = JSON.parse(eventData);
              if (err && err.error) message = err.error;
            } catch {
              // Not JSON: surface the raw payload instead of a SyntaxError.
              message = eventData;
            }
            throw new Error(message);
          }
        }
      }
    } finally {
      // Cancel so the connection is released when the consumer stops early or
      // an error was thrown; this is a no-op once the stream has finished.
      await reader.cancel().catch(() => {
      });
      reader.releaseLock();
    }
  }
  /**
   * List available TTS providers with configuration options
   *
   * @returns The `providers` array of the response, or [] when it is absent.
   */
  async listProviders() {
    const data = await this.request("GET", "/sdk/tts/providers");
    return data.providers || [];
  }
};
1165
+
983
1166
  // src/index.ts
984
1167
  var _RealtimeXSDK = class _RealtimeXSDK {
985
1168
  constructor(config = {}) {
@@ -997,6 +1180,7 @@ var _RealtimeXSDK = class _RealtimeXSDK {
997
1180
  this.task = new TaskModule(this.realtimexUrl, this.appName, this.appId, this.apiKey);
998
1181
  this.port = new PortModule(config.defaultPort);
999
1182
  this.llm = new LLMModule(this.realtimexUrl, this.appId, this.appName, this.apiKey);
1183
+ this.tts = new TTSModule(this.realtimexUrl, this.appId, this.appName, this.apiKey);
1000
1184
  if (this.permissions.length > 0 && this.appId && !this.apiKey) {
1001
1185
  this.register().catch((err) => {
1002
1186
  console.error("[RealtimeX SDK] Auto-registration failed:", err.message);
@@ -1105,6 +1289,7 @@ var RealtimeXSDK = _RealtimeXSDK;
1105
1289
  PermissionRequiredError,
1106
1290
  PortModule,
1107
1291
  RealtimeXSDK,
1292
+ TTSModule,
1108
1293
  TaskModule,
1109
1294
  VectorStore,
1110
1295
  WebhookModule
package/dist/index.mjs CHANGED
@@ -933,6 +933,188 @@ var LLMModule = class {
933
933
  }
934
934
  };
935
935
 
936
+ // src/modules/tts.ts
937
/**
 * Text-to-speech client for the RealtimeX `/sdk/tts` HTTP endpoints.
 *
 * Requests carry `Authorization: Bearer <apiKey>` when an API key is set and
 * `x-app-id` otherwise. When the server answers `PERMISSION_REQUIRED`, the
 * permission is requested from the host app and the call is retried; a
 * permission that was already granted but is demanded again is treated as
 * denied so the retry cannot loop forever.
 */
var TTSModule = class {
  /**
   * @param realtimexUrl Base URL of the RealtimeX app; one trailing slash is stripped.
   * @param appId App identifier, sent as `x-app-id` and in permission requests.
   * @param appName Name sent with permission requests; falls back to the
   *   RTX_APP_NAME environment variable, then to "Local App".
   * @param apiKey Optional API key; when set, bearer auth replaces `x-app-id`.
   */
  constructor(realtimexUrl, appId, appName, apiKey) {
    this.baseUrl = realtimexUrl.replace(/\/$/, "");
    this.appId = appId;
    // `process` is undefined outside Node, so guard the environment lookup.
    const envAppName = typeof process !== "undefined" && process.env ? process.env.RTX_APP_NAME : void 0;
    this.appName = appName || envAppName || "Local App";
    this.apiKey = apiKey;
  }
  /** JSON content type plus either bearer auth (API key) or the app ID header. */
  get headers() {
    if (this.apiKey) {
      return {
        "Content-Type": "application/json",
        "Authorization": `Bearer ${this.apiKey}`
      };
    }
    return {
      "Content-Type": "application/json",
      "x-app-id": this.appId
    };
  }
  /**
   * Request a single permission from Electron via internal API.
   * Best effort: any failure is logged and reported as "not granted".
   *
   * @returns true only when the response body has `granted === true`.
   */
  async requestPermission(permission) {
    try {
      const response = await fetch(`${this.baseUrl}/api/local-apps/request-permission`, {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify({
          app_id: this.appId,
          app_name: this.appName,
          permission
        })
      });
      const data = await response.json();
      return data.granted === true;
    } catch (error) {
      console.error("[SDK] Permission request failed:", error);
      return false;
    }
  }
  /**
   * Read a failed response's body as a JSON object.
   * Returns {} when the body is not JSON (e.g. an HTML error page from a
   * proxy) so callers can still report the HTTP status.
   */
  async readErrorBody(response) {
    try {
      const data = await response.json();
      return data && typeof data === "object" ? data : {};
    } catch {
      return {};
    }
  }
  /**
   * Send a request and resolve with the successful Response.
   *
   * @param defaultPermission Permission to request when the server's
   *   PERMISSION_REQUIRED answer does not name one.
   * @param failureLabel Prefix of the fallback error message ("<label>: <status>").
   * @throws PermissionDeniedError when a permission is refused, or is demanded
   *   again after having been granted.
   * @throws Error for every other non-2xx response.
   */
  async fetchWithPermission(method, endpoint, body, defaultPermission, failureLabel) {
    const granted = new Set();
    for (;;) {
      const response = await fetch(`${this.baseUrl}${endpoint}`, {
        method,
        headers: this.headers,
        body: body ? JSON.stringify(body) : void 0
      });
      if (response.ok) {
        return response;
      }
      const data = await this.readErrorBody(response);
      if (data.code !== "PERMISSION_REQUIRED") {
        throw new Error(data.error || `${failureLabel}: ${response.status}`);
      }
      const permission = data.permission || defaultPermission;
      // Re-requesting a permission the server still rejects would never end.
      if (granted.has(permission) || !await this.requestPermission(permission)) {
        throw new PermissionDeniedError(permission);
      }
      granted.add(permission);
    }
  }
  /**
   * Internal request wrapper that handles automatic permission prompts.
   *
   * @returns The raw body stream when `isStream` is true, parsed JSON for
   *   `application/json` responses, otherwise the body as an ArrayBuffer.
   */
  async request(method, endpoint, body, isStream = false) {
    // NOTE(review): the fallback permission here is "tts.speak" while
    // speakStream falls back to "tts.generate" - confirm which one is intended.
    const response = await this.fetchWithPermission(method, endpoint, body, "tts.speak", "Request failed");
    if (isStream) {
      return response.body;
    }
    const contentType = response.headers.get("content-type");
    if (contentType && contentType.includes("application/json")) {
      return response.json();
    }
    return response.arrayBuffer();
  }
  /**
   * Generate speech from text (returns full buffer)
   *
   * @example
   * ```ts
   * const buffer = await sdk.tts.speak("Hello world");
   * // Play buffer...
   * ```
   */
  async speak(text, options = {}) {
    return this.request("POST", "/sdk/tts", {
      text,
      ...options
    });
  }
  /**
   * Decode the JSON payload of a "chunk" event into a TTSChunk.
   * Malformed payloads are logged and reported as null so one bad chunk does
   * not abort the whole stream.
   */
  decodeChunk(eventData) {
    try {
      const parsed = JSON.parse(eventData);
      const binaryString = atob(parsed.audio);
      const bytes = new Uint8Array(binaryString.length);
      for (let i = 0; i < binaryString.length; i++) {
        bytes[i] = binaryString.charCodeAt(i);
      }
      return {
        index: parsed.index,
        total: parsed.total,
        audio: bytes.buffer,
        mimeType: parsed.mimeType
      };
    } catch (e) {
      console.warn("[TTS SDK] Failed to parse chunk:", e);
      return null;
    }
  }
  /**
   * Generate speech from text with streaming (yields decoded audio chunks)
   * Uses SSE internally but returns decoded ArrayBuffer chunks for easy playback.
   *
   * @throws PermissionDeniedError when the required permission is not granted.
   * @throws Error when the request fails or the server emits an "error" event.
   *
   * @example
   * ```ts
   * for await (const chunk of sdk.tts.speakStream("Hello world")) {
   *   // chunk.audio is ArrayBuffer (already decoded!)
   *   const blob = new Blob([chunk.audio], { type: chunk.mimeType });
   *   const audio = new Audio(URL.createObjectURL(blob));
   *   await audio.play();
   * }
   * ```
   */
  async *speakStream(text, options = {}) {
    const response = await this.fetchWithPermission(
      "POST",
      "/sdk/tts/stream",
      { text, ...options },
      "tts.generate",
      "Streaming failed"
    );
    const reader = response.body?.getReader();
    if (!reader) throw new Error("No response body");
    const decoder = new TextDecoder();
    let pending = "";
    let eventType = "";
    try {
      let streamEnded = false;
      while (!streamEnded) {
        const { done, value } = await reader.read();
        streamEnded = done;
        // On end of stream, flush the decoder and treat the leftover text as a
        // complete line so a final event without a trailing newline is kept.
        pending += done ? decoder.decode() : decoder.decode(value, { stream: true });
        const lines = pending.split("\n");
        pending = done ? "" : lines.pop() || "";
        for (const line of lines) {
          const trimmedLine = line.trim();
          if (!trimmedLine) continue;
          if (trimmedLine.startsWith("event:")) {
            eventType = trimmedLine.slice(6).trim();
            continue;
          }
          if (!trimmedLine.startsWith("data:")) continue;
          const eventData = trimmedLine.slice(5).trim();
          // An event type applies to a single data line only.
          const currentType = eventType;
          eventType = "";
          if (!eventData) continue;
          if (currentType === "chunk") {
            const chunk = this.decodeChunk(eventData);
            // Yield outside any try/catch so errors injected by the consumer
            // are not mistaken for parse failures.
            if (chunk) yield chunk;
          } else if (currentType === "error") {
            let message = "TTS streaming error";
            try {
              const err = JSON.parse(eventData);
              if (err && err.error) message = err.error;
            } catch {
              // Not JSON: surface the raw payload instead of a SyntaxError.
              message = eventData;
            }
            throw new Error(message);
          }
        }
      }
    } finally {
      // Cancel so the connection is released when the consumer stops early or
      // an error was thrown; this is a no-op once the stream has finished.
      await reader.cancel().catch(() => {
      });
      reader.releaseLock();
    }
  }
  /**
   * List available TTS providers with configuration options
   *
   * @returns The `providers` array of the response, or [] when it is absent.
   */
  async listProviders() {
    const data = await this.request("GET", "/sdk/tts/providers");
    return data.providers || [];
  }
};
1117
+
936
1118
  // src/index.ts
937
1119
  var _RealtimeXSDK = class _RealtimeXSDK {
938
1120
  constructor(config = {}) {
@@ -950,6 +1132,7 @@ var _RealtimeXSDK = class _RealtimeXSDK {
950
1132
  this.task = new TaskModule(this.realtimexUrl, this.appName, this.appId, this.apiKey);
951
1133
  this.port = new PortModule(config.defaultPort);
952
1134
  this.llm = new LLMModule(this.realtimexUrl, this.appId, this.appName, this.apiKey);
1135
+ this.tts = new TTSModule(this.realtimexUrl, this.appId, this.appName, this.apiKey);
953
1136
  if (this.permissions.length > 0 && this.appId && !this.apiKey) {
954
1137
  this.register().catch((err) => {
955
1138
  console.error("[RealtimeX SDK] Auto-registration failed:", err.message);
@@ -1057,6 +1240,7 @@ export {
1057
1240
  PermissionRequiredError,
1058
1241
  PortModule,
1059
1242
  RealtimeXSDK,
1243
+ TTSModule,
1060
1244
  TaskModule,
1061
1245
  VectorStore,
1062
1246
  WebhookModule
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@realtimex/sdk",
3
- "version": "1.1.4",
3
+ "version": "1.2.1",
4
4
  "description": "SDK for building Local Apps that integrate with RealtimeX",
5
5
  "main": "dist/index.js",
6
6
  "module": "dist/index.mjs",
@@ -40,4 +40,4 @@
40
40
  "engines": {
41
41
  "node": ">=18.0.0"
42
42
  }
43
- }
43
+ }