@soniox/node 1.1.1 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +43 -0
- package/dist/index.cjs +988 -239
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +956 -176
- package/dist/index.d.cts.map +1 -1
- package/dist/index.d.mts +956 -176
- package/dist/index.d.mts.map +1 -1
- package/dist/index.mjs +984 -240
- package/dist/index.mjs.map +1 -1
- package/package.json +1 -1
package/dist/index.cjs
CHANGED
|
@@ -1,37 +1,16 @@
|
|
|
1
|
+
let node_fs_promises = require("node:fs/promises");
|
|
1
2
|
|
|
2
3
|
//#region src/constants.ts
|
|
3
4
|
const SONIOX_API_BASE_URL = "https://api.soniox.com";
|
|
4
5
|
const SONIOX_API_WS_URL = "wss://stt-rt.soniox.com/transcribe-websocket";
|
|
6
|
+
const SONIOX_TTS_API_BASE_URL = "https://tts-rt.soniox.com";
|
|
7
|
+
const SONIOX_TTS_WS_URL = "wss://tts-rt.soniox.com/tts-websocket";
|
|
5
8
|
const SONIOX_TMP_API_KEY_USAGE_TYPE = "transcribe_websocket";
|
|
6
9
|
const SONIOX_TMP_API_KEY_DURATION_MIN = 1;
|
|
7
10
|
const SONIOX_TMP_API_KEY_DURATION_MAX = 3600;
|
|
8
11
|
const SONIOX_API_WEBHOOK_HEADER_ENV = "SONIOX_API_WEBHOOK_HEADER";
|
|
9
12
|
const SONIOX_API_WEBHOOK_SECRET_ENV = "SONIOX_API_WEBHOOK_SECRET";
|
|
10
13
|
|
|
11
|
-
//#endregion
|
|
12
|
-
//#region src/async/auth.ts
|
|
13
|
-
var SonioxAuthAPI = class {
|
|
14
|
-
constructor(http) {
|
|
15
|
-
this.http = http;
|
|
16
|
-
}
|
|
17
|
-
/**
|
|
18
|
-
* Creates a temporary API key for client-side use.
|
|
19
|
-
*
|
|
20
|
-
* @param request - Request parameters for the temporary key
|
|
21
|
-
* @param signal - Optional AbortSignal for cancellation
|
|
22
|
-
* @returns The temporary API key response
|
|
23
|
-
*/
|
|
24
|
-
async createTemporaryKey(request, signal) {
|
|
25
|
-
if (!Number.isFinite(request.expires_in_seconds) || request.expires_in_seconds < 1 || request.expires_in_seconds > 3600) throw new Error("expires_in_seconds must be a finite number between 1 and 3600");
|
|
26
|
-
return (await this.http.request({
|
|
27
|
-
method: "POST",
|
|
28
|
-
path: "/v1/auth/temporary-api-key",
|
|
29
|
-
body: request,
|
|
30
|
-
...signal && { signal }
|
|
31
|
-
})).data;
|
|
32
|
-
}
|
|
33
|
-
};
|
|
34
|
-
|
|
35
14
|
//#endregion
|
|
36
15
|
//#region ../core/src/errors.ts
|
|
37
16
|
var SonioxError = class extends Error {
|
|
@@ -79,6 +58,208 @@ var SonioxError = class extends Error {
|
|
|
79
58
|
}
|
|
80
59
|
};
|
|
81
60
|
|
|
61
|
+
//#endregion
|
|
62
|
+
//#region ../core/src/http-errors.ts
|
|
63
|
+
/**
|
|
64
|
+
* HTTP error handling for the Soniox SDK.
|
|
65
|
+
*
|
|
66
|
+
* Lives in `@soniox/core` so it can be shared by the browser-safe
|
|
67
|
+
* `TtsRestClient` and the Node `HttpClient`. `@soniox/node` re-exports
|
|
68
|
+
* these symbols for backwards compatibility.
|
|
69
|
+
*/
|
|
70
|
+
/** Maximum body text length to include in error details (4KB) */
|
|
71
|
+
const MAX_BODY_TEXT_LENGTH = 4096;
|
|
72
|
+
/**
 * Error type used for every failed REST (HTTP) interaction.
 *
 * Raised for transport failures, timeouts, aborted requests, non-2xx
 * responses, and responses whose body could not be parsed.
 */
var SonioxHttpError = class extends SonioxError {
	/** URL the request was sent to */
	url;
	/** HTTP method of the request */
	method;
	/** Response headers (only for http_error) */
	headers;
	/** Response body as text, capped at 4KB (only for http_error/parse_error) */
	bodyText;
	/**
	 * @param details - Error description: `message`, `code`, `statusCode`,
	 *   `cause`, `url`, `method`, `headers` and `bodyText`
	 */
	constructor(details) {
		super(details.message, details.code, details.statusCode, details.cause);
		const { url, method, headers, bodyText } = details;
		this.name = "SonioxHttpError";
		this.url = url;
		this.method = method;
		this.headers = headers;
		this.bodyText = bodyText;
	}
	/**
	 * Renders the error as a multi-line, human-readable summary.
	 * The status line is only included when a status code is known.
	 */
	toString() {
		const lines = [
			`SonioxHttpError [${this.code}]: ${this.message}`,
			` Method: ${this.method}`,
			` URL: ${this.url}`
		];
		if (this.statusCode !== void 0) lines.push(` Status: ${this.statusCode}`);
		return lines.join("\n");
	}
	/**
	 * Returns a plain-object snapshot suitable for logging/serialization.
	 * Optional fields are omitted entirely when they are undefined.
	 */
	toJSON() {
		const snapshot = {
			name: this.name,
			code: this.code,
			message: this.message,
			url: this.url,
			method: this.method
		};
		if (this.statusCode !== void 0) snapshot.statusCode = this.statusCode;
		if (this.headers !== void 0) snapshot.headers = this.headers;
		if (this.bodyText !== void 0) snapshot.bodyText = this.bodyText;
		return snapshot;
	}
};
|
|
121
|
+
/**
 * Builds a `network_error` SonioxHttpError.
 *
 * @param url - Request URL
 * @param method - HTTP method
 * @param cause - Underlying failure; its message is used when it is an Error
 * @returns The constructed error
 */
function createNetworkError(url, method, cause) {
	const reason = cause instanceof Error ? cause.message : "Network request failed";
	const details = {
		code: "network_error",
		message: `Network error: ${reason}`,
		url,
		method,
		cause
	};
	return new SonioxHttpError(details);
}
|
|
133
|
+
/**
 * Builds a `timeout` SonioxHttpError.
 *
 * @param url - Request URL
 * @param method - HTTP method
 * @param timeoutMs - Timeout that elapsed, in milliseconds (used in the message)
 * @returns The constructed error
 */
function createTimeoutError(url, method, timeoutMs) {
	const details = {
		code: "timeout",
		message: `Request timed out after ${timeoutMs}ms`,
		url,
		method
	};
	return new SonioxHttpError(details);
}
|
|
144
|
+
/**
 * Builds an `aborted` SonioxHttpError.
 *
 * @param url - Request URL
 * @param method - HTTP method
 * @param cause - Underlying abort reason, attached as the error cause
 * @returns The constructed error
 */
function createAbortError(url, method, cause) {
	const details = {
		code: "aborted",
		message: "Request was aborted",
		url,
		method,
		cause
	};
	return new SonioxHttpError(details);
}
|
|
156
|
+
/**
 * Builds an `http_error` SonioxHttpError for a non-2xx response.
 *
 * @param url - Request URL
 * @param method - HTTP method
 * @param statusCode - Response status code
 * @param headers - Response headers
 * @param bodyText - Response body text; truncated before being stored
 * @returns The constructed error
 */
function createHttpError(url, method, statusCode, headers, bodyText) {
	const limitedBody = truncateBodyText(bodyText);
	const details = {
		code: "http_error",
		message: `HTTP ${statusCode}`,
		url,
		method,
		statusCode,
		headers,
		bodyText: limitedBody
	};
	return new SonioxHttpError(details);
}
|
|
171
|
+
/**
 * Builds a `parse_error` SonioxHttpError (invalid JSON, etc.).
 *
 * @param url - Request URL
 * @param method - HTTP method
 * @param bodyText - Raw response text that failed to parse; truncated before being stored
 * @param cause - Underlying parse failure; its message is used when it is an Error
 * @returns The constructed error
 */
function createParseError(url, method, bodyText, cause) {
	let reason = "Failed to parse response";
	if (cause instanceof Error) reason = cause.message;
	const limitedBody = truncateBodyText(bodyText);
	const details = {
		code: "parse_error",
		message: `Parse error: ${reason}`,
		url,
		method,
		bodyText: limitedBody,
		cause
	};
	return new SonioxHttpError(details);
}
|
|
186
|
+
/**
 * Truncates body text to a maximum length, appending a marker when cut.
 *
 * Non-string input (e.g. `undefined` when a response had no readable body)
 * is returned unchanged instead of throwing, so building an error never
 * fails because of a missing body.
 *
 * @param text - Body text to cap
 * @param maxLength - Maximum number of UTF-16 code units to keep
 *   (defaults to MAX_BODY_TEXT_LENGTH)
 * @returns The original value, or the capped text followed by "... [truncated]"
 */
function truncateBodyText(text, maxLength = MAX_BODY_TEXT_LENGTH) {
	if (typeof text !== "string" || text.length <= maxLength) return text;
	let end = maxLength;
	// Do not cut a surrogate pair in half: drop a trailing high surrogate.
	if (end > 0) {
		const lastUnit = text.charCodeAt(end - 1);
		if (lastUnit >= 55296 && lastUnit <= 56319) end -= 1;
	}
	return `${text.slice(0, end)}... [truncated]`;
}
|
|
193
|
+
/**
 * Type guard for abort-style failures.
 *
 * @param error - Value to inspect
 * @returns true when `error` is an Error whose name is "AbortError" or
 *   "TimeoutError"; false for anything else
 */
function isAbortError(error) {
	if (!(error instanceof Error)) return false;
	const abortNames = ["AbortError", "TimeoutError"];
	return abortNames.includes(error.name);
}
|
|
200
|
+
/**
 * Type guard for the SDK's base error class.
 *
 * @param error - Value to inspect
 * @returns true when `error` is an instance of SonioxError (including subclasses)
 */
function isSonioxError(error) {
	const isSdkError = error instanceof SonioxError;
	return isSdkError;
}
|
|
207
|
+
/**
 * Type guard for HTTP errors raised by the SDK.
 *
 * @param error - Value to inspect
 * @returns true when `error` is an instance of SonioxHttpError
 */
function isSonioxHttpError(error) {
	const isHttpError = error instanceof SonioxHttpError;
	return isHttpError;
}
|
|
213
|
+
/**
 * Checks whether an error represents an HTTP 404 response.
 *
 * @param error - Value to inspect
 * @returns true only for a SonioxHttpError whose statusCode is 404
 */
function isNotFoundError(error) {
	if (!isSonioxHttpError(error)) return false;
	return error.statusCode === 404;
}
|
|
219
|
+
|
|
220
|
+
//#endregion
|
|
221
|
+
//#region ../core/src/connection.ts
|
|
222
|
+
/** Root domain used for the default (US) deployment. */
const DEFAULT_BASE_DOMAIN = "soniox.com";
/**
 * Derives the four Soniox service URLs from a base domain.
 * All URLs follow the same subdomain pattern:
 * api.{base} / stt-rt.{base} / tts-rt.{base} (tts-rt serves both the
 * TTS REST URL and the TTS WebSocket URL).
 *
 * @param base - Base domain, e.g. "soniox.com"
 * @returns Object with `api_domain`, `stt_ws_url`, `tts_api_url`, `tts_ws_url`
 */
function urlsFromBase(base) {
	return {
		api_domain: `https://api.${base}`,
		stt_ws_url: `wss://stt-rt.${base}/transcribe-websocket`,
		tts_api_url: `https://tts-rt.${base}`,
		tts_ws_url: `wss://tts-rt.${base}/tts-websocket`
	};
}
/**
 * Resolve a {@link SonioxConnectionConfig} into fully qualified URLs.
 *
 * Resolution priority (highest → lowest) for each URL:
 * 1. Explicit field (`api_domain`, `stt_ws_url`, `tts_api_url`, `tts_ws_url`)
 * 2. Derived from `base_domain`
 * 3. Derived from `region` → `{region}.soniox.com`
 * 4. Default US base domain (`soniox.com`)
 *
 * `region` and `base_domain` are trimmed; values that are empty, blank or
 * not strings are treated as unset so they can never produce a malformed
 * host such as `api..soniox.com`. The region "us" (any casing) maps to the
 * default base domain.
 *
 * @param config - Connection configuration
 * @returns Resolved config with `api_key`, the four URLs, `stt_defaults`,
 *   `tts_defaults` and `session_defaults` (same object as `stt_defaults`)
 */
function resolveConnectionConfig(config) {
	const { region, base_domain, api_domain, stt_ws_url, tts_api_url, tts_ws_url } = config;
	const nonBlank = (value) => {
		if (typeof value !== "string") return void 0;
		const trimmed = value.trim();
		return trimmed === "" ? void 0 : trimmed;
	};
	const cleanRegion = nonBlank(region);
	const normalizedRegion = cleanRegion !== void 0 && cleanRegion.toLowerCase() !== "us" ? cleanRegion : void 0;
	const regionBase = normalizedRegion !== void 0 ? `${normalizedRegion}.soniox.com` : DEFAULT_BASE_DOMAIN;
	const derived = urlsFromBase(nonBlank(base_domain) ?? regionBase);
	// `session_defaults` is the fallback name for `stt_defaults`; both output keys share one object.
	const sttDefaults = config.stt_defaults ?? config.session_defaults ?? {};
	return {
		api_key: config.api_key,
		api_domain: api_domain ?? derived.api_domain,
		stt_ws_url: stt_ws_url ?? derived.stt_ws_url,
		tts_api_url: tts_api_url ?? derived.tts_api_url,
		tts_ws_url: tts_ws_url ?? derived.tts_ws_url,
		stt_defaults: sttDefaults,
		tts_defaults: config.tts_defaults ?? {},
		session_defaults: sttDefaults
	};
}
|
|
262
|
+
|
|
82
263
|
//#endregion
|
|
83
264
|
//#region ../core/src/segments.ts
|
|
84
265
|
const DEFAULT_GROUP_BY = ["speaker", "language"];
|
|
@@ -422,8 +603,9 @@ function mapErrorResponse(response) {
|
|
|
422
603
|
|
|
423
604
|
//#endregion
|
|
424
605
|
//#region ../core/src/realtime/stt.ts
|
|
425
|
-
const DEFAULT_KEEPALIVE_INTERVAL_MS = 5e3;
|
|
426
|
-
const MIN_KEEPALIVE_INTERVAL_MS = 1e3;
|
|
606
|
+
const DEFAULT_KEEPALIVE_INTERVAL_MS$1 = 5e3;
|
|
607
|
+
const MIN_KEEPALIVE_INTERVAL_MS$1 = 1e3;
|
|
608
|
+
const DEFAULT_CONNECT_TIMEOUT_MS$1 = 2e4;
|
|
427
609
|
/**
|
|
428
610
|
* Convert audio data to Uint8Array
|
|
429
611
|
* Handles Uint8Array and ArrayBuffer
|
|
@@ -448,6 +630,7 @@ function buildConfigMessage(config, apiKey) {
|
|
|
448
630
|
enable_language_identification: config.enable_language_identification,
|
|
449
631
|
enable_endpoint_detection: config.enable_endpoint_detection,
|
|
450
632
|
client_reference_id: config.client_reference_id,
|
|
633
|
+
max_endpoint_delay_ms: config.max_endpoint_delay_ms,
|
|
451
634
|
context: config.context,
|
|
452
635
|
translation: config.translation
|
|
453
636
|
};
|
|
@@ -516,6 +699,7 @@ var RealtimeSttSession = class {
|
|
|
516
699
|
wsBaseUrl;
|
|
517
700
|
config;
|
|
518
701
|
keepaliveIntervalMs;
|
|
702
|
+
connectTimeoutMs;
|
|
519
703
|
signal;
|
|
520
704
|
ws = null;
|
|
521
705
|
_state = "idle";
|
|
@@ -529,8 +713,10 @@ var RealtimeSttSession = class {
|
|
|
529
713
|
this.apiKey = apiKey;
|
|
530
714
|
this.wsBaseUrl = wsBaseUrl;
|
|
531
715
|
this.config = config;
|
|
532
|
-
const keepaliveMs = options?.keepalive_interval_ms ?? DEFAULT_KEEPALIVE_INTERVAL_MS;
|
|
533
|
-
this.keepaliveIntervalMs = Number.isFinite(keepaliveMs) && keepaliveMs > 0 ? Math.max(keepaliveMs, MIN_KEEPALIVE_INTERVAL_MS) : DEFAULT_KEEPALIVE_INTERVAL_MS;
|
|
716
|
+
const keepaliveMs = options?.keepalive_interval_ms ?? DEFAULT_KEEPALIVE_INTERVAL_MS$1;
|
|
717
|
+
this.keepaliveIntervalMs = Number.isFinite(keepaliveMs) && keepaliveMs > 0 ? Math.max(keepaliveMs, MIN_KEEPALIVE_INTERVAL_MS$1) : DEFAULT_KEEPALIVE_INTERVAL_MS$1;
|
|
718
|
+
const connectMs = options?.connect_timeout_ms ?? DEFAULT_CONNECT_TIMEOUT_MS$1;
|
|
719
|
+
this.connectTimeoutMs = Number.isFinite(connectMs) && connectMs > 0 ? connectMs : DEFAULT_CONNECT_TIMEOUT_MS$1;
|
|
534
720
|
this.signal = options?.signal;
|
|
535
721
|
if (this.signal) {
|
|
536
722
|
this.abortHandler = () => this.handleAbort();
|
|
@@ -559,16 +745,26 @@ var RealtimeSttSession = class {
|
|
|
559
745
|
async connect() {
|
|
560
746
|
if (this._state !== "idle") throw new StateError(`Cannot connect: session is in "${this._state}" state`);
|
|
561
747
|
this.checkAborted();
|
|
562
|
-
this.setState("connecting");
|
|
748
|
+
this.setState("connecting", "user_action");
|
|
749
|
+
let connectTimer;
|
|
563
750
|
try {
|
|
564
|
-
await this.createWebSocket()
|
|
565
|
-
|
|
751
|
+
await Promise.race([this.createWebSocket().then((v) => {
|
|
752
|
+
clearTimeout(connectTimer);
|
|
753
|
+
return v;
|
|
754
|
+
}), new Promise((_resolve, reject) => {
|
|
755
|
+
connectTimer = setTimeout(() => {
|
|
756
|
+
if (this.ws) this.ws.close();
|
|
757
|
+
reject(new ConnectionError("Connection timed out"));
|
|
758
|
+
}, this.connectTimeoutMs);
|
|
759
|
+
})]);
|
|
760
|
+
this.setState("connected", "connected");
|
|
566
761
|
this.emitter.emit("connected");
|
|
567
762
|
this.updateKeepalive();
|
|
568
763
|
} catch (error) {
|
|
764
|
+
clearTimeout(connectTimer);
|
|
569
765
|
if (!this.isTerminalState(this._state)) {
|
|
570
766
|
const err = error instanceof Error ? error : new ConnectionError("Connection failed", error);
|
|
571
|
-
this.cleanup("error", err);
|
|
767
|
+
this.cleanup("error", err, "error");
|
|
572
768
|
}
|
|
573
769
|
throw error;
|
|
574
770
|
}
|
|
@@ -646,7 +842,7 @@ var RealtimeSttSession = class {
|
|
|
646
842
|
this.checkAborted();
|
|
647
843
|
if (this._state !== "connected") throw new StateError(`Cannot finish: session is in "${this._state}" state`);
|
|
648
844
|
if (this._paused) this.resume();
|
|
649
|
-
this.setState("finishing");
|
|
845
|
+
this.setState("finishing", "user_action");
|
|
650
846
|
this.updateKeepalive();
|
|
651
847
|
const finishPromise = new Promise((resolve, reject) => {
|
|
652
848
|
this.finishResolver = resolve;
|
|
@@ -662,7 +858,7 @@ var RealtimeSttSession = class {
|
|
|
662
858
|
if (this.isTerminalState(this._state)) return;
|
|
663
859
|
this.emitter.emit("disconnected", "client_closed");
|
|
664
860
|
this.settleFinish(new StateError("Session canceled"));
|
|
665
|
-
this.cleanup("canceled");
|
|
861
|
+
this.cleanup("canceled", void 0, "user_action");
|
|
666
862
|
}
|
|
667
863
|
/**
|
|
668
864
|
* Register an event handler
|
|
@@ -691,6 +887,13 @@ var RealtimeSttSession = class {
|
|
|
691
887
|
[Symbol.asyncIterator]() {
|
|
692
888
|
return this.eventQueue[Symbol.asyncIterator]();
|
|
693
889
|
}
|
|
890
|
+
/**
|
|
891
|
+
* @internal Debug-only: forcefully close the underlying WebSocket to
|
|
892
|
+
* simulate an unexpected network disconnection.
|
|
893
|
+
*/
|
|
894
|
+
__debugForceDisconnect() {
|
|
895
|
+
this.ws?.close(4999, "debug: simulated disconnect");
|
|
896
|
+
}
|
|
694
897
|
async createWebSocket() {
|
|
695
898
|
return new Promise((resolve, reject) => {
|
|
696
899
|
try {
|
|
@@ -756,50 +959,54 @@ var RealtimeSttSession = class {
|
|
|
756
959
|
this.emitter.emit("finished");
|
|
757
960
|
this.eventQueue.push({ kind: "finished" });
|
|
758
961
|
this.settleFinish();
|
|
759
|
-
this.cleanup("finished");
|
|
962
|
+
this.cleanup("finished", void 0, "finished");
|
|
760
963
|
}
|
|
761
964
|
} catch (error) {
|
|
762
965
|
const err = error;
|
|
763
966
|
this.emitter.emit("error", err);
|
|
764
967
|
this.settleFinish(err);
|
|
765
|
-
this.cleanup("error", err);
|
|
968
|
+
this.cleanup("error", err, "error");
|
|
766
969
|
}
|
|
767
970
|
}
|
|
768
971
|
handleClose(event) {
|
|
769
972
|
if (this.isTerminalState(this._state)) return;
|
|
770
973
|
this.emitter.emit("disconnected", event.reason || void 0);
|
|
771
974
|
if (this._state === "finishing") {
|
|
772
|
-
const error = new ConnectionError("WebSocket closed before finished response", event);
|
|
773
|
-
this.emitter.emit("error", error);
|
|
774
|
-
this.settleFinish(error);
|
|
775
|
-
this.cleanup("error", error);
|
|
975
|
+
const error$1 = new ConnectionError("WebSocket closed before finished response", event);
|
|
976
|
+
this.emitter.emit("error", error$1);
|
|
977
|
+
this.settleFinish(error$1);
|
|
978
|
+
this.cleanup("error", error$1, "connection_lost");
|
|
776
979
|
return;
|
|
777
980
|
}
|
|
778
|
-
|
|
981
|
+
const error = new ConnectionError("WebSocket closed unexpectedly", event);
|
|
982
|
+
this.emitter.emit("error", error);
|
|
983
|
+
this.cleanup("closed", error, "connection_lost");
|
|
779
984
|
}
|
|
780
985
|
handleError(event) {
|
|
781
986
|
const error = new ConnectionError("WebSocket error", event);
|
|
782
987
|
this.emitter.emit("error", error);
|
|
783
988
|
this.settleFinish(error);
|
|
784
|
-
this.cleanup("error", error);
|
|
989
|
+
this.cleanup("error", error, "error");
|
|
785
990
|
}
|
|
786
991
|
handleAbort() {
|
|
787
992
|
const error = new AbortError();
|
|
788
993
|
this.emitter.emit("error", error);
|
|
789
994
|
this.settleFinish(error);
|
|
790
|
-
this.cleanup("canceled", error);
|
|
995
|
+
this.cleanup("canceled", error, "user_action");
|
|
791
996
|
}
|
|
792
|
-
setState(newState) {
|
|
997
|
+
setState(newState, reason) {
|
|
793
998
|
if (this._state === newState) return;
|
|
794
999
|
const oldState = this._state;
|
|
795
1000
|
this._state = newState;
|
|
796
|
-
|
|
1001
|
+
const update = {
|
|
797
1002
|
old_state: oldState,
|
|
798
1003
|
new_state: newState
|
|
799
|
-
}
|
|
1004
|
+
};
|
|
1005
|
+
if (reason !== void 0) update.reason = reason;
|
|
1006
|
+
this.emitter.emit("state_change", update);
|
|
800
1007
|
}
|
|
801
|
-
cleanup(finalState, error) {
|
|
802
|
-
this.setState(finalState);
|
|
1008
|
+
cleanup(finalState, error, reason) {
|
|
1009
|
+
this.setState(finalState, reason);
|
|
803
1010
|
this.stopKeepalive();
|
|
804
1011
|
if (this.signal && this.abortHandler) {
|
|
805
1012
|
this.signal.removeEventListener("abort", this.abortHandler);
|
|
@@ -833,7 +1040,7 @@ var RealtimeSttSession = class {
|
|
|
833
1040
|
const error = new ConnectionError("WebSocket is not open");
|
|
834
1041
|
this.emitter.emit("error", error);
|
|
835
1042
|
this.settleFinish(error);
|
|
836
|
-
this.cleanup("error", error);
|
|
1043
|
+
this.cleanup("error", error, "error");
|
|
837
1044
|
if (shouldThrow) throw error;
|
|
838
1045
|
return;
|
|
839
1046
|
}
|
|
@@ -843,7 +1050,7 @@ var RealtimeSttSession = class {
|
|
|
843
1050
|
const error = new ConnectionError("WebSocket send failed", err);
|
|
844
1051
|
this.emitter.emit("error", error);
|
|
845
1052
|
this.settleFinish(error);
|
|
846
|
-
this.cleanup("error", error);
|
|
1053
|
+
this.cleanup("error", error, "error");
|
|
847
1054
|
if (shouldThrow) throw error;
|
|
848
1055
|
}
|
|
849
1056
|
}
|
|
@@ -865,6 +1072,383 @@ var RealtimeSttSession = class {
|
|
|
865
1072
|
}
|
|
866
1073
|
};
|
|
867
1074
|
|
|
1075
|
+
//#endregion
|
|
1076
|
+
//#region ../core/src/realtime/tts.ts
|
|
1077
|
+
const MAX_STREAMS_PER_CONNECTION = 5;
|
|
1078
|
+
const DEFAULT_KEEPALIVE_INTERVAL_MS = 5e3;
|
|
1079
|
+
const MIN_KEEPALIVE_INTERVAL_MS = 1e3;
|
|
1080
|
+
const DEFAULT_CONNECT_TIMEOUT_MS = 2e4;
|
|
1081
|
+
/**
 * Generates a UUID (version 4 layout) to identify a TTS stream.
 *
 * Uses `crypto.randomUUID()` when available. Where it is missing, falls
 * back to building the UUID from `crypto.getRandomValues()`, and as a last
 * resort from `Math.random()`, instead of throwing.
 *
 * @param cryptoImpl - Crypto implementation to use (defaults to `globalThis.crypto`)
 * @returns A UUID string
 */
function generateStreamId(cryptoImpl = globalThis.crypto) {
	if (typeof cryptoImpl?.randomUUID === "function") return cryptoImpl.randomUUID();
	const bytes = new Uint8Array(16);
	if (typeof cryptoImpl?.getRandomValues === "function") cryptoImpl.getRandomValues(bytes);
	else for (let i = 0; i < bytes.length; i++) bytes[i] = Math.floor(Math.random() * 256);
	// Stamp the version (4) and variant (10xx) bits.
	bytes[6] = bytes[6] & 15 | 64;
	bytes[8] = bytes[8] & 63 | 128;
	const hex = Array.from(bytes, (byte) => byte.toString(16).padStart(2, "0")).join("");
	return `${hex.slice(0, 8)}-${hex.slice(8, 12)}-${hex.slice(12, 16)}-${hex.slice(16, 20)}-${hex.slice(20)}`;
}
|
|
1084
|
+
/**
 * Decodes a base64 string into raw bytes.
 *
 * @param base64 - Base64-encoded data
 * @returns A Uint8Array holding one byte per decoded character
 */
function decodeBase64ToUint8Array(base64) {
	const decoded = atob(base64);
	// atob yields one character per byte, so each char code is the byte value.
	return Uint8Array.from(decoded, (ch) => ch.charCodeAt(0));
}
|
|
1090
|
+
/**
 * Merge a partial TTS stream input with defaults, validate required fields,
 * and return a fully resolved config ready for the WebSocket.
 *
 * Input keys whose value is `undefined` are ignored, so passing an unset
 * optional (e.g. `{ voice: maybeVoice }`) does not erase a configured default.
 *
 * @param input - Per-stream settings; may be null/undefined
 * @param defaults - Fallback settings (e.g. `tts_defaults`)
 * @returns Config with `model`, `language`, `voice`, `audio_format`,
 *   `stream_id` (generated when absent) and, when set, `sample_rate` / `bitrate`
 * @throws Error listing every required field that is missing after merging
 */
function resolveStreamConfig(input, defaults) {
	const provided = Object.entries(input ?? {}).filter(([, value]) => value !== void 0);
	const merged = {
		...defaults,
		...Object.fromEntries(provided)
	};
	const { model, language, voice, audio_format } = merged;
	const required = {
		model,
		language,
		voice,
		audio_format
	};
	const missing = Object.keys(required).filter((key) => !required[key]);
	if (missing.length > 0) throw new Error(`Missing required TTS stream fields: ${missing.join(", ")}. Provide them directly or via tts_defaults in your connection config.`);
	return {
		model,
		language,
		voice,
		audio_format,
		...merged.sample_rate !== void 0 && { sample_rate: merged.sample_rate },
		...merged.bitrate !== void 0 && { bitrate: merged.bitrate },
		stream_id: merged.stream_id ?? generateStreamId()
	};
}
|
|
1119
|
+
/**
 * Handle for a single TTS stream multiplexed on a WebSocket connection.
 *
 * Audio can be consumed either through events or by async iteration over
 * the decoded audio chunks.
 *
 * @example Event-based
 * ```typescript
 * stream.on('audio', (chunk) => process(chunk));
 * stream.on('terminated', () => console.log('done'));
 * stream.sendText("Hello world");
 * stream.finish();
 * ```
 *
 * @example Async iteration
 * ```typescript
 * stream.sendText("Hello world");
 * stream.finish();
 * for await (const chunk of stream) {
 * process(chunk);
 * }
 * ```
 */
var RealtimeTtsStream = class extends TypedEmitter {
	streamId;
	_state = "active";
	audioQueue = new AsyncEventQueue();
	connection;
	ownsConnection;
	/** @internal */
	constructor(streamId, connection, ownsConnection) {
		super();
		this.streamId = streamId;
		this.connection = connection;
		this.ownsConnection = ownsConnection;
	}
	/** Current stream lifecycle state. */
	get state() {
		return this._state;
	}
	/**
	 * Send one text chunk to the TTS stream.
	 *
	 * @param text - Text to synthesize
	 * @param options.end - If true, marks this as the final text chunk and
	 *   moves the stream to the "finishing" state
	 * @throws StateError when the stream is not in the "active" state
	 */
	sendText(text, options) {
		if (this._state !== "active") throw new StateError(`Cannot send text in state '${this._state}'`);
		const isFinal = options?.end;
		this.connection._sendJson({
			text,
			text_end: isFinal ?? false,
			stream_id: this.streamId
		});
		if (isFinal) this._state = "finishing";
	}
	/**
	 * Pipe an async iterable of text chunks into the stream.
	 * Calls {@link finish} once the iterable completes, provided the stream
	 * is still active; stops reading early if the stream leaves "active".
	 *
	 * Designed for concurrent use: call `sendStream()` and consume audio
	 * via `for await` or events simultaneously.
	 *
	 * @example LLM token piping
	 * ```typescript
	 * stream.sendStream(llmTokenStream);
	 * for await (const audio of stream) { forward(audio); }
	 * ```
	 */
	async sendStream(source) {
		for await (const piece of source) {
			if (this._state !== "active") break;
			this.sendText(piece);
		}
		if (this._state !== "active") return;
		this.finish();
	}
	/**
	 * Signal that no more text will be sent for this stream.
	 * The server will finish generating audio and send `terminated`.
	 *
	 * @throws StateError when the stream is not in the "active" state
	 */
	finish() {
		if (this._state !== "active") throw new StateError(`Cannot finish in state '${this._state}'`);
		this.sendText("", { end: true });
	}
	/**
	 * Cancel this stream. The server will stop generating and send `terminated`.
	 * No-op once the stream has ended or errored; send failures are ignored.
	 */
	cancel() {
		const alreadyDone = this._state === "ended" || this._state === "error";
		if (alreadyDone) return;
		const cancelMessage = {
			stream_id: this.streamId,
			cancel: true
		};
		try {
			this.connection._sendJson(cancelMessage);
		} catch {}
	}
	/**
	 * Close this stream. For single-stream usage (created via `tts(input)`),
	 * also closes the underlying WebSocket connection.
	 */
	close() {
		this._endStream();
		if (this.ownsConnection) this.connection.close();
	}
	/** Async iterator that yields decoded audio chunks. */
	[Symbol.asyncIterator]() {
		return this.audioQueue[Symbol.asyncIterator]();
	}
	/** @internal Dispatch a server event to this stream. */
	_handleEvent(event) {
		const hasError = event.error_code !== void 0;
		if (hasError) {
			// An error event ends the stream: emit it, abort the audio queue and release the slot.
			const errPayload = { error_code: event.error_code };
			if (event.error_message !== void 0) errPayload.error_message = event.error_message;
			const failure = mapErrorResponse(errPayload);
			this._state = "error";
			this.emit("error", failure);
			this.audioQueue.abort(failure);
			this.connection._deactivateStream(this.streamId);
			return;
		}
		if (event.audio !== void 0) {
			const bytes = decodeBase64ToUint8Array(event.audio);
			this.emit("audio", bytes);
			this.audioQueue.push(bytes);
		}
		if (event.audio_end) this.emit("audioEnd");
		if (event.terminated) this._endStream();
	}
	/**
	 * @internal Force-end this stream (connection closing).
	 * Does nothing when the stream has already ended or errored; does not
	 * emit `terminated`.
	 */
	_forceEnd() {
		if (["ended", "error"].includes(this._state)) return;
		this._state = "ended";
		this.audioQueue.end();
	}
	/**
	 * Mark the stream as ended, emit `terminated`, end the audio queue and
	 * deregister from the connection. Only short-circuits on "ended"
	 * (unlike `_forceEnd`, an "error" state is not skipped here).
	 */
	_endStream() {
		const alreadyEnded = this._state === "ended";
		if (alreadyEnded) return;
		this._state = "ended";
		this.emit("terminated");
		this.audioQueue.end();
		this.connection._deactivateStream(this.streamId);
	}
};
|
|
1261
|
+
/**
|
|
1262
|
+
* WebSocket connection for real-time Text-to-Speech.
|
|
1263
|
+
*
|
|
1264
|
+
* Supports up to 5 concurrent streams multiplexed by `stream_id`.
|
|
1265
|
+
* The connection automatically sends keepalive messages while open.
|
|
1266
|
+
*
|
|
1267
|
+
* @example Multi-stream
|
|
1268
|
+
* ```typescript
|
|
1269
|
+
* const conn = new RealtimeTtsConnection(apiKey, wsUrl, ttsDefaults);
|
|
1270
|
+
* await conn.connect();
|
|
1271
|
+
*
|
|
1272
|
+
* const s1 = conn.stream({ model, voice, language, audio_format });
|
|
1273
|
+
* s1.sendText("Hello");
|
|
1274
|
+
* s1.finish();
|
|
1275
|
+
* for await (const chunk of s1) { ... }
|
|
1276
|
+
*
|
|
1277
|
+
* conn.close();
|
|
1278
|
+
* ```
|
|
1279
|
+
*/
|
|
1280
|
+
var RealtimeTtsConnection = class extends TypedEmitter {
|
|
1281
|
+
apiKey;
|
|
1282
|
+
wsUrl;
|
|
1283
|
+
ttsDefaults;
|
|
1284
|
+
keepaliveIntervalMs;
|
|
1285
|
+
connectTimeoutMs;
|
|
1286
|
+
ws = null;
|
|
1287
|
+
connected = false;
|
|
1288
|
+
connecting = false;
|
|
1289
|
+
keepaliveTimer = null;
|
|
1290
|
+
activeStreams = /* @__PURE__ */ new Map();
|
|
1291
|
+
constructor(apiKey, wsUrl, ttsDefaults = {}, options) {
|
|
1292
|
+
super();
|
|
1293
|
+
this.apiKey = apiKey;
|
|
1294
|
+
this.wsUrl = wsUrl;
|
|
1295
|
+
this.ttsDefaults = ttsDefaults;
|
|
1296
|
+
const keepaliveMs = options?.keepalive_interval_ms ?? DEFAULT_KEEPALIVE_INTERVAL_MS;
|
|
1297
|
+
this.keepaliveIntervalMs = Number.isFinite(keepaliveMs) && keepaliveMs > 0 ? Math.max(keepaliveMs, MIN_KEEPALIVE_INTERVAL_MS) : DEFAULT_KEEPALIVE_INTERVAL_MS;
|
|
1298
|
+
const connectMs = options?.connect_timeout_ms ?? DEFAULT_CONNECT_TIMEOUT_MS;
|
|
1299
|
+
this.connectTimeoutMs = Number.isFinite(connectMs) && connectMs > 0 ? connectMs : DEFAULT_CONNECT_TIMEOUT_MS;
|
|
1300
|
+
}
|
|
1301
|
+
/** Whether the WebSocket is connected. */
|
|
1302
|
+
get isConnected() {
|
|
1303
|
+
return this.connected;
|
|
1304
|
+
}
|
|
1305
|
+
/**
|
|
1306
|
+
* Open the WebSocket connection and start keepalive.
|
|
1307
|
+
* Called automatically by {@link stream} if not yet connected.
|
|
1308
|
+
*/
|
|
1309
|
+
async connect() {
|
|
1310
|
+
if (this.connected) return;
|
|
1311
|
+
if (this.connecting) throw new StateError("Connection is already being established");
|
|
1312
|
+
this.connecting = true;
|
|
1313
|
+
try {
|
|
1314
|
+
await this.createWebSocket();
|
|
1315
|
+
this.connected = true;
|
|
1316
|
+
this.startKeepalive();
|
|
1317
|
+
} finally {
|
|
1318
|
+
this.connecting = false;
|
|
1319
|
+
}
|
|
1320
|
+
}
|
|
1321
|
+
/**
|
|
1322
|
+
* Open a new TTS stream on this connection.
|
|
1323
|
+
* Auto-connects if the WebSocket is not yet open.
|
|
1324
|
+
*
|
|
1325
|
+
* @param input - Stream configuration (merged with tts_defaults)
|
|
1326
|
+
* @returns A ready-to-use stream handle
|
|
1327
|
+
*/
|
|
1328
|
+
async stream(input = {}) {
|
|
1329
|
+
return this._openStream(input, false);
|
|
1330
|
+
}
|
|
1331
|
+
/** @internal Open a stream, optionally marking it as connection-owning. */
|
|
1332
|
+
async _openStream(input, ownsConnection) {
|
|
1333
|
+
if (!this.connected) await this.connect();
|
|
1334
|
+
if (this.activeStreams.size >= MAX_STREAMS_PER_CONNECTION) throw new StateError(`Maximum concurrent streams (${MAX_STREAMS_PER_CONNECTION}) reached`);
|
|
1335
|
+
const config = resolveStreamConfig(input, this.ttsDefaults);
|
|
1336
|
+
if (this.activeStreams.has(config.stream_id)) throw new StateError(`Stream '${config.stream_id}' is already active on this connection`);
|
|
1337
|
+
const stream = new RealtimeTtsStream(config.stream_id, this, ownsConnection);
|
|
1338
|
+
this.activeStreams.set(config.stream_id, stream);
|
|
1339
|
+
const configPayload = {
|
|
1340
|
+
api_key: this.apiKey,
|
|
1341
|
+
...config
|
|
1342
|
+
};
|
|
1343
|
+
this._sendJson(configPayload);
|
|
1344
|
+
return stream;
|
|
1345
|
+
}
|
|
1346
|
+
/**
|
|
1347
|
+
* Close the WebSocket connection and terminate all active streams.
|
|
1348
|
+
*/
|
|
1349
|
+
close() {
|
|
1350
|
+
this.stopKeepalive();
|
|
1351
|
+
for (const stream of this.activeStreams.values()) stream._forceEnd();
|
|
1352
|
+
this.activeStreams.clear();
|
|
1353
|
+
if (this.ws) {
|
|
1354
|
+
try {
|
|
1355
|
+
this.ws.close();
|
|
1356
|
+
} catch {}
|
|
1357
|
+
this.ws = null;
|
|
1358
|
+
}
|
|
1359
|
+
this.connected = false;
|
|
1360
|
+
this.emit("close");
|
|
1361
|
+
}
|
|
1362
|
+
/** @internal Send a JSON payload on the WebSocket. */
|
|
1363
|
+
_sendJson(payload) {
|
|
1364
|
+
if (!this.ws || !this.connected) throw new StateError("TTS connection is not open");
|
|
1365
|
+
this.ws.send(JSON.stringify(payload));
|
|
1366
|
+
}
|
|
1367
|
+
/** @internal Remove a stream from the active set. */
|
|
1368
|
+
_deactivateStream(streamId) {
|
|
1369
|
+
this.activeStreams.delete(streamId);
|
|
1370
|
+
}
|
|
1371
|
+
async createWebSocket() {
|
|
1372
|
+
return new Promise((resolve, reject) => {
|
|
1373
|
+
const timer = setTimeout(() => {
|
|
1374
|
+
try {
|
|
1375
|
+
ws.close();
|
|
1376
|
+
} catch {}
|
|
1377
|
+
reject(new ConnectionError("TTS WebSocket connection timed out"));
|
|
1378
|
+
}, this.connectTimeoutMs);
|
|
1379
|
+
let ws;
|
|
1380
|
+
try {
|
|
1381
|
+
ws = new WebSocket(this.wsUrl);
|
|
1382
|
+
ws.binaryType = "arraybuffer";
|
|
1383
|
+
} catch (err) {
|
|
1384
|
+
clearTimeout(timer);
|
|
1385
|
+
reject(new ConnectionError(`Failed to create TTS WebSocket: ${err instanceof Error ? err.message : String(err)}`));
|
|
1386
|
+
return;
|
|
1387
|
+
}
|
|
1388
|
+
const onOpen = () => {
|
|
1389
|
+
clearTimeout(timer);
|
|
1390
|
+
ws.removeEventListener("error", onError);
|
|
1391
|
+
this.ws = ws;
|
|
1392
|
+
ws.addEventListener("message", (event) => {
|
|
1393
|
+
this.handleMessage(event);
|
|
1394
|
+
});
|
|
1395
|
+
ws.addEventListener("close", () => {
|
|
1396
|
+
if (this.connected) {
|
|
1397
|
+
this.connected = false;
|
|
1398
|
+
this.stopKeepalive();
|
|
1399
|
+
for (const stream of this.activeStreams.values()) stream._forceEnd();
|
|
1400
|
+
this.activeStreams.clear();
|
|
1401
|
+
this.emit("close");
|
|
1402
|
+
}
|
|
1403
|
+
});
|
|
1404
|
+
resolve();
|
|
1405
|
+
};
|
|
1406
|
+
const onError = () => {
|
|
1407
|
+
clearTimeout(timer);
|
|
1408
|
+
ws.removeEventListener("open", onOpen);
|
|
1409
|
+
reject(new ConnectionError("TTS WebSocket connection failed"));
|
|
1410
|
+
};
|
|
1411
|
+
ws.addEventListener("open", onOpen);
|
|
1412
|
+
ws.addEventListener("error", onError);
|
|
1413
|
+
});
|
|
1414
|
+
}
|
|
1415
|
+
handleMessage(event) {
|
|
1416
|
+
if (typeof event.data !== "string") return;
|
|
1417
|
+
let parsed;
|
|
1418
|
+
try {
|
|
1419
|
+
parsed = JSON.parse(event.data);
|
|
1420
|
+
} catch {
|
|
1421
|
+
return;
|
|
1422
|
+
}
|
|
1423
|
+
const streamId = parsed.stream_id;
|
|
1424
|
+
if (streamId !== void 0) {
|
|
1425
|
+
const stream = this.activeStreams.get(streamId);
|
|
1426
|
+
if (stream) stream._handleEvent(parsed);
|
|
1427
|
+
return;
|
|
1428
|
+
}
|
|
1429
|
+
if (parsed.error_code !== void 0) {
|
|
1430
|
+
const errPayload = { error_code: parsed.error_code };
|
|
1431
|
+
if (parsed.error_message !== void 0) errPayload.error_message = parsed.error_message;
|
|
1432
|
+
const error = mapErrorResponse(errPayload);
|
|
1433
|
+
this.emit("error", error);
|
|
1434
|
+
}
|
|
1435
|
+
}
|
|
1436
|
+
startKeepalive() {
|
|
1437
|
+
if (this.keepaliveTimer) return;
|
|
1438
|
+
this.keepaliveTimer = setInterval(() => {
|
|
1439
|
+
if (this.connected && this.ws) try {
|
|
1440
|
+
this.ws.send(JSON.stringify({ keep_alive: true }));
|
|
1441
|
+
} catch {}
|
|
1442
|
+
}, this.keepaliveIntervalMs);
|
|
1443
|
+
}
|
|
1444
|
+
stopKeepalive() {
|
|
1445
|
+
if (this.keepaliveTimer) {
|
|
1446
|
+
clearInterval(this.keepaliveTimer);
|
|
1447
|
+
this.keepaliveTimer = null;
|
|
1448
|
+
}
|
|
1449
|
+
}
|
|
1450
|
+
};
|
|
1451
|
+
|
|
868
1452
|
//#endregion
|
|
869
1453
|
//#region ../core/src/realtime/segments.ts
|
|
870
1454
|
/**
|
|
@@ -1020,199 +1604,210 @@ var RealtimeUtteranceBuffer = class {
|
|
|
1020
1604
|
markEndpoint() {
|
|
1021
1605
|
const trailingSegments = this.segmentBuffer.flushAll();
|
|
1022
1606
|
const segments = [...this.pendingSegments, ...trailingSegments];
|
|
1023
|
-
this.pendingSegments = [];
|
|
1024
|
-
if (segments.length === 0) return;
|
|
1025
|
-
return buildUtterance(segments, this.lastFinalAudioProcMs, this.lastTotalAudioProcMs);
|
|
1026
|
-
}
|
|
1027
|
-
/**
|
|
1028
|
-
* Clear buffered segments and tokens.
|
|
1029
|
-
*/
|
|
1030
|
-
reset() {
|
|
1031
|
-
this.pendingSegments = [];
|
|
1032
|
-
this.segmentBuffer.reset();
|
|
1033
|
-
}
|
|
1034
|
-
};
|
|
1035
|
-
function buildUtterance(segments, finalAudioProcMs, totalAudioProcMs) {
|
|
1036
|
-
const tokens = segments.flatMap((segment) => segment.tokens);
|
|
1037
|
-
return {
|
|
1038
|
-
text: segments.map((segment) => segment.text).join(""),
|
|
1039
|
-
segments,
|
|
1040
|
-
tokens,
|
|
1041
|
-
start_ms: segments[0]?.start_ms,
|
|
1042
|
-
end_ms: segments[segments.length - 1]?.end_ms,
|
|
1043
|
-
speaker: getCommonValue(segments.map((segment) => segment.speaker)),
|
|
1044
|
-
language: getCommonValue(segments.map((segment) => segment.language)),
|
|
1045
|
-
final_audio_proc_ms: finalAudioProcMs,
|
|
1046
|
-
total_audio_proc_ms: totalAudioProcMs
|
|
1047
|
-
};
|
|
1048
|
-
}
|
|
1049
|
-
function getCommonValue(values) {
|
|
1050
|
-
let common;
|
|
1051
|
-
for (const value of values) {
|
|
1052
|
-
if (value === void 0) return;
|
|
1053
|
-
if (common === void 0) {
|
|
1054
|
-
common = value;
|
|
1055
|
-
continue;
|
|
1056
|
-
}
|
|
1057
|
-
if (value !== common) return;
|
|
1058
|
-
}
|
|
1059
|
-
return common;
|
|
1060
|
-
}
|
|
1061
|
-
|
|
1062
|
-
//#endregion
|
|
1063
|
-
//#region src/http/errors.ts
|
|
1064
|
-
/**
|
|
1065
|
-
* HTTP error handling for the Soniox SDK
|
|
1066
|
-
*/
|
|
1067
|
-
/** Maximum body text length to include in error details (4KB) */
|
|
1068
|
-
const MAX_BODY_TEXT_LENGTH = 4096;
|
|
1069
|
-
/**
|
|
1070
|
-
* HTTP error class for all HTTP-related failures (REST API).
|
|
1071
|
-
*
|
|
1072
|
-
* Thrown when HTTP requests fail due to network issues, timeouts,
|
|
1073
|
-
* server errors, or response parsing failures.
|
|
1074
|
-
*/
|
|
1075
|
-
var SonioxHttpError = class extends SonioxError {
|
|
1076
|
-
/** Request URL */
|
|
1077
|
-
url;
|
|
1078
|
-
/** HTTP method */
|
|
1079
|
-
method;
|
|
1080
|
-
/** Response headers (only for http_error) */
|
|
1081
|
-
headers;
|
|
1082
|
-
/** Response body text, capped at 4KB (only for http_error/parse_error) */
|
|
1083
|
-
bodyText;
|
|
1084
|
-
constructor(details) {
|
|
1085
|
-
super(details.message, details.code, details.statusCode, details.cause);
|
|
1086
|
-
this.name = "SonioxHttpError";
|
|
1087
|
-
this.url = details.url;
|
|
1088
|
-
this.method = details.method;
|
|
1089
|
-
this.headers = details.headers;
|
|
1090
|
-
this.bodyText = details.bodyText;
|
|
1091
|
-
}
|
|
1092
|
-
/**
|
|
1093
|
-
* Creates a human-readable string representation
|
|
1094
|
-
*/
|
|
1095
|
-
toString() {
|
|
1096
|
-
const parts = [`SonioxHttpError [${this.code}]: ${this.message}`];
|
|
1097
|
-
parts.push(` Method: ${this.method}`);
|
|
1098
|
-
parts.push(` URL: ${this.url}`);
|
|
1099
|
-
if (this.statusCode !== void 0) parts.push(` Status: ${this.statusCode}`);
|
|
1100
|
-
return parts.join("\n");
|
|
1607
|
+
this.pendingSegments = [];
|
|
1608
|
+
if (segments.length === 0) return;
|
|
1609
|
+
return buildUtterance(segments, this.lastFinalAudioProcMs, this.lastTotalAudioProcMs);
|
|
1101
1610
|
}
|
|
1102
1611
|
/**
|
|
1103
|
-
*
|
|
1612
|
+
* Clear buffered segments and tokens.
|
|
1104
1613
|
*/
|
|
1105
|
-
|
|
1106
|
-
|
|
1107
|
-
|
|
1108
|
-
code: this.code,
|
|
1109
|
-
message: this.message,
|
|
1110
|
-
url: this.url,
|
|
1111
|
-
method: this.method,
|
|
1112
|
-
...this.statusCode !== void 0 && { statusCode: this.statusCode },
|
|
1113
|
-
...this.headers !== void 0 && { headers: this.headers },
|
|
1114
|
-
...this.bodyText !== void 0 && { bodyText: this.bodyText }
|
|
1115
|
-
};
|
|
1614
|
+
reset() {
|
|
1615
|
+
this.pendingSegments = [];
|
|
1616
|
+
this.segmentBuffer.reset();
|
|
1116
1617
|
}
|
|
1117
1618
|
};
|
|
1118
|
-
|
|
1119
|
-
|
|
1120
|
-
|
|
1121
|
-
|
|
1122
|
-
|
|
1123
|
-
|
|
1124
|
-
|
|
1125
|
-
|
|
1126
|
-
|
|
1127
|
-
|
|
1128
|
-
|
|
1129
|
-
|
|
1130
|
-
|
|
1131
|
-
* Creates a timeout error
|
|
1132
|
-
*/
|
|
1133
|
-
function createTimeoutError(url, method, timeoutMs) {
|
|
1134
|
-
return new SonioxHttpError({
|
|
1135
|
-
code: "timeout",
|
|
1136
|
-
message: `Request timed out after ${timeoutMs}ms`,
|
|
1137
|
-
url,
|
|
1138
|
-
method
|
|
1139
|
-
});
|
|
1619
|
+
/**
 * Assemble an utterance object from an ordered list of segments.
 *
 * @param segments - Segments making up the utterance.
 * @param finalAudioProcMs - Stored as `final_audio_proc_ms`.
 * @param totalAudioProcMs - Stored as `total_audio_proc_ms`.
 * @returns Utterance with concatenated text, flattened tokens, the first
 * segment's `start_ms`, the last segment's `end_ms`, and a
 * `speaker` / `language` that is only set when all segments agree.
 */
function buildUtterance(segments, finalAudioProcMs, totalAudioProcMs) {
  const tokens = segments.flatMap((segment) => segment.tokens);
  const text = segments.map((segment) => segment.text).join("");
  const firstSegment = segments[0];
  const lastSegment = segments[segments.length - 1];
  const speaker = getCommonValue(segments.map((segment) => segment.speaker));
  const language = getCommonValue(segments.map((segment) => segment.language));
  return {
    text,
    segments,
    tokens,
    start_ms: firstSegment?.start_ms,
    end_ms: lastSegment?.end_ms,
    speaker,
    language,
    final_audio_proc_ms: finalAudioProcMs,
    total_audio_proc_ms: totalAudioProcMs
  };
}
|
|
1141
|
-
|
|
1142
|
-
|
|
1143
|
-
|
|
1144
|
-
|
|
1145
|
-
|
|
1146
|
-
|
|
1147
|
-
|
|
1148
|
-
|
|
1149
|
-
|
|
1150
|
-
|
|
1151
|
-
|
|
1633
|
+
/**
 * Return the single value shared by every element of `values`.
 *
 * @param values - Iterable of values to compare with strict equality.
 * @returns The shared value, or `undefined` when the iterable is empty,
 * contains an `undefined` element, or contains two differing values.
 */
function getCommonValue(values) {
  let shared;
  let isFirst = true;
  for (const candidate of values) {
    if (candidate === undefined) return undefined;
    if (isFirst) {
      shared = candidate;
      isFirst = false;
    } else if (candidate !== shared) {
      return undefined;
    }
  }
  return shared;
}
|
|
1645
|
+
|
|
1646
|
+
//#endregion
|
|
1647
|
+
//#region ../core/src/tts-rest.ts
|
|
1153
1648
|
/**
|
|
1154
|
-
*
|
|
1649
|
+
* Browser-safe REST TTS client.
|
|
1650
|
+
*
|
|
1651
|
+
* Uses only `globalThis.fetch` — no Node-specific dependencies.
|
|
1652
|
+
* Shared by both `@soniox/node` and `@soniox/client`.
|
|
1155
1653
|
*/
|
|
1156
|
-
|
|
1157
|
-
|
|
1158
|
-
|
|
1159
|
-
|
|
1160
|
-
|
|
1161
|
-
|
|
1162
|
-
|
|
1163
|
-
|
|
1164
|
-
|
|
1165
|
-
|
|
1166
|
-
}
|
|
1654
|
+
const DEFAULT_MODEL = "tts-rt-v1-preview";
|
|
1655
|
+
const DEFAULT_LANGUAGE = "en";
|
|
1656
|
+
const DEFAULT_AUDIO_FORMAT = "wav";
|
|
1657
|
+
/**
 * Build the JSON request body for a TTS generation call.
 *
 * `model`, `language` and `audio_format` fall back to the module defaults
 * when they are null or undefined; `sample_rate` and `bitrate` are only
 * included when they are defined.
 *
 * @param options - Generation options supplied by the caller.
 * @returns Plain object ready to be serialized as the request body.
 */
function buildPayload(options) {
  const { model, language, voice, audio_format, text, sample_rate, bitrate } = options;
  const payload = {
    model: model ?? DEFAULT_MODEL,
    language: language ?? DEFAULT_LANGUAGE,
    voice,
    audio_format: audio_format ?? DEFAULT_AUDIO_FORMAT,
    text
  };
  if (sample_rate !== undefined) {
    payload.sample_rate = sample_rate;
  }
  if (bitrate !== undefined) {
    payload.bitrate = bitrate;
  }
  return payload;
}
|
|
1168
1669
|
/**
|
|
1169
|
-
*
|
|
1670
|
+
* Normalizes fetch Headers to a plain object with lowercase keys.
|
|
1671
|
+
* Duplicated here (rather than imported from `@soniox/node`) to keep
|
|
1672
|
+
* this module browser-safe.
|
|
1170
1673
|
*/
|
|
1171
|
-
function
|
|
1172
|
-
const
|
|
1173
|
-
|
|
1174
|
-
|
|
1175
|
-
code: "parse_error",
|
|
1176
|
-
message: `Parse error: ${message}`,
|
|
1177
|
-
url,
|
|
1178
|
-
method,
|
|
1179
|
-
bodyText: cappedBody,
|
|
1180
|
-
cause
|
|
1674
|
+
/**
 * Copy a headers collection into a plain object keyed by lowercase name.
 *
 * @param headers - Object exposing `forEach((value, key) => ...)`, such as fetch `Headers`.
 * @returns Plain object mapping each lowercased header name to its value.
 */
function headersToObject(headers) {
  const plain = {};
  const copyEntry = (headerValue, headerName) => {
    const normalizedName = headerName.toLowerCase();
    plain[normalizedName] = headerValue;
  };
  headers.forEach(copyEntry);
  return plain;
}
|
|
1183
|
-
|
|
1184
|
-
* Truncates body text to the maximum allowed length
|
|
1185
|
-
*/
|
|
1186
|
-
function truncateBodyText(text) {
|
|
1187
|
-
if (text.length <= MAX_BODY_TEXT_LENGTH) return text;
|
|
1188
|
-
return text.slice(0, MAX_BODY_TEXT_LENGTH) + "... [truncated]";
|
|
1189
|
-
}
|
|
1190
|
-
/**
|
|
1191
|
-
* Type guard to check if an error is an AbortError
|
|
1192
|
-
*/
|
|
1193
|
-
function isAbortError(error) {
|
|
1681
|
+
/**
 * Tell whether a thrown value looks like an abort or timeout error.
 *
 * @param error - Any caught value.
 * @returns `true` only for `Error` instances named `"AbortError"` or `"TimeoutError"`.
 */
function isAbortLikeError(error) {
  if (!(error instanceof Error)) {
    return false;
  }
  const { name } = error;
  return name === "AbortError" || name === "TimeoutError";
}
|
|
1197
1685
|
/**
|
|
1198
|
-
*
|
|
1199
|
-
*
|
|
1200
|
-
|
|
1201
|
-
|
|
1202
|
-
|
|
1203
|
-
|
|
1204
|
-
|
|
1205
|
-
*
|
|
1206
|
-
|
|
1207
|
-
|
|
1208
|
-
|
|
1209
|
-
}
|
|
1210
|
-
|
|
1211
|
-
* Checks if an error is a 404 Not Found error
|
|
1686
|
+
* Browser-safe REST client for TTS generation.
|
|
1687
|
+
*
|
|
1688
|
+
* Provides `generate()` (buffered) and `generateStream()` (streaming)
|
|
1689
|
+
* using only `globalThis.fetch`. HTTP failures are surfaced as
|
|
1690
|
+
* {@link SonioxHttpError}, matching the rest of the Soniox SDK.
|
|
1691
|
+
*
|
|
1692
|
+
* Authentication uses the `Authorization: Bearer <api_key>` header.
|
|
1693
|
+
*
|
|
1694
|
+
* @example
|
|
1695
|
+
* ```typescript
|
|
1696
|
+
* const client = new TtsRestClient(apiKey, 'https://tts-rt.soniox.com');
|
|
1697
|
+
* const audio = await client.generate({ text: 'Hello', voice: 'Adrian' });
|
|
1698
|
+
* ```
|
|
1212
1699
|
*/
|
|
1213
|
-
|
|
1214
|
-
|
|
1215
|
-
|
|
1700
|
+
var TtsRestClient = class {
  apiKey;
  ttsApiUrl;
  /**
   * @param apiKey - API key sent as `Authorization: Bearer <apiKey>`.
   * @param ttsApiUrl - Base URL of the TTS REST API; `/tts` is appended to it.
   */
  constructor(apiKey, ttsApiUrl) {
    this.apiKey = apiKey;
    this.ttsApiUrl = ttsApiUrl;
  }
  /**
   * Generate speech audio from text. Returns the full audio as a `Uint8Array`.
   *
   * @throws {@link SonioxHttpError} on non-2xx responses, network failures,
   * or aborted requests.
   */
  async generate(options) {
    const url = `${this.ttsApiUrl}/tts`;
    const response = await this.sendRequest(url, options);
    const buffer = await response.arrayBuffer();
    return new Uint8Array(buffer);
  }
  /**
   * Generate speech audio from text as a streaming async iterable.
   *
   * Yields `Uint8Array` chunks as they arrive from the server response body.
   * Lower time-to-first-audio than {@link generate}.
   *
   * When the consumer stops iterating early (`break`, `return`, or a thrown
   * error), the response body is cancelled so the transfer does not keep
   * running in the background.
   *
   * **Known limitation:** Mid-stream server errors (reported via HTTP trailers)
   * cannot be detected through the `fetch` API. The iterator may end early
   * without an explicit error. Use WebSocket TTS for reliable error detection.
   *
   * @throws {@link SonioxHttpError} on non-2xx responses, network failures,
   * or aborted requests (before the stream starts).
   */
  async *generateStream(options) {
    const url = `${this.ttsApiUrl}/tts`;
    const response = await this.sendRequest(url, options);
    if (!response.body) throw createHttpError(url, "POST", response.status, headersToObject(response.headers), "Response has no body stream");
    const reader = response.body.getReader();
    let drained = false;
    try {
      while (true) {
        const { done, value } = await reader.read();
        if (done) {
          drained = true;
          break;
        }
        yield value;
      }
    } finally {
      if (!drained) {
        // Releasing the lock alone leaves the body open. Cancel it so the
        // underlying request is torn down when iteration ends early.
        try {
          await reader.cancel();
        } catch {
          // An already-errored stream rejects cancel(); the original error
          // (if any) is what the caller should see.
        }
      }
      reader.releaseLock();
    }
  }
  /**
   * Internal request helper. Performs the fetch, maps network/abort failures
   * to {@link SonioxHttpError}, and throws on non-2xx responses.
   *
   * @param url - Absolute URL to POST to.
   * @param options - Generation options; `options.signal`, when set, is passed to `fetch`.
   * @returns The successful (`ok`) fetch response.
   */
  async sendRequest(url, options) {
    const payload = buildPayload(options);
    let response;
    try {
      response = await globalThis.fetch(url, {
        method: "POST",
        headers: {
          Authorization: `Bearer ${this.apiKey}`,
          "Content-Type": "application/json"
        },
        body: JSON.stringify(payload),
        ...options.signal && { signal: options.signal }
      });
    } catch (cause) {
      if (isAbortLikeError(cause)) throw createAbortError(url, "POST", cause);
      throw createNetworkError(url, "POST", cause);
    }
    if (!response.ok) {
      // The error body is informational only; fall back to an empty string
      // when it cannot be read.
      const bodyText = await response.text().catch(() => "");
      throw createHttpError(url, "POST", response.status, headersToObject(response.headers), bodyText);
    }
    return response;
  }
};
|
|
1774
|
+
|
|
1775
|
+
//#endregion
|
|
1776
|
+
//#region src/async/auth.ts
|
|
1777
|
+
/**
 * REST API for authentication helpers (temporary API keys).
 */
var SonioxAuthAPI = class {
  /**
   * @param http - HTTP client exposing `request(...)`, used for all calls.
   */
  constructor(http) {
    this.http = http;
  }
  /**
   * Creates a temporary API key for client-side use.
   *
   * @param request - Request parameters for the temporary key
   * @param signal - Optional AbortSignal for cancellation
   * @returns The temporary API key response
   * @throws Error when `expires_in_seconds` is not a finite number within
   * `SONIOX_TMP_API_KEY_DURATION_MIN`..`SONIOX_TMP_API_KEY_DURATION_MAX`.
   *
   * @example
   * ```typescript
   * const sttKey = await client.auth.createTemporaryKey({
   *   usage_type: 'transcribe_websocket',
   *   expires_in_seconds: 300,
   * });
   *
   * const ttsKey = await client.auth.createTemporaryKey({
   *   usage_type: 'tts_rt',
   *   expires_in_seconds: 300,
   * });
   * ```
   */
  async createTemporaryKey(request, signal) {
    const ttl = request.expires_in_seconds;
    // Validate against the shared duration constants so the check and the
    // exported limits cannot drift apart.
    const isValidTtl = Number.isFinite(ttl) && ttl >= SONIOX_TMP_API_KEY_DURATION_MIN && ttl <= SONIOX_TMP_API_KEY_DURATION_MAX;
    if (!isValidTtl) throw new Error(`expires_in_seconds must be a finite number between ${SONIOX_TMP_API_KEY_DURATION_MIN} and ${SONIOX_TMP_API_KEY_DURATION_MAX}`);
    const response = await this.http.request({
      method: "POST",
      path: "/v1/auth/temporary-api-key",
      body: request,
      ...signal && { signal }
    });
    return response.data;
  }
};
|
|
1216
1811
|
|
|
1217
1812
|
//#endregion
|
|
1218
1813
|
//#region src/async/files.ts
|
|
@@ -2686,6 +3281,86 @@ var SonioxSttApi = class {
|
|
|
2686
3281
|
}
|
|
2687
3282
|
};
|
|
2688
3283
|
|
|
3284
|
+
//#endregion
|
|
3285
|
+
//#region src/async/tts.ts
|
|
3286
|
+
/**
|
|
3287
|
+
* REST API for Text-to-Speech generation and TTS model listing.
|
|
3288
|
+
*
|
|
3289
|
+
* Accessed via `client.tts` on {@link SonioxNodeClient}.
|
|
3290
|
+
*
|
|
3291
|
+
* Inherits browser-safe `generate()` and `generateStream()` from
|
|
3292
|
+
* `TtsRestClient` in `@soniox/core`, and adds Node-specific methods
|
|
3293
|
+
* `generateToFile()` and `listModels()`.
|
|
3294
|
+
*/
|
|
3295
|
+
var SonioxTtsApi = class extends TtsRestClient {
  http;
  /**
   * @param apiKey - API key forwarded to the REST TTS base class.
   * @param ttsApiUrl - TTS REST base URL forwarded to the base class.
   * @param http - HTTP client used by `listModels()`.
   */
  constructor(apiKey, ttsApiUrl, http) {
    super(apiKey, ttsApiUrl);
    this.http = http;
  }
  /**
   * Generate speech audio and write to a file or writable stream.
   *
   * A string `output` is treated as a file path: the audio is generated in
   * full and written in one call. Any other `output` is treated as a
   * writable stream: chunks are written as they arrive and the writer lock
   * is released afterwards.
   *
   * @param output - File path (string) or a `WritableStream<Uint8Array>`
   * @param options - Generation options
   * @returns Number of bytes written
   *
   * @example Write to file
   * ```typescript
   * const bytes = await client.tts.generateToFile('output.wav', {
   *   text: 'Hello world',
   *   voice: 'Adrian',
   *   language: 'en',
   * });
   * ```
   *
   * @example Write to a writable stream
   * ```typescript
   * const bytes = await client.tts.generateToFile(writableStream, {
   *   text: 'Hello world',
   *   voice: 'Adrian',
   *   language: 'en',
   * });
   * ```
   */
  async generateToFile(output, options) {
    if (typeof output !== "string") {
      const writer = output.getWriter();
      let total = 0;
      try {
        for await (const piece of this.generateStream(options)) {
          await writer.write(piece);
          total += piece.byteLength;
        }
      } finally {
        writer.releaseLock();
      }
      return total;
    }
    const audio = await this.generate(options);
    await (0, node_fs_promises.writeFile)(output, audio);
    return audio.byteLength;
  }
  /**
   * List available TTS models and their voices.
   *
   * @param signal - Optional AbortSignal passed to the HTTP client.
   * @returns The `models` array from the response data.
   *
   * @example
   * ```typescript
   * const models = await client.tts.listModels();
   * for (const model of models) {
   *   console.log(model.id, model.voices.map(v => v.id));
   * }
   * ```
   */
  async listModels(signal) {
    const request = {
      method: "GET",
      path: "/v1/tts-models"
    };
    if (signal) request.signal = signal;
    const response = await this.http.request(request);
    return response.data.models;
  }
};
|
|
3363
|
+
|
|
2689
3364
|
//#endregion
|
|
2690
3365
|
//#region src/async/webhooks.ts
|
|
2691
3366
|
const VALID_STATUSES = ["completed", "error"];
|
|
@@ -3508,36 +4183,75 @@ function combineAbortSignals(...signals) {
|
|
|
3508
4183
|
//#endregion
|
|
3509
4184
|
//#region src/realtime/index.ts
|
|
3510
4185
|
/**
|
|
3511
|
-
* Real-time API factory for creating STT sessions.
|
|
4186
|
+
* Real-time API factory for creating STT sessions and TTS connections.
|
|
3512
4187
|
*
|
|
3513
|
-
* @example
|
|
4188
|
+
* @example STT
|
|
4189
|
+
* ```typescript
|
|
4190
|
+
* const session = client.realtime.stt({ model: 'stt-rt-v4' });
|
|
4191
|
+
* await session.connect();
|
|
4192
|
+
* ```
|
|
4193
|
+
*
|
|
4194
|
+
* @example TTS (single stream)
|
|
3514
4195
|
* ```typescript
|
|
3515
|
-
* const
|
|
3516
|
-
* model: '
|
|
3517
|
-
*
|
|
4196
|
+
* const stream = await client.realtime.tts({
|
|
4197
|
+
* model: 'tts-rt-v1-preview',
|
|
4198
|
+
* voice: 'Adrian',
|
|
4199
|
+
* language: 'en',
|
|
4200
|
+
* audio_format: 'wav',
|
|
3518
4201
|
* });
|
|
4202
|
+
* stream.sendText("Hello");
|
|
4203
|
+
* stream.finish();
|
|
4204
|
+
* for await (const chunk of stream) { ... }
|
|
4205
|
+
* ```
|
|
3519
4206
|
*
|
|
3520
|
-
*
|
|
4207
|
+
* @example TTS (multi-stream)
|
|
4208
|
+
* ```typescript
|
|
4209
|
+
* const conn = await client.realtime.tts.multiStream();
|
|
4210
|
+
* const stream = await conn.stream({
|
|
4211
|
+
* model: 'tts-rt-v1-preview',
|
|
4212
|
+
* voice: 'Adrian',
|
|
4213
|
+
* language: 'en',
|
|
4214
|
+
* audio_format: 'wav',
|
|
4215
|
+
* });
|
|
3521
4216
|
* ```
|
|
3522
4217
|
*/
|
|
3523
4218
|
var SonioxRealtimeApi = class {
  options;
  tts;
  /**
   * @param options - Resolved realtime options (API key, WebSocket URLs,
   * STT/TTS defaults and connection/session options).
   */
  constructor(options) {
    this.options = options;
    // `tts` is callable (single stream) and also carries `multiStream()`
    // for opening a reusable connection.
    const ttsCall = (input) => {
      return this.createSingleTtsStream(input ?? {});
    };
    ttsCall.multiStream = () => {
      return this.createTtsConnection();
    };
    this.tts = ttsCall;
  }
  /**
   * Create a new Speech-to-Text session.
   *
   * `config` is shallow-merged on top of `stt_defaults` from the client
   * options; caller-provided fields override the defaults. `options` is
   * merged the same way on top of `default_session_options`.
   *
   * @returns New STT session instance
   */
  stt(config, options) {
    const mergedOptions = {
      ...this.options.default_session_options,
      ...options
    };
    const mergedConfig = {
      ...this.options.stt_defaults,
      ...config
    };
    return new RealtimeSttSession(this.options.api_key, this.options.ws_base_url, mergedConfig, mergedOptions);
  }
  /**
   * Open a dedicated connection and a single connection-owning stream on it.
   *
   * The connection is not reachable by the caller, so when opening the
   * stream fails it is closed here instead of being left dangling.
   *
   * @param input - Per-stream configuration.
   * @returns The opened stream.
   */
  async createSingleTtsStream(input) {
    const connection = new RealtimeTtsConnection(this.options.api_key, this.options.tts_ws_url, this.options.tts_defaults ?? {}, this.options.tts_connection_options);
    try {
      return await connection._openStream(input, true);
    } catch (error) {
      try {
        connection.close();
      } catch {
        // Cleanup is best effort; the original failure is what matters.
      }
      throw error;
    }
  }
  /**
   * Create a TTS connection, connect it, and hand it to the caller.
   *
   * @returns The connected connection.
   */
  async createTtsConnection() {
    const connection = new RealtimeTtsConnection(this.options.api_key, this.options.tts_ws_url, this.options.tts_defaults ?? {}, this.options.tts_connection_options);
    await connection.connect();
    return connection;
  }
};
|
|
3543
4257
|
|
|
@@ -3545,20 +4259,37 @@ var SonioxRealtimeApi = class {
|
|
|
3545
4259
|
//#region src/client.ts
|
|
3546
4260
|
/**
|
|
3547
4261
|
* Soniox Node Client
|
|
3548
|
-
* @returns {SonioxNodeClient}
|
|
3549
4262
|
*
|
|
3550
4263
|
* @example
|
|
3551
4264
|
* ```typescript
|
|
3552
4265
|
* import { SonioxNodeClient } from '@soniox/node';
|
|
3553
4266
|
*
|
|
3554
|
-
*
|
|
3555
|
-
*
|
|
4267
|
+
* // Default (US) region
|
|
4268
|
+
* const client = new SonioxNodeClient({ api_key: 'your-api-key' });
|
|
4269
|
+
*
|
|
4270
|
+
* // EU region
|
|
4271
|
+
* const client = new SonioxNodeClient({ api_key: 'your-api-key', region: 'eu' });
|
|
4272
|
+
*
|
|
4273
|
+
* // REST TTS
|
|
4274
|
+
* const audio = await client.tts.generate({
|
|
4275
|
+
* text: 'Hello',
|
|
4276
|
+
* voice: 'Adrian',
|
|
4277
|
+
* language: 'en',
|
|
4278
|
+
* });
|
|
4279
|
+
*
|
|
4280
|
+
* // WebSocket TTS
|
|
4281
|
+
* const stream = await client.realtime.tts({
|
|
4282
|
+
* model: 'tts-rt-v1-preview',
|
|
4283
|
+
* voice: 'Adrian',
|
|
4284
|
+
* language: 'en',
|
|
4285
|
+
* audio_format: 'wav',
|
|
3556
4286
|
* });
|
|
3557
4287
|
* ```
|
|
3558
4288
|
*/
|
|
3559
4289
|
var SonioxNodeClient = class {
|
|
3560
4290
|
files;
|
|
3561
4291
|
stt;
|
|
4292
|
+
tts;
|
|
3562
4293
|
models;
|
|
3563
4294
|
webhooks;
|
|
3564
4295
|
auth;
|
|
@@ -3566,7 +4297,14 @@ var SonioxNodeClient = class {
|
|
|
3566
4297
|
constructor(options = {}) {
|
|
3567
4298
|
const apiKey = options.api_key ?? process.env["SONIOX_API_KEY"];
|
|
3568
4299
|
if (!apiKey) throw new Error("Missing API key. Provide it via options.api_key or set the SONIOX_API_KEY environment variable.");
|
|
3569
|
-
const
|
|
4300
|
+
const regionDefaults = resolveConnectionConfig({
|
|
4301
|
+
api_key: apiKey,
|
|
4302
|
+
region: options.region ?? process.env["SONIOX_REGION"],
|
|
4303
|
+
base_domain: options.base_domain ?? process.env["SONIOX_BASE_DOMAIN"],
|
|
4304
|
+
stt_defaults: options.stt_defaults,
|
|
4305
|
+
tts_defaults: options.tts_defaults
|
|
4306
|
+
});
|
|
4307
|
+
const baseURL = options.base_url ?? process.env["SONIOX_API_BASE_URL"] ?? regionDefaults.api_domain;
|
|
3570
4308
|
const http = options.http_client ?? new FetchHttpClient({
|
|
3571
4309
|
base_url: baseURL,
|
|
3572
4310
|
default_headers: {
|
|
@@ -3579,9 +4317,14 @@ var SonioxNodeClient = class {
|
|
|
3579
4317
|
this.models = new SonioxModelsAPI(http);
|
|
3580
4318
|
this.webhooks = new SonioxWebhooksAPI(this.stt);
|
|
3581
4319
|
this.auth = new SonioxAuthAPI(http);
|
|
4320
|
+
this.tts = new SonioxTtsApi(apiKey, options.tts_api_url ?? process.env["SONIOX_TTS_API_URL"] ?? regionDefaults.tts_api_url, http);
|
|
3582
4321
|
this.realtime = new SonioxRealtimeApi({
|
|
3583
4322
|
api_key: apiKey,
|
|
3584
|
-
ws_base_url: options.realtime?.ws_base_url ?? process.env["SONIOX_WS_URL"] ??
|
|
4323
|
+
ws_base_url: options.realtime?.ws_base_url ?? process.env["SONIOX_WS_URL"] ?? regionDefaults.stt_ws_url,
|
|
4324
|
+
tts_ws_url: options.realtime?.tts_ws_url ?? process.env["SONIOX_TTS_WS_URL"] ?? regionDefaults.tts_ws_url,
|
|
4325
|
+
stt_defaults: regionDefaults.stt_defaults,
|
|
4326
|
+
tts_defaults: regionDefaults.tts_defaults,
|
|
4327
|
+
tts_connection_options: options.realtime?.tts_connection_options,
|
|
3585
4328
|
default_session_options: options.realtime?.default_session_options
|
|
3586
4329
|
});
|
|
3587
4330
|
}
|
|
@@ -3599,6 +4342,8 @@ exports.QuotaError = QuotaError;
|
|
|
3599
4342
|
exports.RealtimeError = RealtimeError;
|
|
3600
4343
|
exports.RealtimeSegmentBuffer = RealtimeSegmentBuffer;
|
|
3601
4344
|
exports.RealtimeSttSession = RealtimeSttSession;
|
|
4345
|
+
exports.RealtimeTtsConnection = RealtimeTtsConnection;
|
|
4346
|
+
exports.RealtimeTtsStream = RealtimeTtsStream;
|
|
3602
4347
|
exports.RealtimeUtteranceBuffer = RealtimeUtteranceBuffer;
|
|
3603
4348
|
exports.SONIOX_API_BASE_URL = SONIOX_API_BASE_URL;
|
|
3604
4349
|
exports.SONIOX_API_WEBHOOK_HEADER_ENV = SONIOX_API_WEBHOOK_HEADER_ENV;
|
|
@@ -3607,6 +4352,8 @@ exports.SONIOX_API_WS_URL = SONIOX_API_WS_URL;
|
|
|
3607
4352
|
exports.SONIOX_TMP_API_KEY_DURATION_MAX = SONIOX_TMP_API_KEY_DURATION_MAX;
|
|
3608
4353
|
exports.SONIOX_TMP_API_KEY_DURATION_MIN = SONIOX_TMP_API_KEY_DURATION_MIN;
|
|
3609
4354
|
exports.SONIOX_TMP_API_KEY_USAGE_TYPE = SONIOX_TMP_API_KEY_USAGE_TYPE;
|
|
4355
|
+
exports.SONIOX_TTS_API_BASE_URL = SONIOX_TTS_API_BASE_URL;
|
|
4356
|
+
exports.SONIOX_TTS_WS_URL = SONIOX_TTS_WS_URL;
|
|
3610
4357
|
exports.SonioxError = SonioxError;
|
|
3611
4358
|
exports.SonioxFile = SonioxFile;
|
|
3612
4359
|
exports.SonioxHttpError = SonioxHttpError;
|
|
@@ -3614,6 +4361,7 @@ exports.SonioxNodeClient = SonioxNodeClient;
|
|
|
3614
4361
|
exports.SonioxRealtimeApi = SonioxRealtimeApi;
|
|
3615
4362
|
exports.SonioxTranscript = SonioxTranscript;
|
|
3616
4363
|
exports.SonioxTranscription = SonioxTranscription;
|
|
4364
|
+
exports.SonioxTtsApi = SonioxTtsApi;
|
|
3617
4365
|
exports.StateError = StateError;
|
|
3618
4366
|
exports.TranscriptionListResult = TranscriptionListResult;
|
|
3619
4367
|
exports.buildUrl = buildUrl;
|
|
@@ -3627,6 +4375,7 @@ exports.isSonioxError = isSonioxError;
|
|
|
3627
4375
|
exports.isSonioxHttpError = isSonioxHttpError;
|
|
3628
4376
|
exports.mergeHeaders = mergeHeaders;
|
|
3629
4377
|
exports.normalizeHeaders = normalizeHeaders;
|
|
4378
|
+
exports.resolveConnectionConfig = resolveConnectionConfig;
|
|
3630
4379
|
exports.segmentRealtimeTokens = segmentRealtimeTokens;
|
|
3631
4380
|
exports.segmentTranscript = segmentTranscript;
|
|
3632
4381
|
//# sourceMappingURL=index.cjs.map
|