@soniox/node 1.1.2 → 2.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +43 -0
- package/dist/index.cjs +1073 -245
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +979 -176
- package/dist/index.d.cts.map +1 -1
- package/dist/index.d.mts +979 -176
- package/dist/index.d.mts.map +1 -1
- package/dist/index.mjs +1069 -246
- package/dist/index.mjs.map +1 -1
- package/package.json +1 -1
package/dist/index.cjs
CHANGED
|
@@ -1,37 +1,16 @@
|
|
|
1
|
+
let node_fs_promises = require("node:fs/promises");
|
|
1
2
|
|
|
2
3
|
//#region src/constants.ts
|
|
3
4
|
const SONIOX_API_BASE_URL = "https://api.soniox.com";
|
|
4
5
|
const SONIOX_API_WS_URL = "wss://stt-rt.soniox.com/transcribe-websocket";
|
|
6
|
+
const SONIOX_TTS_API_BASE_URL = "https://tts-rt.soniox.com";
|
|
7
|
+
const SONIOX_TTS_WS_URL = "wss://tts-rt.soniox.com/tts-websocket";
|
|
5
8
|
const SONIOX_TMP_API_KEY_USAGE_TYPE = "transcribe_websocket";
|
|
6
9
|
const SONIOX_TMP_API_KEY_DURATION_MIN = 1;
|
|
7
10
|
const SONIOX_TMP_API_KEY_DURATION_MAX = 3600;
|
|
8
11
|
const SONIOX_API_WEBHOOK_HEADER_ENV = "SONIOX_API_WEBHOOK_HEADER";
|
|
9
12
|
const SONIOX_API_WEBHOOK_SECRET_ENV = "SONIOX_API_WEBHOOK_SECRET";
|
|
10
13
|
|
|
11
|
-
//#endregion
|
|
12
|
-
//#region src/async/auth.ts
|
|
13
|
-
var SonioxAuthAPI = class {
|
|
14
|
-
constructor(http) {
|
|
15
|
-
this.http = http;
|
|
16
|
-
}
|
|
17
|
-
/**
|
|
18
|
-
* Creates a temporary API key for client-side use.
|
|
19
|
-
*
|
|
20
|
-
* @param request - Request parameters for the temporary key
|
|
21
|
-
* @param signal - Optional AbortSignal for cancellation
|
|
22
|
-
* @returns The temporary API key response
|
|
23
|
-
*/
|
|
24
|
-
async createTemporaryKey(request, signal) {
|
|
25
|
-
if (!Number.isFinite(request.expires_in_seconds) || request.expires_in_seconds < 1 || request.expires_in_seconds > 3600) throw new Error("expires_in_seconds must be a finite number between 1 and 3600");
|
|
26
|
-
return (await this.http.request({
|
|
27
|
-
method: "POST",
|
|
28
|
-
path: "/v1/auth/temporary-api-key",
|
|
29
|
-
body: request,
|
|
30
|
-
...signal && { signal }
|
|
31
|
-
})).data;
|
|
32
|
-
}
|
|
33
|
-
};
|
|
34
|
-
|
|
35
14
|
//#endregion
|
|
36
15
|
//#region ../core/src/errors.ts
|
|
37
16
|
var SonioxError = class extends Error {
|
|
@@ -79,6 +58,208 @@ var SonioxError = class extends Error {
|
|
|
79
58
|
}
|
|
80
59
|
};
|
|
81
60
|
|
|
61
|
+
//#endregion
|
|
62
|
+
//#region ../core/src/http-errors.ts
|
|
63
|
+
/**
|
|
64
|
+
* HTTP error handling for the Soniox SDK.
|
|
65
|
+
*
|
|
66
|
+
* Lives in `@soniox/core` so it can be shared by the browser-safe
|
|
67
|
+
* `TtsRestClient` and the Node `HttpClient`. `@soniox/node` re-exports
|
|
68
|
+
* these symbols for backwards compatibility.
|
|
69
|
+
*/
|
|
70
|
+
/** Maximum body text length to include in error details (4KB) */
|
|
71
|
+
const MAX_BODY_TEXT_LENGTH = 4096;
|
|
72
|
+
/**
 * Error type used for every failed REST (HTTP) call made by the SDK.
 *
 * Extends `SonioxError` with the request URL and method, and optionally
 * the response headers and (capped) response body text.
 */
var SonioxHttpError = class extends SonioxError {
	/** URL the request was sent to */
	url;
	/** HTTP method of the request */
	method;
	/** Headers of the response (only for http_error) */
	headers;
	/** Text of the response body, capped at 4KB (only for http_error/parse_error) */
	bodyText;
	/**
	 * @param details - Error details: `message`, `code`, `statusCode` and
	 *   `cause` are forwarded to `SonioxError`; `url`, `method`, `headers`
	 *   and `bodyText` are stored on this instance.
	 */
	constructor(details) {
		const { message, code, statusCode, cause, url, method, headers, bodyText } = details;
		super(message, code, statusCode, cause);
		this.name = "SonioxHttpError";
		this.url = url;
		this.method = method;
		this.headers = headers;
		this.bodyText = bodyText;
	}
	/**
	 * Renders the error as a multi-line, human-readable string.
	 *
	 * @returns Header line followed by method, URL and (when set) status
	 */
	toString() {
		const lines = [
			`SonioxHttpError [${this.code}]: ${this.message}`,
			` Method: ${this.method}`,
			` URL: ${this.url}`
		];
		if (this.statusCode !== void 0) lines.push(` Status: ${this.statusCode}`);
		return lines.join("\n");
	}
	/**
	 * Produces a plain object suitable for logging/serialization.
	 * `statusCode`, `headers` and `bodyText` are only present when defined.
	 */
	toJSON() {
		const json = {
			name: this.name,
			code: this.code,
			message: this.message,
			url: this.url,
			method: this.method
		};
		if (this.statusCode !== void 0) json.statusCode = this.statusCode;
		if (this.headers !== void 0) json.headers = this.headers;
		if (this.bodyText !== void 0) json.bodyText = this.bodyText;
		return json;
	}
};
|
|
121
|
+
/**
 * Builds a `SonioxHttpError` with code `network_error`.
 *
 * @param url - Request URL
 * @param method - HTTP method
 * @param cause - Underlying failure; its `message` is used when it is an `Error`
 * @returns The constructed error
 */
function createNetworkError(url, method, cause) {
	let reason = "Network request failed";
	if (cause instanceof Error) reason = cause.message;
	const details = {
		code: "network_error",
		message: `Network error: ${reason}`,
		url,
		method,
		cause
	};
	return new SonioxHttpError(details);
}
|
|
133
|
+
/**
 * Builds a `SonioxHttpError` with code `timeout`.
 *
 * @param url - Request URL
 * @param method - HTTP method
 * @param timeoutMs - Timeout value reported in the error message
 * @returns The constructed error
 */
function createTimeoutError(url, method, timeoutMs) {
	const details = {
		code: "timeout",
		message: `Request timed out after ${timeoutMs}ms`,
		url,
		method
	};
	return new SonioxHttpError(details);
}
|
|
144
|
+
/**
 * Builds a `SonioxHttpError` with code `aborted`.
 *
 * @param url - Request URL
 * @param method - HTTP method
 * @param cause - Value attached to the error as its cause
 * @returns The constructed error
 */
function createAbortError(url, method, cause) {
	const details = {
		code: "aborted",
		message: "Request was aborted",
		url,
		method,
		cause
	};
	return new SonioxHttpError(details);
}
|
|
156
|
+
/**
 * Builds a `SonioxHttpError` with code `http_error` (non-2xx status).
 *
 * @param url - Request URL
 * @param method - HTTP method
 * @param statusCode - HTTP status code of the response
 * @param headers - Response headers
 * @param bodyText - Response body text; passed through `truncateBodyText`
 * @returns The constructed error
 */
function createHttpError(url, method, statusCode, headers, bodyText) {
	const details = {
		code: "http_error",
		message: `HTTP ${statusCode}`,
		url,
		method,
		statusCode,
		headers,
		bodyText: truncateBodyText(bodyText)
	};
	return new SonioxHttpError(details);
}
|
|
171
|
+
/**
 * Builds a `SonioxHttpError` with code `parse_error` (invalid JSON, etc.).
 *
 * @param url - Request URL
 * @param method - HTTP method
 * @param bodyText - Response body text; passed through `truncateBodyText`
 * @param cause - Underlying failure; its `message` is used when it is an `Error`
 * @returns The constructed error
 */
function createParseError(url, method, bodyText, cause) {
	let reason = "Failed to parse response";
	if (cause instanceof Error) reason = cause.message;
	const details = {
		code: "parse_error",
		message: `Parse error: ${reason}`,
		url,
		method,
		bodyText: truncateBodyText(bodyText),
		cause
	};
	return new SonioxHttpError(details);
}
|
|
186
|
+
/**
 * Truncates body text to at most `MAX_BODY_TEXT_LENGTH` UTF-16 code units,
 * appending a `... [truncated]` marker when anything was cut.
 *
 * @param text - Body text to cap
 * @returns `text` unchanged when it fits, otherwise the capped prefix plus marker
 */
function truncateBodyText(text) {
	if (text.length <= MAX_BODY_TEXT_LENGTH) return text;
	let end = MAX_BODY_TEXT_LENGTH;
	// If the cut would land between the two halves of a surrogate pair, drop
	// the dangling high surrogate so the result stays well-formed.
	const lastUnit = text.charCodeAt(end - 1);
	if (lastUnit >= 55296 && lastUnit <= 56319) end -= 1;
	return `${text.slice(0, end)}... [truncated]`;
}
|
|
193
|
+
/**
 * Type guard for abort-style errors: an `Error` instance whose `name` is
 * `"AbortError"` or `"TimeoutError"`.
 *
 * @param error - Value to inspect
 * @returns `true` only for matching `Error` instances
 */
function isAbortError(error) {
	if (!(error instanceof Error)) return false;
	return ["AbortError", "TimeoutError"].includes(error.name);
}
|
|
200
|
+
/**
 * Type guard for the SDK's base error class.
 *
 * Matches any instance of `SonioxError`, which includes instances of its
 * subclasses.
 *
 * @param error - Value to inspect
 * @returns `true` when `error` is a `SonioxError` instance
 */
function isSonioxError(error) {
	const isBaseInstance = error instanceof SonioxError;
	return isBaseInstance;
}
|
|
207
|
+
/**
 * Type guard for HTTP errors raised by the SDK.
 *
 * @param error - Value to inspect
 * @returns `true` when `error` is a `SonioxHttpError` instance
 */
function isSonioxHttpError(error) {
	const isHttpInstance = error instanceof SonioxHttpError;
	return isHttpInstance;
}
|
|
213
|
+
/**
 * Checks whether an error is an SDK HTTP error with status 404 (Not Found).
 *
 * @param error - Value to inspect
 * @returns `true` only for `SonioxHttpError` instances whose `statusCode` is 404
 */
function isNotFoundError(error) {
	if (!isSonioxHttpError(error)) return false;
	return error.statusCode === 404;
}
|
|
219
|
+
|
|
220
|
+
//#endregion
|
|
221
|
+
//#region ../core/src/connection.ts
|
|
222
|
+
/** Root domain used for the default (US) deployment. */
|
|
223
|
+
const DEFAULT_BASE_DOMAIN = "soniox.com";
|
|
224
|
+
/**
 * Builds the four Soniox service URLs for a given base domain.
 *
 * Every URL uses a fixed subdomain of `base`:
 * `api.{base}`, `stt-rt.{base}` and `tts-rt.{base}`.
 *
 * @param base - Base domain, e.g. `soniox.com`
 * @returns Object with `api_domain`, `stt_ws_url`, `tts_api_url`, `tts_ws_url`
 */
function urlsFromBase(base) {
	const sttHost = `stt-rt.${base}`;
	const ttsHost = `tts-rt.${base}`;
	return {
		api_domain: `https://api.${base}`,
		stt_ws_url: `wss://${sttHost}/transcribe-websocket`,
		tts_api_url: `https://${ttsHost}`,
		tts_ws_url: `wss://${ttsHost}/tts-websocket`
	};
}
|
|
237
|
+
/**
 * Resolve a {@link SonioxConnectionConfig} into fully qualified URLs.
 *
 * Resolution priority (highest → lowest) for each URL:
 * 1. Explicit field (`api_domain`, `stt_ws_url`, `tts_api_url`, `tts_ws_url`)
 * 2. Derived from `base_domain`
 * 3. Derived from `region` → `{region}.soniox.com`
 * 4. Default US base domain (`soniox.com`)
 *
 * A `region` of `"us"` (any casing) selects the default base domain.
 * Empty-string or `null` values for `region` / `base_domain` are treated as
 * unset, so they fall through to the next rule instead of producing hosts
 * such as `api..soniox.com`.
 *
 * @param config - Connection config; `stt_defaults` takes precedence over
 *   `session_defaults` when both are present
 * @returns Resolved config. `stt_defaults` and `session_defaults` both
 *   reference the same resolved defaults object.
 */
function resolveConnectionConfig(config) {
	const { region, base_domain, api_domain, stt_ws_url, tts_api_url, tts_ws_url } = config;
	const isSet = (value) => value !== void 0 && value !== null && value !== "";
	const regionalBase = isSet(region) && region.toLowerCase() !== "us" ? `${region}.${DEFAULT_BASE_DOMAIN}` : DEFAULT_BASE_DOMAIN;
	const derived = urlsFromBase(isSet(base_domain) ? base_domain : regionalBase);
	const sttDefaults = config.stt_defaults ?? config.session_defaults ?? {};
	return {
		api_key: config.api_key,
		api_domain: api_domain ?? derived.api_domain,
		stt_ws_url: stt_ws_url ?? derived.stt_ws_url,
		tts_api_url: tts_api_url ?? derived.tts_api_url,
		tts_ws_url: tts_ws_url ?? derived.tts_ws_url,
		stt_defaults: sttDefaults,
		tts_defaults: config.tts_defaults ?? {},
		session_defaults: sttDefaults
	};
}
|
|
262
|
+
|
|
82
263
|
//#endregion
|
|
83
264
|
//#region ../core/src/segments.ts
|
|
84
265
|
const DEFAULT_GROUP_BY = ["speaker", "language"];
|
|
@@ -161,10 +342,34 @@ var AsyncEventQueue = class {
|
|
|
161
342
|
return this.done;
|
|
162
343
|
}
|
|
163
344
|
/**
|
|
345
|
+
* Drop buffered events without ending the queue.
|
|
346
|
+
*
|
|
347
|
+
* Intended for owners that know their consumer has gone away (e.g. an
|
|
348
|
+
* async-iterator consumer broke out of its `for await` loop). The queue
|
|
349
|
+
* remains active and accepts future pushes. Callers must ensure no other
|
|
350
|
+
* iterator is concurrently consuming this queue, since this also drops
|
|
351
|
+
* events those consumers would have observed.
|
|
352
|
+
*/
|
|
353
|
+
clear() {
|
|
354
|
+
this.queue = [];
|
|
355
|
+
}
|
|
356
|
+
/**
|
|
164
357
|
* Async iterator implementation.
|
|
358
|
+
*
|
|
359
|
+
* The returned iterator implements `return()` so consumers that exit
|
|
360
|
+
* `for await` early (via `break`, `throw`, or an outer `return`) cleanly
|
|
361
|
+
* release the iteration without further work. The queue itself is left
|
|
362
|
+
* in place — call {@link clear} or {@link end}/{@link abort} if buffered
|
|
363
|
+
* events should also be dropped.
|
|
165
364
|
*/
|
|
166
365
|
[Symbol.asyncIterator]() {
|
|
167
|
-
return {
|
|
366
|
+
return {
|
|
367
|
+
next: () => this.next(),
|
|
368
|
+
return: (value) => Promise.resolve({
|
|
369
|
+
value,
|
|
370
|
+
done: true
|
|
371
|
+
})
|
|
372
|
+
};
|
|
168
373
|
}
|
|
169
374
|
/**
|
|
170
375
|
* Get the next event from the queue.
|
|
@@ -422,8 +627,9 @@ function mapErrorResponse(response) {
|
|
|
422
627
|
|
|
423
628
|
//#endregion
|
|
424
629
|
//#region ../core/src/realtime/stt.ts
|
|
425
|
-
const DEFAULT_KEEPALIVE_INTERVAL_MS = 5e3;
|
|
426
|
-
const MIN_KEEPALIVE_INTERVAL_MS = 1e3;
|
|
630
|
+
const DEFAULT_KEEPALIVE_INTERVAL_MS$1 = 5e3;
|
|
631
|
+
const MIN_KEEPALIVE_INTERVAL_MS$1 = 1e3;
|
|
632
|
+
const DEFAULT_CONNECT_TIMEOUT_MS$1 = 2e4;
|
|
427
633
|
/**
|
|
428
634
|
* Convert audio data to Uint8Array
|
|
429
635
|
* Handles Uint8Array and ArrayBuffer
|
|
@@ -513,10 +719,12 @@ function filterSpecialTokens(tokens) {
|
|
|
513
719
|
var RealtimeSttSession = class {
|
|
514
720
|
emitter = new TypedEmitter();
|
|
515
721
|
eventQueue = new AsyncEventQueue();
|
|
722
|
+
iteratorAttached = false;
|
|
516
723
|
apiKey;
|
|
517
724
|
wsBaseUrl;
|
|
518
725
|
config;
|
|
519
726
|
keepaliveIntervalMs;
|
|
727
|
+
connectTimeoutMs;
|
|
520
728
|
signal;
|
|
521
729
|
ws = null;
|
|
522
730
|
_state = "idle";
|
|
@@ -530,8 +738,10 @@ var RealtimeSttSession = class {
|
|
|
530
738
|
this.apiKey = apiKey;
|
|
531
739
|
this.wsBaseUrl = wsBaseUrl;
|
|
532
740
|
this.config = config;
|
|
533
|
-
const keepaliveMs = options?.keepalive_interval_ms ?? DEFAULT_KEEPALIVE_INTERVAL_MS;
|
|
534
|
-
this.keepaliveIntervalMs = Number.isFinite(keepaliveMs) && keepaliveMs > 0 ? Math.max(keepaliveMs, MIN_KEEPALIVE_INTERVAL_MS) : DEFAULT_KEEPALIVE_INTERVAL_MS;
|
|
741
|
+
const keepaliveMs = options?.keepalive_interval_ms ?? DEFAULT_KEEPALIVE_INTERVAL_MS$1;
|
|
742
|
+
this.keepaliveIntervalMs = Number.isFinite(keepaliveMs) && keepaliveMs > 0 ? Math.max(keepaliveMs, MIN_KEEPALIVE_INTERVAL_MS$1) : DEFAULT_KEEPALIVE_INTERVAL_MS$1;
|
|
743
|
+
const connectMs = options?.connect_timeout_ms ?? DEFAULT_CONNECT_TIMEOUT_MS$1;
|
|
744
|
+
this.connectTimeoutMs = Number.isFinite(connectMs) && connectMs > 0 ? connectMs : DEFAULT_CONNECT_TIMEOUT_MS$1;
|
|
535
745
|
this.signal = options?.signal;
|
|
536
746
|
if (this.signal) {
|
|
537
747
|
this.abortHandler = () => this.handleAbort();
|
|
@@ -560,16 +770,26 @@ var RealtimeSttSession = class {
|
|
|
560
770
|
async connect() {
|
|
561
771
|
if (this._state !== "idle") throw new StateError(`Cannot connect: session is in "${this._state}" state`);
|
|
562
772
|
this.checkAborted();
|
|
563
|
-
this.setState("connecting");
|
|
773
|
+
this.setState("connecting", "user_action");
|
|
774
|
+
let connectTimer;
|
|
564
775
|
try {
|
|
565
|
-
await this.createWebSocket()
|
|
566
|
-
|
|
776
|
+
await Promise.race([this.createWebSocket().then((v) => {
|
|
777
|
+
clearTimeout(connectTimer);
|
|
778
|
+
return v;
|
|
779
|
+
}), new Promise((_resolve, reject) => {
|
|
780
|
+
connectTimer = setTimeout(() => {
|
|
781
|
+
if (this.ws) this.ws.close();
|
|
782
|
+
reject(new ConnectionError("Connection timed out"));
|
|
783
|
+
}, this.connectTimeoutMs);
|
|
784
|
+
})]);
|
|
785
|
+
this.setState("connected", "connected");
|
|
567
786
|
this.emitter.emit("connected");
|
|
568
787
|
this.updateKeepalive();
|
|
569
788
|
} catch (error) {
|
|
789
|
+
clearTimeout(connectTimer);
|
|
570
790
|
if (!this.isTerminalState(this._state)) {
|
|
571
791
|
const err = error instanceof Error ? error : new ConnectionError("Connection failed", error);
|
|
572
|
-
this.cleanup("error", err);
|
|
792
|
+
this.cleanup("error", err, "error");
|
|
573
793
|
}
|
|
574
794
|
throw error;
|
|
575
795
|
}
|
|
@@ -647,7 +867,7 @@ var RealtimeSttSession = class {
|
|
|
647
867
|
this.checkAborted();
|
|
648
868
|
if (this._state !== "connected") throw new StateError(`Cannot finish: session is in "${this._state}" state`);
|
|
649
869
|
if (this._paused) this.resume();
|
|
650
|
-
this.setState("finishing");
|
|
870
|
+
this.setState("finishing", "user_action");
|
|
651
871
|
this.updateKeepalive();
|
|
652
872
|
const finishPromise = new Promise((resolve, reject) => {
|
|
653
873
|
this.finishResolver = resolve;
|
|
@@ -663,7 +883,7 @@ var RealtimeSttSession = class {
|
|
|
663
883
|
if (this.isTerminalState(this._state)) return;
|
|
664
884
|
this.emitter.emit("disconnected", "client_closed");
|
|
665
885
|
this.settleFinish(new StateError("Session canceled"));
|
|
666
|
-
this.cleanup("canceled");
|
|
886
|
+
this.cleanup("canceled", void 0, "user_action");
|
|
667
887
|
}
|
|
668
888
|
/**
|
|
669
889
|
* Register an event handler
|
|
@@ -688,9 +908,42 @@ var RealtimeSttSession = class {
|
|
|
688
908
|
}
|
|
689
909
|
/**
|
|
690
910
|
* Async iterator for consuming events.
|
|
911
|
+
*
|
|
912
|
+
* The returned iterator's `return()` resets the internal iterator-attach
|
|
913
|
+
* flag and drops any buffered events, so consumers that exit `for await`
|
|
914
|
+
* early (via `break` etc.) stop accruing memory while the session keeps
|
|
915
|
+
* running.
|
|
691
916
|
*/
|
|
692
917
|
[Symbol.asyncIterator]() {
|
|
693
|
-
|
|
918
|
+
this.iteratorAttached = true;
|
|
919
|
+
const inner = this.eventQueue[Symbol.asyncIterator]();
|
|
920
|
+
return {
|
|
921
|
+
next: () => inner.next(),
|
|
922
|
+
return: (value) => {
|
|
923
|
+
this.iteratorAttached = false;
|
|
924
|
+
this.eventQueue.clear();
|
|
925
|
+
return inner.return?.(value) ?? Promise.resolve({
|
|
926
|
+
value,
|
|
927
|
+
done: true
|
|
928
|
+
});
|
|
929
|
+
}
|
|
930
|
+
};
|
|
931
|
+
}
|
|
932
|
+
/**
|
|
933
|
+
* @internal Debug-only: forcefully close the underlying WebSocket to
|
|
934
|
+
* simulate an unexpected network disconnection.
|
|
935
|
+
*/
|
|
936
|
+
__debugForceDisconnect() {
|
|
937
|
+
this.ws?.close(4999, "debug: simulated disconnect");
|
|
938
|
+
}
|
|
939
|
+
/**
|
|
940
|
+
* Push an event to the async iterator queue only when a consumer has
|
|
941
|
+
* attached via `[Symbol.asyncIterator]()`. Listener-only consumers
|
|
942
|
+
* (the documented `.on()` pattern) never drain the queue, so pushing
|
|
943
|
+
* unconditionally would leak buffered events on long-running sessions.
|
|
944
|
+
*/
|
|
945
|
+
enqueueIfIterating(event) {
|
|
946
|
+
if (this.iteratorAttached) this.eventQueue.push(event);
|
|
694
947
|
}
|
|
695
948
|
async createWebSocket() {
|
|
696
949
|
return new Promise((resolve, reject) => {
|
|
@@ -741,66 +994,70 @@ var RealtimeSttSession = class {
|
|
|
741
994
|
tokens: userTokens
|
|
742
995
|
};
|
|
743
996
|
this.emitter.emit("result", filteredResult);
|
|
744
|
-
this.
|
|
997
|
+
this.enqueueIfIterating({
|
|
745
998
|
kind: "result",
|
|
746
999
|
data: filteredResult
|
|
747
1000
|
});
|
|
748
1001
|
if (hasEndpoint) {
|
|
749
1002
|
this.emitter.emit("endpoint");
|
|
750
|
-
this.
|
|
1003
|
+
this.enqueueIfIterating({ kind: "endpoint" });
|
|
751
1004
|
}
|
|
752
1005
|
if (hasFinalized) {
|
|
753
1006
|
this.emitter.emit("finalized");
|
|
754
|
-
this.
|
|
1007
|
+
this.enqueueIfIterating({ kind: "finalized" });
|
|
755
1008
|
}
|
|
756
1009
|
if (result.finished) {
|
|
757
1010
|
this.emitter.emit("finished");
|
|
758
|
-
this.
|
|
1011
|
+
this.enqueueIfIterating({ kind: "finished" });
|
|
759
1012
|
this.settleFinish();
|
|
760
|
-
this.cleanup("finished");
|
|
1013
|
+
this.cleanup("finished", void 0, "finished");
|
|
761
1014
|
}
|
|
762
1015
|
} catch (error) {
|
|
763
1016
|
const err = error;
|
|
764
1017
|
this.emitter.emit("error", err);
|
|
765
1018
|
this.settleFinish(err);
|
|
766
|
-
this.cleanup("error", err);
|
|
1019
|
+
this.cleanup("error", err, "error");
|
|
767
1020
|
}
|
|
768
1021
|
}
|
|
769
1022
|
handleClose(event) {
|
|
770
1023
|
if (this.isTerminalState(this._state)) return;
|
|
771
1024
|
this.emitter.emit("disconnected", event.reason || void 0);
|
|
772
1025
|
if (this._state === "finishing") {
|
|
773
|
-
const error = new ConnectionError("WebSocket closed before finished response", event);
|
|
774
|
-
this.emitter.emit("error", error);
|
|
775
|
-
this.settleFinish(error);
|
|
776
|
-
this.cleanup("error", error);
|
|
1026
|
+
const error$1 = new ConnectionError("WebSocket closed before finished response", event);
|
|
1027
|
+
this.emitter.emit("error", error$1);
|
|
1028
|
+
this.settleFinish(error$1);
|
|
1029
|
+
this.cleanup("error", error$1, "connection_lost");
|
|
777
1030
|
return;
|
|
778
1031
|
}
|
|
779
|
-
|
|
1032
|
+
const error = new ConnectionError("WebSocket closed unexpectedly", event);
|
|
1033
|
+
this.emitter.emit("error", error);
|
|
1034
|
+
this.cleanup("closed", error, "connection_lost");
|
|
780
1035
|
}
|
|
781
1036
|
handleError(event) {
|
|
782
1037
|
const error = new ConnectionError("WebSocket error", event);
|
|
783
1038
|
this.emitter.emit("error", error);
|
|
784
1039
|
this.settleFinish(error);
|
|
785
|
-
this.cleanup("error", error);
|
|
1040
|
+
this.cleanup("error", error, "error");
|
|
786
1041
|
}
|
|
787
1042
|
handleAbort() {
|
|
788
1043
|
const error = new AbortError();
|
|
789
1044
|
this.emitter.emit("error", error);
|
|
790
1045
|
this.settleFinish(error);
|
|
791
|
-
this.cleanup("canceled", error);
|
|
1046
|
+
this.cleanup("canceled", error, "user_action");
|
|
792
1047
|
}
|
|
793
|
-
setState(newState) {
|
|
1048
|
+
setState(newState, reason) {
|
|
794
1049
|
if (this._state === newState) return;
|
|
795
1050
|
const oldState = this._state;
|
|
796
1051
|
this._state = newState;
|
|
797
|
-
|
|
1052
|
+
const update = {
|
|
798
1053
|
old_state: oldState,
|
|
799
1054
|
new_state: newState
|
|
800
|
-
}
|
|
1055
|
+
};
|
|
1056
|
+
if (reason !== void 0) update.reason = reason;
|
|
1057
|
+
this.emitter.emit("state_change", update);
|
|
801
1058
|
}
|
|
802
|
-
cleanup(finalState, error) {
|
|
803
|
-
this.setState(finalState);
|
|
1059
|
+
cleanup(finalState, error, reason) {
|
|
1060
|
+
this.setState(finalState, reason);
|
|
804
1061
|
this.stopKeepalive();
|
|
805
1062
|
if (this.signal && this.abortHandler) {
|
|
806
1063
|
this.signal.removeEventListener("abort", this.abortHandler);
|
|
@@ -834,7 +1091,7 @@ var RealtimeSttSession = class {
|
|
|
834
1091
|
const error = new ConnectionError("WebSocket is not open");
|
|
835
1092
|
this.emitter.emit("error", error);
|
|
836
1093
|
this.settleFinish(error);
|
|
837
|
-
this.cleanup("error", error);
|
|
1094
|
+
this.cleanup("error", error, "error");
|
|
838
1095
|
if (shouldThrow) throw error;
|
|
839
1096
|
return;
|
|
840
1097
|
}
|
|
@@ -844,7 +1101,7 @@ var RealtimeSttSession = class {
|
|
|
844
1101
|
const error = new ConnectionError("WebSocket send failed", err);
|
|
845
1102
|
this.emitter.emit("error", error);
|
|
846
1103
|
this.settleFinish(error);
|
|
847
|
-
this.cleanup("error", error);
|
|
1104
|
+
this.cleanup("error", error, "error");
|
|
848
1105
|
if (shouldThrow) throw error;
|
|
849
1106
|
}
|
|
850
1107
|
}
|
|
@@ -866,6 +1123,412 @@ var RealtimeSttSession = class {
|
|
|
866
1123
|
}
|
|
867
1124
|
};
|
|
868
1125
|
|
|
1126
|
+
//#endregion
|
|
1127
|
+
//#region ../core/src/realtime/tts.ts
|
|
1128
|
+
const MAX_STREAMS_PER_CONNECTION = 5;
|
|
1129
|
+
const DEFAULT_KEEPALIVE_INTERVAL_MS = 5e3;
|
|
1130
|
+
const MIN_KEEPALIVE_INTERVAL_MS = 1e3;
|
|
1131
|
+
const DEFAULT_CONNECT_TIMEOUT_MS = 2e4;
|
|
1132
|
+
/**
 * Generates a random version-4 UUID string to use as a TTS stream ID.
 *
 * Uses `crypto.randomUUID()` when the runtime provides it. That method is
 * not available everywhere (e.g. non-secure browser contexts), so the
 * function falls back to building the UUID from `crypto.getRandomValues()`,
 * and finally from `Math.random()` when no Web Crypto API exists at all.
 * NOTE(review): the `Math.random()` path assumes the ID only needs to be
 * unique, not unguessable — confirm against server expectations.
 *
 * @returns UUID in canonical 8-4-4-4-12 lowercase hex form
 */
function generateStreamId() {
	const cryptoApi = globalThis.crypto;
	if (typeof cryptoApi?.randomUUID === "function") return cryptoApi.randomUUID();
	const bytes = new Uint8Array(16);
	if (typeof cryptoApi?.getRandomValues === "function") cryptoApi.getRandomValues(bytes);
	else for (let i = 0; i < bytes.length; i++) bytes[i] = Math.floor(Math.random() * 256);
	// Stamp the version (4) and variant (10xx) bits required for a v4 UUID.
	bytes[6] = bytes[6] & 15 | 64;
	bytes[8] = bytes[8] & 63 | 128;
	const hex = Array.from(bytes, (byte) => byte.toString(16).padStart(2, "0")).join("");
	return `${hex.slice(0, 8)}-${hex.slice(8, 12)}-${hex.slice(12, 16)}-${hex.slice(16, 20)}-${hex.slice(20)}`;
}
|
|
1135
|
+
/**
 * Decodes a base64 string into its raw bytes.
 *
 * @param base64 - Base64-encoded input (decoded with `atob`)
 * @returns The decoded bytes, one per character of the `atob` output
 */
function decodeBase64ToUint8Array(base64) {
	const decoded = atob(base64);
	// `atob` yields one character (code 0-255) per byte.
	return Uint8Array.from(decoded, (char) => char.charCodeAt(0));
}
|
|
1141
|
+
/**
 * Merge a partial TTS stream input with defaults, validate required fields,
 * and return a fully resolved config ready for the WebSocket.
 *
 * Values in `input` override `defaults`, except properties explicitly set to
 * `undefined`, which are ignored so they cannot erase a configured default.
 *
 * @param input - Per-stream settings (may be partial)
 * @param defaults - Fallback settings (e.g. `tts_defaults`)
 * @returns Config with `model`, `language`, `voice`, `audio_format`,
 *   `stream_id` (generated when not supplied) and, when defined,
 *   `sample_rate` / `bitrate`
 * @throws {Error} When any of `model`, `language`, `voice`, `audio_format`
 *   is missing (falsy) after merging
 */
function resolveStreamConfig(input, defaults) {
	// Drop explicit `undefined` values before merging; a plain spread would
	// let `{ voice: undefined }` overwrite a valid default voice.
	const provided = Object.fromEntries(Object.entries(input ?? {}).filter(([, value]) => value !== void 0));
	const merged = {
		...defaults,
		...provided
	};
	const { model, language, voice, audio_format } = merged;
	const required = {
		model,
		language,
		voice,
		audio_format
	};
	const missing = Object.keys(required).filter((field) => !required[field]);
	if (missing.length > 0) throw new Error(`Missing required TTS stream fields: ${missing.join(", ")}. Provide them directly or via tts_defaults in your connection config.`);
	return {
		model,
		language,
		voice,
		audio_format,
		...merged.sample_rate !== void 0 && { sample_rate: merged.sample_rate },
		...merged.bitrate !== void 0 && { bitrate: merged.bitrate },
		stream_id: merged.stream_id ?? generateStreamId()
	};
}
|
|
1170
|
+
/**
 * Handle for one TTS stream on a WebSocket connection.
 *
 * Emits typed events (`audio`, `audioEnd`, `terminated`, `error`) and
 * supports async iteration over decoded audio chunks.
 *
 * @example Event-based
 * ```typescript
 * stream.on('audio', (chunk) => process(chunk));
 * stream.on('terminated', () => console.log('done'));
 * stream.sendText("Hello world");
 * stream.finish();
 * ```
 *
 * @example Async iteration
 * ```typescript
 * stream.sendText("Hello world");
 * stream.finish();
 * for await (const chunk of stream) {
 *   process(chunk);
 * }
 * ```
 */
var RealtimeTtsStream = class extends TypedEmitter {
	streamId;
	_state = "active";
	audioQueue = new AsyncEventQueue();
	iteratorAttached = false;
	connection;
	ownsConnection;
	/** @internal */
	constructor(streamId, connection, ownsConnection) {
		super();
		this.streamId = streamId;
		this.connection = connection;
		this.ownsConnection = ownsConnection;
	}
	/** Current stream lifecycle state. */
	get state() {
		return this._state;
	}
	/**
	 * Send one text chunk to the TTS stream.
	 *
	 * @param text - Text to synthesize
	 * @param options.end - If true, signals this is the final text chunk;
	 *   the stream then moves to the `finishing` state
	 * @throws {StateError} When the stream is not in the `active` state
	 */
	sendText(text, options) {
		if (this._state !== "active") throw new StateError(`Cannot send text in state '${this._state}'`);
		const isFinal = options?.end;
		this.connection._sendJson({
			text,
			text_end: isFinal ?? false,
			stream_id: this.streamId
		});
		if (isFinal) this._state = "finishing";
	}
	/**
	 * Pipe an async iterable of text chunks into the stream.
	 * Automatically calls {@link finish} when the iterable completes and the
	 * stream is still active. Stops consuming the iterable as soon as the
	 * stream leaves the `active` state.
	 *
	 * Designed for concurrent use: call `sendStream()` and consume audio
	 * via `for await` or events simultaneously.
	 *
	 * @example LLM token piping
	 * ```typescript
	 * stream.sendStream(llmTokenStream);
	 * for await (const audio of stream) { forward(audio); }
	 * ```
	 */
	async sendStream(source) {
		for await (const chunk of source) {
			if (this._state === "active") this.sendText(chunk);
			else break;
		}
		if (this._state === "active") this.finish();
	}
	/**
	 * Signal that no more text will be sent for this stream by sending an
	 * empty final text chunk.
	 * The server will finish generating audio and send `terminated`.
	 *
	 * @throws {StateError} When the stream is not in the `active` state
	 */
	finish() {
		if (this._state !== "active") throw new StateError(`Cannot finish in state '${this._state}'`);
		this.sendText("", { end: true });
	}
	/**
	 * Cancel this stream. The server will stop generating and send `terminated`.
	 * Does nothing when the stream has already ended or errored; failures
	 * while sending the cancel message are ignored.
	 */
	cancel() {
		const alreadyTerminal = this._state === "ended" || this._state === "error";
		if (alreadyTerminal) return;
		try {
			this.connection._sendJson({
				stream_id: this.streamId,
				cancel: true
			});
		} catch {}
	}
	/**
	 * Close this stream. For single-stream usage (created via `tts(input)`),
	 * also closes the underlying WebSocket connection.
	 */
	close() {
		this._endStream();
		if (this.ownsConnection) this.connection.close();
	}
	/**
	 * Async iterator that yields decoded audio chunks.
	 *
	 * The returned iterator's `return()` resets the internal iterator-attach
	 * flag and drops any buffered audio, so consumers that exit `for await`
	 * early (via `break` etc.) stop accruing memory while the stream keeps
	 * receiving server audio.
	 */
	[Symbol.asyncIterator]() {
		this.iteratorAttached = true;
		const queueIterator = this.audioQueue[Symbol.asyncIterator]();
		const detach = (value) => {
			this.iteratorAttached = false;
			this.audioQueue.clear();
			return queueIterator.return?.(value) ?? Promise.resolve({
				value,
				done: true
			});
		};
		return {
			next: () => queueIterator.next(),
			return: detach
		};
	}
	/**
	 * Push an audio chunk to the async iterator queue only when a consumer
	 * has attached via `[Symbol.asyncIterator]()`. Listener-only consumers
	 * (the documented `.on('audio', ...)` pattern) never drain the queue,
	 * so pushing unconditionally would leak buffered chunks.
	 */
	enqueueIfIterating(chunk) {
		if (!this.iteratorAttached) return;
		this.audioQueue.push(chunk);
	}
	/**
	 * @internal Dispatch a server event to this stream.
	 *
	 * An event carrying `error_code` moves the stream to `error`, emits
	 * `error`, aborts the audio queue and deactivates the stream; nothing
	 * else in that event is processed. Otherwise `audio`, `audio_end` and
	 * `terminated` are handled in that order.
	 */
	_handleEvent(event) {
		if (event.error_code !== void 0) {
			const errPayload = event.error_message !== void 0 ? {
				error_code: event.error_code,
				error_message: event.error_message
			} : { error_code: event.error_code };
			const error = mapErrorResponse(errPayload);
			this._state = "error";
			this.emit("error", error);
			this.audioQueue.abort(error);
			this.connection._deactivateStream(this.streamId);
			return;
		}
		if (event.audio !== void 0) {
			const chunk = decodeBase64ToUint8Array(event.audio);
			this.emit("audio", chunk);
			this.enqueueIfIterating(chunk);
		}
		if (event.audio_end) this.emit("audioEnd");
		if (event.terminated) this._endStream();
	}
	/**
	 * @internal Force-end this stream (connection closing).
	 * Ends the audio queue without emitting `terminated` and without
	 * notifying the connection; no-op once the stream has ended or errored.
	 */
	_forceEnd() {
		const alreadyTerminal = this._state === "ended" || this._state === "error";
		if (alreadyTerminal) return;
		this._state = "ended";
		this.audioQueue.end();
	}
	/**
	 * Marks the stream as ended, emits `terminated`, ends the audio queue
	 * and deactivates the stream on its connection. No-op when the stream
	 * is already in the `ended` state.
	 */
	_endStream() {
		if (this._state === "ended") return;
		this._state = "ended";
		this.emit("terminated");
		this.audioQueue.end();
		this.connection._deactivateStream(this.streamId);
	}
};
|
|
1341
|
+
/**
 * WebSocket connection for real-time Text-to-Speech.
 *
 * Multiplexes up to `MAX_STREAMS_PER_CONNECTION` concurrent streams by
 * `stream_id`. While the connection is open a `{ keep_alive: true }`
 * message is sent on a fixed interval.
 *
 * @example Multi-stream
 * ```typescript
 * const conn = new RealtimeTtsConnection(apiKey, wsUrl, ttsDefaults);
 * await conn.connect();
 *
 * const s1 = await conn.stream({ model, voice, language, audio_format });
 * s1.sendText("Hello");
 * s1.finish();
 * for await (const chunk of s1) { ... }
 *
 * conn.close();
 * ```
 */
var RealtimeTtsConnection = class extends TypedEmitter {
	apiKey;
	wsUrl;
	ttsDefaults;
	keepaliveIntervalMs;
	connectTimeoutMs;
	ws = null;
	connected = false;
	connecting = false;
	keepaliveTimer = null;
	activeStreams = /* @__PURE__ */ new Map();
	/**
	 * @param apiKey - API key sent with every stream configuration message
	 * @param wsUrl - WebSocket endpoint to connect to
	 * @param ttsDefaults - Defaults merged into each stream configuration
	 * @param options - Optional `keepalive_interval_ms` / `connect_timeout_ms`;
	 *   non-finite or non-positive values fall back to the defaults, and the
	 *   keepalive interval is clamped to at least `MIN_KEEPALIVE_INTERVAL_MS`
	 */
	constructor(apiKey, wsUrl, ttsDefaults = {}, options) {
		super();
		this.apiKey = apiKey;
		this.wsUrl = wsUrl;
		this.ttsDefaults = ttsDefaults;
		const keepaliveMs = options?.keepalive_interval_ms ?? DEFAULT_KEEPALIVE_INTERVAL_MS;
		this.keepaliveIntervalMs = Number.isFinite(keepaliveMs) && keepaliveMs > 0 ? Math.max(keepaliveMs, MIN_KEEPALIVE_INTERVAL_MS) : DEFAULT_KEEPALIVE_INTERVAL_MS;
		const connectMs = options?.connect_timeout_ms ?? DEFAULT_CONNECT_TIMEOUT_MS;
		this.connectTimeoutMs = Number.isFinite(connectMs) && connectMs > 0 ? connectMs : DEFAULT_CONNECT_TIMEOUT_MS;
	}
	/** Whether the WebSocket is connected. */
	get isConnected() {
		return this.connected;
	}
	/**
	 * Open the WebSocket connection and start keepalive.
	 * Called automatically by {@link stream} if not yet connected.
	 *
	 * @throws StateError when another `connect()` call is still in flight
	 * @throws ConnectionError when the socket cannot be created, fails to
	 *   open, or does not open within the connect timeout
	 */
	async connect() {
		if (this.connected) return;
		if (this.connecting) throw new StateError("Connection is already being established");
		this.connecting = true;
		try {
			await this.createWebSocket();
			this.connected = true;
			this.startKeepalive();
		} finally {
			this.connecting = false;
		}
	}
	/**
	 * Open a new TTS stream on this connection.
	 * Auto-connects if the WebSocket is not yet open.
	 *
	 * @param input - Stream configuration (merged with tts_defaults)
	 * @returns A ready-to-use stream handle
	 */
	async stream(input = {}) {
		return this._openStream(input, false);
	}
	/**
	 * @internal Open a stream, optionally marking it as connection-owning.
	 *
	 * If anything fails after the connection is up (stream limit reached,
	 * invalid or duplicate configuration, failed send) the stream registered
	 * by this call is removed again so it does not occupy a slot. When the
	 * stream was meant to own the connection, the connection is closed too,
	 * because the caller never receives a handle that could close it.
	 */
	async _openStream(input, ownsConnection) {
		if (!this.connected) await this.connect();
		let registeredId;
		try {
			if (this.activeStreams.size >= MAX_STREAMS_PER_CONNECTION) throw new StateError(`Maximum concurrent streams (${MAX_STREAMS_PER_CONNECTION}) reached`);
			const config = resolveStreamConfig(input, this.ttsDefaults);
			if (this.activeStreams.has(config.stream_id)) throw new StateError(`Stream '${config.stream_id}' is already active on this connection`);
			const stream = new RealtimeTtsStream(config.stream_id, this, ownsConnection);
			this.activeStreams.set(config.stream_id, stream);
			registeredId = config.stream_id;
			const configPayload = {
				api_key: this.apiKey,
				...config
			};
			this._sendJson(configPayload);
			return stream;
		} catch (err) {
			// Only roll back the entry this call added; a duplicate-id failure
			// must not evict the stream that legitimately holds that id.
			if (registeredId !== void 0) this.activeStreams.delete(registeredId);
			if (ownsConnection) this.close();
			throw err;
		}
	}
	/**
	 * Close the WebSocket connection and terminate all active streams.
	 */
	close() {
		this.stopKeepalive();
		for (const stream of this.activeStreams.values()) stream._forceEnd();
		this.activeStreams.clear();
		if (this.ws) {
			try {
				this.ws.close();
			} catch {}
			this.ws = null;
		}
		this.connected = false;
		this.emit("close");
	}
	/** @internal Send a JSON payload on the WebSocket. */
	_sendJson(payload) {
		if (!this.ws || !this.connected) throw new StateError("TTS connection is not open");
		this.ws.send(JSON.stringify(payload));
	}
	/** @internal Remove a stream from the active set. */
	_deactivateStream(streamId) {
		this.activeStreams.delete(streamId);
	}
	/**
	 * Create the WebSocket and resolve once it has opened. Rejects with a
	 * ConnectionError on constructor failure, on an "error" event before
	 * open, or when the connect timeout elapses first.
	 */
	async createWebSocket() {
		return new Promise((resolve, reject) => {
			const timer = setTimeout(() => {
				try {
					ws.close();
				} catch {}
				reject(new ConnectionError("TTS WebSocket connection timed out"));
			}, this.connectTimeoutMs);
			let ws;
			try {
				ws = new WebSocket(this.wsUrl);
				ws.binaryType = "arraybuffer";
			} catch (err) {
				clearTimeout(timer);
				reject(new ConnectionError(`Failed to create TTS WebSocket: ${err instanceof Error ? err.message : String(err)}`));
				return;
			}
			const onOpen = () => {
				clearTimeout(timer);
				ws.removeEventListener("error", onError);
				this.ws = ws;
				ws.addEventListener("message", (event) => {
					this.handleMessage(event);
				});
				ws.addEventListener("close", () => {
					// close() clears `connected` itself, so this branch only runs
					// for closes that were not initiated through close().
					if (this.connected) {
						this.connected = false;
						this.stopKeepalive();
						for (const stream of this.activeStreams.values()) stream._forceEnd();
						this.activeStreams.clear();
						this.emit("close");
					}
				});
				resolve();
			};
			const onError = () => {
				clearTimeout(timer);
				ws.removeEventListener("open", onOpen);
				reject(new ConnectionError("TTS WebSocket connection failed"));
			};
			ws.addEventListener("open", onOpen);
			ws.addEventListener("error", onError);
		});
	}
	/**
	 * Parse an incoming text frame and dispatch it. Non-string frames and
	 * invalid JSON are ignored. Messages with a `stream_id` go to that
	 * active stream (or are dropped if it is unknown); messages without one
	 * that carry `error_code` are emitted as a connection-level "error".
	 */
	handleMessage(event) {
		if (typeof event.data !== "string") return;
		let parsed;
		try {
			parsed = JSON.parse(event.data);
		} catch {
			return;
		}
		const streamId = parsed.stream_id;
		if (streamId !== void 0) {
			const stream = this.activeStreams.get(streamId);
			if (stream) stream._handleEvent(parsed);
			return;
		}
		if (parsed.error_code !== void 0) {
			const errPayload = { error_code: parsed.error_code };
			if (parsed.error_message !== void 0) errPayload.error_message = parsed.error_message;
			const error = mapErrorResponse(errPayload);
			this.emit("error", error);
		}
	}
	/** Start the keepalive interval (no-op if it is already running). */
	startKeepalive() {
		if (this.keepaliveTimer) return;
		this.keepaliveTimer = setInterval(() => {
			// Best effort: a failed keepalive send is ignored here.
			if (this.connected && this.ws) try {
				this.ws.send(JSON.stringify({ keep_alive: true }));
			} catch {}
		}, this.keepaliveIntervalMs);
	}
	/** Stop the keepalive interval if one is running. */
	stopKeepalive() {
		if (this.keepaliveTimer) {
			clearInterval(this.keepaliveTimer);
			this.keepaliveTimer = null;
		}
	}
};
|
|
1531
|
+
|
|
869
1532
|
//#endregion
|
|
870
1533
|
//#region ../core/src/realtime/segments.ts
|
|
871
1534
|
/**
|
|
@@ -1021,199 +1684,210 @@ var RealtimeUtteranceBuffer = class {
|
|
|
1021
1684
|
markEndpoint() {
|
|
1022
1685
|
const trailingSegments = this.segmentBuffer.flushAll();
|
|
1023
1686
|
const segments = [...this.pendingSegments, ...trailingSegments];
|
|
1024
|
-
this.pendingSegments = [];
|
|
1025
|
-
if (segments.length === 0) return;
|
|
1026
|
-
return buildUtterance(segments, this.lastFinalAudioProcMs, this.lastTotalAudioProcMs);
|
|
1027
|
-
}
|
|
1028
|
-
/**
|
|
1029
|
-
* Clear buffered segments and tokens.
|
|
1030
|
-
*/
|
|
1031
|
-
reset() {
|
|
1032
|
-
this.pendingSegments = [];
|
|
1033
|
-
this.segmentBuffer.reset();
|
|
1034
|
-
}
|
|
1035
|
-
};
|
|
1036
|
-
function buildUtterance(segments, finalAudioProcMs, totalAudioProcMs) {
|
|
1037
|
-
const tokens = segments.flatMap((segment) => segment.tokens);
|
|
1038
|
-
return {
|
|
1039
|
-
text: segments.map((segment) => segment.text).join(""),
|
|
1040
|
-
segments,
|
|
1041
|
-
tokens,
|
|
1042
|
-
start_ms: segments[0]?.start_ms,
|
|
1043
|
-
end_ms: segments[segments.length - 1]?.end_ms,
|
|
1044
|
-
speaker: getCommonValue(segments.map((segment) => segment.speaker)),
|
|
1045
|
-
language: getCommonValue(segments.map((segment) => segment.language)),
|
|
1046
|
-
final_audio_proc_ms: finalAudioProcMs,
|
|
1047
|
-
total_audio_proc_ms: totalAudioProcMs
|
|
1048
|
-
};
|
|
1049
|
-
}
|
|
1050
|
-
function getCommonValue(values) {
|
|
1051
|
-
let common;
|
|
1052
|
-
for (const value of values) {
|
|
1053
|
-
if (value === void 0) return;
|
|
1054
|
-
if (common === void 0) {
|
|
1055
|
-
common = value;
|
|
1056
|
-
continue;
|
|
1057
|
-
}
|
|
1058
|
-
if (value !== common) return;
|
|
1059
|
-
}
|
|
1060
|
-
return common;
|
|
1061
|
-
}
|
|
1062
|
-
|
|
1063
|
-
//#endregion
|
|
1064
|
-
//#region src/http/errors.ts
|
|
1065
|
-
/**
|
|
1066
|
-
* HTTP error handling for the Soniox SDK
|
|
1067
|
-
*/
|
|
1068
|
-
/** Maximum body text length to include in error details (4KB) */
|
|
1069
|
-
const MAX_BODY_TEXT_LENGTH = 4096;
|
|
1070
|
-
/**
|
|
1071
|
-
* HTTP error class for all HTTP-related failures (REST API).
|
|
1072
|
-
*
|
|
1073
|
-
* Thrown when HTTP requests fail due to network issues, timeouts,
|
|
1074
|
-
* server errors, or response parsing failures.
|
|
1075
|
-
*/
|
|
1076
|
-
var SonioxHttpError = class extends SonioxError {
|
|
1077
|
-
/** Request URL */
|
|
1078
|
-
url;
|
|
1079
|
-
/** HTTP method */
|
|
1080
|
-
method;
|
|
1081
|
-
/** Response headers (only for http_error) */
|
|
1082
|
-
headers;
|
|
1083
|
-
/** Response body text, capped at 4KB (only for http_error/parse_error) */
|
|
1084
|
-
bodyText;
|
|
1085
|
-
constructor(details) {
|
|
1086
|
-
super(details.message, details.code, details.statusCode, details.cause);
|
|
1087
|
-
this.name = "SonioxHttpError";
|
|
1088
|
-
this.url = details.url;
|
|
1089
|
-
this.method = details.method;
|
|
1090
|
-
this.headers = details.headers;
|
|
1091
|
-
this.bodyText = details.bodyText;
|
|
1092
|
-
}
|
|
1093
|
-
/**
|
|
1094
|
-
* Creates a human-readable string representation
|
|
1095
|
-
*/
|
|
1096
|
-
toString() {
|
|
1097
|
-
const parts = [`SonioxHttpError [${this.code}]: ${this.message}`];
|
|
1098
|
-
parts.push(` Method: ${this.method}`);
|
|
1099
|
-
parts.push(` URL: ${this.url}`);
|
|
1100
|
-
if (this.statusCode !== void 0) parts.push(` Status: ${this.statusCode}`);
|
|
1101
|
-
return parts.join("\n");
|
|
1687
|
+
this.pendingSegments = [];
|
|
1688
|
+
if (segments.length === 0) return;
|
|
1689
|
+
return buildUtterance(segments, this.lastFinalAudioProcMs, this.lastTotalAudioProcMs);
|
|
1102
1690
|
}
|
|
1103
1691
|
/**
|
|
1104
|
-
*
|
|
1692
|
+
* Clear buffered segments and tokens.
|
|
1105
1693
|
*/
|
|
1106
|
-
|
|
1107
|
-
|
|
1108
|
-
|
|
1109
|
-
code: this.code,
|
|
1110
|
-
message: this.message,
|
|
1111
|
-
url: this.url,
|
|
1112
|
-
method: this.method,
|
|
1113
|
-
...this.statusCode !== void 0 && { statusCode: this.statusCode },
|
|
1114
|
-
...this.headers !== void 0 && { headers: this.headers },
|
|
1115
|
-
...this.bodyText !== void 0 && { bodyText: this.bodyText }
|
|
1116
|
-
};
|
|
1694
|
+
reset() {
|
|
1695
|
+
this.pendingSegments = [];
|
|
1696
|
+
this.segmentBuffer.reset();
|
|
1117
1697
|
}
|
|
1118
1698
|
};
|
|
1119
|
-
|
|
1120
|
-
|
|
1121
|
-
|
|
1122
|
-
|
|
1123
|
-
|
|
1124
|
-
|
|
1125
|
-
|
|
1126
|
-
|
|
1127
|
-
|
|
1128
|
-
|
|
1129
|
-
|
|
1130
|
-
|
|
1131
|
-
|
|
1132
|
-
* Creates a timeout error
|
|
1133
|
-
*/
|
|
1134
|
-
function createTimeoutError(url, method, timeoutMs) {
|
|
1135
|
-
return new SonioxHttpError({
|
|
1136
|
-
code: "timeout",
|
|
1137
|
-
message: `Request timed out after ${timeoutMs}ms`,
|
|
1138
|
-
url,
|
|
1139
|
-
method
|
|
1140
|
-
});
|
|
1699
|
+
/**
 * Assemble an utterance object from an ordered list of segments.
 *
 * `text` is the concatenation of the segment texts, `tokens` the flattened
 * segment tokens, and `start_ms` / `end_ms` come from the first and last
 * segment. `speaker` and `language` are whatever `getCommonValue` returns
 * for the per-segment values.
 *
 * @param segments - Segments making up the utterance
 * @param finalAudioProcMs - Stored as `final_audio_proc_ms`
 * @param totalAudioProcMs - Stored as `total_audio_proc_ms`
 */
function buildUtterance(segments, finalAudioProcMs, totalAudioProcMs) {
	const tokens = segments.flatMap(({ tokens: segmentTokens }) => segmentTokens);
	const text = segments.map(({ text: segmentText }) => segmentText).join("");
	const firstSegment = segments[0];
	const lastSegment = segments.at(-1);
	const speakers = segments.map(({ speaker }) => speaker);
	const languages = segments.map(({ language }) => language);
	return {
		text,
		segments,
		tokens,
		start_ms: firstSegment?.start_ms,
		end_ms: lastSegment?.end_ms,
		speaker: getCommonValue(speakers),
		language: getCommonValue(languages),
		final_audio_proc_ms: finalAudioProcMs,
		total_audio_proc_ms: totalAudioProcMs
	};
}
|
|
1142
|
-
|
|
1143
|
-
|
|
1144
|
-
|
|
1145
|
-
|
|
1146
|
-
|
|
1147
|
-
|
|
1148
|
-
|
|
1149
|
-
|
|
1150
|
-
|
|
1151
|
-
|
|
1152
|
-
|
|
1713
|
+
/**
 * Return the single value shared by every entry of `values`.
 *
 * Yields `undefined` when the list is empty, when any entry is
 * `undefined`, or when two entries differ (strict inequality).
 */
function getCommonValue(values) {
	let shared;
	for (const candidate of values) {
		const conflicts = shared !== void 0 && candidate !== shared;
		if (candidate === void 0 || conflicts) return;
		shared = candidate;
	}
	return shared;
}
|
|
1725
|
+
|
|
1726
|
+
//#endregion
|
|
1727
|
+
//#region ../core/src/tts-rest.ts
|
|
1154
1728
|
/**
|
|
1155
|
-
*
|
|
1729
|
+
* Browser-safe REST TTS client.
|
|
1730
|
+
*
|
|
1731
|
+
* Uses only `globalThis.fetch` — no Node-specific dependencies.
|
|
1732
|
+
* Shared by both `@soniox/node` and `@soniox/client`.
|
|
1156
1733
|
*/
|
|
1157
|
-
|
|
1158
|
-
|
|
1159
|
-
|
|
1160
|
-
|
|
1161
|
-
|
|
1162
|
-
|
|
1163
|
-
|
|
1164
|
-
|
|
1165
|
-
|
|
1166
|
-
|
|
1167
|
-
}
|
|
1734
|
+
const DEFAULT_MODEL = "tts-rt-v1-preview";
|
|
1735
|
+
const DEFAULT_LANGUAGE = "en";
|
|
1736
|
+
const DEFAULT_AUDIO_FORMAT = "wav";
|
|
1737
|
+
/**
 * Build the JSON request body for a REST TTS call.
 *
 * `model`, `language` and `audio_format` fall back to the module defaults
 * when they are null or undefined. `sample_rate` and `bitrate` are only
 * included when the caller supplied them.
 */
function buildPayload(options) {
	const { model, language, voice, audio_format: audioFormat, text, sample_rate: sampleRate, bitrate } = options;
	return {
		model: model ?? DEFAULT_MODEL,
		language: language ?? DEFAULT_LANGUAGE,
		voice,
		audio_format: audioFormat ?? DEFAULT_AUDIO_FORMAT,
		text,
		...sampleRate !== void 0 && { sample_rate: sampleRate },
		...bitrate !== void 0 && { bitrate }
	};
}
|
|
1169
1749
|
/**
|
|
1170
|
-
*
|
|
1750
|
+
* Normalizes fetch Headers to a plain object with lowercase keys.
|
|
1751
|
+
* Duplicated here (rather than imported from `@soniox/node`) to keep
|
|
1752
|
+
* this module browser-safe.
|
|
1171
1753
|
*/
|
|
1172
|
-
function
|
|
1173
|
-
const
|
|
1174
|
-
|
|
1175
|
-
|
|
1176
|
-
code: "parse_error",
|
|
1177
|
-
message: `Parse error: ${message}`,
|
|
1178
|
-
url,
|
|
1179
|
-
method,
|
|
1180
|
-
bodyText: cappedBody,
|
|
1181
|
-
cause
|
|
1754
|
+
/**
 * Copy every entry of a `forEach`-able headers collection into a plain
 * object, lowercasing each header name on the way.
 */
function headersToObject(headers) {
	const normalized = {};
	const record = (headerValue, headerName) => {
		normalized[headerName.toLowerCase()] = headerValue;
	};
	headers.forEach(record);
	return normalized;
}
|
|
1184
|
-
|
|
1185
|
-
* Truncates body text to the maximum allowed length
|
|
1186
|
-
*/
|
|
1187
|
-
function truncateBodyText(text) {
|
|
1188
|
-
if (text.length <= MAX_BODY_TEXT_LENGTH) return text;
|
|
1189
|
-
return text.slice(0, MAX_BODY_TEXT_LENGTH) + "... [truncated]";
|
|
1190
|
-
}
|
|
1191
|
-
/**
|
|
1192
|
-
* Type guard to check if an error is an AbortError
|
|
1193
|
-
*/
|
|
1194
|
-
function isAbortError(error) {
|
|
1761
|
+
/**
 * Report whether `error` is an `Error` instance whose name is
 * "AbortError" or "TimeoutError".
 */
function isAbortLikeError(error) {
	if (!(error instanceof Error)) return false;
	const { name } = error;
	return name === "AbortError" || name === "TimeoutError";
}
|
|
1198
1765
|
/**
|
|
1199
|
-
*
|
|
1200
|
-
*
|
|
1201
|
-
|
|
1202
|
-
|
|
1203
|
-
|
|
1204
|
-
|
|
1205
|
-
|
|
1206
|
-
*
|
|
1207
|
-
|
|
1208
|
-
|
|
1209
|
-
|
|
1210
|
-
}
|
|
1211
|
-
|
|
1212
|
-
* Checks if an error is a 404 Not Found error
|
|
1766
|
+
* Browser-safe REST client for TTS generation.
|
|
1767
|
+
*
|
|
1768
|
+
* Provides `generate()` (buffered) and `generateStream()` (streaming)
|
|
1769
|
+
* using only `globalThis.fetch`. HTTP failures are surfaced as
|
|
1770
|
+
* {@link SonioxHttpError}, matching the rest of the Soniox SDK.
|
|
1771
|
+
*
|
|
1772
|
+
* Authentication uses the `Authorization: Bearer <api_key>` header.
|
|
1773
|
+
*
|
|
1774
|
+
* @example
|
|
1775
|
+
* ```typescript
|
|
1776
|
+
* const client = new TtsRestClient(apiKey, 'https://tts-rt.soniox.com');
|
|
1777
|
+
* const audio = await client.generate({ text: 'Hello', voice: 'Adrian' });
|
|
1778
|
+
* ```
|
|
1213
1779
|
*/
|
|
1214
|
-
|
|
1215
|
-
|
|
1216
|
-
|
|
1780
|
+
/**
 * REST client for TTS generation built on `globalThis.fetch`.
 *
 * `generate()` returns the whole audio at once; `generateStream()` yields
 * it chunk by chunk. Both POST to `<ttsApiUrl>/tts` and authenticate with
 * an `Authorization: Bearer <api_key>` header.
 */
var TtsRestClient = class {
	apiKey;
	ttsApiUrl;
	/**
	 * @param apiKey - API key used for the Bearer authorization header
	 * @param ttsApiUrl - Base URL of the TTS REST API (without `/tts`)
	 */
	constructor(apiKey, ttsApiUrl) {
		this.apiKey = apiKey;
		this.ttsApiUrl = ttsApiUrl;
	}
	/**
	 * Generate speech audio from text. Returns the full audio as a `Uint8Array`.
	 *
	 * @throws {@link SonioxHttpError} on non-2xx responses, network failures,
	 * or aborted requests.
	 */
	async generate(options) {
		const url = `${this.ttsApiUrl}/tts`;
		const response = await this.sendRequest(url, options);
		return new Uint8Array(await response.arrayBuffer());
	}
	/**
	 * Generate speech audio from text as a streaming async iterable.
	 *
	 * Yields `Uint8Array` chunks as they arrive from the server response body.
	 * If the consumer stops iterating early (`break`, `return`, a thrown
	 * error) the response body is cancelled so the underlying transfer does
	 * not keep running in the background.
	 *
	 * **Known limitation:** Mid-stream server errors (reported via HTTP trailers)
	 * cannot be detected through the `fetch` API. The iterator may end early
	 * without an explicit error. Use WebSocket TTS for reliable error detection.
	 *
	 * @throws {@link SonioxHttpError} on non-2xx responses, network failures,
	 * or aborted requests (before the stream starts).
	 */
	async *generateStream(options) {
		const url = `${this.ttsApiUrl}/tts`;
		const response = await this.sendRequest(url, options);
		if (!response.body) throw createHttpError(url, "POST", response.status, headersToObject(response.headers), "Response has no body stream");
		const reader = response.body.getReader();
		let drained = false;
		try {
			while (true) {
				const { done, value } = await reader.read();
				if (done) {
					drained = true;
					break;
				}
				yield value;
			}
		} finally {
			if (!drained) try {
				// releaseLock() alone would leave the body open and downloading.
				await reader.cancel();
			} catch {
				// The stream has already errored; the original failure (if any)
				// is still propagated by the surrounding try/finally.
			}
			reader.releaseLock();
		}
	}
	/**
	 * Internal request helper. Performs the fetch, maps network/abort failures
	 * to {@link SonioxHttpError}, and throws on non-2xx responses.
	 *
	 * @param url - Fully-qualified request URL
	 * @param options - Generation options; `options.signal`, when set, is
	 *   passed to `fetch` for cancellation
	 * @returns The successful (`response.ok`) fetch response
	 */
	async sendRequest(url, options) {
		const payload = buildPayload(options);
		let response;
		try {
			response = await globalThis.fetch(url, {
				method: "POST",
				headers: {
					Authorization: `Bearer ${this.apiKey}`,
					"Content-Type": "application/json"
				},
				body: JSON.stringify(payload),
				...options.signal && { signal: options.signal }
			});
		} catch (cause) {
			if (isAbortLikeError(cause)) throw createAbortError(url, "POST", cause);
			throw createNetworkError(url, "POST", cause);
		}
		if (!response.ok) {
			// The body is only used for diagnostics; an unreadable body must not
			// mask the HTTP error itself.
			const bodyText = await response.text().catch(() => "");
			throw createHttpError(url, "POST", response.status, headersToObject(response.headers), bodyText);
		}
		return response;
	}
};
|
|
1854
|
+
|
|
1855
|
+
//#endregion
|
|
1856
|
+
//#region src/async/auth.ts
|
|
1857
|
+
/**
 * Auth endpoints of the REST API (temporary API keys).
 */
var SonioxAuthAPI = class {
	/**
	 * @param http - HTTP client exposing `request()`; used for all calls
	 */
	constructor(http) {
		this.http = http;
	}
	/**
	 * Creates a temporary API key for client-side use.
	 *
	 * @param request - Request parameters for the temporary key;
	 *   `expires_in_seconds` must be a finite number within
	 *   `SONIOX_TMP_API_KEY_DURATION_MIN`..`SONIOX_TMP_API_KEY_DURATION_MAX`
	 * @param signal - Optional AbortSignal for cancellation
	 * @returns The temporary API key response
	 * @throws Error when `expires_in_seconds` is out of range or not finite
	 *
	 * @example
	 * ```typescript
	 * const sttKey = await client.auth.createTemporaryKey({
	 *   usage_type: 'transcribe_websocket',
	 *   expires_in_seconds: 300,
	 * });
	 *
	 * const ttsKey = await client.auth.createTemporaryKey({
	 *   usage_type: 'tts_rt',
	 *   expires_in_seconds: 300,
	 * });
	 * ```
	 */
	async createTemporaryKey(request, signal) {
		const lifetime = request.expires_in_seconds;
		// Validate against the module-level limits rather than repeating the numbers.
		const outOfRange = !Number.isFinite(lifetime) || lifetime < SONIOX_TMP_API_KEY_DURATION_MIN || lifetime > SONIOX_TMP_API_KEY_DURATION_MAX;
		if (outOfRange) throw new Error(`expires_in_seconds must be a finite number between ${SONIOX_TMP_API_KEY_DURATION_MIN} and ${SONIOX_TMP_API_KEY_DURATION_MAX}`);
		const response = await this.http.request({
			method: "POST",
			path: "/v1/auth/temporary-api-key",
			body: request,
			...signal && { signal }
		});
		return response.data;
	}
};
|
|
1217
1891
|
|
|
1218
1892
|
//#endregion
|
|
1219
1893
|
//#region src/async/files.ts
|
|
@@ -2687,6 +3361,86 @@ var SonioxSttApi = class {
|
|
|
2687
3361
|
}
|
|
2688
3362
|
};
|
|
2689
3363
|
|
|
3364
|
+
//#endregion
|
|
3365
|
+
//#region src/async/tts.ts
|
|
3366
|
+
/**
 * Text-to-Speech REST API for Node.
 *
 * Extends `TtsRestClient` (so `generate()` / `generateStream()` are
 * available) and adds `generateToFile()` plus `listModels()`, which goes
 * through the HTTP client passed to the constructor.
 */
var SonioxTtsApi = class extends TtsRestClient {
	http;
	/**
	 * @param apiKey - Forwarded to `TtsRestClient`
	 * @param ttsApiUrl - Forwarded to `TtsRestClient`
	 * @param http - HTTP client used by `listModels()`
	 */
	constructor(apiKey, ttsApiUrl, http) {
		super(apiKey, ttsApiUrl);
		this.http = http;
	}
	/**
	 * Generate speech audio and write it to a file or a writable stream.
	 *
	 * A string `output` is treated as a file path: the audio is generated in
	 * full and written with `writeFile`. Any other `output` is treated as a
	 * writable stream: chunks from `generateStream()` are written as they
	 * arrive and the writer lock is released afterwards (the stream itself
	 * is not closed here).
	 *
	 * @param output - File path (string) or a `WritableStream<Uint8Array>`
	 * @param options - Generation options
	 * @returns Number of bytes written
	 *
	 * @example Write to file
	 * ```typescript
	 * const bytes = await client.tts.generateToFile('output.wav', {
	 *   text: 'Hello world',
	 *   voice: 'Adrian',
	 *   language: 'en',
	 * });
	 * ```
	 *
	 * @example Write to a writable stream
	 * ```typescript
	 * const bytes = await client.tts.generateToFile(writableStream, {
	 *   text: 'Hello world',
	 *   voice: 'Adrian',
	 *   language: 'en',
	 * });
	 * ```
	 */
	async generateToFile(output, options) {
		if (typeof output !== "string") {
			const writer = output.getWriter();
			let total = 0;
			try {
				for await (const piece of this.generateStream(options)) {
					await writer.write(piece);
					total += piece.byteLength;
				}
			} finally {
				writer.releaseLock();
			}
			return total;
		}
		const audio = await this.generate(options);
		await (0, node_fs_promises.writeFile)(output, audio);
		return audio.byteLength;
	}
	/**
	 * List available TTS models and their voices.
	 *
	 * @param signal - Optional AbortSignal forwarded to the HTTP client
	 * @returns The `models` array of the `/v1/tts-models` response
	 *
	 * @example
	 * ```typescript
	 * const models = await client.tts.listModels();
	 * for (const model of models) {
	 *   console.log(model.id, model.voices.map(v => v.id));
	 * }
	 * ```
	 */
	async listModels(signal) {
		const request = {
			method: "GET",
			path: "/v1/tts-models"
		};
		if (signal) request.signal = signal;
		const { data } = await this.http.request(request);
		return data.models;
	}
};
|
|
3443
|
+
|
|
2690
3444
|
//#endregion
|
|
2691
3445
|
//#region src/async/webhooks.ts
|
|
2692
3446
|
const VALID_STATUSES = ["completed", "error"];
|
|
@@ -3509,36 +4263,75 @@ function combineAbortSignals(...signals) {
|
|
|
3509
4263
|
//#endregion
|
|
3510
4264
|
//#region src/realtime/index.ts
|
|
3511
4265
|
/**
|
|
3512
|
-
* Real-time API factory for creating STT sessions.
|
|
4266
|
+
* Real-time API factory for creating STT sessions and TTS connections.
|
|
3513
4267
|
*
|
|
3514
|
-
* @example
|
|
4268
|
+
* @example STT
|
|
4269
|
+
* ```typescript
|
|
4270
|
+
* const session = client.realtime.stt({ model: 'stt-rt-v4' });
|
|
4271
|
+
* await session.connect();
|
|
4272
|
+
* ```
|
|
4273
|
+
*
|
|
4274
|
+
* @example TTS (single stream)
|
|
3515
4275
|
* ```typescript
|
|
3516
|
-
* const
|
|
3517
|
-
* model: '
|
|
3518
|
-
*
|
|
4276
|
+
* const stream = await client.realtime.tts({
|
|
4277
|
+
* model: 'tts-rt-v1-preview',
|
|
4278
|
+
* voice: 'Adrian',
|
|
4279
|
+
* language: 'en',
|
|
4280
|
+
* audio_format: 'wav',
|
|
3519
4281
|
* });
|
|
4282
|
+
* stream.sendText("Hello");
|
|
4283
|
+
* stream.finish();
|
|
4284
|
+
* for await (const chunk of stream) { ... }
|
|
4285
|
+
* ```
|
|
3520
4286
|
*
|
|
3521
|
-
*
|
|
4287
|
+
* @example TTS (multi-stream)
|
|
4288
|
+
* ```typescript
|
|
4289
|
+
* const conn = await client.realtime.tts.multiStream();
|
|
4290
|
+
* const stream = await conn.stream({
|
|
4291
|
+
* model: 'tts-rt-v1-preview',
|
|
4292
|
+
* voice: 'Adrian',
|
|
4293
|
+
* language: 'en',
|
|
4294
|
+
* audio_format: 'wav',
|
|
4295
|
+
* });
|
|
3522
4296
|
* ```
|
|
3523
4297
|
*/
|
|
3524
4298
|
var SonioxRealtimeApi = class {
	options;
	tts;
	/**
	 * @param options - Resolved realtime options (API key, WebSocket URLs,
	 *   STT/TTS defaults, session and connection options)
	 */
	constructor(options) {
		this.options = options;
		// `tts` is callable (single owned stream) and also carries
		// `multiStream()` for an explicitly managed connection.
		const openSingleStream = (input) => this.createSingleTtsStream(input ?? {});
		this.tts = Object.assign(openSingleStream, { multiStream: () => this.createTtsConnection() });
	}
	/**
	 * Create a new Speech-to-Text session.
	 *
	 * `config` is shallow-merged on top of `stt_defaults` from the client
	 * options, and `options` on top of `default_session_options`;
	 * caller-provided fields override the defaults.
	 */
	stt(config, options) {
		const { api_key: apiKey, ws_base_url: wsBaseUrl, stt_defaults: sttDefaults, default_session_options: sessionDefaults } = this.options;
		const mergedOptions = {
			...sessionDefaults,
			...options
		};
		const mergedConfig = {
			...sttDefaults,
			...config
		};
		return new RealtimeSttSession(apiKey, wsBaseUrl, mergedConfig, mergedOptions);
	}
	/**
	 * Open one TTS stream on a fresh connection that the stream owns.
	 */
	async createSingleTtsStream(input) {
		const { api_key: apiKey, tts_ws_url: ttsWsUrl, tts_defaults: ttsDefaults, tts_connection_options: connectionOptions } = this.options;
		const connection = new RealtimeTtsConnection(apiKey, ttsWsUrl, ttsDefaults ?? {}, connectionOptions);
		return connection._openStream(input, true);
	}
	/**
	 * Create a TTS connection and wait until it is connected.
	 */
	async createTtsConnection() {
		const { api_key: apiKey, tts_ws_url: ttsWsUrl, tts_defaults: ttsDefaults, tts_connection_options: connectionOptions } = this.options;
		const connection = new RealtimeTtsConnection(apiKey, ttsWsUrl, ttsDefaults ?? {}, connectionOptions);
		await connection.connect();
		return connection;
	}
};
|
|
3544
4337
|
|
|
@@ -3546,20 +4339,37 @@ var SonioxRealtimeApi = class {
|
|
|
3546
4339
|
//#region src/client.ts
|
|
3547
4340
|
/**
|
|
3548
4341
|
* Soniox Node Client
|
|
3549
|
-
* @returns {SonioxNodeClient}
|
|
3550
4342
|
*
|
|
3551
4343
|
* @example
|
|
3552
4344
|
* ```typescript
|
|
3553
4345
|
* import { SonioxNodeClient } from '@soniox/node';
|
|
3554
4346
|
*
|
|
3555
|
-
*
|
|
3556
|
-
*
|
|
4347
|
+
* // Default (US) region
|
|
4348
|
+
* const client = new SonioxNodeClient({ api_key: 'your-api-key' });
|
|
4349
|
+
*
|
|
4350
|
+
* // EU region
|
|
4351
|
+
* const client = new SonioxNodeClient({ api_key: 'your-api-key', region: 'eu' });
|
|
4352
|
+
*
|
|
4353
|
+
* // REST TTS
|
|
4354
|
+
* const audio = await client.tts.generate({
|
|
4355
|
+
* text: 'Hello',
|
|
4356
|
+
* voice: 'Adrian',
|
|
4357
|
+
* language: 'en',
|
|
4358
|
+
* });
|
|
4359
|
+
*
|
|
4360
|
+
* // WebSocket TTS
|
|
4361
|
+
* const stream = await client.realtime.tts({
|
|
4362
|
+
* model: 'tts-rt-v1-preview',
|
|
4363
|
+
* voice: 'Adrian',
|
|
4364
|
+
* language: 'en',
|
|
4365
|
+
* audio_format: 'wav',
|
|
3557
4366
|
* });
|
|
3558
4367
|
* ```
|
|
3559
4368
|
*/
|
|
3560
4369
|
var SonioxNodeClient = class {
|
|
3561
4370
|
files;
|
|
3562
4371
|
stt;
|
|
4372
|
+
tts;
|
|
3563
4373
|
models;
|
|
3564
4374
|
webhooks;
|
|
3565
4375
|
auth;
|
|
@@ -3567,7 +4377,14 @@ var SonioxNodeClient = class {
|
|
|
3567
4377
|
constructor(options = {}) {
|
|
3568
4378
|
const apiKey = options.api_key ?? process.env["SONIOX_API_KEY"];
|
|
3569
4379
|
if (!apiKey) throw new Error("Missing API key. Provide it via options.api_key or set the SONIOX_API_KEY environment variable.");
|
|
3570
|
-
const
|
|
4380
|
+
const regionDefaults = resolveConnectionConfig({
|
|
4381
|
+
api_key: apiKey,
|
|
4382
|
+
region: options.region ?? process.env["SONIOX_REGION"],
|
|
4383
|
+
base_domain: options.base_domain ?? process.env["SONIOX_BASE_DOMAIN"],
|
|
4384
|
+
stt_defaults: options.stt_defaults,
|
|
4385
|
+
tts_defaults: options.tts_defaults
|
|
4386
|
+
});
|
|
4387
|
+
const baseURL = options.base_url ?? process.env["SONIOX_API_BASE_URL"] ?? regionDefaults.api_domain;
|
|
3571
4388
|
const http = options.http_client ?? new FetchHttpClient({
|
|
3572
4389
|
base_url: baseURL,
|
|
3573
4390
|
default_headers: {
|
|
@@ -3580,9 +4397,14 @@ var SonioxNodeClient = class {
|
|
|
3580
4397
|
this.models = new SonioxModelsAPI(http);
|
|
3581
4398
|
this.webhooks = new SonioxWebhooksAPI(this.stt);
|
|
3582
4399
|
this.auth = new SonioxAuthAPI(http);
|
|
4400
|
+
this.tts = new SonioxTtsApi(apiKey, options.tts_api_url ?? process.env["SONIOX_TTS_API_URL"] ?? regionDefaults.tts_api_url, http);
|
|
3583
4401
|
this.realtime = new SonioxRealtimeApi({
|
|
3584
4402
|
api_key: apiKey,
|
|
3585
|
-
ws_base_url: options.realtime?.ws_base_url ?? process.env["SONIOX_WS_URL"] ??
|
|
4403
|
+
ws_base_url: options.realtime?.ws_base_url ?? process.env["SONIOX_WS_URL"] ?? regionDefaults.stt_ws_url,
|
|
4404
|
+
tts_ws_url: options.realtime?.tts_ws_url ?? process.env["SONIOX_TTS_WS_URL"] ?? regionDefaults.tts_ws_url,
|
|
4405
|
+
stt_defaults: regionDefaults.stt_defaults,
|
|
4406
|
+
tts_defaults: regionDefaults.tts_defaults,
|
|
4407
|
+
tts_connection_options: options.realtime?.tts_connection_options,
|
|
3586
4408
|
default_session_options: options.realtime?.default_session_options
|
|
3587
4409
|
});
|
|
3588
4410
|
}
|
|
@@ -3600,6 +4422,8 @@ exports.QuotaError = QuotaError;
|
|
|
3600
4422
|
exports.RealtimeError = RealtimeError;
|
|
3601
4423
|
exports.RealtimeSegmentBuffer = RealtimeSegmentBuffer;
|
|
3602
4424
|
exports.RealtimeSttSession = RealtimeSttSession;
|
|
4425
|
+
exports.RealtimeTtsConnection = RealtimeTtsConnection;
|
|
4426
|
+
exports.RealtimeTtsStream = RealtimeTtsStream;
|
|
3603
4427
|
exports.RealtimeUtteranceBuffer = RealtimeUtteranceBuffer;
|
|
3604
4428
|
exports.SONIOX_API_BASE_URL = SONIOX_API_BASE_URL;
|
|
3605
4429
|
exports.SONIOX_API_WEBHOOK_HEADER_ENV = SONIOX_API_WEBHOOK_HEADER_ENV;
|
|
@@ -3608,6 +4432,8 @@ exports.SONIOX_API_WS_URL = SONIOX_API_WS_URL;
|
|
|
3608
4432
|
exports.SONIOX_TMP_API_KEY_DURATION_MAX = SONIOX_TMP_API_KEY_DURATION_MAX;
|
|
3609
4433
|
exports.SONIOX_TMP_API_KEY_DURATION_MIN = SONIOX_TMP_API_KEY_DURATION_MIN;
|
|
3610
4434
|
exports.SONIOX_TMP_API_KEY_USAGE_TYPE = SONIOX_TMP_API_KEY_USAGE_TYPE;
|
|
4435
|
+
exports.SONIOX_TTS_API_BASE_URL = SONIOX_TTS_API_BASE_URL;
|
|
4436
|
+
exports.SONIOX_TTS_WS_URL = SONIOX_TTS_WS_URL;
|
|
3611
4437
|
exports.SonioxError = SonioxError;
|
|
3612
4438
|
exports.SonioxFile = SonioxFile;
|
|
3613
4439
|
exports.SonioxHttpError = SonioxHttpError;
|
|
@@ -3615,6 +4441,7 @@ exports.SonioxNodeClient = SonioxNodeClient;
|
|
|
3615
4441
|
exports.SonioxRealtimeApi = SonioxRealtimeApi;
|
|
3616
4442
|
exports.SonioxTranscript = SonioxTranscript;
|
|
3617
4443
|
exports.SonioxTranscription = SonioxTranscription;
|
|
4444
|
+
exports.SonioxTtsApi = SonioxTtsApi;
|
|
3618
4445
|
exports.StateError = StateError;
|
|
3619
4446
|
exports.TranscriptionListResult = TranscriptionListResult;
|
|
3620
4447
|
exports.buildUrl = buildUrl;
|
|
@@ -3628,6 +4455,7 @@ exports.isSonioxError = isSonioxError;
|
|
|
3628
4455
|
exports.isSonioxHttpError = isSonioxHttpError;
|
|
3629
4456
|
exports.mergeHeaders = mergeHeaders;
|
|
3630
4457
|
exports.normalizeHeaders = normalizeHeaders;
|
|
4458
|
+
exports.resolveConnectionConfig = resolveConnectionConfig;
|
|
3631
4459
|
exports.segmentRealtimeTokens = segmentRealtimeTokens;
|
|
3632
4460
|
exports.segmentTranscript = segmentTranscript;
|
|
3633
4461
|
//# sourceMappingURL=index.cjs.map
|