@soniox/node 1.1.2 → 2.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +43 -0
- package/dist/index.cjs +1073 -245
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +979 -176
- package/dist/index.d.cts.map +1 -1
- package/dist/index.d.mts +979 -176
- package/dist/index.d.mts.map +1 -1
- package/dist/index.mjs +1069 -246
- package/dist/index.mjs.map +1 -1
- package/package.json +1 -1
package/dist/index.mjs
CHANGED
|
@@ -1,36 +1,16 @@
|
|
|
1
|
+
import { writeFile } from "node:fs/promises";
|
|
2
|
+
|
|
1
3
|
//#region src/constants.ts
|
|
2
4
|
const SONIOX_API_BASE_URL = "https://api.soniox.com";
|
|
3
5
|
const SONIOX_API_WS_URL = "wss://stt-rt.soniox.com/transcribe-websocket";
|
|
6
|
+
const SONIOX_TTS_API_BASE_URL = "https://tts-rt.soniox.com";
|
|
7
|
+
const SONIOX_TTS_WS_URL = "wss://tts-rt.soniox.com/tts-websocket";
|
|
4
8
|
const SONIOX_TMP_API_KEY_USAGE_TYPE = "transcribe_websocket";
|
|
5
9
|
const SONIOX_TMP_API_KEY_DURATION_MIN = 1;
|
|
6
10
|
const SONIOX_TMP_API_KEY_DURATION_MAX = 3600;
|
|
7
11
|
const SONIOX_API_WEBHOOK_HEADER_ENV = "SONIOX_API_WEBHOOK_HEADER";
|
|
8
12
|
const SONIOX_API_WEBHOOK_SECRET_ENV = "SONIOX_API_WEBHOOK_SECRET";
|
|
9
13
|
|
|
10
|
-
//#endregion
|
|
11
|
-
//#region src/async/auth.ts
|
|
12
|
-
var SonioxAuthAPI = class {
|
|
13
|
-
constructor(http) {
|
|
14
|
-
this.http = http;
|
|
15
|
-
}
|
|
16
|
-
/**
|
|
17
|
-
* Creates a temporary API key for client-side use.
|
|
18
|
-
*
|
|
19
|
-
* @param request - Request parameters for the temporary key
|
|
20
|
-
* @param signal - Optional AbortSignal for cancellation
|
|
21
|
-
* @returns The temporary API key response
|
|
22
|
-
*/
|
|
23
|
-
async createTemporaryKey(request, signal) {
|
|
24
|
-
if (!Number.isFinite(request.expires_in_seconds) || request.expires_in_seconds < 1 || request.expires_in_seconds > 3600) throw new Error("expires_in_seconds must be a finite number between 1 and 3600");
|
|
25
|
-
return (await this.http.request({
|
|
26
|
-
method: "POST",
|
|
27
|
-
path: "/v1/auth/temporary-api-key",
|
|
28
|
-
body: request,
|
|
29
|
-
...signal && { signal }
|
|
30
|
-
})).data;
|
|
31
|
-
}
|
|
32
|
-
};
|
|
33
|
-
|
|
34
14
|
//#endregion
|
|
35
15
|
//#region ../core/src/errors.ts
|
|
36
16
|
var SonioxError = class extends Error {
|
|
@@ -78,6 +58,208 @@ var SonioxError = class extends Error {
|
|
|
78
58
|
}
|
|
79
59
|
};
|
|
80
60
|
|
|
61
|
+
//#endregion
|
|
62
|
+
//#region ../core/src/http-errors.ts
|
|
63
|
+
/**
|
|
64
|
+
* HTTP error handling for the Soniox SDK.
|
|
65
|
+
*
|
|
66
|
+
* Lives in `@soniox/core` so it can be shared by the browser-safe
|
|
67
|
+
* `TtsRestClient` and the Node `HttpClient`. `@soniox/node` re-exports
|
|
68
|
+
* these symbols for backwards compatibility.
|
|
69
|
+
*/
|
|
70
|
+
/** Maximum body text length to include in error details (4KB) */
|
|
71
|
+
const MAX_BODY_TEXT_LENGTH = 4096;
|
|
72
|
+
/**
 * Error type used for every failed REST request made by the SDK.
 *
 * Besides the fields inherited from `SonioxError` (message, code, status code
 * and cause), it records which request failed and, when available, the
 * response headers and a capped copy of the response body.
 */
var SonioxHttpError = class extends SonioxError {
	/** URL of the request that failed */
	url;
	/** HTTP method of the request that failed */
	method;
	/** Response headers (only set for http_error) */
	headers;
	/** Response body text, capped at 4KB (only set for http_error/parse_error) */
	bodyText;
	/**
	 * @param details - Error details: `message`, `code`, `statusCode`, `cause`,
	 *   `url`, `method`, `headers` and `bodyText`
	 */
	constructor(details) {
		const { url, method, headers, bodyText } = details;
		super(details.message, details.code, details.statusCode, details.cause);
		this.name = "SonioxHttpError";
		this.url = url;
		this.method = method;
		this.headers = headers;
		this.bodyText = bodyText;
	}
	/**
	 * Renders the error as a multi-line, human-readable summary.
	 *
	 * @returns Header line followed by method, URL and (when set) status code
	 */
	toString() {
		const lines = [
			`SonioxHttpError [${this.code}]: ${this.message}`,
			` Method: ${this.method}`,
			` URL: ${this.url}`
		];
		if (this.statusCode !== undefined) lines.push(` Status: ${this.statusCode}`);
		return lines.join("\n");
	}
	/**
	 * Produces a plain object suitable for logging or serialization.
	 * Optional fields are only present when they have a value.
	 *
	 * @returns Plain-object snapshot of this error
	 */
	toJSON() {
		const json = {
			name: this.name,
			code: this.code,
			message: this.message,
			url: this.url,
			method: this.method
		};
		for (const key of ["statusCode", "headers", "bodyText"]) {
			if (this[key] !== undefined) json[key] = this[key];
		}
		return json;
	}
};
|
|
121
|
+
/**
 * Builds a `SonioxHttpError` with code `"network_error"`.
 *
 * @param url - Request URL
 * @param method - HTTP method
 * @param cause - Underlying failure; its message is reused when it is an `Error`
 * @returns The constructed error
 */
function createNetworkError(url, method, cause) {
	let reason = "Network request failed";
	if (cause instanceof Error) reason = cause.message;
	const details = {
		code: "network_error",
		message: `Network error: ${reason}`,
		url,
		method,
		cause
	};
	return new SonioxHttpError(details);
}
|
|
133
|
+
/**
 * Builds a `SonioxHttpError` with code `"timeout"`.
 *
 * @param url - Request URL
 * @param method - HTTP method
 * @param timeoutMs - Timeout, in milliseconds, reported in the message
 * @returns The constructed error
 */
function createTimeoutError(url, method, timeoutMs) {
	const details = {
		code: "timeout",
		message: `Request timed out after ${timeoutMs}ms`,
		url,
		method
	};
	return new SonioxHttpError(details);
}
|
|
144
|
+
/**
 * Builds a `SonioxHttpError` with code `"aborted"`.
 *
 * @param url - Request URL
 * @param method - HTTP method
 * @param cause - Value attached as the error's cause
 * @returns The constructed error
 */
function createAbortError(url, method, cause) {
	const details = {
		code: "aborted",
		message: "Request was aborted",
		url,
		method,
		cause
	};
	return new SonioxHttpError(details);
}
|
|
156
|
+
/**
 * Builds a `SonioxHttpError` with code `"http_error"` for a non-2xx response.
 *
 * @param url - Request URL
 * @param method - HTTP method
 * @param statusCode - HTTP status code of the response
 * @param headers - Response headers
 * @param bodyText - Response body text; passed through `truncateBodyText`
 * @returns The constructed error
 */
function createHttpError(url, method, statusCode, headers, bodyText) {
	const details = {
		code: "http_error",
		message: `HTTP ${statusCode}`,
		url,
		method,
		statusCode,
		headers,
		bodyText: truncateBodyText(bodyText)
	};
	return new SonioxHttpError(details);
}
|
|
171
|
+
/**
 * Builds a `SonioxHttpError` with code `"parse_error"` (invalid JSON, etc.).
 *
 * @param url - Request URL
 * @param method - HTTP method
 * @param bodyText - Response body text; passed through `truncateBodyText`
 * @param cause - Underlying failure; its message is reused when it is an `Error`
 * @returns The constructed error
 */
function createParseError(url, method, bodyText, cause) {
	let reason = "Failed to parse response";
	if (cause instanceof Error) reason = cause.message;
	const details = {
		code: "parse_error",
		message: `Parse error: ${reason}`,
		url,
		method,
		bodyText: truncateBodyText(bodyText),
		cause
	};
	return new SonioxHttpError(details);
}
|
|
186
|
+
/**
 * Truncates body text to the maximum allowed length.
 *
 * Non-string input (for example `undefined` when no body could be read) is
 * returned unchanged, so building an error object never throws a secondary
 * `TypeError` that would hide the original failure.
 *
 * @param text - Response body text
 * @param maxLength - Maximum number of characters to keep before the
 *   truncation marker; defaults to `MAX_BODY_TEXT_LENGTH`
 * @returns `text` as-is when it fits (or is not a string), otherwise the first
 *   `maxLength` characters followed by `"... [truncated]"`
 */
function truncateBodyText(text, maxLength = MAX_BODY_TEXT_LENGTH) {
	if (typeof text !== "string") return text;
	if (text.length <= maxLength) return text;
	return `${text.slice(0, maxLength)}... [truncated]`;
}
|
|
193
|
+
/**
 * Type guard for abort-style errors.
 *
 * @param error - Value to inspect
 * @returns `true` when `error` is an `Error` instance whose `name` is
 *   `"AbortError"` or `"TimeoutError"`, otherwise `false`
 */
function isAbortError(error) {
	if (!(error instanceof Error)) return false;
	const { name } = error;
	return name === "AbortError" || name === "TimeoutError";
}
|
|
200
|
+
/**
 * Type guard for the SDK's base error class.
 *
 * @param error - Value to inspect
 * @returns `true` when `error` is an instance of `SonioxError` (which includes
 *   subclasses such as `SonioxHttpError`), otherwise `false`
 */
function isSonioxError(error) {
	return error instanceof SonioxError;
}
|
|
207
|
+
/**
 * Type guard for HTTP errors raised by the SDK.
 *
 * @param error - Value to inspect
 * @returns `true` when `error` is an instance of `SonioxHttpError`, otherwise
 *   `false`
 */
function isSonioxHttpError(error) {
	return error instanceof SonioxHttpError;
}
|
|
213
|
+
/**
 * Checks whether an error is an HTTP 404 (Not Found) failure.
 *
 * @param error - Value to inspect
 * @returns `true` only when `error` passes `isSonioxHttpError` and its
 *   `statusCode` is exactly `404`
 */
function isNotFoundError(error) {
	if (!isSonioxHttpError(error)) return false;
	return error.statusCode === 404;
}
|
|
219
|
+
|
|
220
|
+
//#endregion
|
|
221
|
+
//#region ../core/src/connection.ts
|
|
222
|
+
/** Root domain used for the default (US) deployment. */
|
|
223
|
+
const DEFAULT_BASE_DOMAIN = "soniox.com";
|
|
224
|
+
/**
 * Builds the Soniox service URLs for a given base domain.
 *
 * Every deployment uses the same subdomain layout:
 * `api.{base}`, `stt-rt.{base}` and `tts-rt.{base}`.
 *
 * @param base - Base domain, e.g. `soniox.com`
 * @returns Object with `api_domain`, `stt_ws_url`, `tts_api_url` and `tts_ws_url`
 */
function urlsFromBase(base) {
	const sttHost = `stt-rt.${base}`;
	const ttsHost = `tts-rt.${base}`;
	return {
		api_domain: `https://api.${base}`,
		stt_ws_url: `wss://${sttHost}/transcribe-websocket`,
		tts_api_url: `https://${ttsHost}`,
		tts_ws_url: `wss://${ttsHost}/tts-websocket`
	};
}
|
|
237
|
+
/**
 * Resolve a {@link SonioxConnectionConfig} into fully qualified URLs.
 *
 * Resolution priority (highest → lowest) for each URL:
 * 1. Explicit field (`api_domain`, `stt_ws_url`, `tts_api_url`, `tts_ws_url`)
 * 2. Derived from `base_domain`
 * 3. Derived from `region` → `{region}.soniox.com`
 * 4. Default US base domain (`soniox.com`)
 *
 * `region` is trimmed and lower-cased before use. A `null`, `undefined`, empty
 * or `"us"` region (any casing) selects the default base domain; previously a
 * `null` region threw and an empty region produced the invalid host
 * `.soniox.com`.
 *
 * @param config - Connection configuration supplied by the caller
 * @returns Resolved config: `api_key`, the four service URLs, `stt_defaults`,
 *   `tts_defaults`, and `session_defaults` (same object as `stt_defaults`)
 */
function resolveConnectionConfig(config) {
	const { region, base_domain, api_domain, stt_ws_url, tts_api_url, tts_ws_url } = config;
	// `== null` treats null like undefined, matching the `??` fallbacks used for every other field.
	const regionKey = region == null ? "" : region.trim().toLowerCase();
	const regionalDomain = regionKey !== "" && regionKey !== "us" ? `${regionKey}.${DEFAULT_BASE_DOMAIN}` : DEFAULT_BASE_DOMAIN;
	const derived = urlsFromBase(base_domain ?? regionalDomain);
	// `session_defaults` is accepted as a fallback input name and mirrored in the output.
	const sttDefaults = config.stt_defaults ?? config.session_defaults ?? {};
	return {
		api_key: config.api_key,
		api_domain: api_domain ?? derived.api_domain,
		stt_ws_url: stt_ws_url ?? derived.stt_ws_url,
		tts_api_url: tts_api_url ?? derived.tts_api_url,
		tts_ws_url: tts_ws_url ?? derived.tts_ws_url,
		stt_defaults: sttDefaults,
		tts_defaults: config.tts_defaults ?? {},
		session_defaults: sttDefaults
	};
}
|
|
262
|
+
|
|
81
263
|
//#endregion
|
|
82
264
|
//#region ../core/src/segments.ts
|
|
83
265
|
const DEFAULT_GROUP_BY = ["speaker", "language"];
|
|
@@ -160,10 +342,34 @@ var AsyncEventQueue = class {
|
|
|
160
342
|
return this.done;
|
|
161
343
|
}
|
|
162
344
|
/**
|
|
345
|
+
* Drop buffered events without ending the queue.
|
|
346
|
+
*
|
|
347
|
+
* Intended for owners that know their consumer has gone away (e.g. an
|
|
348
|
+
* async-iterator consumer broke out of its `for await` loop). The queue
|
|
349
|
+
* remains active and accepts future pushes. Callers must ensure no other
|
|
350
|
+
* iterator is concurrently consuming this queue, since this also drops
|
|
351
|
+
* events those consumers would have observed.
|
|
352
|
+
*/
|
|
353
|
+
clear() {
|
|
354
|
+
this.queue = [];
|
|
355
|
+
}
|
|
356
|
+
/**
|
|
163
357
|
* Async iterator implementation.
|
|
358
|
+
*
|
|
359
|
+
* The returned iterator implements `return()` so consumers that exit
|
|
360
|
+
* `for await` early (via `break`, `throw`, or an outer `return`) cleanly
|
|
361
|
+
* release the iteration without further work. The queue itself is left
|
|
362
|
+
* in place — call {@link clear} or {@link end}/{@link abort} if buffered
|
|
363
|
+
* events should also be dropped.
|
|
164
364
|
*/
|
|
165
365
|
[Symbol.asyncIterator]() {
|
|
166
|
-
return {
|
|
366
|
+
return {
|
|
367
|
+
next: () => this.next(),
|
|
368
|
+
return: (value) => Promise.resolve({
|
|
369
|
+
value,
|
|
370
|
+
done: true
|
|
371
|
+
})
|
|
372
|
+
};
|
|
167
373
|
}
|
|
168
374
|
/**
|
|
169
375
|
* Get the next event from the queue.
|
|
@@ -421,8 +627,9 @@ function mapErrorResponse(response) {
|
|
|
421
627
|
|
|
422
628
|
//#endregion
|
|
423
629
|
//#region ../core/src/realtime/stt.ts
|
|
424
|
-
const DEFAULT_KEEPALIVE_INTERVAL_MS = 5e3;
|
|
425
|
-
const MIN_KEEPALIVE_INTERVAL_MS = 1e3;
|
|
630
|
+
const DEFAULT_KEEPALIVE_INTERVAL_MS$1 = 5e3;
|
|
631
|
+
const MIN_KEEPALIVE_INTERVAL_MS$1 = 1e3;
|
|
632
|
+
const DEFAULT_CONNECT_TIMEOUT_MS$1 = 2e4;
|
|
426
633
|
/**
|
|
427
634
|
* Convert audio data to Uint8Array
|
|
428
635
|
* Handles Uint8Array and ArrayBuffer
|
|
@@ -512,10 +719,12 @@ function filterSpecialTokens(tokens) {
|
|
|
512
719
|
var RealtimeSttSession = class {
|
|
513
720
|
emitter = new TypedEmitter();
|
|
514
721
|
eventQueue = new AsyncEventQueue();
|
|
722
|
+
iteratorAttached = false;
|
|
515
723
|
apiKey;
|
|
516
724
|
wsBaseUrl;
|
|
517
725
|
config;
|
|
518
726
|
keepaliveIntervalMs;
|
|
727
|
+
connectTimeoutMs;
|
|
519
728
|
signal;
|
|
520
729
|
ws = null;
|
|
521
730
|
_state = "idle";
|
|
@@ -529,8 +738,10 @@ var RealtimeSttSession = class {
|
|
|
529
738
|
this.apiKey = apiKey;
|
|
530
739
|
this.wsBaseUrl = wsBaseUrl;
|
|
531
740
|
this.config = config;
|
|
532
|
-
const keepaliveMs = options?.keepalive_interval_ms ?? DEFAULT_KEEPALIVE_INTERVAL_MS;
|
|
533
|
-
this.keepaliveIntervalMs = Number.isFinite(keepaliveMs) && keepaliveMs > 0 ? Math.max(keepaliveMs, MIN_KEEPALIVE_INTERVAL_MS) : DEFAULT_KEEPALIVE_INTERVAL_MS;
|
|
741
|
+
const keepaliveMs = options?.keepalive_interval_ms ?? DEFAULT_KEEPALIVE_INTERVAL_MS$1;
|
|
742
|
+
this.keepaliveIntervalMs = Number.isFinite(keepaliveMs) && keepaliveMs > 0 ? Math.max(keepaliveMs, MIN_KEEPALIVE_INTERVAL_MS$1) : DEFAULT_KEEPALIVE_INTERVAL_MS$1;
|
|
743
|
+
const connectMs = options?.connect_timeout_ms ?? DEFAULT_CONNECT_TIMEOUT_MS$1;
|
|
744
|
+
this.connectTimeoutMs = Number.isFinite(connectMs) && connectMs > 0 ? connectMs : DEFAULT_CONNECT_TIMEOUT_MS$1;
|
|
534
745
|
this.signal = options?.signal;
|
|
535
746
|
if (this.signal) {
|
|
536
747
|
this.abortHandler = () => this.handleAbort();
|
|
@@ -559,16 +770,26 @@ var RealtimeSttSession = class {
|
|
|
559
770
|
async connect() {
|
|
560
771
|
if (this._state !== "idle") throw new StateError(`Cannot connect: session is in "${this._state}" state`);
|
|
561
772
|
this.checkAborted();
|
|
562
|
-
this.setState("connecting");
|
|
773
|
+
this.setState("connecting", "user_action");
|
|
774
|
+
let connectTimer;
|
|
563
775
|
try {
|
|
564
|
-
await this.createWebSocket()
|
|
565
|
-
|
|
776
|
+
await Promise.race([this.createWebSocket().then((v) => {
|
|
777
|
+
clearTimeout(connectTimer);
|
|
778
|
+
return v;
|
|
779
|
+
}), new Promise((_resolve, reject) => {
|
|
780
|
+
connectTimer = setTimeout(() => {
|
|
781
|
+
if (this.ws) this.ws.close();
|
|
782
|
+
reject(new ConnectionError("Connection timed out"));
|
|
783
|
+
}, this.connectTimeoutMs);
|
|
784
|
+
})]);
|
|
785
|
+
this.setState("connected", "connected");
|
|
566
786
|
this.emitter.emit("connected");
|
|
567
787
|
this.updateKeepalive();
|
|
568
788
|
} catch (error) {
|
|
789
|
+
clearTimeout(connectTimer);
|
|
569
790
|
if (!this.isTerminalState(this._state)) {
|
|
570
791
|
const err = error instanceof Error ? error : new ConnectionError("Connection failed", error);
|
|
571
|
-
this.cleanup("error", err);
|
|
792
|
+
this.cleanup("error", err, "error");
|
|
572
793
|
}
|
|
573
794
|
throw error;
|
|
574
795
|
}
|
|
@@ -646,7 +867,7 @@ var RealtimeSttSession = class {
|
|
|
646
867
|
this.checkAborted();
|
|
647
868
|
if (this._state !== "connected") throw new StateError(`Cannot finish: session is in "${this._state}" state`);
|
|
648
869
|
if (this._paused) this.resume();
|
|
649
|
-
this.setState("finishing");
|
|
870
|
+
this.setState("finishing", "user_action");
|
|
650
871
|
this.updateKeepalive();
|
|
651
872
|
const finishPromise = new Promise((resolve, reject) => {
|
|
652
873
|
this.finishResolver = resolve;
|
|
@@ -662,7 +883,7 @@ var RealtimeSttSession = class {
|
|
|
662
883
|
if (this.isTerminalState(this._state)) return;
|
|
663
884
|
this.emitter.emit("disconnected", "client_closed");
|
|
664
885
|
this.settleFinish(new StateError("Session canceled"));
|
|
665
|
-
this.cleanup("canceled");
|
|
886
|
+
this.cleanup("canceled", void 0, "user_action");
|
|
666
887
|
}
|
|
667
888
|
/**
|
|
668
889
|
* Register an event handler
|
|
@@ -687,9 +908,42 @@ var RealtimeSttSession = class {
|
|
|
687
908
|
}
|
|
688
909
|
/**
|
|
689
910
|
* Async iterator for consuming events.
|
|
911
|
+
*
|
|
912
|
+
* The returned iterator's `return()` resets the internal iterator-attach
|
|
913
|
+
* flag and drops any buffered events, so consumers that exit `for await`
|
|
914
|
+
* early (via `break` etc.) stop accruing memory while the session keeps
|
|
915
|
+
* running.
|
|
690
916
|
*/
|
|
691
917
|
[Symbol.asyncIterator]() {
|
|
692
|
-
|
|
918
|
+
this.iteratorAttached = true;
|
|
919
|
+
const inner = this.eventQueue[Symbol.asyncIterator]();
|
|
920
|
+
return {
|
|
921
|
+
next: () => inner.next(),
|
|
922
|
+
return: (value) => {
|
|
923
|
+
this.iteratorAttached = false;
|
|
924
|
+
this.eventQueue.clear();
|
|
925
|
+
return inner.return?.(value) ?? Promise.resolve({
|
|
926
|
+
value,
|
|
927
|
+
done: true
|
|
928
|
+
});
|
|
929
|
+
}
|
|
930
|
+
};
|
|
931
|
+
}
|
|
932
|
+
/**
|
|
933
|
+
* @internal Debug-only: forcefully close the underlying WebSocket to
|
|
934
|
+
* simulate an unexpected network disconnection.
|
|
935
|
+
*/
|
|
936
|
+
__debugForceDisconnect() {
|
|
937
|
+
this.ws?.close(4999, "debug: simulated disconnect");
|
|
938
|
+
}
|
|
939
|
+
/**
|
|
940
|
+
* Push an event to the async iterator queue only when a consumer has
|
|
941
|
+
* attached via `[Symbol.asyncIterator]()`. Listener-only consumers
|
|
942
|
+
* (the documented `.on()` pattern) never drain the queue, so pushing
|
|
943
|
+
* unconditionally would leak buffered events on long-running sessions.
|
|
944
|
+
*/
|
|
945
|
+
enqueueIfIterating(event) {
|
|
946
|
+
if (this.iteratorAttached) this.eventQueue.push(event);
|
|
693
947
|
}
|
|
694
948
|
async createWebSocket() {
|
|
695
949
|
return new Promise((resolve, reject) => {
|
|
@@ -740,66 +994,70 @@ var RealtimeSttSession = class {
|
|
|
740
994
|
tokens: userTokens
|
|
741
995
|
};
|
|
742
996
|
this.emitter.emit("result", filteredResult);
|
|
743
|
-
this.
|
|
997
|
+
this.enqueueIfIterating({
|
|
744
998
|
kind: "result",
|
|
745
999
|
data: filteredResult
|
|
746
1000
|
});
|
|
747
1001
|
if (hasEndpoint) {
|
|
748
1002
|
this.emitter.emit("endpoint");
|
|
749
|
-
this.
|
|
1003
|
+
this.enqueueIfIterating({ kind: "endpoint" });
|
|
750
1004
|
}
|
|
751
1005
|
if (hasFinalized) {
|
|
752
1006
|
this.emitter.emit("finalized");
|
|
753
|
-
this.
|
|
1007
|
+
this.enqueueIfIterating({ kind: "finalized" });
|
|
754
1008
|
}
|
|
755
1009
|
if (result.finished) {
|
|
756
1010
|
this.emitter.emit("finished");
|
|
757
|
-
this.
|
|
1011
|
+
this.enqueueIfIterating({ kind: "finished" });
|
|
758
1012
|
this.settleFinish();
|
|
759
|
-
this.cleanup("finished");
|
|
1013
|
+
this.cleanup("finished", void 0, "finished");
|
|
760
1014
|
}
|
|
761
1015
|
} catch (error) {
|
|
762
1016
|
const err = error;
|
|
763
1017
|
this.emitter.emit("error", err);
|
|
764
1018
|
this.settleFinish(err);
|
|
765
|
-
this.cleanup("error", err);
|
|
1019
|
+
this.cleanup("error", err, "error");
|
|
766
1020
|
}
|
|
767
1021
|
}
|
|
768
1022
|
handleClose(event) {
|
|
769
1023
|
if (this.isTerminalState(this._state)) return;
|
|
770
1024
|
this.emitter.emit("disconnected", event.reason || void 0);
|
|
771
1025
|
if (this._state === "finishing") {
|
|
772
|
-
const error = new ConnectionError("WebSocket closed before finished response", event);
|
|
773
|
-
this.emitter.emit("error", error);
|
|
774
|
-
this.settleFinish(error);
|
|
775
|
-
this.cleanup("error", error);
|
|
1026
|
+
const error$1 = new ConnectionError("WebSocket closed before finished response", event);
|
|
1027
|
+
this.emitter.emit("error", error$1);
|
|
1028
|
+
this.settleFinish(error$1);
|
|
1029
|
+
this.cleanup("error", error$1, "connection_lost");
|
|
776
1030
|
return;
|
|
777
1031
|
}
|
|
778
|
-
|
|
1032
|
+
const error = new ConnectionError("WebSocket closed unexpectedly", event);
|
|
1033
|
+
this.emitter.emit("error", error);
|
|
1034
|
+
this.cleanup("closed", error, "connection_lost");
|
|
779
1035
|
}
|
|
780
1036
|
handleError(event) {
|
|
781
1037
|
const error = new ConnectionError("WebSocket error", event);
|
|
782
1038
|
this.emitter.emit("error", error);
|
|
783
1039
|
this.settleFinish(error);
|
|
784
|
-
this.cleanup("error", error);
|
|
1040
|
+
this.cleanup("error", error, "error");
|
|
785
1041
|
}
|
|
786
1042
|
handleAbort() {
|
|
787
1043
|
const error = new AbortError();
|
|
788
1044
|
this.emitter.emit("error", error);
|
|
789
1045
|
this.settleFinish(error);
|
|
790
|
-
this.cleanup("canceled", error);
|
|
1046
|
+
this.cleanup("canceled", error, "user_action");
|
|
791
1047
|
}
|
|
792
|
-
setState(newState) {
|
|
1048
|
+
setState(newState, reason) {
|
|
793
1049
|
if (this._state === newState) return;
|
|
794
1050
|
const oldState = this._state;
|
|
795
1051
|
this._state = newState;
|
|
796
|
-
|
|
1052
|
+
const update = {
|
|
797
1053
|
old_state: oldState,
|
|
798
1054
|
new_state: newState
|
|
799
|
-
}
|
|
1055
|
+
};
|
|
1056
|
+
if (reason !== void 0) update.reason = reason;
|
|
1057
|
+
this.emitter.emit("state_change", update);
|
|
800
1058
|
}
|
|
801
|
-
cleanup(finalState, error) {
|
|
802
|
-
this.setState(finalState);
|
|
1059
|
+
cleanup(finalState, error, reason) {
|
|
1060
|
+
this.setState(finalState, reason);
|
|
803
1061
|
this.stopKeepalive();
|
|
804
1062
|
if (this.signal && this.abortHandler) {
|
|
805
1063
|
this.signal.removeEventListener("abort", this.abortHandler);
|
|
@@ -833,7 +1091,7 @@ var RealtimeSttSession = class {
|
|
|
833
1091
|
const error = new ConnectionError("WebSocket is not open");
|
|
834
1092
|
this.emitter.emit("error", error);
|
|
835
1093
|
this.settleFinish(error);
|
|
836
|
-
this.cleanup("error", error);
|
|
1094
|
+
this.cleanup("error", error, "error");
|
|
837
1095
|
if (shouldThrow) throw error;
|
|
838
1096
|
return;
|
|
839
1097
|
}
|
|
@@ -843,7 +1101,7 @@ var RealtimeSttSession = class {
|
|
|
843
1101
|
const error = new ConnectionError("WebSocket send failed", err);
|
|
844
1102
|
this.emitter.emit("error", error);
|
|
845
1103
|
this.settleFinish(error);
|
|
846
|
-
this.cleanup("error", error);
|
|
1104
|
+
this.cleanup("error", error, "error");
|
|
847
1105
|
if (shouldThrow) throw error;
|
|
848
1106
|
}
|
|
849
1107
|
}
|
|
@@ -865,6 +1123,412 @@ var RealtimeSttSession = class {
|
|
|
865
1123
|
}
|
|
866
1124
|
};
|
|
867
1125
|
|
|
1126
|
+
//#endregion
|
|
1127
|
+
//#region ../core/src/realtime/tts.ts
|
|
1128
|
+
const MAX_STREAMS_PER_CONNECTION = 5;
|
|
1129
|
+
const DEFAULT_KEEPALIVE_INTERVAL_MS = 5e3;
|
|
1130
|
+
const MIN_KEEPALIVE_INTERVAL_MS = 1e3;
|
|
1131
|
+
const DEFAULT_CONNECT_TIMEOUT_MS = 2e4;
|
|
1132
|
+
function generateStreamId() {
|
|
1133
|
+
return globalThis.crypto.randomUUID();
|
|
1134
|
+
}
|
|
1135
|
+
/**
 * Decodes a base64 string into raw bytes.
 *
 * @param base64 - Base64-encoded data
 * @returns The decoded bytes as a `Uint8Array`
 */
function decodeBase64ToUint8Array(base64) {
	// atob() yields one character per byte (code units 0-255), so each
	// character maps directly onto one array element.
	return Uint8Array.from(atob(base64), (char) => char.charCodeAt(0));
}
|
|
1141
|
+
/**
 * Combines per-stream input with connection-level defaults and checks that
 * all mandatory fields are present.
 *
 * Values in `input` take precedence over `defaults`. `sample_rate` and
 * `bitrate` are only included when defined, and a `stream_id` is generated
 * when none was supplied.
 *
 * @param input - Partial stream configuration for this stream
 * @param defaults - Fallback values (e.g. `tts_defaults` from the connection config)
 * @returns Fully resolved stream configuration
 * @throws Error listing every missing required field (`model`, `language`,
 *   `voice`, `audio_format`)
 */
function resolveStreamConfig(input, defaults) {
	const merged = {
		...defaults,
		...input
	};
	const requiredFields = [
		"model",
		"language",
		"voice",
		"audio_format"
	];
	const missing = requiredFields.filter((field) => !merged[field]);
	if (missing.length > 0) throw new Error(`Missing required TTS stream fields: ${missing.join(", ")}. Provide them directly or via tts_defaults in your connection config.`);
	const resolved = {
		model: merged.model,
		language: merged.language,
		voice: merged.voice,
		audio_format: merged.audio_format
	};
	if (merged.sample_rate !== undefined) resolved.sample_rate = merged.sample_rate;
	if (merged.bitrate !== undefined) resolved.bitrate = merged.bitrate;
	resolved.stream_id = merged.stream_id ?? generateStreamId();
	return resolved;
}
|
|
1170
|
+
/**
 * A single TTS stream carried over a shared WebSocket connection.
 *
 * The stream emits typed events (`audio`, `audioEnd`, `terminated`, `error`)
 * and can also be consumed with `for await` to receive decoded audio chunks.
 *
 * @example Event-based
 * ```typescript
 * stream.on('audio', (chunk) => process(chunk));
 * stream.on('terminated', () => console.log('done'));
 * stream.sendText("Hello world");
 * stream.finish();
 * ```
 *
 * @example Async iteration
 * ```typescript
 * stream.sendText("Hello world");
 * stream.finish();
 * for await (const chunk of stream) {
 *   process(chunk);
 * }
 * ```
 */
var RealtimeTtsStream = class extends TypedEmitter {
	streamId;
	_state = "active";
	audioQueue = new AsyncEventQueue();
	iteratorAttached = false;
	connection;
	ownsConnection;
	/** @internal */
	constructor(streamId, connection, ownsConnection) {
		super();
		this.streamId = streamId;
		this.connection = connection;
		this.ownsConnection = ownsConnection;
	}
	/** Lifecycle state of this stream. */
	get state() {
		return this._state;
	}
	/**
	 * Sends a chunk of text to be synthesized.
	 *
	 * @param text - Text to synthesize
	 * @param options.end - When truthy, marks this as the last text chunk and
	 *   moves the stream to the `finishing` state
	 * @throws StateError when the stream is not `active`
	 */
	sendText(text, options) {
		if (this._state !== "active") throw new StateError(`Cannot send text in state '${this._state}'`);
		const endFlag = options?.end;
		this.connection._sendJson({
			text,
			text_end: endFlag ?? false,
			stream_id: this.streamId
		});
		if (endFlag) this._state = "finishing";
	}
	/**
	 * Forwards every chunk of an async iterable to the stream, then calls
	 * {@link finish} if the stream is still active once the iterable is done.
	 * Forwarding stops early as soon as the stream leaves the `active` state.
	 *
	 * Intended to run concurrently with audio consumption (events or
	 * `for await`).
	 *
	 * @example LLM token piping
	 * ```typescript
	 * stream.sendStream(llmTokenStream);
	 * for await (const audio of stream) { forward(audio); }
	 * ```
	 */
	async sendStream(source) {
		for await (const piece of source) {
			if (this._state === "active") this.sendText(piece);
			else break;
		}
		if (this._state === "active") this.finish();
	}
	/**
	 * Declares that no further text will follow by sending an empty final
	 * chunk. The server will finish generating audio and send `terminated`.
	 *
	 * @throws StateError when the stream is not `active`
	 */
	finish() {
		if (this._state !== "active") throw new StateError(`Cannot finish in state '${this._state}'`);
		this.sendText("", { end: true });
	}
	/**
	 * Asks the server to cancel this stream. The server will stop generating
	 * and send `terminated`. Does nothing once the stream has ended or errored.
	 */
	cancel() {
		if (["ended", "error"].includes(this._state)) return;
		const cancelMessage = {
			stream_id: this.streamId,
			cancel: true
		};
		try {
			this.connection._sendJson(cancelMessage);
		} catch {
			// Send failures are deliberately ignored here.
		}
	}
	/**
	 * Ends this stream locally. When the stream owns its connection
	 * (single-stream usage created via `tts(input)`), the underlying WebSocket
	 * connection is closed as well.
	 */
	close() {
		this._endStream();
		if (this.ownsConnection) this.connection.close();
	}
	/**
	 * Async iterator over decoded audio chunks.
	 *
	 * Obtaining the iterator switches on queueing of incoming audio. Its
	 * `return()` switches queueing off again and discards buffered audio, so a
	 * consumer that leaves `for await` early (e.g. via `break`) does not keep
	 * accumulating chunks while the stream continues to receive audio.
	 */
	[Symbol.asyncIterator]() {
		this.iteratorAttached = true;
		const queueIterator = this.audioQueue[Symbol.asyncIterator]();
		const release = (value) => {
			this.iteratorAttached = false;
			this.audioQueue.clear();
			return queueIterator.return?.(value) ?? Promise.resolve({
				value,
				done: true
			});
		};
		return {
			next: () => queueIterator.next(),
			return: release
		};
	}
	/**
	 * Queues an audio chunk for the async iterator, but only while an iterator
	 * is attached. Consumers that only use `.on('audio', ...)` never drain the
	 * queue, so unconditional queueing would let chunks pile up.
	 */
	enqueueIfIterating(chunk) {
		if (!this.iteratorAttached) return;
		this.audioQueue.push(chunk);
	}
	/** @internal Routes one server event to this stream. */
	_handleEvent(event) {
		if (event.error_code !== undefined) {
			const errPayload = event.error_message !== undefined ? {
				error_code: event.error_code,
				error_message: event.error_message
			} : { error_code: event.error_code };
			const streamError = mapErrorResponse(errPayload);
			this._state = "error";
			this.emit("error", streamError);
			this.audioQueue.abort(streamError);
			this.connection._deactivateStream(this.streamId);
			return;
		}
		if (event.audio !== undefined) {
			const decoded = decodeBase64ToUint8Array(event.audio);
			this.emit("audio", decoded);
			this.enqueueIfIterating(decoded);
		}
		if (event.audio_end) this.emit("audioEnd");
		if (event.terminated) this._endStream();
	}
	/** @internal Ends the stream without emitting events (connection closing). */
	_forceEnd() {
		if (["ended", "error"].includes(this._state)) return;
		this._state = "ended";
		this.audioQueue.end();
	}
	/**
	 * Marks the stream as ended, emits `terminated`, ends the audio queue and
	 * deregisters the stream from its connection. No-op when already ended.
	 */
	_endStream() {
		if (this._state === "ended") return;
		this._state = "ended";
		this.emit("terminated");
		this.audioQueue.end();
		this.connection._deactivateStream(this.streamId);
	}
};
|
|
1341
|
+
/**
 * WebSocket connection for real-time Text-to-Speech.
 *
 * Streams are multiplexed by `stream_id`; at most
 * `MAX_STREAMS_PER_CONNECTION` may be active at once.
 * The connection automatically sends keepalive messages while open.
 *
 * @example Multi-stream
 * ```typescript
 * const conn = new RealtimeTtsConnection(apiKey, wsUrl, ttsDefaults);
 * await conn.connect();
 *
 * const s1 = await conn.stream({ model, voice, language, audio_format });
 * s1.sendText("Hello");
 * s1.finish();
 * for await (const chunk of s1) { ... }
 *
 * conn.close();
 * ```
 */
var RealtimeTtsConnection = class extends TypedEmitter {
  apiKey;
  wsUrl;
  ttsDefaults;
  keepaliveIntervalMs;
  connectTimeoutMs;
  ws = null;
  connected = false;
  connecting = false;
  keepaliveTimer = null;
  activeStreams = /* @__PURE__ */ new Map();
  /**
   * @param apiKey - API key sent with every stream configuration message
   * @param wsUrl - WebSocket endpoint URL
   * @param ttsDefaults - Defaults merged into each stream configuration
   * @param options - Optional `keepalive_interval_ms` / `connect_timeout_ms`
   */
  constructor(apiKey, wsUrl, ttsDefaults = {}, options) {
    super();
    this.apiKey = apiKey;
    this.wsUrl = wsUrl;
    this.ttsDefaults = ttsDefaults;
    // Non-finite or non-positive values fall back to the defaults; a valid
    // keepalive interval is raised to the minimum when it is too small.
    const keepaliveMs = options?.keepalive_interval_ms ?? DEFAULT_KEEPALIVE_INTERVAL_MS;
    this.keepaliveIntervalMs = Number.isFinite(keepaliveMs) && keepaliveMs > 0 ? Math.max(keepaliveMs, MIN_KEEPALIVE_INTERVAL_MS) : DEFAULT_KEEPALIVE_INTERVAL_MS;
    const connectMs = options?.connect_timeout_ms ?? DEFAULT_CONNECT_TIMEOUT_MS;
    this.connectTimeoutMs = Number.isFinite(connectMs) && connectMs > 0 ? connectMs : DEFAULT_CONNECT_TIMEOUT_MS;
  }
  /** Whether the WebSocket is connected. */
  get isConnected() {
    return this.connected;
  }
  /**
   * Open the WebSocket connection and start keepalive.
   * Called automatically by {@link stream} if not yet connected.
   *
   * @throws StateError when another `connect()` call is still in flight
   * @throws ConnectionError when the socket cannot be opened in time
   */
  async connect() {
    if (this.connected) return;
    if (this.connecting) throw new StateError("Connection is already being established");
    this.connecting = true;
    try {
      await this.createWebSocket();
      this.connected = true;
      this.startKeepalive();
    } finally {
      this.connecting = false;
    }
  }
  /**
   * Open a new TTS stream on this connection.
   * Auto-connects if the WebSocket is not yet open.
   *
   * @param input - Stream configuration (merged with tts_defaults)
   * @returns A ready-to-use stream handle
   */
  async stream(input = {}) {
    return this._openStream(input, false);
  }
  /**
   * @internal Open a stream, optionally marking it as connection-owning.
   *
   * @throws StateError when the stream limit is reached, the `stream_id` is
   *   already active, or the connection is not open
   */
  async _openStream(input, ownsConnection) {
    if (!this.connected) await this.connect();
    if (this.activeStreams.size >= MAX_STREAMS_PER_CONNECTION) throw new StateError(`Maximum concurrent streams (${MAX_STREAMS_PER_CONNECTION}) reached`);
    const config = resolveStreamConfig(input, this.ttsDefaults);
    if (this.activeStreams.has(config.stream_id)) throw new StateError(`Stream '${config.stream_id}' is already active on this connection`);
    const stream = new RealtimeTtsStream(config.stream_id, this, ownsConnection);
    this.activeStreams.set(config.stream_id, stream);
    try {
      this._sendJson({
        api_key: this.apiKey,
        ...config
      });
    } catch (error) {
      // The caller never receives this stream, so it must not keep
      // occupying a slot (and its id) in the active set.
      this.activeStreams.delete(config.stream_id);
      throw error;
    }
    return stream;
  }
  /**
   * Close the WebSocket connection and terminate all active streams.
   */
  close() {
    this.stopKeepalive();
    for (const stream of this.activeStreams.values()) stream._forceEnd();
    this.activeStreams.clear();
    if (this.ws) {
      try {
        this.ws.close();
      } catch {}
      this.ws = null;
    }
    this.connected = false;
    this.emit("close");
  }
  /** @internal Send a JSON payload on the WebSocket. */
  _sendJson(payload) {
    if (!this.ws || !this.connected) throw new StateError("TTS connection is not open");
    this.ws.send(JSON.stringify(payload));
  }
  /** @internal Remove a stream from the active set. */
  _deactivateStream(streamId) {
    this.activeStreams.delete(streamId);
  }
  /**
   * Create the socket and resolve once it is open. Rejects with a
   * ConnectionError on construction failure, socket error, or timeout.
   */
  async createWebSocket() {
    return new Promise((resolve, reject) => {
      const timer = setTimeout(() => {
        try {
          ws.close();
        } catch {}
        reject(new ConnectionError("TTS WebSocket connection timed out"));
      }, this.connectTimeoutMs);
      let ws;
      try {
        ws = new WebSocket(this.wsUrl);
        ws.binaryType = "arraybuffer";
      } catch (err) {
        clearTimeout(timer);
        reject(new ConnectionError(`Failed to create TTS WebSocket: ${err instanceof Error ? err.message : String(err)}`));
        return;
      }
      const onOpen = () => {
        clearTimeout(timer);
        ws.removeEventListener("error", onError);
        this.ws = ws;
        ws.addEventListener("message", (event) => {
          this.handleMessage(event);
        });
        ws.addEventListener("close", () => {
          // Only react when the socket that closed is still the current one.
          // A late close event from a socket already replaced by close() +
          // connect() must not tear down the newer connection.
          if (this.ws !== ws || !this.connected) return;
          this.ws = null;
          this.connected = false;
          this.stopKeepalive();
          for (const stream of this.activeStreams.values()) stream._forceEnd();
          this.activeStreams.clear();
          this.emit("close");
        });
        resolve();
      };
      const onError = () => {
        clearTimeout(timer);
        ws.removeEventListener("open", onOpen);
        reject(new ConnectionError("TTS WebSocket connection failed"));
      };
      ws.addEventListener("open", onOpen);
      ws.addEventListener("error", onError);
    });
  }
  /**
   * Route an incoming message: frames with a `stream_id` go to the matching
   * active stream; frames without one that carry `error_code` are emitted
   * as a connection-level `"error"`. Non-string and unparseable frames are
   * ignored.
   */
  handleMessage(event) {
    if (typeof event.data !== "string") return;
    let parsed;
    try {
      parsed = JSON.parse(event.data);
    } catch {
      return;
    }
    // Valid JSON is not necessarily an object: `null` would throw on the
    // property reads below, and primitives carry no fields to route on.
    if (parsed === null || typeof parsed !== "object") return;
    const streamId = parsed.stream_id;
    if (streamId !== void 0) {
      const stream = this.activeStreams.get(streamId);
      if (stream) stream._handleEvent(parsed);
      return;
    }
    if (parsed.error_code !== void 0) {
      const errPayload = { error_code: parsed.error_code };
      if (parsed.error_message !== void 0) errPayload.error_message = parsed.error_message;
      const error = mapErrorResponse(errPayload);
      this.emit("error", error);
    }
  }
  /** Start the periodic keepalive message; no-op when already running. */
  startKeepalive() {
    if (this.keepaliveTimer) return;
    this.keepaliveTimer = setInterval(() => {
      // Best effort: a failed keepalive send is deliberately ignored.
      if (this.connected && this.ws) try {
        this.ws.send(JSON.stringify({ keep_alive: true }));
      } catch {}
    }, this.keepaliveIntervalMs);
  }
  /** Stop the keepalive timer if one is running. */
  stopKeepalive() {
    if (this.keepaliveTimer) {
      clearInterval(this.keepaliveTimer);
      this.keepaliveTimer = null;
    }
  }
};
|
|
1531
|
+
|
|
868
1532
|
//#endregion
|
|
869
1533
|
//#region ../core/src/realtime/segments.ts
|
|
870
1534
|
/**
|
|
@@ -1020,199 +1684,210 @@ var RealtimeUtteranceBuffer = class {
|
|
|
1020
1684
|
markEndpoint() {
|
|
1021
1685
|
const trailingSegments = this.segmentBuffer.flushAll();
|
|
1022
1686
|
const segments = [...this.pendingSegments, ...trailingSegments];
|
|
1023
|
-
this.pendingSegments = [];
|
|
1024
|
-
if (segments.length === 0) return;
|
|
1025
|
-
return buildUtterance(segments, this.lastFinalAudioProcMs, this.lastTotalAudioProcMs);
|
|
1026
|
-
}
|
|
1027
|
-
/**
|
|
1028
|
-
* Clear buffered segments and tokens.
|
|
1029
|
-
*/
|
|
1030
|
-
reset() {
|
|
1031
|
-
this.pendingSegments = [];
|
|
1032
|
-
this.segmentBuffer.reset();
|
|
1033
|
-
}
|
|
1034
|
-
};
|
|
1035
|
-
function buildUtterance(segments, finalAudioProcMs, totalAudioProcMs) {
|
|
1036
|
-
const tokens = segments.flatMap((segment) => segment.tokens);
|
|
1037
|
-
return {
|
|
1038
|
-
text: segments.map((segment) => segment.text).join(""),
|
|
1039
|
-
segments,
|
|
1040
|
-
tokens,
|
|
1041
|
-
start_ms: segments[0]?.start_ms,
|
|
1042
|
-
end_ms: segments[segments.length - 1]?.end_ms,
|
|
1043
|
-
speaker: getCommonValue(segments.map((segment) => segment.speaker)),
|
|
1044
|
-
language: getCommonValue(segments.map((segment) => segment.language)),
|
|
1045
|
-
final_audio_proc_ms: finalAudioProcMs,
|
|
1046
|
-
total_audio_proc_ms: totalAudioProcMs
|
|
1047
|
-
};
|
|
1048
|
-
}
|
|
1049
|
-
function getCommonValue(values) {
|
|
1050
|
-
let common;
|
|
1051
|
-
for (const value of values) {
|
|
1052
|
-
if (value === void 0) return;
|
|
1053
|
-
if (common === void 0) {
|
|
1054
|
-
common = value;
|
|
1055
|
-
continue;
|
|
1056
|
-
}
|
|
1057
|
-
if (value !== common) return;
|
|
1058
|
-
}
|
|
1059
|
-
return common;
|
|
1060
|
-
}
|
|
1061
|
-
|
|
1062
|
-
//#endregion
|
|
1063
|
-
//#region src/http/errors.ts
|
|
1064
|
-
/**
|
|
1065
|
-
* HTTP error handling for the Soniox SDK
|
|
1066
|
-
*/
|
|
1067
|
-
/** Maximum body text length to include in error details (4KB) */
|
|
1068
|
-
const MAX_BODY_TEXT_LENGTH = 4096;
|
|
1069
|
-
/**
|
|
1070
|
-
* HTTP error class for all HTTP-related failures (REST API).
|
|
1071
|
-
*
|
|
1072
|
-
* Thrown when HTTP requests fail due to network issues, timeouts,
|
|
1073
|
-
* server errors, or response parsing failures.
|
|
1074
|
-
*/
|
|
1075
|
-
var SonioxHttpError = class extends SonioxError {
|
|
1076
|
-
/** Request URL */
|
|
1077
|
-
url;
|
|
1078
|
-
/** HTTP method */
|
|
1079
|
-
method;
|
|
1080
|
-
/** Response headers (only for http_error) */
|
|
1081
|
-
headers;
|
|
1082
|
-
/** Response body text, capped at 4KB (only for http_error/parse_error) */
|
|
1083
|
-
bodyText;
|
|
1084
|
-
constructor(details) {
|
|
1085
|
-
super(details.message, details.code, details.statusCode, details.cause);
|
|
1086
|
-
this.name = "SonioxHttpError";
|
|
1087
|
-
this.url = details.url;
|
|
1088
|
-
this.method = details.method;
|
|
1089
|
-
this.headers = details.headers;
|
|
1090
|
-
this.bodyText = details.bodyText;
|
|
1091
|
-
}
|
|
1092
|
-
/**
|
|
1093
|
-
* Creates a human-readable string representation
|
|
1094
|
-
*/
|
|
1095
|
-
toString() {
|
|
1096
|
-
const parts = [`SonioxHttpError [${this.code}]: ${this.message}`];
|
|
1097
|
-
parts.push(` Method: ${this.method}`);
|
|
1098
|
-
parts.push(` URL: ${this.url}`);
|
|
1099
|
-
if (this.statusCode !== void 0) parts.push(` Status: ${this.statusCode}`);
|
|
1100
|
-
return parts.join("\n");
|
|
1687
|
+
this.pendingSegments = [];
|
|
1688
|
+
if (segments.length === 0) return;
|
|
1689
|
+
return buildUtterance(segments, this.lastFinalAudioProcMs, this.lastTotalAudioProcMs);
|
|
1101
1690
|
}
|
|
1102
1691
|
/**
|
|
1103
|
-
*
|
|
1692
|
+
* Clear buffered segments and tokens.
|
|
1104
1693
|
*/
|
|
1105
|
-
|
|
1106
|
-
|
|
1107
|
-
|
|
1108
|
-
code: this.code,
|
|
1109
|
-
message: this.message,
|
|
1110
|
-
url: this.url,
|
|
1111
|
-
method: this.method,
|
|
1112
|
-
...this.statusCode !== void 0 && { statusCode: this.statusCode },
|
|
1113
|
-
...this.headers !== void 0 && { headers: this.headers },
|
|
1114
|
-
...this.bodyText !== void 0 && { bodyText: this.bodyText }
|
|
1115
|
-
};
|
|
1694
|
+
reset() {
|
|
1695
|
+
this.pendingSegments = [];
|
|
1696
|
+
this.segmentBuffer.reset();
|
|
1116
1697
|
}
|
|
1117
1698
|
};
|
|
1118
|
-
|
|
1119
|
-
|
|
1120
|
-
|
|
1121
|
-
|
|
1122
|
-
|
|
1123
|
-
|
|
1124
|
-
|
|
1125
|
-
|
|
1126
|
-
|
|
1127
|
-
|
|
1128
|
-
|
|
1129
|
-
|
|
1130
|
-
|
|
1131
|
-
* Creates a timeout error
|
|
1132
|
-
*/
|
|
1133
|
-
function createTimeoutError(url, method, timeoutMs) {
|
|
1134
|
-
return new SonioxHttpError({
|
|
1135
|
-
code: "timeout",
|
|
1136
|
-
message: `Request timed out after ${timeoutMs}ms`,
|
|
1137
|
-
url,
|
|
1138
|
-
method
|
|
1139
|
-
});
|
|
1699
|
+
/**
 * Assemble an utterance object from an ordered list of segments.
 *
 * Text and tokens are concatenated in segment order; the time range runs
 * from the first segment's start to the last segment's end; `speaker` and
 * `language` are whatever `getCommonValue` reports for the segments.
 */
function buildUtterance(segments, finalAudioProcMs, totalAudioProcMs) {
  const firstSegment = segments[0];
  const lastSegment = segments[segments.length - 1];
  const text = segments.map(({ text: part }) => part).join("");
  const tokens = segments.flatMap(({ tokens: part }) => part);
  const speakers = segments.map((entry) => entry.speaker);
  const languages = segments.map((entry) => entry.language);
  return {
    text,
    segments,
    tokens,
    start_ms: firstSegment?.start_ms,
    end_ms: lastSegment?.end_ms,
    speaker: getCommonValue(speakers),
    language: getCommonValue(languages),
    final_audio_proc_ms: finalAudioProcMs,
    total_audio_proc_ms: totalAudioProcMs
  };
}
|
|
1141
|
-
|
|
1142
|
-
|
|
1143
|
-
|
|
1144
|
-
|
|
1145
|
-
|
|
1146
|
-
|
|
1147
|
-
|
|
1148
|
-
|
|
1149
|
-
|
|
1150
|
-
|
|
1151
|
-
|
|
1713
|
+
/**
 * Return the single value shared by every entry of `values`.
 *
 * Yields `undefined` when the list is empty, when any entry is `undefined`,
 * or when two entries differ (compared with `!==`).
 */
function getCommonValue(values) {
  let shared;
  for (const candidate of values) {
    if (candidate === undefined) return undefined;
    if (shared === undefined) {
      shared = candidate;
    } else if (candidate !== shared) {
      return undefined;
    }
  }
  return shared;
}
|
|
1725
|
+
|
|
1726
|
+
//#endregion
|
|
1727
|
+
//#region ../core/src/tts-rest.ts
|
|
1153
1728
|
/**
|
|
1154
|
-
*
|
|
1729
|
+
* Browser-safe REST TTS client.
|
|
1730
|
+
*
|
|
1731
|
+
* Uses only `globalThis.fetch` — no Node-specific dependencies.
|
|
1732
|
+
* Shared by both `@soniox/node` and `@soniox/client`.
|
|
1155
1733
|
*/
|
|
1156
|
-
|
|
1157
|
-
|
|
1158
|
-
|
|
1159
|
-
|
|
1160
|
-
|
|
1161
|
-
|
|
1162
|
-
|
|
1163
|
-
|
|
1164
|
-
|
|
1165
|
-
|
|
1166
|
-
}
|
|
1734
|
+
// Values used when the caller omits the corresponding option.
const DEFAULT_MODEL = "tts-rt-v1-preview";
const DEFAULT_LANGUAGE = "en";
const DEFAULT_AUDIO_FORMAT = "wav";
/**
 * Build the JSON request body for a REST TTS call.
 *
 * `model`, `language` and `audio_format` fall back to the defaults above
 * when nullish; `sample_rate` and `bitrate` are included only when they are
 * not `undefined`.
 */
function buildPayload(options) {
  const { model, language, voice, audio_format, text, sample_rate, bitrate } = options;
  return {
    model: model ?? DEFAULT_MODEL,
    language: language ?? DEFAULT_LANGUAGE,
    voice,
    audio_format: audio_format ?? DEFAULT_AUDIO_FORMAT,
    text,
    ...(sample_rate !== undefined && { sample_rate }),
    ...(bitrate !== undefined && { bitrate })
  };
}
|
|
1168
1749
|
/**
|
|
1169
|
-
*
|
|
1750
|
+
* Normalizes fetch Headers to a plain object with lowercase keys.
|
|
1751
|
+
* Duplicated here (rather than imported from `@soniox/node`) to keep
|
|
1752
|
+
* this module browser-safe.
|
|
1170
1753
|
*/
|
|
1171
|
-
function
|
|
1172
|
-
const
|
|
1173
|
-
|
|
1174
|
-
|
|
1175
|
-
code: "parse_error",
|
|
1176
|
-
message: `Parse error: ${message}`,
|
|
1177
|
-
url,
|
|
1178
|
-
method,
|
|
1179
|
-
bodyText: cappedBody,
|
|
1180
|
-
cause
|
|
1754
|
+
/**
 * Copy the entries of a `Headers`-like object (anything exposing
 * `forEach((value, key) => ...)`) into a plain object keyed by the
 * lowercased header name. Later entries overwrite earlier ones.
 */
function headersToObject(headers) {
  const normalized = {};
  const copyEntry = (headerValue, headerName) => {
    normalized[headerName.toLowerCase()] = headerValue;
  };
  headers.forEach(copyEntry);
  return normalized;
}
|
|
1183
|
-
|
|
1184
|
-
* Truncates body text to the maximum allowed length
|
|
1185
|
-
*/
|
|
1186
|
-
function truncateBodyText(text) {
|
|
1187
|
-
if (text.length <= MAX_BODY_TEXT_LENGTH) return text;
|
|
1188
|
-
return text.slice(0, MAX_BODY_TEXT_LENGTH) + "... [truncated]";
|
|
1189
|
-
}
|
|
1190
|
-
/**
|
|
1191
|
-
* Type guard to check if an error is an AbortError
|
|
1192
|
-
*/
|
|
1193
|
-
function isAbortError(error) {
|
|
1761
|
+
/**
 * Tell whether `error` is an `Error` instance named `"AbortError"` or
 * `"TimeoutError"`.
 */
function isAbortLikeError(error) {
  if (!(error instanceof Error)) return false;
  const { name } = error;
  return name === "AbortError" || name === "TimeoutError";
}
|
|
1197
1765
|
/**
|
|
1198
|
-
*
|
|
1199
|
-
*
|
|
1200
|
-
|
|
1201
|
-
|
|
1202
|
-
|
|
1203
|
-
|
|
1204
|
-
|
|
1205
|
-
*
|
|
1206
|
-
|
|
1207
|
-
|
|
1208
|
-
|
|
1209
|
-
}
|
|
1210
|
-
|
|
1211
|
-
* Checks if an error is a 404 Not Found error
|
|
1766
|
+
* Browser-safe REST client for TTS generation.
|
|
1767
|
+
*
|
|
1768
|
+
* Provides `generate()` (buffered) and `generateStream()` (streaming)
|
|
1769
|
+
* using only `globalThis.fetch`. HTTP failures are surfaced as
|
|
1770
|
+
* {@link SonioxHttpError}, matching the rest of the Soniox SDK.
|
|
1771
|
+
*
|
|
1772
|
+
* Authentication uses the `Authorization: Bearer <api_key>` header.
|
|
1773
|
+
*
|
|
1774
|
+
* @example
|
|
1775
|
+
* ```typescript
|
|
1776
|
+
* const client = new TtsRestClient(apiKey, 'https://tts-rt.soniox.com');
|
|
1777
|
+
* const audio = await client.generate({ text: 'Hello', voice: 'Adrian' });
|
|
1778
|
+
* ```
|
|
1212
1779
|
*/
|
|
1213
|
-
|
|
1214
|
-
|
|
1215
|
-
|
|
1780
|
+
var TtsRestClient = class {
  apiKey;
  ttsApiUrl;
  /**
   * @param apiKey - API key sent as `Authorization: Bearer <api_key>`
   * @param ttsApiUrl - Base URL of the TTS REST API (without `/tts`)
   */
  constructor(apiKey, ttsApiUrl) {
    this.apiKey = apiKey;
    this.ttsApiUrl = ttsApiUrl;
  }
  /**
   * Generate speech audio from text. Returns the full audio as a `Uint8Array`.
   *
   * @throws {@link SonioxHttpError} on non-2xx responses, network failures,
   * or aborted requests.
   */
  async generate(options) {
    const url = `${this.ttsApiUrl}/tts`;
    const response = await this.sendRequest(url, options);
    return new Uint8Array(await response.arrayBuffer());
  }
  /**
   * Generate speech audio from text as a streaming async iterable.
   *
   * Yields `Uint8Array` chunks as they arrive from the server response body.
   * Lower time-to-first-audio than {@link generate}.
   *
   * When the consumer stops early (`break`, `return`, or an exception inside
   * `for await`), the response body is cancelled so the HTTP response does
   * not stay open.
   *
   * **Known limitation:** Mid-stream server errors (reported via HTTP trailers)
   * cannot be detected through the `fetch` API. The iterator may end early
   * without an explicit error. Use WebSocket TTS for reliable error detection.
   *
   * @throws {@link SonioxHttpError} on non-2xx responses, network failures,
   * or aborted requests (before the stream starts).
   */
  async *generateStream(options) {
    const url = `${this.ttsApiUrl}/tts`;
    const response = await this.sendRequest(url, options);
    if (!response.body) throw createHttpError(url, "POST", response.status, headersToObject(response.headers), "Response has no body stream");
    const reader = response.body.getReader();
    let drained = false;
    try {
      while (true) {
        const { done, value } = await reader.read();
        if (done) {
          drained = true;
          break;
        }
        yield value;
      }
    } finally {
      // Releasing the lock alone leaves the body open. If the stream was not
      // read to the end, cancel it; a cancel failure (e.g. the stream already
      // errored) is ignored because there is nothing left to clean up.
      if (!drained) await reader.cancel().catch(() => {});
      reader.releaseLock();
    }
  }
  /**
   * Internal request helper. Performs the fetch, maps network/abort failures
   * to {@link SonioxHttpError}, and throws on non-2xx responses.
   */
  async sendRequest(url, options) {
    const payload = buildPayload(options);
    let response;
    try {
      response = await globalThis.fetch(url, {
        method: "POST",
        headers: {
          Authorization: `Bearer ${this.apiKey}`,
          "Content-Type": "application/json"
        },
        body: JSON.stringify(payload),
        ...options.signal && { signal: options.signal }
      });
    } catch (cause) {
      if (isAbortLikeError(cause)) throw createAbortError(url, "POST", cause);
      throw createNetworkError(url, "POST", cause);
    }
    if (!response.ok) {
      // The error body is best effort: an unreadable body becomes "".
      const bodyText = await response.text().catch(() => "");
      throw createHttpError(url, "POST", response.status, headersToObject(response.headers), bodyText);
    }
    return response;
  }
};
|
|
1854
|
+
|
|
1855
|
+
//#endregion
|
|
1856
|
+
//#region src/async/auth.ts
|
|
1857
|
+
var SonioxAuthAPI = class {
  constructor(http) {
    this.http = http;
  }
  /**
   * Creates a temporary API key for client-side use.
   *
   * `request.expires_in_seconds` must be a finite number between
   * `SONIOX_TMP_API_KEY_DURATION_MIN` and `SONIOX_TMP_API_KEY_DURATION_MAX`
   * (inclusive); it is validated before any request is sent.
   *
   * @param request - Request parameters for the temporary key
   * @param signal - Optional AbortSignal for cancellation
   * @returns The temporary API key response
   * @throws Error when `expires_in_seconds` is not a finite number in range
   *
   * @example
   * ```typescript
   * const sttKey = await client.auth.createTemporaryKey({
   *   usage_type: 'transcribe_websocket',
   *   expires_in_seconds: 300,
   * });
   *
   * const ttsKey = await client.auth.createTemporaryKey({
   *   usage_type: 'tts_rt',
   *   expires_in_seconds: 300,
   * });
   * ```
   */
  async createTemporaryKey(request, signal) {
    const { expires_in_seconds: lifetime } = request;
    // Validate against the shared module constants rather than repeating
    // the literal limits, so the check and the exported bounds cannot drift.
    const withinLimits = Number.isFinite(lifetime) && lifetime >= SONIOX_TMP_API_KEY_DURATION_MIN && lifetime <= SONIOX_TMP_API_KEY_DURATION_MAX;
    if (!withinLimits) throw new Error(`expires_in_seconds must be a finite number between ${SONIOX_TMP_API_KEY_DURATION_MIN} and ${SONIOX_TMP_API_KEY_DURATION_MAX}`);
    const response = await this.http.request({
      method: "POST",
      path: "/v1/auth/temporary-api-key",
      body: request,
      ...signal && { signal }
    });
    return response.data;
  }
};
|
|
1216
1891
|
|
|
1217
1892
|
//#endregion
|
|
1218
1893
|
//#region src/async/files.ts
|
|
@@ -2686,6 +3361,86 @@ var SonioxSttApi = class {
|
|
|
2686
3361
|
}
|
|
2687
3362
|
};
|
|
2688
3363
|
|
|
3364
|
+
//#endregion
|
|
3365
|
+
//#region src/async/tts.ts
|
|
3366
|
+
/**
 * REST API for Text-to-Speech generation and TTS model listing.
 *
 * Accessed via `client.tts` on {@link SonioxNodeClient}.
 *
 * Extends `TtsRestClient` (which supplies `generate()` and
 * `generateStream()`) with the Node-specific `generateToFile()` and with
 * `listModels()`.
 */
var SonioxTtsApi = class extends TtsRestClient {
  http;
  constructor(apiKey, ttsApiUrl, http) {
    super(apiKey, ttsApiUrl);
    this.http = http;
  }
  /**
   * Generate speech audio and write to a file or writable stream.
   *
   * A string `output` is treated as a file path: the audio is generated in
   * full and written with `writeFile`. Any other `output` is treated as a
   * `WritableStream<Uint8Array>`: chunks are written as they arrive and the
   * writer lock is released afterwards; the stream itself is not closed.
   *
   * @param output - File path (string) or a `WritableStream<Uint8Array>`
   * @param options - Generation options
   * @returns Number of bytes written
   *
   * @example Write to file
   * ```typescript
   * const bytes = await client.tts.generateToFile('output.wav', {
   *   text: 'Hello world',
   *   voice: 'Adrian',
   *   language: 'en',
   * });
   * ```
   *
   * @example Write to a writable stream
   * ```typescript
   * const bytes = await client.tts.generateToFile(writableStream, {
   *   text: 'Hello world',
   *   voice: 'Adrian',
   *   language: 'en',
   * });
   * ```
   */
  async generateToFile(output, options) {
    if (typeof output !== "string") {
      let total = 0;
      const sink = output.getWriter();
      try {
        for await (const piece of this.generateStream(options)) {
          await sink.write(piece);
          total += piece.byteLength;
        }
      } finally {
        sink.releaseLock();
      }
      return total;
    }
    const audio = await this.generate(options);
    await writeFile(output, audio);
    return audio.byteLength;
  }
  /**
   * List available TTS models and their voices.
   *
   * @param signal - Optional AbortSignal for cancellation
   *
   * @example
   * ```typescript
   * const models = await client.tts.listModels();
   * for (const model of models) {
   *   console.log(model.id, model.voices.map(v => v.id));
   * }
   * ```
   */
  async listModels(signal) {
    const request = {
      method: "GET",
      path: "/v1/tts-models"
    };
    if (signal) request.signal = signal;
    const response = await this.http.request(request);
    return response.data.models;
  }
};
|
|
3443
|
+
|
|
2689
3444
|
//#endregion
|
|
2690
3445
|
//#region src/async/webhooks.ts
|
|
2691
3446
|
const VALID_STATUSES = ["completed", "error"];
|
|
@@ -3508,36 +4263,75 @@ function combineAbortSignals(...signals) {
|
|
|
3508
4263
|
//#endregion
|
|
3509
4264
|
//#region src/realtime/index.ts
|
|
3510
4265
|
/**
|
|
3511
|
-
* Real-time API factory for creating STT sessions.
|
|
4266
|
+
* Real-time API factory for creating STT sessions and TTS connections.
|
|
3512
4267
|
*
|
|
3513
|
-
* @example
|
|
4268
|
+
* @example STT
|
|
4269
|
+
* ```typescript
|
|
4270
|
+
* const session = client.realtime.stt({ model: 'stt-rt-v4' });
|
|
4271
|
+
* await session.connect();
|
|
4272
|
+
* ```
|
|
4273
|
+
*
|
|
4274
|
+
* @example TTS (single stream)
|
|
3514
4275
|
* ```typescript
|
|
3515
|
-
* const
|
|
3516
|
-
* model: '
|
|
3517
|
-
*
|
|
4276
|
+
* const stream = await client.realtime.tts({
|
|
4277
|
+
* model: 'tts-rt-v1-preview',
|
|
4278
|
+
* voice: 'Adrian',
|
|
4279
|
+
* language: 'en',
|
|
4280
|
+
* audio_format: 'wav',
|
|
3518
4281
|
* });
|
|
4282
|
+
* stream.sendText("Hello");
|
|
4283
|
+
* stream.finish();
|
|
4284
|
+
* for await (const chunk of stream) { ... }
|
|
4285
|
+
* ```
|
|
3519
4286
|
*
|
|
3520
|
-
*
|
|
4287
|
+
* @example TTS (multi-stream)
|
|
4288
|
+
* ```typescript
|
|
4289
|
+
* const conn = await client.realtime.tts.multiStream();
|
|
4290
|
+
* const stream = await conn.stream({
|
|
4291
|
+
* model: 'tts-rt-v1-preview',
|
|
4292
|
+
* voice: 'Adrian',
|
|
4293
|
+
* language: 'en',
|
|
4294
|
+
* audio_format: 'wav',
|
|
4295
|
+
* });
|
|
3521
4296
|
* ```
|
|
3522
4297
|
*/
|
|
3523
4298
|
var SonioxRealtimeApi = class {
  options;
  /**
   * Callable TTS entry point: `tts(input)` opens a single stream on its own
   * connection, `tts.multiStream()` returns a connected
   * `RealtimeTtsConnection`.
   */
  tts;
  constructor(options) {
    this.options = options;
    const ttsCall = (input) => {
      return this.createSingleTtsStream(input ?? {});
    };
    ttsCall.multiStream = () => {
      return this.createTtsConnection();
    };
    this.tts = ttsCall;
  }
  /**
   * Create a new Speech-to-Text session.
   *
   * `config` is shallow-merged on top of `stt_defaults` from the client
   * options; caller-provided fields override the defaults. `options` is
   * merged the same way on top of `default_session_options`.
   */
  stt(config, options) {
    const mergedOptions = {
      ...this.options.default_session_options,
      ...options
    };
    const mergedConfig = {
      ...this.options.stt_defaults,
      ...config
    };
    return new RealtimeSttSession(this.options.api_key, this.options.ws_base_url, mergedConfig, mergedOptions);
  }
  /**
   * Open one TTS stream on a dedicated connection that the stream owns.
   *
   * If opening the stream fails, the connection is closed before the error
   * is rethrown: the caller never receives a handle to it, so nothing else
   * could close the socket or stop its keepalive timer.
   */
  async createSingleTtsStream(input) {
    const connection = new RealtimeTtsConnection(this.options.api_key, this.options.tts_ws_url, this.options.tts_defaults ?? {}, this.options.tts_connection_options);
    try {
      return await connection._openStream(input, true);
    } catch (error) {
      connection.close();
      throw error;
    }
  }
  /** Create a TTS connection and connect it before returning it. */
  async createTtsConnection() {
    const connection = new RealtimeTtsConnection(this.options.api_key, this.options.tts_ws_url, this.options.tts_defaults ?? {}, this.options.tts_connection_options);
    await connection.connect();
    return connection;
  }
};
|
|
3543
4337
|
|
|
@@ -3545,20 +4339,37 @@ var SonioxRealtimeApi = class {
|
|
|
3545
4339
|
//#region src/client.ts
|
|
3546
4340
|
/**
|
|
3547
4341
|
* Soniox Node Client
|
|
3548
|
-
* @returns {SonioxNodeClient}
|
|
3549
4342
|
*
|
|
3550
4343
|
* @example
|
|
3551
4344
|
* ```typescript
|
|
3552
4345
|
* import { SonioxNodeClient } from '@soniox/node';
|
|
3553
4346
|
*
|
|
3554
|
-
*
|
|
3555
|
-
*
|
|
4347
|
+
* // Default (US) region
|
|
4348
|
+
* const client = new SonioxNodeClient({ api_key: 'your-api-key' });
|
|
4349
|
+
*
|
|
4350
|
+
* // EU region
|
|
4351
|
+
* const client = new SonioxNodeClient({ api_key: 'your-api-key', region: 'eu' });
|
|
4352
|
+
*
|
|
4353
|
+
* // REST TTS
|
|
4354
|
+
* const audio = await client.tts.generate({
|
|
4355
|
+
* text: 'Hello',
|
|
4356
|
+
* voice: 'Adrian',
|
|
4357
|
+
* language: 'en',
|
|
4358
|
+
* });
|
|
4359
|
+
*
|
|
4360
|
+
* // WebSocket TTS
|
|
4361
|
+
* const stream = await client.realtime.tts({
|
|
4362
|
+
* model: 'tts-rt-v1-preview',
|
|
4363
|
+
* voice: 'Adrian',
|
|
4364
|
+
* language: 'en',
|
|
4365
|
+
* audio_format: 'wav',
|
|
3556
4366
|
* });
|
|
3557
4367
|
* ```
|
|
3558
4368
|
*/
|
|
3559
4369
|
var SonioxNodeClient = class {
|
|
3560
4370
|
files;
|
|
3561
4371
|
stt;
|
|
4372
|
+
tts;
|
|
3562
4373
|
models;
|
|
3563
4374
|
webhooks;
|
|
3564
4375
|
auth;
|
|
@@ -3566,7 +4377,14 @@ var SonioxNodeClient = class {
|
|
|
3566
4377
|
constructor(options = {}) {
|
|
3567
4378
|
const apiKey = options.api_key ?? process.env["SONIOX_API_KEY"];
|
|
3568
4379
|
if (!apiKey) throw new Error("Missing API key. Provide it via options.api_key or set the SONIOX_API_KEY environment variable.");
|
|
3569
|
-
const
|
|
4380
|
+
const regionDefaults = resolveConnectionConfig({
|
|
4381
|
+
api_key: apiKey,
|
|
4382
|
+
region: options.region ?? process.env["SONIOX_REGION"],
|
|
4383
|
+
base_domain: options.base_domain ?? process.env["SONIOX_BASE_DOMAIN"],
|
|
4384
|
+
stt_defaults: options.stt_defaults,
|
|
4385
|
+
tts_defaults: options.tts_defaults
|
|
4386
|
+
});
|
|
4387
|
+
const baseURL = options.base_url ?? process.env["SONIOX_API_BASE_URL"] ?? regionDefaults.api_domain;
|
|
3570
4388
|
const http = options.http_client ?? new FetchHttpClient({
|
|
3571
4389
|
base_url: baseURL,
|
|
3572
4390
|
default_headers: {
|
|
@@ -3579,14 +4397,19 @@ var SonioxNodeClient = class {
|
|
|
3579
4397
|
this.models = new SonioxModelsAPI(http);
|
|
3580
4398
|
this.webhooks = new SonioxWebhooksAPI(this.stt);
|
|
3581
4399
|
this.auth = new SonioxAuthAPI(http);
|
|
4400
|
+
this.tts = new SonioxTtsApi(apiKey, options.tts_api_url ?? process.env["SONIOX_TTS_API_URL"] ?? regionDefaults.tts_api_url, http);
|
|
3582
4401
|
this.realtime = new SonioxRealtimeApi({
|
|
3583
4402
|
api_key: apiKey,
|
|
3584
|
-
ws_base_url: options.realtime?.ws_base_url ?? process.env["SONIOX_WS_URL"] ??
|
|
4403
|
+
ws_base_url: options.realtime?.ws_base_url ?? process.env["SONIOX_WS_URL"] ?? regionDefaults.stt_ws_url,
|
|
4404
|
+
tts_ws_url: options.realtime?.tts_ws_url ?? process.env["SONIOX_TTS_WS_URL"] ?? regionDefaults.tts_ws_url,
|
|
4405
|
+
stt_defaults: regionDefaults.stt_defaults,
|
|
4406
|
+
tts_defaults: regionDefaults.tts_defaults,
|
|
4407
|
+
tts_connection_options: options.realtime?.tts_connection_options,
|
|
3585
4408
|
default_session_options: options.realtime?.default_session_options
|
|
3586
4409
|
});
|
|
3587
4410
|
}
|
|
3588
4411
|
};
|
|
3589
4412
|
|
|
3590
4413
|
//#endregion
|
|
3591
|
-
export { AbortError, AuthError, BadRequestError, ConnectionError, FetchHttpClient, FileListResult, NetworkError, QuotaError, RealtimeError, RealtimeSegmentBuffer, RealtimeSttSession, RealtimeUtteranceBuffer, SONIOX_API_BASE_URL, SONIOX_API_WEBHOOK_HEADER_ENV, SONIOX_API_WEBHOOK_SECRET_ENV, SONIOX_API_WS_URL, SONIOX_TMP_API_KEY_DURATION_MAX, SONIOX_TMP_API_KEY_DURATION_MIN, SONIOX_TMP_API_KEY_USAGE_TYPE, SonioxError, SonioxFile, SonioxHttpError, SonioxNodeClient, SonioxRealtimeApi, SonioxTranscript, SonioxTranscription, StateError, TranscriptionListResult, buildUrl, createAbortError, createHttpError, createNetworkError, createParseError, createTimeoutError, isAbortError, isSonioxError, isSonioxHttpError, mergeHeaders, normalizeHeaders, segmentRealtimeTokens, segmentTranscript };
|
|
4414
|
+
export { AbortError, AuthError, BadRequestError, ConnectionError, FetchHttpClient, FileListResult, NetworkError, QuotaError, RealtimeError, RealtimeSegmentBuffer, RealtimeSttSession, RealtimeTtsConnection, RealtimeTtsStream, RealtimeUtteranceBuffer, SONIOX_API_BASE_URL, SONIOX_API_WEBHOOK_HEADER_ENV, SONIOX_API_WEBHOOK_SECRET_ENV, SONIOX_API_WS_URL, SONIOX_TMP_API_KEY_DURATION_MAX, SONIOX_TMP_API_KEY_DURATION_MIN, SONIOX_TMP_API_KEY_USAGE_TYPE, SONIOX_TTS_API_BASE_URL, SONIOX_TTS_WS_URL, SonioxError, SonioxFile, SonioxHttpError, SonioxNodeClient, SonioxRealtimeApi, SonioxTranscript, SonioxTranscription, SonioxTtsApi, StateError, TranscriptionListResult, buildUrl, createAbortError, createHttpError, createNetworkError, createParseError, createTimeoutError, isAbortError, isSonioxError, isSonioxHttpError, mergeHeaders, normalizeHeaders, resolveConnectionConfig, segmentRealtimeTokens, segmentTranscript };
|
|
3592
4415
|
//# sourceMappingURL=index.mjs.map
|