@inworld/tts 0.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of @inworld/tts might be problematic. Click here for more details.
- package/CHANGELOG.md +9 -0
- package/LICENSE +21 -0
- package/README.md +332 -0
- package/dist/index.cjs +1580 -0
- package/package.json +77 -0
- package/src/client.js +929 -0
- package/src/config.js +135 -0
- package/src/encoding.js +23 -0
- package/src/errors.js +31 -0
- package/src/index.d.ts +363 -0
- package/src/index.js +149 -0
- package/src/player.browser.js +53 -0
- package/src/player.js +143 -0
- package/src/voice.js +498 -0
- package/src/write-file.browser.js +7 -0
- package/src/write-file.js +11 -0
package/dist/index.cjs
ADDED
|
@@ -0,0 +1,1580 @@
|
|
|
1
|
+
// Bundler runtime helpers: cached built-ins plus the small functions used to
// build module namespace objects for the CommonJS output.
var __create = Object.create;
var __defProp = Object.defineProperty;
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
var __getOwnPropNames = Object.getOwnPropertyNames;
var __getProtoOf = Object.getPrototypeOf;
var __hasOwnProp = Object.prototype.hasOwnProperty;

/**
 * Defines one enumerable getter on `target` for every enumerable key of `all`;
 * each value in `all` is used directly as the getter function.
 */
var __export = (target, all) => {
  for (var name in all) {
    __defProp(target, name, { get: all[name], enumerable: true });
  }
};

/**
 * Mirrors the own properties of `from` onto `to` as live getters, skipping
 * `except` and anything `to` already owns. Returns `to`.
 */
var __copyProps = (to, from, except, desc) => {
  const isCopyable = (from && typeof from === "object") || typeof from === "function";
  if (isCopyable) {
    for (const key of __getOwnPropNames(from)) {
      if (__hasOwnProp.call(to, key) || key === except) continue;
      desc = __getOwnPropDesc(from, key);
      // Enumerability follows the source descriptor when one exists.
      __defProp(to, key, { get: () => from[key], enumerable: !desc || desc.enumerable });
    }
  }
  return to;
};

/**
 * Wraps a required module in an ESM-shaped namespace object.
 * If the importer is in node compatibility mode, or the module has not been
 * marked with "__esModule", "default" is set to the module itself.
 */
var __toESM = (mod, isNodeMode, target) => {
  target = mod != null ? __create(__getProtoOf(mod)) : {};
  const needsDefault = isNodeMode || !mod || !mod.__esModule;
  const seeded = needsDefault
    ? __defProp(target, "default", { value: mod, enumerable: true })
    : target;
  return __copyProps(seeded, mod);
};

/** Produces the CommonJS exports object: an "__esModule" marker plus live getters. */
var __toCommonJS = (mod) => {
  const marked = __defProp({}, "__esModule", { value: true });
  return __copyProps(marked, mod);
};
|
|
28
|
+
|
|
29
|
+
// src/index.js
|
|
30
|
+
// Public surface of the package. Every entry is a lazy getter, so the bindings
// declared further down the bundle are only read when a consumer accesses them.
var index_exports = {};
__export(index_exports, {
  ApiError: () => ApiError,
  InworldTTS: () => InworldTTS,
  InworldTTSError: () => InworldTTSError,
  MissingApiKeyError: () => MissingApiKeyError,
  NetworkError: () => NetworkError,
  createClient: () => createClient,
});
module.exports = __toCommonJS(index_exports);
|
|
40
|
+
|
|
41
|
+
// src/config.js
|
|
42
|
+
// Fallback API origin used by getBaseUrl when nothing else is configured.
var DEFAULT_BASE_URL = "https://api.inworld.ai";

// Chunking limits, expressed in "effective" characters (see estimateEffectiveLength).
var MAX_CHUNK_SIZE = 1900;
var MIN_CHUNK_SIZE = 500;

// Characters charged per second of <break> pause when estimating length.
var CHARS_PER_SECOND = 12;
// Weight of one CJK character relative to one other character.
var CJK_CHAR_WEIGHT = 3;

// Seconds of silence placed between merged LINEAR16 buffers.
var SPLICE_BREAK_SECONDS = 0.5;

// Default PCM layout used for generated silence and WAV headers.
var SAMPLE_RATE = 48000;
var BITS_PER_SAMPLE = 16;
var CHANNELS = 1;

// Default number of chunk requests in flight at once.
var MAX_CONCURRENT_REQUESTS = 2;

// Effective-length threshold above which generate() chunks the text.
var GENERATE_MAX_CHARS = 2000;
// NOTE(review): presumably the equivalent threshold for streaming; its use is not visible here.
var STREAM_MAX_CHARS = 2000;

// Environment variables, or an empty object where `process` does not exist.
var _env = typeof process === "undefined" ? {} : process.env;
|
|
55
|
+
/**
 * Writes a "[inworld-tts]"-prefixed line via console.debug when debugging is
 * switched on, either by `config.debug` or by the DEBUG environment variable
 * being exactly "inworld-tts". Otherwise does nothing.
 *
 * @param {object} config - client configuration; only `debug` is read.
 * @param {...*} args - values forwarded to console.debug.
 */
function debugLog(config, ...args) {
  const enabled = config.debug || _env.DEBUG === "inworld-tts";
  if (!enabled) return;
  console.debug("[inworld-tts]", ...args);
}
|
|
60
|
+
/**
 * Decides whether a failed request is worth repeating, based on the error name.
 * Aborts and timeouts are never retried; "NetworkError" always is; "ApiError"
 * only when it carries a numeric code of 500 or above.
 *
 * @param {*} e - the thrown value.
 * @returns {boolean}
 */
function isRetryable(e) {
  if (!e) return false;
  switch (e.name) {
    case "NetworkError":
      return true;
    case "ApiError":
      return typeof e.code === "number" && e.code >= 500;
    default:
      // Includes "AbortError" / "TimeoutError" and every unknown error type.
      return false;
  }
}
|
|
66
|
+
/**
 * Runs `fn`, repeating it after retryable failures with exponential backoff
 * (1s, 2s, 4s, ... capped at 16s between attempts).
 *
 * `fn` is always invoked at least once: a negative, NaN or otherwise unusable
 * `config.maxRetries` is treated as 0 retries instead of skipping the call.
 *
 * @param {() => Promise<*>} fn - operation to attempt.
 * @param {object} config - client configuration; `maxRetries` defaults to 2.
 * @returns {Promise<*>} whatever `fn` resolves to.
 * @throws the last error from `fn` once it is non-retryable or retries run out.
 */
async function withRetry(fn, config) {
  const requested = Number(config.maxRetries ?? 2);
  // `NaN > 0` is false, so NaN and negatives both collapse to "no retries".
  const retries = requested > 0 ? requested : 0;
  for (let attempt = 0; attempt <= retries; attempt++) {
    if (attempt > 0) {
      const delay = Math.min(1e3 * 2 ** (attempt - 1), 16e3);
      debugLog(config, `retry ${attempt}/${retries} after ${delay}ms`);
      await new Promise((r) => setTimeout(r, delay));
    }
    try {
      return await fn();
    } catch (e) {
      if (!isRetryable(e) || attempt >= retries) throw e;
      debugLog(config, `retryable error (${e.name}): ${e.message}`);
    }
  }
}
|
|
82
|
+
/**
 * Resolves the API base URL: the explicit argument first, then the
 * INWORLD_BASE_URL environment variable, then DEFAULT_BASE_URL.
 * All trailing slashes are removed so callers can append "/path" without
 * producing a double slash.
 *
 * @param {string} [baseUrl] - explicit override.
 * @returns {string} base URL without trailing slashes.
 */
function getBaseUrl(baseUrl) {
  const configured = baseUrl || _env.INWORLD_BASE_URL || DEFAULT_BASE_URL;
  return configured.replace(/\/+$/, "");
}
|
|
86
|
+
/**
 * Reports whether the globals `window`, `window.document` and `navigator`
 * are all defined.
 *
 * @returns {boolean}
 */
function isRunningInBrowser() {
  if (typeof window === "undefined") return false;
  if (typeof window.document === "undefined") return false;
  return typeof navigator !== "undefined";
}
|
|
89
|
+
/**
 * Creates an AbortSignal that aborts after `ms` milliseconds.
 *
 * @param {number} ms - delay before the signal aborts.
 * @returns {{ signal: AbortSignal, clear: () => void }} the signal plus a
 *   function that cancels the pending timer.
 */
function getTimeoutSignal(ms) {
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), ms);
  const clear = () => clearTimeout(timer);
  return { signal: controller.signal, clear };
}
|
|
94
|
+
/**
 * Reads the `exp` claim from a JWT without verifying the signature.
 *
 * The payload segment is converted from base64url to padded base64, decoded
 * and parsed as JSON. Only a finite number (or a numeric string, which is
 * converted) is accepted as an expiry; any other value is treated like a
 * missing claim, so the caller never receives something that turns into NaN
 * in date arithmetic.
 *
 * @param {string} token - the JWT.
 * @returns {number|null} expiry as found in the token, or null when the token
 *   cannot be parsed or has no usable `exp` (a warning is logged).
 */
function getJwtExp(token) {
  let exp = null;
  try {
    let b64 = token.split(".")[1].replace(/-/g, "+").replace(/_/g, "/");
    while (b64.length % 4) b64 += "=";
    const claim = JSON.parse(atob(b64)).exp;
    const seconds = typeof claim === "string" && claim.trim() !== "" ? Number(claim) : claim;
    if (typeof seconds === "number" && Number.isFinite(seconds)) exp = seconds;
  } catch {
    // Malformed token: fall through with exp === null so the warning below fires.
  }
  if (exp === null) {
    console.warn("[inworld-tts] Could not parse token expiry \u2014 token refresh will not be scheduled. Ensure token is a valid JWT.");
  }
  return exp;
}
|
|
108
|
+
/**
 * Refreshes the JWT held in `config` when it is expired or close to expiring.
 *
 * Does nothing unless both `config._token` and `config._onTokenExpiring` are
 * set and the token has a readable expiry. An already-expired token is
 * refreshed before this function resolves; a token with less than five minutes
 * left triggers a background refresh that is not awaited. Concurrent callers
 * share one in-flight refresh through `config._refreshPromise`.
 *
 * Refresh failures are logged and never thrown, whatever value the callback
 * rejects with.
 *
 * @param {object} config - client configuration; `_token` and `_authHeader`
 *   are updated in place on success.
 * @returns {Promise<void>}
 */
async function ensureFreshToken(config) {
  if (!config._token || !config._onTokenExpiring) return;
  const exp = getJwtExp(config._token);
  if (!exp) return;
  const msUntilExp = exp * 1e3 - Date.now();
  const doRefresh = () => {
    if (config._refreshPromise) return config._refreshPromise;
    config._refreshPromise = Promise.resolve()
      .then(() => config._onTokenExpiring())
      .then((newToken) => {
        if (!newToken || typeof newToken !== "string") {
          console.warn("[inworld-tts] onTokenExpiring must return a non-empty string token");
          return;
        }
        config._token = newToken;
        config._authHeader = `Bearer ${newToken}`;
      })
      .catch((e) => {
        // The callback may reject with a non-Error (string, undefined, ...);
        // reading `.message` blindly would throw and surface as an unhandled rejection.
        console.warn("[inworld-tts] Token refresh failed:", e?.message ?? e);
      })
      .finally(() => {
        config._refreshPromise = null;
      });
    return config._refreshPromise;
  };
  if (msUntilExp <= 0) {
    await doRefresh();
  } else if (msUntilExp < 5 * 60 * 1e3) {
    // Deliberately not awaited: the current token is still valid.
    void doRefresh();
  }
}
|
|
135
|
+
|
|
136
|
+
// src/errors.js
|
|
137
|
+
/** Base class for every error thrown by this library. */
var InworldTTSError = class InworldTTSError extends Error {
  constructor(message) {
    super(message);
    this.name = "InworldTTSError";
  }
};

/** Raised when no credentials are configured; the default message lists the ways to supply them. */
var MissingApiKeyError = class MissingApiKeyError extends InworldTTSError {
  constructor(message = 'INWORLD_API_KEY is not set. To fix this:\n 1. Set the environment variable: export INWORLD_API_KEY=your_key\n 2. Pass it to the constructor: InworldTTS({ apiKey: "your_key" })\n 3. Or use a JWT token: InworldTTS({ token: "your_jwt" })\n (See: https://docs.inworld.ai/api-reference/introduction#jwt-authentication)\nGet your API key at https://platform.inworld.ai') {
    super(message);
    this.name = "MissingApiKeyError";
  }
};

/**
 * Error carrying an optional `code` (the HTTP status when raised for a failed
 * response) and a `details` payload.
 */
var ApiError = class ApiError extends InworldTTSError {
  constructor(message, code = null, details = {}) {
    super(message);
    this.name = "ApiError";
    Object.assign(this, { code, details });
  }
};

/** Raised for transport-level failures. */
var NetworkError = class NetworkError extends InworldTTSError {
  constructor(message) {
    super(message);
    this.name = "NetworkError";
  }
};
|
|
163
|
+
|
|
164
|
+
// src/write-file.js
|
|
165
|
+
/**
 * Writes `data` to `path` using fs/promises, which is loaded lazily.
 *
 * @param {string} path - destination file.
 * @param {Uint8Array|string} data - content to write.
 * @returns {Promise<void>}
 * @throws {ApiError} when `process.versions.node` is unavailable.
 */
async function writeFileSafe(path, data) {
  const hasNode = typeof process !== "undefined" && Boolean(process.versions?.node);
  if (!hasNode) {
    throw new ApiError("outputFile is not supported in browser environments. Use the returned Uint8Array directly.");
  }
  const fsPromises = await import("fs/promises");
  await fsPromises.writeFile(path, data);
}
|
|
172
|
+
|
|
173
|
+
// src/player.js
|
|
174
|
+
var import_fs = require("fs");
|
|
175
|
+
var import_promises = require("fs/promises");
|
|
176
|
+
var import_child_process = require("child_process");
|
|
177
|
+
var import_os = require("os");
|
|
178
|
+
var import_path = require("path");
|
|
179
|
+
|
|
180
|
+
// src/encoding.js
|
|
181
|
+
/**
 * Guesses the audio container from the first four bytes.
 *
 * Recognises the "RIFF", "fLaC" and "OggS" signatures; everything else,
 * including missing or very short input, is reported as "MP3".
 *
 * @param {Uint8Array} audio - encoded audio bytes.
 * @returns {"WAV"|"FLAC"|"OGG_OPUS"|"MP3"}
 */
function detectEncoding(audio) {
  if (!audio || audio.length < 4) return "MP3";
  const signatures = [
    ["WAV", [82, 73, 70, 70]],
    ["FLAC", [102, 76, 97, 67]],
    ["OGG_OPUS", [79, 103, 103, 83]],
  ];
  for (const [label, magic] of signatures) {
    if (magic.every((byte, i) => audio[i] === byte)) return label;
  }
  // ID3 tags and MPEG frame syncs need no explicit check: MP3 is the fallback.
  return "MP3";
}
|
|
190
|
+
|
|
191
|
+
// src/player.js
|
|
192
|
+
// File extension given to the temporary file play() writes for each encoding.
var EXT_MAP = {
  MP3: ".mp3",
  OGG_OPUS: ".ogg",
  // WAV-style encodings share the .wav extension.
  LINEAR16: ".wav",
  WAV: ".wav",
  PCM: ".pcm",
  FLAC: ".flac",
  ALAW: ".wav",
  MULAW: ".wav",
};
|
|
202
|
+
/**
 * Looks for `binary` in each directory listed in the PATH environment variable.
 * On Windows the name with ".exe" appended is tried as well.
 *
 * @param {string} binary - executable name.
 * @returns {string|null} first existing candidate path, or null.
 */
function scanPath(binary) {
  const onWindows = process.platform === "win32";
  const directories = (process.env.PATH || "").split(onWindows ? ";" : ":");
  for (const directory of directories) {
    if (!directory) continue; // empty PATH segments are skipped
    const candidate = (0, import_path.join)(directory, binary);
    if ((0, import_fs.existsSync)(candidate)) return candidate;
    if (onWindows) {
      const withExe = candidate + ".exe";
      if ((0, import_fs.existsSync)(withExe)) return withExe;
    }
  }
  return null;
}
|
|
213
|
+
// Arguments passed to ffplay whenever it is the selected player.
var FFPLAY_ARGS = ["-nodisp", "-autoexit", "-loglevel", "quiet"];

/**
 * Picks a command-line player for the given encoding on the current platform.
 *
 * ffplay is preferred everywhere when found on PATH. Otherwise macOS falls back
 * to afplay and Linux to mpg123 / ogg123 / aplay for the encodings listed
 * below, and Windows always uses `cmd /c start /wait ""`. PCM is rejected up
 * front. When nothing matches, an install hint is written to stderr.
 *
 * @param {string} encoding - upper-case encoding name.
 * @returns {{ binary: string, args: string[] }|null}
 */
function findPlayer(encoding) {
  if (encoding === "PCM") {
    process.stderr.write("[inworld-tts] PCM audio cannot be played directly. Use encoding: 'WAV' instead.\n");
    return null;
  }
  const platform = process.platform;
  const ffplay = scanPath("ffplay");
  if (ffplay) return { binary: ffplay, args: FFPLAY_ARGS };

  if (platform === "darwin") {
    const afplayEncodings = ["MP3", "WAV", "FLAC", "ALAW", "MULAW"];
    if (afplayEncodings.includes(encoding)) {
      const afplay = scanPath("afplay");
      if (afplay) return { binary: afplay, args: [] };
    }
  }

  if (platform === "linux") {
    // encoding -> [executable name, arguments]
    const linuxPlayers = {
      MP3: ["mpg123", ["-q"]],
      OGG_OPUS: ["ogg123", []],
      WAV: ["aplay", ["-q"]],
      LINEAR16: ["aplay", ["-q"]],
    };
    if (Object.prototype.hasOwnProperty.call(linuxPlayers, encoding)) {
      const [name, args] = linuxPlayers[encoding];
      const found = scanPath(name);
      if (found) return { binary: found, args };
    }
  }

  if (platform === "win32") {
    return { binary: "cmd", args: ["/c", "start", "/wait", ""] };
  }

  let hint;
  if (platform === "darwin") {
    hint = "Install ffmpeg (brew install ffmpeg) or use afplay (built-in for MP3/WAV/FLAC).";
  } else if (platform === "linux") {
    hint = "Install ffmpeg (apt install ffmpeg) or mpg123/ogg123/aplay for specific formats.";
  } else {
    hint = "Install ffmpeg to enable audio playback.";
  }
  process.stderr.write(`[inworld-tts] No audio player found for encoding "${encoding}". ${hint}\n`);
  return null;
}
|
|
251
|
+
/**
 * Plays an audio file with whichever player findPlayer selects.
 *
 * @param {string} filePath - file to play; appended after the player's own arguments.
 * @param {string} encoding - encoding used to choose the player.
 * @returns {Promise<void>} resolves when the player process closes (whatever
 *   its exit code), or immediately when no player is available; rejects if
 *   the process emits "error".
 */
async function playFile(filePath, encoding) {
  const player = findPlayer(encoding);
  if (!player) return;
  const argv = player.args.slice();
  argv.push(filePath);
  return new Promise((resolve, reject) => {
    const child = (0, import_child_process.spawn)(player.binary, argv, { stdio: "ignore" });
    child.on("close", () => resolve());
    child.on("error", reject);
  });
}
|
|
261
|
+
/**
 * Plays audio given either as a file path or as encoded bytes.
 *
 * For a path, the encoding comes from `options.encoding` or else the file
 * extension (falling back to "MP3"). For bytes, it comes from
 * `options.encoding` or else detectEncoding; the bytes are written to a
 * temporary file that is removed again after playback, even when playback fails.
 *
 * @param {string|Uint8Array} audio - file path or audio bytes.
 * @param {{ encoding?: string }} [options]
 * @returns {Promise<void>}
 */
async function play(audio, options = {}) {
  if (typeof audio === "string") {
    const suffix = audio.split(".").pop().toLowerCase();
    const bySuffix = { mp3: "MP3", wav: "WAV", ogg: "OGG_OPUS", flac: "FLAC", pcm: "PCM" };
    let pathEncoding;
    if (options.encoding) {
      pathEncoding = options.encoding.toUpperCase();
    } else {
      pathEncoding = bySuffix[suffix] || "MP3";
    }
    return playFile(audio, pathEncoding);
  }

  let encoding;
  if (options.encoding) {
    encoding = options.encoding.toUpperCase();
  } else {
    encoding = detectEncoding(audio);
  }
  const extension = EXT_MAP[encoding] || ".mp3";
  const uniquePart = `${Date.now()}-${Math.random().toString(36).slice(2)}`;
  const tmpPath = (0, import_path.join)((0, import_os.tmpdir)(), `inworld-tts-${uniquePart}${extension}`);
  await (0, import_promises.writeFile)(tmpPath, audio);
  try {
    await playFile(tmpPath, encoding);
  } finally {
    // Best-effort cleanup: a failed unlink must not mask the playback outcome.
    await (0, import_promises.unlink)(tmpPath).catch(() => {
    });
  }
}
|
|
279
|
+
|
|
280
|
+
// src/client.js
|
|
281
|
+
/**
 * Decodes a base64 string into bytes. Uses Buffer when the runtime has it,
 * otherwise atob.
 *
 * @param {string} b64 - base64 text.
 * @returns {Uint8Array} decoded bytes (a Buffer when Buffer exists).
 */
function base64ToBytes(b64) {
  if (typeof Buffer !== "undefined") return Buffer.from(b64, "base64");
  const binary = atob(b64);
  // atob yields one character per byte, so each char code is the byte value.
  return Uint8Array.from(binary, (ch) => ch.charCodeAt(0));
}
|
|
288
|
+
/**
 * Joins byte arrays end to end. Uses Buffer.concat when Buffer exists,
 * otherwise copies into a fresh Uint8Array.
 *
 * @param {Uint8Array[]} arrays - pieces in output order.
 * @returns {Uint8Array}
 */
function concatBytes(arrays) {
  if (typeof Buffer !== "undefined") return Buffer.concat(arrays);
  let totalLength = 0;
  for (const piece of arrays) totalLength += piece.length;
  const joined = new Uint8Array(totalLength);
  let cursor = 0;
  for (const piece of arrays) {
    joined.set(piece, cursor);
    cursor += piece.length;
  }
  return joined;
}
|
|
299
|
+
/**
 * Counts the characters of `text` that fall in the ranges U+4E00-U+9FFF,
 * U+3040-U+30FF or U+AC00-U+D7AF.
 *
 * @param {string|null|undefined} text
 * @returns {number} number of matching characters; 0 for null or undefined.
 */
function _countCjk(text) {
  const hits = text?.match(/[\u4e00-\u9fff\u3040-\u30ff\uac00-\ud7af]/g);
  return hits?.length ?? 0;
}
|
|
302
|
+
/**
 * Estimates how "long" a text is for chunking purposes.
 *
 * <break time="..."/> tags are removed from the character count and charged
 * instead as pause seconds times `charsPerSecond`; CJK characters count
 * CJK_CHAR_WEIGHT times each.
 *
 * @param {string} text - input text, possibly containing break tags.
 * @param {number} [charsPerSecond=CHARS_PER_SECOND] - characters charged per second of pause.
 * @returns {number} effective length.
 */
function estimateEffectiveLength(text, charsPerSecond = CHARS_PER_SECOND) {
  let pauseSeconds = 0;
  for (const [, amount, unit] of text.matchAll(/<break\s+time="([\d.]+)(m?s)"\s*\/?>/gi)) {
    const value = parseFloat(amount);
    if (isNaN(value)) continue;
    pauseSeconds += unit.toLowerCase() === "ms" ? value / 1e3 : value;
  }
  // Any <break ...> tag is stripped here, including ones the pattern above could not parse.
  const spoken = text.replace(/<break\s[^>]*\/?>/gi, "");
  const cjk = _countCjk(spoken);
  const weightedLength = spoken.length - cjk + Math.floor(cjk * CJK_CHAR_WEIGHT);
  return weightedLength + Math.floor(pauseSeconds * charsPerSecond);
}
|
|
315
|
+
function findBreakPoint(text, minPos, maxPos) {
|
|
316
|
+
const searchText = text.slice(0, maxPos);
|
|
317
|
+
let searchStart = minPos;
|
|
318
|
+
for (; ; ) {
|
|
319
|
+
const idx = searchText.indexOf("\n\n", searchStart);
|
|
320
|
+
if (idx === -1 || idx >= maxPos) break;
|
|
321
|
+
if (idx >= minPos) return idx + 2;
|
|
322
|
+
searchStart = idx + 1;
|
|
323
|
+
}
|
|
324
|
+
searchStart = minPos;
|
|
325
|
+
for (; ; ) {
|
|
326
|
+
const idx = searchText.indexOf("\n", searchStart);
|
|
327
|
+
if (idx === -1 || idx >= maxPos) break;
|
|
328
|
+
if (idx >= minPos) return idx + 1;
|
|
329
|
+
searchStart = idx + 1;
|
|
330
|
+
}
|
|
331
|
+
const sentenceEnd = /[.!?]["']?\s+|[。!?]["']?\s*|[.!?。!?]["']?$/g;
|
|
332
|
+
let match;
|
|
333
|
+
while ((match = sentenceEnd.exec(searchText)) !== null) {
|
|
334
|
+
if (match.index >= minPos) return match.index + match[0].length;
|
|
335
|
+
}
|
|
336
|
+
sentenceEnd.lastIndex = 0;
|
|
337
|
+
let last = -1;
|
|
338
|
+
while ((match = sentenceEnd.exec(searchText)) !== null) {
|
|
339
|
+
last = match.index + match[0].length;
|
|
340
|
+
}
|
|
341
|
+
if (last > 0) return last;
|
|
342
|
+
const spaceIdx = searchText.lastIndexOf(" ");
|
|
343
|
+
return spaceIdx > 0 ? spaceIdx + 1 : maxPos;
|
|
344
|
+
}
|
|
345
|
+
/**
 * Splits long text into trimmed, non-empty chunks whose effective length stays
 * within MAX_CHUNK_SIZE, cutting at the positions findBreakPoint selects.
 *
 * When the first MAX_CHUNK_SIZE characters weigh more than MAX_CHUNK_SIZE
 * (for example because of CJK text or break tags), the min/max search
 * positions are scaled down proportionally.
 *
 * @param {string} text - text to split.
 * @returns {string[]} chunks in order.
 */
function chunkText(text) {
  const pieces = [];
  const keep = (fragment) => {
    const trimmed = fragment.trim();
    if (trimmed) pieces.push(trimmed);
  };

  for (let cursor = 0; cursor < text.length; ) {
    const remaining = text.slice(cursor);
    if (estimateEffectiveLength(remaining) <= MAX_CHUNK_SIZE) {
      keep(remaining);
      break;
    }

    const head = remaining.slice(0, MAX_CHUNK_SIZE);
    const headWeight = estimateEffectiveLength(head);
    let upper = MAX_CHUNK_SIZE;
    let lower = MIN_CHUNK_SIZE;
    if (headWeight > MAX_CHUNK_SIZE) {
      const ratio = MAX_CHUNK_SIZE / headWeight;
      upper = Math.max(1, Math.floor(head.length * ratio));
      lower = Math.max(1, Math.floor(MIN_CHUNK_SIZE * ratio));
    }

    const cut = findBreakPoint(remaining, lower, upper);
    keep(remaining.slice(0, cut));
    cursor += cut;
  }
  return pieces;
}
|
|
373
|
+
/**
 * Strips the WAV container from a buffer and returns the sample bytes.
 *
 * For a RIFF/WAVE buffer the chunk list is walked to find the "data" chunk,
 * so headers of any size (extra LIST/fact chunks, or a header with no samples
 * at all) are handled. Everything from the start of the sample data to the end
 * of the buffer is returned. If no "data" chunk can be located, RIFF input
 * longer than 44 bytes falls back to skipping a fixed 44-byte header.
 * Non-RIFF input is returned unchanged.
 *
 * @param {Uint8Array} audioData - WAV file bytes or raw PCM.
 * @returns {Uint8Array} a view on the PCM samples (no copy is made).
 */
function extractRawPcm(audioData) {
  const hasTag = (offset, a, b, c, d) =>
    audioData[offset] === a && audioData[offset + 1] === b && audioData[offset + 2] === c && audioData[offset + 3] === d;

  if (audioData.length < 4 || !hasTag(0, 82, 73, 70, 70)) return audioData; // not "RIFF"

  if (audioData.length >= 12 && hasTag(8, 87, 65, 86, 69)) { // "WAVE"
    let offset = 12;
    while (offset + 8 <= audioData.length) {
      if (hasTag(offset, 100, 97, 116, 97)) return audioData.subarray(offset + 8); // "data"
      const size =
        (audioData[offset + 4] | (audioData[offset + 5] << 8) | (audioData[offset + 6] << 16) | (audioData[offset + 7] << 24)) >>> 0;
      // Chunk bodies are padded to an even number of bytes.
      offset += 8 + size + (size & 1);
    }
  }

  // Unrecognised layout: keep the historical fixed-size header assumption.
  return audioData.length > 44 ? audioData.subarray(44) : audioData;
}
|
|
379
|
+
/**
 * Builds a zero-filled PCM buffer lasting roughly `seconds`.
 * The byte count is rounded down to an even number.
 *
 * @param {number} seconds - duration of the silence.
 * @param {number} [sampleRate=SAMPLE_RATE] - samples per second.
 * @returns {Uint8Array} zeroed bytes.
 */
function createSilence(seconds, sampleRate = SAMPLE_RATE) {
  const bytesPerSecond = sampleRate * (BITS_PER_SAMPLE / 8) * CHANNELS;
  const wholeBytes = Math.floor(bytesPerSecond * seconds);
  return new Uint8Array(wholeBytes - (wholeBytes % 2));
}
|
|
385
|
+
/**
 * Concatenates LINEAR16 audio buffers into one headerless PCM stream,
 * optionally separated by silence.
 *
 * Every input, including a lone single buffer, has its WAV header removed,
 * so the result is always raw PCM and callers can safely wrap it in exactly
 * one new header.
 *
 * @param {Uint8Array[]} buffers - audio pieces in playback order.
 * @param {number} [spliceBreak=SPLICE_BREAK_SECONDS] - seconds of silence
 *   between pieces; values <= 0 disable the gap.
 * @param {number} [sampleRate=SAMPLE_RATE] - sample rate used for the silence.
 * @returns {Uint8Array} raw PCM bytes (empty when `buffers` is empty).
 */
function mergeLinear16Buffers(buffers, spliceBreak = SPLICE_BREAK_SECONDS, sampleRate = SAMPLE_RATE) {
  if (buffers.length === 0) return new Uint8Array(0);
  // A single buffer still needs its header stripped to match the multi-buffer result.
  if (buffers.length === 1) return extractRawPcm(buffers[0]);
  const silence = spliceBreak > 0 ? createSilence(spliceBreak, sampleRate) : null;
  const parts = [];
  buffers.forEach((buffer, index) => {
    if (index > 0 && silence) parts.push(silence);
    parts.push(extractRawPcm(buffer));
  });
  return concatBytes(parts);
}
|
|
397
|
+
/**
 * Builds a 44-byte WAV header (format tag 1) for `dataLen` bytes of samples,
 * using the module's CHANNELS and BITS_PER_SAMPLE settings.
 *
 * @param {number} dataLen - size of the sample data in bytes.
 * @param {number} [sampleRate=SAMPLE_RATE] - samples per second.
 * @returns {Uint8Array} the header bytes.
 */
function wavHeader(dataLen, sampleRate = SAMPLE_RATE) {
  const bytesPerSample = BITS_PER_SAMPLE / 8;
  const byteRate = sampleRate * bytesPerSample * CHANNELS;
  const blockAlign = CHANNELS * bytesPerSample;

  const header = new Uint8Array(44);
  const view = new DataView(header.buffer);
  const writeTag = (offset, fourcc) => {
    for (let i = 0; i < 4; i++) view.setUint8(offset + i, fourcc.charCodeAt(i));
  };

  // RIFF container descriptor
  writeTag(0, "RIFF");
  view.setUint32(4, 36 + dataLen, true);
  writeTag(8, "WAVE");

  // "fmt " chunk: 16-byte body
  writeTag(12, "fmt ");
  view.setUint32(16, 16, true);
  view.setUint16(20, 1, true);
  view.setUint16(22, CHANNELS, true);
  view.setUint32(24, sampleRate, true);
  view.setUint32(28, byteRate, true);
  view.setUint16(32, blockAlign, true);
  view.setUint16(34, BITS_PER_SAMPLE, true);

  // "data" chunk header
  writeTag(36, "data");
  view.setUint32(40, dataLen, true);
  return header;
}
|
|
429
|
+
// Encoding names accepted by parseOptions (compared after upper-casing).
var VALID_ENCODINGS = [
  "MP3",
  "OGG_OPUS",
  "LINEAR16",
  "WAV",
  "PCM",
  "FLAC",
  "ALAW",
  "MULAW",
];

// Output-file extensions considered a match for each encoding; anything else
// makes warnExtensionMismatch log a warning.
var ENCODING_EXTENSIONS = {
  MP3: [".mp3"],
  OGG_OPUS: [".ogg", ".opus"],
  LINEAR16: [".wav"],
  WAV: [".wav"],
  PCM: [".pcm"],
  FLAC: [".flac"],
  ALAW: [".alaw"],
  MULAW: [".mulaw"],
};
|
|
440
|
+
/**
 * Warns when the extension of `outputFile` does not match the chosen encoding.
 *
 * The extension is taken from the file name only (the part after the last "/"
 * or "\"), so dots in directory names are ignored, and a name without any dot
 * is reported as having an empty extension rather than its last character.
 * Encodings missing from ENCODING_EXTENSIONS never warn.
 *
 * @param {string} outputFile - destination path supplied by the caller.
 * @param {string} encoding - upper-case encoding name.
 * @returns {void}
 */
function warnExtensionMismatch(outputFile, encoding) {
  const lastSeparator = Math.max(outputFile.lastIndexOf("/"), outputFile.lastIndexOf("\\"));
  const fileName = outputFile.slice(lastSeparator + 1);
  const dotAt = fileName.lastIndexOf(".");
  const ext = dotAt === -1 ? "" : fileName.slice(dotAt).toLowerCase();
  const expected = ENCODING_EXTENSIONS[encoding];
  if (expected && !expected.includes(ext)) {
    console.warn(
      `[inworld-tts] Warning: outputFile "${outputFile}" has extension "${ext}" but encoding is "${encoding}" (expected ${expected.join(" or ")}). The file will contain ${encoding} audio data.`
    );
  }
}
|
|
449
|
+
// Option keys parseOptions accepts without an "unknown option" warning.
var KNOWN_OPTIONS = /* @__PURE__ */ new Set([
  // request content
  "text",
  "voice",
  "model",
  // audio settings
  "encoding",
  "sampleRate",
  "bitRate",
  "speakingRate",
  "temperature",
  // timestamps and normalization
  "timestampType",
  "timestampTransportStrategy",
  "applyTextNormalization",
  // local output handling
  "outputFile",
  "play",
]);

// Option names mapped to the name parseOptions tells callers to use instead.
var RENAMED_OPTIONS = {
  voiceId: "voice",
  modelId: "model",
  audioEncoding: "encoding",
  sampleRateHertz: "sampleRate",
};
|
|
470
|
+
/**
 * Validates user options and fills in defaults.
 *
 * Warns (without failing) about renamed and unknown keys; only own keys of
 * RENAMED_OPTIONS count as renames, so a key such as "constructor" is reported
 * as unknown. The encoding is coerced to a string before upper-casing, which
 * means a non-string value produces the regular "not supported" ApiError
 * instead of a TypeError.
 *
 * Defaults: encoding "MP3", sampleRate 48000 (8000 for ALAW/MULAW when none is
 * given), bitRate 128000, speakingRate 1, temperature 1. An
 * `applyTextNormalization` of "none" is translated to
 * "APPLY_TEXT_NORMALIZATION_UNSPECIFIED".
 *
 * @param {object} options - caller-supplied options.
 * @param {string} defaultModelId - model used when `options.model` is not set.
 * @returns {object} normalized options.
 * @throws {ApiError} when options is not an object, `voice` is missing, or the
 *   encoding is not one of VALID_ENCODINGS.
 */
function parseOptions(options, defaultModelId) {
  if (!options || typeof options !== "object") throw new ApiError("options object is required");

  for (const key of Object.keys(options)) {
    if (Object.prototype.hasOwnProperty.call(RENAMED_OPTIONS, key)) {
      console.warn(`[inworld-tts] Warning: "${key}" has been renamed to "${RENAMED_OPTIONS[key]}". Please update your code.`);
    } else if (!KNOWN_OPTIONS.has(key)) {
      console.warn(`[inworld-tts] Warning: unknown option "${key}" will be ignored. Use camelCase keys (e.g. voice, encoding).`);
    }
  }

  if (!options.voice) throw new ApiError("options.voice is required");

  const encoding = String(options.encoding || "MP3").toUpperCase();
  if (!VALID_ENCODINGS.includes(encoding)) {
    throw new ApiError(`encoding "${encoding}" is not supported. Valid values: ${VALID_ENCODINGS.join(", ")}`);
  }

  let sampleRate = options.sampleRate ?? 48e3;
  if ((encoding === "ALAW" || encoding === "MULAW") && options.sampleRate == null) {
    console.warn(`[inworld-tts] ${encoding} only supports sampleRate 8000 \u2014 defaulting to 8000.`);
    sampleRate = 8e3;
  }

  return {
    text: options.text || "",
    voice: options.voice,
    model: options.model || defaultModelId,
    encoding,
    sampleRate,
    bitRate: options.bitRate ?? 128e3,
    speakingRate: options.speakingRate ?? 1,
    temperature: options.temperature ?? 1,
    timestampType: options.timestampType,
    timestampTransportStrategy: options.timestampTransportStrategy,
    applyTextNormalization: options.applyTextNormalization === "none" ? "APPLY_TEXT_NORMALIZATION_UNSPECIFIED" : options.applyTextNormalization
  };
}
|
|
509
|
+
/**
 * Translates normalized options into the JSON request body.
 *
 * `bitRate` is only sent for MP3 and OGG_OPUS. The three optional fields
 * (timestampType, timestampTransportStrategy, applyTextNormalization) are only
 * included when truthy.
 *
 * @param {object} opts - options as returned by parseOptions.
 * @param {string} [textOverride] - text to send instead of `opts.text`.
 * @returns {object} request body.
 */
function buildBody(opts, textOverride) {
  const audioConfig = {
    audioEncoding: opts.encoding,
    sampleRateHertz: opts.sampleRate,
    speakingRate: opts.speakingRate,
  };
  if (opts.encoding === "MP3" || opts.encoding === "OGG_OPUS") {
    audioConfig.bitRate = opts.bitRate;
  }

  const body = {
    text: textOverride ?? opts.text,
    voiceId: opts.voice,
    modelId: opts.model,
    audioConfig,
    temperature: opts.temperature,
  };
  for (const field of ["timestampType", "timestampTransportStrategy", "applyTextNormalization"]) {
    if (opts[field]) body[field] = opts[field];
  }
  return body;
}
|
|
528
|
+
/**
 * Performs one synthesis request and decodes the returned audio.
 *
 * @param {string} url - endpoint to POST to.
 * @param {object} headers - request headers.
 * @param {object} body - request payload; serialized as JSON.
 * @param {AbortSignal} [signal] - cancels the request.
 * @param {object} [config] - accepted for signature compatibility; not read here.
 * @returns {Promise<{ audio: Uint8Array, timestampInfo: object|null }>}
 * @throws {NetworkError} when the request itself fails.
 * @throws {ApiError} for non-2xx responses (with status and parsed details),
 *   unparseable JSON, or a body without `audioContent`.
 * @throws the original AbortError/TimeoutError when the signal fires, whether
 *   that happens while connecting or while the body is being read.
 */
async function generateOne(url, headers, body, signal, config = {}) {
  const isAbort = (e) => e?.name === "AbortError" || e?.name === "TimeoutError";
  const fetchOpts = { method: "POST", headers, body: JSON.stringify(body), signal };

  let response;
  try {
    response = await fetch(url, fetchOpts);
  } catch (e) {
    if (isAbort(e)) throw e;
    throw new NetworkError(e?.message ?? String(e));
  }

  if (!response.ok) {
    let errMsg = response.statusText;
    let details = {};
    try {
      const parsed = await response.json();
      // A literal JSON `null` body carries no information; keep the defaults.
      if (parsed != null) {
        details = parsed;
        errMsg = parsed.message || JSON.stringify(parsed);
      }
    } catch (_) {
      // Non-JSON error body: statusText is the best message available.
    }
    throw new ApiError(errMsg, response.status, details);
  }

  let data;
  try {
    data = await response.json();
  } catch (e) {
    // A timeout during the body download is a cancellation, not a malformed response.
    if (isAbort(e)) throw e;
    throw new ApiError("unexpected response: failed to parse JSON");
  }
  if (!data?.audioContent) {
    throw new ApiError("unexpected response: missing audioContent");
  }
  return { audio: base64ToBytes(data.audioContent), timestampInfo: data.timestampInfo ?? null };
}
|
|
558
|
+
/**
 * Tells whether a timestamp payload carries no alignment entries.
 * Word alignment is checked first; character alignment is only consulted when
 * there is no word alignment at all.
 *
 * @param {object|null|undefined} ts - timestamp info from a response.
 * @returns {boolean} true when nothing usable is present.
 */
function isEmptyTimestampInfo(ts) {
  const isBlank = (list) => !list || list.length === 0;
  if (!ts) return true;
  if (ts.wordAlignment) return isBlank(ts.wordAlignment.words);
  if (ts.characterAlignment) return isBlank(ts.characterAlignment.characters);
  return true;
}
|
|
568
|
+
/**
 * Returns the final end time recorded in a timestamp payload, preferring word
 * end times over character end times.
 *
 * @param {object|null|undefined} ts - timestamp info from a response.
 * @returns {number} last end time, or 0 when none is available.
 */
function getLastEndTime(ts) {
  if (!ts) return 0;
  const endTimeLists = [
    ts.wordAlignment?.wordEndTimeSeconds,
    ts.characterAlignment?.characterEndTimeSeconds,
  ];
  for (const ends of endTimeLists) {
    if (ends?.length) return ends[ends.length - 1];
  }
  return 0;
}
|
|
580
|
+
/**
 * Returns the duration, in seconds, of a <break time="..."/> tag that ends the
 * text (trailing whitespace is allowed).
 *
 * An unparseable duration such as time=".s" yields 0 rather than NaN, matching
 * how estimateEffectiveLength ignores such tags.
 *
 * @param {string} text - chunk text.
 * @returns {number} pause length in seconds, or 0 when there is no trailing break.
 */
function extractTrailingBreakSeconds(text) {
  const m = text.match(/<break\s+time="([\d.]+)(m?s)"\s*\/?>\s*$/i);
  if (!m) return 0;
  const val = parseFloat(m[1]);
  if (Number.isNaN(val)) return 0;
  return m[2].toLowerCase() === "ms" ? val / 1e3 : val;
}
|
|
586
|
+
/**
 * Returns a copy of a timestamp payload with `offset` added to every word and
 * character start/end time and to each phone's `startTimeSeconds`.
 *
 * With a zero offset, or no payload, the input is returned as-is (or `{}` when
 * it is null/undefined). Missing time arrays come back as empty arrays.
 *
 * @param {object|null|undefined} ts - timestamp info to shift.
 * @param {number} offset - seconds to add.
 * @returns {object} shifted timestamp info.
 */
function applyTimestampOffset(ts, offset) {
  if (!ts || offset === 0) return ts ?? {};

  const shift = (t) => t + offset;
  const shiftAll = (times) => times?.map((t) => shift(t)) ?? [];
  const shifted = {};

  const words = ts.wordAlignment;
  if (words) {
    const phoneticDetails = words.phoneticDetails?.map((detail) => ({
      ...detail,
      phones: detail.phones?.map((phone) => ({ ...phone, startTimeSeconds: shift(phone.startTimeSeconds) })),
    })) ?? [];
    shifted.wordAlignment = {
      words: words.words,
      wordStartTimeSeconds: shiftAll(words.wordStartTimeSeconds),
      wordEndTimeSeconds: shiftAll(words.wordEndTimeSeconds),
      phoneticDetails,
    };
  }

  const chars = ts.characterAlignment;
  if (chars) {
    shifted.characterAlignment = {
      characters: chars.characters,
      characterStartTimeSeconds: shiftAll(chars.characterStartTimeSeconds),
      characterEndTimeSeconds: shiftAll(chars.characterEndTimeSeconds),
    };
  }
  return shifted;
}
|
|
609
|
+
/**
 * Combines per-chunk timestamp payloads into one.
 *
 * Word and character alignments are concatenated in input order; entries
 * without the relevant alignment are skipped. Each phonetic detail's
 * `wordIndex` is shifted by the number of words contributed by earlier chunks
 * so it keeps pointing at the right word in the merged list. Times are not
 * adjusted here.
 *
 * @param {Array<object|null|undefined>} infos - timestamp infos in chunk order.
 * @returns {object} merged info; has `wordAlignment` / `characterAlignment`
 *   only if at least one input had them.
 */
function mergeTimestampInfos(infos) {
  const merged = {};

  const wordParts = infos.filter((ts) => ts?.wordAlignment).map((ts) => ts.wordAlignment);
  if (wordParts.length > 0) {
    const phoneticDetails = [];
    let wordsSoFar = 0;
    for (const part of wordParts) {
      for (const detail of part.phoneticDetails ?? []) {
        phoneticDetails.push({ ...detail, wordIndex: detail.wordIndex + wordsSoFar });
      }
      wordsSoFar += part.words?.length ?? 0;
    }
    merged.wordAlignment = {
      words: wordParts.flatMap((part) => part.words ?? []),
      wordStartTimeSeconds: wordParts.flatMap((part) => part.wordStartTimeSeconds ?? []),
      wordEndTimeSeconds: wordParts.flatMap((part) => part.wordEndTimeSeconds ?? []),
      phoneticDetails,
    };
  }

  const charParts = infos.filter((ts) => ts?.characterAlignment).map((ts) => ts.characterAlignment);
  if (charParts.length > 0) {
    merged.characterAlignment = {
      characters: charParts.flatMap((part) => part.characters ?? []),
      characterStartTimeSeconds: charParts.flatMap((part) => part.characterStartTimeSeconds ?? []),
      characterEndTimeSeconds: charParts.flatMap((part) => part.characterEndTimeSeconds ?? []),
    };
  }
  return merged;
}
|
|
656
|
+
/**
 * Synthesize `options.text` through the non-streaming `/tts/v1/voice` endpoint
 * and resolve with the resulting audio bytes.
 *
 * Text whose estimated effective length fits within GENERATE_MAX_CHARS is sent
 * as one request. Longer text is split with chunkText(), the chunks are
 * requested by a pool of concurrent workers, and the per-chunk audio is joined
 * back together in chunk order (LINEAR16/WAV/PCM go through
 * mergeLinear16Buffers, everything else is concatenated as-is).
 *
 * @param {object} options - Synthesis options. `outputFile` and `play` are
 *   consumed here; everything else is handed to parseOptions().
 * @param {object} [config] - Client configuration. Reads `_authHeader`,
 *   `_baseUrl`, `timeout` (defaults to 6e4) and `maxConcurrentRequests`.
 * @returns {Promise<Uint8Array>} The synthesized audio; an empty array when
 *   chunkText() produces no chunks.
 * @throws {MissingApiKeyError} When `config._authHeader` is not set.
 * @throws {ApiError} When `options.text` is missing, or propagated from a request.
 * @throws {NetworkError} Propagated from a request, or created with the message
 *   "Request timed out" for any other failure (the original error is kept on
 *   `cause`).
 */
async function generate(options, config = {}) {
  if (!config._authHeader) throw new MissingApiKeyError();
  const url = `${config._baseUrl}/tts/v1/voice`;
  const { outputFile, play: shouldPlay, ...rest } = options || {};
  const opts = parseOptions(rest, "inworld-tts-1.5-max");
  if (!opts.text) {
    const received = (options || {}).text;
    const typeInfo = received === void 0 ? "undefined" : received === null ? "null" : `${typeof received} (${JSON.stringify(received)})`;
    throw new ApiError(`options.text is required (received: ${typeInfo})`);
  }
  const { signal, clear } = getTimeoutSignal(config.timeout ?? 6e4);
  const maxConcurrent = config.maxConcurrentRequests ?? MAX_CONCURRENT_REQUESTS;
  const spliceBreak = SPLICE_BREAK_SECONDS;
  const headers = {
    "Content-Type": "application/json",
    Authorization: config._authHeader
  };
  // Callers only ever see ApiError / NetworkError. Anything else is reported
  // as a timeout, but the underlying error is preserved for debugging.
  const toClientError = (e) => {
    if (e instanceof ApiError || e instanceof NetworkError) return e;
    const wrapped = new NetworkError("Request timed out");
    wrapped.cause = e;
    return wrapped;
  };
  const generateStart = performance.now();
  let audio;
  try {
    if (estimateEffectiveLength(opts.text) <= GENERATE_MAX_CHARS) {
      let result;
      try {
        result = await withRetry(() => generateOne(url, headers, buildBody(opts), signal, config), config);
      } catch (e) {
        throw toClientError(e);
      }
      audio = result.audio;
    } else {
      const chunks = chunkText(opts.text);
      if (chunks.length === 0) return new Uint8Array(0);
      // A zero, negative or non-numeric maxConcurrentRequests would start no
      // workers at all and silently produce empty audio, so run at least one.
      const requested = Math.floor(Number(maxConcurrent));
      const concurrency = Math.min(chunks.length, requested >= 1 ? requested : 1);
      debugLog(config, `long text: ${chunks.length} chunks, concurrency=${concurrency}`);
      const results = new Array(chunks.length);
      const queue = chunks.map((t, i) => ({ text: t, index: i }));
      // Separate controller so one failed chunk can cancel its siblings while
      // still honouring the overall timeout signal.
      const ac = new AbortController();
      signal.addEventListener("abort", () => ac.abort(signal.reason), { once: true });
      const worker = async () => {
        while (queue.length > 0) {
          if (ac.signal.aborted) return;
          const { text: t, index: i } = queue.shift();
          try {
            results[i] = await withRetry(() => generateOne(url, headers, buildBody(opts, t), ac.signal, config), config);
          } catch (e) {
            ac.abort(e);
            throw toClientError(e);
          }
        }
      };
      await Promise.all(Array.from({ length: concurrency }, () => worker()));
      const listBuffers = results.filter((r) => r?.audio instanceof Uint8Array).map((r) => r.audio);
      if (opts.encoding === "LINEAR16" || opts.encoding === "WAV") {
        const merged = mergeLinear16Buffers(listBuffers, spliceBreak, opts.sampleRate);
        audio = concatBytes([wavHeader(merged.length, opts.sampleRate), merged]);
      } else if (opts.encoding === "PCM") {
        audio = mergeLinear16Buffers(listBuffers, spliceBreak, opts.sampleRate);
      } else {
        audio = concatBytes(listBuffers);
      }
    }
  } finally {
    clear();
  }
  debugLog(config, `generate: ${audio.length.toLocaleString()} bytes (${((performance.now() - generateStart) / 1e3).toFixed(2)}s)`);
  if (outputFile) {
    warnExtensionMismatch(outputFile, opts.encoding);
    await writeFileSafe(outputFile, audio);
  }
  if (shouldPlay) {
    // Prefer the file that was just written; otherwise play from memory.
    if (outputFile) {
      await playFile(outputFile, opts.encoding);
    } else {
      await play(audio, { encoding: opts.encoding });
    }
  }
  return audio;
}
|
|
735
|
+
/**
 * Synthesize `options.text` through `/tts/v1/voice` and resolve with both the
 * audio bytes and the timestamp information returned by the service.
 *
 * Short text is sent as one request. Long text is split with chunkText() and
 * requested by a pool of concurrent workers; each chunk's timestamps are then
 * shifted by the running offset (last end time of the previous chunks, their
 * trailing break, and the splice gap for LINEAR16/WAV/PCM) before merging.
 *
 * @param {object} options - Synthesis options. `timestampType` is required;
 *   `outputFile` and `play` are consumed here; the rest goes to parseOptions().
 * @param {object} [config] - Client configuration. Reads `_authHeader`,
 *   `_baseUrl`, `timeout` (defaults to 6e4) and `maxConcurrentRequests`.
 * @returns {Promise<{audio: Uint8Array, timestamps: object}>} Audio plus
 *   timestamp info (`{}` when the service returned none or there were no chunks).
 * @throws {MissingApiKeyError} When `config._authHeader` is not set.
 * @throws {ApiError} When `timestampType` or `text` is missing, or propagated
 *   from a request.
 * @throws {NetworkError} Propagated from a request, or created with the message
 *   "Request timed out" for any other failure (the original error is kept on
 *   `cause`).
 */
async function generateWithTimestamps(options, config = {}) {
  if (!config._authHeader) throw new MissingApiKeyError();
  const url = `${config._baseUrl}/tts/v1/voice`;
  const { outputFile, play: shouldPlay, timestampType, ...rest } = options || {};
  if (!timestampType) throw new ApiError('options.timestampType is required ("WORD" or "CHARACTER")');
  const opts = parseOptions({ ...rest, timestampType }, "inworld-tts-1.5-max");
  if (!opts.text) {
    const received = (options || {}).text;
    const typeInfo = received === void 0 ? "undefined" : received === null ? "null" : `${typeof received} (${JSON.stringify(received)})`;
    throw new ApiError(`options.text is required (received: ${typeInfo})`);
  }
  const { signal, clear } = getTimeoutSignal(config.timeout ?? 6e4);
  const maxConcurrent = config.maxConcurrentRequests ?? MAX_CONCURRENT_REQUESTS;
  const headers = {
    "Content-Type": "application/json",
    Authorization: config._authHeader
  };
  // Callers only ever see ApiError / NetworkError. Anything else is reported
  // as a timeout, but the underlying error is preserved for debugging.
  const toClientError = (e) => {
    if (e instanceof ApiError || e instanceof NetworkError) return e;
    const wrapped = new NetworkError("Request timed out");
    wrapped.cause = e;
    return wrapped;
  };
  const generateStart = performance.now();
  let audio;
  let timestamps;
  try {
    if (estimateEffectiveLength(opts.text) <= GENERATE_MAX_CHARS) {
      let result;
      try {
        result = await withRetry(() => generateOne(url, headers, buildBody(opts), signal, config), config);
      } catch (e) {
        throw toClientError(e);
      }
      audio = result.audio;
      timestamps = result.timestampInfo ?? {};
    } else {
      const chunks = chunkText(opts.text);
      if (chunks.length === 0) return { audio: new Uint8Array(0), timestamps: {} };
      // A zero, negative or non-numeric maxConcurrentRequests would start no
      // workers, leaving every result slot empty, so run at least one.
      const requested = Math.floor(Number(maxConcurrent));
      const concurrency = Math.min(chunks.length, requested >= 1 ? requested : 1);
      debugLog(config, `generateWithTimestamps: ${chunks.length} chunks, concurrency=${concurrency}`);
      const results = new Array(chunks.length);
      const queue = chunks.map((t, i) => ({ text: t, index: i }));
      // Separate controller so one failed chunk can cancel its siblings while
      // still honouring the overall timeout signal.
      const ac = new AbortController();
      signal.addEventListener("abort", () => ac.abort(signal.reason), { once: true });
      const worker = async () => {
        while (queue.length > 0) {
          if (ac.signal.aborted) return;
          const { text: t, index: i } = queue.shift();
          try {
            results[i] = await withRetry(() => generateOne(url, headers, buildBody(opts, t), ac.signal, config), config);
          } catch (e) {
            ac.abort(e);
            throw toClientError(e);
          }
        }
      };
      await Promise.all(Array.from({ length: concurrency }, () => worker()));
      // Only the raw-PCM style encodings get a silence gap spliced between chunks.
      const spliceGap = ["LINEAR16", "WAV", "PCM"].includes(opts.encoding) ? SPLICE_BREAK_SECONDS : 0;
      let offset = 0;
      const adjustedTimestamps = [];
      const audioBuffers = [];
      for (let i = 0; i < chunks.length; i++) {
        const { audio: chunkAudio, timestampInfo } = results[i];
        audioBuffers.push(chunkAudio);
        adjustedTimestamps.push(applyTimestampOffset(timestampInfo, offset));
        offset += getLastEndTime(timestampInfo) + extractTrailingBreakSeconds(chunks[i]) + spliceGap;
      }
      timestamps = mergeTimestampInfos(adjustedTimestamps);
      if (opts.encoding === "LINEAR16" || opts.encoding === "WAV") {
        const merged = mergeLinear16Buffers(audioBuffers, SPLICE_BREAK_SECONDS, opts.sampleRate);
        audio = concatBytes([wavHeader(merged.length, opts.sampleRate), merged]);
      } else if (opts.encoding === "PCM") {
        audio = mergeLinear16Buffers(audioBuffers, SPLICE_BREAK_SECONDS, opts.sampleRate);
      } else {
        audio = concatBytes(audioBuffers);
      }
    }
  } finally {
    clear();
  }
  debugLog(config, `generateWithTimestamps: ${audio.length.toLocaleString()} bytes (${((performance.now() - generateStart) / 1e3).toFixed(2)}s)`);
  if (outputFile) {
    warnExtensionMismatch(outputFile, opts.encoding);
    await writeFileSafe(outputFile, audio);
  }
  if (shouldPlay) {
    // Prefer the file that was just written; otherwise play from memory.
    if (outputFile) {
      await playFile(outputFile, opts.encoding);
    } else {
      await play(audio, { encoding: opts.encoding });
    }
  }
  return { audio, timestamps };
}
|
|
826
|
+
/**
 * Stream synthesized audio for `options.text` from `/tts/v1/voice:stream`,
 * yielding each decoded audio chunk as it arrives.
 *
 * The response is read as newline-delimited JSON; every line whose `result`
 * carries `audioContent` is base64-decoded and yielded. Blank or malformed
 * lines are skipped. When `outputFile` and/or `play` is set, the chunks are
 * also collected and written / played once the stream has finished.
 *
 * @param {object} options - Synthesis options (passed to parseOptions()).
 *   `outputFile` and `play` are additionally read here.
 * @param {object} [config] - Client configuration. Reads `_authHeader`,
 *   `_baseUrl` and `timeout` (defaults to 6e4).
 * @yields {Uint8Array} Decoded audio bytes for one response line.
 * @throws {MissingApiKeyError} When `config._authHeader` is not set.
 * @throws {ApiError} For missing/oversized text, `outputFile` in a browser,
 *   an empty response body, or a non-OK HTTP response.
 * @throws {NetworkError} When the request fails or times out before a
 *   response is received.
 */
async function* stream(options, config = {}) {
  if (!config._authHeader) throw new MissingApiKeyError();
  const url = `${config._baseUrl}/tts/v1/voice:stream`;
  const outputFile = options?.outputFile;
  const shouldPlay = options?.play;
  if (outputFile && typeof window !== "undefined") {
    throw new ApiError("outputFile is not supported in browser environments. Use stream() to collect chunks or generate() to get a Uint8Array.");
  }
  const opts = parseOptions(options, "inworld-tts-1.5-mini");
  if (!opts.text) {
    const received = (options || {}).text;
    const typeInfo = received === void 0 ? "undefined" : received === null ? "null" : `${typeof received} (${JSON.stringify(received)})`;
    throw new ApiError(`options.text is required (received: ${typeInfo})`);
  }
  const effectiveLen = estimateEffectiveLength(opts.text);
  if (effectiveLen > STREAM_MAX_CHARS) {
    throw new ApiError(
      `text exceeds ${STREAM_MAX_CHARS} character limit for stream() (effective length ${effectiveLen}, raw length ${opts.text.length}). Use generate() instead \u2014 it handles any text length automatically.`
    );
  }
  const { signal, clear } = getTimeoutSignal(config.timeout ?? 6e4);
  const headers = {
    "Content-Type": "application/json",
    Authorization: config._authHeader
  };
  const body = buildBody(opts);
  const fetchOpts = { method: "POST", headers, body: JSON.stringify(body), signal };
  const streamStart = performance.now();
  const collectedChunks = outputFile || shouldPlay ? [] : null;
  // Decode one response line into audio bytes. Returns null for blank lines,
  // lines without audio, and (best effort) lines that fail to parse.
  const decodeLine = (line) => {
    if (!line.trim()) return null;
    try {
      const audioContent = JSON.parse(line).result?.audioContent;
      return audioContent ? base64ToBytes(audioContent) : null;
    } catch (_) {
      return null;
    }
  };
  try {
    let res;
    try {
      res = await withRetry(async () => {
        let r;
        try {
          r = await fetch(url, fetchOpts);
        } catch (e) {
          if (e.name === "AbortError" || e.name === "TimeoutError") throw e;
          throw new NetworkError(e.message);
        }
        // 5xx is thrown from inside the retry wrapper so it can be retried.
        if (r.status >= 500) {
          const details = await r.json().catch(() => ({}));
          throw new ApiError(details.message || r.statusText, r.status, details);
        }
        return r;
      }, config);
    } catch (e) {
      if (e instanceof ApiError || e instanceof NetworkError) throw e;
      throw new NetworkError("Request timed out");
    }
    if (!res.ok) {
      // A response body can only be consumed once, so read it as text and
      // parse that, instead of calling json() and then text() on the same body.
      let raw = "";
      try {
        raw = (await res.text()).trim();
      } catch (_) {
        // Unreadable body: fall through to the status-based message.
      }
      let details = {};
      let errMsg = raw || res.statusText || `HTTP ${res.status}`;
      try {
        const parsed = JSON.parse(raw);
        if (parsed !== null && typeof parsed === "object") {
          details = parsed;
          errMsg = parsed.message || JSON.stringify(parsed);
        }
      } catch (_) {
        // Body is not JSON; keep the raw text / status message.
      }
      throw new ApiError(errMsg, res.status, details);
    }
    const resBody = res.body;
    if (!resBody) throw new ApiError("empty response body");
    const reader = resBody.getReader();
    const decoder = new TextDecoder();
    let buf = "";
    let chunkCount = 0;
    let firstChunkMs = null;
    let totalBytes = 0;
    let finished = false;
    // Bookkeeping shared by every yielded chunk.
    const track = (chunk) => {
      if (firstChunkMs === null) firstChunkMs = performance.now() - streamStart;
      chunkCount++;
      totalBytes += chunk.length;
      debugLog(config, `stream chunk #${chunkCount} (${chunk.length} bytes)`);
      if (collectedChunks) collectedChunks.push(chunk);
      return chunk;
    };
    try {
      while (true) {
        const { done, value } = await reader.read();
        if (done) break;
        buf += decoder.decode(value, { stream: true });
        const lines = buf.split("\n");
        // The last element is an incomplete line; keep it for the next read.
        buf = lines.pop() || "";
        for (const line of lines) {
          const chunk = decodeLine(line);
          // Yield outside decodeLine's try/catch so errors raised by the
          // consumer are never swallowed as "malformed line".
          if (chunk) yield track(chunk);
        }
      }
      finished = true;
      // Flush any bytes the streaming decoder is still holding back.
      buf += decoder.decode();
      const last = decodeLine(buf);
      if (last) yield track(last);
      if (chunkCount > 0) {
        debugLog(config, `stream: first chunk ${Math.round(firstChunkMs)}ms, ${chunkCount} chunks, ${totalBytes.toLocaleString()} bytes total`);
      }
    } finally {
      if (!finished) {
        // Consumer stopped early or reading failed: cancel so the underlying
        // connection is not left open.
        try {
          await reader.cancel();
        } catch (_) {
          // Nothing more to clean up.
        }
      }
      reader.releaseLock?.();
    }
  } finally {
    clear();
  }
  if (collectedChunks) {
    const audio = concatBytes(collectedChunks);
    const enc = (opts.encoding || "MP3").toUpperCase();
    if (outputFile) {
      warnExtensionMismatch(outputFile, enc);
      await writeFileSafe(outputFile, audio);
      if (shouldPlay) await playFile(outputFile, enc);
    } else {
      // collectedChunks without outputFile implies play was requested.
      await play(audio, { encoding: enc });
    }
  }
}
|
|
957
|
+
/**
 * Stream synthesized audio together with timestamp information from
 * `/tts/v1/voice:stream`, requesting the "SYNC" timestamp transport strategy.
 *
 * The response is read as newline-delimited JSON. Every line whose `result`
 * carries `audioContent` yields `{ audio }`, plus `timestamps` when the line's
 * `timestampInfo` is not empty according to isEmptyTimestampInfo(). Blank or
 * malformed lines are skipped. When `outputFile` and/or `play` is set, the
 * audio is also collected and written / played once the stream has finished.
 *
 * @param {object} options - Synthesis options. `timestampType` is required;
 *   `outputFile` and `play` are consumed here; the rest goes to parseOptions().
 * @param {object} [config] - Client configuration. Reads `_authHeader`,
 *   `_baseUrl` and `timeout` (defaults to 6e4).
 * @yields {{audio: Uint8Array, timestamps?: object}} One entry per audio line.
 * @throws {MissingApiKeyError} When `config._authHeader` is not set.
 * @throws {ApiError} For missing `timestampType`/`text`, oversized text,
 *   `outputFile` in a browser, an empty body, or a non-OK HTTP response.
 * @throws {NetworkError} When the request fails or times out before a
 *   response is received.
 */
async function* streamWithTimestamps(options, config = {}) {
  if (!config._authHeader) throw new MissingApiKeyError();
  const url = `${config._baseUrl}/tts/v1/voice:stream`;
  const { timestampType, outputFile, play: shouldPlay, ...rest } = options || {};
  if (!timestampType) throw new ApiError('options.timestampType is required ("WORD" or "CHARACTER")');
  if (outputFile && typeof window !== "undefined") {
    throw new ApiError("outputFile is not supported in browser environments.");
  }
  const opts = parseOptions({ ...rest, timestampType }, "inworld-tts-1.5-mini");
  if (!opts.text) {
    const received = (options || {}).text;
    const typeInfo = received === void 0 ? "undefined" : received === null ? "null" : `${typeof received} (${JSON.stringify(received)})`;
    throw new ApiError(`options.text is required (received: ${typeInfo})`);
  }
  const effectiveLen = estimateEffectiveLength(opts.text);
  if (effectiveLen > STREAM_MAX_CHARS) {
    throw new ApiError(
      `text exceeds ${STREAM_MAX_CHARS} character limit for streamWithTimestamps() (effective length ${effectiveLen}, raw length ${opts.text.length}). Use generateWithTimestamps() instead.`
    );
  }
  const { signal, clear } = getTimeoutSignal(config.timeout ?? 6e4);
  const headers = { "Content-Type": "application/json", Authorization: config._authHeader };
  const body = buildBody(opts);
  body.timestampTransportStrategy = "SYNC";
  const fetchOpts = { method: "POST", headers, body: JSON.stringify(body), signal };
  const streamStart = performance.now();
  const collectedChunks = outputFile || shouldPlay ? [] : null;
  // Decode one response line into the object to yield. Returns null for blank
  // lines, lines without audio, and (best effort) lines that fail to parse.
  const decodeLine = (line) => {
    if (!line.trim()) return null;
    try {
      const result = JSON.parse(line).result;
      if (!result || !result.audioContent) return null;
      const audio = base64ToBytes(result.audioContent);
      const ts = result.timestampInfo;
      return isEmptyTimestampInfo(ts) ? { audio } : { audio, timestamps: ts };
    } catch (_) {
      return null;
    }
  };
  try {
    let res;
    try {
      res = await withRetry(async () => {
        let r;
        try {
          r = await fetch(url, fetchOpts);
        } catch (e) {
          if (e.name === "AbortError" || e.name === "TimeoutError") throw e;
          throw new NetworkError(e.message);
        }
        // 5xx is thrown from inside the retry wrapper so it can be retried.
        if (r.status >= 500) {
          const details = await r.json().catch(() => ({}));
          throw new ApiError(details.message || r.statusText, r.status, details);
        }
        return r;
      }, config);
    } catch (e) {
      if (e instanceof ApiError || e instanceof NetworkError) throw e;
      throw new NetworkError("Request timed out");
    }
    if (!res.ok) {
      // A response body can only be consumed once, so read it as text and
      // parse that, instead of calling json() and then text() on the same body.
      let raw = "";
      try {
        raw = (await res.text()).trim();
      } catch (_) {
        // Unreadable body: fall through to the status-based message.
      }
      let details = {};
      let errMsg = raw || res.statusText || `HTTP ${res.status}`;
      try {
        const parsed = JSON.parse(raw);
        if (parsed !== null && typeof parsed === "object") {
          details = parsed;
          errMsg = parsed.message || JSON.stringify(parsed);
        }
      } catch (_) {
        // Body is not JSON; keep the raw text / status message.
      }
      throw new ApiError(errMsg, res.status, details);
    }
    const resBody = res.body;
    if (!resBody) throw new ApiError("empty response body");
    const reader = resBody.getReader();
    const decoder = new TextDecoder();
    let buf = "";
    let chunkCount = 0;
    let firstChunkMs = null;
    let finished = false;
    // Bookkeeping shared by every yielded entry.
    const track = (entry) => {
      if (firstChunkMs === null) firstChunkMs = performance.now() - streamStart;
      chunkCount++;
      debugLog(config, `streamWithTimestamps chunk #${chunkCount} (${entry.audio.length} bytes)`);
      if (collectedChunks) collectedChunks.push(entry.audio);
      return entry;
    };
    try {
      while (true) {
        const { done, value } = await reader.read();
        if (done) break;
        buf += decoder.decode(value, { stream: true });
        const lines = buf.split("\n");
        // The last element is an incomplete line; keep it for the next read.
        buf = lines.pop() || "";
        for (const line of lines) {
          const entry = decodeLine(line);
          // Yield outside decodeLine's try/catch so errors raised by the
          // consumer are never swallowed as "malformed line".
          if (entry) yield track(entry);
        }
      }
      finished = true;
      // Flush any bytes the streaming decoder is still holding back.
      buf += decoder.decode();
      const last = decodeLine(buf);
      if (last) yield track(last);
      if (chunkCount > 0) {
        debugLog(config, `streamWithTimestamps: first chunk ${Math.round(firstChunkMs)}ms, ${chunkCount} chunks total`);
      }
    } finally {
      if (!finished) {
        // Consumer stopped early or reading failed: cancel so the underlying
        // connection is not left open.
        try {
          await reader.cancel();
        } catch (_) {
          // Nothing more to clean up.
        }
      }
      reader.releaseLock?.();
    }
  } finally {
    clear();
  }
  if (collectedChunks) {
    const audio = concatBytes(collectedChunks);
    const enc = (opts.encoding || "MP3").toUpperCase();
    if (outputFile) {
      warnExtensionMismatch(outputFile, enc);
      await writeFileSafe(outputFile, audio);
      if (shouldPlay) await playFile(outputFile, enc);
    } else {
      // collectedChunks without outputFile implies play was requested.
      await play(audio, { encoding: enc });
    }
  }
}
|
|
1075
|
+
|
|
1076
|
+
// src/voice.js
|
|
1077
|
+
// Path of the voices collection; the voice helpers in this file append it to
// config._baseUrl (optionally followed by a voice id and/or a ":action" suffix).
var VOICES_PATH = "/voices/v1/voices";
|
|
1078
|
+
/**
 * Encode a byte sequence as a base64 string.
 *
 * Uses Buffer when it exists (an existing Buffer is used directly, anything
 * else is copied with Buffer.from); otherwise builds a binary string one
 * byte at a time and encodes it with btoa().
 *
 * @param {Uint8Array|Buffer} bytes - Bytes to encode.
 * @returns {string} Base64 representation of `bytes`.
 */
function bytesToBase64(bytes) {
  const hasNodeBuffer = typeof Buffer !== "undefined";
  if (!hasNodeBuffer) {
    let binary = "";
    let index = 0;
    while (index < bytes.length) {
      binary += String.fromCharCode(bytes[index]);
      index += 1;
    }
    return btoa(binary);
  }
  const nodeBuffer = Buffer.isBuffer(bytes) ? bytes : Buffer.from(bytes);
  return nodeBuffer.toString("base64");
}
|
|
1086
|
+
// Per-operation timeout defaults, used when config.timeout is not set.
// The value is handed to getTimeoutSignal() (presumably milliseconds — confirm
// against that helper). In this file getVoice reuses the listVoices entry, and
// updateVoice / deleteVoice reuse the publishVoice entry.
var DEFAULT_TIMEOUT = {
  listVoices: 3e4,
  cloneVoice: 3e5,
  designVoice: 12e4,
  publishVoice: 3e4,
  migrateAudio: 6e4
};
|
|
1093
|
+
/**
 * Build the header set for requests that carry no body.
 *
 * @param {string} authValue - Value to send in the Authorization header.
 * @returns {{Authorization: string}} Headers object with only Authorization.
 */
function authHeader(authValue) {
  const headers = {};
  headers.Authorization = authValue;
  return headers;
}
|
|
1096
|
+
/**
 * Build the header set for requests that send a JSON body.
 *
 * @param {string} authValue - Value to send in the Authorization header.
 * @returns {object} Headers with a JSON Content-Type and Authorization.
 */
function jsonHeaders(authValue) {
  const headers = { "Content-Type": "application/json" };
  headers.Authorization = authValue;
  return headers;
}
|
|
1099
|
+
/**
 * Convert a non-OK HTTP response into an ApiError.
 *
 * The body is parsed as JSON when possible: the error message is the body's
 * `message` field, or the stringified body when that field is absent, and the
 * parsed object becomes the error's details. When the body is not a JSON
 * object the message falls back to the status text, or to `HTTP <status>` if
 * the status text is empty (as it is for HTTP/2 responses), so the error
 * never has an empty message.
 *
 * @param {Response} response - The failed response; its body is consumed.
 * @returns {Promise<ApiError>} Error built from message, status and details.
 */
async function parseErrorResponse(response) {
  let errMsg = response.statusText || `HTTP ${response.status}`;
  let details = {};
  try {
    const parsed = await response.json();
    // Only object bodies are usable as details; `null` or primitives are not.
    if (parsed !== null && typeof parsed === "object") {
      details = parsed;
      errMsg = parsed.message || JSON.stringify(parsed);
    }
  } catch (_) {
    // Best effort: a missing or non-JSON body keeps the status-based message.
  }
  return new ApiError(errMsg, response.status, details);
}
|
|
1109
|
+
/**
 * Perform a fetch through withRetry() and normalise its failures.
 *
 * Inside each attempt, abort/timeout errors are rethrown untouched, any other
 * fetch failure becomes a NetworkError carrying the original message, and a
 * non-OK response is turned into the error built by parseErrorResponse().
 * After withRetry() gives up, an abort/timeout error is reported as
 * NetworkError("Request timed out"); everything else is rethrown as-is.
 *
 * @param {string} url - Request URL.
 * @param {object} fetchOpts - Options passed straight to fetch().
 * @param {object} config - Client configuration forwarded to withRetry().
 * @returns {Promise<Response>} The first OK response.
 */
async function fetchWithRetry(url, fetchOpts, config) {
  const wasAborted = (err) => err.name === "AbortError" || err.name === "TimeoutError";
  const attempt = async () => {
    let response;
    try {
      response = await fetch(url, fetchOpts);
    } catch (err) {
      if (wasAborted(err)) throw err;
      throw new NetworkError(err.message);
    }
    if (response.ok) return response;
    throw await parseErrorResponse(response);
  };
  try {
    return await withRetry(attempt, config);
  } catch (err) {
    if (!wasAborted(err)) throw err;
    throw new NetworkError("Request timed out");
  }
}
|
|
1127
|
+
/**
 * Fetch the list of voices, optionally filtered by language.
 *
 * `options.lang` (string or array) is sent as repeated `languages` query
 * parameters. The older `options.languages` key is still honoured but logs a
 * rename warning when used without `lang`.
 *
 * @param {object} [options] - `lang` (preferred) or `languages` (deprecated).
 * @param {object} [config] - Client configuration. Reads `_authHeader`,
 *   `_baseUrl` and `timeout` (defaults to DEFAULT_TIMEOUT.listVoices).
 * @returns {Promise<Array>} The `voices` array from the response.
 * @throws {MissingApiKeyError} When `config._authHeader` is not set.
 * @throws {ApiError} When the response is not JSON or has no `voices` array.
 */
async function listVoices(options = {}, config = {}) {
  if (!config._authHeader) throw new MissingApiKeyError();
  const endpoint = new URL(`${config._baseUrl}${VOICES_PATH}`);
  const { lang, languages } = options;
  if (languages && !lang) {
    console.warn('[inworld-tts] Warning: "languages" has been renamed to "lang". Please update your code.');
  }
  const requested = lang ?? languages;
  if (requested) {
    const codes = Array.isArray(requested) ? requested : [requested];
    for (const code of codes) endpoint.searchParams.append("languages", code);
  }
  const { signal: abortSignal, clear: stopTimer } = getTimeoutSignal(config.timeout ?? DEFAULT_TIMEOUT.listVoices);
  try {
    const request = { method: "GET", headers: authHeader(config._authHeader), signal: abortSignal };
    const response = await fetchWithRetry(endpoint.toString(), request, config);
    let payload;
    try {
      payload = await response.json();
    } catch (_) {
      throw new ApiError("unexpected response: failed to parse JSON");
    }
    if (Array.isArray(payload.voices)) return payload.voices;
    throw new ApiError('unexpected response: missing "voices" array');
  } finally {
    stopTimer();
  }
}
|
|
1156
|
+
/**
 * Create a cloned voice from one or more audio samples.
 *
 * Each sample may be raw bytes (Uint8Array / Buffer) or a file path string,
 * which is read from disk via the `fs` module. Samples are sent base64-encoded,
 * paired by index with `options.transcriptions` when provided.
 *
 * @param {object} options - `audioSamples` (required, non-empty array),
 *   `transcriptions`, `displayName` (defaults to "Cloned Voice"), `lang`
 *   (defaults to "EN_US"; `langCode` is the deprecated spelling),
 *   `description`, `tags`, `removeBackgroundNoise`.
 * @param {object} [config] - Client configuration. Reads `_authHeader`,
 *   `_baseUrl` and `timeout` (defaults to DEFAULT_TIMEOUT.cloneVoice).
 * @returns {Promise<object>} Parsed JSON response.
 * @throws {MissingApiKeyError} When `config._authHeader` is not set.
 * @throws {ApiError} When `options`/`audioSamples` is missing or not a
 *   non-empty array, a sample has an unsupported type, a path is given where
 *   `fs` is unavailable, or the response is not JSON.
 */
async function cloneVoice(options, config = {}) {
  if (!config._authHeader) throw new MissingApiKeyError();
  const url = `${config._baseUrl}${VOICES_PATH}:clone`;
  // Tolerate a missing options object so the caller gets the documented
  // ApiError instead of a TypeError on property access.
  const settings = options ?? {};
  const rawSamples = settings.audioSamples;
  if (!Array.isArray(rawSamples) || rawSamples.length === 0) {
    throw new ApiError("options.audioSamples is required (array of Uint8Array / Buffer / file path strings)");
  }
  if (settings.langCode && !settings.lang) {
    console.warn('[inworld-tts] Warning: "langCode" has been renamed to "lang". Please update your code.');
  }
  const samples = await Promise.all(rawSamples.map(async (s, i) => {
    if (typeof s === "string") {
      let readFileSync;
      try {
        ({ readFileSync } = await import("fs"));
      } catch {
        throw new ApiError(`options.audioSamples[${i}] is a file path string, but file system access is not available in browser. Pass Uint8Array contents instead.`);
      }
      return readFileSync(s);
    }
    if (!(s instanceof Uint8Array)) {
      throw new ApiError(`options.audioSamples[${i}] must be a Uint8Array, Buffer, or file path string (got ${typeof s})`);
    }
    return s;
  }));
  const voiceSamples = samples.map((buf, i) => {
    const sample = { audioData: bytesToBase64(buf) };
    // Transcriptions are optional and matched to samples by position.
    if (settings.transcriptions && i < settings.transcriptions.length) {
      sample.transcription = settings.transcriptions[i];
    }
    return sample;
  });
  const body = {
    displayName: settings.displayName || "Cloned Voice",
    langCode: settings.lang || settings.langCode || "EN_US",
    voiceSamples
  };
  if (settings.description) body.description = settings.description;
  if (settings.tags && settings.tags.length) body.tags = settings.tags;
  if (settings.removeBackgroundNoise) body.audioProcessingConfig = { removeBackgroundNoise: true };
  const { signal, clear } = getTimeoutSignal(config.timeout ?? DEFAULT_TIMEOUT.cloneVoice);
  try {
    const fetchOpts = { method: "POST", headers: jsonHeaders(config._authHeader), body: JSON.stringify(body), signal };
    const res = await fetchWithRetry(url, fetchOpts, config);
    try {
      return await res.json();
    } catch (_) {
      throw new ApiError("unexpected response: failed to parse JSON");
    }
  } finally {
    clear();
  }
}
|
|
1207
|
+
/**
 * Request voice previews generated from a text description.
 *
 * @param {object} options - `designPrompt` (string of 30-250 characters,
 *   required), `previewText` (required), `lang` (defaults to "EN_US";
 *   `langCode` is the deprecated spelling), `numberOfSamples` (clamped to
 *   1..3, defaults to 1).
 * @param {object} [config] - Client configuration. Reads `_authHeader`,
 *   `_baseUrl` and `timeout` (defaults to DEFAULT_TIMEOUT.designVoice).
 * @returns {Promise<object>} Parsed JSON response.
 * @throws {MissingApiKeyError} When `config._authHeader` is not set.
 * @throws {ApiError} When `designPrompt` is not a string of valid length,
 *   `previewText` is missing, or the response is not JSON.
 */
async function designVoice(options, config = {}) {
  if (!config._authHeader) throw new MissingApiKeyError();
  const url = `${config._baseUrl}${VOICES_PATH}:design`;
  // Tolerate a missing options object so the caller gets the documented
  // ApiError instead of a TypeError on property access.
  const settings = options ?? {};
  // A non-string prompt has no `length`, which would slip past the range
  // check below; treat it as empty so it is rejected.
  const prompt = typeof settings.designPrompt === "string" ? settings.designPrompt : "";
  if (prompt.length < 30 || prompt.length > 250) {
    throw new ApiError(`designPrompt must be 30-250 characters (got ${prompt.length})`);
  }
  if (!settings.previewText) throw new ApiError("options.previewText is required");
  if (settings.langCode && !settings.lang) {
    console.warn('[inworld-tts] Warning: "langCode" has been renamed to "lang". Please update your code.');
  }
  // Non-numeric input would otherwise become NaN and serialise as null.
  const requestedSamples = Number(settings.numberOfSamples) || 1;
  const body = {
    designPrompt: prompt,
    previewText: settings.previewText,
    langCode: settings.lang || settings.langCode || "EN_US",
    voiceDesignConfig: { numberOfSamples: Math.min(3, Math.max(1, requestedSamples)) }
  };
  const { signal, clear } = getTimeoutSignal(config.timeout ?? DEFAULT_TIMEOUT.designVoice);
  try {
    const fetchOpts = { method: "POST", headers: jsonHeaders(config._authHeader), body: JSON.stringify(body), signal };
    const res = await fetchWithRetry(url, fetchOpts, config);
    try {
      return await res.json();
    } catch (_) {
      throw new ApiError("unexpected response: failed to parse JSON");
    }
  } finally {
    clear();
  }
}
|
|
1237
|
+
/**
 * Publish a voice via `POST <voices>/<id>:publish`.
 *
 * @param {object} options - `voice` (voice id, required) plus optional
 *   `displayName`, `description` and `tags`; only truthy / non-empty values
 *   are sent.
 * @param {object} [config] - Client configuration. Reads `_authHeader`,
 *   `_baseUrl` and `timeout` (defaults to DEFAULT_TIMEOUT.publishVoice).
 * @returns {Promise<object>} Parsed JSON response.
 * @throws {MissingApiKeyError} When `config._authHeader` is not set.
 * @throws {ApiError} When `options.voice` is missing or the response is not JSON.
 */
async function publishVoice(options, config = {}) {
  if (!config._authHeader) throw new MissingApiKeyError();
  // Tolerate a missing options object so the caller gets the documented
  // ApiError instead of a TypeError on property access.
  const settings = options ?? {};
  const voiceId = settings.voice;
  if (!voiceId) throw new ApiError("options.voice is required");
  const url = `${config._baseUrl}${VOICES_PATH}/${encodeURIComponent(voiceId)}:publish`;
  const body = {};
  if (settings.displayName) body.displayName = settings.displayName;
  if (settings.description) body.description = settings.description;
  if (settings.tags && settings.tags.length) body.tags = settings.tags;
  const { signal, clear } = getTimeoutSignal(config.timeout ?? DEFAULT_TIMEOUT.publishVoice);
  try {
    const fetchOpts = { method: "POST", headers: jsonHeaders(config._authHeader), body: JSON.stringify(body), signal };
    const res = await fetchWithRetry(url, fetchOpts, config);
    try {
      return await res.json();
    } catch (_) {
      throw new ApiError("unexpected response: failed to parse JSON");
    }
  } finally {
    clear();
  }
}
|
|
1259
|
+
/**
 * Fetch a single voice by id.
 *
 * @param {string} voiceId - Voice identifier; URL-encoded into the path.
 * @param {object} [config] - Client configuration. Reads `_authHeader`,
 *   `_baseUrl` and `timeout` (defaults to DEFAULT_TIMEOUT.listVoices).
 * @returns {Promise<object>} Parsed JSON response.
 * @throws {MissingApiKeyError} When `config._authHeader` is not set.
 * @throws {ApiError} When `voiceId` is missing or the response is not JSON.
 */
async function getVoice(voiceId, config = {}) {
  if (!config._authHeader) throw new MissingApiKeyError();
  if (!voiceId) throw new ApiError("voiceId is required");
  const target = `${config._baseUrl}${VOICES_PATH}/${encodeURIComponent(voiceId)}`;
  const { signal: abortSignal, clear: stopTimer } = getTimeoutSignal(config.timeout ?? DEFAULT_TIMEOUT.listVoices);
  try {
    const request = { method: "GET", headers: authHeader(config._authHeader), signal: abortSignal };
    const response = await fetchWithRetry(target, request, config);
    try {
      return await response.json();
    } catch (_) {
      throw new ApiError("unexpected response: failed to parse JSON");
    }
  } finally {
    stopTimer();
  }
}
|
|
1276
|
+
/**
 * Update a voice's metadata via `PATCH <voices>/<id>`.
 *
 * @param {object} options - `voice` (voice id, required) plus optional
 *   `displayName`, `description` and `tags`. Any value that is not
 *   null/undefined is sent, so empty strings and empty arrays can be used to
 *   clear a field.
 * @param {object} [config] - Client configuration. Reads `_authHeader`,
 *   `_baseUrl` and `timeout` (defaults to DEFAULT_TIMEOUT.publishVoice).
 * @returns {Promise<object>} Parsed JSON response.
 * @throws {MissingApiKeyError} When `config._authHeader` is not set.
 * @throws {ApiError} When `options.voice` is missing or the response is not JSON.
 */
async function updateVoice(options, config = {}) {
  if (!config._authHeader) throw new MissingApiKeyError();
  // Tolerate a missing options object so the caller gets the documented
  // ApiError instead of a TypeError on property access.
  const settings = options ?? {};
  const voiceId = settings.voice;
  if (!voiceId) throw new ApiError("options.voice is required");
  const url = `${config._baseUrl}${VOICES_PATH}/${encodeURIComponent(voiceId)}`;
  const body = {};
  if (settings.displayName != null) body.displayName = settings.displayName;
  if (settings.description != null) body.description = settings.description;
  if (settings.tags != null) body.tags = settings.tags;
  const { signal, clear } = getTimeoutSignal(config.timeout ?? DEFAULT_TIMEOUT.publishVoice);
  try {
    const fetchOpts = { method: "PATCH", headers: jsonHeaders(config._authHeader), body: JSON.stringify(body), signal };
    const res = await fetchWithRetry(url, fetchOpts, config);
    try {
      return await res.json();
    } catch (_) {
      throw new ApiError("unexpected response: failed to parse JSON");
    }
  } finally {
    clear();
  }
}
|
|
1298
|
+
/**
 * Delete a voice by id. Resolves with no value once the request succeeds.
 *
 * @param {string} voiceId - Voice identifier; URL-encoded into the path.
 * @param {object} [config] - Client configuration. Reads `_authHeader`,
 *   `_baseUrl` and `timeout` (defaults to DEFAULT_TIMEOUT.publishVoice).
 * @returns {Promise<void>}
 * @throws {MissingApiKeyError} When `config._authHeader` is not set.
 * @throws {ApiError} When `voiceId` is missing.
 */
async function deleteVoice(voiceId, config = {}) {
  if (!config._authHeader) throw new MissingApiKeyError();
  if (!voiceId) throw new ApiError("voiceId is required");
  const target = `${config._baseUrl}${VOICES_PATH}/${encodeURIComponent(voiceId)}`;
  const { signal: abortSignal, clear: stopTimer } = getTimeoutSignal(config.timeout ?? DEFAULT_TIMEOUT.publishVoice);
  try {
    const request = { method: "DELETE", headers: authHeader(config._authHeader), signal: abortSignal };
    await fetchWithRetry(target, request, config);
  } finally {
    stopTimer();
  }
}
|
|
1310
|
+
// Lookup from a two-letter language code to an upper-case LANG_REGION
// identifier in the same format as the "EN_US" default used by cloneVoice and
// designVoice. Each language maps to exactly one region (e.g. pt -> PT_BR).
// NOTE(review): the consumer of this table is outside this chunk — confirm how
// unknown codes are handled there.
var LANG_TO_INWORLD = {
  en: "EN_US",
  zh: "ZH_CN",
  ja: "JA_JP",
  ko: "KO_KR",
  es: "ES_ES",
  fr: "FR_FR",
  de: "DE_DE",
  pt: "PT_BR",
  it: "IT_IT",
  pl: "PL_PL",
  ru: "RU_RU",
  hi: "HI_IN",
  ar: "AR_SA",
  nl: "NL_NL",
  he: "HE_IL"
};
|
|
1327
|
+
/**
 * Build a 44-byte RIFF/WAVE header with a 16-byte `fmt ` chunk (format tag 1)
 * followed by the `data` chunk header. All multi-byte fields are little-endian.
 *
 * @param {number} dataLen - Size in bytes of the sample data that will follow.
 * @param {number} sampleRate - Samples per second.
 * @param {number} channels - Number of channels.
 * @param {number} bitsPerSample - Bits per sample.
 * @returns {Uint8Array} The 44 header bytes.
 */
function buildWavHeader(dataLen, sampleRate, channels, bitsPerSample) {
  const header = new Uint8Array(44);
  const view = new DataView(header.buffer);
  // Writes a four-character chunk tag as ASCII bytes.
  const putTag = (offset, tag) => {
    for (let k = 0; k < tag.length; k++) view.setUint8(offset + k, tag.charCodeAt(k));
  };
  const bytesPerSample = bitsPerSample >> 3;

  putTag(0, "RIFF");
  view.setUint32(4, 36 + dataLen, true); // RIFF size: rest of header + data
  putTag(8, "WAVE");

  putTag(12, "fmt ");
  view.setUint32(16, 16, true); // size of the fmt chunk payload
  view.setUint16(20, 1, true); // format tag
  view.setUint16(22, channels, true);
  view.setUint32(24, sampleRate, true);
  view.setUint32(28, sampleRate * channels * bytesPerSample, true); // byte rate
  view.setUint16(32, channels * bytesPerSample, true); // block align
  view.setUint16(34, bitsPerSample, true);

  putTag(36, "data");
  view.setUint32(40, dataLen, true);
  return header;
}
|
|
1359
|
+
/**
 * Trim a RIFF/WAVE buffer so that it holds at most 15 seconds of audio.
 *
 * The chunk list is walked to locate the `fmt ` chunk (channel count, sample
 * rate, bits per sample) and the `data` chunk. When the audio payload is
 * longer than 15 seconds, a new buffer is returned consisting of a fresh
 * header from `buildWavHeader` followed by the first 15 seconds of payload.
 *
 * The input is returned unchanged (same reference) when:
 *   - it does not start with a RIFF/WAVE signature or is truncated,
 *   - no usable `fmt ` or `data` chunk is found,
 *   - the format fields describe zero bytes per second,
 *   - the payload is already 15 seconds or shorter.
 *
 * NOTE(review): the rebuilt header comes from buildWavHeader, which always
 * writes audio-format tag 1; inputs using another encoding are not detected
 * here — confirm callers only pass integer PCM.
 *
 * @param {Uint8Array} bytes - Complete WAV file contents.
 * @returns {Uint8Array} The original buffer, or a new trimmed WAV buffer.
 */
function trimWavTo15s(bytes) {
  const MAX_SECONDS = 15;
  const hasTag = (at, tag) => {
    for (let k = 0; k < tag.length; k++) {
      if (bytes[at + k] !== tag.charCodeAt(k)) return false;
    }
    return true;
  };

  // Need at least "RIFF" + size + "WAVE" before any chunk can be read.
  if (bytes.length < 12 || !hasTag(0, "RIFF") || !hasTag(8, "WAVE")) {
    return bytes;
  }

  const view = new DataView(bytes.buffer, bytes.byteOffset, bytes.byteLength);
  let format = null;
  let dataStart = -1;
  let declaredDataSize = 0;

  let offset = 12;
  while (offset + 8 <= bytes.length) {
    const bodyStart = offset + 8;
    const chunkSize = view.getUint32(offset + 4, true);
    if (hasTag(offset, "data")) {
      dataStart = bodyStart;
      declaredDataSize = chunkSize;
      break;
    }
    // Read the format from the chunk itself instead of assuming fixed
    // offsets, and only when its 16 mandatory bytes are actually present.
    if (hasTag(offset, "fmt ") && chunkSize >= 16 && bodyStart + 16 <= bytes.length) {
      format = {
        channels: view.getUint16(bodyStart + 2, true),
        sampleRate: view.getUint32(bodyStart + 4, true),
        bitsPerSample: view.getUint16(bodyStart + 14, true)
      };
    }
    // RIFF chunks are word-aligned: an odd-sized body is followed by one pad byte.
    offset = bodyStart + chunkSize + (chunkSize & 1);
  }

  if (format === null || dataStart < 0) return bytes;

  const { channels, sampleRate, bitsPerSample } = format;
  const maxDataBytes = MAX_SECONDS * sampleRate * channels * (bitsPerSample >> 3);
  // Degenerate format fields: there is no meaningful duration to trim to.
  if (maxDataBytes <= 0) return bytes;

  // Trust the declared data size only when it is plausible; streamed files
  // often leave it at 0 or larger than the buffer. Honouring it keeps
  // chunks that follow the audio from being counted as samples.
  const available = bytes.length - dataStart;
  const pcmLength = declaredDataSize > 0 && declaredDataSize < available ? declaredDataSize : available;
  if (pcmLength <= maxDataBytes) return bytes;

  // subarray is a view, so the payload is copied once (into `result`).
  const trimmedPcm = bytes.subarray(dataStart, dataStart + maxDataBytes);
  const header = buildWavHeader(trimmedPcm.length, sampleRate, channels, bitsPerSample);
  const result = new Uint8Array(header.length + trimmedPcm.length);
  result.set(header, 0);
  result.set(trimmedPcm, header.length);
  return result;
}
|
|
1387
|
+
// Bitrates in kbit/s, indexed by the 4-bit bitrate field of an MP3 frame
// header. trimMp3To15s rejects index 0 and index 15 before the lookup, so
// the leading 0 is only a placeholder.
// NOTE(review): the values match the MPEG-1 Layer III bitrate table — confirm
// no other MPEG version/layer is expected to be looked up here.
var MP3_BITRATES = [0, 32, 40, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320];
|
|
1388
|
+
/**
 * Scan forward for the next MP3 frame-sync pattern: a 0xFF byte immediately
 * followed by a byte whose top three bits are all set.
 *
 * @param {Uint8Array} bytes - Buffer to scan.
 * @param {number} startOffset - Index at which the scan begins.
 * @returns {number} Index of the 0xFF byte of the first match, or -1 if none.
 */
function findMp3FrameSync(bytes, startOffset) {
  const SYNC_MASK = 224;
  // The pattern spans two bytes, so the last index can never start a match.
  const lastCandidate = bytes.length - 1;
  let pos = startOffset;
  while (pos < lastCandidate) {
    const isSync = bytes[pos] === 255 && (bytes[pos + 1] & SYNC_MASK) === SYNC_MASK;
    if (isSync) {
      return pos;
    }
    pos += 1;
  }
  return -1;
}
|
|
1394
|
+
/**
 * Trim an MP3 buffer to roughly 15 seconds, assuming constant bitrate.
 *
 * The first frame header supplies the MPEG version, layer and bitrate index.
 * The byte budget for 15 seconds is derived from the bitrate, and the cut is
 * moved forward to the next frame-sync pattern so the output ends on a frame
 * boundary when one can be found.
 *
 * The input is returned unchanged (same reference) when no frame sync is
 * found, the version id is reserved, the layer is not Layer III, the bitrate
 * index is "free" (0) or invalid (15), or the buffer is already within budget.
 *
 * Only the first frame's bitrate is consulted, so the result is an estimate
 * for variable-bitrate files.
 *
 * @param {Uint8Array} bytes - Complete MP3 file contents.
 * @returns {Uint8Array} The original buffer, or a copy of its leading portion.
 */
function trimMp3To15s(bytes) {
  // Layer III bitrates (kbit/s) for MPEG-2 and MPEG-2.5, which use a
  // different table from MPEG-1 (MP3_BITRATES).
  const MPEG2_LAYER3_BITRATES = [0, 8, 16, 24, 32, 40, 48, 56, 64, 80, 96, 112, 128, 144, 160];
  const VERSION_MPEG1 = 3;
  const VERSION_RESERVED = 1;
  const LAYER_III = 1;

  const firstSync = findMp3FrameSync(bytes, 0);
  if (firstSync < 0) return bytes;

  const b1 = bytes[firstSync + 1];
  const b2 = bytes[firstSync + 2];

  // Header byte 1: sync(3) | version(2) | layer(2) | protection(1).
  const mpegVersion = b1 >> 3 & 3;
  const layer = b1 >> 1 & 3;
  if (layer !== LAYER_III) return bytes;
  if (mpegVersion === VERSION_RESERVED) return bytes;

  // Header byte 2: bitrate index(4) | sample rate(2) | padding(1) | private(1).
  const bitrateIdx = b2 >> 4 & 15;
  if (bitrateIdx === 0 || bitrateIdx === 15) return bytes;

  // Using the MPEG-1 table for an MPEG-2/2.5 stream overestimates the
  // bitrate several-fold and leaves such files untrimmed.
  const bitrateTable = mpegVersion === VERSION_MPEG1 ? MP3_BITRATES : MPEG2_LAYER3_BITRATES;
  const bitrateKbps = bitrateTable[bitrateIdx];

  const estimatedBytes = Math.floor(15 * bitrateKbps * 1e3 / 8);
  if (bytes.length <= estimatedBytes) return bytes;

  // Prefer cutting at the next frame boundary; fall back to the raw estimate.
  const cutPos = findMp3FrameSync(bytes, estimatedBytes);
  const actualCut = cutPos > firstSync ? cutPos : estimatedBytes;
  return bytes.slice(0, actualCut);
}
|
|
1411
|
+
/**
 * Dispatch to the container-specific 15-second trimmer.
 *
 * Buffers beginning with the ASCII bytes "RIFF" go to trimWavTo15s. Buffers
 * whose first MP3 frame-sync pattern starts within the first four bytes go
 * to trimMp3To15s. Everything else — including empty buffers and buffers
 * whose first sync pattern appears later — is returned unchanged.
 *
 * @param {Uint8Array} bytes - Raw audio file contents.
 * @returns {Uint8Array} The possibly trimmed audio.
 */
function trimAudioTo15s(bytes) {
  if (bytes.length === 0) {
    return bytes;
  }

  const riffSignature = [82, 73, 70, 70];
  const startsWithRiff = riffSignature.every((code, idx) => bytes[idx] === code);
  if (startsWithRiff) {
    return trimWavTo15s(bytes);
  }

  const syncAt = findMp3FrameSync(bytes, 0);
  const looksLikeMp3 = syncAt >= 0 && syncAt < 4;
  return looksLikeMp3 ? trimMp3To15s(bytes) : bytes;
}
|
|
1422
|
+
/**
 * Copy a voice from ElevenLabs into Inworld by cloning it from one audio clip.
 *
 * Steps:
 *   1. Fetch the voice metadata from the ElevenLabs API.
 *   2. Download one audio clip: the first entry of `samples` if there is
 *      one, otherwise the file at `preview_url`.
 *   3. Trim the clip with trimAudioTo15s and pass it to cloneVoice together
 *      with the voice name and the mapped language.
 *
 * The language is taken from `labels.language` in the metadata, reduced to
 * its primary subtag (text before the first `-` or `_`, lower-cased) and
 * mapped through LANG_TO_INWORLD. Missing, non-string or unmapped values
 * fall back to "EN_US".
 *
 * @param {object} [options]
 * @param {string} options.elevenLabsApiKey - Sent as the `xi-api-key` header.
 * @param {string} options.elevenLabsVoiceId - Voice to migrate.
 * @param {object} [config] - Client configuration; must carry `_authHeader`.
 *   `timeout`, when set, overrides both default request timeouts.
 * @returns {Promise<{elevenLabsVoiceId: string, elevenLabsName: string, inworldVoiceId: string}>}
 * @throws {ApiError} On missing arguments, a failed ElevenLabs request, a
 *   voice without any downloadable audio, or an unexpected response shape.
 * @throws {MissingApiKeyError} When `config._authHeader` is not set.
 */
async function migrateFromElevenLabs({ elevenLabsApiKey, elevenLabsVoiceId } = {}, config = {}) {
  if (!elevenLabsApiKey) throw new ApiError("elevenLabsApiKey is required");
  if (!elevenLabsVoiceId) throw new ApiError("elevenLabsVoiceId is required");
  const voiceId = elevenLabsVoiceId;
  if (!config._authHeader) throw new MissingApiKeyError();

  const elevenLabsHeaders = { "xi-api-key": elevenLabsApiKey };
  const voiceUrl = `https://api.elevenlabs.io/v1/voices/${encodeURIComponent(voiceId)}`;

  // Build an ApiError from a failed ElevenLabs response. The error body may
  // be absent, non-JSON, or JSON that is not an object (for example `null`),
  // so every property access is guarded to keep the HTTP failure visible.
  const toApiError = async (prefix, res) => {
    const body = await res.json().catch(() => ({}));
    const detail = body?.detail;
    const msg = (typeof detail === "string" ? detail : detail?.message) || body?.message || res.statusText;
    return new ApiError(`${prefix}: ${msg}`, res.status, body);
  };

  const { signal: metaSignal, clear: clearMeta } = getTimeoutSignal(config.timeout ?? DEFAULT_TIMEOUT.listVoices);
  let meta;
  try {
    const metaRes = await fetch(voiceUrl, { headers: elevenLabsHeaders, signal: metaSignal });
    if (!metaRes.ok) {
      throw await toApiError("ElevenLabs get voice failed", metaRes);
    }
    meta = await metaRes.json();
  } finally {
    clearMeta();
  }
  if (meta === null || typeof meta !== "object") {
    throw new ApiError("unexpected ElevenLabs voice response: missing metadata");
  }

  const voiceName = meta.name;
  const languageLabel = meta.labels?.language;
  const rawLang = (typeof languageLabel === "string" && languageLabel !== "" ? languageLabel : "en").toLowerCase().split(/[-_]/)[0];
  // The label comes from a remote service: only accept keys the table
  // itself defines, never inherited ones such as "constructor".
  const inworldLang = Object.prototype.hasOwnProperty.call(LANG_TO_INWORLD, rawLang) ? LANG_TO_INWORLD[rawLang] : "EN_US";

  const { signal: audioSignal, clear: clearAudio } = getTimeoutSignal(config.timeout ?? DEFAULT_TIMEOUT.migrateAudio);
  let audioBytes;
  try {
    const samples = meta.samples;
    if (samples && samples.length > 0) {
      const sampleId = samples[0].sample_id;
      const sampleRes = await fetch(
        `${voiceUrl}/samples/${encodeURIComponent(sampleId)}/audio`,
        { headers: elevenLabsHeaders, signal: audioSignal }
      );
      if (!sampleRes.ok) {
        throw await toApiError("ElevenLabs get sample audio failed", sampleRes);
      }
      audioBytes = new Uint8Array(await sampleRes.arrayBuffer());
    } else if (meta.preview_url) {
      // The preview file is fetched without the ElevenLabs API key header.
      const previewRes = await fetch(meta.preview_url, { signal: audioSignal });
      if (!previewRes.ok) {
        throw new ApiError(`ElevenLabs fetch preview_url failed: ${previewRes.statusText}`, previewRes.status);
      }
      audioBytes = new Uint8Array(await previewRes.arrayBuffer());
    } else {
      throw new ApiError("No voice samples or preview_url available for this ElevenLabs voice");
    }
  } finally {
    clearAudio();
  }

  audioBytes = trimAudioTo15s(audioBytes);
  const cloneResult = await cloneVoice(
    { displayName: voiceName, audioSamples: [audioBytes], lang: inworldLang },
    config
  );
  const inworldVoiceId = cloneResult?.voice?.voiceId;
  if (!inworldVoiceId) throw new ApiError("unexpected Inworld clone response: missing voiceId");
  return { elevenLabsVoiceId: voiceId, elevenLabsName: voiceName, inworldVoiceId };
}
|
|
1483
|
+
|
|
1484
|
+
// src/index.js
|
|
1485
|
+
/**
 * Build a TTS client bound to one set of credentials and request settings.
 *
 * Credentials are resolved in this order: `opts.token`, then `opts.apiKey`,
 * then the `INWORLD_API_KEY` environment variable (when `process` exists).
 * A token always wins; an API key supplied next to it is ignored with a
 * console warning. Using an API key while `isRunningInBrowser()` is true
 * throws unless `opts.dangerouslyAllowBrowser` is set, in which case only a
 * warning is printed.
 *
 * Every method on the returned object except `play` first awaits
 * `ensureFreshToken(config)` and then forwards to the module-level function
 * of the same name with the shared config.
 *
 * @param {object} [opts]
 * @param {string} [opts.token] - Bearer token.
 * @param {string} [opts.apiKey] - API key, sent with the Basic scheme.
 * @param {Function} [opts.onTokenExpiring] - Kept only when a token is given.
 * @param {boolean} [opts.dangerouslyAllowBrowser]
 * @param {string} [opts.baseUrl]
 * @param {number} [opts.timeout]
 * @param {number} [opts.maxConcurrentRequests]
 * @param {number} [opts.maxRetries=2]
 * @param {boolean} [opts.debug=false]
 * @returns {object} The client API object.
 * @throws {MissingApiKeyError} When neither a token nor an API key is available.
 * @throws {InworldTTSError} When an API key is used in a browser without opt-in.
 */
function createClient(opts = {}) {
  const token = opts.token ?? null;
  const envKey = typeof process !== "undefined" ? process.env?.INWORLD_API_KEY : null;
  const apiKey = token ? null : opts.apiKey ?? envKey ?? null;

  if (opts.token && opts.apiKey) {
    console.warn("[inworld-tts] Both token and apiKey provided \u2014 apiKey will be ignored");
  }
  if (opts.onTokenExpiring && !token) {
    console.warn("[inworld-tts] onTokenExpiring is ignored when no token is provided");
  }

  const apiKeyInBrowser = !token && apiKey && isRunningInBrowser();
  if (apiKeyInBrowser) {
    if (!opts.dangerouslyAllowBrowser) {
      throw new InworldTTSError(
        "Running in browser with API key is disabled by default. Your API key would be exposed to end users.\nRecommended: use a JWT token instead: createClient({ token: 'your_jwt' })\nSee: https://docs.inworld.ai/api-reference/introduction#jwt-authentication\nOr to opt in anyway: createClient({ apiKey: '...', dangerouslyAllowBrowser: true })"
      );
    }
    console.warn("[inworld-tts] dangerouslyAllowBrowser is set. Your API key is visible to anyone using browser DevTools and can be used to make requests at your expense.");
  }

  if (!apiKey && !token) {
    throw new MissingApiKeyError();
  }

  const config = {
    _baseUrl: getBaseUrl(opts.baseUrl ?? null),
    _authHeader: token ? `Bearer ${token}` : `Basic ${apiKey}`,
    _token: token,
    _onTokenExpiring: token && opts.onTokenExpiring ? opts.onTokenExpiring : null,
    _refreshPromise: null,
    timeout: opts.timeout ?? null,
    maxConcurrentRequests: opts.maxConcurrentRequests,
    maxRetries: opts.maxRetries ?? 2,
    debug: opts.debug ?? false
  };

  // Shared pre-flight step for every network-facing method.
  const refreshToken = () => ensureFreshToken(config);

  return {
    generate: async (options) => {
      await refreshToken();
      return generate(options, config);
    },
    stream: async function* (options) {
      await refreshToken();
      yield* stream(options, config);
    },
    generateWithTimestamps: async (options) => {
      await refreshToken();
      return generateWithTimestamps(options, config);
    },
    streamWithTimestamps: async function* (options) {
      await refreshToken();
      yield* streamWithTimestamps(options, config);
    },
    listVoices: async (options) => {
      await refreshToken();
      return listVoices(options, config);
    },
    getVoice: async (voice) => {
      await refreshToken();
      return getVoice(voice, config);
    },
    updateVoice: async (options) => {
      await refreshToken();
      return updateVoice(options, config);
    },
    deleteVoice: async (voice) => {
      await refreshToken();
      return deleteVoice(voice, config);
    },
    cloneVoice: async (options) => {
      await refreshToken();
      return cloneVoice(options, config);
    },
    designVoice: async (options) => {
      await refreshToken();
      return designVoice(options, config);
    },
    publishVoice: async (options) => {
      await refreshToken();
      return publishVoice(options, config);
    },
    migrateFromElevenLabs: async (options) => {
      await refreshToken();
      return migrateFromElevenLabs(options, config);
    },
    // Playback is local, so no token refresh is needed.
    play: async (audio, options) => play(audio, options)
  };
}
|
|
1569
|
+
/**
 * Alias for createClient. Declared as a regular function and returning an
 * object, so both `InworldTTS(opts)` and `new InworldTTS(opts)` yield the
 * client produced by createClient.
 *
 * @param {object} [opts] - Forwarded unchanged to createClient.
 * @returns {object} The client object returned by createClient.
 */
function InworldTTS(opts = {}) {
  const client = createClient(opts);
  return client;
}
|
|
1572
|
+
// Annotate the CommonJS export names for ESM import in node:
|
|
1573
|
+
0 && (module.exports = {
|
|
1574
|
+
ApiError,
|
|
1575
|
+
InworldTTS,
|
|
1576
|
+
InworldTTSError,
|
|
1577
|
+
MissingApiKeyError,
|
|
1578
|
+
NetworkError,
|
|
1579
|
+
createClient
|
|
1580
|
+
});
|