@inworld/tts 0.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of @inworld/tts might be problematic. See the registry's advisory page for more details.

@@ -0,0 +1,53 @@
1
+ /**
2
+ * Browser player — bundlers substitute this for player.js in browser builds.
3
+ * Uses <audio> element for playback. Must be called inside a user gesture.
4
+ */
5
+
6
+ import { ApiError } from './errors.js';
7
+ import { detectEncoding } from './encoding.js';
8
+
9
+ export { detectEncoding };
10
+
11
// Encoding name → MIME type used when wrapping bytes in a Blob for <audio>.
const MIME = {
  MP3: 'audio/mpeg',
  WAV: 'audio/wav',
  OGG_OPUS: 'audio/ogg; codecs=opus',
  FLAC: 'audio/flac',
};

// Raw/headerless encodings rejected by play() — the <audio> element has no
// container information to decode them with.
const UNSUPPORTED_IN_BROWSER = new Set(['LINEAR16', 'PCM', 'ALAW', 'MULAW']);
19
+
20
/**
 * Browser stub for the Node-only player lookup.
 * External audio players do not exist in the browser, so this always
 * reports "no player"; playback goes through the <audio> element instead.
 * @returns {null}
 */
export function findPlayer(_encoding) {
  return null;
}
23
+
24
/**
 * Browser stub for file-based playback — there is no file system here.
 * @throws {ApiError} always
 */
export async function playFile(_filePath, _encoding) {
  throw new ApiError(
    'Audio playback from file is not supported in browser environments.',
  );
}
27
+
28
/**
 * Play a Uint8Array of encoded audio through an <audio> element.
 * Encoding is sniffed from magic bytes unless options.encoding is given.
 * Must be called from a user gesture or the browser will block playback.
 * @param {Uint8Array} audio
 * @param {{ encoding?: string }} [options]
 * @returns {Promise<void>} resolves when playback finishes
 * @throws {ApiError} for file paths, unplayable encodings, or blocked playback
 */
export async function play(audio, options = {}) {
  if (typeof audio === 'string') {
    throw new ApiError('File paths are not supported in browser. Pass a Uint8Array instead.');
  }

  const encoding = options.encoding ? options.encoding.toUpperCase() : detectEncoding(audio);
  if (UNSUPPORTED_IN_BROWSER.has(encoding)) {
    throw new ApiError(`${encoding} cannot be played in browser. Use AudioContext or request MP3/WAV instead.`);
  }

  const objectUrl = URL.createObjectURL(
    new Blob([audio], { type: MIME[encoding] || 'audio/mpeg' }),
  );
  const element = new Audio(objectUrl);
  try {
    await element.play();
    // Wait until playback naturally ends (or errors mid-stream).
    await new Promise((resolve, reject) => {
      element.onended = resolve;
      element.onerror = () =>
        reject(new ApiError('Audio playback failed. The format may be unsupported by this browser.'));
    });
  } catch (err) {
    if (err.name === 'NotAllowedError') {
      // Autoplay policy: playback must originate from a user gesture.
      throw new ApiError('play() was blocked by the browser. Call play() inside a user event handler (e.g. button click).');
    }
    throw err;
  } finally {
    // Always release the object URL, even on failure.
    URL.revokeObjectURL(objectUrl);
  }
}
package/src/player.js ADDED
@@ -0,0 +1,143 @@
1
+ /**
2
+ * Audio playback utilities. Node.js only — bundlers substitute player.browser.js.
3
+ */
4
+
5
+ import { existsSync } from 'fs';
6
+ import { writeFile, unlink } from 'fs/promises';
7
+ import { spawn } from 'child_process';
8
+ import { tmpdir } from 'os';
9
+ import { join } from 'path';
10
+ import { detectEncoding } from './encoding.js';
11
+
12
+ export { detectEncoding };
13
+
14
// ---------------------------------------------------------------------------
// 3b. Find installed audio player
// ---------------------------------------------------------------------------

// Encoding name → file extension used when play() writes bytes to a temp file.
const EXT_MAP = {
  MP3: '.mp3', OGG_OPUS: '.ogg', LINEAR16: '.wav', WAV: '.wav',
  PCM: '.pcm', FLAC: '.flac', ALAW: '.wav', MULAW: '.wav',
};
22
+
23
/**
 * Locate a binary by scanning each directory on PATH.
 * On Windows also tries the ".exe"-suffixed name.
 * @param {string} binary - bare executable name
 * @returns {string|null} absolute path, or null when not found
 */
function scanPath(binary) {
  const isWindows = process.platform === 'win32';
  const dirs = (process.env.PATH || '').split(isWindows ? ';' : ':');
  for (const dir of dirs) {
    if (!dir) continue;
    const candidate = join(dir, binary);
    if (existsSync(candidate)) return candidate;
    if (isWindows && existsSync(`${candidate}.exe`)) return `${candidate}.exe`;
  }
  return null;
}

// Silent, auto-exiting invocation for ffplay.
const FFPLAY_ARGS = ['-nodisp', '-autoexit', '-loglevel', 'quiet'];

/**
 * Find the best available audio player for the given encoding.
 * Returns null (with a stderr warning) if no suitable player is found.
 * @param {string} encoding
 * @returns {{ binary: string, args: string[] } | null}
 */
export function findPlayer(encoding) {
  if (encoding === 'PCM') {
    process.stderr.write("[inworld-tts] PCM audio cannot be played directly. Use encoding: 'WAV' instead.\n");
    return null;
  }

  // ffplay handles every format we produce, so prefer it on any platform.
  const ffplay = scanPath('ffplay');
  if (ffplay) return { binary: ffplay, args: FFPLAY_ARGS };

  const platform = process.platform;

  if (platform === 'darwin' && ['MP3', 'WAV', 'FLAC', 'ALAW', 'MULAW'].includes(encoding)) {
    const afplay = scanPath('afplay');
    if (afplay) return { binary: afplay, args: [] };
  }

  if (platform === 'linux') {
    // Per-format fallbacks commonly preinstalled on Linux.
    const fallbacks = {
      MP3: { name: 'mpg123', args: ['-q'] },
      OGG_OPUS: { name: 'ogg123', args: [] },
      WAV: { name: 'aplay', args: ['-q'] },
      LINEAR16: { name: 'aplay', args: ['-q'] },
    };
    const candidate = fallbacks[encoding];
    if (candidate) {
      const found = scanPath(candidate.name);
      if (found) return { binary: found, args: candidate.args };
    }
  }

  if (platform === 'win32') {
    // Defer to the default file association; "" is the window-title slot.
    return { binary: 'cmd', args: ['/c', 'start', '/wait', ''] };
  }

  let hint;
  if (platform === 'darwin') {
    hint = 'Install ffmpeg (brew install ffmpeg) or use afplay (built-in for MP3/WAV/FLAC).';
  } else if (platform === 'linux') {
    hint = 'Install ffmpeg (apt install ffmpeg) or mpg123/ogg123/aplay for specific formats.';
  } else {
    hint = 'Install ffmpeg to enable audio playback.';
  }
  process.stderr.write(`[inworld-tts] No audio player found for encoding "${encoding}". ${hint}\n`);
  return null;
}
88
+
89
// ---------------------------------------------------------------------------
// Internal: play a file at the given path
// ---------------------------------------------------------------------------

/**
 * Play an audio file using the best available player.
 * Prints a warning and returns (does not throw) if no player is found.
 * @param {string} filePath
 * @param {string} encoding
 * @returns {Promise<void>}
 */
export async function playFile(filePath, encoding) {
  const player = findPlayer(encoding);
  if (player === null) return;
  await new Promise((resolve, reject) => {
    const child = spawn(player.binary, [...player.args, filePath], { stdio: 'ignore' });
    child.on('error', reject);
    // Resolve on close regardless of exit code — playback is best-effort.
    child.on('close', () => resolve());
  });
}
110
+
111
// ---------------------------------------------------------------------------
// 3c. play() — public API
// ---------------------------------------------------------------------------

/**
 * Play audio from a Uint8Array or a file path. Detects encoding from magic
 * bytes unless overridden. Byte input is written to a temp file, played, and
 * the temp file is removed afterwards. Node.js only.
 * @param {Uint8Array|string} audio
 * @param {{ encoding?: string }} [options]
 * @returns {Promise<void>}
 * @example
 * import { play } from '@inworld/tts';
 * const audio = await tts.generate({ text: 'Hello!' });
 * await play(audio); // encoding inferred from magic bytes
 */
export async function play(audio, options = {}) {
  if (typeof audio === 'string') {
    // File path input: play directly without reading into memory.
    const EXT_TO_ENCODING = { mp3: 'MP3', wav: 'WAV', ogg: 'OGG_OPUS', flac: 'FLAC', pcm: 'PCM' };
    const ext = audio.split('.').pop().toLowerCase();
    const encoding = options.encoding ? options.encoding.toUpperCase() : (EXT_TO_ENCODING[ext] || 'MP3');
    return playFile(audio, encoding);
  }

  const encoding = options.encoding ? options.encoding.toUpperCase() : detectEncoding(audio);
  const suffix = EXT_MAP[encoding] || '.mp3';
  // Timestamp + random suffix keeps concurrent plays from colliding.
  const tmpName = `inworld-tts-${Date.now()}-${Math.random().toString(36).slice(2)}${suffix}`;
  const tmpPath = join(tmpdir(), tmpName);
  await writeFile(tmpPath, audio);
  try {
    await playFile(tmpPath, encoding);
  } finally {
    // Best-effort cleanup; ignore failures (e.g. file already removed).
    await unlink(tmpPath).catch(() => {});
  }
}
package/src/voice.js ADDED
@@ -0,0 +1,498 @@
1
+ /**
2
+ * Voice API client: list, get, update, delete, clone, design, publish.
3
+ * Base URL: /voices/v1/voices
4
+ */
5
+
6
+ import { getTimeoutSignal, withRetry } from './config.js';
7
+ import { ApiError, MissingApiKeyError, NetworkError } from './errors.js';
8
+
9
// REST path prefix shared by every voice endpoint in this module.
const VOICES_PATH = '/voices/v1/voices';
10
+
11
/**
 * Encode raw bytes as base64. Uses Buffer where available (Node);
 * otherwise falls back to building a binary string for btoa (browser).
 * @param {Uint8Array|Buffer} bytes
 * @returns {string}
 */
function bytesToBase64(bytes) {
  if (typeof Buffer !== 'undefined') {
    const buf = Buffer.isBuffer(bytes) ? bytes : Buffer.from(bytes);
    return buf.toString('base64');
  }
  let binary = '';
  for (const byte of bytes) binary += String.fromCharCode(byte);
  return btoa(binary);
}
19
+
20
// Per-operation default request timeouts, in milliseconds. Cloning (which
// uploads base64 audio in the request body) gets the longest budget.
const DEFAULT_TIMEOUT = {
  listVoices: 30_000,
  cloneVoice: 300_000,
  designVoice: 120_000,
  publishVoice: 30_000,
  migrateAudio: 60_000,
};
27
+
28
// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------

/** Headers for requests without a JSON body. */
function authHeader(authValue) {
  return { Authorization: authValue };
}

/** Headers for requests that carry a JSON body. */
function jsonHeaders(authValue) {
  return { 'Content-Type': 'application/json', Authorization: authValue };
}
39
+
40
/**
 * Convert a non-OK fetch Response into an ApiError, preferring the JSON
 * body's "message" field over the HTTP status text.
 * @param {Response} response
 * @returns {Promise<ApiError>}
 */
async function parseErrorResponse(response) {
  let message = response.statusText;
  let details = {};
  try {
    details = await response.json();
    message = details.message || JSON.stringify(details);
  } catch (_) {
    // Body was not valid JSON — fall back to the status text.
  }
  return new ApiError(message, response.status, details);
}
49
+
50
/**
 * fetch() wrapped in withRetry. Network failures become NetworkError,
 * non-OK responses become ApiError, and aborts/timeouts surface as
 * NetworkError('Request timed out') once retries are exhausted.
 * @param {string} url
 * @param {RequestInit} fetchOpts
 * @param {object} config - retry configuration forwarded to withRetry
 * @returns {Promise<Response>}
 */
async function fetchWithRetry(url, fetchOpts, config) {
  const isTimeout = (err) => err.name === 'AbortError' || err.name === 'TimeoutError';
  const attempt = async () => {
    let response;
    try {
      response = await fetch(url, fetchOpts);
    } catch (err) {
      // Let AbortError/TimeoutError pass through unwrapped so withRetry
      // sees the original error and does not retry it.
      if (isTimeout(err)) throw err;
      throw new NetworkError(err.message);
    }
    if (!response.ok) throw await parseErrorResponse(response);
    return response;
  };
  try {
    return await withRetry(attempt, config);
  } catch (err) {
    if (isTimeout(err)) throw new NetworkError('Request timed out');
    throw err;
  }
}
70
+
71
// ---------------------------------------------------------------------------
// List voices
// GET /voices/v1/voices[?languages=EN_US&languages=es]
// ---------------------------------------------------------------------------

/**
 * List available voices, optionally filtered by language.
 * @param {{ lang?: string|string[], languages?: string|string[] }} [options]
 *   `languages` is the deprecated alias of `lang`.
 * @param {object} [config]
 * @returns {Promise<object[]>} the "voices" array from the API response
 * @throws {MissingApiKeyError|ApiError}
 */
export async function listVoices(options = {}, config = {}) {
  if (!config._authHeader) throw new MissingApiKeyError();

  if (options.languages && !options.lang) {
    console.warn('[inworld-tts] Warning: "languages" has been renamed to "lang". Please update your code.');
  }

  const url = new URL(`${config._baseUrl}${VOICES_PATH}`);
  const requested = options.lang ?? options.languages;
  if (requested) {
    const list = Array.isArray(requested) ? requested : [requested];
    for (const language of list) url.searchParams.append('languages', language);
  }

  const { signal, clear } = getTimeoutSignal(config.timeout ?? DEFAULT_TIMEOUT.listVoices);
  try {
    const res = await fetchWithRetry(
      url.toString(),
      { method: 'GET', headers: authHeader(config._authHeader), signal },
      config,
    );
    let data;
    try {
      data = await res.json();
    } catch (_) {
      throw new ApiError('unexpected response: failed to parse JSON');
    }
    if (!Array.isArray(data.voices)) {
      throw new ApiError('unexpected response: missing "voices" array');
    }
    return data.voices;
  } finally {
    clear();
  }
}
103
+
104
// ---------------------------------------------------------------------------
// Clone voice
// POST /voices/v1/voices:clone
// ---------------------------------------------------------------------------

/**
 * Clone a voice from one or more audio samples.
 * @param {object} options
 * @param {(Uint8Array|Buffer|string)[]} options.audioSamples - sample bytes,
 *   or file path strings (Node.js only)
 * @param {string[]} [options.transcriptions] - optional transcription per sample, matched by index
 * @param {string} [options.displayName] - defaults to 'Cloned Voice'
 * @param {string} [options.lang] - language code (replaces deprecated `langCode`); defaults to 'EN_US'
 * @param {string} [options.description]
 * @param {string[]} [options.tags]
 * @param {boolean} [options.removeBackgroundNoise]
 * @param {object} [config]
 * @returns {Promise<object>} API response
 * @throws {MissingApiKeyError|ApiError}
 */
export async function cloneVoice(options, config = {}) {
  if (!config._authHeader) throw new MissingApiKeyError();
  const url = `${config._baseUrl}${VOICES_PATH}:clone`;

  const rawSamples = options.audioSamples || [];
  if (rawSamples.length === 0) throw new ApiError('options.audioSamples is required (array of Uint8Array / Buffer / file path strings)');
  if (options.langCode && !options.lang) {
    console.warn('[inworld-tts] Warning: "langCode" has been renamed to "lang". Please update your code.');
  }

  // Resolve file path strings to bytes (Node.js only).
  const samples = await Promise.all(rawSamples.map(async (s, i) => {
    if (typeof s === 'string') {
      let readFile;
      try {
        // Dynamic import keeps this module loadable in browser bundles.
        ({ readFile } = await import('fs/promises'));
      } catch {
        throw new ApiError(`options.audioSamples[${i}] is a file path string, but file system access is not available in browser. Pass Uint8Array contents instead.`);
      }
      // Async read avoids blocking the event loop on large samples.
      return readFile(s);
    }
    if (!(s instanceof Uint8Array)) {
      throw new ApiError(`options.audioSamples[${i}] must be a Uint8Array, Buffer, or file path string (got ${typeof s})`);
    }
    return s;
  }));

  const voiceSamples = samples.map((buf, i) => {
    const sample = { audioData: bytesToBase64(buf) };
    // Transcriptions are paired with samples by index.
    if (options.transcriptions && i < options.transcriptions.length) {
      sample.transcription = options.transcriptions[i];
    }
    return sample;
  });

  const body = {
    displayName: options.displayName || 'Cloned Voice',
    langCode: options.lang || options.langCode || 'EN_US',
    voiceSamples,
  };
  if (options.description) body.description = options.description;
  if (options.tags && options.tags.length) body.tags = options.tags;
  if (options.removeBackgroundNoise) body.audioProcessingConfig = { removeBackgroundNoise: true };

  const { signal, clear } = getTimeoutSignal(config.timeout ?? DEFAULT_TIMEOUT.cloneVoice);
  try {
    const fetchOpts = { method: 'POST', headers: jsonHeaders(config._authHeader), body: JSON.stringify(body), signal };
    const res = await fetchWithRetry(url, fetchOpts, config);
    try { return await res.json(); } catch (_) { throw new ApiError('unexpected response: failed to parse JSON'); }
  } finally {
    clear();
  }
}
162
+
163
// ---------------------------------------------------------------------------
// Design voice
// POST /voices/v1/voices:design
// ---------------------------------------------------------------------------

/**
 * Generate voice previews from a text description.
 * @param {object} options
 * @param {string} options.designPrompt - voice description, 30-250 characters
 * @param {string} options.previewText - text spoken in the generated previews
 * @param {string} [options.lang] - language code (replaces deprecated `langCode`); defaults to 'EN_US'
 * @param {number} [options.numberOfSamples] - clamped to the range 1..3
 * @param {object} [config]
 * @returns {Promise<object>} API response
 * @throws {MissingApiKeyError|ApiError}
 */
export async function designVoice(options, config = {}) {
  if (!config._authHeader) throw new MissingApiKeyError();
  const url = `${config._baseUrl}${VOICES_PATH}:design`;

  const prompt = options.designPrompt || '';
  if (prompt.length < 30 || prompt.length > 250) {
    throw new ApiError(`designPrompt must be 30-250 characters (got ${prompt.length})`);
  }
  if (!options.previewText) throw new ApiError('options.previewText is required');
  if (options.langCode && !options.lang) {
    console.warn('[inworld-tts] Warning: "langCode" has been renamed to "lang". Please update your code.');
  }

  const sampleCount = Math.min(3, Math.max(1, options.numberOfSamples || 1));
  const body = {
    designPrompt: prompt,
    previewText: options.previewText,
    langCode: options.lang || options.langCode || 'EN_US',
    voiceDesignConfig: { numberOfSamples: sampleCount },
  };

  const { signal, clear } = getTimeoutSignal(config.timeout ?? DEFAULT_TIMEOUT.designVoice);
  try {
    const res = await fetchWithRetry(
      url,
      { method: 'POST', headers: jsonHeaders(config._authHeader), body: JSON.stringify(body), signal },
      config,
    );
    try {
      return await res.json();
    } catch (_) {
      throw new ApiError('unexpected response: failed to parse JSON');
    }
  } finally {
    clear();
  }
}
197
+
198
// ---------------------------------------------------------------------------
// Publish voice
// POST /voices/v1/voices/{voiceId}:publish
// ---------------------------------------------------------------------------

/**
 * Publish a voice, optionally updating display name, description, or tags.
 * @param {{ voice: string, displayName?: string, description?: string, tags?: string[] }} options
 * @param {object} [config]
 * @returns {Promise<object>} API response
 * @throws {MissingApiKeyError|ApiError}
 */
export async function publishVoice(options, config = {}) {
  if (!config._authHeader) throw new MissingApiKeyError();

  const voiceId = options.voice;
  if (!voiceId) throw new ApiError('options.voice is required');
  const url = `${config._baseUrl}${VOICES_PATH}/${encodeURIComponent(voiceId)}:publish`;

  // Only include fields the caller actually supplied.
  const body = {};
  if (options.displayName) body.displayName = options.displayName;
  if (options.description) body.description = options.description;
  if (options.tags && options.tags.length) body.tags = options.tags;

  const { signal, clear } = getTimeoutSignal(config.timeout ?? DEFAULT_TIMEOUT.publishVoice);
  try {
    const res = await fetchWithRetry(
      url,
      { method: 'POST', headers: jsonHeaders(config._authHeader), body: JSON.stringify(body), signal },
      config,
    );
    try {
      return await res.json();
    } catch (_) {
      throw new ApiError('unexpected response: failed to parse JSON');
    }
  } finally {
    clear();
  }
}
224
+
225
// ---------------------------------------------------------------------------
// Get a specific voice
// GET /voices/v1/voices/{voiceId}
// ---------------------------------------------------------------------------

/**
 * Fetch metadata for a single voice.
 * @param {string} voiceId
 * @param {object} [config]
 * @returns {Promise<object>} API response
 * @throws {MissingApiKeyError|ApiError}
 */
export async function getVoice(voiceId, config = {}) {
  if (!config._authHeader) throw new MissingApiKeyError();

  if (!voiceId) throw new ApiError('voiceId is required');
  const url = `${config._baseUrl}${VOICES_PATH}/${encodeURIComponent(voiceId)}`;

  const { signal, clear } = getTimeoutSignal(config.timeout ?? DEFAULT_TIMEOUT.listVoices);
  try {
    const res = await fetchWithRetry(
      url,
      { method: 'GET', headers: authHeader(config._authHeader), signal },
      config,
    );
    try {
      return await res.json();
    } catch (_) {
      throw new ApiError('unexpected response: failed to parse JSON');
    }
  } finally {
    clear();
  }
}
245
+
246
// ---------------------------------------------------------------------------
// Update a voice
// PATCH /voices/v1/voices/{voiceId}
// ---------------------------------------------------------------------------

/**
 * Update a voice's display name, description, and/or tags.
 * @param {{ voice: string, displayName?: string, description?: string, tags?: string[] }} options
 * @param {object} [config]
 * @returns {Promise<object>} API response
 * @throws {MissingApiKeyError|ApiError}
 */
export async function updateVoice(options, config = {}) {
  if (!config._authHeader) throw new MissingApiKeyError();

  const voiceId = options.voice;
  if (!voiceId) throw new ApiError('options.voice is required');
  const url = `${config._baseUrl}${VOICES_PATH}/${encodeURIComponent(voiceId)}`;

  // `!= null` so empty strings and empty arrays are still sent (PATCH can clear).
  const body = {};
  if (options.displayName != null) body.displayName = options.displayName;
  if (options.description != null) body.description = options.description;
  if (options.tags != null) body.tags = options.tags;

  const { signal, clear } = getTimeoutSignal(config.timeout ?? DEFAULT_TIMEOUT.publishVoice);
  try {
    const res = await fetchWithRetry(
      url,
      { method: 'PATCH', headers: jsonHeaders(config._authHeader), body: JSON.stringify(body), signal },
      config,
    );
    try {
      return await res.json();
    } catch (_) {
      throw new ApiError('unexpected response: failed to parse JSON');
    }
  } finally {
    clear();
  }
}
272
+
273
// ---------------------------------------------------------------------------
// Delete a voice
// DELETE /voices/v1/voices/{voiceId}
// ---------------------------------------------------------------------------

/**
 * Delete a voice. Resolves with no value on success.
 * @param {string} voiceId
 * @param {object} [config]
 * @returns {Promise<void>}
 * @throws {MissingApiKeyError|ApiError}
 */
export async function deleteVoice(voiceId, config = {}) {
  if (!config._authHeader) throw new MissingApiKeyError();

  if (!voiceId) throw new ApiError('voiceId is required');
  const url = `${config._baseUrl}${VOICES_PATH}/${encodeURIComponent(voiceId)}`;

  const { signal, clear } = getTimeoutSignal(config.timeout ?? DEFAULT_TIMEOUT.publishVoice);
  try {
    // Response body (if any) is intentionally ignored.
    await fetchWithRetry(
      url,
      { method: 'DELETE', headers: authHeader(config._authHeader), signal },
      config,
    );
  } finally {
    clear();
  }
}
292
+
293
// ---------------------------------------------------------------------------
// Migrate from ElevenLabs (single voice)
// ---------------------------------------------------------------------------

// ISO 639-1 language prefix → Inworld language code used when cloning.
// Unmapped languages fall back to 'EN_US' (see migrateFromElevenLabs).
const LANG_TO_INWORLD = {
  en: 'EN_US', zh: 'ZH_CN', ja: 'JA_JP', ko: 'KO_KR',
  es: 'ES_ES', fr: 'FR_FR', de: 'DE_DE', pt: 'PT_BR',
  it: 'IT_IT', pl: 'PL_PL', ru: 'RU_RU', hi: 'HI_IN',
  ar: 'AR_SA', nl: 'NL_NL', he: 'HE_IL',
};
303
+
304
// ---------------------------------------------------------------------------
// WAV header builder (Option C: preserve original format, only trim duration)
// ---------------------------------------------------------------------------

/**
 * Build a canonical 44-byte PCM WAV header.
 * @param {number} dataLen - size of the PCM payload in bytes
 * @param {number} sampleRate
 * @param {number} channels
 * @param {number} bitsPerSample
 * @returns {Uint8Array} 44-byte header
 */
function buildWavHeader(dataLen, sampleRate, channels, bitsPerSample) {
  const buf = new ArrayBuffer(44);
  const view = new DataView(buf);
  const writeUint32LE = (off, v) => view.setUint32(off, v, true);
  const writeUint16LE = (off, v) => view.setUint16(off, v, true);

  // RIFF chunk
  view.setUint8(0, 0x52); view.setUint8(1, 0x49); view.setUint8(2, 0x46); view.setUint8(3, 0x46); // "RIFF"
  writeUint32LE(4, 36 + dataLen); // ChunkSize
  view.setUint8(8, 0x57); view.setUint8(9, 0x41); view.setUint8(10, 0x56); view.setUint8(11, 0x45); // "WAVE"

  // fmt sub-chunk
  view.setUint8(12, 0x66); view.setUint8(13, 0x6D); view.setUint8(14, 0x74); view.setUint8(15, 0x20); // "fmt "
  writeUint32LE(16, 16); // Subchunk1Size (PCM)
  writeUint16LE(20, 1); // AudioFormat (PCM)
  writeUint16LE(22, channels);
  writeUint32LE(24, sampleRate);
  writeUint32LE(28, sampleRate * channels * (bitsPerSample >> 3)); // ByteRate
  writeUint16LE(32, channels * (bitsPerSample >> 3)); // BlockAlign
  writeUint16LE(34, bitsPerSample);

  // data sub-chunk
  view.setUint8(36, 0x64); view.setUint8(37, 0x61); view.setUint8(38, 0x74); view.setUint8(39, 0x61); // "data"
  writeUint32LE(40, dataLen);

  return new Uint8Array(buf);
}

/**
 * Trim a WAV byte stream to at most 15 seconds of PCM, rebuilding a
 * canonical 44-byte header. Returns the input unchanged when it is not a
 * parseable WAV or is already short enough.
 * @param {Uint8Array} bytes
 * @returns {Uint8Array}
 */
function trimWavTo15s(bytes) {
  // Validate RIFF header (and require enough bytes to hold one).
  if (bytes.length < 12 || bytes[0] !== 0x52 || bytes[1] !== 0x49 || bytes[2] !== 0x46 || bytes[3] !== 0x46) {
    return bytes; // not a valid WAV
  }

  const view = new DataView(bytes.buffer, bytes.byteOffset, bytes.byteLength);

  // Walk the sub-chunks to locate BOTH "fmt " and "data" — neither is
  // guaranteed to sit at a fixed offset (e.g. a LIST chunk may precede them),
  // so fixed-offset reads of the format fields are unreliable.
  let sampleRate = 0;
  let channels = 0;
  let bitsPerSample = 0;
  let dataOffset = -1;
  let off = 12;
  while (off + 8 <= bytes.length) {
    const id = String.fromCharCode(bytes[off], bytes[off+1], bytes[off+2], bytes[off+3]);
    const chunkSize = view.getUint32(off + 4, true);
    if (id === 'fmt ' && off + 8 + 16 <= bytes.length) {
      channels = view.getUint16(off + 10, true);
      sampleRate = view.getUint32(off + 12, true);
      bitsPerSample = view.getUint16(off + 22, true);
    } else if (id === 'data') {
      dataOffset = off + 8;
      break;
    }
    // RIFF chunks are word-aligned: odd sizes carry a pad byte.
    off += 8 + chunkSize + (chunkSize & 1);
  }

  // Malformed or incomplete — leave untouched rather than guess.
  if (dataOffset < 0 || sampleRate === 0 || channels === 0 || bitsPerSample === 0) {
    return bytes;
  }

  const pcm = bytes.slice(dataOffset);
  const maxDataBytes = Math.floor(15 * sampleRate * channels * (bitsPerSample >> 3));

  if (pcm.length <= maxDataBytes) return bytes; // no trim needed

  const trimmedPcm = pcm.slice(0, maxDataBytes);
  const header = buildWavHeader(trimmedPcm.length, sampleRate, channels, bitsPerSample);
  const result = new Uint8Array(header.length + trimmedPcm.length);
  result.set(header, 0);
  result.set(trimmedPcm, header.length);
  return result;
}
371
+
372
// MP3 bitrate table for MPEG1 Layer3 (index 1-14; index 0 = free, 15 = bad)
const MP3_BITRATES = [0, 32, 40, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320];

/**
 * Return the offset of the first MP3 frame-sync pattern (0xFF followed by a
 * byte with its top three bits set) at or after startOffset, or -1.
 * @param {Uint8Array} bytes
 * @param {number} startOffset
 * @returns {number}
 */
function findMp3FrameSync(bytes, startOffset) {
  for (let i = startOffset; i < bytes.length - 1; i++) {
    if (bytes[i] === 0xFF && (bytes[i+1] & 0xE0) === 0xE0) return i;
  }
  return -1;
}

/**
 * Trim an MP3 byte stream to roughly 15 seconds using the bitrate of the
 * first frame header (CBR assumption). Returns the input unchanged when it
 * is not recognizably MPEG Layer 3 or is already short enough.
 * @param {Uint8Array} bytes
 * @returns {Uint8Array}
 */
function trimMp3To15s(bytes) {
  const firstSync = findMp3FrameSync(bytes, 0);
  if (firstSync < 0) return bytes;

  const headerByte1 = bytes[firstSync + 1];
  const headerByte2 = bytes[firstSync + 2];

  // Only handle Layer 3 (layer field == 0b01).
  const layer = (headerByte1 >> 1) & 0x03;
  if (layer !== 1) return bytes; // not Layer 3

  const bitrateIdx = (headerByte2 >> 4) & 0x0F;
  if (bitrateIdx === 0 || bitrateIdx === 15) return bytes; // free/bad bitrate

  // For MPEG2 the bitrate table differs, but the MPEG1 table is close enough
  // for a duration estimate.
  const bitrateKbps = MP3_BITRATES[bitrateIdx];
  const estimatedBytes = Math.floor(15 * bitrateKbps * 1000 / 8);

  if (bytes.length <= estimatedBytes) return bytes;

  // Scan forward from the estimate to the next frame sync so the cut lands
  // on a frame boundary rather than mid-frame.
  const cutPos = findMp3FrameSync(bytes, estimatedBytes);
  const actualCut = cutPos > firstSync ? cutPos : estimatedBytes;

  return bytes.slice(0, actualCut);
}
409
+
410
/**
 * Dispatch on container magic bytes and trim audio to at most ~15 s.
 * WAV and MP3 are handled; every other format passes through untouched.
 * @param {Uint8Array} bytes
 * @returns {Uint8Array}
 */
function trimAudioTo15s(bytes) {
  if (bytes.length === 0) return bytes;

  const looksLikeWav =
    bytes[0] === 0x52 && bytes[1] === 0x49 && bytes[2] === 0x46 && bytes[3] === 0x46; // "RIFF"
  if (looksLikeWav) return trimWavTo15s(bytes);

  // MP3: a frame sync within the first few bytes.
  const syncPos = findMp3FrameSync(bytes, 0);
  if (syncPos >= 0 && syncPos < 4) return trimMp3To15s(bytes);

  // Other formats (OGG, FLAC, etc.) — return as-is.
  return bytes;
}
427
+
428
/**
 * Migrate a single ElevenLabs voice to Inworld by cloning its sample audio.
 *
 * Workflow: fetch the voice's metadata from ElevenLabs, download its first
 * uploaded sample (falling back to preview_url), trim the audio to ~15 s,
 * then clone it into Inworld via cloneVoice().
 *
 * @param {{ elevenLabsApiKey: string, elevenLabsVoiceId: string }} params
 * @param {object} [config] - Inworld client config (must carry _authHeader)
 * @returns {Promise<{ elevenLabsVoiceId: string, elevenLabsName: string, inworldVoiceId: string }>}
 * @throws {ApiError} on missing params, ElevenLabs failures, missing audio,
 *   or an unexpected clone response
 * @throws {MissingApiKeyError} when the Inworld auth header is absent
 */
export async function migrateFromElevenLabs({ elevenLabsApiKey, elevenLabsVoiceId } = {}, config = {}) {
  if (!elevenLabsApiKey) throw new ApiError('elevenLabsApiKey is required');
  if (!elevenLabsVoiceId) throw new ApiError('elevenLabsVoiceId is required');
  const voiceId = elevenLabsVoiceId;
  if (!config._authHeader) throw new MissingApiKeyError();

  // 1. Fetch voice metadata from ElevenLabs.
  const { signal: metaSignal, clear: clearMeta } = getTimeoutSignal(config.timeout ?? DEFAULT_TIMEOUT.listVoices);
  let meta;
  try {
    const metaRes = await fetch(
      `https://api.elevenlabs.io/v1/voices/${encodeURIComponent(voiceId)}`,
      { headers: { 'xi-api-key': elevenLabsApiKey }, signal: metaSignal }
    );
    if (!metaRes.ok) {
      // ElevenLabs error payloads carry the message either as a string in
      // `detail` or nested at `detail.message` — handle both shapes.
      const err = await metaRes.json().catch(() => ({}));
      const msg = (typeof err.detail === 'string' ? err.detail : err.detail?.message) || err.message || metaRes.statusText;
      throw new ApiError(`ElevenLabs get voice failed: ${msg}`, metaRes.status, err);
    }
    meta = await metaRes.json();
  } finally {
    clearMeta();
  }

  const voiceName = meta.name;
  // Normalize e.g. "en-US" / "en_us" down to the bare ISO prefix "en".
  const rawLang = (meta.labels?.language || 'en').toLowerCase().split(/[-_]/)[0];
  const inworldLang = LANG_TO_INWORLD[rawLang] ?? 'EN_US';

  // 2. Get sample audio: try the first uploaded sample, fall back to preview_url.
  const { signal: audioSignal, clear: clearAudio } = getTimeoutSignal(config.timeout ?? DEFAULT_TIMEOUT.migrateAudio);
  let audioBytes;
  try {
    const samples = meta.samples;
    if (samples && samples.length > 0) {
      const sampleId = samples[0].sample_id;
      const sampleRes = await fetch(
        `https://api.elevenlabs.io/v1/voices/${encodeURIComponent(voiceId)}/samples/${encodeURIComponent(sampleId)}/audio`,
        { headers: { 'xi-api-key': elevenLabsApiKey }, signal: audioSignal }
      );
      if (!sampleRes.ok) {
        const err = await sampleRes.json().catch(() => ({}));
        const msg = (typeof err.detail === 'string' ? err.detail : err.detail?.message) || err.message || sampleRes.statusText;
        throw new ApiError(`ElevenLabs get sample audio failed: ${msg}`, sampleRes.status, err);
      }
      audioBytes = new Uint8Array(await sampleRes.arrayBuffer());
    } else if (meta.preview_url) {
      // Note: fetched without the xi-api-key header.
      const previewRes = await fetch(meta.preview_url, { signal: audioSignal });
      if (!previewRes.ok) {
        throw new ApiError(`ElevenLabs fetch preview_url failed: ${previewRes.statusText}`, previewRes.status);
      }
      audioBytes = new Uint8Array(await previewRes.arrayBuffer());
    } else {
      throw new ApiError('No voice samples or preview_url available for this ElevenLabs voice');
    }
  } finally {
    clearAudio();
  }

  // 3. Trim to 15 s if needed (WAV/MP3 only; other formats pass through).
  audioBytes = trimAudioTo15s(audioBytes);

  // 4. Clone the audio into an Inworld voice.
  const cloneResult = await cloneVoice(
    { displayName: voiceName, audioSamples: [audioBytes], lang: inworldLang },
    config
  );

  const inworldVoiceId = cloneResult.voice?.voiceId;
  if (!inworldVoiceId) throw new ApiError('unexpected Inworld clone response: missing voiceId');
  return { elevenLabsVoiceId: voiceId, elevenLabsName: voiceName, inworldVoiceId };
}