@inworld/tts 0.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of @inworld/tts might be problematic. Click here for more details.
- package/CHANGELOG.md +9 -0
- package/LICENSE +21 -0
- package/README.md +332 -0
- package/dist/index.cjs +1580 -0
- package/package.json +77 -0
- package/src/client.js +929 -0
- package/src/config.js +135 -0
- package/src/encoding.js +23 -0
- package/src/errors.js +31 -0
- package/src/index.d.ts +363 -0
- package/src/index.js +149 -0
- package/src/player.browser.js +53 -0
- package/src/player.js +143 -0
- package/src/voice.js +498 -0
- package/src/write-file.browser.js +7 -0
- package/src/write-file.js +11 -0
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Browser player — bundlers substitute this for player.js in browser builds.
|
|
3
|
+
* Uses <audio> element for playback. Must be called inside a user gesture.
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import { ApiError } from './errors.js';
|
|
7
|
+
import { detectEncoding } from './encoding.js';
|
|
8
|
+
|
|
9
|
+
export { detectEncoding };
|
|
10
|
+
|
|
11
|
+
// MIME types for encodings the <audio> element can decode natively.
const MIME = {
  MP3: 'audio/mpeg',
  WAV: 'audio/wav',
  OGG_OPUS: 'audio/ogg; codecs=opus',
  FLAC: 'audio/flac',
};

// Raw/headerless encodings that an <audio> element cannot play directly.
const UNSUPPORTED_IN_BROWSER = new Set(['LINEAR16', 'PCM', 'ALAW', 'MULAW']);
|
|
19
|
+
|
|
20
|
+
/**
 * Browser stub of the Node.js external-player lookup.
 * There is no external player binary in the browser, so this always
 * reports "none found".
 * @param {string} _encoding - Ignored; kept for API parity with player.js.
 * @returns {null} Always null.
 */
export function findPlayer(_encoding) {
  const noExternalPlayer = null;
  return noExternalPlayer;
}
|
|
23
|
+
|
|
24
|
+
/**
 * Browser stub of player.js playFile(). Browsers have no file-system
 * access, so playing audio from a path is always an error here.
 * @param {string} _filePath - Ignored.
 * @param {string} _encoding - Ignored.
 * @throws {ApiError} Always.
 */
export async function playFile(_filePath, _encoding) {
  throw new ApiError('Audio playback from file is not supported in browser environments.');
}
|
|
27
|
+
|
|
28
|
+
/**
 * Play a Uint8Array of encoded audio through an <audio> element.
 * Must be invoked from within a user gesture, or the browser will
 * block playback with NotAllowedError.
 * @param {Uint8Array} audio - Encoded audio bytes (not a file path).
 * @param {{ encoding?: string }} [options] - Optional encoding override;
 *   otherwise the encoding is sniffed from the magic bytes.
 * @returns {Promise<void>} Resolves when playback finishes.
 * @throws {ApiError} On path input, unsupported encodings, blocked
 *   autoplay, or playback failure.
 */
export async function play(audio, options = {}) {
  if (typeof audio === 'string') {
    throw new ApiError('File paths are not supported in browser. Pass a Uint8Array instead.');
  }

  const encoding = options.encoding
    ? options.encoding.toUpperCase()
    : detectEncoding(audio);
  if (UNSUPPORTED_IN_BROWSER.has(encoding)) {
    throw new ApiError(`${encoding} cannot be played in browser. Use AudioContext or request MP3/WAV instead.`);
  }

  const mimeType = MIME[encoding] || 'audio/mpeg';
  const objectUrl = URL.createObjectURL(new Blob([audio], { type: mimeType }));
  const element = new Audio(objectUrl);
  try {
    await element.play();
    // Wait for natural end of playback, or surface a decode/playback error.
    await new Promise((resolve, reject) => {
      element.onended = resolve;
      element.onerror = () => {
        reject(new ApiError('Audio playback failed. The format may be unsupported by this browser.'));
      };
    });
  } catch (err) {
    if (err.name === 'NotAllowedError') {
      throw new ApiError('play() was blocked by the browser. Call play() inside a user event handler (e.g. button click).');
    }
    throw err;
  } finally {
    // Release the blob URL whether playback succeeded or not.
    URL.revokeObjectURL(objectUrl);
  }
}
|
package/src/player.js
ADDED
|
@@ -0,0 +1,143 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Audio playback utilities. Node.js only — bundlers substitute player.browser.js.
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import { existsSync } from 'fs';
|
|
6
|
+
import { writeFile, unlink } from 'fs/promises';
|
|
7
|
+
import { spawn } from 'child_process';
|
|
8
|
+
import { tmpdir } from 'os';
|
|
9
|
+
import { join } from 'path';
|
|
10
|
+
import { detectEncoding } from './encoding.js';
|
|
11
|
+
|
|
12
|
+
export { detectEncoding };
|
|
13
|
+
|
|
14
|
+
// ---------------------------------------------------------------------------
|
|
15
|
+
// 3b. Find installed audio player
|
|
16
|
+
// ---------------------------------------------------------------------------
|
|
17
|
+
|
|
18
|
+
// File extension used when writing each encoding to a temp file for
// playback. Raw PCM variants (LINEAR16/ALAW/MULAW) get a .wav extension
// so extension-keyed players will attempt them.
const EXT_MAP = {
  MP3: '.mp3', OGG_OPUS: '.ogg', LINEAR16: '.wav', WAV: '.wav',
  PCM: '.pcm', FLAC: '.flac', ALAW: '.wav', MULAW: '.wav',
};
|
|
22
|
+
|
|
23
|
+
/**
 * Locate an executable by scanning the directories in $PATH.
 * On Windows also tries the ".exe"-suffixed name.
 * @param {string} binary - Bare executable name (no directory part).
 * @returns {string | null} Path of the first match, or null.
 */
function scanPath(binary) {
  const isWindows = process.platform === 'win32';
  const separator = isWindows ? ';' : ':';
  const dirs = (process.env.PATH || '').split(separator).filter(Boolean);
  for (const dir of dirs) {
    const candidate = join(dir, binary);
    if (existsSync(candidate)) return candidate;
    if (isWindows) {
      const withExe = `${candidate}.exe`;
      if (existsSync(withExe)) return withExe;
    }
  }
  return null;
}

// ffplay flags: no video window, exit when playback ends, suppress logging.
const FFPLAY_ARGS = ['-nodisp', '-autoexit', '-loglevel', 'quiet'];
|
|
36
|
+
|
|
37
|
+
/**
 * Find the best available audio player for the given encoding.
 * Preference order: ffplay (handles everything), then a platform-native
 * player that supports the format, then (Windows) the shell's default
 * media handler.
 * Returns null (with a stderr warning) if no suitable player is found.
 * @param {string} encoding
 * @returns {{ binary: string, args: string[] } | null}
 */
export function findPlayer(encoding) {
  // Headerless PCM cannot be handed to a generic player at all.
  if (encoding === 'PCM') {
    process.stderr.write('[inworld-tts] PCM audio cannot be played directly. Use encoding: \'WAV\' instead.\n');
    return null;
  }

  // ffplay decodes every encoding we emit, so it wins whenever present.
  const ffplayPath = scanPath('ffplay');
  if (ffplayPath) {
    return { binary: ffplayPath, args: FFPLAY_ARGS };
  }

  const platform = process.platform;
  switch (platform) {
    case 'darwin': {
      // afplay ships with macOS but only decodes these formats.
      if (['MP3', 'WAV', 'FLAC', 'ALAW', 'MULAW'].includes(encoding)) {
        const afplayPath = scanPath('afplay');
        if (afplayPath) return { binary: afplayPath, args: [] };
      }
      break;
    }
    case 'linux': {
      // Single-format players, keyed by the encoding at hand.
      const candidates = {
        MP3: ['mpg123', ['-q']],
        OGG_OPUS: ['ogg123', []],
        WAV: ['aplay', ['-q']],
        LINEAR16: ['aplay', ['-q']],
      };
      const entry = candidates[encoding];
      if (entry) {
        const found = scanPath(entry[0]);
        if (found) return { binary: found, args: entry[1] };
      }
      break;
    }
    case 'win32':
      // Defer to the shell's default handler ('' is the window-title slot
      // expected by `start`, so the file path is not mistaken for a title).
      return { binary: 'cmd', args: ['/c', 'start', '/wait', ''] };
    default:
      break;
  }

  const hint = platform === 'darwin'
    ? 'Install ffmpeg (brew install ffmpeg) or use afplay (built-in for MP3/WAV/FLAC).'
    : platform === 'linux'
      ? 'Install ffmpeg (apt install ffmpeg) or mpg123/ogg123/aplay for specific formats.'
      : 'Install ffmpeg to enable audio playback.';
  process.stderr.write(`[inworld-tts] No audio player found for encoding "${encoding}". ${hint}\n`);
  return null;
}
|
|
88
|
+
|
|
89
|
+
// ---------------------------------------------------------------------------
|
|
90
|
+
// Internal: play a file at the given path
|
|
91
|
+
// ---------------------------------------------------------------------------
|
|
92
|
+
|
|
93
|
+
/**
 * Play an audio file using the best available player.
 * Prints a warning and returns (does not throw) if no player is found.
 * Resolves when the player process exits; rejects only if the process
 * fails to spawn. NOTE(review): a non-zero player exit code still
 * resolves — confirm this best-effort behavior is intended.
 * @param {string} filePath
 * @param {string} encoding
 * @returns {Promise<void>}
 */
export async function playFile(filePath, encoding) {
  const player = findPlayer(encoding);
  if (!player) return;
  return new Promise((resolve, reject) => {
    // The file path goes after the player's fixed argument list.
    const args = [...player.args, filePath];
    const proc = spawn(player.binary, args, { stdio: 'ignore' });
    proc.on('close', () => resolve());
    proc.on('error', reject);
  });
}
|
|
110
|
+
|
|
111
|
+
// ---------------------------------------------------------------------------
|
|
112
|
+
// 3c. play() — public API
|
|
113
|
+
// ---------------------------------------------------------------------------
|
|
114
|
+
|
|
115
|
+
/**
 * Play audio from a Uint8Array. Detects encoding from magic bytes unless overridden.
 * Writes to a temp file, plays it, then deletes the temp file. Node.js only.
 * Also accepts a file path string, which is played in place with the
 * encoding inferred from its extension unless options.encoding is set.
 * @param {Uint8Array | string} audio
 * @param {{ encoding?: string }} [options]
 * @returns {Promise<void>}
 * @example
 * import { play } from '@inworld/tts';
 * const audio = await tts.generate({ text: 'Hello!' });
 * await play(audio); // encoding inferred from magic bytes
 */
export async function play(audio, options = {}) {
  // File path input: play directly without reading into memory
  if (typeof audio === 'string') {
    // NOTE(review): a path with no '.' makes the whole string the "ext",
    // which misses the map and falls back to MP3 — confirm acceptable.
    const ext = audio.split('.').pop().toLowerCase();
    const EXT_TO_ENCODING = { mp3: 'MP3', wav: 'WAV', ogg: 'OGG_OPUS', flac: 'FLAC', pcm: 'PCM' };
    const encoding = options.encoding ? options.encoding.toUpperCase() : (EXT_TO_ENCODING[ext] || 'MP3');
    return playFile(audio, encoding);
  }
  const encoding = options.encoding ? options.encoding.toUpperCase() : detectEncoding(audio);
  const ext = EXT_MAP[encoding] || '.mp3';
  // Timestamp + random suffix avoids collisions between concurrent calls.
  const tmpPath = join(tmpdir(), `inworld-tts-${Date.now()}-${Math.random().toString(36).slice(2)}${ext}`);
  await writeFile(tmpPath, audio);
  try {
    await playFile(tmpPath, encoding);
  } finally {
    // Best-effort cleanup; ignore deletion failures.
    await unlink(tmpPath).catch(() => {});
  }
}
|
package/src/voice.js
ADDED
|
@@ -0,0 +1,498 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Voice API client: list, get, update, delete, clone, design, publish.
|
|
3
|
+
* Base URL: /voices/v1/voices
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import { getTimeoutSignal, withRetry } from './config.js';
|
|
7
|
+
import { ApiError, MissingApiKeyError, NetworkError } from './errors.js';
|
|
8
|
+
|
|
9
|
+
const VOICES_PATH = '/voices/v1/voices';
|
|
10
|
+
|
|
11
|
+
/**
 * Encode a byte array as base64, in Node (via Buffer) or the browser
 * (via btoa over a binary string).
 * @param {Uint8Array | Buffer} bytes
 * @returns {string} Base64-encoded payload.
 */
function bytesToBase64(bytes) {
  if (typeof Buffer !== 'undefined') {
    const buf = Buffer.isBuffer(bytes) ? bytes : Buffer.from(bytes);
    return buf.toString('base64');
  }
  // Browser fallback: build a binary string, then base64-encode it.
  let binary = '';
  for (const byte of bytes) binary += String.fromCharCode(byte);
  return btoa(binary);
}

// Per-operation request timeouts in milliseconds; cloning and design are
// long-running server-side jobs and get larger budgets.
const DEFAULT_TIMEOUT = {
  listVoices: 30_000,
  cloneVoice: 300_000,
  designVoice: 120_000,
  publishVoice: 30_000,
  migrateAudio: 60_000,
};
|
|
27
|
+
|
|
28
|
+
// ---------------------------------------------------------------------------
|
|
29
|
+
// Helpers
|
|
30
|
+
// ---------------------------------------------------------------------------
|
|
31
|
+
|
|
32
|
+
/**
 * Build the Authorization-only header set used by GET/DELETE requests.
 * @param {string} authValue - Full Authorization header value.
 * @returns {{ Authorization: string }}
 */
function authHeader(authValue) {
  return { Authorization: authValue };
}

/**
 * Build headers for JSON-body requests (POST/PATCH).
 * @param {string} authValue - Full Authorization header value.
 * @returns {object}
 */
function jsonHeaders(authValue) {
  return { Authorization: authValue, 'Content-Type': 'application/json' };
}

/**
 * Convert a non-OK fetch Response into an ApiError, preferring the JSON
 * body's "message" field when the body parses as JSON.
 * @param {Response} response
 * @returns {Promise<ApiError>} The error to throw (not thrown here).
 */
async function parseErrorResponse(response) {
  let message = response.statusText;
  let details = {};
  try {
    details = await response.json();
    message = details.message || JSON.stringify(details);
  } catch (_) {
    // Non-JSON body: keep the HTTP status text as the message.
  }
  return new ApiError(message, response.status, details);
}
|
|
49
|
+
|
|
50
|
+
/**
 * fetch() wrapped in the configured retry policy.
 * Network-level failures are wrapped in NetworkError before reaching the
 * retry policy; abort/timeout errors are passed through unwrapped (so
 * withRetry can recognize and not retry them) and only converted to
 * NetworkError('Request timed out') at the outer boundary. Non-2xx
 * responses are converted via parseErrorResponse and thrown, making them
 * visible to the retry policy as well.
 * @param {string} url
 * @param {RequestInit} fetchOpts
 * @param {object} config - Client config consumed by withRetry.
 * @returns {Promise<Response>} The first OK response.
 */
async function fetchWithRetry(url, fetchOpts, config) {
  try {
    return await withRetry(async () => {
      let res;
      try {
        res = await fetch(url, fetchOpts);
      } catch (e) {
        // Let AbortError/TimeoutError pass through unwrapped so withRetry
        // sees the original error and does not retry it.
        if (e.name === 'AbortError' || e.name === 'TimeoutError') throw e;
        throw new NetworkError(e.message);
      }
      if (!res.ok) throw await parseErrorResponse(res);
      return res;
    }, config);
  } catch (e) {
    if (e.name === 'AbortError' || e.name === 'TimeoutError') throw new NetworkError('Request timed out');
    throw e;
  }
}
|
|
70
|
+
|
|
71
|
+
// ---------------------------------------------------------------------------
|
|
72
|
+
// List voices
|
|
73
|
+
// GET /voices/v1/voices[?languages=EN_US&languages=es]
|
|
74
|
+
// ---------------------------------------------------------------------------
|
|
75
|
+
|
|
76
|
+
/**
 * List available voices, optionally filtered by language.
 * GET /voices/v1/voices[?languages=...]
 * @param {{ lang?: string | string[], languages?: string | string[] }} [options]
 *   `languages` is the deprecated alias for `lang`.
 * @param {object} [config] - Client config (auth, base URL, timeout, retry).
 * @returns {Promise<object[]>} The "voices" array from the response.
 * @throws {MissingApiKeyError} When no auth header is configured.
 * @throws {ApiError} On HTTP errors or a malformed response body.
 */
export async function listVoices(options = {}, config = {}) {
  if (!config._authHeader) throw new MissingApiKeyError();

  const url = new URL(`${config._baseUrl}${VOICES_PATH}`);
  // Deprecation notice, only when the new option name is absent.
  if (options.languages && !options.lang) {
    console.warn('[inworld-tts] Warning: "languages" has been renamed to "lang". Please update your code.');
  }
  const langs = options.lang ?? options.languages;
  if (langs) {
    const arr = Array.isArray(langs) ? langs : [langs];
    // The API expects a repeated "languages" query parameter.
    for (const l of arr) url.searchParams.append('languages', l);
  }

  const { signal, clear } = getTimeoutSignal(config.timeout ?? DEFAULT_TIMEOUT.listVoices);
  try {
    const fetchOpts = { method: 'GET', headers: authHeader(config._authHeader), signal };
    const res = await fetchWithRetry(url.toString(), fetchOpts, config);
    let data;
    try { data = await res.json(); } catch (_) { throw new ApiError('unexpected response: failed to parse JSON'); }
    if (!Array.isArray(data.voices)) {
      throw new ApiError('unexpected response: missing "voices" array');
    }
    return data.voices;
  } finally {
    // Always cancel the timeout timer.
    clear();
  }
}
|
|
103
|
+
|
|
104
|
+
// ---------------------------------------------------------------------------
|
|
105
|
+
// Clone voice
|
|
106
|
+
// POST /voices/v1/voices:clone
|
|
107
|
+
// ---------------------------------------------------------------------------
|
|
108
|
+
|
|
109
|
+
/**
 * Clone a voice from one or more audio samples.
 * POST /voices/v1/voices:clone
 * @param {{
 *   audioSamples: (Uint8Array | Buffer | string)[],
 *   displayName?: string,
 *   lang?: string,
 *   langCode?: string,        // deprecated alias for lang
 *   transcriptions?: string[],
 *   description?: string,
 *   tags?: string[],
 *   removeBackgroundNoise?: boolean,
 * }} options - File path strings are only resolvable in Node.js.
 * @param {object} [config] - Client config (auth, base URL, timeout, retry).
 * @returns {Promise<object>} Parsed clone response.
 * @throws {MissingApiKeyError|ApiError}
 */
export async function cloneVoice(options, config = {}) {
  if (!config._authHeader) throw new MissingApiKeyError();
  const url = `${config._baseUrl}${VOICES_PATH}:clone`;

  const rawSamples = options.audioSamples || [];
  if (rawSamples.length === 0) throw new ApiError('options.audioSamples is required (array of Uint8Array / Buffer / file path strings)');
  if (options.langCode && !options.lang) {
    console.warn('[inworld-tts] Warning: "langCode" has been renamed to "lang". Please update your code.');
  }

  // Resolve file path strings to Uint8Array (Node.js only)
  const samples = await Promise.all(rawSamples.map(async (s, i) => {
    if (typeof s === 'string') {
      let readFileSync;
      try {
        // Dynamic import keeps this module loadable in browsers.
        ({ readFileSync } = await import('fs'));
      } catch {
        throw new ApiError(`options.audioSamples[${i}] is a file path string, but file system access is not available in browser. Pass Uint8Array contents instead.`);
      }
      return readFileSync(s);
    }
    if (!(s instanceof Uint8Array)) {
      // Buffer passes this check too (Buffer extends Uint8Array).
      throw new ApiError(`options.audioSamples[${i}] must be a Uint8Array, Buffer, or file path string (got ${typeof s})`);
    }
    return s;
  }));

  // Pair each sample with its transcription when one was provided.
  const voiceSamples = samples.map((buf, i) => {
    const sample = { audioData: bytesToBase64(buf) };
    if (options.transcriptions && i < options.transcriptions.length) {
      sample.transcription = options.transcriptions[i];
    }
    return sample;
  });

  const body = {
    displayName: options.displayName || 'Cloned Voice',
    langCode: options.lang || options.langCode || 'EN_US',
    voiceSamples,
  };
  if (options.description) body.description = options.description;
  if (options.tags && options.tags.length) body.tags = options.tags;
  if (options.removeBackgroundNoise) body.audioProcessingConfig = { removeBackgroundNoise: true };

  const { signal, clear } = getTimeoutSignal(config.timeout ?? DEFAULT_TIMEOUT.cloneVoice);
  try {
    const fetchOpts = { method: 'POST', headers: jsonHeaders(config._authHeader), body: JSON.stringify(body), signal };
    const res = await fetchWithRetry(url, fetchOpts, config);
    try { return await res.json(); } catch (_) { throw new ApiError('unexpected response: failed to parse JSON'); }
  } finally {
    clear();
  }
}
|
|
162
|
+
|
|
163
|
+
// ---------------------------------------------------------------------------
|
|
164
|
+
// Design voice
|
|
165
|
+
// POST /voices/v1/voices:design
|
|
166
|
+
// ---------------------------------------------------------------------------
|
|
167
|
+
|
|
168
|
+
/**
 * Generate a brand-new voice from a text description.
 * POST /voices/v1/voices:design
 * @param {{
 *   designPrompt: string,       // must be 30-250 characters
 *   previewText: string,
 *   lang?: string,
 *   langCode?: string,          // deprecated alias for lang
 *   numberOfSamples?: number,   // clamped to 1-3
 * }} options
 * @param {object} [config] - Client config (auth, base URL, timeout, retry).
 * @returns {Promise<object>} Parsed design response.
 * @throws {MissingApiKeyError|ApiError}
 */
export async function designVoice(options, config = {}) {
  if (!config._authHeader) throw new MissingApiKeyError();
  const url = `${config._baseUrl}${VOICES_PATH}:design`;

  const prompt = options.designPrompt || '';
  if (prompt.length < 30 || prompt.length > 250) {
    throw new ApiError(`designPrompt must be 30-250 characters (got ${prompt.length})`);
  }
  if (!options.previewText) throw new ApiError('options.previewText is required');
  if (options.langCode && !options.lang) {
    console.warn('[inworld-tts] Warning: "langCode" has been renamed to "lang". Please update your code.');
  }

  const body = {
    designPrompt: prompt,
    previewText: options.previewText,
    langCode: options.lang || options.langCode || 'EN_US',
    // Clamp the requested sample count to the supported 1-3 range.
    voiceDesignConfig: { numberOfSamples: Math.min(3, Math.max(1, options.numberOfSamples || 1)) },
  };

  const { signal, clear } = getTimeoutSignal(config.timeout ?? DEFAULT_TIMEOUT.designVoice);
  try {
    const fetchOpts = { method: 'POST', headers: jsonHeaders(config._authHeader), body: JSON.stringify(body), signal };
    const res = await fetchWithRetry(url, fetchOpts, config);
    try { return await res.json(); } catch (_) { throw new ApiError('unexpected response: failed to parse JSON'); }
  } finally {
    clear();
  }
}
|
|
197
|
+
|
|
198
|
+
// ---------------------------------------------------------------------------
|
|
199
|
+
// Publish voice
|
|
200
|
+
// POST /voices/v1/voices/{voiceId}:publish
|
|
201
|
+
// ---------------------------------------------------------------------------
|
|
202
|
+
|
|
203
|
+
/**
 * Publish a voice, optionally updating its metadata in the same call.
 * POST /voices/v1/voices/{voiceId}:publish
 * @param {{ voice: string, displayName?: string, description?: string, tags?: string[] }} options
 * @param {object} [config] - Client config (auth, base URL, timeout, retry).
 * @returns {Promise<object>} Parsed response body.
 * @throws {MissingApiKeyError|ApiError}
 */
export async function publishVoice(options, config = {}) {
  if (!config._authHeader) throw new MissingApiKeyError();

  const voiceId = options.voice;
  if (!voiceId) throw new ApiError('options.voice is required');
  const url = `${config._baseUrl}${VOICES_PATH}/${encodeURIComponent(voiceId)}:publish`;

  // Only include metadata fields the caller actually provided.
  const body = {};
  if (options.displayName) body.displayName = options.displayName;
  if (options.description) body.description = options.description;
  if (options.tags && options.tags.length) body.tags = options.tags;

  const { signal, clear } = getTimeoutSignal(config.timeout ?? DEFAULT_TIMEOUT.publishVoice);
  try {
    const res = await fetchWithRetry(
      url,
      { method: 'POST', headers: jsonHeaders(config._authHeader), body: JSON.stringify(body), signal },
      config,
    );
    try {
      return await res.json();
    } catch (_) {
      throw new ApiError('unexpected response: failed to parse JSON');
    }
  } finally {
    clear();
  }
}
|
|
224
|
+
|
|
225
|
+
// ---------------------------------------------------------------------------
|
|
226
|
+
// Get a specific voice
|
|
227
|
+
// GET /voices/v1/voices/{voiceId}
|
|
228
|
+
// ---------------------------------------------------------------------------
|
|
229
|
+
|
|
230
|
+
/**
 * Fetch a single voice by id.
 * GET /voices/v1/voices/{voiceId}
 * @param {string} voiceId
 * @param {object} [config] - Client config (auth, base URL, timeout, retry).
 * @returns {Promise<object>} Parsed voice resource.
 * @throws {MissingApiKeyError|ApiError}
 */
export async function getVoice(voiceId, config = {}) {
  if (!config._authHeader) throw new MissingApiKeyError();
  if (!voiceId) throw new ApiError('voiceId is required');

  const url = `${config._baseUrl}${VOICES_PATH}/${encodeURIComponent(voiceId)}`;
  const timeoutMs = config.timeout ?? DEFAULT_TIMEOUT.listVoices;
  const { signal, clear } = getTimeoutSignal(timeoutMs);
  try {
    const res = await fetchWithRetry(
      url,
      { method: 'GET', headers: authHeader(config._authHeader), signal },
      config,
    );
    try {
      return await res.json();
    } catch (_) {
      throw new ApiError('unexpected response: failed to parse JSON');
    }
  } finally {
    clear();
  }
}
|
|
245
|
+
|
|
246
|
+
// ---------------------------------------------------------------------------
|
|
247
|
+
// Update a voice
|
|
248
|
+
// PATCH /voices/v1/voices/{voiceId}
|
|
249
|
+
// ---------------------------------------------------------------------------
|
|
250
|
+
|
|
251
|
+
/**
 * Update mutable metadata on a voice.
 * PATCH /voices/v1/voices/{voiceId}
 * @param {{ voice: string, displayName?: string, description?: string, tags?: string[] }} options
 * @param {object} [config] - Client config (auth, base URL, timeout, retry).
 * @returns {Promise<object>} Parsed response body.
 * @throws {MissingApiKeyError|ApiError}
 */
export async function updateVoice(options, config = {}) {
  if (!config._authHeader) throw new MissingApiKeyError();

  const voiceId = options.voice;
  if (!voiceId) throw new ApiError('options.voice is required');
  const url = `${config._baseUrl}${VOICES_PATH}/${encodeURIComponent(voiceId)}`;

  // `!= null` keeps intentional empty strings / empty arrays in the patch.
  const patch = {};
  if (options.displayName != null) patch.displayName = options.displayName;
  if (options.description != null) patch.description = options.description;
  if (options.tags != null) patch.tags = options.tags;

  const { signal, clear } = getTimeoutSignal(config.timeout ?? DEFAULT_TIMEOUT.publishVoice);
  try {
    const res = await fetchWithRetry(
      url,
      { method: 'PATCH', headers: jsonHeaders(config._authHeader), body: JSON.stringify(patch), signal },
      config,
    );
    try {
      return await res.json();
    } catch (_) {
      throw new ApiError('unexpected response: failed to parse JSON');
    }
  } finally {
    clear();
  }
}
|
|
272
|
+
|
|
273
|
+
// ---------------------------------------------------------------------------
|
|
274
|
+
// Delete a voice
|
|
275
|
+
// DELETE /voices/v1/voices/{voiceId}
|
|
276
|
+
// ---------------------------------------------------------------------------
|
|
277
|
+
|
|
278
|
+
/**
 * Permanently delete a voice.
 * DELETE /voices/v1/voices/{voiceId}
 * @param {string} voiceId
 * @param {object} [config] - Client config (auth, base URL, timeout, retry).
 * @returns {Promise<void>} Resolves on success; the response body is ignored.
 * @throws {MissingApiKeyError|ApiError}
 */
export async function deleteVoice(voiceId, config = {}) {
  if (!config._authHeader) throw new MissingApiKeyError();
  if (!voiceId) throw new ApiError('voiceId is required');

  const url = `${config._baseUrl}${VOICES_PATH}/${encodeURIComponent(voiceId)}`;
  const { signal, clear } = getTimeoutSignal(config.timeout ?? DEFAULT_TIMEOUT.publishVoice);
  try {
    await fetchWithRetry(
      url,
      { method: 'DELETE', headers: authHeader(config._authHeader), signal },
      config,
    );
  } finally {
    clear();
  }
}
|
|
292
|
+
|
|
293
|
+
// ---------------------------------------------------------------------------
|
|
294
|
+
// Migrate from ElevenLabs (single voice)
|
|
295
|
+
// ---------------------------------------------------------------------------
|
|
296
|
+
|
|
297
|
+
// Maps ElevenLabs ISO-639-1 language codes to Inworld language constants.
// Unknown codes fall back to EN_US at the call site.
const LANG_TO_INWORLD = {
  en: 'EN_US', zh: 'ZH_CN', ja: 'JA_JP', ko: 'KO_KR',
  es: 'ES_ES', fr: 'FR_FR', de: 'DE_DE', pt: 'PT_BR',
  it: 'IT_IT', pl: 'PL_PL', ru: 'RU_RU', hi: 'HI_IN',
  ar: 'AR_SA', nl: 'NL_NL', he: 'HE_IL',
};
|
|
303
|
+
|
|
304
|
+
// ---------------------------------------------------------------------------
|
|
305
|
+
// WAV header builder (Option C: preserve original format, only trim duration)
|
|
306
|
+
// ---------------------------------------------------------------------------
|
|
307
|
+
|
|
308
|
+
/**
 * Build a canonical 44-byte PCM WAV header.
 * @param {number} dataLen - Size of the PCM data chunk in bytes.
 * @param {number} sampleRate - Samples per second.
 * @param {number} channels - Channel count.
 * @param {number} bitsPerSample - Bits per sample (e.g. 16).
 * @returns {Uint8Array} The 44-byte header.
 */
function buildWavHeader(dataLen, sampleRate, channels, bitsPerSample) {
  const header = new Uint8Array(44);
  const view = new DataView(header.buffer);
  const bytesPerSample = bitsPerSample >> 3;

  // Write a 4-character ASCII tag at the given offset.
  const writeTag = (offset, tag) => {
    for (let i = 0; i < 4; i++) header[offset + i] = tag.charCodeAt(i);
  };

  // RIFF chunk
  writeTag(0, 'RIFF');
  view.setUint32(4, 36 + dataLen, true); // ChunkSize
  writeTag(8, 'WAVE');

  // fmt sub-chunk (16-byte PCM layout)
  writeTag(12, 'fmt ');
  view.setUint32(16, 16, true); // Subchunk1Size (PCM)
  view.setUint16(20, 1, true); // AudioFormat (PCM)
  view.setUint16(22, channels, true);
  view.setUint32(24, sampleRate, true);
  view.setUint32(28, sampleRate * channels * bytesPerSample, true); // ByteRate
  view.setUint16(32, channels * bytesPerSample, true); // BlockAlign
  view.setUint16(34, bitsPerSample, true);

  // data sub-chunk
  writeTag(36, 'data');
  view.setUint32(40, dataLen, true);

  return header;
}
|
|
335
|
+
|
|
336
|
+
/**
 * Trim a WAV byte stream to at most 15 seconds of PCM data.
 * Non-WAV input (no RIFF magic) and already-short input are returned
 * unchanged. Trimmed output gets a fresh canonical 44-byte header that
 * preserves the original sample rate / channels / bit depth.
 * NOTE(review): the fmt fields are read at fixed offsets 22/24/34, which
 * assumes "fmt " is the first sub-chunk — true for typical WAVs, confirm
 * for exotic ones.
 * @param {Uint8Array} bytes
 * @returns {Uint8Array}
 */
function trimWavTo15s(bytes) {
  // Validate RIFF header
  if (bytes[0] !== 0x52 || bytes[1] !== 0x49 || bytes[2] !== 0x46 || bytes[3] !== 0x46) {
    return bytes; // not a valid WAV
  }

  const view = new DataView(bytes.buffer, bytes.byteOffset, bytes.byteLength);
  const sampleRate = view.getUint32(24, true);
  const channels = view.getUint16(22, true);
  const bitsPerSample = view.getUint16(34, true);

  // Find the "data" sub-chunk (may not always be at offset 44)
  let dataOffset = 12;
  let found = false;
  while (dataOffset + 8 <= bytes.length) {
    const id = String.fromCharCode(bytes[dataOffset], bytes[dataOffset + 1], bytes[dataOffset + 2], bytes[dataOffset + 3]);
    const chunkSize = view.getUint32(dataOffset + 4, true);
    if (id === 'data') {
      dataOffset += 8;
      found = true;
      break;
    }
    // RIFF chunks are word-aligned: an odd-sized chunk is followed by a
    // pad byte that is not counted in chunkSize. Skipping it keeps the
    // scan in sync (previously the scan could desync on odd chunks).
    dataOffset += 8 + chunkSize + (chunkSize & 1);
  }
  if (!found) return bytes; // no data chunk located — leave input untouched

  const pcm = bytes.slice(dataOffset);
  const maxDataBytes = Math.floor(15 * sampleRate * channels * (bitsPerSample >> 3));

  if (pcm.length <= maxDataBytes) return bytes; // no trim needed

  const trimmedPcm = pcm.slice(0, maxDataBytes);
  const header = buildWavHeader(trimmedPcm.length, sampleRate, channels, bitsPerSample);
  const result = new Uint8Array(header.length + trimmedPcm.length);
  result.set(header, 0);
  result.set(trimmedPcm, header.length);
  return result;
}
|
|
371
|
+
|
|
372
|
+
// MP3 bitrate table for MPEG1 Layer3 (index 1-14; index 0 = free, 15 = bad)
const MP3_BITRATES = [0, 32, 40, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320];

/**
 * Locate the next MP3 frame-sync marker (11 set bits: 0xFF then 0xEx).
 * @param {Uint8Array} bytes
 * @param {number} startOffset - Index to begin scanning from.
 * @returns {number} Index of the sync byte, or -1 if none found.
 */
function findMp3FrameSync(bytes, startOffset) {
  const last = bytes.length - 1;
  let i = startOffset;
  while (i < last) {
    const isSync = bytes[i] === 0xFF && (bytes[i + 1] & 0xE0) === 0xE0;
    if (isSync) return i;
    i += 1;
  }
  return -1;
}
|
|
381
|
+
|
|
382
|
+
/**
 * Trim an MP3 byte stream to roughly 15 seconds using the bitrate of the
 * first frame header (constant-bitrate assumption). The cut is advanced
 * to the next frame-sync boundary so output ends on a whole frame.
 * Inputs without a recognizable Layer-3 frame, or already short enough,
 * are returned unchanged.
 * @param {Uint8Array} bytes
 * @returns {Uint8Array}
 */
function trimMp3To15s(bytes) {
  const firstSync = findMp3FrameSync(bytes, 0);
  if (firstSync < 0) return bytes;

  const b1 = bytes[firstSync + 1];
  const b2 = bytes[firstSync + 2];

  // Verify the layer bits (we only handle Layer 3). The MPEG-version bits
  // are intentionally not read: both MPEG1 and MPEG2 are estimated with
  // the same bitrate table below (the previous code computed the version
  // into an unused local).
  const layer = (b1 >> 1) & 0x03; // 0b01=Layer3
  if (layer !== 1) return bytes; // not Layer 3

  const bitrateIdx = (b2 >> 4) & 0x0F;
  if (bitrateIdx === 0 || bitrateIdx === 15) return bytes; // free/bad bitrate

  // For MPEG2 the bitrate table differs but for estimation purposes we use the same table
  const bitrateKbps = MP3_BITRATES[bitrateIdx];
  const estimatedBytes = Math.floor(15 * bitrateKbps * 1000 / 8);

  if (bytes.length <= estimatedBytes) return bytes;

  // Scan forward from estimatedBytes to the next frame boundary so we do
  // not cut a frame in half; fall back to the raw estimate if no further
  // sync marker exists.
  const cutPos = findMp3FrameSync(bytes, estimatedBytes);
  const actualCut = cutPos > firstSync ? cutPos : estimatedBytes;

  return bytes.slice(0, actualCut);
}
|
|
409
|
+
|
|
410
|
+
/**
 * Dispatch trimming by sniffing the container format from magic bytes:
 * RIFF → WAV trim, early MP3 frame sync → MP3 trim; anything else
 * (OGG, FLAC, empty input) is returned unchanged.
 * @param {Uint8Array} bytes
 * @returns {Uint8Array}
 */
function trimAudioTo15s(bytes) {
  if (bytes.length === 0) return bytes;

  // WAV: starts with RIFF
  if (bytes[0] === 0x52 && bytes[1] === 0x49 && bytes[2] === 0x46 && bytes[3] === 0x46) {
    return trimWavTo15s(bytes);
  }

  // MP3: look for frame sync
  // Only treated as MP3 when the sync appears within the first few bytes;
  // a sync found deeper in the stream is likely a false positive.
  const mp3Sync = findMp3FrameSync(bytes, 0);
  if (mp3Sync >= 0 && mp3Sync < 4) {
    return trimMp3To15s(bytes);
  }

  // Other formats (OGG, FLAC, etc.) — return as-is
  return bytes;
}
|
|
427
|
+
|
|
428
|
+
/**
 * Migrate a single voice from ElevenLabs into Inworld: fetch its
 * metadata, download a sample (or the preview clip), trim the audio to
 * at most 15 seconds, and clone it as a new Inworld voice.
 * @param {{ elevenLabsApiKey: string, elevenLabsVoiceId: string }} params
 * @param {object} [config] - Inworld client config (auth, base URL, timeout).
 * @returns {Promise<{ elevenLabsVoiceId: string, elevenLabsName: string, inworldVoiceId: string }>}
 * @throws {MissingApiKeyError} When no Inworld auth header is configured.
 * @throws {ApiError} On ElevenLabs/Inworld API failures or missing audio.
 */
export async function migrateFromElevenLabs({ elevenLabsApiKey, elevenLabsVoiceId } = {}, config = {}) {
  if (!elevenLabsApiKey) throw new ApiError('elevenLabsApiKey is required');
  if (!elevenLabsVoiceId) throw new ApiError('elevenLabsVoiceId is required');
  const voiceId = elevenLabsVoiceId;
  if (!config._authHeader) throw new MissingApiKeyError();

  // 1. Fetch voice metadata
  const { signal: metaSignal, clear: clearMeta } = getTimeoutSignal(config.timeout ?? DEFAULT_TIMEOUT.listVoices);
  let meta;
  try {
    const metaRes = await fetch(
      `https://api.elevenlabs.io/v1/voices/${encodeURIComponent(voiceId)}`,
      { headers: { 'xi-api-key': elevenLabsApiKey }, signal: metaSignal }
    );
    if (!metaRes.ok) {
      // ElevenLabs error bodies put the message under `detail`, which may
      // be a plain string or an object with a `message` field.
      const err = await metaRes.json().catch(() => ({}));
      const msg = (typeof err.detail === 'string' ? err.detail : err.detail?.message) || err.message || metaRes.statusText;
      throw new ApiError(`ElevenLabs get voice failed: ${msg}`, metaRes.status, err);
    }
    meta = await metaRes.json();
  } finally {
    clearMeta();
  }

  const voiceName = meta.name;
  // Normalize e.g. "en-US" / "en_US" to the bare ISO-639-1 code "en".
  const rawLang = (meta.labels?.language || 'en').toLowerCase().split(/[-_]/)[0];
  const inworldLang = LANG_TO_INWORLD[rawLang] ?? 'EN_US';

  // 2. Get sample audio: try first sample, fallback to preview_url
  const { signal: audioSignal, clear: clearAudio } = getTimeoutSignal(config.timeout ?? DEFAULT_TIMEOUT.migrateAudio);
  let audioBytes;
  try {
    const samples = meta.samples;
    if (samples && samples.length > 0) {
      const sampleId = samples[0].sample_id;
      const sampleRes = await fetch(
        `https://api.elevenlabs.io/v1/voices/${encodeURIComponent(voiceId)}/samples/${encodeURIComponent(sampleId)}/audio`,
        { headers: { 'xi-api-key': elevenLabsApiKey }, signal: audioSignal }
      );
      if (!sampleRes.ok) {
        const err = await sampleRes.json().catch(() => ({}));
        const msg = (typeof err.detail === 'string' ? err.detail : err.detail?.message) || err.message || sampleRes.statusText;
        throw new ApiError(`ElevenLabs get sample audio failed: ${msg}`, sampleRes.status, err);
      }
      audioBytes = new Uint8Array(await sampleRes.arrayBuffer());
    } else if (meta.preview_url) {
      // preview_url is a public asset; no API key header needed.
      const previewRes = await fetch(meta.preview_url, { signal: audioSignal });
      if (!previewRes.ok) {
        throw new ApiError(`ElevenLabs fetch preview_url failed: ${previewRes.statusText}`, previewRes.status);
      }
      audioBytes = new Uint8Array(await previewRes.arrayBuffer());
    } else {
      throw new ApiError('No voice samples or preview_url available for this ElevenLabs voice');
    }
  } finally {
    clearAudio();
  }

  // 3. Trim to 15 s if needed
  audioBytes = trimAudioTo15s(audioBytes);

  // 4. Clone to Inworld
  const cloneResult = await cloneVoice(
    { displayName: voiceName, audioSamples: [audioBytes], lang: inworldLang },
    config
  );

  const inworldVoiceId = cloneResult.voice?.voiceId;
  if (!inworldVoiceId) throw new ApiError('unexpected Inworld clone response: missing voiceId');
  return { elevenLabsVoiceId: voiceId, elevenLabsName: voiceName, inworldVoiceId };
}
|