@inworld/tts 0.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of @inworld/tts might be problematic. Click here for more details.
- package/CHANGELOG.md +9 -0
- package/LICENSE +21 -0
- package/README.md +332 -0
- package/dist/index.cjs +1580 -0
- package/package.json +77 -0
- package/src/client.js +929 -0
- package/src/config.js +135 -0
- package/src/encoding.js +23 -0
- package/src/errors.js +31 -0
- package/src/index.d.ts +363 -0
- package/src/index.js +149 -0
- package/src/player.browser.js +53 -0
- package/src/player.js +143 -0
- package/src/voice.js +498 -0
- package/src/write-file.browser.js +7 -0
- package/src/write-file.js +11 -0
package/src/config.js
ADDED
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
/** Config, env helpers, and retry logic. Works in Node.js 18+ and modern browsers. */
|
|
2
|
+
|
|
3
|
+
// Fallback API origin used by getBaseUrl() when neither an explicit base URL
// nor the INWORLD_BASE_URL environment variable is provided.
const DEFAULT_BASE_URL = 'https://api.inworld.ai';

// Text chunking bounds.
// NOTE(review): presumably measured in (weighted) characters per chunk — confirm in client.js.
export const MAX_CHUNK_SIZE = 1900;
export const MIN_CHUNK_SIZE = 500;

// Approximate speaking rate; used to convert <break> durations to equivalent char counts.
export const CHARS_PER_SECOND = 12;

// CJK chars produce ~3x more audio than Latin chars.
export const CJK_CHAR_WEIGHT = 3;

// NOTE(review): presumably the pause length used where audio chunks are joined — confirm.
export const SPLICE_BREAK_SECONDS = 0.5;

// Audio format parameters.
// NOTE(review): presumably the defaults for raw PCM/WAV handling — confirm against callers.
export const SAMPLE_RATE = 48000;
export const BITS_PER_SAMPLE = 16;
export const CHANNELS = 1;

// NOTE(review): presumably the default for the client's maxConcurrentRequests option — confirm.
export const MAX_CONCURRENT_REQUESTS = 2;

// Per-call character limits.
// NOTE(review): presumably the single-request ceilings for generate() and stream() — confirm.
export const GENERATE_MAX_CHARS = 2000;
export const STREAM_MAX_CHARS = 2000;
|
|
16
|
+
|
|
17
|
+
// Environment variable table shared by the helpers in this module.
// `process` does not exist in browsers, and some bundler shims define `process`
// without an `env` object; fall back to an empty object in both cases so that
// later `_env.SOMETHING` reads can never throw.
const _env = (typeof process !== 'undefined' && process.env) || {};
|
|
18
|
+
|
|
19
|
+
/**
 * Resolve the API key to use.
 *
 * An explicitly supplied key wins; otherwise the INWORLD_API_KEY environment
 * variable is consulted. Empty strings are treated as "not provided".
 *
 * @param {string} [apiKey] - Key passed by the caller.
 * @returns {string|null} The resolved key, or null when none is available.
 */
export function getApiKey(apiKey) {
  if (apiKey) return apiKey;
  const fromEnv = _env.INWORLD_API_KEY;
  return fromEnv ? fromEnv : null;
}
|
|
22
|
+
|
|
23
|
+
/**
 * Emit a debug line prefixed with `[inworld-tts]`.
 *
 * Logging is active when `config.debug` is truthy or when the DEBUG
 * environment variable equals `inworld-tts`; otherwise the call is a no-op.
 *
 * @param {{ debug?: boolean }} config - Client configuration.
 * @param {...unknown} args - Values forwarded to console.debug.
 * @returns {void}
 */
export function debugLog(config, ...args) {
  const enabled = config.debug || _env.DEBUG === 'inworld-tts';
  if (!enabled) return;
  console.debug('[inworld-tts]', ...args);
}
|
|
28
|
+
|
|
29
|
+
/**
 * Decide whether a failed request is worth retrying.
 *
 * Only errors named `NetworkError`, and errors named `ApiError` carrying a
 * numeric `code` of 500 or above, are retryable. Everything else — including
 * `AbortError` and `TimeoutError` — is treated as final.
 *
 * @param {unknown} e - The thrown value.
 * @returns {boolean} True when the operation should be attempted again.
 */
function isRetryable(e) {
  if (!e) return false;
  switch (e.name) {
    case 'NetworkError':
      return true;
    case 'ApiError':
      return typeof e.code === 'number' && e.code >= 500;
    default:
      // Covers AbortError, TimeoutError and any unrecognized error.
      return false;
  }
}
|
|
35
|
+
|
|
36
|
+
/**
 * Run `fn`, retrying with exponential backoff while it fails with a retryable error.
 *
 * `fn` is always invoked at least once. After a retryable failure the next
 * attempt is delayed by 1s, 2s, 4s, ... (capped at 16s). A non-retryable error,
 * or a failure on the final attempt, is rethrown unchanged.
 *
 * @param {() => Promise<T> | T} fn - Operation to execute.
 * @param {{ maxRetries?: number, debug?: boolean }} config - Client configuration.
 *   `maxRetries` defaults to 2; negative values are treated as 0, fractional
 *   values are rounded down, and NaN falls back to the default.
 * @returns {Promise<T>} The value produced by the first successful attempt.
 * @throws The last error raised by `fn` when no attempt succeeds.
 * @template T
 */
export async function withRetry(fn, config) {
  // Normalize the retry budget. Without this, a negative or NaN value would skip
  // the loop entirely (fn never runs) and a fractional value would swallow the
  // final failure — both ending in a silent `undefined` result.
  const requested = Number(config.maxRetries ?? 2);
  const retries = Number.isNaN(requested) ? 2 : Math.max(0, Math.floor(requested));

  for (let attempt = 0; attempt <= retries; attempt++) {
    if (attempt > 0) {
      const delay = Math.min(1000 * 2 ** (attempt - 1), 16000);
      debugLog(config, `retry ${attempt}/${retries} after ${delay}ms`);
      await new Promise((resolve) => setTimeout(resolve, delay));
    }
    try {
      return await fn();
    } catch (e) {
      if (!isRetryable(e) || attempt >= retries) throw e;
      debugLog(config, `retryable error (${e.name}): ${e.message}`);
    }
  }
  // Not reachable: the final iteration either returns or rethrows.
}
|
|
52
|
+
|
|
53
|
+
/**
 * Resolve the API base URL without any trailing slashes.
 *
 * Precedence: explicit `baseUrl`, then the INWORLD_BASE_URL environment
 * variable, then the built-in default.
 *
 * @param {string|URL|null} [baseUrl] - Caller-supplied override.
 * @returns {string} Base URL with every trailing `/` removed.
 */
export function getBaseUrl(baseUrl) {
  // String() lets callers pass a URL object as well as a plain string.
  const raw = String(baseUrl || _env.INWORLD_BASE_URL || DEFAULT_BASE_URL);
  // Trim all trailing slashes (not just one) so `${base}/path` never yields `//`.
  let end = raw.length;
  while (end > 0 && raw[end - 1] === '/') end -= 1;
  return raw.slice(0, end);
}
|
|
57
|
+
|
|
58
|
+
// ---------------------------------------------------------------------------
|
|
59
|
+
// Browser detection
|
|
60
|
+
// ---------------------------------------------------------------------------
|
|
61
|
+
|
|
62
|
+
/**
 * Report whether the code appears to be executing in a browser.
 *
 * All three globals must exist: `window`, `window.document` and `navigator`.
 *
 * @returns {boolean} True only when every browser global is present.
 */
export function isRunningInBrowser() {
  if (typeof window === 'undefined') return false;
  if (typeof window.document === 'undefined') return false;
  return typeof navigator !== 'undefined';
}
|
|
67
|
+
|
|
68
|
+
// ---------------------------------------------------------------------------
|
|
69
|
+
// AbortSignal compatibility (replaces AbortSignal.timeout())
|
|
70
|
+
// ---------------------------------------------------------------------------
|
|
71
|
+
|
|
72
|
+
/**
 * Create an AbortSignal that aborts after `ms` milliseconds.
 *
 * The caller should invoke `clear()` once the guarded operation settles so the
 * pending timer is cancelled.
 *
 * @param {number} ms - Delay before the signal aborts.
 * @returns {{ signal: AbortSignal, clear: () => void }} The signal and a function that cancels the timer.
 */
export function getTimeoutSignal(ms) {
  const controller = new AbortController();
  const timerId = setTimeout(() => {
    controller.abort();
  }, ms);
  const clear = () => clearTimeout(timerId);
  return { signal: controller.signal, clear };
}
|
|
77
|
+
|
|
78
|
+
// ---------------------------------------------------------------------------
|
|
79
|
+
// JWT token refresh helpers
|
|
80
|
+
// ---------------------------------------------------------------------------
|
|
81
|
+
|
|
82
|
+
/**
 * Extract the `exp` claim from a JWT without verifying its signature.
 *
 * @param {string} token - Compact JWT (`header.payload.signature`).
 * @returns {number|null} The numeric `exp` claim (callers treat it as seconds
 *   since the epoch), or null when the token cannot be decoded or carries no
 *   usable numeric expiry. A warning is logged whenever null is returned.
 */
export function getJwtExp(token) {
  let exp = null;
  try {
    // JWT uses base64url (- and _ instead of + and /), atob() requires standard base64 with padding
    let b64 = token.split('.')[1].replace(/-/g, '+').replace(/_/g, '/');
    while (b64.length % 4) b64 += '=';
    const claim = JSON.parse(atob(b64)).exp;
    // Accept only a finite number: any other value would turn the expiry
    // arithmetic downstream into NaN and silently disable refresh.
    if (typeof claim === 'number' && Number.isFinite(claim)) exp = claim;
  } catch {
    /* decode or parse failed, exp stays null */
  }
  if (exp === null) {
    console.warn('[inworld-tts] Could not parse token expiry — token refresh will not be scheduled. Ensure token is a valid JWT.');
  }
  return exp;
}
|
|
96
|
+
|
|
97
|
+
// Tokens that expire within this window are refreshed in the background.
const TOKEN_REFRESH_WINDOW_MS = 5 * 60 * 1000;

/**
 * Stale-while-revalidate token refresh. Called before every API request.
 *
 * - apiKey users (no `_token`) or no `_onTokenExpiring` callback: no-op
 * - token expiry cannot be determined: no-op
 * - token already expired: await the refresh before proceeding
 * - token expiring within 5 minutes: start a background refresh, proceed with the current token
 * - token fine: no-op
 *
 * Concurrent callers share a single in-flight refresh via `config._refreshPromise`.
 * Refresh failures are logged and never thrown; the existing token is kept.
 *
 * @param {object} config - Client configuration. Reads `_token` and
 *   `_onTokenExpiring`; updates `_token`, `_authHeader` and `_refreshPromise`.
 * @returns {Promise<void>}
 */
export async function ensureFreshToken(config) {
  if (!config._token || !config._onTokenExpiring) return;
  const exp = getJwtExp(config._token);
  if (!exp) return;

  const msUntilExp = exp * 1000 - Date.now();

  const doRefresh = () => {
    if (config._refreshPromise) return config._refreshPromise;
    config._refreshPromise = Promise.resolve()
      .then(() => config._onTokenExpiring())
      .then((newToken) => {
        if (!newToken || typeof newToken !== 'string') {
          console.warn('[inworld-tts] onTokenExpiring must return a non-empty string token');
          return;
        }
        config._token = newToken;
        config._authHeader = `Bearer ${newToken}`;
      })
      .catch((e) => {
        // The rejection value is not guaranteed to be an Error (it may even be
        // undefined). Reading `.message` unguarded would throw inside this
        // handler and surface as an unhandled rejection on the background path.
        console.warn('[inworld-tts] Token refresh failed:', e?.message ?? e);
      })
      .finally(() => {
        config._refreshPromise = null;
      });
    return config._refreshPromise;
  };

  if (msUntilExp <= 0) {
    await doRefresh(); // already expired — must wait
  } else if (msUntilExp < TOKEN_REFRESH_WINDOW_MS) {
    doRefresh(); // expiring soon — refresh in background, don't await (never rejects)
  }
}
|
package/src/encoding.js
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Audio encoding detection from magic bytes. Platform-independent — works in Node.js and browsers.
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
/**
 * Infer the audio encoding from the leading magic bytes.
 *
 * Recognized containers are `RIFF` (reported as WAV), `fLaC` (FLAC) and `OggS`
 * (reported as OGG_OPUS). Anything else — including ID3-tagged or raw MP3
 * frames, missing input, and inputs shorter than four bytes — is reported as MP3.
 *
 * @param {Uint8Array} audio
 * @returns {string} One of 'WAV', 'FLAC', 'OGG_OPUS' or 'MP3'.
 */
export function detectEncoding(audio) {
  if (!audio || audio.length < 4) return 'MP3';

  // Four-byte signatures for every non-MP3 format. MP3 markers need no entry
  // because MP3 is also the fallback result.
  const signatures = [
    ['WAV', [0x52, 0x49, 0x46, 0x46]], // "RIFF"
    ['FLAC', [0x66, 0x4c, 0x61, 0x43]], // "fLaC"
    ['OGG_OPUS', [0x4f, 0x67, 0x67, 0x53]], // "OggS"
  ];

  for (const [encoding, magic] of signatures) {
    if (magic.every((byte, index) => audio[index] === byte)) return encoding;
  }
  return 'MP3';
}
|
package/src/errors.js
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
/** TTS SDK errors. */
|
|
2
|
+
|
|
3
|
+
/**
 * Base class for every error thrown by the SDK.
 * Catch this type to handle all SDK failures in one place.
 */
export class InworldTTSError extends Error {
  // Own property so `err.name` reports the SDK class rather than "Error".
  name = 'InworldTTSError';

  /** @param {string} message - Human-readable description. */
  constructor(message) {
    super(message);
  }
}
|
|
9
|
+
|
|
10
|
+
// Default guidance shown when no credentials could be resolved.
const MISSING_API_KEY_MESSAGE = [
  'INWORLD_API_KEY is not set. To fix this:',
  ' 1. Set the environment variable: export INWORLD_API_KEY=your_key',
  ' 2. Pass it to the constructor: InworldTTS({ apiKey: "your_key" })',
  ' 3. Or use a JWT token: InworldTTS({ token: "your_jwt" })',
  ' (See: https://docs.inworld.ai/api-reference/introduction#jwt-authentication)',
  'Get your API key at https://platform.inworld.ai',
].join('\n');

/**
 * Thrown when a client is created without any usable credentials.
 */
export class MissingApiKeyError extends InworldTTSError {
  name = 'MissingApiKeyError';

  /** @param {string} [message] - Overrides the default setup instructions. */
  constructor(message = MISSING_API_KEY_MESSAGE) {
    super(message);
  }
}
|
|
16
|
+
|
|
17
|
+
/**
 * Error carrying a status code and details payload alongside the message.
 * NOTE(review): presumably raised for non-success API responses — confirm in client.js.
 */
export class ApiError extends InworldTTSError {
  name = 'ApiError';

  /**
   * @param {string} message - Human-readable description.
   * @param {number|null} [code=null] - Status code; numeric values of 500 or above are retried by withRetry().
   * @param {object} [details={}] - Additional error details.
   */
  constructor(message, code = null, details = {}) {
    super(message);
    Object.assign(this, { code, details });
  }
}
|
|
25
|
+
|
|
26
|
+
/**
 * Error type that withRetry() treats as retryable.
 * NOTE(review): presumably raised for transport-level failures — confirm in client.js.
 */
export class NetworkError extends InworldTTSError {
  name = 'NetworkError';

  /** @param {string} message - Human-readable description. */
  constructor(message) {
    super(message);
  }
}
|
package/src/index.d.ts
ADDED
|
@@ -0,0 +1,363 @@
|
|
|
1
|
+
// ---------------------------------------------------------------------------
|
|
2
|
+
// Errors
|
|
3
|
+
// ---------------------------------------------------------------------------
|
|
4
|
+
|
|
5
|
+
/**
 * Base class for all SDK errors.
 *
 * `name` is typed as `string` here so that each subclass can narrow it to its
 * own literal; a literal type on the base would make the subclass
 * declarations incompatible with it.
 */
export class InworldTTSError extends Error {
  name: string;
  constructor(message?: string);
}

/** Thrown when a client is created without an API key or token. */
export class MissingApiKeyError extends InworldTTSError {
  name: 'MissingApiKeyError';
  /** @param message Overrides the default setup instructions. */
  constructor(message?: string);
}

/** Error carrying a status code and a details payload. */
export class ApiError extends InworldTTSError {
  name: 'ApiError';
  code: number | null;
  details: Record<string, unknown>;
  /**
   * @param message Human-readable description.
   * @param code Status code. Defaults to null.
   * @param details Additional error details. Defaults to an empty object.
   */
  constructor(message?: string, code?: number | null, details?: Record<string, unknown>);
}

/** Error type that the SDK's retry logic treats as retryable. */
export class NetworkError extends InworldTTSError {
  name: 'NetworkError';
  constructor(message?: string);
}
|
|
22
|
+
|
|
23
|
+
// ---------------------------------------------------------------------------
|
|
24
|
+
// Shared option types
|
|
25
|
+
// ---------------------------------------------------------------------------
|
|
26
|
+
|
|
27
|
+
export type AudioEncoding =
|
|
28
|
+
| 'MP3'
|
|
29
|
+
| 'OGG_OPUS'
|
|
30
|
+
| 'LINEAR16'
|
|
31
|
+
| 'WAV'
|
|
32
|
+
| 'PCM'
|
|
33
|
+
| 'FLAC'
|
|
34
|
+
| 'ALAW'
|
|
35
|
+
| 'MULAW';
|
|
36
|
+
|
|
37
|
+
export type TextNormalization = 'ON' | 'OFF' | 'none';
|
|
38
|
+
|
|
39
|
+
/** Options shared by generate() and stream(). */
|
|
40
|
+
export interface TtsOptions {
|
|
41
|
+
/** Text to synthesize. Required. */
|
|
42
|
+
text: string;
|
|
43
|
+
/** Voice ID. Required. */
|
|
44
|
+
voice: string;
|
|
45
|
+
/** Model ID. Defaults differ per method (see method docs). */
|
|
46
|
+
model?: string;
|
|
47
|
+
/** Audio encoding format. Default: 'MP3'. */
|
|
48
|
+
encoding?: AudioEncoding;
|
|
49
|
+
/** Sample rate in Hz. Default: 48000. */
|
|
50
|
+
sampleRate?: number;
|
|
51
|
+
/** Bit rate in bps for MP3/OGG_OPUS. Default: 128000. */
|
|
52
|
+
bitRate?: number;
|
|
53
|
+
/** Speaking rate multiplier (0.5–1.5). Default: 1.0. */
|
|
54
|
+
speakingRate?: number;
|
|
55
|
+
/** Temperature for expressiveness (0.0–2.0). Default: 1.0. */
|
|
56
|
+
temperature?: number;
|
|
57
|
+
/** Text normalization mode. */
|
|
58
|
+
applyTextNormalization?: TextNormalization;
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
/** Options for generate(). Extends TtsOptions with outputFile convenience. */
|
|
62
|
+
export interface GenerateOptions extends TtsOptions {
|
|
63
|
+
/**
|
|
64
|
+
* If provided, the audio is also written to this file path (Node.js only).
|
|
65
|
+
* The Uint8Array is still returned regardless.
|
|
66
|
+
*/
|
|
67
|
+
outputFile?: string;
|
|
68
|
+
/** If true, play the generated audio immediately (Node.js only). */
|
|
69
|
+
play?: boolean;
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
/** Options for stream(). Same options as generate() except model defaults to 'inworld-tts-1.5-mini'. Max 2000 chars. */
|
|
73
|
+
export interface StreamOptions extends TtsOptions {
|
|
74
|
+
/** Write audio to this file path after streaming completes (Node.js only). */
|
|
75
|
+
outputFile?: string;
|
|
76
|
+
/** Play audio after streaming completes. */
|
|
77
|
+
play?: boolean;
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
// ---------------------------------------------------------------------------
|
|
81
|
+
// Timestamp types
|
|
82
|
+
// ---------------------------------------------------------------------------
|
|
83
|
+
|
|
84
|
+
export type TimestampType = 'WORD' | 'CHARACTER';
|
|
85
|
+
|
|
86
|
+
export interface PhoneInfo {
|
|
87
|
+
phoneSymbol: string;
|
|
88
|
+
startTimeSeconds: number;
|
|
89
|
+
durationSeconds: number;
|
|
90
|
+
visemeSymbol: string;
|
|
91
|
+
}
|
|
92
|
+
|
|
93
|
+
export interface PhoneticDetail {
|
|
94
|
+
wordIndex: number;
|
|
95
|
+
phones: PhoneInfo[];
|
|
96
|
+
isPartial: boolean;
|
|
97
|
+
}
|
|
98
|
+
|
|
99
|
+
export interface WordAlignment {
|
|
100
|
+
words: string[];
|
|
101
|
+
wordStartTimeSeconds: number[];
|
|
102
|
+
wordEndTimeSeconds: number[];
|
|
103
|
+
phoneticDetails: PhoneticDetail[];
|
|
104
|
+
}
|
|
105
|
+
|
|
106
|
+
export interface CharacterAlignment {
|
|
107
|
+
characters: string[];
|
|
108
|
+
characterStartTimeSeconds: number[];
|
|
109
|
+
characterEndTimeSeconds: number[];
|
|
110
|
+
}
|
|
111
|
+
|
|
112
|
+
export interface TimestampInfo {
|
|
113
|
+
/** Present when timestampType is 'WORD'. */
|
|
114
|
+
wordAlignment?: WordAlignment;
|
|
115
|
+
/** Present when timestampType is 'CHARACTER'. */
|
|
116
|
+
characterAlignment?: CharacterAlignment;
|
|
117
|
+
}
|
|
118
|
+
|
|
119
|
+
export interface GenerateWithTimestampsOptions extends TtsOptions {
|
|
120
|
+
/** Required. Whether to align by word or character. */
|
|
121
|
+
timestampType: TimestampType;
|
|
122
|
+
/** Write audio to this file path (Node.js only). */
|
|
123
|
+
outputFile?: string;
|
|
124
|
+
/** Play audio after generating (Node.js only). */
|
|
125
|
+
play?: boolean;
|
|
126
|
+
}
|
|
127
|
+
|
|
128
|
+
export interface StreamWithTimestampsOptions extends TtsOptions {
|
|
129
|
+
/** Required. Whether to align by word or character. */
|
|
130
|
+
timestampType: TimestampType;
|
|
131
|
+
/** Write audio to this file path after streaming completes (Node.js only). */
|
|
132
|
+
outputFile?: string;
|
|
133
|
+
/** Play audio after streaming completes (Node.js only). */
|
|
134
|
+
play?: boolean;
|
|
135
|
+
}
|
|
136
|
+
|
|
137
|
+
// ---------------------------------------------------------------------------
|
|
138
|
+
// Voice API types
|
|
139
|
+
// ---------------------------------------------------------------------------
|
|
140
|
+
|
|
141
|
+
export interface ListVoicesOptions {
|
|
142
|
+
/** Filter by language(s), e.g. 'EN_US' or ['EN_US', 'es']. */
|
|
143
|
+
lang?: string | string[];
|
|
144
|
+
}
|
|
145
|
+
|
|
146
|
+
export interface VoiceSample {
|
|
147
|
+
audioData: string;
|
|
148
|
+
transcription?: string;
|
|
149
|
+
}
|
|
150
|
+
|
|
151
|
+
export interface VoiceInfo {
|
|
152
|
+
voiceId: string;
|
|
153
|
+
displayName: string;
|
|
154
|
+
langCode?: string;
|
|
155
|
+
description?: string;
|
|
156
|
+
tags?: string[];
|
|
157
|
+
name?: string;
|
|
158
|
+
source?: string;
|
|
159
|
+
}
|
|
160
|
+
|
|
161
|
+
/** @deprecated Use VoiceInfo instead. */
|
|
162
|
+
export type Voice = VoiceInfo;
|
|
163
|
+
|
|
164
|
+
export interface CloneVoiceOptions {
|
|
165
|
+
/** Human-readable name for the cloned voice. Default: 'Cloned Voice'. */
|
|
166
|
+
displayName?: string;
|
|
167
|
+
/** Array of audio samples: WAV or MP3 file contents (Uint8Array/Buffer) or file path strings (Node.js only). */
|
|
168
|
+
audioSamples: Array<Uint8Array | string>;
|
|
169
|
+
/** Language code. Default: 'EN_US'. */
|
|
170
|
+
lang?: string;
|
|
171
|
+
/** Voice description. */
|
|
172
|
+
description?: string;
|
|
173
|
+
/** Tags for the voice. */
|
|
174
|
+
tags?: string[];
|
|
175
|
+
/** Transcriptions aligned with audioSamples. */
|
|
176
|
+
transcriptions?: string[];
|
|
177
|
+
/** Enable background noise removal. */
|
|
178
|
+
removeBackgroundNoise?: boolean;
|
|
179
|
+
}
|
|
180
|
+
|
|
181
|
+
export interface AudioSampleValidation {
|
|
182
|
+
transcription?: string;
|
|
183
|
+
langCode?: string;
|
|
184
|
+
warnings?: Array<Record<string, unknown>>;
|
|
185
|
+
errors?: Array<Record<string, unknown>>;
|
|
186
|
+
audioData?: string;
|
|
187
|
+
}
|
|
188
|
+
|
|
189
|
+
export interface CloneVoiceResult {
|
|
190
|
+
voice: VoiceInfo;
|
|
191
|
+
audioSamplesValidated?: AudioSampleValidation[];
|
|
192
|
+
}
|
|
193
|
+
|
|
194
|
+
export interface DesignVoiceOptions {
|
|
195
|
+
/** Voice description prompt (30-250 characters). */
|
|
196
|
+
designPrompt: string;
|
|
197
|
+
/** Text to be spoken in preview audio. */
|
|
198
|
+
previewText: string;
|
|
199
|
+
/** Language code. Default: 'EN_US'. */
|
|
200
|
+
lang?: string;
|
|
201
|
+
/** Number of preview voice samples to generate (1-3). Default: 1. */
|
|
202
|
+
numberOfSamples?: number;
|
|
203
|
+
}
|
|
204
|
+
|
|
205
|
+
export interface PreviewVoice {
|
|
206
|
+
voiceId: string;
|
|
207
|
+
previewText?: string;
|
|
208
|
+
previewAudio?: string;
|
|
209
|
+
}
|
|
210
|
+
|
|
211
|
+
export interface DesignVoiceResult {
|
|
212
|
+
previewVoices: PreviewVoice[];
|
|
213
|
+
langCode?: string;
|
|
214
|
+
}
|
|
215
|
+
|
|
216
|
+
export interface ElevenLabsMigrateResult {
|
|
217
|
+
elevenLabsVoiceId: string;
|
|
218
|
+
elevenLabsName: string;
|
|
219
|
+
inworldVoiceId: string;
|
|
220
|
+
}
|
|
221
|
+
|
|
222
|
+
export interface PublishVoiceOptions {
|
|
223
|
+
/** Voice ID from a design preview or clone. */
|
|
224
|
+
voice: string;
|
|
225
|
+
/** Display name for the published voice. */
|
|
226
|
+
displayName?: string;
|
|
227
|
+
/** Description. */
|
|
228
|
+
description?: string;
|
|
229
|
+
/** Tags. */
|
|
230
|
+
tags?: string[];
|
|
231
|
+
}
|
|
232
|
+
|
|
233
|
+
export interface UpdateVoiceOptions {
|
|
234
|
+
/** Voice ID to update. */
|
|
235
|
+
voice: string;
|
|
236
|
+
/** New display name. */
|
|
237
|
+
displayName?: string;
|
|
238
|
+
/** New description. */
|
|
239
|
+
description?: string;
|
|
240
|
+
/** New tags. */
|
|
241
|
+
tags?: string[];
|
|
242
|
+
}
|
|
243
|
+
|
|
244
|
+
// ---------------------------------------------------------------------------
|
|
245
|
+
// Client
|
|
246
|
+
// ---------------------------------------------------------------------------
|
|
247
|
+
|
|
248
|
+
/** Options accepted by createClient() / InworldTTS(). */
export interface ClientOptions {
  /**
   * API key. Falls back to INWORLD_API_KEY environment variable.
   * Mutually exclusive with token: when both are given, the token is used and
   * the API key is ignored (a warning is logged).
   */
  apiKey?: string;
  /** JWT token for browser use. Mutually exclusive with apiKey. */
  token?: string;
  /**
   * Called automatically when the token is about to expire (or already has).
   * Must return a fresh JWT string, either directly or via a Promise.
   * Only used when token is provided.
   */
  onTokenExpiring?: () => string | Promise<string>;
  /**
   * Allow API key usage in browser environments.
   * Your key will be visible in DevTools and billed to your account.
   * Recommended: use token + onTokenExpiring for browser instead.
   */
  dangerouslyAllowBrowser?: boolean;
  /** Override the API base URL. */
  baseUrl?: string;
  /**
   * HTTP request timeout in milliseconds. Overrides per-method defaults:
   * generate/stream: 60s, listVoices: 30s, cloneVoice: 120s, designVoice: 60s, publishVoice: 30s.
   */
  timeout?: number;
  /** Max parallel requests for long-text chunking. Default: 2. */
  maxConcurrentRequests?: number;
  /** Number of retries on NetworkError or 5xx responses (exponential backoff). Default: 2. */
  maxRetries?: number;
  /** Enable debug logging. Also activated by setting DEBUG=inworld-tts env var. */
  debug?: boolean;
}
|
|
278
|
+
|
|
279
|
+
export interface InworldTTSClient {
|
|
280
|
+
/**
|
|
281
|
+
* Synthesize speech from text of any length.
|
|
282
|
+
* Short text (≤2000 chars) → single request.
|
|
283
|
+
* Long text → auto-chunked, parallelized, merged into one Uint8Array.
|
|
284
|
+
* Default model: inworld-tts-1.5-max.
|
|
285
|
+
* @returns Audio data as a Uint8Array (Buffer in Node.js).
|
|
286
|
+
*/
|
|
287
|
+
generate(options: GenerateOptions): Promise<Uint8Array>;
|
|
288
|
+
|
|
289
|
+
/**
|
|
290
|
+
* Stream TTS audio over HTTP (max 2000 chars per call).
|
|
291
|
+
* Default model: inworld-tts-1.5-mini (lower latency).
|
|
292
|
+
* @returns Async iterable of Uint8Array audio chunks (Buffer in Node.js).
|
|
293
|
+
*/
|
|
294
|
+
stream(options: StreamOptions): AsyncGenerator<Uint8Array>;
|
|
295
|
+
|
|
296
|
+
/**
|
|
297
|
+
* Synthesize speech with word/character timestamp alignment.
|
|
298
|
+
* Short text (≤2000 chars) → single request.
|
|
299
|
+
* Long text → auto-chunked, timestamps offset-adjusted and merged.
|
|
300
|
+
* Default model: inworld-tts-1.5-max.
|
|
301
|
+
*/
|
|
302
|
+
generateWithTimestamps(options: GenerateWithTimestampsOptions): Promise<{ audio: Uint8Array; timestamps: TimestampInfo }>;
|
|
303
|
+
|
|
304
|
+
/**
|
|
305
|
+
* Stream TTS audio with word/character timestamp alignment (max 2000 chars per call).
|
|
306
|
+
* Timestamps are delivered synchronously with each audio chunk.
|
|
307
|
+
* Default model: inworld-tts-1.5-mini.
|
|
308
|
+
*/
|
|
309
|
+
streamWithTimestamps(options: StreamWithTimestampsOptions): AsyncGenerator<{ audio: Uint8Array; timestamps?: TimestampInfo }>;
|
|
310
|
+
|
|
311
|
+
/** List voices in the workspace. */
|
|
312
|
+
listVoices(options?: ListVoicesOptions): Promise<VoiceInfo[]>;
|
|
313
|
+
|
|
314
|
+
/** Get details of a specific voice by ID. */
|
|
315
|
+
getVoice(voice: string): Promise<VoiceInfo>;
|
|
316
|
+
|
|
317
|
+
/** Update a voice's metadata. */
|
|
318
|
+
updateVoice(options: UpdateVoiceOptions): Promise<VoiceInfo>;
|
|
319
|
+
|
|
320
|
+
/** Delete a voice from your workspace. */
|
|
321
|
+
deleteVoice(voice: string): Promise<void>;
|
|
322
|
+
|
|
323
|
+
/** Clone a voice from audio samples. */
|
|
324
|
+
cloneVoice(options: CloneVoiceOptions): Promise<CloneVoiceResult>;
|
|
325
|
+
|
|
326
|
+
/** Design a voice from a text description. */
|
|
327
|
+
designVoice(options: DesignVoiceOptions): Promise<DesignVoiceResult>;
|
|
328
|
+
|
|
329
|
+
/** Publish a designed/cloned voice preview to your library. */
|
|
330
|
+
publishVoice(options: PublishVoiceOptions): Promise<VoiceInfo>;
|
|
331
|
+
|
|
332
|
+
/**
|
|
333
|
+
* Play audio from a Uint8Array. Encoding detected from magic bytes unless overridden.
|
|
334
|
+
* - Node.js: writes a temp file, plays via system player, deletes. Does not throw if no player found.
|
|
335
|
+
* - Browser: uses <audio> element. Supports MP3/WAV/OGG_OPUS/FLAC. Must be called inside a user
|
|
336
|
+
* event handler (e.g. button click) — browsers block autoplay outside user gestures.
|
|
337
|
+
* Prefer encoding: 'MP3' for widest browser compatibility.
|
|
338
|
+
*/
|
|
339
|
+
play(audio: Uint8Array | string, options?: { encoding?: string }): Promise<void>;
|
|
340
|
+
|
|
341
|
+
/**
|
|
342
|
+
* Migrate a single ElevenLabs voice to your Inworld workspace.
|
|
343
|
+
* Fetches metadata and sample audio from ElevenLabs, then clones into Inworld.
|
|
344
|
+
* Uses only fetch — no ElevenLabs SDK required.
|
|
345
|
+
*/
|
|
346
|
+
migrateFromElevenLabs(options: { elevenLabsApiKey: string; elevenLabsVoiceId: string }): Promise<ElevenLabsMigrateResult>;
|
|
347
|
+
|
|
348
|
+
}
|
|
349
|
+
|
|
350
|
+
// ---------------------------------------------------------------------------
|
|
351
|
+
// Exports
|
|
352
|
+
// ---------------------------------------------------------------------------
|
|
353
|
+
|
|
354
|
+
/**
 * Create an Inworld TTS client.
 *
 * Credentials are taken from `opts.token`, then `opts.apiKey`, then the
 * INWORLD_API_KEY environment variable.
 *
 * Throws MissingApiKeyError immediately if neither a token nor an API key is found.
 * Throws InworldTTSError if an API key is used in a browser without
 * `dangerouslyAllowBrowser: true`.
 */
export function createClient(opts?: ClientOptions): InworldTTSClient;

/**
 * Alias for createClient(). Accepts the same options and throws the same errors.
 */
export function InworldTTS(opts?: ClientOptions): InworldTTSClient;
|
package/src/index.js
ADDED
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Inworld TTS SDK: generate, stream, and Voice management.
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import {
|
|
6
|
+
generate as generateReq,
|
|
7
|
+
stream as streamReq,
|
|
8
|
+
generateWithTimestamps as generateWithTimestampsReq,
|
|
9
|
+
streamWithTimestamps as streamWithTimestampsReq,
|
|
10
|
+
} from './client.js';
|
|
11
|
+
import {
|
|
12
|
+
listVoices as listVoicesReq,
|
|
13
|
+
getVoice as getVoiceReq,
|
|
14
|
+
updateVoice as updateVoiceReq,
|
|
15
|
+
deleteVoice as deleteVoiceReq,
|
|
16
|
+
cloneVoice as cloneVoiceReq,
|
|
17
|
+
designVoice as designVoiceReq,
|
|
18
|
+
publishVoice as publishVoiceReq,
|
|
19
|
+
migrateFromElevenLabs as migrateFromElevenLabsReq,
|
|
20
|
+
} from './voice.js';
|
|
21
|
+
import { ApiError, InworldTTSError, MissingApiKeyError, NetworkError } from './errors.js';
|
|
22
|
+
import { play } from './player.js';
|
|
23
|
+
import { isRunningInBrowser, ensureFreshToken, getBaseUrl } from './config.js';
|
|
24
|
+
|
|
25
|
+
export { ApiError, InworldTTSError, MissingApiKeyError, NetworkError };
|
|
26
|
+
|
|
27
|
+
/**
 * Create an Inworld TTS client.
 *
 * Authentication is resolved in this order: `opts.token` (sent as `Bearer`),
 * then `opts.apiKey`, then the `INWORLD_API_KEY` environment variable (both
 * sent as `Basic`). An empty `apiKey` is treated as "not provided".
 *
 * Every API method refreshes the token first (when `onTokenExpiring` is set)
 * and then delegates to the matching request function; `play` needs no auth.
 *
 * @param {{
 *   apiKey?: string,
 *   token?: string,
 *   onTokenExpiring?: () => Promise<string>,
 *   dangerouslyAllowBrowser?: boolean,
 *   baseUrl?: string,
 *   timeout?: number,
 *   maxConcurrentRequests?: number,
 *   maxRetries?: number,
 *   debug?: boolean
 * }} [opts]
 * @returns {{ generate, stream, generateWithTimestamps, streamWithTimestamps, play, listVoices, getVoice, updateVoice, deleteVoice, cloneVoice, designVoice, publishVoice, migrateFromElevenLabs }}
 * @throws {MissingApiKeyError} When neither a token nor an API key is available.
 * @throws {InworldTTSError} When an API key is used in a browser without `dangerouslyAllowBrowser`.
 * @example
 * import { createClient } from '@inworld/tts';
 * import { writeFileSync } from 'fs';
 *
 * // Node.js — API key from env
 * const tts = createClient();
 * const audio = await tts.generate({ text: 'Hello world!', voice: 'YOUR_VOICE_ID' });
 * writeFileSync('hello.mp3', audio);
 *
 * // Browser — JWT token
 * const tts = createClient({ token: await fetchToken(), onTokenExpiring: fetchToken });
 * const audio = await tts.generate({ text: 'Hello!', voice: 'YOUR_VOICE_ID', encoding: 'MP3' });
 * button.onclick = () => tts.play(audio);
 */
export function createClient(opts = {}) {
  const token = opts.token ?? null;
  const envKey = typeof process !== 'undefined' ? process.env?.INWORLD_API_KEY : null;
  // `||` rather than `??`: an empty-string apiKey must not mask the environment
  // variable (this matches getApiKey() in config.js).
  const apiKey = token ? null : (opts.apiKey || envKey || null);

  // Warn about redundant/misused options
  if (opts.token && opts.apiKey) {
    console.warn('[inworld-tts] Both token and apiKey provided — apiKey will be ignored');
  }
  if (opts.onTokenExpiring && !token) {
    console.warn('[inworld-tts] onTokenExpiring is ignored when no token is provided');
  }

  // Browser safety gate (apiKey is only non-null when no token is in use)
  if (apiKey && isRunningInBrowser()) {
    if (!opts.dangerouslyAllowBrowser) {
      throw new InworldTTSError(
        'Running in browser with API key is disabled by default. ' +
        'Your API key would be exposed to end users.\n' +
        'Recommended: use a JWT token instead: createClient({ token: \'your_jwt\' })\n' +
        'See: https://docs.inworld.ai/api-reference/introduction#jwt-authentication\n' +
        'Or to opt in anyway: createClient({ apiKey: \'...\', dangerouslyAllowBrowser: true })'
      );
    }
    console.warn('[inworld-tts] dangerouslyAllowBrowser is set. Your API key is visible to anyone using browser DevTools and can be used to make requests at your expense.');
  }

  // Require at least one auth method
  if (!apiKey && !token) throw new MissingApiKeyError();

  // Build config with auth header and resolved baseUrl
  const config = {
    _baseUrl: getBaseUrl(opts.baseUrl ?? null),
    _authHeader: token ? `Bearer ${token}` : `Basic ${apiKey}`,
    _token: token,
    _onTokenExpiring: (token && opts.onTokenExpiring) ? opts.onTokenExpiring : null,
    _refreshPromise: null,
    timeout: opts.timeout ?? null,
    maxConcurrentRequests: opts.maxConcurrentRequests,
    maxRetries: opts.maxRetries ?? 2,
    debug: opts.debug ?? false,
  };

  return {
    async generate(options) {
      await ensureFreshToken(config);
      return generateReq(options, config);
    },
    async *stream(options) {
      await ensureFreshToken(config);
      yield* streamReq(options, config);
    },
    async generateWithTimestamps(options) {
      await ensureFreshToken(config);
      return generateWithTimestampsReq(options, config);
    },
    async *streamWithTimestamps(options) {
      await ensureFreshToken(config);
      yield* streamWithTimestampsReq(options, config);
    },
    async listVoices(options) {
      await ensureFreshToken(config);
      return listVoicesReq(options, config);
    },
    async getVoice(voice) {
      await ensureFreshToken(config);
      return getVoiceReq(voice, config);
    },
    async updateVoice(options) {
      await ensureFreshToken(config);
      return updateVoiceReq(options, config);
    },
    async deleteVoice(voice) {
      await ensureFreshToken(config);
      return deleteVoiceReq(voice, config);
    },
    async cloneVoice(options) {
      await ensureFreshToken(config);
      return cloneVoiceReq(options, config);
    },
    async designVoice(options) {
      await ensureFreshToken(config);
      return designVoiceReq(options, config);
    },
    async publishVoice(options) {
      await ensureFreshToken(config);
      return publishVoiceReq(options, config);
    },
    async migrateFromElevenLabs(options) {
      await ensureFreshToken(config);
      return migrateFromElevenLabsReq(options, config);
    },
    async play(audio, options) {
      return play(audio, options);
    },
  };
}
|
|
126
|
+
|
|
127
|
+
/**
 * Convenience alias for createClient(): forwards the options unchanged and
 * returns the client it produces.
 *
 * @param {{
 *   apiKey?: string,
 *   token?: string,
 *   onTokenExpiring?: () => Promise<string>,
 *   dangerouslyAllowBrowser?: boolean,
 *   baseUrl?: string,
 *   timeout?: number,
 *   maxConcurrentRequests?: number,
 *   maxRetries?: number,
 *   debug?: boolean
 * }} [opts]
 * @returns {{ generate, stream, generateWithTimestamps, streamWithTimestamps, play, listVoices, getVoice, updateVoice, deleteVoice, cloneVoice, designVoice, publishVoice, migrateFromElevenLabs }}
 * @example
 * import { InworldTTS } from '@inworld/tts';
 *
 * const tts = InworldTTS(); // reads INWORLD_API_KEY from env
 * const audio = await tts.generate({ text: 'Hello world!' });
 */
export function InworldTTS(opts = {}) {
  const client = createClient(opts);
  return client;
}
|