@mastra/voice-inworld 0.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +111 -0
- package/dist/index.cjs +256 -0
- package/dist/index.cjs.map +1 -0
- package/dist/index.d.ts +104 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +254 -0
- package/dist/index.js.map +1 -0
- package/package.json +62 -0
package/README.md
ADDED
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
# @mastra/voice-inworld
|
|
2
|
+
|
|
3
|
+
[Inworld AI](https://inworld.ai) voice provider for [Mastra](https://mastra.ai) — streaming TTS and batch STT.
|
|
4
|
+
|
|
5
|
+
## Installation
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
npm install @mastra/voice-inworld @mastra/core
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
## Quick Start
|
|
12
|
+
|
|
13
|
+
```typescript
|
|
14
|
+
import { InworldVoice } from '@mastra/voice-inworld';
|
|
15
|
+
|
|
16
|
+
const voice = new InworldVoice({
|
|
17
|
+
speaker: 'Dennis', // 22 built-in voices available
|
|
18
|
+
});
|
|
19
|
+
|
|
20
|
+
// Text-to-Speech (streaming)
|
|
21
|
+
const audioStream = await voice.speak('Hello from Inworld!');
|
|
22
|
+
|
|
23
|
+
// Speech-to-Text
|
|
24
|
+
const transcript = await voice.listen(audioStream);
|
|
25
|
+
|
|
26
|
+
// List available voices
|
|
27
|
+
const voices = await voice.getSpeakers();
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
## Configuration
|
|
31
|
+
|
|
32
|
+
```typescript
|
|
33
|
+
const voice = new InworldVoice({
|
|
34
|
+
speechModel: {
|
|
35
|
+
name: 'inworld-tts-2', // default; also 'inworld-tts-1.5-max' or 'inworld-tts-1.5-mini'
|
|
36
|
+
apiKey: 'your-key', // or set INWORLD_API_KEY env var
|
|
37
|
+
},
|
|
38
|
+
listeningModel: {
|
|
39
|
+
name: 'groq/whisper-large-v3',
|
|
40
|
+
},
|
|
41
|
+
speaker: 'Dennis', // default voice
|
|
42
|
+
audioEncoding: 'MP3', // MP3, WAV, OGG_OPUS, LINEAR16, PCM, ALAW, MULAW, FLAC
|
|
43
|
+
sampleRateHertz: 48000, // 8000-48000
|
|
44
|
+
language: 'en-US', // BCP-47 language code for STT
|
|
45
|
+
});
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
## Speak Options
|
|
49
|
+
|
|
50
|
+
```typescript
|
|
51
|
+
const stream = await voice.speak('Hello', {
|
|
52
|
+
speaker: 'Olivia', // override voice
|
|
53
|
+
audioEncoding: 'WAV', // override format
|
|
54
|
+
sampleRateHertz: 24000, // override sample rate
|
|
55
|
+
speakingRate: 1.2, // 0.5 - 1.5
|
|
56
|
+
temperature: 0.8, // (0, 2] — ignored on inworld-tts-2
|
|
57
|
+
deliveryMode: 'CREATIVE', // STABLE | BALANCED | CREATIVE — only honored on inworld-tts-2
|
|
58
|
+
language: 'fr-FR', // BCP-47 per-call override; auto-detected when omitted
|
|
59
|
+
});
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
## Listen Options
|
|
63
|
+
|
|
64
|
+
```typescript
|
|
65
|
+
const text = await voice.listen(audioStream, {
|
|
66
|
+
audioEncoding: 'AUTO_DETECT', // or 'MP3', 'LINEAR16', etc.
|
|
67
|
+
sampleRateHertz: 16000,
|
|
68
|
+
language: 'en-US',
|
|
69
|
+
});
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
## CompositeVoice
|
|
73
|
+
|
|
74
|
+
Mix Inworld with other providers:
|
|
75
|
+
|
|
76
|
+
```typescript
|
|
77
|
+
import { CompositeVoice } from '@mastra/core/voice';
|
|
78
|
+
import { InworldVoice } from '@mastra/voice-inworld';
|
|
79
|
+
import { DeepgramVoice } from '@mastra/voice-deepgram';
|
|
80
|
+
|
|
81
|
+
const voice = new CompositeVoice({
|
|
82
|
+
output: new InworldVoice({ speaker: 'Olivia' }), // Inworld for TTS
|
|
83
|
+
input: new DeepgramVoice(), // Deepgram for STT
|
|
84
|
+
});
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
## Available Voices
|
|
88
|
+
|
|
89
|
+
Alex, Ashley, Craig, Deborah, Dennis, Dominus, Edward, Elizabeth, Hades, Heitor, Julia, Maite, Mark, Olivia, Pixie, Priya, Ronald, Sarah, Shaun, Theodore, Timothy, Wendy.
|
|
90
|
+
|
|
91
|
+
## TTS Models
|
|
92
|
+
|
|
93
|
+
| Model | Quality | Latency | Notes |
|
|
94
|
+
| ---------------------- | ------- | ------------- | ----------------------------------------------- |
|
|
95
|
+
| `inworld-tts-2` | Highest | ~200ms median | **Default.** Flagship; supports `deliveryMode`. |
|
|
96
|
+
| `inworld-tts-1.5-max` | High | ~200ms median | Previous flagship. Supports `temperature`. |
|
|
97
|
+
| `inworld-tts-1.5-mini` | Good | ~100ms median | Lower latency, reduced quality. |
|
|
98
|
+
|
|
99
|
+
## STT Models
|
|
100
|
+
|
|
101
|
+
| Model | Languages | Notes |
|
|
102
|
+
| ----------------------- | --------- | -------------------------- |
|
|
103
|
+
| `groq/whisper-large-v3` | 99+ | Best multilingual coverage |
|
|
104
|
+
|
|
105
|
+
## Streaming
|
|
106
|
+
|
|
107
|
+
The `speak()` method uses Inworld's streaming TTS endpoint (`/tts/v1/voice:stream`), returning audio chunks progressively as they are generated. This is ideal for agentic workflows where low time-to-first-audio matters.
|
|
108
|
+
|
|
109
|
+
## Authentication
|
|
110
|
+
|
|
111
|
+
Set your API key via the `INWORLD_API_KEY` environment variable or pass it in the config. Get your key from [platform.inworld.ai](https://platform.inworld.ai) → Settings → API Keys.
|
package/dist/index.cjs
ADDED
|
@@ -0,0 +1,256 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
var stream = require('stream');
|
|
4
|
+
var voice = require('@mastra/core/voice');
|
|
5
|
+
|
|
6
|
+
// src/index.ts
|
|
7
|
+
var INWORLD_API_BASE = "https://api.inworld.ai";
|
|
8
|
+
var InworldVoice = class extends voice.MastraVoice {
  /** API key sent with TTS (`speak`) and voice-listing (`getSpeakers`) requests. */
  speechApiKey;
  /** API key sent with STT (`listen`) requests. */
  listeningApiKey;
  /** Default TTS audio encoding; `speak` options may override it per call. */
  audioEncoding;
  /** Default TTS sample rate in Hz; `speak` options may override it per call. */
  sampleRateHertz;
  /** Default STT language code; `listen` options may override it per call. */
  language;
  /**
   * Creates an instance of the InworldVoice class.
   *
   * API keys are resolved per service: each model's own `apiKey` wins, then the
   * other model's `apiKey`, then the `INWORLD_API_KEY` environment variable.
   *
   * @param {Object} options - The options for the voice configuration.
   * @param {InworldVoiceConfig} [options.speechModel] - TTS model config. Default: inworld-tts-2.
   * @param {InworldListeningConfig} [options.listeningModel] - STT model config. Default: groq/whisper-large-v3.
   * @param {string} [options.speaker] - Default voice ID. Default: 'Dennis'.
   * @param {AudioEncoding} [options.audioEncoding] - TTS audio format. Default: 'MP3'.
   * @param {number} [options.sampleRateHertz] - TTS sample rate. Default: 48000.
   * @param {string} [options.language] - STT language (BCP-47). Default: 'en-US'.
   *
   * @throws {Error} If no API key is provided or found in INWORLD_API_KEY env var.
   */
  constructor({
    speechModel,
    listeningModel,
    speaker,
    audioEncoding,
    sampleRateHertz,
    language
  } = {}) {
    const envKey = process.env.INWORLD_API_KEY;
    const speechApiKey = speechModel?.apiKey ?? listeningModel?.apiKey ?? envKey;
    const listeningApiKey = listeningModel?.apiKey ?? speechModel?.apiKey ?? envKey;
    if (!speechApiKey || !listeningApiKey) {
      throw new Error(
        "Inworld API key is required. Pass apiKey in speechModel/listeningModel config or set INWORLD_API_KEY env var."
      );
    }
    super({
      speechModel: {
        name: speechModel?.name ?? "inworld-tts-2",
        apiKey: speechApiKey
      },
      listeningModel: {
        name: listeningModel?.name ?? "groq/whisper-large-v3",
        apiKey: listeningApiKey
      },
      speaker: speaker ?? "Dennis"
    });
    this.speechApiKey = speechApiKey;
    this.listeningApiKey = listeningApiKey;
    this.audioEncoding = audioEncoding ?? "MP3";
    this.sampleRateHertz = sampleRateHertz ?? 48e3;
    this.language = language ?? "en-US";
  }
  /**
   * Retrieves a list of available voices from the Inworld API.
   *
   * Missing optional fields in the response are filled with defaults
   * (`name` falls back to `voiceId`, `language` to "en", `source` to "SYSTEM").
   *
   * @returns {Promise<Array<{ voiceId: string; name: string; language: string; description: string; tags: string[]; source: string }>>}
   * @throws {Error} If the API responds with a non-2xx status.
   */
  async getSpeakers() {
    const response = await fetch(`${INWORLD_API_BASE}/voices/v1/voices`, {
      headers: { Authorization: `Basic ${this.speechApiKey}` }
    });
    if (!response.ok) {
      const text = await response.text();
      throw new Error(`Inworld list voices failed (${response.status}): ${text}`);
    }
    const data = await response.json();
    return data.voices?.map((v) => ({
      voiceId: v.voiceId,
      name: v.displayName ?? v.voiceId,
      language: v.langCode ?? "en",
      description: v.description ?? "",
      tags: v.tags ?? [],
      source: v.source ?? "SYSTEM"
    })) ?? [];
  }
  /**
   * Checks if listening capabilities are enabled.
   *
   * @returns {Promise<{ enabled: boolean }>} `enabled` is true when a listening model is set.
   */
  async getListener() {
    return { enabled: !!this.listeningModel };
  }
  /**
   * Converts text to speech using Inworld's streaming TTS endpoint.
   *
   * Returns a ReadableStream that emits audio chunks progressively as they
   * arrive from the API — following the same streaming pattern used by the
   * Deepgram and PlayAI Mastra voice providers.
   *
   * Uses the `/tts/v1/voice:stream` endpoint which returns newline-delimited
   * JSON, each line containing a base64-encoded audio chunk.
   *
   * @param {string | NodeJS.ReadableStream} input - Text or stream to convert to speech.
   * @param {InworldSpeakOptions} [options] - TTS options.
   * @returns {Promise<NodeJS.ReadableStream>} Progressive audio stream.
   * @throws {Error} If the input text is empty, the API responds with a non-2xx
   *   status, or the response has no body. Failures while reading the response
   *   are reported by destroying the returned stream with the error.
   */
  async speak(input, options) {
    const text = typeof input === "string" ? input : await this.streamToString(input);
    if (text.trim().length === 0) {
      throw new Error("Input text is empty");
    }
    const speaker = options?.speaker ?? this.speaker;
    const body = {
      text,
      voiceId: speaker,
      modelId: this.speechModel?.name ?? "inworld-tts-2",
      audioConfig: {
        audioEncoding: options?.audioEncoding ?? this.audioEncoding,
        sampleRateHertz: options?.sampleRateHertz ?? this.sampleRateHertz,
        ...options?.speakingRate !== void 0 && { speakingRate: options.speakingRate }
      },
      ...options?.temperature !== void 0 && { temperature: options.temperature },
      ...options?.deliveryMode !== void 0 && { deliveryMode: options.deliveryMode },
      ...options?.language !== void 0 && { language: options.language }
    };
    const response = await fetch(`${INWORLD_API_BASE}/tts/v1/voice:stream`, {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        Authorization: `Basic ${this.speechApiKey}`
      },
      body: JSON.stringify(body)
    });
    if (!response.ok) {
      const errorText = await response.text();
      throw new Error(`Inworld TTS failed (${response.status}): ${errorText}`);
    }
    if (!response.body) {
      throw new Error("Inworld TTS streaming response has no body");
    }
    // Progressive streaming: the PassThrough is returned immediately while a
    // background task reads NDJSON lines from the response body into it.
    const outputStream = new stream.PassThrough();
    const reader = response.body.getReader();
    const decoder = new TextDecoder();
    let buffer = "";
    // Best-effort cancellation of the upstream body; a failed cancel is ignored
    // on purpose because the stream is already being torn down.
    const cancelReader = () => {
      void reader.cancel().catch(() => {
      });
    };
    // Waits until the consumer drains the stream (or it closes / errors).
    // All three listeners are detached once the wait settles; otherwise every
    // backpressure cycle would leave stale "close"/"error" listeners behind and
    // eventually trigger Node's MaxListenersExceededWarning on long streams.
    const waitForDrain = () => new Promise((resolve, reject) => {
      const detach = () => {
        outputStream.off("drain", onSettled);
        outputStream.off("close", onSettled);
        outputStream.off("error", onFailed);
      };
      const onSettled = () => {
        detach();
        resolve();
      };
      const onFailed = (err) => {
        detach();
        reject(err);
      };
      outputStream.on("drain", onSettled);
      outputStream.on("close", onSettled);
      outputStream.on("error", onFailed);
    });
    // Write with backpressure: await drain when the consumer is slow.
    const writeChunk = async (data) => {
      if (outputStream.destroyed) return;
      if (!outputStream.write(data)) {
        await waitForDrain();
      }
    };
    // Parses one NDJSON line and forwards its decoded audio payload, if any.
    // The payload is read from `result.audioContent`, falling back to a
    // top-level `audioContent`.
    const emitLine = async (line) => {
      const chunk = JSON.parse(line);
      const audioContent = chunk.result?.audioContent ?? chunk.audioContent;
      if (audioContent) {
        await writeChunk(Buffer.from(audioContent, "base64"));
      }
    };
    // Cancel the upstream reader when the consumer closes early.
    outputStream.once("close", cancelReader);
    void (async () => {
      try {
        while (true) {
          if (outputStream.destroyed) {
            cancelReader();
            break;
          }
          const { done, value } = await reader.read();
          if (done) {
            // Process any remaining data in the buffer before ending.
            const remaining = buffer.trim();
            if (remaining) {
              await emitLine(remaining);
            }
            outputStream.end();
            break;
          }
          buffer += decoder.decode(value, { stream: true });
          const lines = buffer.split("\n");
          // The last element is a (possibly empty) partial line; keep it for
          // the next read.
          buffer = lines.pop() ?? "";
          for (const line of lines) {
            const trimmed = line.trim();
            if (!trimmed) continue;
            if (outputStream.destroyed) {
              cancelReader();
              return;
            }
            await emitLine(trimmed);
          }
        }
      } catch (err) {
        // Surface read/parse failures to the consumer through the stream.
        if (!outputStream.destroyed) {
          outputStream.destroy(err instanceof Error ? err : new Error(String(err)));
        }
        cancelReader();
      }
    })().catch(() => {
    });
    return outputStream;
  }
  /**
   * Converts audio to text using Inworld's batch STT endpoint.
   *
   * The whole input stream is buffered in memory and sent base64-encoded in a
   * single request to `/stt/v1/transcribe`.
   *
   * @param {NodeJS.ReadableStream} input - Audio stream to transcribe.
   * @param {InworldListenOptions} [options] - STT options.
   * @returns {Promise<string>} Transcribed text, or "" when the response has no transcript.
   * @throws {Error} If the API responds with a non-2xx status.
   */
  async listen(input, options) {
    const chunks = [];
    for await (const chunk of input) {
      chunks.push(typeof chunk === "string" ? Buffer.from(chunk) : chunk);
    }
    const audioBase64 = Buffer.concat(chunks).toString("base64");
    const body = {
      transcribeConfig: {
        modelId: this.listeningModel?.name ?? "groq/whisper-large-v3",
        audioEncoding: options?.audioEncoding ?? "AUTO_DETECT",
        language: options?.language ?? this.language,
        sampleRateHertz: options?.sampleRateHertz ?? 16e3,
        numberOfChannels: options?.numberOfChannels ?? 1
      },
      audioData: {
        content: audioBase64
      }
    };
    const response = await fetch(`${INWORLD_API_BASE}/stt/v1/transcribe`, {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        Authorization: `Basic ${this.listeningApiKey}`
      },
      body: JSON.stringify(body)
    });
    if (!response.ok) {
      const text = await response.text();
      throw new Error(`Inworld STT failed (${response.status}): ${text}`);
    }
    const result = await response.json();
    return result.transcription?.transcript ?? "";
  }
  /**
   * Reads a stream to completion and returns its contents decoded as UTF-8.
   *
   * @param {NodeJS.ReadableStream} stream - Stream of string or binary chunks.
   * @returns {Promise<string>} The concatenated stream contents.
   */
  async streamToString(stream) {
    const chunks = [];
    for await (const chunk of stream) {
      chunks.push(typeof chunk === "string" ? Buffer.from(chunk) : chunk);
    }
    return Buffer.concat(chunks).toString("utf-8");
  }
};
|
|
253
|
+
|
|
254
|
+
exports.InworldVoice = InworldVoice;
|
|
255
|
+
//# sourceMappingURL=index.cjs.map
|
|
256
|
+
//# sourceMappingURL=index.cjs.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../src/index.ts"],"names":["MastraVoice","PassThrough"],"mappings":";;;;;;AAGA,IAAM,gBAAA,GAAmB,wBAAA;AAmDlB,IAAM,YAAA,GAAN,cAA2BA,iBAAA,CAAY;AAAA,EACpC,YAAA;AAAA,EACA,eAAA;AAAA,EACA,aAAA;AAAA,EACA,eAAA;AAAA,EACA,QAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAeR,WAAA,CAAY;AAAA,IACV,WAAA;AAAA,IACA,cAAA;AAAA,IACA,OAAA;AAAA,IACA,aAAA;AAAA,IACA,eAAA;AAAA,IACA;AAAA,GACF,GAOI,EAAC,EAAG;AAKN,IAAA,MAAM,MAAA,GAAS,QAAQ,GAAA,CAAI,eAAA;AAC3B,IAAA,MAAM,YAAA,GAAe,WAAA,EAAa,MAAA,IAAU,cAAA,EAAgB,MAAA,IAAU,MAAA;AACtE,IAAA,MAAM,eAAA,GAAkB,cAAA,EAAgB,MAAA,IAAU,WAAA,EAAa,MAAA,IAAU,MAAA;AAEzE,IAAA,IAAI,CAAC,YAAA,IAAgB,CAAC,eAAA,EAAiB;AACrC,MAAA,MAAM,IAAI,KAAA;AAAA,QACR;AAAA,OACF;AAAA,IACF;AAEA,IAAA,KAAA,CAAM;AAAA,MACJ,WAAA,EAAa;AAAA,QACX,IAAA,EAAM,aAAa,IAAA,IAAQ,eAAA;AAAA,QAC3B,MAAA,EAAQ;AAAA,OACV;AAAA,MACA,cAAA,EAAgB;AAAA,QACd,IAAA,EAAM,gBAAgB,IAAA,IAAQ,uBAAA;AAAA,QAC9B,MAAA,EAAQ;AAAA,OACV;AAAA,MACA,SAAS,OAAA,IAAW;AAAA,KACrB,CAAA;AAED,IAAA,IAAA,CAAK,YAAA,GAAe,YAAA;AACpB,IAAA,IAAA,CAAK,eAAA,GAAkB,eAAA;AACvB,IAAA,IAAA,CAAK,gBAAgB,aAAA,IAAiB,KAAA;AACtC,IAAA,IAAA,CAAK,kBAAkB,eAAA,IAAmB,IAAA;AAC1C,IAAA,IAAA,CAAK,WAAW,QAAA,IAAY,OAAA;AAAA,EAC9B;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAOA,MAAM,WAAA,GAAc;AAClB,IAAA,MAAM,QAAA,GAAW,MAAM,KAAA,CAAM,CAAA,EAAG,gBAAgB,CAAA,iBAAA,CAAA,EAAqB;AAAA,MACnE,SAAS,EAAE,aAAA,EAAe,CAAA,MAAA,EAAS,IAAA,CAAK,YAAY,CAAA,CAAA;AAAG,KACxD,CAAA;AAED,IAAA,IAAI,CAAC,SAAS,EAAA,EAAI;AAChB,MAAA,MAAM,IAAA,GAAO,MAAM,QAAA,CAAS,IAAA,EAAK;AACjC,MAAA,MAAM,IAAI,KAAA,CAAM,CAAA,4BAAA,EAA+B,SAAS,MAAM,CAAA,GAAA,EAAM,IAAI,CAAA,CAAE,CAAA;AAAA,IAC5E;AAEA,IAAA,MAAM,IAAA,GAAQ,MAAM,QAAA,CAAS,IAAA,EAAK;AAWlC,IAAA,OACE,IAAA,CAAK,MAAA,EAAQ,GAAA,CAAI,CAAA,CAAA,MAAM;AAAA,MACrB,SAAS,CAAA,CAAE,OAAA;AAAA,MACX,IAAA,EAAM,CAAA,CAAE,WAAA,IAAe,CAAA,CAAE,OAAA;AAAA,MACzB,QAAA,EAAU,EAAE,QAAA,IAAY,IAAA;AAAA,MACxB,WAAA,EAAa,EAAE,WAAA,IAAe,EAAA;AAAA,MAC9B,IAAA,EAAM,CAAA,CAAE,IAAA,IAAQ,EAAC;AAAA,MACjB,MAAA,EAAQ,EAAE,MAAA,IAAU;AAAA,KACtB,CAAE,KAAK,EAAC;AAAA,EAEZ;AAAA;
AAAA;AAAA;AAAA,EAKA,MAAM,WAAA,GAAc;AAClB,IAAA,OAAO,EAAE,OAAA,EAAS,CAAC,CAAC,KAAK,cAAA,EAAe;AAAA,EAC1C;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAgBA,MAAM,KAAA,CACJ,KAAA,EACA,OAAA,EACgC;AAChC,IAAA,MAAM,IAAA,GAAO,OAAO,KAAA,KAAU,QAAA,GAAW,QAAQ,MAAM,IAAA,CAAK,eAAe,KAAK,CAAA;AAEhF,IAAA,IAAI,IAAA,CAAK,IAAA,EAAK,CAAE,MAAA,KAAW,CAAA,EAAG;AAC5B,MAAA,MAAM,IAAI,MAAM,qBAAqB,CAAA;AAAA,IACvC;AAEA,IAAA,MAAM,OAAA,GAAU,OAAA,EAAS,OAAA,IAAW,IAAA,CAAK,OAAA;AAEzC,IAAA,MAAM,IAAA,GAAO;AAAA,MACX,IAAA;AAAA,MACA,OAAA,EAAS,OAAA;AAAA,MACT,OAAA,EAAS,IAAA,CAAK,WAAA,EAAa,IAAA,IAAQ,eAAA;AAAA,MACnC,WAAA,EAAa;AAAA,QACX,aAAA,EAAe,OAAA,EAAS,aAAA,IAAiB,IAAA,CAAK,aAAA;AAAA,QAC9C,eAAA,EAAiB,OAAA,EAAS,eAAA,IAAmB,IAAA,CAAK,eAAA;AAAA,QAClD,GAAI,OAAA,EAAS,YAAA,KAAiB,UAAa,EAAE,YAAA,EAAc,QAAQ,YAAA;AAAa,OAClF;AAAA,MACA,GAAI,OAAA,EAAS,WAAA,KAAgB,UAAa,EAAE,WAAA,EAAa,QAAQ,WAAA,EAAY;AAAA,MAC7E,GAAI,OAAA,EAAS,YAAA,KAAiB,UAAa,EAAE,YAAA,EAAc,QAAQ,YAAA,EAAa;AAAA,MAChF,GAAI,OAAA,EAAS,QAAA,KAAa,UAAa,EAAE,QAAA,EAAU,QAAQ,QAAA;AAAS,KACtE;AAEA,IAAA,MAAM,QAAA,GAAW,MAAM,KAAA,CAAM,CAAA,EAAG,gBAAgB,CAAA,oBAAA,CAAA,EAAwB;AAAA,MACtE,MAAA,EAAQ,MAAA;AAAA,MACR,OAAA,EAAS;AAAA,QACP,cAAA,EAAgB,kBAAA;AAAA,QAChB,aAAA,EAAe,CAAA,MAAA,EAAS,IAAA,CAAK,YAAY,CAAA;AAAA,OAC3C;AAAA,MACA,IAAA,EAAM,IAAA,CAAK,SAAA,CAAU,IAAI;AAAA,KAC1B,CAAA;AAED,IAAA,IAAI,CAAC,SAAS,EAAA,EAAI;AAChB,MAAA,MAAM,SAAA,GAAY,MAAM,QAAA,CAAS,IAAA,EAAK;AACtC,MAAA,MAAM,IAAI,KAAA,CAAM,CAAA,oBAAA,EAAuB,SAAS,MAAM,CAAA,GAAA,EAAM,SAAS,CAAA,CAAE,CAAA;AAAA,IACzE;AAEA,IAAA,IAAI,CAAC,SAAS,IAAA,EAAM;AAClB,MAAA,MAAM,IAAI,MAAM,4CAA4C,CAAA;AAAA,IAC9D;AAIA,IAAA,MAAM,YAAA,GAAe,IAAIC,kBAAA,EAAY;AACrC,IAAA,MAAM,MAAA,GAAS,QAAA,CAAS,IAAA,CAAK,SAAA,EAAU;AACvC,IAAA,MAAM,OAAA,GAAU,IAAI,WAAA,EAAY;AAChC,IAAA,IAAI,MAAA,GAAS,EAAA;AAGb,IAAA,MAAM,UAAA,GAAa,OAAO,IAAA,KAAiB;AACzC,MAAA,IAAI,aAAa,SAAA,EAAW;AAC5B,MAAA,IAAI,CAAC,YAAA,CAAa,KAAA,CAAM,IAAI,CAAA,EAAG;AAC7B,QAAA,MAAM,IAAI,OAAA,CAAc,CAAC,OAAA,EAAS,MAAA,KAAW;AAC3C,UAAA,YAAA,CAAa,IAAA,CAAK,SAAS,OAAO,CAAA;AAClC,UAAA,YAAA,C
AAa,IAAA,CAAK,SAAS,OAAO,CAAA;AAClC,UAAA,YAAA,CAAa,IAAA,CAAK,SAAS,MAAM,CAAA;AAAA,QACnC,CAAC,CAAA;AAAA,MACH;AAAA,IACF,CAAA;AAGA,IAAA,YAAA,CAAa,IAAA,CAAK,SAAS,MAAM;AAC/B,MAAA,KAAK,MAAA,CAAO,MAAA,EAAO,CAAE,KAAA,CAAM,MAAM;AAAA,MAAC,CAAC,CAAA;AAAA,IACrC,CAAC,CAAA;AAED,IAAA,KAAA,CAAM,YAAY;AAChB,MAAA,IAAI;AACF,QAAA,OAAO,IAAA,EAAM;AACX,UAAA,IAAI,aAAa,SAAA,EAAW;AAC1B,YAAA,KAAK,MAAA,CAAO,MAAA,EAAO,CAAE,KAAA,CAAM,MAAM;AAAA,YAAC,CAAC,CAAA;AACnC,YAAA;AAAA,UACF;AAEA,UAAA,MAAM,EAAE,IAAA,EAAM,KAAA,EAAM,GAAI,MAAM,OAAO,IAAA,EAAK;AAC1C,UAAA,IAAI,IAAA,EAAM;AAER,YAAA,MAAM,SAAA,GAAY,OAAO,IAAA,EAAK;AAC9B,YAAA,IAAI,SAAA,EAAW;AACb,cAAA,MAAM,KAAA,GAAQ,IAAA,CAAK,KAAA,CAAM,SAAS,CAAA;AAClC,cAAA,MAAM,YAAA,GAAe,KAAA,CAAM,MAAA,EAAQ,YAAA,IAAgB,KAAA,CAAM,YAAA;AACzD,cAAA,IAAI,YAAA,EAAc;AAChB,gBAAA,MAAM,UAAA,CAAW,MAAA,CAAO,IAAA,CAAK,YAAA,EAAc,QAAQ,CAAC,CAAA;AAAA,cACtD;AAAA,YACF;AACA,YAAA,YAAA,CAAa,GAAA,EAAI;AACjB,YAAA;AAAA,UACF;AAEA,UAAA,MAAA,IAAU,QAAQ,MAAA,CAAO,KAAA,EAAO,EAAE,MAAA,EAAQ,MAAM,CAAA;AAChD,UAAA,MAAM,KAAA,GAAQ,MAAA,CAAO,KAAA,CAAM,IAAI,CAAA;AAC/B,UAAA,MAAA,GAAS,KAAA,CAAM,KAAI,IAAK,EAAA;AAExB,UAAA,KAAA,MAAW,QAAQ,KAAA,EAAO;AACxB,YAAA,MAAM,OAAA,GAAU,KAAK,IAAA,EAAK;AAC1B,YAAA,IAAI,CAAC,OAAA,EAAS;AAEd,YAAA,IAAI,aAAa,SAAA,EAAW;AAC1B,cAAA,KAAK,MAAA,CAAO,MAAA,EAAO,CAAE,KAAA,CAAM,MAAM;AAAA,cAAC,CAAC,CAAA;AACnC,cAAA;AAAA,YACF;AAEA,YAAA,MAAM,KAAA,GAAQ,IAAA,CAAK,KAAA,CAAM,OAAO,CAAA;AAChC,YAAA,MAAM,YAAA,GAAe,KAAA,CAAM,MAAA,EAAQ,YAAA,IAAgB,KAAA,CAAM,YAAA;AACzD,YAAA,IAAI,YAAA,EAAc;AAChB,cAAA,MAAM,UAAA,CAAW,MAAA,CAAO,IAAA,CAAK,YAAA,EAAc,QAAQ,CAAC,CAAA;AAAA,YACtD;AAAA,UACF;AAAA,QACF;AAAA,MACF,SAAS,GAAA,EAAK;AACZ,QAAA,IAAI,CAAC,aAAa,SAAA,EAAW;AAC3B,UAAA,YAAA,CAAa,OAAA,CAAQ,eAAe,KAAA,GAAQ,GAAA,GAAM,IAAI,KAAA,CAAM,MAAA,CAAO,GAAG,CAAC,CAAC,CAAA;AAAA,QAC1E;AACA,QAAA,KAAK,MAAA,CAAO,MAAA,EAAO,CAAE,KAAA,CAAM,MAAM;AAAA,QAAC,CAAC,CAAA;AAAA,MACrC;AAAA,IACF,CAAA,GAAG,CAAE,KAAA,CAAM,MAAM;AAAA,IAAC,CAAC,CAAA;AAEnB,IAAA,OAAO,YAAA;AAAA,EACT;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EASA,MAAM,MAAA,CAAO,KAAA,EAA8B,OAAA,EAAiD;AA
C1F,IAAA,MAAM,SAAmB,EAAC;AAC1B,IAAA,WAAA,MAAiB,SAAS,KAAA,EAAO;AAC/B,MAAA,MAAA,CAAO,IAAA,CAAK,OAAO,KAAA,KAAU,QAAA,GAAW,OAAO,IAAA,CAAK,KAAK,IAAK,KAAgB,CAAA;AAAA,IAChF;AACA,IAAA,MAAM,cAAc,MAAA,CAAO,MAAA,CAAO,MAAM,CAAA,CAAE,SAAS,QAAQ,CAAA;AAE3D,IAAA,MAAM,IAAA,GAAO;AAAA,MACX,gBAAA,EAAkB;AAAA,QAChB,OAAA,EAAS,IAAA,CAAK,cAAA,EAAgB,IAAA,IAAQ,uBAAA;AAAA,QACtC,aAAA,EAAe,SAAS,aAAA,IAAiB,aAAA;AAAA,QACzC,QAAA,EAAU,OAAA,EAAS,QAAA,IAAY,IAAA,CAAK,QAAA;AAAA,QACpC,eAAA,EAAiB,SAAS,eAAA,IAAmB,IAAA;AAAA,QAC7C,gBAAA,EAAkB,SAAS,gBAAA,IAAoB;AAAA,OACjD;AAAA,MACA,SAAA,EAAW;AAAA,QACT,OAAA,EAAS;AAAA;AACX,KACF;AAEA,IAAA,MAAM,QAAA,GAAW,MAAM,KAAA,CAAM,CAAA,EAAG,gBAAgB,CAAA,kBAAA,CAAA,EAAsB;AAAA,MACpE,MAAA,EAAQ,MAAA;AAAA,MACR,OAAA,EAAS;AAAA,QACP,cAAA,EAAgB,kBAAA;AAAA,QAChB,aAAA,EAAe,CAAA,MAAA,EAAS,IAAA,CAAK,eAAe,CAAA;AAAA,OAC9C;AAAA,MACA,IAAA,EAAM,IAAA,CAAK,SAAA,CAAU,IAAI;AAAA,KAC1B,CAAA;AAED,IAAA,IAAI,CAAC,SAAS,EAAA,EAAI;AAChB,MAAA,MAAM,IAAA,GAAO,MAAM,QAAA,CAAS,IAAA,EAAK;AACjC,MAAA,MAAM,IAAI,KAAA,CAAM,CAAA,oBAAA,EAAuB,SAAS,MAAM,CAAA,GAAA,EAAM,IAAI,CAAA,CAAE,CAAA;AAAA,IACpE;AAEA,IAAA,MAAM,MAAA,GAAU,MAAM,QAAA,CAAS,IAAA,EAAK;AAOpC,IAAA,OAAO,MAAA,CAAO,eAAe,UAAA,IAAc,EAAA;AAAA,EAC7C;AAAA,EAEA,MAAc,eAAe,MAAA,EAAgD;AAC3E,IAAA,MAAM,SAAmB,EAAC;AAC1B,IAAA,WAAA,MAAiB,SAAS,MAAA,EAAQ;AAChC,MAAA,MAAA,CAAO,IAAA,CAAK,OAAO,KAAA,KAAU,QAAA,GAAW,OAAO,IAAA,CAAK,KAAK,IAAK,KAAgB,CAAA;AAAA,IAChF;AACA,IAAA,OAAO,MAAA,CAAO,MAAA,CAAO,MAAM,CAAA,CAAE,SAAS,OAAO,CAAA;AAAA,EAC/C;AACF","file":"index.cjs","sourcesContent":["import { PassThrough } from 'node:stream';\nimport { MastraVoice } from '@mastra/core/voice';\n\nconst INWORLD_API_BASE = 'https://api.inworld.ai';\n\ntype InworldTtsModel = 'inworld-tts-2' | 'inworld-tts-1.5-max' | 'inworld-tts-1.5-mini';\n\ntype DeliveryMode = 'STABLE' | 'BALANCED' | 'CREATIVE';\n\ntype InworldSttModel = 'groq/whisper-large-v3';\n\ntype AudioEncoding = 'LINEAR16' | 'MP3' | 'OGG_OPUS' | 'ALAW' | 'MULAW' | 'FLAC' | 'PCM' | 'WAV';\n\ntype SttAudioEncoding = 'LINEAR16' | 'MP3' | 'OGG_OPUS' | 
'FLAC' | 'AUTO_DETECT';\n\ninterface InworldVoiceConfig {\n name?: InworldTtsModel;\n apiKey?: string;\n}\n\ninterface InworldListeningConfig {\n name?: InworldSttModel;\n apiKey?: string;\n}\n\ninterface InworldSpeakOptions {\n speaker?: string;\n audioEncoding?: AudioEncoding;\n sampleRateHertz?: number;\n speakingRate?: number;\n temperature?: number;\n deliveryMode?: DeliveryMode;\n language?: string;\n}\n\ninterface InworldListenOptions {\n audioEncoding?: SttAudioEncoding;\n sampleRateHertz?: number;\n language?: string;\n numberOfChannels?: number;\n}\n\nexport type {\n InworldVoiceConfig,\n InworldListeningConfig,\n InworldSpeakOptions,\n InworldListenOptions,\n InworldTtsModel,\n InworldSttModel,\n AudioEncoding,\n SttAudioEncoding,\n DeliveryMode,\n};\n\nexport class InworldVoice extends MastraVoice {\n private speechApiKey: string;\n private listeningApiKey: string;\n private audioEncoding: AudioEncoding;\n private sampleRateHertz: number;\n private language: string;\n\n /**\n * Creates an instance of the InworldVoice class.\n *\n * @param {Object} options - The options for the voice configuration.\n * @param {InworldVoiceConfig} [options.speechModel] - TTS model config. Default: inworld-tts-2.\n * @param {InworldListeningConfig} [options.listeningModel] - STT model config. Default: groq/whisper-large-v3.\n * @param {string} [options.speaker] - Default voice ID. Default: 'Dennis'.\n * @param {AudioEncoding} [options.audioEncoding] - TTS audio format. Default: 'MP3'.\n * @param {number} [options.sampleRateHertz] - TTS sample rate. Default: 48000.\n * @param {string} [options.language] - STT language (BCP-47). 
Default: 'en-US'.\n *\n * @throws {Error} If no API key is provided or found in INWORLD_API_KEY env var.\n */\n constructor({\n speechModel,\n listeningModel,\n speaker,\n audioEncoding,\n sampleRateHertz,\n language,\n }: {\n speechModel?: InworldVoiceConfig;\n listeningModel?: InworldListeningConfig;\n speaker?: string;\n audioEncoding?: AudioEncoding;\n sampleRateHertz?: number;\n language?: string;\n } = {}) {\n // Resolve TTS and STT keys independently so callers can scope credentials\n // per service. If only one model is configured with a key, it is reused as\n // the fallback for the other (preserving the \"one key works everywhere\"\n // ergonomic). The INWORLD_API_KEY env var is the final fallback.\n const envKey = process.env.INWORLD_API_KEY;\n const speechApiKey = speechModel?.apiKey ?? listeningModel?.apiKey ?? envKey;\n const listeningApiKey = listeningModel?.apiKey ?? speechModel?.apiKey ?? envKey;\n\n if (!speechApiKey || !listeningApiKey) {\n throw new Error(\n 'Inworld API key is required. Pass apiKey in speechModel/listeningModel config or set INWORLD_API_KEY env var.',\n );\n }\n\n super({\n speechModel: {\n name: speechModel?.name ?? 'inworld-tts-2',\n apiKey: speechApiKey,\n },\n listeningModel: {\n name: listeningModel?.name ?? 'groq/whisper-large-v3',\n apiKey: listeningApiKey,\n },\n speaker: speaker ?? 'Dennis',\n });\n\n this.speechApiKey = speechApiKey;\n this.listeningApiKey = listeningApiKey;\n this.audioEncoding = audioEncoding ?? 'MP3';\n this.sampleRateHertz = sampleRateHertz ?? 48000;\n this.language = language ?? 
'en-US';\n }\n\n /**\n * Retrieves a list of available voices from the Inworld API.\n *\n * @returns {Promise<Array<{ voiceId: string; name: string; language: string; description: string; tags: string[]; source: string }>>}\n */\n async getSpeakers() {\n const response = await fetch(`${INWORLD_API_BASE}/voices/v1/voices`, {\n headers: { Authorization: `Basic ${this.speechApiKey}` },\n });\n\n if (!response.ok) {\n const text = await response.text();\n throw new Error(`Inworld list voices failed (${response.status}): ${text}`);\n }\n\n const data = (await response.json()) as {\n voices?: Array<{\n voiceId: string;\n displayName?: string;\n langCode?: string;\n description?: string;\n tags?: string[];\n source?: string;\n }>;\n };\n\n return (\n data.voices?.map(v => ({\n voiceId: v.voiceId,\n name: v.displayName ?? v.voiceId,\n language: v.langCode ?? 'en',\n description: v.description ?? '',\n tags: v.tags ?? [],\n source: v.source ?? 'SYSTEM',\n })) ?? []\n );\n }\n\n /**\n * Checks if listening capabilities are enabled.\n */\n async getListener() {\n return { enabled: !!this.listeningModel };\n }\n\n /**\n * Converts text to speech using Inworld's streaming TTS endpoint.\n *\n * Returns a ReadableStream that emits audio chunks progressively as they\n * arrive from the API — following the same streaming pattern used by the\n * Deepgram and PlayAI Mastra voice providers.\n *\n * Uses the `/tts/v1/voice:stream` endpoint which returns newline-delimited\n * JSON, each line containing a base64-encoded audio chunk.\n *\n * @param {string | NodeJS.ReadableStream} input - Text or stream to convert to speech.\n * @param {InworldSpeakOptions} [options] - TTS options.\n * @returns {Promise<NodeJS.ReadableStream>} Progressive audio stream.\n */\n async speak(\n input: string | NodeJS.ReadableStream,\n options?: InworldSpeakOptions & { speaker?: string },\n ): Promise<NodeJS.ReadableStream> {\n const text = typeof input === 'string' ? 
input : await this.streamToString(input);\n\n if (text.trim().length === 0) {\n throw new Error('Input text is empty');\n }\n\n const speaker = options?.speaker ?? this.speaker;\n\n const body = {\n text,\n voiceId: speaker,\n modelId: this.speechModel?.name ?? 'inworld-tts-2',\n audioConfig: {\n audioEncoding: options?.audioEncoding ?? this.audioEncoding,\n sampleRateHertz: options?.sampleRateHertz ?? this.sampleRateHertz,\n ...(options?.speakingRate !== undefined && { speakingRate: options.speakingRate }),\n },\n ...(options?.temperature !== undefined && { temperature: options.temperature }),\n ...(options?.deliveryMode !== undefined && { deliveryMode: options.deliveryMode }),\n ...(options?.language !== undefined && { language: options.language }),\n };\n\n const response = await fetch(`${INWORLD_API_BASE}/tts/v1/voice:stream`, {\n method: 'POST',\n headers: {\n 'Content-Type': 'application/json',\n Authorization: `Basic ${this.speechApiKey}`,\n },\n body: JSON.stringify(body),\n });\n\n if (!response.ok) {\n const errorText = await response.text();\n throw new Error(`Inworld TTS failed (${response.status}): ${errorText}`);\n }\n\n if (!response.body) {\n throw new Error('Inworld TTS streaming response has no body');\n }\n\n // Progressive streaming: return the PassThrough immediately while reading\n // NDJSON chunks from the response body in a background async task.\n const outputStream = new PassThrough();\n const reader = response.body.getReader();\n const decoder = new TextDecoder();\n let buffer = '';\n\n // Write with backpressure: await drain when the consumer is slow.\n const writeChunk = async (data: Buffer) => {\n if (outputStream.destroyed) return;\n if (!outputStream.write(data)) {\n await new Promise<void>((resolve, reject) => {\n outputStream.once('drain', resolve);\n outputStream.once('close', resolve);\n outputStream.once('error', reject);\n });\n }\n };\n\n // Cancel the upstream reader when the consumer closes early.\n 
outputStream.once('close', () => {\n void reader.cancel().catch(() => {});\n });\n\n void (async () => {\n try {\n while (true) {\n if (outputStream.destroyed) {\n void reader.cancel().catch(() => {});\n break;\n }\n\n const { done, value } = await reader.read();\n if (done) {\n // Process any remaining data in the buffer before ending\n const remaining = buffer.trim();\n if (remaining) {\n const chunk = JSON.parse(remaining);\n const audioContent = chunk.result?.audioContent ?? chunk.audioContent;\n if (audioContent) {\n await writeChunk(Buffer.from(audioContent, 'base64'));\n }\n }\n outputStream.end();\n break;\n }\n\n buffer += decoder.decode(value, { stream: true });\n const lines = buffer.split('\\n');\n buffer = lines.pop() ?? '';\n\n for (const line of lines) {\n const trimmed = line.trim();\n if (!trimmed) continue;\n\n if (outputStream.destroyed) {\n void reader.cancel().catch(() => {});\n return;\n }\n\n const chunk = JSON.parse(trimmed);\n const audioContent = chunk.result?.audioContent ?? chunk.audioContent;\n if (audioContent) {\n await writeChunk(Buffer.from(audioContent, 'base64'));\n }\n }\n }\n } catch (err) {\n if (!outputStream.destroyed) {\n outputStream.destroy(err instanceof Error ? err : new Error(String(err)));\n }\n void reader.cancel().catch(() => {});\n }\n })().catch(() => {});\n\n return outputStream;\n }\n\n /**\n * Converts audio to text using Inworld's batch STT endpoint.\n *\n * @param {NodeJS.ReadableStream} input - Audio stream to transcribe.\n * @param {InworldListenOptions} [options] - STT options.\n * @returns {Promise<string>} Transcribed text.\n */\n async listen(input: NodeJS.ReadableStream, options?: InworldListenOptions): Promise<string> {\n const chunks: Buffer[] = [];\n for await (const chunk of input) {\n chunks.push(typeof chunk === 'string' ? 
Buffer.from(chunk) : (chunk as Buffer));\n }\n const audioBase64 = Buffer.concat(chunks).toString('base64');\n\n const body = {\n transcribeConfig: {\n modelId: this.listeningModel?.name ?? 'groq/whisper-large-v3',\n audioEncoding: options?.audioEncoding ?? 'AUTO_DETECT',\n language: options?.language ?? this.language,\n sampleRateHertz: options?.sampleRateHertz ?? 16000,\n numberOfChannels: options?.numberOfChannels ?? 1,\n },\n audioData: {\n content: audioBase64,\n },\n };\n\n const response = await fetch(`${INWORLD_API_BASE}/stt/v1/transcribe`, {\n method: 'POST',\n headers: {\n 'Content-Type': 'application/json',\n Authorization: `Basic ${this.listeningApiKey}`,\n },\n body: JSON.stringify(body),\n });\n\n if (!response.ok) {\n const text = await response.text();\n throw new Error(`Inworld STT failed (${response.status}): ${text}`);\n }\n\n const result = (await response.json()) as {\n transcription?: {\n transcript?: string;\n isFinal?: boolean;\n };\n };\n\n return result.transcription?.transcript ?? '';\n }\n\n private async streamToString(stream: NodeJS.ReadableStream): Promise<string> {\n const chunks: Buffer[] = [];\n for await (const chunk of stream) {\n chunks.push(typeof chunk === 'string' ? Buffer.from(chunk) : (chunk as Buffer));\n }\n return Buffer.concat(chunks).toString('utf-8');\n }\n}\n"]}
|
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
import { MastraVoice } from '@mastra/core/voice';
|
|
2
|
+
/** TTS model identifiers accepted as `InworldVoiceConfig.name`. */
type InworldTtsModel = 'inworld-tts-2' | 'inworld-tts-1.5-max' | 'inworld-tts-1.5-mini';
|
|
3
|
+
/** Allowed values for `InworldSpeakOptions.deliveryMode`. */
type DeliveryMode = 'STABLE' | 'BALANCED' | 'CREATIVE';
|
|
4
|
+
/** STT model identifiers accepted as `InworldListeningConfig.name`. */
type InworldSttModel = 'groq/whisper-large-v3';
|
|
5
|
+
/** Audio encodings selectable for TTS output (constructor default and `speak()` override). */
type AudioEncoding = 'LINEAR16' | 'MP3' | 'OGG_OPUS' | 'ALAW' | 'MULAW' | 'FLAC' | 'PCM' | 'WAV';
|
|
6
|
+
/** Audio encodings selectable for STT input via `InworldListenOptions.audioEncoding`. */
type SttAudioEncoding = 'LINEAR16' | 'MP3' | 'OGG_OPUS' | 'FLAC' | 'AUTO_DETECT';
|
|
7
|
+
/** TTS model configuration, passed as `speechModel` to the `InworldVoice` constructor. */
interface InworldVoiceConfig {
|
|
8
|
+
/** TTS model id; the constructor falls back to 'inworld-tts-2' when omitted. */
name?: InworldTtsModel;
|
|
9
|
+
/** API key for TTS requests; when omitted the constructor tries the listening model's key, then the INWORLD_API_KEY env var. */
apiKey?: string;
|
|
10
|
+
}
|
|
11
|
+
/** STT model configuration, passed as `listeningModel` to the `InworldVoice` constructor. */
interface InworldListeningConfig {
|
|
12
|
+
/** STT model id; the constructor falls back to 'groq/whisper-large-v3' when omitted. */
name?: InworldSttModel;
|
|
13
|
+
/** API key for STT requests; when omitted the constructor tries the speech model's key, then the INWORLD_API_KEY env var. */
apiKey?: string;
|
|
14
|
+
}
|
|
15
|
+
/** Per-call overrides accepted by `InworldVoice.speak()`. */
interface InworldSpeakOptions {
|
|
16
|
+
/** Voice id sent as `voiceId`; falls back to the instance's default speaker. */
speaker?: string;
|
|
17
|
+
/** Output encoding sent as `audioConfig.audioEncoding`; falls back to the instance default. */
audioEncoding?: AudioEncoding;
|
|
18
|
+
/** Output sample rate sent as `audioConfig.sampleRateHertz`; falls back to the instance default. */
sampleRateHertz?: number;
|
|
19
|
+
/** Sent as `audioConfig.speakingRate`; left out of the request when undefined. */
speakingRate?: number;
|
|
20
|
+
/** Sent as top-level `temperature`; left out of the request when undefined. */
temperature?: number;
|
|
21
|
+
/** Sent as top-level `deliveryMode`; left out of the request when undefined. */
deliveryMode?: DeliveryMode;
|
|
22
|
+
/** Sent as top-level `language`; left out of the request when undefined. */
language?: string;
|
|
23
|
+
}
|
|
24
|
+
/** Per-call overrides accepted by `InworldVoice.listen()`; all are sent inside `transcribeConfig`. */
interface InworldListenOptions {
|
|
25
|
+
/** Encoding of the submitted audio; `listen()` sends 'AUTO_DETECT' when omitted. */
audioEncoding?: SttAudioEncoding;
|
|
26
|
+
/** Sample rate of the submitted audio; `listen()` sends 16000 when omitted. */
sampleRateHertz?: number;
|
|
27
|
+
/** Transcription language; falls back to the instance's configured language. */
language?: string;
|
|
28
|
+
/** Channel count of the submitted audio; `listen()` sends 1 when omitted. */
numberOfChannels?: number;
|
|
29
|
+
}
|
|
30
|
+
export type { InworldVoiceConfig, InworldListeningConfig, InworldSpeakOptions, InworldListenOptions, InworldTtsModel, InworldSttModel, AudioEncoding, SttAudioEncoding, DeliveryMode, };
|
|
31
|
+
/**
 * Mastra voice provider for Inworld AI: streaming text-to-speech, batch
 * speech-to-text and voice listing.
 */
export declare class InworldVoice extends MastraVoice {
    /** API key used for TTS and voice-listing requests. */
    private speechApiKey;
    /** API key used for STT requests. */
    private listeningApiKey;
    /** Default TTS audio encoding, used when `speak()` receives no override. */
    private audioEncoding;
    /** Default TTS sample rate in hertz, used when `speak()` receives no override. */
    private sampleRateHertz;
    /** Default STT language, used when `listen()` receives no override. */
    private language;
    /**
     * Creates an instance of the InworldVoice class.
     *
     * @param {Object} options - The options for the voice configuration.
     * @param {InworldVoiceConfig} [options.speechModel] - TTS model config. Default: inworld-tts-2.
     * @param {InworldListeningConfig} [options.listeningModel] - STT model config. Default: groq/whisper-large-v3.
     * @param {string} [options.speaker] - Default voice ID. Default: 'Dennis'.
     * @param {AudioEncoding} [options.audioEncoding] - TTS audio format. Default: 'MP3'.
     * @param {number} [options.sampleRateHertz] - TTS sample rate. Default: 48000.
     * @param {string} [options.language] - STT language (BCP-47). Default: 'en-US'.
     *
     * @throws {Error} If no API key is provided or found in INWORLD_API_KEY env var.
     */
    constructor({ speechModel, listeningModel, speaker, audioEncoding, sampleRateHertz, language, }?: {
        speechModel?: InworldVoiceConfig;
        listeningModel?: InworldListeningConfig;
        speaker?: string;
        audioEncoding?: AudioEncoding;
        sampleRateHertz?: number;
        language?: string;
    });
    /**
     * Retrieves a list of available voices from the Inworld API.
     *
     * @returns {Promise<Array<{ voiceId: string; name: string; language: string; description: string; tags: string[]; source: string }>>}
     * @throws {Error} If the voices endpoint responds with a non-2xx status.
     */
    getSpeakers(): Promise<{
        voiceId: string;
        name: string;
        language: string;
        description: string;
        tags: string[];
        source: string;
    }[]>;
    /**
     * Checks if listening capabilities are enabled.
     */
    getListener(): Promise<{
        enabled: boolean;
    }>;
    /**
     * Converts text to speech using Inworld's streaming TTS endpoint.
     *
     * Returns a ReadableStream that emits audio chunks progressively as they
     * arrive from the API — following the same streaming pattern used by the
     * Deepgram and PlayAI Mastra voice providers.
     *
     * Uses the `/tts/v1/voice:stream` endpoint which returns newline-delimited
     * JSON, each line containing a base64-encoded audio chunk.
     *
     * @param {string | NodeJS.ReadableStream} input - Text or stream to convert to speech.
     * @param {InworldSpeakOptions} [options] - TTS options (`speaker` is already part of InworldSpeakOptions).
     * @returns {Promise<NodeJS.ReadableStream>} Progressive audio stream.
     * @throws {Error} If the input text is empty or the TTS request fails.
     */
    speak(input: string | NodeJS.ReadableStream, options?: InworldSpeakOptions): Promise<NodeJS.ReadableStream>;
    /**
     * Converts audio to text using Inworld's batch STT endpoint.
     *
     * @param {NodeJS.ReadableStream} input - Audio stream to transcribe.
     * @param {InworldListenOptions} [options] - STT options.
     * @returns {Promise<string>} Transcribed text.
     * @throws {Error} If the STT request fails.
     */
    listen(input: NodeJS.ReadableStream, options?: InworldListenOptions): Promise<string>;
    /** Drains a readable stream and decodes its contents as UTF-8 text. */
    private streamToString;
}
|
|
104
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,WAAW,EAAE,MAAM,oBAAoB,CAAC;AAIjD,KAAK,eAAe,GAAG,eAAe,GAAG,qBAAqB,GAAG,sBAAsB,CAAC;AAExF,KAAK,YAAY,GAAG,QAAQ,GAAG,UAAU,GAAG,UAAU,CAAC;AAEvD,KAAK,eAAe,GAAG,uBAAuB,CAAC;AAE/C,KAAK,aAAa,GAAG,UAAU,GAAG,KAAK,GAAG,UAAU,GAAG,MAAM,GAAG,OAAO,GAAG,MAAM,GAAG,KAAK,GAAG,KAAK,CAAC;AAEjG,KAAK,gBAAgB,GAAG,UAAU,GAAG,KAAK,GAAG,UAAU,GAAG,MAAM,GAAG,aAAa,CAAC;AAEjF,UAAU,kBAAkB;IAC1B,IAAI,CAAC,EAAE,eAAe,CAAC;IACvB,MAAM,CAAC,EAAE,MAAM,CAAC;CACjB;AAED,UAAU,sBAAsB;IAC9B,IAAI,CAAC,EAAE,eAAe,CAAC;IACvB,MAAM,CAAC,EAAE,MAAM,CAAC;CACjB;AAED,UAAU,mBAAmB;IAC3B,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,aAAa,CAAC,EAAE,aAAa,CAAC;IAC9B,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,YAAY,CAAC,EAAE,YAAY,CAAC;IAC5B,QAAQ,CAAC,EAAE,MAAM,CAAC;CACnB;AAED,UAAU,oBAAoB;IAC5B,aAAa,CAAC,EAAE,gBAAgB,CAAC;IACjC,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,gBAAgB,CAAC,EAAE,MAAM,CAAC;CAC3B;AAED,YAAY,EACV,kBAAkB,EAClB,sBAAsB,EACtB,mBAAmB,EACnB,oBAAoB,EACpB,eAAe,EACf,eAAe,EACf,aAAa,EACb,gBAAgB,EAChB,YAAY,GACb,CAAC;AAEF,qBAAa,YAAa,SAAQ,WAAW;IAC3C,OAAO,CAAC,YAAY,CAAS;IAC7B,OAAO,CAAC,eAAe,CAAS;IAChC,OAAO,CAAC,aAAa,CAAgB;IACrC,OAAO,CAAC,eAAe,CAAS;IAChC,OAAO,CAAC,QAAQ,CAAS;IAEzB;;;;;;;;;;;;OAYG;gBACS,EACV,WAAW,EACX,cAAc,EACd,OAAO,EACP,aAAa,EACb,eAAe,EACf,QAAQ,GACT,GAAE;QACD,WAAW,CAAC,EAAE,kBAAkB,CAAC;QACjC,cAAc,CAAC,EAAE,sBAAsB,CAAC;QACxC,OAAO,CAAC,EAAE,MAAM,CAAC;QACjB,aAAa,CAAC,EAAE,aAAa,CAAC;QAC9B,eAAe,CAAC,EAAE,MAAM,CAAC;QACzB,QAAQ,CAAC,EAAE,MAAM,CAAC;KACd;IAkCN;;;;OAIG;IACG,WAAW;;;;;;;;IAiCjB;;OAEG;IACG,WAAW;;;IAIjB;;;;;;;;;;;;;OAaG;IACG,KAAK,CACT,KAAK,EAAE,MAAM,GAAG,MAAM,CAAC,cAAc,EACrC,OAAO,CAAC,EAAE,mBAAmB,GAAG;QAAE,OAAO,CAAC,EAAE,MAAM,CAAA;KAAE,GACnD,OAAO,CAAC,MAAM,CAAC,cAAc,CAAC;IAuHjC;;;;;;OAMG;IACG,MAAM,CAAC,KAAK,EAAE,MAAM,CAAC,cAAc,EAAE,OAAO,CAAC,EAAE,oBAAoB,GAAG,OAAO,CAAC,MAAM,CAAC;YA4C7E,cAAc;CAO7B"}
|
package/dist/index.js
ADDED
|
@@ -0,0 +1,254 @@
|
|
|
1
|
+
import { PassThrough } from 'stream';
|
|
2
|
+
import { MastraVoice } from '@mastra/core/voice';
|
|
3
|
+
|
|
4
|
+
// src/index.ts
|
|
5
|
+
// Base URL shared by every Inworld REST call in this module (voices, TTS, STT).
var INWORLD_API_BASE = "https://api.inworld.ai";
|
|
6
|
+
/**
 * Mastra voice provider for Inworld AI: streaming text-to-speech, batch
 * speech-to-text and voice listing over Inworld's REST API.
 */
var InworldVoice = class extends MastraVoice {
  /** API key sent with TTS and voice-listing requests. */
  speechApiKey;
  /** API key sent with STT requests. */
  listeningApiKey;
  /** Default TTS audio encoding, used when `speak()` receives no override. */
  audioEncoding;
  /** Default TTS sample rate in hertz, used when `speak()` receives no override. */
  sampleRateHertz;
  /** Default STT language, used when `listen()` receives no override. */
  language;

  /**
   * Creates an instance of the InworldVoice class.
   *
   * @param {Object} options - The options for the voice configuration.
   * @param {InworldVoiceConfig} [options.speechModel] - TTS model config. Default: inworld-tts-2.
   * @param {InworldListeningConfig} [options.listeningModel] - STT model config. Default: groq/whisper-large-v3.
   * @param {string} [options.speaker] - Default voice ID. Default: 'Dennis'.
   * @param {AudioEncoding} [options.audioEncoding] - TTS audio format. Default: 'MP3'.
   * @param {number} [options.sampleRateHertz] - TTS sample rate. Default: 48000.
   * @param {string} [options.language] - STT language (BCP-47). Default: 'en-US'.
   *
   * @throws {Error} If no API key is provided or found in INWORLD_API_KEY env var.
   */
  constructor({
    speechModel,
    listeningModel,
    speaker,
    audioEncoding,
    sampleRateHertz,
    language
  } = {}) {
    // Each service prefers its own key, then the other service's key, then the
    // environment, so a single configured key works for both TTS and STT.
    const envKey = process.env.INWORLD_API_KEY;
    const speechApiKey = speechModel?.apiKey ?? listeningModel?.apiKey ?? envKey;
    const listeningApiKey = listeningModel?.apiKey ?? speechModel?.apiKey ?? envKey;
    if (!speechApiKey || !listeningApiKey) {
      throw new Error(
        "Inworld API key is required. Pass apiKey in speechModel/listeningModel config or set INWORLD_API_KEY env var."
      );
    }
    super({
      speechModel: {
        name: speechModel?.name ?? "inworld-tts-2",
        apiKey: speechApiKey
      },
      listeningModel: {
        name: listeningModel?.name ?? "groq/whisper-large-v3",
        apiKey: listeningApiKey
      },
      speaker: speaker ?? "Dennis"
    });
    this.speechApiKey = speechApiKey;
    this.listeningApiKey = listeningApiKey;
    this.audioEncoding = audioEncoding ?? "MP3";
    this.sampleRateHertz = sampleRateHertz ?? 48e3;
    this.language = language ?? "en-US";
  }

  /**
   * Retrieves a list of available voices from the Inworld API.
   *
   * @returns {Promise<Array<{ voiceId: string; name: string; language: string; description: string; tags: string[]; source: string }>>}
   * @throws {Error} If the voices endpoint responds with a non-2xx status.
   */
  async getSpeakers() {
    const response = await fetch(`${INWORLD_API_BASE}/voices/v1/voices`, {
      headers: { Authorization: `Basic ${this.speechApiKey}` }
    });
    if (!response.ok) {
      const text = await response.text();
      throw new Error(`Inworld list voices failed (${response.status}): ${text}`);
    }
    const data = await response.json();
    // Fill in defaults so every entry has the full documented shape.
    return data.voices?.map((v) => ({
      voiceId: v.voiceId,
      name: v.displayName ?? v.voiceId,
      language: v.langCode ?? "en",
      description: v.description ?? "",
      tags: v.tags ?? [],
      source: v.source ?? "SYSTEM"
    })) ?? [];
  }

  /**
   * Checks if listening capabilities are enabled.
   *
   * @returns {Promise<{ enabled: boolean }>} `enabled` is true when a listening model is configured.
   */
  async getListener() {
    return { enabled: !!this.listeningModel };
  }

  /**
   * Converts text to speech using Inworld's streaming TTS endpoint.
   *
   * Returns a ReadableStream that emits audio chunks progressively as they
   * arrive from the API — following the same streaming pattern used by the
   * Deepgram and PlayAI Mastra voice providers.
   *
   * Uses the `/tts/v1/voice:stream` endpoint which returns newline-delimited
   * JSON, each line containing a base64-encoded audio chunk.
   *
   * Failures that happen after the stream has been returned (network errors,
   * malformed NDJSON lines) destroy the returned stream with that error.
   *
   * @param {string | NodeJS.ReadableStream} input - Text or stream to convert to speech.
   * @param {InworldSpeakOptions} [options] - TTS options.
   * @returns {Promise<NodeJS.ReadableStream>} Progressive audio stream.
   * @throws {Error} If the text is empty, the request fails, or the response has no body.
   */
  async speak(input, options) {
    const text = typeof input === "string" ? input : await this.streamToString(input);
    if (text.trim().length === 0) {
      throw new Error("Input text is empty");
    }
    const speaker = options?.speaker ?? this.speaker;
    // Optional tuning fields are only included when the caller set them, so
    // the API applies its own defaults otherwise.
    const body = {
      text,
      voiceId: speaker,
      modelId: this.speechModel?.name ?? "inworld-tts-2",
      audioConfig: {
        audioEncoding: options?.audioEncoding ?? this.audioEncoding,
        sampleRateHertz: options?.sampleRateHertz ?? this.sampleRateHertz,
        ...(options?.speakingRate !== void 0 && { speakingRate: options.speakingRate })
      },
      ...(options?.temperature !== void 0 && { temperature: options.temperature }),
      ...(options?.deliveryMode !== void 0 && { deliveryMode: options.deliveryMode }),
      ...(options?.language !== void 0 && { language: options.language })
    };
    const response = await fetch(`${INWORLD_API_BASE}/tts/v1/voice:stream`, {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        Authorization: `Basic ${this.speechApiKey}`
      },
      body: JSON.stringify(body)
    });
    if (!response.ok) {
      const errorText = await response.text();
      throw new Error(`Inworld TTS failed (${response.status}): ${errorText}`);
    }
    if (!response.body) {
      throw new Error("Inworld TTS streaming response has no body");
    }

    // Progressive streaming: the PassThrough is returned immediately while a
    // background task pumps decoded audio into it.
    const outputStream = new PassThrough();
    const reader = response.body.getReader();
    const decoder = new TextDecoder();
    let buffer = "";

    // Best-effort upstream cancellation; a failed cancel has nothing to recover.
    const cancelReader = () => {
      void reader.cancel().catch(() => {
      });
    };

    // Write with backpressure: when the consumer is slow, wait for 'drain'
    // (or for the stream to close/error). All three listeners are removed as
    // soon as one fires so they cannot accumulate across many chunks.
    const writeChunk = async (data) => {
      if (outputStream.destroyed) return;
      if (outputStream.write(data)) return;
      await new Promise((resolve, reject) => {
        const detach = () => {
          outputStream.off("drain", onSettled);
          outputStream.off("close", onSettled);
          outputStream.off("error", onError);
        };
        const onSettled = () => {
          detach();
          resolve();
        };
        const onError = (err) => {
          detach();
          reject(err);
        };
        outputStream.once("drain", onSettled);
        outputStream.once("close", onSettled);
        outputStream.once("error", onError);
      });
    };

    // Decode one NDJSON line and forward its audio payload, if it has one.
    // The payload may sit under `result` or at the top level of the object.
    const emitAudioLine = async (line) => {
      const chunk = JSON.parse(line);
      const audioContent = chunk.result?.audioContent ?? chunk.audioContent;
      if (audioContent) {
        await writeChunk(Buffer.from(audioContent, "base64"));
      }
    };

    // Cancel the upstream reader when the consumer closes early.
    outputStream.once("close", cancelReader);

    void (async () => {
      try {
        while (!outputStream.destroyed) {
          const { done, value } = await reader.read();
          if (done) {
            // Flush bytes still held by the decoder, then handle a final line
            // that was not terminated by a newline.
            buffer += decoder.decode();
            const remaining = buffer.trim();
            if (remaining) {
              await emitAudioLine(remaining);
            }
            if (!outputStream.destroyed) {
              outputStream.end();
            }
            return;
          }
          buffer += decoder.decode(value, { stream: true });
          const lines = buffer.split("\n");
          // The last piece may be an incomplete line; keep it for the next read.
          buffer = lines.pop() ?? "";
          for (const line of lines) {
            const trimmed = line.trim();
            if (!trimmed) continue;
            if (outputStream.destroyed) break;
            await emitAudioLine(trimmed);
          }
        }
        // The consumer went away before the upstream finished.
        cancelReader();
      } catch (err) {
        if (!outputStream.destroyed) {
          outputStream.destroy(err instanceof Error ? err : new Error(String(err)));
        }
        cancelReader();
      }
    })().catch(() => {
      // Failures are already reported through outputStream.destroy(err) above.
    });
    return outputStream;
  }

  /**
   * Converts audio to text using Inworld's batch STT endpoint.
   *
   * The whole input stream is buffered and sent base64-encoded in one request.
   *
   * @param {NodeJS.ReadableStream} input - Audio stream to transcribe.
   * @param {InworldListenOptions} [options] - STT options.
   * @returns {Promise<string>} Transcribed text ('' when the response has no transcript).
   * @throws {Error} If the STT endpoint responds with a non-2xx status.
   */
  async listen(input, options) {
    const chunks = [];
    for await (const chunk of input) {
      chunks.push(typeof chunk === "string" ? Buffer.from(chunk) : chunk);
    }
    const audioBase64 = Buffer.concat(chunks).toString("base64");
    const body = {
      transcribeConfig: {
        modelId: this.listeningModel?.name ?? "groq/whisper-large-v3",
        audioEncoding: options?.audioEncoding ?? "AUTO_DETECT",
        language: options?.language ?? this.language,
        sampleRateHertz: options?.sampleRateHertz ?? 16e3,
        numberOfChannels: options?.numberOfChannels ?? 1
      },
      audioData: {
        content: audioBase64
      }
    };
    const response = await fetch(`${INWORLD_API_BASE}/stt/v1/transcribe`, {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        Authorization: `Basic ${this.listeningApiKey}`
      },
      body: JSON.stringify(body)
    });
    if (!response.ok) {
      const text = await response.text();
      throw new Error(`Inworld STT failed (${response.status}): ${text}`);
    }
    const result = await response.json();
    return result.transcription?.transcript ?? "";
  }

  /**
   * Drains a readable stream and decodes its contents as UTF-8 text.
   *
   * @param {NodeJS.ReadableStream} stream - Stream of string or binary chunks.
   * @returns {Promise<string>} The concatenated text.
   */
  async streamToString(stream) {
    const chunks = [];
    for await (const chunk of stream) {
      chunks.push(typeof chunk === "string" ? Buffer.from(chunk) : chunk);
    }
    return Buffer.concat(chunks).toString("utf-8");
  }
};
|
|
251
|
+
|
|
252
|
+
export { InworldVoice };
|
|
253
|
+
//# sourceMappingURL=index.js.map
|
|
254
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../src/index.ts"],"names":[],"mappings":";;;;AAGA,IAAM,gBAAA,GAAmB,wBAAA;AAmDlB,IAAM,YAAA,GAAN,cAA2B,WAAA,CAAY;AAAA,EACpC,YAAA;AAAA,EACA,eAAA;AAAA,EACA,aAAA;AAAA,EACA,eAAA;AAAA,EACA,QAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAeR,WAAA,CAAY;AAAA,IACV,WAAA;AAAA,IACA,cAAA;AAAA,IACA,OAAA;AAAA,IACA,aAAA;AAAA,IACA,eAAA;AAAA,IACA;AAAA,GACF,GAOI,EAAC,EAAG;AAKN,IAAA,MAAM,MAAA,GAAS,QAAQ,GAAA,CAAI,eAAA;AAC3B,IAAA,MAAM,YAAA,GAAe,WAAA,EAAa,MAAA,IAAU,cAAA,EAAgB,MAAA,IAAU,MAAA;AACtE,IAAA,MAAM,eAAA,GAAkB,cAAA,EAAgB,MAAA,IAAU,WAAA,EAAa,MAAA,IAAU,MAAA;AAEzE,IAAA,IAAI,CAAC,YAAA,IAAgB,CAAC,eAAA,EAAiB;AACrC,MAAA,MAAM,IAAI,KAAA;AAAA,QACR;AAAA,OACF;AAAA,IACF;AAEA,IAAA,KAAA,CAAM;AAAA,MACJ,WAAA,EAAa;AAAA,QACX,IAAA,EAAM,aAAa,IAAA,IAAQ,eAAA;AAAA,QAC3B,MAAA,EAAQ;AAAA,OACV;AAAA,MACA,cAAA,EAAgB;AAAA,QACd,IAAA,EAAM,gBAAgB,IAAA,IAAQ,uBAAA;AAAA,QAC9B,MAAA,EAAQ;AAAA,OACV;AAAA,MACA,SAAS,OAAA,IAAW;AAAA,KACrB,CAAA;AAED,IAAA,IAAA,CAAK,YAAA,GAAe,YAAA;AACpB,IAAA,IAAA,CAAK,eAAA,GAAkB,eAAA;AACvB,IAAA,IAAA,CAAK,gBAAgB,aAAA,IAAiB,KAAA;AACtC,IAAA,IAAA,CAAK,kBAAkB,eAAA,IAAmB,IAAA;AAC1C,IAAA,IAAA,CAAK,WAAW,QAAA,IAAY,OAAA;AAAA,EAC9B;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAOA,MAAM,WAAA,GAAc;AAClB,IAAA,MAAM,QAAA,GAAW,MAAM,KAAA,CAAM,CAAA,EAAG,gBAAgB,CAAA,iBAAA,CAAA,EAAqB;AAAA,MACnE,SAAS,EAAE,aAAA,EAAe,CAAA,MAAA,EAAS,IAAA,CAAK,YAAY,CAAA,CAAA;AAAG,KACxD,CAAA;AAED,IAAA,IAAI,CAAC,SAAS,EAAA,EAAI;AAChB,MAAA,MAAM,IAAA,GAAO,MAAM,QAAA,CAAS,IAAA,EAAK;AACjC,MAAA,MAAM,IAAI,KAAA,CAAM,CAAA,4BAAA,EAA+B,SAAS,MAAM,CAAA,GAAA,EAAM,IAAI,CAAA,CAAE,CAAA;AAAA,IAC5E;AAEA,IAAA,MAAM,IAAA,GAAQ,MAAM,QAAA,CAAS,IAAA,EAAK;AAWlC,IAAA,OACE,IAAA,CAAK,MAAA,EAAQ,GAAA,CAAI,CAAA,CAAA,MAAM;AAAA,MACrB,SAAS,CAAA,CAAE,OAAA;AAAA,MACX,IAAA,EAAM,CAAA,CAAE,WAAA,IAAe,CAAA,CAAE,OAAA;AAAA,MACzB,QAAA,EAAU,EAAE,QAAA,IAAY,IAAA;AAAA,MACxB,WAAA,EAAa,EAAE,WAAA,IAAe,EAAA;AAAA,MAC9B,IAAA,EAAM,CAAA,CAAE,IAAA,IAAQ,EAAC;AAAA,MACjB,MAAA,EAAQ,EAAE,MAAA,IAAU;AAAA,KACtB,CAAE,KAAK,EAAC;AAAA,EAEZ;AAAA;AAAA;AAAA;AAAA,EAKA,MAAM,WAAA,G
AAc;AAClB,IAAA,OAAO,EAAE,OAAA,EAAS,CAAC,CAAC,KAAK,cAAA,EAAe;AAAA,EAC1C;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAgBA,MAAM,KAAA,CACJ,KAAA,EACA,OAAA,EACgC;AAChC,IAAA,MAAM,IAAA,GAAO,OAAO,KAAA,KAAU,QAAA,GAAW,QAAQ,MAAM,IAAA,CAAK,eAAe,KAAK,CAAA;AAEhF,IAAA,IAAI,IAAA,CAAK,IAAA,EAAK,CAAE,MAAA,KAAW,CAAA,EAAG;AAC5B,MAAA,MAAM,IAAI,MAAM,qBAAqB,CAAA;AAAA,IACvC;AAEA,IAAA,MAAM,OAAA,GAAU,OAAA,EAAS,OAAA,IAAW,IAAA,CAAK,OAAA;AAEzC,IAAA,MAAM,IAAA,GAAO;AAAA,MACX,IAAA;AAAA,MACA,OAAA,EAAS,OAAA;AAAA,MACT,OAAA,EAAS,IAAA,CAAK,WAAA,EAAa,IAAA,IAAQ,eAAA;AAAA,MACnC,WAAA,EAAa;AAAA,QACX,aAAA,EAAe,OAAA,EAAS,aAAA,IAAiB,IAAA,CAAK,aAAA;AAAA,QAC9C,eAAA,EAAiB,OAAA,EAAS,eAAA,IAAmB,IAAA,CAAK,eAAA;AAAA,QAClD,GAAI,OAAA,EAAS,YAAA,KAAiB,UAAa,EAAE,YAAA,EAAc,QAAQ,YAAA;AAAa,OAClF;AAAA,MACA,GAAI,OAAA,EAAS,WAAA,KAAgB,UAAa,EAAE,WAAA,EAAa,QAAQ,WAAA,EAAY;AAAA,MAC7E,GAAI,OAAA,EAAS,YAAA,KAAiB,UAAa,EAAE,YAAA,EAAc,QAAQ,YAAA,EAAa;AAAA,MAChF,GAAI,OAAA,EAAS,QAAA,KAAa,UAAa,EAAE,QAAA,EAAU,QAAQ,QAAA;AAAS,KACtE;AAEA,IAAA,MAAM,QAAA,GAAW,MAAM,KAAA,CAAM,CAAA,EAAG,gBAAgB,CAAA,oBAAA,CAAA,EAAwB;AAAA,MACtE,MAAA,EAAQ,MAAA;AAAA,MACR,OAAA,EAAS;AAAA,QACP,cAAA,EAAgB,kBAAA;AAAA,QAChB,aAAA,EAAe,CAAA,MAAA,EAAS,IAAA,CAAK,YAAY,CAAA;AAAA,OAC3C;AAAA,MACA,IAAA,EAAM,IAAA,CAAK,SAAA,CAAU,IAAI;AAAA,KAC1B,CAAA;AAED,IAAA,IAAI,CAAC,SAAS,EAAA,EAAI;AAChB,MAAA,MAAM,SAAA,GAAY,MAAM,QAAA,CAAS,IAAA,EAAK;AACtC,MAAA,MAAM,IAAI,KAAA,CAAM,CAAA,oBAAA,EAAuB,SAAS,MAAM,CAAA,GAAA,EAAM,SAAS,CAAA,CAAE,CAAA;AAAA,IACzE;AAEA,IAAA,IAAI,CAAC,SAAS,IAAA,EAAM;AAClB,MAAA,MAAM,IAAI,MAAM,4CAA4C,CAAA;AAAA,IAC9D;AAIA,IAAA,MAAM,YAAA,GAAe,IAAI,WAAA,EAAY;AACrC,IAAA,MAAM,MAAA,GAAS,QAAA,CAAS,IAAA,CAAK,SAAA,EAAU;AACvC,IAAA,MAAM,OAAA,GAAU,IAAI,WAAA,EAAY;AAChC,IAAA,IAAI,MAAA,GAAS,EAAA;AAGb,IAAA,MAAM,UAAA,GAAa,OAAO,IAAA,KAAiB;AACzC,MAAA,IAAI,aAAa,SAAA,EAAW;AAC5B,MAAA,IAAI,CAAC,YAAA,CAAa,KAAA,CAAM,IAAI,CAAA,EAAG;AAC7B,QAAA,MAAM,IAAI,OAAA,CAAc,CAAC,OAAA,EAAS,MAAA,KAAW;AAC3C,UAAA,YAAA,CAAa,IAAA,CAAK,SAAS,OAAO,CAAA;AAClC,UAAA,YAAA,CAAa,IAAA,CAAK,SAAS,OAAO,CAAA;AACl
C,UAAA,YAAA,CAAa,IAAA,CAAK,SAAS,MAAM,CAAA;AAAA,QACnC,CAAC,CAAA;AAAA,MACH;AAAA,IACF,CAAA;AAGA,IAAA,YAAA,CAAa,IAAA,CAAK,SAAS,MAAM;AAC/B,MAAA,KAAK,MAAA,CAAO,MAAA,EAAO,CAAE,KAAA,CAAM,MAAM;AAAA,MAAC,CAAC,CAAA;AAAA,IACrC,CAAC,CAAA;AAED,IAAA,KAAA,CAAM,YAAY;AAChB,MAAA,IAAI;AACF,QAAA,OAAO,IAAA,EAAM;AACX,UAAA,IAAI,aAAa,SAAA,EAAW;AAC1B,YAAA,KAAK,MAAA,CAAO,MAAA,EAAO,CAAE,KAAA,CAAM,MAAM;AAAA,YAAC,CAAC,CAAA;AACnC,YAAA;AAAA,UACF;AAEA,UAAA,MAAM,EAAE,IAAA,EAAM,KAAA,EAAM,GAAI,MAAM,OAAO,IAAA,EAAK;AAC1C,UAAA,IAAI,IAAA,EAAM;AAER,YAAA,MAAM,SAAA,GAAY,OAAO,IAAA,EAAK;AAC9B,YAAA,IAAI,SAAA,EAAW;AACb,cAAA,MAAM,KAAA,GAAQ,IAAA,CAAK,KAAA,CAAM,SAAS,CAAA;AAClC,cAAA,MAAM,YAAA,GAAe,KAAA,CAAM,MAAA,EAAQ,YAAA,IAAgB,KAAA,CAAM,YAAA;AACzD,cAAA,IAAI,YAAA,EAAc;AAChB,gBAAA,MAAM,UAAA,CAAW,MAAA,CAAO,IAAA,CAAK,YAAA,EAAc,QAAQ,CAAC,CAAA;AAAA,cACtD;AAAA,YACF;AACA,YAAA,YAAA,CAAa,GAAA,EAAI;AACjB,YAAA;AAAA,UACF;AAEA,UAAA,MAAA,IAAU,QAAQ,MAAA,CAAO,KAAA,EAAO,EAAE,MAAA,EAAQ,MAAM,CAAA;AAChD,UAAA,MAAM,KAAA,GAAQ,MAAA,CAAO,KAAA,CAAM,IAAI,CAAA;AAC/B,UAAA,MAAA,GAAS,KAAA,CAAM,KAAI,IAAK,EAAA;AAExB,UAAA,KAAA,MAAW,QAAQ,KAAA,EAAO;AACxB,YAAA,MAAM,OAAA,GAAU,KAAK,IAAA,EAAK;AAC1B,YAAA,IAAI,CAAC,OAAA,EAAS;AAEd,YAAA,IAAI,aAAa,SAAA,EAAW;AAC1B,cAAA,KAAK,MAAA,CAAO,MAAA,EAAO,CAAE,KAAA,CAAM,MAAM;AAAA,cAAC,CAAC,CAAA;AACnC,cAAA;AAAA,YACF;AAEA,YAAA,MAAM,KAAA,GAAQ,IAAA,CAAK,KAAA,CAAM,OAAO,CAAA;AAChC,YAAA,MAAM,YAAA,GAAe,KAAA,CAAM,MAAA,EAAQ,YAAA,IAAgB,KAAA,CAAM,YAAA;AACzD,YAAA,IAAI,YAAA,EAAc;AAChB,cAAA,MAAM,UAAA,CAAW,MAAA,CAAO,IAAA,CAAK,YAAA,EAAc,QAAQ,CAAC,CAAA;AAAA,YACtD;AAAA,UACF;AAAA,QACF;AAAA,MACF,SAAS,GAAA,EAAK;AACZ,QAAA,IAAI,CAAC,aAAa,SAAA,EAAW;AAC3B,UAAA,YAAA,CAAa,OAAA,CAAQ,eAAe,KAAA,GAAQ,GAAA,GAAM,IAAI,KAAA,CAAM,MAAA,CAAO,GAAG,CAAC,CAAC,CAAA;AAAA,QAC1E;AACA,QAAA,KAAK,MAAA,CAAO,MAAA,EAAO,CAAE,KAAA,CAAM,MAAM;AAAA,QAAC,CAAC,CAAA;AAAA,MACrC;AAAA,IACF,CAAA,GAAG,CAAE,KAAA,CAAM,MAAM;AAAA,IAAC,CAAC,CAAA;AAEnB,IAAA,OAAO,YAAA;AAAA,EACT;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EASA,MAAM,MAAA,CAAO,KAAA,EAA8B,OAAA,EAAiD;AAC1F,IAAA,MAAM,SAAmB,EAAC;AAC1B,IA
AA,WAAA,MAAiB,SAAS,KAAA,EAAO;AAC/B,MAAA,MAAA,CAAO,IAAA,CAAK,OAAO,KAAA,KAAU,QAAA,GAAW,OAAO,IAAA,CAAK,KAAK,IAAK,KAAgB,CAAA;AAAA,IAChF;AACA,IAAA,MAAM,cAAc,MAAA,CAAO,MAAA,CAAO,MAAM,CAAA,CAAE,SAAS,QAAQ,CAAA;AAE3D,IAAA,MAAM,IAAA,GAAO;AAAA,MACX,gBAAA,EAAkB;AAAA,QAChB,OAAA,EAAS,IAAA,CAAK,cAAA,EAAgB,IAAA,IAAQ,uBAAA;AAAA,QACtC,aAAA,EAAe,SAAS,aAAA,IAAiB,aAAA;AAAA,QACzC,QAAA,EAAU,OAAA,EAAS,QAAA,IAAY,IAAA,CAAK,QAAA;AAAA,QACpC,eAAA,EAAiB,SAAS,eAAA,IAAmB,IAAA;AAAA,QAC7C,gBAAA,EAAkB,SAAS,gBAAA,IAAoB;AAAA,OACjD;AAAA,MACA,SAAA,EAAW;AAAA,QACT,OAAA,EAAS;AAAA;AACX,KACF;AAEA,IAAA,MAAM,QAAA,GAAW,MAAM,KAAA,CAAM,CAAA,EAAG,gBAAgB,CAAA,kBAAA,CAAA,EAAsB;AAAA,MACpE,MAAA,EAAQ,MAAA;AAAA,MACR,OAAA,EAAS;AAAA,QACP,cAAA,EAAgB,kBAAA;AAAA,QAChB,aAAA,EAAe,CAAA,MAAA,EAAS,IAAA,CAAK,eAAe,CAAA;AAAA,OAC9C;AAAA,MACA,IAAA,EAAM,IAAA,CAAK,SAAA,CAAU,IAAI;AAAA,KAC1B,CAAA;AAED,IAAA,IAAI,CAAC,SAAS,EAAA,EAAI;AAChB,MAAA,MAAM,IAAA,GAAO,MAAM,QAAA,CAAS,IAAA,EAAK;AACjC,MAAA,MAAM,IAAI,KAAA,CAAM,CAAA,oBAAA,EAAuB,SAAS,MAAM,CAAA,GAAA,EAAM,IAAI,CAAA,CAAE,CAAA;AAAA,IACpE;AAEA,IAAA,MAAM,MAAA,GAAU,MAAM,QAAA,CAAS,IAAA,EAAK;AAOpC,IAAA,OAAO,MAAA,CAAO,eAAe,UAAA,IAAc,EAAA;AAAA,EAC7C;AAAA,EAEA,MAAc,eAAe,MAAA,EAAgD;AAC3E,IAAA,MAAM,SAAmB,EAAC;AAC1B,IAAA,WAAA,MAAiB,SAAS,MAAA,EAAQ;AAChC,MAAA,MAAA,CAAO,IAAA,CAAK,OAAO,KAAA,KAAU,QAAA,GAAW,OAAO,IAAA,CAAK,KAAK,IAAK,KAAgB,CAAA;AAAA,IAChF;AACA,IAAA,OAAO,MAAA,CAAO,MAAA,CAAO,MAAM,CAAA,CAAE,SAAS,OAAO,CAAA;AAAA,EAC/C;AACF","file":"index.js","sourcesContent":["import { PassThrough } from 'node:stream';\nimport { MastraVoice } from '@mastra/core/voice';\n\nconst INWORLD_API_BASE = 'https://api.inworld.ai';\n\ntype InworldTtsModel = 'inworld-tts-2' | 'inworld-tts-1.5-max' | 'inworld-tts-1.5-mini';\n\ntype DeliveryMode = 'STABLE' | 'BALANCED' | 'CREATIVE';\n\ntype InworldSttModel = 'groq/whisper-large-v3';\n\ntype AudioEncoding = 'LINEAR16' | 'MP3' | 'OGG_OPUS' | 'ALAW' | 'MULAW' | 'FLAC' | 'PCM' | 'WAV';\n\ntype SttAudioEncoding = 'LINEAR16' | 'MP3' | 'OGG_OPUS' | 'FLAC' | 'AUTO_DETECT';\n\ninterface 
InworldVoiceConfig {\n name?: InworldTtsModel;\n apiKey?: string;\n}\n\ninterface InworldListeningConfig {\n name?: InworldSttModel;\n apiKey?: string;\n}\n\ninterface InworldSpeakOptions {\n speaker?: string;\n audioEncoding?: AudioEncoding;\n sampleRateHertz?: number;\n speakingRate?: number;\n temperature?: number;\n deliveryMode?: DeliveryMode;\n language?: string;\n}\n\ninterface InworldListenOptions {\n audioEncoding?: SttAudioEncoding;\n sampleRateHertz?: number;\n language?: string;\n numberOfChannels?: number;\n}\n\nexport type {\n InworldVoiceConfig,\n InworldListeningConfig,\n InworldSpeakOptions,\n InworldListenOptions,\n InworldTtsModel,\n InworldSttModel,\n AudioEncoding,\n SttAudioEncoding,\n DeliveryMode,\n};\n\nexport class InworldVoice extends MastraVoice {\n private speechApiKey: string;\n private listeningApiKey: string;\n private audioEncoding: AudioEncoding;\n private sampleRateHertz: number;\n private language: string;\n\n /**\n * Creates an instance of the InworldVoice class.\n *\n * @param {Object} options - The options for the voice configuration.\n * @param {InworldVoiceConfig} [options.speechModel] - TTS model config. Default: inworld-tts-2.\n * @param {InworldListeningConfig} [options.listeningModel] - STT model config. Default: groq/whisper-large-v3.\n * @param {string} [options.speaker] - Default voice ID. Default: 'Dennis'.\n * @param {AudioEncoding} [options.audioEncoding] - TTS audio format. Default: 'MP3'.\n * @param {number} [options.sampleRateHertz] - TTS sample rate. Default: 48000.\n * @param {string} [options.language] - STT language (BCP-47). 
Default: 'en-US'.\n *\n * @throws {Error} If no API key is provided or found in INWORLD_API_KEY env var.\n */\n constructor({\n speechModel,\n listeningModel,\n speaker,\n audioEncoding,\n sampleRateHertz,\n language,\n }: {\n speechModel?: InworldVoiceConfig;\n listeningModel?: InworldListeningConfig;\n speaker?: string;\n audioEncoding?: AudioEncoding;\n sampleRateHertz?: number;\n language?: string;\n } = {}) {\n // Resolve TTS and STT keys independently so callers can scope credentials\n // per service. If only one model is configured with a key, it is reused as\n // the fallback for the other (preserving the \"one key works everywhere\"\n // ergonomic). The INWORLD_API_KEY env var is the final fallback.\n const envKey = process.env.INWORLD_API_KEY;\n const speechApiKey = speechModel?.apiKey ?? listeningModel?.apiKey ?? envKey;\n const listeningApiKey = listeningModel?.apiKey ?? speechModel?.apiKey ?? envKey;\n\n if (!speechApiKey || !listeningApiKey) {\n throw new Error(\n 'Inworld API key is required. Pass apiKey in speechModel/listeningModel config or set INWORLD_API_KEY env var.',\n );\n }\n\n super({\n speechModel: {\n name: speechModel?.name ?? 'inworld-tts-2',\n apiKey: speechApiKey,\n },\n listeningModel: {\n name: listeningModel?.name ?? 'groq/whisper-large-v3',\n apiKey: listeningApiKey,\n },\n speaker: speaker ?? 'Dennis',\n });\n\n this.speechApiKey = speechApiKey;\n this.listeningApiKey = listeningApiKey;\n this.audioEncoding = audioEncoding ?? 'MP3';\n this.sampleRateHertz = sampleRateHertz ?? 48000;\n this.language = language ?? 
'en-US';\n }\n\n /**\n * Retrieves a list of available voices from the Inworld API.\n *\n * @returns {Promise<Array<{ voiceId: string; name: string; language: string; description: string; tags: string[]; source: string }>>}\n */\n async getSpeakers() {\n const response = await fetch(`${INWORLD_API_BASE}/voices/v1/voices`, {\n headers: { Authorization: `Basic ${this.speechApiKey}` },\n });\n\n if (!response.ok) {\n const text = await response.text();\n throw new Error(`Inworld list voices failed (${response.status}): ${text}`);\n }\n\n const data = (await response.json()) as {\n voices?: Array<{\n voiceId: string;\n displayName?: string;\n langCode?: string;\n description?: string;\n tags?: string[];\n source?: string;\n }>;\n };\n\n return (\n data.voices?.map(v => ({\n voiceId: v.voiceId,\n name: v.displayName ?? v.voiceId,\n language: v.langCode ?? 'en',\n description: v.description ?? '',\n tags: v.tags ?? [],\n source: v.source ?? 'SYSTEM',\n })) ?? []\n );\n }\n\n /**\n * Checks if listening capabilities are enabled.\n */\n async getListener() {\n return { enabled: !!this.listeningModel };\n }\n\n /**\n * Converts text to speech using Inworld's streaming TTS endpoint.\n *\n * Returns a ReadableStream that emits audio chunks progressively as they\n * arrive from the API — following the same streaming pattern used by the\n * Deepgram and PlayAI Mastra voice providers.\n *\n * Uses the `/tts/v1/voice:stream` endpoint which returns newline-delimited\n * JSON, each line containing a base64-encoded audio chunk.\n *\n * @param {string | NodeJS.ReadableStream} input - Text or stream to convert to speech.\n * @param {InworldSpeakOptions} [options] - TTS options.\n * @returns {Promise<NodeJS.ReadableStream>} Progressive audio stream.\n */\n async speak(\n input: string | NodeJS.ReadableStream,\n options?: InworldSpeakOptions & { speaker?: string },\n ): Promise<NodeJS.ReadableStream> {\n const text = typeof input === 'string' ? 
input : await this.streamToString(input);\n\n if (text.trim().length === 0) {\n throw new Error('Input text is empty');\n }\n\n const speaker = options?.speaker ?? this.speaker;\n\n const body = {\n text,\n voiceId: speaker,\n modelId: this.speechModel?.name ?? 'inworld-tts-2',\n audioConfig: {\n audioEncoding: options?.audioEncoding ?? this.audioEncoding,\n sampleRateHertz: options?.sampleRateHertz ?? this.sampleRateHertz,\n ...(options?.speakingRate !== undefined && { speakingRate: options.speakingRate }),\n },\n ...(options?.temperature !== undefined && { temperature: options.temperature }),\n ...(options?.deliveryMode !== undefined && { deliveryMode: options.deliveryMode }),\n ...(options?.language !== undefined && { language: options.language }),\n };\n\n const response = await fetch(`${INWORLD_API_BASE}/tts/v1/voice:stream`, {\n method: 'POST',\n headers: {\n 'Content-Type': 'application/json',\n Authorization: `Basic ${this.speechApiKey}`,\n },\n body: JSON.stringify(body),\n });\n\n if (!response.ok) {\n const errorText = await response.text();\n throw new Error(`Inworld TTS failed (${response.status}): ${errorText}`);\n }\n\n if (!response.body) {\n throw new Error('Inworld TTS streaming response has no body');\n }\n\n // Progressive streaming: return the PassThrough immediately while reading\n // NDJSON chunks from the response body in a background async task.\n const outputStream = new PassThrough();\n const reader = response.body.getReader();\n const decoder = new TextDecoder();\n let buffer = '';\n\n // Write with backpressure: await drain when the consumer is slow.\n const writeChunk = async (data: Buffer) => {\n if (outputStream.destroyed) return;\n if (!outputStream.write(data)) {\n await new Promise<void>((resolve, reject) => {\n outputStream.once('drain', resolve);\n outputStream.once('close', resolve);\n outputStream.once('error', reject);\n });\n }\n };\n\n // Cancel the upstream reader when the consumer closes early.\n 
outputStream.once('close', () => {\n void reader.cancel().catch(() => {});\n });\n\n void (async () => {\n try {\n while (true) {\n if (outputStream.destroyed) {\n void reader.cancel().catch(() => {});\n break;\n }\n\n const { done, value } = await reader.read();\n if (done) {\n // Process any remaining data in the buffer before ending\n const remaining = buffer.trim();\n if (remaining) {\n const chunk = JSON.parse(remaining);\n const audioContent = chunk.result?.audioContent ?? chunk.audioContent;\n if (audioContent) {\n await writeChunk(Buffer.from(audioContent, 'base64'));\n }\n }\n outputStream.end();\n break;\n }\n\n buffer += decoder.decode(value, { stream: true });\n const lines = buffer.split('\\n');\n buffer = lines.pop() ?? '';\n\n for (const line of lines) {\n const trimmed = line.trim();\n if (!trimmed) continue;\n\n if (outputStream.destroyed) {\n void reader.cancel().catch(() => {});\n return;\n }\n\n const chunk = JSON.parse(trimmed);\n const audioContent = chunk.result?.audioContent ?? chunk.audioContent;\n if (audioContent) {\n await writeChunk(Buffer.from(audioContent, 'base64'));\n }\n }\n }\n } catch (err) {\n if (!outputStream.destroyed) {\n outputStream.destroy(err instanceof Error ? err : new Error(String(err)));\n }\n void reader.cancel().catch(() => {});\n }\n })().catch(() => {});\n\n return outputStream;\n }\n\n /**\n * Converts audio to text using Inworld's batch STT endpoint.\n *\n * @param {NodeJS.ReadableStream} input - Audio stream to transcribe.\n * @param {InworldListenOptions} [options] - STT options.\n * @returns {Promise<string>} Transcribed text.\n */\n async listen(input: NodeJS.ReadableStream, options?: InworldListenOptions): Promise<string> {\n const chunks: Buffer[] = [];\n for await (const chunk of input) {\n chunks.push(typeof chunk === 'string' ? 
Buffer.from(chunk) : (chunk as Buffer));\n }\n const audioBase64 = Buffer.concat(chunks).toString('base64');\n\n const body = {\n transcribeConfig: {\n modelId: this.listeningModel?.name ?? 'groq/whisper-large-v3',\n audioEncoding: options?.audioEncoding ?? 'AUTO_DETECT',\n language: options?.language ?? this.language,\n sampleRateHertz: options?.sampleRateHertz ?? 16000,\n numberOfChannels: options?.numberOfChannels ?? 1,\n },\n audioData: {\n content: audioBase64,\n },\n };\n\n const response = await fetch(`${INWORLD_API_BASE}/stt/v1/transcribe`, {\n method: 'POST',\n headers: {\n 'Content-Type': 'application/json',\n Authorization: `Basic ${this.listeningApiKey}`,\n },\n body: JSON.stringify(body),\n });\n\n if (!response.ok) {\n const text = await response.text();\n throw new Error(`Inworld STT failed (${response.status}): ${text}`);\n }\n\n const result = (await response.json()) as {\n transcription?: {\n transcript?: string;\n isFinal?: boolean;\n };\n };\n\n return result.transcription?.transcript ?? '';\n }\n\n private async streamToString(stream: NodeJS.ReadableStream): Promise<string> {\n const chunks: Buffer[] = [];\n for await (const chunk of stream) {\n chunks.push(typeof chunk === 'string' ? Buffer.from(chunk) : (chunk as Buffer));\n }\n return Buffer.concat(chunks).toString('utf-8');\n }\n}\n"]}
|
package/package.json
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@mastra/voice-inworld",
|
|
3
|
+
"version": "0.0.0",
|
|
4
|
+
"description": "Mastra Inworld AI voice integration — streaming TTS and batch STT",
|
|
5
|
+
"type": "module",
|
|
6
|
+
"files": [
|
|
7
|
+
"dist",
|
|
8
|
+
"CHANGELOG.md"
|
|
9
|
+
],
|
|
10
|
+
"main": "dist/index.js",
|
|
11
|
+
"types": "dist/index.d.ts",
|
|
12
|
+
"exports": {
|
|
13
|
+
".": {
|
|
14
|
+
"import": {
|
|
15
|
+
"types": "./dist/index.d.ts",
|
|
16
|
+
"default": "./dist/index.js"
|
|
17
|
+
},
|
|
18
|
+
"require": {
|
|
19
|
+
"types": "./dist/index.d.ts",
|
|
20
|
+
"default": "./dist/index.cjs"
|
|
21
|
+
}
|
|
22
|
+
},
|
|
23
|
+
"./package.json": "./package.json"
|
|
24
|
+
},
|
|
25
|
+
"scripts": {
|
|
26
|
+
"build": "tsup --silent --config tsup.config.ts",
|
|
27
|
+
"prepack": "pnpx tsx ../../scripts/generate-package-docs.ts",
|
|
28
|
+
"build:watch": "tsup --watch --silent --config tsup.config.ts",
|
|
29
|
+
"test": "vitest run",
|
|
30
|
+
"lint": "eslint ."
|
|
31
|
+
},
|
|
32
|
+
"license": "Apache-2.0",
|
|
33
|
+
"dependencies": {},
|
|
34
|
+
"devDependencies": {
|
|
35
|
+
"@internal/lint": "workspace:*",
|
|
36
|
+
"@internal/types-builder": "workspace:*",
|
|
37
|
+
"@mastra/core": "workspace:*",
|
|
38
|
+
"@types/node": "22.19.15",
|
|
39
|
+
"@vitest/coverage-v8": "catalog:",
|
|
40
|
+
"@vitest/ui": "catalog:",
|
|
41
|
+
"eslint": "^10.2.1",
|
|
42
|
+
"tsup": "^8.5.1",
|
|
43
|
+
"typescript": "catalog:",
|
|
44
|
+
"vitest": "catalog:"
|
|
45
|
+
},
|
|
46
|
+
"peerDependencies": {
|
|
47
|
+
"@mastra/core": ">=1.0.0-0 <2.0.0-0",
|
|
48
|
+
"zod": "^3.25.0 || ^4.0.0"
|
|
49
|
+
},
|
|
50
|
+
"homepage": "https://mastra.ai",
|
|
51
|
+
"repository": {
|
|
52
|
+
"type": "git",
|
|
53
|
+
"url": "git+https://github.com/mastra-ai/mastra.git",
|
|
54
|
+
"directory": "voice/inworld"
|
|
55
|
+
},
|
|
56
|
+
"bugs": {
|
|
57
|
+
"url": "https://github.com/mastra-ai/mastra/issues"
|
|
58
|
+
},
|
|
59
|
+
"engines": {
|
|
60
|
+
"node": ">=22.13.0"
|
|
61
|
+
}
|
|
62
|
+
}
|