@livekit/agents 1.0.25 → 1.0.30
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/connection_pool.cjs +242 -0
- package/dist/connection_pool.cjs.map +1 -0
- package/dist/connection_pool.d.cts +123 -0
- package/dist/connection_pool.d.ts +123 -0
- package/dist/connection_pool.d.ts.map +1 -0
- package/dist/connection_pool.js +218 -0
- package/dist/connection_pool.js.map +1 -0
- package/dist/connection_pool.test.cjs +256 -0
- package/dist/connection_pool.test.cjs.map +1 -0
- package/dist/connection_pool.test.js +255 -0
- package/dist/connection_pool.test.js.map +1 -0
- package/dist/index.cjs +2 -0
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +1 -0
- package/dist/index.d.ts +1 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +1 -0
- package/dist/index.js.map +1 -1
- package/dist/inference/tts.cjs +172 -58
- package/dist/inference/tts.cjs.map +1 -1
- package/dist/inference/tts.d.cts +3 -1
- package/dist/inference/tts.d.ts +3 -1
- package/dist/inference/tts.d.ts.map +1 -1
- package/dist/inference/tts.js +173 -59
- package/dist/inference/tts.js.map +1 -1
- package/dist/tts/stream_adapter.cjs +6 -3
- package/dist/tts/stream_adapter.cjs.map +1 -1
- package/dist/tts/stream_adapter.d.cts +1 -1
- package/dist/tts/stream_adapter.d.ts +1 -1
- package/dist/tts/stream_adapter.d.ts.map +1 -1
- package/dist/tts/stream_adapter.js +6 -3
- package/dist/tts/stream_adapter.js.map +1 -1
- package/dist/tts/tts.cjs +26 -15
- package/dist/tts/tts.cjs.map +1 -1
- package/dist/tts/tts.d.cts +7 -4
- package/dist/tts/tts.d.ts +7 -4
- package/dist/tts/tts.d.ts.map +1 -1
- package/dist/tts/tts.js +26 -15
- package/dist/tts/tts.js.map +1 -1
- package/dist/utils.cjs +20 -0
- package/dist/utils.cjs.map +1 -1
- package/dist/utils.d.cts +7 -0
- package/dist/utils.d.ts +7 -0
- package/dist/utils.d.ts.map +1 -1
- package/dist/utils.js +19 -0
- package/dist/utils.js.map +1 -1
- package/dist/voice/agent_activity.cjs +3 -1
- package/dist/voice/agent_activity.cjs.map +1 -1
- package/dist/voice/agent_activity.d.ts.map +1 -1
- package/dist/voice/agent_activity.js +3 -1
- package/dist/voice/agent_activity.js.map +1 -1
- package/dist/voice/agent_session.cjs +6 -1
- package/dist/voice/agent_session.cjs.map +1 -1
- package/dist/voice/agent_session.d.ts.map +1 -1
- package/dist/voice/agent_session.js +6 -1
- package/dist/voice/agent_session.js.map +1 -1
- package/dist/voice/avatar/datastream_io.cjs +1 -1
- package/dist/voice/avatar/datastream_io.cjs.map +1 -1
- package/dist/voice/avatar/datastream_io.js +1 -1
- package/dist/voice/avatar/datastream_io.js.map +1 -1
- package/dist/voice/background_audio.cjs +77 -37
- package/dist/voice/background_audio.cjs.map +1 -1
- package/dist/voice/background_audio.d.cts +10 -3
- package/dist/voice/background_audio.d.ts +10 -3
- package/dist/voice/background_audio.d.ts.map +1 -1
- package/dist/voice/background_audio.js +78 -37
- package/dist/voice/background_audio.js.map +1 -1
- package/dist/voice/index.cjs +1 -0
- package/dist/voice/index.cjs.map +1 -1
- package/dist/voice/index.d.cts +1 -0
- package/dist/voice/index.d.ts +1 -0
- package/dist/voice/index.d.ts.map +1 -1
- package/dist/voice/index.js +1 -0
- package/dist/voice/index.js.map +1 -1
- package/dist/voice/io.cjs +10 -1
- package/dist/voice/io.cjs.map +1 -1
- package/dist/voice/io.d.cts +18 -1
- package/dist/voice/io.d.ts +18 -1
- package/dist/voice/io.d.ts.map +1 -1
- package/dist/voice/io.js +10 -1
- package/dist/voice/io.js.map +1 -1
- package/dist/voice/recorder_io/recorder_io.cjs +1 -1
- package/dist/voice/recorder_io/recorder_io.cjs.map +1 -1
- package/dist/voice/recorder_io/recorder_io.js +1 -1
- package/dist/voice/recorder_io/recorder_io.js.map +1 -1
- package/dist/voice/room_io/_output.cjs +1 -1
- package/dist/voice/room_io/_output.cjs.map +1 -1
- package/dist/voice/room_io/_output.js +1 -1
- package/dist/voice/room_io/_output.js.map +1 -1
- package/dist/voice/transcription/synchronizer.cjs +1 -1
- package/dist/voice/transcription/synchronizer.cjs.map +1 -1
- package/dist/voice/transcription/synchronizer.js +1 -1
- package/dist/voice/transcription/synchronizer.js.map +1 -1
- package/dist/worker.cjs +4 -6
- package/dist/worker.cjs.map +1 -1
- package/dist/worker.d.ts.map +1 -1
- package/dist/worker.js +4 -6
- package/dist/worker.js.map +1 -1
- package/package.json +3 -3
- package/src/connection_pool.test.ts +346 -0
- package/src/connection_pool.ts +307 -0
- package/src/index.ts +1 -0
- package/src/inference/tts.ts +206 -65
- package/src/tts/stream_adapter.ts +10 -3
- package/src/tts/tts.ts +41 -18
- package/src/utils.ts +25 -0
- package/src/voice/agent_activity.ts +7 -1
- package/src/voice/agent_session.ts +6 -1
- package/src/voice/avatar/datastream_io.ts +1 -1
- package/src/voice/background_audio.ts +95 -55
- package/src/voice/index.ts +1 -0
- package/src/voice/io.ts +24 -0
- package/src/voice/recorder_io/recorder_io.ts +1 -1
- package/src/voice/room_io/_output.ts +1 -1
- package/src/voice/transcription/synchronizer.ts +1 -1
- package/src/worker.ts +4 -7
package/dist/inference/tts.js
CHANGED
|
@@ -1,12 +1,13 @@
|
|
|
1
1
|
import { WebSocket } from "ws";
|
|
2
2
|
import { APIError, APIStatusError } from "../_exceptions.js";
|
|
3
3
|
import { AudioByteStream } from "../audio.js";
|
|
4
|
+
import { ConnectionPool } from "../connection_pool.js";
|
|
4
5
|
import { log } from "../log.js";
|
|
5
6
|
import { createStreamChannel } from "../stream/stream_channel.js";
|
|
6
7
|
import { basic as tokenizeBasic } from "../tokenize/index.js";
|
|
7
8
|
import { SynthesizeStream as BaseSynthesizeStream, TTS as BaseTTS } from "../tts/index.js";
|
|
8
9
|
import { DEFAULT_API_CONNECT_OPTIONS } from "../types.js";
|
|
9
|
-
import { shortuuid } from "../utils.js";
|
|
10
|
+
import { Event, Future, Task, cancelAndWait, combineSignals, shortuuid } from "../utils.js";
|
|
10
11
|
import {
|
|
11
12
|
ttsClientEventSchema,
|
|
12
13
|
ttsServerEventSchema
|
|
@@ -20,6 +21,7 @@ const DEFAULT_LANGUAGE = "en";
|
|
|
20
21
|
class TTS extends BaseTTS {
|
|
21
22
|
opts;
|
|
22
23
|
streams = /* @__PURE__ */ new Set();
|
|
24
|
+
pool;
|
|
23
25
|
#logger = log();
|
|
24
26
|
constructor(opts) {
|
|
25
27
|
const sampleRate = (opts == null ? void 0 : opts.sampleRate) ?? DEFAULT_SAMPLE_RATE;
|
|
@@ -71,6 +73,14 @@ class TTS extends BaseTTS {
|
|
|
71
73
|
apiSecret: lkApiSecret,
|
|
72
74
|
modelOptions
|
|
73
75
|
};
|
|
76
|
+
this.pool = new ConnectionPool({
|
|
77
|
+
connectCb: (timeout) => this.connectWs(timeout),
|
|
78
|
+
closeCb: (ws) => this.closeWs(ws),
|
|
79
|
+
maxSessionDuration: 3e5,
|
|
80
|
+
markRefreshedOnGet: true,
|
|
81
|
+
connectTimeout: 1e4
|
|
82
|
+
// 10 seconds default
|
|
83
|
+
});
|
|
74
84
|
}
|
|
75
85
|
get label() {
|
|
76
86
|
return "inference.TTS";
|
|
@@ -114,6 +124,7 @@ class TTS extends BaseTTS {
|
|
|
114
124
|
if (this.opts.voice) params.voice = this.opts.voice;
|
|
115
125
|
if (this.opts.model) params.model = this.opts.model;
|
|
116
126
|
if (this.opts.language) params.language = this.opts.language;
|
|
127
|
+
this.#logger.debug({ url }, "inference.TTS creating new websocket connection (pool miss)");
|
|
117
128
|
const socket = await connectWs(url, headers, timeout);
|
|
118
129
|
socket.send(JSON.stringify(params));
|
|
119
130
|
return socket;
|
|
@@ -121,23 +132,25 @@ class TTS extends BaseTTS {
|
|
|
121
132
|
async closeWs(ws) {
|
|
122
133
|
await ws.close();
|
|
123
134
|
}
|
|
135
|
+
prewarm() {
|
|
136
|
+
this.pool.prewarm();
|
|
137
|
+
}
|
|
124
138
|
async close() {
|
|
125
139
|
for (const stream of this.streams) {
|
|
126
140
|
await stream.close();
|
|
127
141
|
}
|
|
128
142
|
this.streams.clear();
|
|
143
|
+
await this.pool.close();
|
|
129
144
|
}
|
|
130
145
|
}
|
|
131
146
|
class SynthesizeStream extends BaseSynthesizeStream {
|
|
132
147
|
opts;
|
|
133
148
|
tts;
|
|
134
|
-
connOptions;
|
|
135
149
|
#logger = log();
|
|
136
150
|
constructor(tts, opts, connOptions) {
|
|
137
151
|
super(tts, connOptions);
|
|
138
152
|
this.opts = opts;
|
|
139
153
|
this.tts = tts;
|
|
140
|
-
this.connOptions = connOptions;
|
|
141
154
|
}
|
|
142
155
|
get label() {
|
|
143
156
|
return "inference.SynthesizeStream";
|
|
@@ -146,25 +159,23 @@ class SynthesizeStream extends BaseSynthesizeStream {
|
|
|
146
159
|
this.opts = { ...this.opts, ...opts };
|
|
147
160
|
}
|
|
148
161
|
async run() {
|
|
149
|
-
let ws = null;
|
|
150
162
|
let closing = false;
|
|
151
|
-
let finalReceived = false;
|
|
152
163
|
let lastFrame;
|
|
153
164
|
const sendTokenizerStream = new tokenizeBasic.SentenceTokenizer().stream();
|
|
154
165
|
const eventChannel = createStreamChannel();
|
|
155
166
|
const requestId = shortuuid("tts_request_");
|
|
156
|
-
const
|
|
167
|
+
const inputSentEvent = new Event();
|
|
168
|
+
const completionFuture = new Future();
|
|
169
|
+
const resourceCleanup = async () => {
|
|
157
170
|
if (closing) return;
|
|
158
171
|
closing = true;
|
|
159
172
|
sendTokenizerStream.close();
|
|
160
|
-
eventChannel.close();
|
|
161
|
-
ws == null ? void 0 : ws.removeAllListeners();
|
|
162
|
-
ws == null ? void 0 : ws.close();
|
|
173
|
+
await eventChannel.close();
|
|
163
174
|
};
|
|
164
|
-
const sendClientEvent = async (event) => {
|
|
165
|
-
if (
|
|
175
|
+
const sendClientEvent = async (event, ws, signal) => {
|
|
176
|
+
if (signal.aborted || closing) return;
|
|
166
177
|
const validatedEvent = await ttsClientEventSchema.parseAsync(event);
|
|
167
|
-
if (
|
|
178
|
+
if (ws.readyState !== WebSocket.OPEN) {
|
|
168
179
|
this.#logger.warn("Trying to send client TTS event to a closed WebSocket");
|
|
169
180
|
return;
|
|
170
181
|
}
|
|
@@ -176,9 +187,9 @@ class SynthesizeStream extends BaseSynthesizeStream {
|
|
|
176
187
|
lastFrame = void 0;
|
|
177
188
|
}
|
|
178
189
|
};
|
|
179
|
-
const createInputTask = async () => {
|
|
190
|
+
const createInputTask = async (signal) => {
|
|
180
191
|
for await (const data of this.input) {
|
|
181
|
-
if (
|
|
192
|
+
if (signal.aborted || closing) break;
|
|
182
193
|
if (data === SynthesizeStream.FLUSH_SENTINEL) {
|
|
183
194
|
sendTokenizerStream.flush();
|
|
184
195
|
continue;
|
|
@@ -189,54 +200,95 @@ class SynthesizeStream extends BaseSynthesizeStream {
|
|
|
189
200
|
sendTokenizerStream.endInput();
|
|
190
201
|
}
|
|
191
202
|
};
|
|
192
|
-
const createSentenceStreamTask = async () => {
|
|
203
|
+
const createSentenceStreamTask = async (ws, signal) => {
|
|
193
204
|
for await (const ev of sendTokenizerStream) {
|
|
194
|
-
if (
|
|
195
|
-
sendClientEvent(
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
205
|
+
if (signal.aborted || closing) break;
|
|
206
|
+
await sendClientEvent(
|
|
207
|
+
{
|
|
208
|
+
type: "input_transcript",
|
|
209
|
+
transcript: ev.token + " "
|
|
210
|
+
},
|
|
211
|
+
ws,
|
|
212
|
+
signal
|
|
213
|
+
);
|
|
214
|
+
inputSentEvent.set();
|
|
199
215
|
}
|
|
200
|
-
sendClientEvent({ type: "session.flush" });
|
|
216
|
+
await sendClientEvent({ type: "session.flush" }, ws, signal);
|
|
217
|
+
inputSentEvent.set();
|
|
201
218
|
};
|
|
202
|
-
const createWsListenerTask = async (
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
resourceCleanup();
|
|
206
|
-
resolve();
|
|
207
|
-
});
|
|
208
|
-
ws2.on("message", async (data) => {
|
|
219
|
+
const createWsListenerTask = async (ws, signal) => {
|
|
220
|
+
const onMessage = (data) => {
|
|
221
|
+
try {
|
|
209
222
|
const eventJson = JSON.parse(data.toString());
|
|
210
223
|
const validatedEvent = ttsServerEventSchema.parse(eventJson);
|
|
211
|
-
eventChannel.write(validatedEvent)
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
})
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
224
|
+
void eventChannel.write(validatedEvent).catch((error) => {
|
|
225
|
+
this.#logger.debug(
|
|
226
|
+
{ error },
|
|
227
|
+
"Failed writing TTS event to stream channel (likely closed)"
|
|
228
|
+
);
|
|
229
|
+
});
|
|
230
|
+
} catch (e) {
|
|
231
|
+
this.#logger.error({ error: e }, "Error parsing WebSocket message");
|
|
232
|
+
}
|
|
233
|
+
};
|
|
234
|
+
const onError = (e) => {
|
|
235
|
+
var _a;
|
|
236
|
+
this.#logger.error({ error: e }, "WebSocket error");
|
|
237
|
+
void resourceCleanup();
|
|
238
|
+
try {
|
|
239
|
+
(_a = ws.terminate) == null ? void 0 : _a.call(ws);
|
|
240
|
+
} catch {
|
|
241
|
+
}
|
|
242
|
+
this.tts.pool.remove(ws);
|
|
243
|
+
completionFuture.reject(e);
|
|
244
|
+
};
|
|
245
|
+
const onClose = () => {
|
|
246
|
+
if (!closing) {
|
|
247
|
+
this.#logger.error("WebSocket closed unexpectedly");
|
|
248
|
+
void resourceCleanup();
|
|
249
|
+
this.tts.pool.remove(ws);
|
|
250
|
+
completionFuture.reject(
|
|
223
251
|
new APIStatusError({
|
|
224
252
|
message: "Gateway connection closed unexpectedly",
|
|
225
253
|
options: { requestId }
|
|
226
254
|
})
|
|
227
255
|
);
|
|
228
|
-
}
|
|
229
|
-
}
|
|
256
|
+
}
|
|
257
|
+
};
|
|
258
|
+
const onAbort = () => {
|
|
259
|
+
var _a;
|
|
260
|
+
void resourceCleanup();
|
|
261
|
+
try {
|
|
262
|
+
(_a = ws.terminate) == null ? void 0 : _a.call(ws);
|
|
263
|
+
} catch {
|
|
264
|
+
}
|
|
265
|
+
this.tts.pool.remove(ws);
|
|
266
|
+
inputSentEvent.set();
|
|
267
|
+
completionFuture.resolve();
|
|
268
|
+
};
|
|
269
|
+
ws.on("message", onMessage);
|
|
270
|
+
ws.on("error", onError);
|
|
271
|
+
ws.on("close", onClose);
|
|
272
|
+
signal.addEventListener("abort", onAbort);
|
|
273
|
+
try {
|
|
274
|
+
await completionFuture.await;
|
|
275
|
+
} finally {
|
|
276
|
+
ws.off("message", onMessage);
|
|
277
|
+
ws.off("error", onError);
|
|
278
|
+
ws.off("close", onClose);
|
|
279
|
+
signal.removeEventListener("abort", onAbort);
|
|
280
|
+
}
|
|
230
281
|
};
|
|
231
|
-
const createRecvTask = async () => {
|
|
282
|
+
const createRecvTask = async (signal) => {
|
|
232
283
|
let currentSessionId = null;
|
|
233
284
|
const bstream = new AudioByteStream(this.opts.sampleRate, NUM_CHANNELS);
|
|
234
285
|
const serverEventStream = eventChannel.stream();
|
|
235
286
|
const reader = serverEventStream.getReader();
|
|
236
287
|
try {
|
|
237
|
-
|
|
288
|
+
await inputSentEvent.wait();
|
|
289
|
+
while (!this.closed && !signal.aborted) {
|
|
238
290
|
const result = await reader.read();
|
|
239
|
-
if (
|
|
291
|
+
if (signal.aborted) return;
|
|
240
292
|
if (result.done) return;
|
|
241
293
|
const serverEvent = result.value;
|
|
242
294
|
switch (serverEvent.type) {
|
|
@@ -251,24 +303,29 @@ class SynthesizeStream extends BaseSynthesizeStream {
|
|
|
251
303
|
}
|
|
252
304
|
break;
|
|
253
305
|
case "done":
|
|
254
|
-
finalReceived = true;
|
|
255
306
|
for (const frame of bstream.flush()) {
|
|
256
307
|
sendLastFrame(currentSessionId, false);
|
|
257
308
|
lastFrame = frame;
|
|
258
309
|
}
|
|
259
310
|
sendLastFrame(currentSessionId, true);
|
|
260
311
|
this.queue.put(SynthesizeStream.END_OF_STREAM);
|
|
261
|
-
|
|
312
|
+
await resourceCleanup();
|
|
313
|
+
completionFuture.resolve();
|
|
314
|
+
return;
|
|
262
315
|
case "session.closed":
|
|
263
|
-
resourceCleanup();
|
|
264
|
-
|
|
316
|
+
await resourceCleanup();
|
|
317
|
+
completionFuture.resolve();
|
|
318
|
+
return;
|
|
265
319
|
case "error":
|
|
266
320
|
this.#logger.error(
|
|
267
321
|
{ serverEvent },
|
|
268
322
|
"Received error message from LiveKit TTS WebSocket"
|
|
269
323
|
);
|
|
270
|
-
resourceCleanup();
|
|
271
|
-
|
|
324
|
+
await resourceCleanup();
|
|
325
|
+
completionFuture.reject(
|
|
326
|
+
new APIError(`LiveKit TTS returned error: ${serverEvent.message}`)
|
|
327
|
+
);
|
|
328
|
+
return;
|
|
272
329
|
default:
|
|
273
330
|
this.#logger.warn("Unexpected message %s", serverEvent);
|
|
274
331
|
break;
|
|
@@ -284,15 +341,72 @@ class SynthesizeStream extends BaseSynthesizeStream {
|
|
|
284
341
|
}
|
|
285
342
|
};
|
|
286
343
|
try {
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
344
|
+
await this.tts.pool.withConnection(
|
|
345
|
+
async (ws) => {
|
|
346
|
+
try {
|
|
347
|
+
const runController = new AbortController();
|
|
348
|
+
const onStreamAbort = () => runController.abort(this.abortController.signal.reason);
|
|
349
|
+
this.abortController.signal.addEventListener("abort", onStreamAbort, { once: true });
|
|
350
|
+
const tasks = [
|
|
351
|
+
Task.from(
|
|
352
|
+
async (controller) => {
|
|
353
|
+
const combined = combineSignals(runController.signal, controller.signal);
|
|
354
|
+
await createInputTask(combined);
|
|
355
|
+
},
|
|
356
|
+
void 0,
|
|
357
|
+
"inference-tts-input"
|
|
358
|
+
),
|
|
359
|
+
Task.from(
|
|
360
|
+
async (controller) => {
|
|
361
|
+
const combined = combineSignals(runController.signal, controller.signal);
|
|
362
|
+
await createSentenceStreamTask(ws, combined);
|
|
363
|
+
},
|
|
364
|
+
void 0,
|
|
365
|
+
"inference-tts-sentence"
|
|
366
|
+
),
|
|
367
|
+
Task.from(
|
|
368
|
+
async (controller) => {
|
|
369
|
+
const combined = combineSignals(runController.signal, controller.signal);
|
|
370
|
+
await createWsListenerTask(ws, combined);
|
|
371
|
+
},
|
|
372
|
+
void 0,
|
|
373
|
+
"inference-tts-ws-listener"
|
|
374
|
+
),
|
|
375
|
+
Task.from(
|
|
376
|
+
async (controller) => {
|
|
377
|
+
const combined = combineSignals(runController.signal, controller.signal);
|
|
378
|
+
await createRecvTask(combined);
|
|
379
|
+
},
|
|
380
|
+
void 0,
|
|
381
|
+
"inference-tts-recv"
|
|
382
|
+
)
|
|
383
|
+
];
|
|
384
|
+
try {
|
|
385
|
+
await Promise.all(tasks.map((t) => t.result));
|
|
386
|
+
} finally {
|
|
387
|
+
inputSentEvent.set();
|
|
388
|
+
await resourceCleanup();
|
|
389
|
+
await cancelAndWait(tasks, 5e3);
|
|
390
|
+
this.abortController.signal.removeEventListener("abort", onStreamAbort);
|
|
391
|
+
}
|
|
392
|
+
} catch (e) {
|
|
393
|
+
if (e instanceof Error && e.name === "AbortError") {
|
|
394
|
+
return;
|
|
395
|
+
}
|
|
396
|
+
throw e;
|
|
397
|
+
}
|
|
398
|
+
},
|
|
399
|
+
{
|
|
400
|
+
timeout: this.connOptions.timeoutMs
|
|
401
|
+
}
|
|
402
|
+
);
|
|
403
|
+
} catch (e) {
|
|
404
|
+
if (e instanceof Error && e.name === "AbortError") {
|
|
405
|
+
return;
|
|
406
|
+
}
|
|
407
|
+
throw e;
|
|
294
408
|
} finally {
|
|
295
|
-
resourceCleanup();
|
|
409
|
+
await resourceCleanup();
|
|
296
410
|
}
|
|
297
411
|
}
|
|
298
412
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../../src/inference/tts.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2025 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { WebSocket } from 'ws';\nimport { APIError, APIStatusError } from '../_exceptions.js';\nimport { AudioByteStream } from '../audio.js';\nimport { log } from '../log.js';\nimport { createStreamChannel } from '../stream/stream_channel.js';\nimport { basic as tokenizeBasic } from '../tokenize/index.js';\nimport type { ChunkedStream } from '../tts/index.js';\nimport { SynthesizeStream as BaseSynthesizeStream, TTS as BaseTTS } from '../tts/index.js';\nimport { type APIConnectOptions, DEFAULT_API_CONNECT_OPTIONS } from '../types.js';\nimport { shortuuid } from '../utils.js';\nimport {\n type TtsClientEvent,\n type TtsServerEvent,\n type TtsSessionCreateEvent,\n ttsClientEventSchema,\n ttsServerEventSchema,\n} from './api_protos.js';\nimport { type AnyString, connectWs, createAccessToken } from './utils.js';\n\nexport type CartesiaModels =\n | 'cartesia'\n | 'cartesia/sonic'\n | 'cartesia/sonic-2'\n | 'cartesia/sonic-turbo';\n\nexport type ElevenlabsModels =\n | 'elevenlabs'\n | 'elevenlabs/eleven_flash_v2'\n | 'elevenlabs/eleven_flash_v2_5'\n | 'elevenlabs/eleven_turbo_v2'\n | 'elevenlabs/eleven_turbo_v2_5'\n | 'elevenlabs/eleven_multilingual_v2';\n\nexport type RimeModels = 'rime' | 'rime/mist' | 'rime/mistv2' | 'rime/arcana';\n\nexport type InworldModels = 'inworld' | 'inworld/inworld-tts-1';\n\nexport interface CartesiaOptions {\n duration?: number; // max duration of audio in seconds\n speed?: 'slow' | 'normal' | 'fast'; // default: not specified\n}\n\nexport interface ElevenlabsOptions {\n inactivity_timeout?: number; // default: 60\n apply_text_normalization?: 'auto' | 'off' | 'on'; // default: \"auto\"\n}\n\nexport interface RimeOptions {}\n\nexport interface InworldOptions {}\n\ntype _TTSModels = CartesiaModels | ElevenlabsModels | RimeModels | InworldModels;\n\nexport type TTSModels = CartesiaModels | ElevenlabsModels | RimeModels | InworldModels | AnyString;\n\nexport type ModelWithVoice = `${_TTSModels}:${string}` | TTSModels;\n\nexport type TTSOptions<TModel extends TTSModels> = TModel extends CartesiaModels\n ? CartesiaOptions\n : TModel extends ElevenlabsModels\n ? ElevenlabsOptions\n : TModel extends RimeOptions\n ? RimeOptions\n : TModel extends InworldOptions\n ? InworldOptions\n : Record<string, unknown>;\n\ntype TTSEncoding = 'pcm_s16le';\n\nconst DEFAULT_ENCODING: TTSEncoding = 'pcm_s16le';\nconst DEFAULT_SAMPLE_RATE = 16000;\nconst DEFAULT_BASE_URL = 'https://agent-gateway.livekit.cloud/v1';\nconst NUM_CHANNELS = 1;\nconst DEFAULT_LANGUAGE = 'en';\n\nexport interface InferenceTTSOptions<TModel extends TTSModels> {\n model?: TModel;\n voice?: string;\n language?: string;\n encoding: TTSEncoding;\n sampleRate: number;\n baseURL: string;\n apiKey: string;\n apiSecret: string;\n modelOptions: TTSOptions<TModel>;\n}\n\n/**\n * Livekit Cloud Inference TTS\n */\nexport class TTS<TModel extends TTSModels> extends BaseTTS {\n private opts: InferenceTTSOptions<TModel>;\n private streams: Set<SynthesizeStream<TModel>> = new Set();\n\n #logger = log();\n\n constructor(opts: {\n model: TModel;\n voice?: string;\n language?: string;\n baseURL?: string;\n encoding?: TTSEncoding;\n sampleRate?: number;\n apiKey?: string;\n apiSecret?: string;\n modelOptions?: TTSOptions<TModel>;\n }) {\n const sampleRate = opts?.sampleRate ?? DEFAULT_SAMPLE_RATE;\n super(sampleRate, 1, { streaming: true });\n\n const {\n model,\n voice,\n language = DEFAULT_LANGUAGE,\n baseURL,\n encoding = DEFAULT_ENCODING,\n apiKey,\n apiSecret,\n modelOptions = {} as TTSOptions<TModel>,\n } = opts || {};\n\n const lkBaseURL = baseURL || process.env.LIVEKIT_INFERENCE_URL || DEFAULT_BASE_URL;\n const lkApiKey = apiKey || process.env.LIVEKIT_INFERENCE_API_KEY || process.env.LIVEKIT_API_KEY;\n if (!lkApiKey) {\n throw new Error('apiKey is required: pass apiKey or set LIVEKIT_API_KEY');\n }\n\n const lkApiSecret =\n apiSecret || process.env.LIVEKIT_INFERENCE_API_SECRET || process.env.LIVEKIT_API_SECRET;\n if (!lkApiSecret) {\n throw new Error('apiSecret is required: pass apiSecret or set LIVEKIT_API_SECRET');\n }\n\n // read voice id from the model if provided: \"provider/model:voice_id\"\n let nextModel = model;\n let nextVoice = voice;\n if (typeof nextModel === 'string') {\n const idx = nextModel.lastIndexOf(':');\n if (idx !== -1) {\n const voiceFromModel = nextModel.slice(idx + 1);\n if (nextVoice && nextVoice !== voiceFromModel) {\n this.#logger.warn(\n '`voice` is provided via both argument and model, using the one from the argument',\n { voice: nextVoice, model: nextModel },\n );\n } else {\n nextVoice = voiceFromModel;\n }\n nextModel = nextModel.slice(0, idx) as TModel;\n }\n }\n\n this.opts = {\n model: nextModel,\n voice: nextVoice,\n language,\n encoding,\n sampleRate,\n baseURL: lkBaseURL,\n apiKey: lkApiKey,\n apiSecret: lkApiSecret,\n modelOptions,\n };\n }\n\n get label() {\n return 'inference.TTS';\n }\n\n static fromModelString(modelString: string): TTS<AnyString> {\n if (modelString.includes(':')) {\n const [model, voice] = modelString.split(':') as [TTSModels, string];\n return new TTS({ model, voice });\n }\n return new TTS({ model: modelString });\n }\n\n updateOptions(opts: Partial<Pick<InferenceTTSOptions<TModel>, 'model' | 'voice' | 'language'>>) {\n this.opts = { ...this.opts, ...opts };\n for (const stream of this.streams) {\n stream.updateOptions(opts);\n }\n }\n\n synthesize(_: string): ChunkedStream {\n throw new Error('ChunkedStream is not implemented');\n }\n\n stream(options?: { connOptions?: APIConnectOptions }): SynthesizeStream<TModel> {\n const { connOptions = DEFAULT_API_CONNECT_OPTIONS } = options || {};\n const stream = new SynthesizeStream(this, { ...this.opts }, connOptions);\n this.streams.add(stream);\n return stream;\n }\n\n async connectWs(timeout: number): Promise<WebSocket> {\n let baseURL = this.opts.baseURL;\n if (baseURL.startsWith('http://') || baseURL.startsWith('https://')) {\n baseURL = baseURL.replace('http', 'ws');\n }\n\n const token = await createAccessToken(this.opts.apiKey, this.opts.apiSecret);\n const url = `${baseURL}/tts`;\n const headers = { Authorization: `Bearer ${token}` } as Record<string, string>;\n\n const params = {\n type: 'session.create',\n sample_rate: String(this.opts.sampleRate),\n encoding: this.opts.encoding,\n extra: this.opts.modelOptions,\n } as TtsSessionCreateEvent;\n\n if (this.opts.voice) params.voice = this.opts.voice;\n if (this.opts.model) params.model = this.opts.model;\n if (this.opts.language) params.language = this.opts.language;\n\n const socket = await connectWs(url, headers, timeout);\n socket.send(JSON.stringify(params));\n return socket;\n }\n\n async closeWs(ws: WebSocket) {\n await ws.close();\n }\n\n async close() {\n for (const stream of this.streams) {\n await stream.close();\n }\n this.streams.clear();\n }\n}\n\nexport class SynthesizeStream<TModel extends TTSModels> extends BaseSynthesizeStream {\n private opts: InferenceTTSOptions<TModel>;\n private tts: TTS<TModel>;\n private connOptions: APIConnectOptions;\n\n #logger = log();\n\n constructor(tts: TTS<TModel>, opts: InferenceTTSOptions<TModel>, connOptions: APIConnectOptions) {\n super(tts, connOptions);\n this.opts = opts;\n this.tts = tts;\n this.connOptions = connOptions;\n }\n\n get label() {\n return 'inference.SynthesizeStream';\n }\n\n updateOptions(opts: Partial<Pick<InferenceTTSOptions<TModel>, 'model' | 'voice' | 'language'>>) {\n this.opts = { ...this.opts, ...opts };\n }\n\n protected async run(): Promise<void> {\n let ws: WebSocket | null = null;\n let closing = false;\n let finalReceived = false;\n let lastFrame: AudioFrame | undefined;\n\n const sendTokenizerStream = new tokenizeBasic.SentenceTokenizer().stream();\n const eventChannel = createStreamChannel<TtsServerEvent>();\n const requestId = shortuuid('tts_request_');\n\n const resourceCleanup = () => {\n if (closing) return;\n closing = true;\n sendTokenizerStream.close();\n eventChannel.close();\n ws?.removeAllListeners();\n ws?.close();\n };\n\n const sendClientEvent = async (event: TtsClientEvent) => {\n // Don't send events to a closed WebSocket or aborted controller\n if (this.abortController.signal.aborted || closing) return;\n\n const validatedEvent = await ttsClientEventSchema.parseAsync(event);\n if (!ws || ws.readyState !== WebSocket.OPEN) {\n this.#logger.warn('Trying to send client TTS event to a closed WebSocket');\n return;\n }\n ws.send(JSON.stringify(validatedEvent));\n };\n\n const sendLastFrame = (segmentId: string, final: boolean) => {\n if (lastFrame) {\n this.queue.put({ requestId, segmentId, frame: lastFrame, final });\n lastFrame = undefined;\n }\n };\n\n const createInputTask = async () => {\n for await (const data of this.input) {\n if (this.abortController.signal.aborted || closing) break;\n if (data === SynthesizeStream.FLUSH_SENTINEL) {\n sendTokenizerStream.flush();\n continue;\n }\n sendTokenizerStream.pushText(data);\n }\n // Only call endInput if the stream hasn't been closed by cleanup\n if (!closing) {\n sendTokenizerStream.endInput();\n }\n };\n\n const createSentenceStreamTask = async () => {\n for await (const ev of sendTokenizerStream) {\n if (this.abortController.signal.aborted) break;\n\n sendClientEvent({\n type: 'input_transcript',\n transcript: ev.token + ' ',\n });\n }\n\n sendClientEvent({ type: 'session.flush' });\n };\n\n const createWsListenerTask = async (ws: WebSocket) => {\n return new Promise<void>((resolve, reject) => {\n this.abortController.signal.addEventListener('abort', () => {\n resourceCleanup();\n resolve(); // Abort is triggered by close(), which is a normal shutdown, not an error\n });\n\n ws.on('message', async (data) => {\n const eventJson = JSON.parse(data.toString()) as Record<string, unknown>;\n const validatedEvent = ttsServerEventSchema.parse(eventJson);\n eventChannel.write(validatedEvent);\n });\n\n ws.on('error', (e) => {\n this.#logger.error({ error: e }, 'WebSocket error');\n resourceCleanup();\n reject(e);\n });\n\n ws.on('close', () => {\n resourceCleanup();\n\n if (!closing) return this.#logger.error('WebSocket closed unexpectedly');\n if (finalReceived) return resolve();\n\n reject(\n new APIStatusError({\n message: 'Gateway connection closed unexpectedly',\n options: { requestId },\n }),\n );\n });\n });\n };\n\n const createRecvTask = async () => {\n let currentSessionId: string | null = null;\n\n const bstream = new AudioByteStream(this.opts.sampleRate, NUM_CHANNELS);\n const serverEventStream = eventChannel.stream();\n const reader = serverEventStream.getReader();\n\n try {\n while (!this.closed && !this.abortController.signal.aborted) {\n const result = await reader.read();\n if (this.abortController.signal.aborted) return;\n if (result.done) return;\n\n const serverEvent = result.value;\n switch (serverEvent.type) {\n case 'session.created':\n currentSessionId = serverEvent.session_id;\n break;\n case 'output_audio':\n const base64Data = new Int8Array(Buffer.from(serverEvent.audio, 'base64'));\n for (const frame of bstream.write(base64Data.buffer)) {\n sendLastFrame(currentSessionId!, false);\n lastFrame = frame;\n }\n break;\n case 'done':\n finalReceived = true;\n for (const frame of bstream.flush()) {\n sendLastFrame(currentSessionId!, false);\n lastFrame = frame;\n }\n sendLastFrame(currentSessionId!, true);\n this.queue.put(SynthesizeStream.END_OF_STREAM);\n break;\n case 'session.closed':\n resourceCleanup();\n break;\n case 'error':\n this.#logger.error(\n { serverEvent },\n 'Received error message from LiveKit TTS WebSocket',\n );\n resourceCleanup();\n throw new APIError(`LiveKit TTS returned error: ${serverEvent.message}`);\n default:\n this.#logger.warn('Unexpected message %s', serverEvent);\n break;\n }\n }\n } finally {\n reader.releaseLock();\n try {\n await serverEventStream.cancel();\n } catch (e) {\n this.#logger.debug('Error cancelling serverEventStream (may already be cancelled):', e);\n }\n }\n };\n\n try {\n ws = await this.tts.connectWs(this.connOptions.timeoutMs);\n\n await Promise.all([\n createInputTask(),\n createSentenceStreamTask(),\n createWsListenerTask(ws),\n createRecvTask(),\n ]);\n } finally {\n resourceCleanup();\n }\n }\n}\n"],"mappings":"AAIA,SAAS,iBAAiB;AAC1B,SAAS,UAAU,sBAAsB;AACzC,SAAS,uBAAuB;AAChC,SAAS,WAAW;AACpB,SAAS,2BAA2B;AACpC,SAAS,SAAS,qBAAqB;AAEvC,SAAS,oBAAoB,sBAAsB,OAAO,eAAe;AACzE,SAAiC,mCAAmC;AACpE,SAAS,iBAAiB;AAC1B;AAAA,EAIE;AAAA,EACA;AAAA,OACK;AACP,SAAyB,WAAW,yBAAyB;AAoD7D,MAAM,mBAAgC;AACtC,MAAM,sBAAsB;AAC5B,MAAM,mBAAmB;AACzB,MAAM,eAAe;AACrB,MAAM,mBAAmB;AAiBlB,MAAM,YAAsC,QAAQ;AAAA,EACjD;AAAA,EACA,UAAyC,oBAAI,IAAI;AAAA,EAEzD,UAAU,IAAI;AAAA,EAEd,YAAY,MAUT;AACD,UAAM,cAAa,6BAAM,eAAc;AACvC,UAAM,YAAY,GAAG,EAAE,WAAW,KAAK,CAAC;AAExC,UAAM;AAAA,MACJ;AAAA,MACA;AAAA,MACA,WAAW;AAAA,MACX;AAAA,MACA,WAAW;AAAA,MACX;AAAA,MACA;AAAA,MACA,eAAe,CAAC;AAAA,IAClB,IAAI,QAAQ,CAAC;AAEb,UAAM,YAAY,WAAW,QAAQ,IAAI,yBAAyB;AAClE,UAAM,WAAW,UAAU,QAAQ,IAAI,6BAA6B,QAAQ,IAAI;AAChF,QAAI,CAAC,UAAU;AACb,YAAM,IAAI,MAAM,wDAAwD;AAAA,IAC1E;AAEA,UAAM,cACJ,aAAa,QAAQ,IAAI,gCAAgC,QAAQ,IAAI;AACvE,QAAI,CAAC,aAAa;AAChB,YAAM,IAAI,MAAM,iEAAiE;AAAA,IACnF;AAGA,QAAI,YAAY;AAChB,QAAI,YAAY;AAChB,QAAI,OAAO,cAAc,UAAU;AACjC,YAAM,MAAM,UAAU,YAAY,GAAG;AACrC,UAAI,QAAQ,IAAI;AACd,cAAM,iBAAiB,UAAU,MAAM,MAAM,CAAC;AAC9C,YAAI,aAAa,cAAc,gBAAgB;AAC7C,eAAK,QAAQ;AAAA,YACX;AAAA,YACA,EAAE,OAAO,WAAW,OAAO,UAAU;AAAA,UACvC;AAAA,QACF,OAAO;AACL,sBAAY;AAAA,QACd;AACA,oBAAY,UAAU,MAAM,GAAG,GAAG;AAAA,MACpC;AAAA,IACF;AAEA,SAAK,OAAO;AAAA,MACV,OAAO;AAAA,MACP,OAAO;AAAA,MACP;AAAA,MACA;AAAA,MACA;AAAA,MACA,SAAS;AAAA,MACT,QAAQ;AAAA,MACR,WAAW;AAAA,MACX;AAAA,IACF;AAAA,EACF;AAAA,EAEA,IAAI,QAAQ;AACV,WAAO;AAAA,EACT;AAAA,EAEA,OAAO,gBAAgB,aAAqC;AAC1D,QAAI,YAAY,SAAS,GAAG,GAAG;AAC7B,YAAM,CAAC,OAAO,KAAK,IAAI,YAAY,MAAM,GAAG;AAC5C,aAAO,IAAI,IAAI,EAAE,OAAO,MAAM,CAAC;AAAA,IACjC;AACA,WAAO,IAAI,IAAI,EAAE,OAAO,YAAY,CAAC;AAAA,EACvC;AAAA,EAEA,cAAc,MAAkF;AAC9F,SAAK,OAAO,EAAE,GAAG,KAAK,MAAM,GAAG,KAAK;AACpC,eAAW,UAAU,KAAK,SAAS;AACjC,aAAO,cAAc,IAAI;AAAA,IAC3B;AAAA,EACF;AAAA,EAEA,WAAW,GAA0B;AACnC,UAAM,IAAI,MAAM,kCAAkC;AAAA,EACpD;AAAA,EAEA,OAAO,SAAyE;AAC9E,UAAM,EAAE,cAAc,4BAA4B,IAAI,WAAW,CAAC;AAClE,UAAM,SAAS,IAAI,iBAAiB,MAAM,EAAE,GAAG,KAAK,KAAK,GAAG,WAAW;AACvE,SAAK,QAAQ,IAAI,MAAM;AACvB,WAAO;AAAA,EACT;AAAA,EAEA,MAAM,UAAU,SAAqC;AACnD,QAAI,UAAU,KAAK,KAAK;AACxB,QAAI,QAAQ,WAAW,SAAS,KAAK,QAAQ,WAAW,UAAU,GAAG;AACnE,gBAAU,QAAQ,QAAQ,QAAQ,IAAI;AAAA,IACxC;AAEA,UAAM,QAAQ,MAAM,kBAAkB,KAAK,KAAK,QAAQ,KAAK,KAAK,SAAS;AAC3E,UAAM,MAAM,GAAG,OAAO;AACtB,UAAM,UAAU,EAAE,eAAe,UAAU,KAAK,GAAG;AAEnD,UAAM,SAAS;AAAA,MACb,MAAM;AAAA,MACN,aAAa,OAAO,KAAK,KAAK,UAAU;AAAA,MACxC,UAAU,KAAK,KAAK;AAAA,MACpB,OAAO,KAAK,KAAK;AAAA,IACnB;AAEA,QAAI,KAAK,KAAK,MAAO,QAAO,QAAQ,KAAK,KAAK;AAC9C,QAAI,KAAK,KAAK,MAAO,QAAO,QAAQ,KAAK,KAAK;AAC9C,QAAI,KAAK,KAAK,SAAU,QAAO,WAAW,KAAK,KAAK;AAEpD,UAAM,SAAS,MAAM,UAAU,KAAK,SAAS,OAAO;AACpD,WAAO,KAAK,KAAK,UAAU,MAAM,CAAC;AAClC,WAAO;AAAA,EACT;AAAA,EAEA,MAAM,QAAQ,IAAe;AAC3B,UAAM,GAAG,MAAM;AAAA,EACjB;AAAA,EAEA,MAAM,QAAQ;AACZ,eAAW,UAAU,KAAK,SAAS;AACjC,YAAM,OAAO,MAAM;AAAA,IACrB;AACA,SAAK,QAAQ,MAAM;AAAA,EACrB;AACF;AAEO,MAAM,yBAAmD,qBAAqB;AAAA,EAC3E;AAAA,EACA;AAAA,EACA;AAAA,EAER,UAAU,IAAI;AAAA,EAEd,YAAY,KAAkB,MAAmC,aAAgC;AAC/F,UAAM,KAAK,WAAW;AACtB,SAAK,OAAO;AACZ,SAAK,MAAM;AACX,SAAK,cAAc;AAAA,EACrB;AAAA,EAEA,IAAI,QAAQ;AACV,WAAO;AAAA,EACT;AAAA,EAEA,cAAc,MAAkF;AAC9F,SAAK,OAAO,EAAE,GAAG,KAAK,MAAM,GAAG,KAAK;AAAA,EACtC;AAAA,EAEA,MAAgB,MAAqB;AACnC,QAAI,KAAuB;AAC3B,QAAI,UAAU;AACd,QAAI,gBAAgB;AACpB,QAAI;AAEJ,UAAM,sBAAsB,IAAI,cAAc,kBAAkB,EAAE,OAAO;AACzE,UAAM,eAAe,oBAAoC;AACzD,UAAM,YAAY,UAAU,cAAc;AAE1C,UAAM,kBAAkB,MAAM;AAC5B,UAAI,QAAS;AACb,gBAAU;AACV,0BAAoB,MAAM;AAC1B,mBAAa,MAAM;AACnB,+BAAI;AACJ,+BAAI;AAAA,IACN;AAEA,UAAM,kBAAkB,OAAO,UAA0B;AAEvD,UAAI,KAAK,gBAAgB,OAAO,WAAW,QAAS;AAEpD,YAAM,iBAAiB,MAAM,qBAAqB,WAAW,KAAK;AAClE,UAAI,CAAC,MAAM,GAAG,eAAe,UAAU,MAAM;AAC3C,aAAK,QAAQ,KAAK,uDAAuD;AACzE;AAAA,MACF;AACA,SAAG,KAAK,KAAK,UAAU,cAAc,CAAC;AAAA,IACxC;AAEA,UAAM,gBAAgB,CAAC,WAAmB,UAAmB;AAC3D,UAAI,WAAW;AACb,aAAK,MAAM,IAAI,EAAE,WAAW,WAAW,OAAO,WAAW,MAAM,CAAC;AAChE,oBAAY;AAAA,MACd;AAAA,IACF;AAEA,UAAM,kBAAkB,YAAY;AAClC,uBAAiB,QAAQ,KAAK,OAAO;AACnC,YAAI,KAAK,gBAAgB,OAAO,WAAW,QAAS;AACpD,YAAI,SAAS,iBAAiB,gBAAgB;AAC5C,8BAAoB,MAAM;AAC1B;AAAA,QACF;AACA,4BAAoB,SAAS,IAAI;AAAA,MACnC;AAEA,UAAI,CAAC,SAAS;AACZ,4BAAoB,SAAS;AAAA,MAC/B;AAAA,IACF;AAEA,UAAM,2BAA2B,YAAY;AAC3C,uBAAiB,MAAM,qBAAqB;AAC1C,YAAI,KAAK,gBAAgB,OAAO,QAAS;AAEzC,wBAAgB;AAAA,UACd,MAAM;AAAA,UACN,YAAY,GAAG,QAAQ;AAAA,QACzB,CAAC;AAAA,MACH;AAEA,sBAAgB,EAAE,MAAM,gBAAgB,CAAC;AAAA,IAC3C;AAEA,UAAM,uBAAuB,OAAOA,QAAkB;AACpD,aAAO,IAAI,QAAc,CAAC,SAAS,WAAW;AAC5C,aAAK,gBAAgB,OAAO,iBAAiB,SAAS,MAAM;AAC1D,0BAAgB;AAChB,kBAAQ;AAAA,QACV,CAAC;AAED,QAAAA,IAAG,GAAG,WAAW,OAAO,SAAS;AAC/B,gBAAM,YAAY,KAAK,MAAM,KAAK,SAAS,CAAC;AAC5C,gBAAM,iBAAiB,qBAAqB,MAAM,SAAS;AAC3D,uBAAa,MAAM,cAAc;AAAA,QACnC,CAAC;AAED,QAAAA,IAAG,GAAG,SAAS,CAAC,MAAM;AACpB,eAAK,QAAQ,MAAM,EAAE,OAAO,EAAE,GAAG,iBAAiB;AAClD,0BAAgB;AAChB,iBAAO,CAAC;AAAA,QACV,CAAC;AAED,QAAAA,IAAG,GAAG,SAAS,MAAM;AACnB,0BAAgB;AAEhB,cAAI,CAAC,QAAS,QAAO,KAAK,QAAQ,MAAM,+BAA+B;AACvE,cAAI,cAAe,QAAO,QAAQ;AAElC;AAAA,YACE,IAAI,eAAe;AAAA,cACjB,SAAS;AAAA,cACT,SAAS,EAAE,UAAU;AAAA,YACvB,CAAC;AAAA,UACH;AAAA,QACF,CAAC;AAAA,MACH,CAAC;AAAA,IACH;AAEA,UAAM,iBAAiB,YAAY;AACjC,UAAI,mBAAkC;AAEtC,YAAM,UAAU,IAAI,gBAAgB,KAAK,KAAK,YAAY,YAAY;AACtE,YAAM,oBAAoB,aAAa,OAAO;AAC9C,YAAM,SAAS,kBAAkB,UAAU;AAE3C,UAAI;AACF,eAAO,CAAC,KAAK,UAAU,CAAC,KAAK,gBAAgB,OAAO,SAAS;AAC3D,gBAAM,SAAS,MAAM,OAAO,KAAK;AACjC,cAAI,KAAK,gBAAgB,OAAO,QAAS;AACzC,cAAI,OAAO,KAAM;AAEjB,gBAAM,cAAc,OAAO;AAC3B,kBAAQ,YAAY,MAAM;AAAA,YACxB,KAAK;AACH,iCAAmB,YAAY;AAC/B;AAAA,YACF,KAAK;AACH,oBAAM,aAAa,IAAI,UAAU,OAAO,KAAK,YAAY,OAAO,QAAQ,CAAC;AACzE,yBAAW,SAAS,QAAQ,MAAM,WAAW,MAAM,GAAG;AACpD,8BAAc,kBAAmB,KAAK;AACtC,4BAAY;AAAA,cACd;AACA;AAAA,YACF,KAAK;AACH,8BAAgB;AAChB,yBAAW,SAAS,QAAQ,MAAM,GAAG;AACnC,8BAAc,kBAAmB,KAAK;AACtC,4BAAY;AAAA,cACd;AACA,4BAAc,kBAAmB,IAAI;AACrC,mBAAK,MAAM,IAAI,iBAAiB,aAAa;AAC7C;AAAA,YACF,KAAK;AACH,8BAAgB;AAChB;AAAA,YACF,KAAK;AACH,mBAAK,QAAQ;AAAA,gBACX,EAAE,YAAY;AAAA,gBACd;AAAA,cACF;AACA,8BAAgB;AAChB,oBAAM,IAAI,SAAS,+BAA+B,YAAY,OAAO,EAAE;AAAA,YACzE;AACE,mBAAK,QAAQ,KAAK,yBAAyB,WAAW;AACtD;AAAA,UACJ;AAAA,QACF;AAAA,MACF,UAAE;AACA,eAAO,YAAY;AACnB,YAAI;AACF,gBAAM,kBAAkB,OAAO;AAAA,QACjC,SAAS,GAAG;AACV,eAAK,QAAQ,MAAM,kEAAkE,CAAC;AAAA,QACxF;AAAA,MACF;AAAA,IACF;AAEA,QAAI;AACF,WAAK,MAAM,KAAK,IAAI,UAAU,KAAK,YAAY,SAAS;AAExD,YAAM,QAAQ,IAAI;AAAA,QAChB,gBAAgB;AAAA,QAChB,yBAAyB;AAAA,QACzB,qBAAqB,EAAE;AAAA,QACvB,eAAe;AAAA,MACjB,CAAC;AAAA,IACH,UAAE;AACA,sBAAgB;AAAA,IAClB;AAAA,EACF;AACF;","names":["ws"]}
|
|
1
|
+
{"version":3,"sources":["../../src/inference/tts.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2025 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { WebSocket } from 'ws';\nimport { APIError, APIStatusError } from '../_exceptions.js';\nimport { AudioByteStream } from '../audio.js';\nimport { ConnectionPool } from '../connection_pool.js';\nimport { log } from '../log.js';\nimport { createStreamChannel } from '../stream/stream_channel.js';\nimport { basic as tokenizeBasic } from '../tokenize/index.js';\nimport type { ChunkedStream } from '../tts/index.js';\nimport { SynthesizeStream as BaseSynthesizeStream, TTS as BaseTTS } from '../tts/index.js';\nimport { type APIConnectOptions, DEFAULT_API_CONNECT_OPTIONS } from '../types.js';\nimport { Event, Future, Task, cancelAndWait, combineSignals, shortuuid } from '../utils.js';\nimport {\n type TtsClientEvent,\n type TtsServerEvent,\n type TtsSessionCreateEvent,\n ttsClientEventSchema,\n ttsServerEventSchema,\n} from './api_protos.js';\nimport { type AnyString, connectWs, createAccessToken } from './utils.js';\n\nexport type CartesiaModels =\n | 'cartesia'\n | 'cartesia/sonic'\n | 'cartesia/sonic-2'\n | 'cartesia/sonic-turbo';\n\nexport type ElevenlabsModels =\n | 'elevenlabs'\n | 'elevenlabs/eleven_flash_v2'\n | 'elevenlabs/eleven_flash_v2_5'\n | 'elevenlabs/eleven_turbo_v2'\n | 'elevenlabs/eleven_turbo_v2_5'\n | 'elevenlabs/eleven_multilingual_v2';\n\nexport type RimeModels = 'rime' | 'rime/mist' | 'rime/mistv2' | 'rime/arcana';\n\nexport type InworldModels = 'inworld' | 'inworld/inworld-tts-1';\n\nexport interface CartesiaOptions {\n duration?: number; // max duration of audio in seconds\n speed?: 'slow' | 'normal' | 'fast'; // default: not specified\n}\n\nexport interface ElevenlabsOptions {\n inactivity_timeout?: number; // default: 60\n apply_text_normalization?: 'auto' | 'off' | 'on'; // default: \"auto\"\n}\n\nexport interface RimeOptions {}\n\nexport interface InworldOptions {}\n\ntype _TTSModels = CartesiaModels | ElevenlabsModels | RimeModels | InworldModels;\n\nexport type TTSModels = CartesiaModels | ElevenlabsModels | RimeModels | InworldModels | AnyString;\n\nexport type ModelWithVoice = `${_TTSModels}:${string}` | TTSModels;\n\nexport type TTSOptions<TModel extends TTSModels> = TModel extends CartesiaModels\n ? CartesiaOptions\n : TModel extends ElevenlabsModels\n ? ElevenlabsOptions\n : TModel extends RimeOptions\n ? RimeOptions\n : TModel extends InworldOptions\n ? InworldOptions\n : Record<string, unknown>;\n\ntype TTSEncoding = 'pcm_s16le';\n\nconst DEFAULT_ENCODING: TTSEncoding = 'pcm_s16le';\nconst DEFAULT_SAMPLE_RATE = 16000;\nconst DEFAULT_BASE_URL = 'https://agent-gateway.livekit.cloud/v1';\nconst NUM_CHANNELS = 1;\nconst DEFAULT_LANGUAGE = 'en';\n\nexport interface InferenceTTSOptions<TModel extends TTSModels> {\n model?: TModel;\n voice?: string;\n language?: string;\n encoding: TTSEncoding;\n sampleRate: number;\n baseURL: string;\n apiKey: string;\n apiSecret: string;\n modelOptions: TTSOptions<TModel>;\n}\n\n/**\n * Livekit Cloud Inference TTS\n */\nexport class TTS<TModel extends TTSModels> extends BaseTTS {\n private opts: InferenceTTSOptions<TModel>;\n private streams: Set<SynthesizeStream<TModel>> = new Set();\n pool: ConnectionPool<WebSocket>;\n\n #logger = log();\n\n constructor(opts: {\n model: TModel;\n voice?: string;\n language?: string;\n baseURL?: string;\n encoding?: TTSEncoding;\n sampleRate?: number;\n apiKey?: string;\n apiSecret?: string;\n modelOptions?: TTSOptions<TModel>;\n }) {\n const sampleRate = opts?.sampleRate ?? DEFAULT_SAMPLE_RATE;\n super(sampleRate, 1, { streaming: true });\n\n const {\n model,\n voice,\n language = DEFAULT_LANGUAGE,\n baseURL,\n encoding = DEFAULT_ENCODING,\n apiKey,\n apiSecret,\n modelOptions = {} as TTSOptions<TModel>,\n } = opts || {};\n\n const lkBaseURL = baseURL || process.env.LIVEKIT_INFERENCE_URL || DEFAULT_BASE_URL;\n const lkApiKey = apiKey || process.env.LIVEKIT_INFERENCE_API_KEY || process.env.LIVEKIT_API_KEY;\n if (!lkApiKey) {\n throw new Error('apiKey is required: pass apiKey or set LIVEKIT_API_KEY');\n }\n\n const lkApiSecret =\n apiSecret || process.env.LIVEKIT_INFERENCE_API_SECRET || process.env.LIVEKIT_API_SECRET;\n if (!lkApiSecret) {\n throw new Error('apiSecret is required: pass apiSecret or set LIVEKIT_API_SECRET');\n }\n\n // read voice id from the model if provided: \"provider/model:voice_id\"\n let nextModel = model;\n let nextVoice = voice;\n if (typeof nextModel === 'string') {\n const idx = nextModel.lastIndexOf(':');\n if (idx !== -1) {\n const voiceFromModel = nextModel.slice(idx + 1);\n if (nextVoice && nextVoice !== voiceFromModel) {\n this.#logger.warn(\n '`voice` is provided via both argument and model, using the one from the argument',\n { voice: nextVoice, model: nextModel },\n );\n } else {\n nextVoice = voiceFromModel;\n }\n nextModel = nextModel.slice(0, idx) as TModel;\n }\n }\n\n this.opts = {\n model: nextModel,\n voice: nextVoice,\n language,\n encoding,\n sampleRate,\n baseURL: lkBaseURL,\n apiKey: lkApiKey,\n apiSecret: lkApiSecret,\n modelOptions,\n };\n\n // Initialize connection pool\n this.pool = new ConnectionPool<WebSocket>({\n connectCb: (timeout) => this.connectWs(timeout),\n closeCb: (ws) => this.closeWs(ws),\n maxSessionDuration: 300_000,\n markRefreshedOnGet: true,\n connectTimeout: 10_000, // 10 seconds default\n });\n }\n\n get label() {\n return 'inference.TTS';\n }\n\n static fromModelString(modelString: string): TTS<AnyString> {\n if (modelString.includes(':')) {\n const [model, voice] = modelString.split(':') as [TTSModels, string];\n return new TTS({ model, voice });\n }\n return new TTS({ model: modelString });\n }\n\n updateOptions(opts: Partial<Pick<InferenceTTSOptions<TModel>, 'model' | 'voice' | 'language'>>) {\n this.opts = { ...this.opts, ...opts };\n for (const stream of this.streams) {\n stream.updateOptions(opts);\n }\n }\n\n synthesize(_: string): ChunkedStream {\n throw new Error('ChunkedStream is not implemented');\n }\n\n stream(options?: { connOptions?: APIConnectOptions }): SynthesizeStream<TModel> {\n const { connOptions = DEFAULT_API_CONNECT_OPTIONS } = options || {};\n const stream = new SynthesizeStream(this, { ...this.opts }, connOptions);\n this.streams.add(stream);\n return stream;\n }\n\n async connectWs(timeout: number): Promise<WebSocket> {\n let baseURL = this.opts.baseURL;\n if (baseURL.startsWith('http://') || baseURL.startsWith('https://')) {\n baseURL = baseURL.replace('http', 'ws');\n }\n\n const token = await createAccessToken(this.opts.apiKey, this.opts.apiSecret);\n const url = `${baseURL}/tts`;\n const headers = { Authorization: `Bearer ${token}` } as Record<string, string>;\n\n const params = {\n type: 'session.create',\n sample_rate: String(this.opts.sampleRate),\n encoding: this.opts.encoding,\n extra: this.opts.modelOptions,\n } as TtsSessionCreateEvent;\n\n if (this.opts.voice) params.voice = this.opts.voice;\n if (this.opts.model) params.model = this.opts.model;\n if (this.opts.language) params.language = this.opts.language;\n\n this.#logger.debug({ url }, 'inference.TTS creating new websocket connection (pool miss)');\n const socket = await connectWs(url, headers, timeout);\n socket.send(JSON.stringify(params));\n return socket;\n }\n\n async closeWs(ws: WebSocket) {\n await ws.close();\n }\n\n prewarm(): void {\n this.pool.prewarm();\n }\n\n async close() {\n for (const stream of this.streams) {\n await stream.close();\n }\n this.streams.clear();\n await this.pool.close();\n }\n}\n\nexport class SynthesizeStream<TModel extends TTSModels> extends BaseSynthesizeStream {\n private opts: InferenceTTSOptions<TModel>;\n private tts: TTS<TModel>;\n\n #logger = log();\n\n constructor(tts: TTS<TModel>, opts: InferenceTTSOptions<TModel>, connOptions: APIConnectOptions) {\n super(tts, connOptions);\n this.opts = opts;\n this.tts = tts;\n }\n\n get label() {\n return 'inference.SynthesizeStream';\n }\n\n updateOptions(opts: Partial<Pick<InferenceTTSOptions<TModel>, 'model' | 'voice' | 'language'>>) {\n this.opts = { ...this.opts, ...opts };\n }\n\n protected async run(): Promise<void> {\n let closing = false;\n let lastFrame: AudioFrame | undefined;\n\n const sendTokenizerStream = new tokenizeBasic.SentenceTokenizer().stream();\n const eventChannel = createStreamChannel<TtsServerEvent>();\n const requestId = shortuuid('tts_request_');\n const inputSentEvent = new Event();\n\n // Signal for protocol-driven completion (when 'done' message is received)\n const completionFuture = new Future<void>();\n\n const resourceCleanup = async () => {\n if (closing) return;\n closing = true;\n sendTokenizerStream.close();\n // close() returns a promise; don't leak it\n await eventChannel.close();\n };\n\n const sendClientEvent = async (event: TtsClientEvent, ws: WebSocket, signal: AbortSignal) => {\n // Don't send events to a closed WebSocket or aborted controller\n if (signal.aborted || closing) return;\n\n const validatedEvent = await ttsClientEventSchema.parseAsync(event);\n if (ws.readyState !== WebSocket.OPEN) {\n this.#logger.warn('Trying to send client TTS event to a closed WebSocket');\n return;\n }\n ws.send(JSON.stringify(validatedEvent));\n };\n\n const sendLastFrame = (segmentId: string, final: boolean) => {\n if (lastFrame) {\n this.queue.put({ requestId, segmentId, frame: lastFrame, final });\n lastFrame = undefined;\n }\n };\n\n const createInputTask = async (signal: AbortSignal) => {\n for await (const data of this.input) {\n if (signal.aborted || closing) break;\n if (data === SynthesizeStream.FLUSH_SENTINEL) {\n sendTokenizerStream.flush();\n continue;\n }\n sendTokenizerStream.pushText(data);\n }\n // Only call endInput if the stream hasn't been closed by cleanup\n if (!closing) {\n sendTokenizerStream.endInput();\n }\n };\n\n const createSentenceStreamTask = async (ws: WebSocket, signal: AbortSignal) => {\n for await (const ev of sendTokenizerStream) {\n if (signal.aborted || closing) break;\n\n await sendClientEvent(\n {\n type: 'input_transcript',\n transcript: ev.token + ' ',\n },\n ws,\n signal,\n );\n inputSentEvent.set();\n }\n\n await sendClientEvent({ type: 'session.flush' }, ws, signal);\n // needed in case empty input is sent\n inputSentEvent.set();\n };\n\n // Handles WebSocket message routing and error handling\n // Completes based on protocol messages, NOT on ws.close()\n const createWsListenerTask = async (ws: WebSocket, signal: AbortSignal) => {\n const onMessage = (data: Buffer) => {\n try {\n const eventJson = JSON.parse(data.toString()) as Record<string, unknown>;\n const validatedEvent = ttsServerEventSchema.parse(eventJson);\n // writer.write returns a promise; avoid unhandled rejections if stream is closed\n void eventChannel.write(validatedEvent).catch((error) => {\n this.#logger.debug(\n { error },\n 'Failed writing TTS event to stream channel (likely closed)',\n );\n });\n } catch (e) {\n this.#logger.error({ error: e }, 'Error parsing WebSocket message');\n }\n };\n\n const onError = (e: Error) => {\n this.#logger.error({ error: e }, 'WebSocket error');\n void resourceCleanup();\n try {\n // If the ws is misbehaving, hard-stop it immediately to avoid buffering.\n ws.terminate?.();\n } catch {\n // ignore\n }\n // Ensure this ws is not reused\n this.tts.pool.remove(ws);\n completionFuture.reject(e);\n };\n\n const onClose = () => {\n // WebSocket closed unexpectedly (not by us)\n if (!closing) {\n this.#logger.error('WebSocket closed unexpectedly');\n void resourceCleanup();\n // Ensure this ws is not reused\n this.tts.pool.remove(ws);\n completionFuture.reject(\n new APIStatusError({\n message: 'Gateway connection closed unexpectedly',\n options: { requestId },\n }),\n );\n }\n };\n\n const onAbort = () => {\n void resourceCleanup();\n try {\n // On interruption/abort, close the websocket immediately so the server stops streaming\n // and the ws library doesn't buffer unread frames in memory.\n ws.terminate?.();\n } catch {\n // ignore\n }\n this.tts.pool.remove(ws);\n inputSentEvent.set();\n completionFuture.resolve();\n };\n\n // Attach listeners\n ws.on('message', onMessage);\n ws.on('error', onError);\n ws.on('close', onClose);\n signal.addEventListener('abort', onAbort);\n\n try {\n // Wait for protocol-driven completion or error\n await completionFuture.await;\n } finally {\n // IMPORTANT: Remove listeners so connection can be reused\n ws.off('message', onMessage);\n ws.off('error', onError);\n ws.off('close', onClose);\n signal.removeEventListener('abort', onAbort);\n }\n };\n\n const createRecvTask = async (signal: AbortSignal) => {\n let currentSessionId: string | null = null;\n\n const bstream = new AudioByteStream(this.opts.sampleRate, NUM_CHANNELS);\n const serverEventStream = eventChannel.stream();\n const reader = serverEventStream.getReader();\n\n try {\n await inputSentEvent.wait();\n\n while (!this.closed && !signal.aborted) {\n const result = await reader.read();\n if (signal.aborted) return;\n if (result.done) return;\n\n const serverEvent = result.value;\n switch (serverEvent.type) {\n case 'session.created':\n currentSessionId = serverEvent.session_id;\n break;\n case 'output_audio':\n const base64Data = new Int8Array(Buffer.from(serverEvent.audio, 'base64'));\n for (const frame of bstream.write(base64Data.buffer)) {\n sendLastFrame(currentSessionId!, false);\n lastFrame = frame;\n }\n break;\n case 'done':\n for (const frame of bstream.flush()) {\n sendLastFrame(currentSessionId!, false);\n lastFrame = frame;\n }\n sendLastFrame(currentSessionId!, true);\n this.queue.put(SynthesizeStream.END_OF_STREAM);\n await resourceCleanup();\n completionFuture.resolve();\n return;\n case 'session.closed':\n await resourceCleanup();\n completionFuture.resolve();\n return;\n case 'error':\n this.#logger.error(\n { serverEvent },\n 'Received error message from LiveKit TTS WebSocket',\n );\n await resourceCleanup();\n completionFuture.reject(\n new APIError(`LiveKit TTS returned error: ${serverEvent.message}`),\n );\n return;\n default:\n this.#logger.warn('Unexpected message %s', serverEvent);\n break;\n }\n }\n } finally {\n reader.releaseLock();\n try {\n await serverEventStream.cancel();\n } catch (e) {\n this.#logger.debug('Error cancelling serverEventStream (may already be cancelled):', e);\n }\n }\n };\n\n try {\n await this.tts.pool.withConnection(\n async (ws: WebSocket) => {\n try {\n // IMPORTANT: don't cancel the stream's controller on normal completion,\n // otherwise the pool will remove+close the ws and every run becomes a pool miss.\n const runController = new AbortController();\n const onStreamAbort = () => runController.abort(this.abortController.signal.reason);\n this.abortController.signal.addEventListener('abort', onStreamAbort, { once: true });\n\n const tasks = [\n Task.from(\n async (controller) => {\n const combined = combineSignals(runController.signal, controller.signal);\n await createInputTask(combined);\n },\n undefined,\n 'inference-tts-input',\n ),\n Task.from(\n async (controller) => {\n const combined = combineSignals(runController.signal, controller.signal);\n await createSentenceStreamTask(ws, combined);\n },\n undefined,\n 'inference-tts-sentence',\n ),\n Task.from(\n async (controller) => {\n const combined = combineSignals(runController.signal, controller.signal);\n await createWsListenerTask(ws, combined);\n },\n undefined,\n 'inference-tts-ws-listener',\n ),\n Task.from(\n async (controller) => {\n const combined = combineSignals(runController.signal, controller.signal);\n await createRecvTask(combined);\n },\n undefined,\n 'inference-tts-recv',\n ),\n ];\n\n try {\n await Promise.all(tasks.map((t) => t.result));\n } finally {\n // Mirror python finally: unblock recv and cancel all tasks.\n inputSentEvent.set();\n await resourceCleanup();\n await cancelAndWait(tasks, 5000);\n this.abortController.signal.removeEventListener('abort', onStreamAbort);\n }\n } catch (e) {\n // If aborted, don't throw - let cleanup handle it\n if (e instanceof Error && e.name === 'AbortError') {\n return;\n }\n throw e;\n }\n },\n {\n timeout: this.connOptions.timeoutMs,\n },\n );\n } catch (e) {\n // Handle connection errors\n if (e instanceof Error && e.name === 'AbortError') {\n // Abort is expected during normal shutdown\n return;\n }\n throw e;\n } finally {\n // Ensure cleanup always runs (and don't leak the promise)\n await resourceCleanup();\n }\n }\n}\n"],"mappings":"AAIA,SAAS,iBAAiB;AAC1B,SAAS,UAAU,sBAAsB;AACzC,SAAS,uBAAuB;AAChC,SAAS,sBAAsB;AAC/B,SAAS,WAAW;AACpB,SAAS,2BAA2B;AACpC,SAAS,SAAS,qBAAqB;AAEvC,SAAS,oBAAoB,sBAAsB,OAAO,eAAe;AACzE,SAAiC,mCAAmC;AACpE,SAAS,OAAO,QAAQ,MAAM,eAAe,gBAAgB,iBAAiB;AAC9E;AAAA,EAIE;AAAA,EACA;AAAA,OACK;AACP,SAAyB,WAAW,yBAAyB;AAoD7D,MAAM,mBAAgC;AACtC,MAAM,sBAAsB;AAC5B,MAAM,mBAAmB;AACzB,MAAM,eAAe;AACrB,MAAM,mBAAmB;AAiBlB,MAAM,YAAsC,QAAQ;AAAA,EACjD;AAAA,EACA,UAAyC,oBAAI,IAAI;AAAA,EACzD;AAAA,EAEA,UAAU,IAAI;AAAA,EAEd,YAAY,MAUT;AACD,UAAM,cAAa,6BAAM,eAAc;AACvC,UAAM,YAAY,GAAG,EAAE,WAAW,KAAK,CAAC;AAExC,UAAM;AAAA,MACJ;AAAA,MACA;AAAA,MACA,WAAW;AAAA,MACX;AAAA,MACA,WAAW;AAAA,MACX;AAAA,MACA;AAAA,MACA,eAAe,CAAC;AAAA,IAClB,IAAI,QAAQ,CAAC;AAEb,UAAM,YAAY,WAAW,QAAQ,IAAI,yBAAyB;AAClE,UAAM,WAAW,UAAU,QAAQ,IAAI,6BAA6B,QAAQ,IAAI;AAChF,QAAI,CAAC,UAAU;AACb,YAAM,IAAI,MAAM,wDAAwD;AAAA,IAC1E;AAEA,UAAM,cACJ,aAAa,QAAQ,IAAI,gCAAgC,QAAQ,IAAI;AACvE,QAAI,CAAC,aAAa;AAChB,YAAM,IAAI,MAAM,iEAAiE;AAAA,IACnF;AAGA,QAAI,YAAY;AAChB,QAAI,YAAY;AAChB,QAAI,OAAO,cAAc,UAAU;AACjC,YAAM,MAAM,UAAU,YAAY,GAAG;AACrC,UAAI,QAAQ,IAAI;AACd,cAAM,iBAAiB,UAAU,MAAM,MAAM,CAAC;AAC9C,YAAI,aAAa,cAAc,gBAAgB;AAC7C,eAAK,QAAQ;AAAA,YACX;AAAA,YACA,EAAE,OAAO,WAAW,OAAO,UAAU;AAAA,UACvC;AAAA,QACF,OAAO;AACL,sBAAY;AAAA,QACd;AACA,oBAAY,UAAU,MAAM,GAAG,GAAG;AAAA,MACpC;AAAA,IACF;AAEA,SAAK,OAAO;AAAA,MACV,OAAO;AAAA,MACP,OAAO;AAAA,MACP;AAAA,MACA;AAAA,MACA;AAAA,MACA,SAAS;AAAA,MACT,QAAQ;AAAA,MACR,WAAW;AAAA,MACX;AAAA,IACF;AAGA,SAAK,OAAO,IAAI,eAA0B;AAAA,MACxC,WAAW,CAAC,YAAY,KAAK,UAAU,OAAO;AAAA,MAC9C,SAAS,CAAC,OAAO,KAAK,QAAQ,EAAE;AAAA,MAChC,oBAAoB;AAAA,MACpB,oBAAoB;AAAA,MACpB,gBAAgB;AAAA;AAAA,IAClB,CAAC;AAAA,EACH;AAAA,EAEA,IAAI,QAAQ;AACV,WAAO;AAAA,EACT;AAAA,EAEA,OAAO,gBAAgB,aAAqC;AAC1D,QAAI,YAAY,SAAS,GAAG,GAAG;AAC7B,YAAM,CAAC,OAAO,KAAK,IAAI,YAAY,MAAM,GAAG;AAC5C,aAAO,IAAI,IAAI,EAAE,OAAO,MAAM,CAAC;AAAA,IACjC;AACA,WAAO,IAAI,IAAI,EAAE,OAAO,YAAY,CAAC;AAAA,EACvC;AAAA,EAEA,cAAc,MAAkF;AAC9F,SAAK,OAAO,EAAE,GAAG,KAAK,MAAM,GAAG,KAAK;AACpC,eAAW,UAAU,KAAK,SAAS;AACjC,aAAO,cAAc,IAAI;AAAA,IAC3B;AAAA,EACF;AAAA,EAEA,WAAW,GAA0B;AACnC,UAAM,IAAI,MAAM,kCAAkC;AAAA,EACpD;AAAA,EAEA,OAAO,SAAyE;AAC9E,UAAM,EAAE,cAAc,4BAA4B,IAAI,WAAW,CAAC;AAClE,UAAM,SAAS,IAAI,iBAAiB,MAAM,EAAE,GAAG,KAAK,KAAK,GAAG,WAAW;AACvE,SAAK,QAAQ,IAAI,MAAM;AACvB,WAAO;AAAA,EACT;AAAA,EAEA,MAAM,UAAU,SAAqC;AACnD,QAAI,UAAU,KAAK,KAAK;AACxB,QAAI,QAAQ,WAAW,SAAS,KAAK,QAAQ,WAAW,UAAU,GAAG;AACnE,gBAAU,QAAQ,QAAQ,QAAQ,IAAI;AAAA,IACxC;AAEA,UAAM,QAAQ,MAAM,kBAAkB,KAAK,KAAK,QAAQ,KAAK,KAAK,SAAS;AAC3E,UAAM,MAAM,GAAG,OAAO;AACtB,UAAM,UAAU,EAAE,eAAe,UAAU,KAAK,GAAG;AAEnD,UAAM,SAAS;AAAA,MACb,MAAM;AAAA,MACN,aAAa,OAAO,KAAK,KAAK,UAAU;AAAA,MACxC,UAAU,KAAK,KAAK;AAAA,MACpB,OAAO,KAAK,KAAK;AAAA,IACnB;AAEA,QAAI,KAAK,KAAK,MAAO,QAAO,QAAQ,KAAK,KAAK;AAC9C,QAAI,KAAK,KAAK,MAAO,QAAO,QAAQ,KAAK,KAAK;AAC9C,QAAI,KAAK,KAAK,SAAU,QAAO,WAAW,KAAK,KAAK;AAEpD,SAAK,QAAQ,MAAM,EAAE,IAAI,GAAG,6DAA6D;AACzF,UAAM,SAAS,MAAM,UAAU,KAAK,SAAS,OAAO;AACpD,WAAO,KAAK,KAAK,UAAU,MAAM,CAAC;AAClC,WAAO;AAAA,EACT;AAAA,EAEA,MAAM,QAAQ,IAAe;AAC3B,UAAM,GAAG,MAAM;AAAA,EACjB;AAAA,EAEA,UAAgB;AACd,SAAK,KAAK,QAAQ;AAAA,EACpB;AAAA,EAEA,MAAM,QAAQ;AACZ,eAAW,UAAU,KAAK,SAAS;AACjC,YAAM,OAAO,MAAM;AAAA,IACrB;AACA,SAAK,QAAQ,MAAM;AACnB,UAAM,KAAK,KAAK,MAAM;AAAA,EACxB;AACF;AAEO,MAAM,yBAAmD,qBAAqB;AAAA,EAC3E;AAAA,EACA;AAAA,EAER,UAAU,IAAI;AAAA,EAEd,YAAY,KAAkB,MAAmC,aAAgC;AAC/F,UAAM,KAAK,WAAW;AACtB,SAAK,OAAO;AACZ,SAAK,MAAM;AAAA,EACb;AAAA,EAEA,IAAI,QAAQ;AACV,WAAO;AAAA,EACT;AAAA,EAEA,cAAc,MAAkF;AAC9F,SAAK,OAAO,EAAE,GAAG,KAAK,MAAM,GAAG,KAAK;AAAA,EACtC;AAAA,EAEA,MAAgB,MAAqB;AACnC,QAAI,UAAU;AACd,QAAI;AAEJ,UAAM,sBAAsB,IAAI,cAAc,kBAAkB,EAAE,OAAO;AACzE,UAAM,eAAe,oBAAoC;AACzD,UAAM,YAAY,UAAU,cAAc;AAC1C,UAAM,iBAAiB,IAAI,MAAM;AAGjC,UAAM,mBAAmB,IAAI,OAAa;AAE1C,UAAM,kBAAkB,YAAY;AAClC,UAAI,QAAS;AACb,gBAAU;AACV,0BAAoB,MAAM;AAE1B,YAAM,aAAa,MAAM;AAAA,IAC3B;AAEA,UAAM,kBAAkB,OAAO,OAAuB,IAAe,WAAwB;AAE3F,UAAI,OAAO,WAAW,QAAS;AAE/B,YAAM,iBAAiB,MAAM,qBAAqB,WAAW,KAAK;AAClE,UAAI,GAAG,eAAe,UAAU,MAAM;AACpC,aAAK,QAAQ,KAAK,uDAAuD;AACzE;AAAA,MACF;AACA,SAAG,KAAK,KAAK,UAAU,cAAc,CAAC;AAAA,IACxC;AAEA,UAAM,gBAAgB,CAAC,WAAmB,UAAmB;AAC3D,UAAI,WAAW;AACb,aAAK,MAAM,IAAI,EAAE,WAAW,WAAW,OAAO,WAAW,MAAM,CAAC;AAChE,oBAAY;AAAA,MACd;AAAA,IACF;AAEA,UAAM,kBAAkB,OAAO,WAAwB;AACrD,uBAAiB,QAAQ,KAAK,OAAO;AACnC,YAAI,OAAO,WAAW,QAAS;AAC/B,YAAI,SAAS,iBAAiB,gBAAgB;AAC5C,8BAAoB,MAAM;AAC1B;AAAA,QACF;AACA,4BAAoB,SAAS,IAAI;AAAA,MACnC;AAEA,UAAI,CAAC,SAAS;AACZ,4BAAoB,SAAS;AAAA,MAC/B;AAAA,IACF;AAEA,UAAM,2BAA2B,OAAO,IAAe,WAAwB;AAC7E,uBAAiB,MAAM,qBAAqB;AAC1C,YAAI,OAAO,WAAW,QAAS;AAE/B,cAAM;AAAA,UACJ;AAAA,YACE,MAAM;AAAA,YACN,YAAY,GAAG,QAAQ;AAAA,UACzB;AAAA,UACA;AAAA,UACA;AAAA,QACF;AACA,uBAAe,IAAI;AAAA,MACrB;AAEA,YAAM,gBAAgB,EAAE,MAAM,gBAAgB,GAAG,IAAI,MAAM;AAE3D,qBAAe,IAAI;AAAA,IACrB;AAIA,UAAM,uBAAuB,OAAO,IAAe,WAAwB;AACzE,YAAM,YAAY,CAAC,SAAiB;AAClC,YAAI;AACF,gBAAM,YAAY,KAAK,MAAM,KAAK,SAAS,CAAC;AAC5C,gBAAM,iBAAiB,qBAAqB,MAAM,SAAS;AAE3D,eAAK,aAAa,MAAM,cAAc,EAAE,MAAM,CAAC,UAAU;AACvD,iBAAK,QAAQ;AAAA,cACX,EAAE,MAAM;AAAA,cACR;AAAA,YACF;AAAA,UACF,CAAC;AAAA,QACH,SAAS,GAAG;AACV,eAAK,QAAQ,MAAM,EAAE,OAAO,EAAE,GAAG,iCAAiC;AAAA,QACpE;AAAA,MACF;AAEA,YAAM,UAAU,CAAC,MAAa;AA/WpC;AAgXQ,aAAK,QAAQ,MAAM,EAAE,OAAO,EAAE,GAAG,iBAAiB;AAClD,aAAK,gBAAgB;AACrB,YAAI;AAEF,mBAAG,cAAH;AAAA,QACF,QAAQ;AAAA,QAER;AAEA,aAAK,IAAI,KAAK,OAAO,EAAE;AACvB,yBAAiB,OAAO,CAAC;AAAA,MAC3B;AAEA,YAAM,UAAU,MAAM;AAEpB,YAAI,CAAC,SAAS;AACZ,eAAK,QAAQ,MAAM,+BAA+B;AAClD,eAAK,gBAAgB;AAErB,eAAK,IAAI,KAAK,OAAO,EAAE;AACvB,2BAAiB;AAAA,YACf,IAAI,eAAe;AAAA,cACjB,SAAS;AAAA,cACT,SAAS,EAAE,UAAU;AAAA,YACvB,CAAC;AAAA,UACH;AAAA,QACF;AAAA,MACF;AAEA,YAAM,UAAU,MAAM;AA7Y5B;AA8YQ,aAAK,gBAAgB;AACrB,YAAI;AAGF,mBAAG,cAAH;AAAA,QACF,QAAQ;AAAA,QAER;AACA,aAAK,IAAI,KAAK,OAAO,EAAE;AACvB,uBAAe,IAAI;AACnB,yBAAiB,QAAQ;AAAA,MAC3B;AAGA,SAAG,GAAG,WAAW,SAAS;AAC1B,SAAG,GAAG,SAAS,OAAO;AACtB,SAAG,GAAG,SAAS,OAAO;AACtB,aAAO,iBAAiB,SAAS,OAAO;AAExC,UAAI;AAEF,cAAM,iBAAiB;AAAA,MACzB,UAAE;AAEA,WAAG,IAAI,WAAW,SAAS;AAC3B,WAAG,IAAI,SAAS,OAAO;AACvB,WAAG,IAAI,SAAS,OAAO;AACvB,eAAO,oBAAoB,SAAS,OAAO;AAAA,MAC7C;AAAA,IACF;AAEA,UAAM,iBAAiB,OAAO,WAAwB;AACpD,UAAI,mBAAkC;AAEtC,YAAM,UAAU,IAAI,gBAAgB,KAAK,KAAK,YAAY,YAAY;AACtE,YAAM,oBAAoB,aAAa,OAAO;AAC9C,YAAM,SAAS,kBAAkB,UAAU;AAE3C,UAAI;AACF,cAAM,eAAe,KAAK;AAE1B,eAAO,CAAC,KAAK,UAAU,CAAC,OAAO,SAAS;AACtC,gBAAM,SAAS,MAAM,OAAO,KAAK;AACjC,cAAI,OAAO,QAAS;AACpB,cAAI,OAAO,KAAM;AAEjB,gBAAM,cAAc,OAAO;AAC3B,kBAAQ,YAAY,MAAM;AAAA,YACxB,KAAK;AACH,iCAAmB,YAAY;AAC/B;AAAA,YACF,KAAK;AACH,oBAAM,aAAa,IAAI,UAAU,OAAO,KAAK,YAAY,OAAO,QAAQ,CAAC;AACzE,yBAAW,SAAS,QAAQ,MAAM,WAAW,MAAM,GAAG;AACpD,8BAAc,kBAAmB,KAAK;AACtC,4BAAY;AAAA,cACd;AACA;AAAA,YACF,KAAK;AACH,yBAAW,SAAS,QAAQ,MAAM,GAAG;AACnC,8BAAc,kBAAmB,KAAK;AACtC,4BAAY;AAAA,cACd;AACA,4BAAc,kBAAmB,IAAI;AACrC,mBAAK,MAAM,IAAI,iBAAiB,aAAa;AAC7C,oBAAM,gBAAgB;AACtB,+BAAiB,QAAQ;AACzB;AAAA,YACF,KAAK;AACH,oBAAM,gBAAgB;AACtB,+BAAiB,QAAQ;AACzB;AAAA,YACF,KAAK;AACH,mBAAK,QAAQ;AAAA,gBACX,EAAE,YAAY;AAAA,gBACd;AAAA,cACF;AACA,oBAAM,gBAAgB;AACtB,+BAAiB;AAAA,gBACf,IAAI,SAAS,+BAA+B,YAAY,OAAO,EAAE;AAAA,cACnE;AACA;AAAA,YACF;AACE,mBAAK,QAAQ,KAAK,yBAAyB,WAAW;AACtD;AAAA,UACJ;AAAA,QACF;AAAA,MACF,UAAE;AACA,eAAO,YAAY;AACnB,YAAI;AACF,gBAAM,kBAAkB,OAAO;AAAA,QACjC,SAAS,GAAG;AACV,eAAK,QAAQ,MAAM,kEAAkE,CAAC;AAAA,QACxF;AAAA,MACF;AAAA,IACF;AAEA,QAAI;AACF,YAAM,KAAK,IAAI,KAAK;AAAA,QAClB,OAAO,OAAkB;AACvB,cAAI;AAGF,kBAAM,gBAAgB,IAAI,gBAAgB;AAC1C,kBAAM,gBAAgB,MAAM,cAAc,MAAM,KAAK,gBAAgB,OAAO,MAAM;AAClF,iBAAK,gBAAgB,OAAO,iBAAiB,SAAS,eAAe,EAAE,MAAM,KAAK,CAAC;AAEnF,kBAAM,QAAQ;AAAA,cACZ,KAAK;AAAA,gBACH,OAAO,eAAe;AACpB,wBAAM,WAAW,eAAe,cAAc,QAAQ,WAAW,MAAM;AACvE,wBAAM,gBAAgB,QAAQ;AAAA,gBAChC;AAAA,gBACA;AAAA,gBACA;AAAA,cACF;AAAA,cACA,KAAK;AAAA,gBACH,OAAO,eAAe;AACpB,wBAAM,WAAW,eAAe,cAAc,QAAQ,WAAW,MAAM;AACvE,wBAAM,yBAAyB,IAAI,QAAQ;AAAA,gBAC7C;AAAA,gBACA;AAAA,gBACA;AAAA,cACF;AAAA,cACA,KAAK;AAAA,gBACH,OAAO,eAAe;AACpB,wBAAM,WAAW,eAAe,cAAc,QAAQ,WAAW,MAAM;AACvE,wBAAM,qBAAqB,IAAI,QAAQ;AAAA,gBACzC;AAAA,gBACA;AAAA,gBACA;AAAA,cACF;AAAA,cACA,KAAK;AAAA,gBACH,OAAO,eAAe;AACpB,wBAAM,WAAW,eAAe,cAAc,QAAQ,WAAW,MAAM;AACvE,wBAAM,eAAe,QAAQ;AAAA,gBAC/B;AAAA,gBACA;AAAA,gBACA;AAAA,cACF;AAAA,YACF;AAEA,gBAAI;AACF,oBAAM,QAAQ,IAAI,MAAM,IAAI,CAAC,MAAM,EAAE,MAAM,CAAC;AAAA,YAC9C,UAAE;AAEA,6BAAe,IAAI;AACnB,oBAAM,gBAAgB;AACtB,oBAAM,cAAc,OAAO,GAAI;AAC/B,mBAAK,gBAAgB,OAAO,oBAAoB,SAAS,aAAa;AAAA,YACxE;AAAA,UACF,SAAS,GAAG;AAEV,gBAAI,aAAa,SAAS,EAAE,SAAS,cAAc;AACjD;AAAA,YACF;AACA,kBAAM;AAAA,UACR;AAAA,QACF;AAAA,QACA;AAAA,UACE,SAAS,KAAK,YAAY;AAAA,QAC5B;AAAA,MACF;AAAA,IACF,SAAS,GAAG;AAEV,UAAI,aAAa,SAAS,EAAE,SAAS,cAAc;AAEjD;AAAA,MACF;AACA,YAAM;AAAA,IACR,UAAE;AAEA,YAAM,gBAAgB;AAAA,IACxB;AAAA,EACF;AACF;","names":[]}
|
|
@@ -37,9 +37,12 @@ class StreamAdapter extends import_tts.TTS {
|
|
|
37
37
|
this.#tts.on("metrics_collected", (metrics) => {
|
|
38
38
|
this.emit("metrics_collected", metrics);
|
|
39
39
|
});
|
|
40
|
+
this.#tts.on("error", (error) => {
|
|
41
|
+
this.emit("error", error);
|
|
42
|
+
});
|
|
40
43
|
}
|
|
41
|
-
synthesize(text) {
|
|
42
|
-
return this.#tts.synthesize(text);
|
|
44
|
+
synthesize(text, connOptions, abortSignal) {
|
|
45
|
+
return this.#tts.synthesize(text, connOptions, abortSignal);
|
|
43
46
|
}
|
|
44
47
|
stream(options) {
|
|
45
48
|
return new StreamAdapterWrapper(this.#tts, this.#sentenceTokenizer, options == null ? void 0 : options.connOptions);
|
|
@@ -83,7 +86,7 @@ class StreamAdapterWrapper extends import_tts.SynthesizeStream {
|
|
|
83
86
|
this.queue.put(import_tts.SynthesizeStream.END_OF_STREAM);
|
|
84
87
|
};
|
|
85
88
|
const synthesize = async (token, prevTask, controller) => {
|
|
86
|
-
const audioStream = this.#tts.synthesize(token);
|
|
89
|
+
const audioStream = this.#tts.synthesize(token, this.connOptions, this.abortSignal);
|
|
87
90
|
await (prevTask == null ? void 0 : prevTask.result);
|
|
88
91
|
if (controller.signal.aborted) return;
|
|
89
92
|
for await (const audio of audioStream) {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../../src/tts/stream_adapter.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type { SentenceStream, SentenceTokenizer } from '../tokenize/index.js';\nimport type { APIConnectOptions } from '../types.js';\nimport { Task } from '../utils.js';\nimport type { ChunkedStream } from './tts.js';\nimport { SynthesizeStream, TTS } from './tts.js';\n\nexport class StreamAdapter extends TTS {\n #tts: TTS;\n #sentenceTokenizer: SentenceTokenizer;\n label: string;\n\n constructor(tts: TTS, sentenceTokenizer: SentenceTokenizer) {\n super(tts.sampleRate, tts.numChannels, { streaming: true });\n this.#tts = tts;\n this.#sentenceTokenizer = sentenceTokenizer;\n this.label = this.#tts.label;\n this.label = `tts.StreamAdapter<${this.#tts.label}>`;\n\n this.#tts.on('metrics_collected', (metrics) => {\n this.emit('metrics_collected', metrics);\n });\n }\n\n synthesize(text: string): ChunkedStream {\n return this.#tts.synthesize(text);\n }\n\n stream(options?: { connOptions?: APIConnectOptions }): StreamAdapterWrapper {\n return new StreamAdapterWrapper(this.#tts, this.#sentenceTokenizer, options?.connOptions);\n }\n}\n\nexport class StreamAdapterWrapper extends SynthesizeStream {\n #tts: TTS;\n #sentenceStream: SentenceStream;\n label: string;\n\n constructor(tts: TTS, sentenceTokenizer: SentenceTokenizer, connOptions?: APIConnectOptions) {\n super(tts, connOptions);\n this.#tts = tts;\n this.#sentenceStream = sentenceTokenizer.stream();\n this.label = `tts.StreamAdapterWrapper<${this.#tts.label}>`;\n }\n\n protected async run() {\n const forwardInput = async () => {\n for await (const input of this.input) {\n if (this.abortController.signal.aborted) break;\n\n if (input === SynthesizeStream.FLUSH_SENTINEL) {\n this.#sentenceStream.flush();\n } else {\n this.#sentenceStream.pushText(input);\n }\n }\n this.#sentenceStream.endInput();\n this.#sentenceStream.close();\n };\n\n const synthesizeSentenceStream = async () => {\n let task: Task<void> | undefined;\n const tokenCompletionTasks: Task<void>[] = [];\n\n for await (const ev of this.#sentenceStream) {\n if (this.abortController.signal.aborted) break;\n\n // this will enable non-blocking synthesis of the stream of tokens\n task = Task.from(\n (controller) => synthesize(ev.token, task, controller),\n this.abortController,\n );\n\n tokenCompletionTasks.push(task);\n }\n\n await Promise.all(tokenCompletionTasks.map((t) => t.result));\n this.queue.put(SynthesizeStream.END_OF_STREAM);\n };\n\n const synthesize = async (\n token: string,\n prevTask: Task<void> | undefined,\n controller: AbortController,\n ) => {\n const audioStream = this.#tts.synthesize(token);\n\n // wait for previous audio transcription to complete before starting\n // to queuing audio frames of the current token\n await prevTask?.result;\n if (controller.signal.aborted) return;\n\n for await (const audio of audioStream) {\n if (controller.signal.aborted) break;\n this.queue.put(audio);\n }\n };\n\n await Promise.all([forwardInput(), synthesizeSentenceStream()]);\n }\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAKA,mBAAqB;AAErB,iBAAsC;AAE/B,MAAM,sBAAsB,eAAI;AAAA,EACrC;AAAA,EACA;AAAA,EACA;AAAA,EAEA,YAAY,KAAU,mBAAsC;AAC1D,UAAM,IAAI,YAAY,IAAI,aAAa,EAAE,WAAW,KAAK,CAAC;AAC1D,SAAK,OAAO;AACZ,SAAK,qBAAqB;AAC1B,SAAK,QAAQ,KAAK,KAAK;AACvB,SAAK,QAAQ,qBAAqB,KAAK,KAAK,KAAK;AAEjD,SAAK,KAAK,GAAG,qBAAqB,CAAC,YAAY;AAC7C,WAAK,KAAK,qBAAqB,OAAO;AAAA,IACxC,CAAC;AAAA,EACH;AAAA,EAEA,
|
|
1
|
+
{"version":3,"sources":["../../src/tts/stream_adapter.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type { SentenceStream, SentenceTokenizer } from '../tokenize/index.js';\nimport type { APIConnectOptions } from '../types.js';\nimport { Task } from '../utils.js';\nimport type { ChunkedStream } from './tts.js';\nimport { SynthesizeStream, TTS } from './tts.js';\n\nexport class StreamAdapter extends TTS {\n #tts: TTS;\n #sentenceTokenizer: SentenceTokenizer;\n label: string;\n\n constructor(tts: TTS, sentenceTokenizer: SentenceTokenizer) {\n super(tts.sampleRate, tts.numChannels, { streaming: true });\n this.#tts = tts;\n this.#sentenceTokenizer = sentenceTokenizer;\n this.label = this.#tts.label;\n this.label = `tts.StreamAdapter<${this.#tts.label}>`;\n\n this.#tts.on('metrics_collected', (metrics) => {\n this.emit('metrics_collected', metrics);\n });\n this.#tts.on('error', (error) => {\n this.emit('error', error);\n });\n }\n\n synthesize(\n text: string,\n connOptions?: APIConnectOptions,\n abortSignal?: AbortSignal,\n ): ChunkedStream {\n return this.#tts.synthesize(text, connOptions, abortSignal);\n }\n\n stream(options?: { connOptions?: APIConnectOptions }): StreamAdapterWrapper {\n return new StreamAdapterWrapper(this.#tts, this.#sentenceTokenizer, options?.connOptions);\n }\n}\n\nexport class StreamAdapterWrapper extends SynthesizeStream {\n #tts: TTS;\n #sentenceStream: SentenceStream;\n label: string;\n\n constructor(tts: TTS, sentenceTokenizer: SentenceTokenizer, connOptions?: APIConnectOptions) {\n super(tts, connOptions);\n this.#tts = tts;\n this.#sentenceStream = sentenceTokenizer.stream();\n this.label = `tts.StreamAdapterWrapper<${this.#tts.label}>`;\n }\n\n protected async run() {\n const forwardInput = async () => {\n for await (const input of this.input) {\n if (this.abortController.signal.aborted) break;\n\n if (input === SynthesizeStream.FLUSH_SENTINEL) {\n this.#sentenceStream.flush();\n } else {\n this.#sentenceStream.pushText(input);\n }\n }\n this.#sentenceStream.endInput();\n this.#sentenceStream.close();\n };\n\n const synthesizeSentenceStream = async () => {\n let task: Task<void> | undefined;\n const tokenCompletionTasks: Task<void>[] = [];\n\n for await (const ev of this.#sentenceStream) {\n if (this.abortController.signal.aborted) break;\n\n // this will enable non-blocking synthesis of the stream of tokens\n task = Task.from(\n (controller) => synthesize(ev.token, task, controller),\n this.abortController,\n );\n\n tokenCompletionTasks.push(task);\n }\n\n await Promise.all(tokenCompletionTasks.map((t) => t.result));\n this.queue.put(SynthesizeStream.END_OF_STREAM);\n };\n\n const synthesize = async (\n token: string,\n prevTask: Task<void> | undefined,\n controller: AbortController,\n ) => {\n const audioStream = this.#tts.synthesize(token, this.connOptions, this.abortSignal);\n\n // wait for previous audio transcription to complete before starting\n // to queuing audio frames of the current token\n await prevTask?.result;\n if (controller.signal.aborted) return;\n\n for await (const audio of audioStream) {\n if (controller.signal.aborted) break;\n this.queue.put(audio);\n }\n };\n\n await Promise.all([forwardInput(), synthesizeSentenceStream()]);\n }\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAKA,mBAAqB;AAErB,iBAAsC;AAE/B,MAAM,sBAAsB,eAAI;AAAA,EACrC;AAAA,EACA;AAAA,EACA;AAAA,EAEA,YAAY,KAAU,mBAAsC;AAC1D,UAAM,IAAI,YAAY,IAAI,aAAa,EAAE,WAAW,KAAK,CAAC;AAC1D,SAAK,OAAO;AACZ,SAAK,qBAAqB;AAC1B,SAAK,QAAQ,KAAK,KAAK;AACvB,SAAK,QAAQ,qBAAqB,KAAK,KAAK,KAAK;AAEjD,SAAK,KAAK,GAAG,qBAAqB,CAAC,YAAY;AAC7C,WAAK,KAAK,qBAAqB,OAAO;AAAA,IACxC,CAAC;AACD,SAAK,KAAK,GAAG,SAAS,CAAC,UAAU;AAC/B,WAAK,KAAK,SAAS,KAAK;AAAA,IAC1B,CAAC;AAAA,EACH;AAAA,EAEA,WACE,MACA,aACA,aACe;AACf,WAAO,KAAK,KAAK,WAAW,MAAM,aAAa,WAAW;AAAA,EAC5D;AAAA,EAEA,OAAO,SAAqE;AAC1E,WAAO,IAAI,qBAAqB,KAAK,MAAM,KAAK,oBAAoB,mCAAS,WAAW;AAAA,EAC1F;AACF;AAEO,MAAM,6BAA6B,4BAAiB;AAAA,EACzD;AAAA,EACA;AAAA,EACA;AAAA,EAEA,YAAY,KAAU,mBAAsC,aAAiC;AAC3F,UAAM,KAAK,WAAW;AACtB,SAAK,OAAO;AACZ,SAAK,kBAAkB,kBAAkB,OAAO;AAChD,SAAK,QAAQ,4BAA4B,KAAK,KAAK,KAAK;AAAA,EAC1D;AAAA,EAEA,MAAgB,MAAM;AACpB,UAAM,eAAe,YAAY;AAC/B,uBAAiB,SAAS,KAAK,OAAO;AACpC,YAAI,KAAK,gBAAgB,OAAO,QAAS;AAEzC,YAAI,UAAU,4BAAiB,gBAAgB;AAC7C,eAAK,gBAAgB,MAAM;AAAA,QAC7B,OAAO;AACL,eAAK,gBAAgB,SAAS,KAAK;AAAA,QACrC;AAAA,MACF;AACA,WAAK,gBAAgB,SAAS;AAC9B,WAAK,gBAAgB,MAAM;AAAA,IAC7B;AAEA,UAAM,2BAA2B,YAAY;AAC3C,UAAI;AACJ,YAAM,uBAAqC,CAAC;AAE5C,uBAAiB,MAAM,KAAK,iBAAiB;AAC3C,YAAI,KAAK,gBAAgB,OAAO,QAAS;AAGzC,eAAO,kBAAK;AAAA,UACV,CAAC,eAAe,WAAW,GAAG,OAAO,MAAM,UAAU;AAAA,UACrD,KAAK;AAAA,QACP;AAEA,6BAAqB,KAAK,IAAI;AAAA,MAChC;AAEA,YAAM,QAAQ,IAAI,qBAAqB,IAAI,CAAC,MAAM,EAAE,MAAM,CAAC;AAC3D,WAAK,MAAM,IAAI,4BAAiB,aAAa;AAAA,IAC/C;AAEA,UAAM,aAAa,OACjB,OACA,UACA,eACG;AACH,YAAM,cAAc,KAAK,KAAK,WAAW,OAAO,KAAK,aAAa,KAAK,WAAW;AAIlF,aAAM,qCAAU;AAChB,UAAI,WAAW,OAAO,QAAS;AAE/B,uBAAiB,SAAS,aAAa;AACrC,YAAI,WAAW,OAAO,QAAS;AAC/B,aAAK,MAAM,IAAI,KAAK;AAAA,MACtB;AAAA,IACF;AAEA,UAAM,QAAQ,IAAI,CAAC,aAAa,GAAG,yBAAyB,CAAC,CAAC;AAAA,EAChE;AACF;","names":[]}
|
|
@@ -6,7 +6,7 @@ export declare class StreamAdapter extends TTS {
|
|
|
6
6
|
#private;
|
|
7
7
|
label: string;
|
|
8
8
|
constructor(tts: TTS, sentenceTokenizer: SentenceTokenizer);
|
|
9
|
-
synthesize(text: string): ChunkedStream;
|
|
9
|
+
synthesize(text: string, connOptions?: APIConnectOptions, abortSignal?: AbortSignal): ChunkedStream;
|
|
10
10
|
stream(options?: {
|
|
11
11
|
connOptions?: APIConnectOptions;
|
|
12
12
|
}): StreamAdapterWrapper;
|
|
@@ -6,7 +6,7 @@ export declare class StreamAdapter extends TTS {
|
|
|
6
6
|
#private;
|
|
7
7
|
label: string;
|
|
8
8
|
constructor(tts: TTS, sentenceTokenizer: SentenceTokenizer);
|
|
9
|
-
synthesize(text: string): ChunkedStream;
|
|
9
|
+
synthesize(text: string, connOptions?: APIConnectOptions, abortSignal?: AbortSignal): ChunkedStream;
|
|
10
10
|
stream(options?: {
|
|
11
11
|
connOptions?: APIConnectOptions;
|
|
12
12
|
}): StreamAdapterWrapper;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"stream_adapter.d.ts","sourceRoot":"","sources":["../../src/tts/stream_adapter.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAkB,iBAAiB,EAAE,MAAM,sBAAsB,CAAC;AAC9E,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,aAAa,CAAC;AAErD,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,UAAU,CAAC;AAC9C,OAAO,EAAE,gBAAgB,EAAE,GAAG,EAAE,MAAM,UAAU,CAAC;AAEjD,qBAAa,aAAc,SAAQ,GAAG;;IAGpC,KAAK,EAAE,MAAM,CAAC;gBAEF,GAAG,EAAE,GAAG,EAAE,iBAAiB,EAAE,iBAAiB;
|
|
1
|
+
{"version":3,"file":"stream_adapter.d.ts","sourceRoot":"","sources":["../../src/tts/stream_adapter.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAkB,iBAAiB,EAAE,MAAM,sBAAsB,CAAC;AAC9E,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,aAAa,CAAC;AAErD,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,UAAU,CAAC;AAC9C,OAAO,EAAE,gBAAgB,EAAE,GAAG,EAAE,MAAM,UAAU,CAAC;AAEjD,qBAAa,aAAc,SAAQ,GAAG;;IAGpC,KAAK,EAAE,MAAM,CAAC;gBAEF,GAAG,EAAE,GAAG,EAAE,iBAAiB,EAAE,iBAAiB;IAe1D,UAAU,CACR,IAAI,EAAE,MAAM,EACZ,WAAW,CAAC,EAAE,iBAAiB,EAC/B,WAAW,CAAC,EAAE,WAAW,GACxB,aAAa;IAIhB,MAAM,CAAC,OAAO,CAAC,EAAE;QAAE,WAAW,CAAC,EAAE,iBAAiB,CAAA;KAAE,GAAG,oBAAoB;CAG5E;AAED,qBAAa,oBAAqB,SAAQ,gBAAgB;;IAGxD,KAAK,EAAE,MAAM,CAAC;gBAEF,GAAG,EAAE,GAAG,EAAE,iBAAiB,EAAE,iBAAiB,EAAE,WAAW,CAAC,EAAE,iBAAiB;cAO3E,GAAG;CAuDpB"}
|
|
@@ -13,9 +13,12 @@ class StreamAdapter extends TTS {
|
|
|
13
13
|
this.#tts.on("metrics_collected", (metrics) => {
|
|
14
14
|
this.emit("metrics_collected", metrics);
|
|
15
15
|
});
|
|
16
|
+
this.#tts.on("error", (error) => {
|
|
17
|
+
this.emit("error", error);
|
|
18
|
+
});
|
|
16
19
|
}
|
|
17
|
-
synthesize(text) {
|
|
18
|
-
return this.#tts.synthesize(text);
|
|
20
|
+
synthesize(text, connOptions, abortSignal) {
|
|
21
|
+
return this.#tts.synthesize(text, connOptions, abortSignal);
|
|
19
22
|
}
|
|
20
23
|
stream(options) {
|
|
21
24
|
return new StreamAdapterWrapper(this.#tts, this.#sentenceTokenizer, options == null ? void 0 : options.connOptions);
|
|
@@ -59,7 +62,7 @@ class StreamAdapterWrapper extends SynthesizeStream {
|
|
|
59
62
|
this.queue.put(SynthesizeStream.END_OF_STREAM);
|
|
60
63
|
};
|
|
61
64
|
const synthesize = async (token, prevTask, controller) => {
|
|
62
|
-
const audioStream = this.#tts.synthesize(token);
|
|
65
|
+
const audioStream = this.#tts.synthesize(token, this.connOptions, this.abortSignal);
|
|
63
66
|
await (prevTask == null ? void 0 : prevTask.result);
|
|
64
67
|
if (controller.signal.aborted) return;
|
|
65
68
|
for await (const audio of audioStream) {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../../src/tts/stream_adapter.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type { SentenceStream, SentenceTokenizer } from '../tokenize/index.js';\nimport type { APIConnectOptions } from '../types.js';\nimport { Task } from '../utils.js';\nimport type { ChunkedStream } from './tts.js';\nimport { SynthesizeStream, TTS } from './tts.js';\n\nexport class StreamAdapter extends TTS {\n #tts: TTS;\n #sentenceTokenizer: SentenceTokenizer;\n label: string;\n\n constructor(tts: TTS, sentenceTokenizer: SentenceTokenizer) {\n super(tts.sampleRate, tts.numChannels, { streaming: true });\n this.#tts = tts;\n this.#sentenceTokenizer = sentenceTokenizer;\n this.label = this.#tts.label;\n this.label = `tts.StreamAdapter<${this.#tts.label}>`;\n\n this.#tts.on('metrics_collected', (metrics) => {\n this.emit('metrics_collected', metrics);\n });\n }\n\n synthesize(text: string): ChunkedStream {\n return this.#tts.synthesize(text);\n }\n\n stream(options?: { connOptions?: APIConnectOptions }): StreamAdapterWrapper {\n return new StreamAdapterWrapper(this.#tts, this.#sentenceTokenizer, options?.connOptions);\n }\n}\n\nexport class StreamAdapterWrapper extends SynthesizeStream {\n #tts: TTS;\n #sentenceStream: SentenceStream;\n label: string;\n\n constructor(tts: TTS, sentenceTokenizer: SentenceTokenizer, connOptions?: APIConnectOptions) {\n super(tts, connOptions);\n this.#tts = tts;\n this.#sentenceStream = sentenceTokenizer.stream();\n this.label = `tts.StreamAdapterWrapper<${this.#tts.label}>`;\n }\n\n protected async run() {\n const forwardInput = async () => {\n for await (const input of this.input) {\n if (this.abortController.signal.aborted) break;\n\n if (input === SynthesizeStream.FLUSH_SENTINEL) {\n this.#sentenceStream.flush();\n } else {\n this.#sentenceStream.pushText(input);\n }\n }\n this.#sentenceStream.endInput();\n this.#sentenceStream.close();\n };\n\n const synthesizeSentenceStream = async () => {\n let task: Task<void> | undefined;\n const tokenCompletionTasks: Task<void>[] = [];\n\n for await (const ev of this.#sentenceStream) {\n if (this.abortController.signal.aborted) break;\n\n // this will enable non-blocking synthesis of the stream of tokens\n task = Task.from(\n (controller) => synthesize(ev.token, task, controller),\n this.abortController,\n );\n\n tokenCompletionTasks.push(task);\n }\n\n await Promise.all(tokenCompletionTasks.map((t) => t.result));\n this.queue.put(SynthesizeStream.END_OF_STREAM);\n };\n\n const synthesize = async (\n token: string,\n prevTask: Task<void> | undefined,\n controller: AbortController,\n ) => {\n const audioStream = this.#tts.synthesize(token);\n\n // wait for previous audio transcription to complete before starting\n // to queuing audio frames of the current token\n await prevTask?.result;\n if (controller.signal.aborted) return;\n\n for await (const audio of audioStream) {\n if (controller.signal.aborted) break;\n this.queue.put(audio);\n }\n };\n\n await Promise.all([forwardInput(), synthesizeSentenceStream()]);\n }\n}\n"],"mappings":"AAKA,SAAS,YAAY;AAErB,SAAS,kBAAkB,WAAW;AAE/B,MAAM,sBAAsB,IAAI;AAAA,EACrC;AAAA,EACA;AAAA,EACA;AAAA,EAEA,YAAY,KAAU,mBAAsC;AAC1D,UAAM,IAAI,YAAY,IAAI,aAAa,EAAE,WAAW,KAAK,CAAC;AAC1D,SAAK,OAAO;AACZ,SAAK,qBAAqB;AAC1B,SAAK,QAAQ,KAAK,KAAK;AACvB,SAAK,QAAQ,qBAAqB,KAAK,KAAK,KAAK;AAEjD,SAAK,KAAK,GAAG,qBAAqB,CAAC,YAAY;AAC7C,WAAK,KAAK,qBAAqB,OAAO;AAAA,IACxC,CAAC;AAAA,EACH;AAAA,EAEA,
|
|
1
|
+
{"version":3,"sources":["../../src/tts/stream_adapter.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type { SentenceStream, SentenceTokenizer } from '../tokenize/index.js';\nimport type { APIConnectOptions } from '../types.js';\nimport { Task } from '../utils.js';\nimport type { ChunkedStream } from './tts.js';\nimport { SynthesizeStream, TTS } from './tts.js';\n\nexport class StreamAdapter extends TTS {\n #tts: TTS;\n #sentenceTokenizer: SentenceTokenizer;\n label: string;\n\n constructor(tts: TTS, sentenceTokenizer: SentenceTokenizer) {\n super(tts.sampleRate, tts.numChannels, { streaming: true });\n this.#tts = tts;\n this.#sentenceTokenizer = sentenceTokenizer;\n this.label = this.#tts.label;\n this.label = `tts.StreamAdapter<${this.#tts.label}>`;\n\n this.#tts.on('metrics_collected', (metrics) => {\n this.emit('metrics_collected', metrics);\n });\n this.#tts.on('error', (error) => {\n this.emit('error', error);\n });\n }\n\n synthesize(\n text: string,\n connOptions?: APIConnectOptions,\n abortSignal?: AbortSignal,\n ): ChunkedStream {\n return this.#tts.synthesize(text, connOptions, abortSignal);\n }\n\n stream(options?: { connOptions?: APIConnectOptions }): StreamAdapterWrapper {\n return new StreamAdapterWrapper(this.#tts, this.#sentenceTokenizer, options?.connOptions);\n }\n}\n\nexport class StreamAdapterWrapper extends SynthesizeStream {\n #tts: TTS;\n #sentenceStream: SentenceStream;\n label: string;\n\n constructor(tts: TTS, sentenceTokenizer: SentenceTokenizer, connOptions?: APIConnectOptions) {\n super(tts, connOptions);\n this.#tts = tts;\n this.#sentenceStream = sentenceTokenizer.stream();\n this.label = `tts.StreamAdapterWrapper<${this.#tts.label}>`;\n }\n\n protected async run() {\n const forwardInput = async () => {\n for await (const input of this.input) {\n if (this.abortController.signal.aborted) break;\n\n if (input === SynthesizeStream.FLUSH_SENTINEL) {\n this.#sentenceStream.flush();\n } else {\n this.#sentenceStream.pushText(input);\n }\n }\n this.#sentenceStream.endInput();\n this.#sentenceStream.close();\n };\n\n const synthesizeSentenceStream = async () => {\n let task: Task<void> | undefined;\n const tokenCompletionTasks: Task<void>[] = [];\n\n for await (const ev of this.#sentenceStream) {\n if (this.abortController.signal.aborted) break;\n\n // this will enable non-blocking synthesis of the stream of tokens\n task = Task.from(\n (controller) => synthesize(ev.token, task, controller),\n this.abortController,\n );\n\n tokenCompletionTasks.push(task);\n }\n\n await Promise.all(tokenCompletionTasks.map((t) => t.result));\n this.queue.put(SynthesizeStream.END_OF_STREAM);\n };\n\n const synthesize = async (\n token: string,\n prevTask: Task<void> | undefined,\n controller: AbortController,\n ) => {\n const audioStream = this.#tts.synthesize(token, this.connOptions, this.abortSignal);\n\n // wait for previous audio transcription to complete before starting\n // to queuing audio frames of the current token\n await prevTask?.result;\n if (controller.signal.aborted) return;\n\n for await (const audio of audioStream) {\n if (controller.signal.aborted) break;\n this.queue.put(audio);\n }\n };\n\n await Promise.all([forwardInput(), synthesizeSentenceStream()]);\n }\n}\n"],"mappings":"AAKA,SAAS,YAAY;AAErB,SAAS,kBAAkB,WAAW;AAE/B,MAAM,sBAAsB,IAAI;AAAA,EACrC;AAAA,EACA;AAAA,EACA;AAAA,EAEA,YAAY,KAAU,mBAAsC;AAC1D,UAAM,IAAI,YAAY,IAAI,aAAa,EAAE,WAAW,KAAK,CAAC;AAC1D,SAAK,OAAO;AACZ,SAAK,qBAAqB;AAC1B,SAAK,QAAQ,KAAK,KAAK;AACvB,SAAK,QAAQ,qBAAqB,KAAK,KAAK,KAAK;AAEjD,SAAK,KAAK,GAAG,qBAAqB,CAAC,YAAY;AAC7C,WAAK,KAAK,qBAAqB,OAAO;AAAA,IACxC,CAAC;AACD,SAAK,KAAK,GAAG,SAAS,CAAC,UAAU;AAC/B,WAAK,KAAK,SAAS,KAAK;AAAA,IAC1B,CAAC;AAAA,EACH;AAAA,EAEA,WACE,MACA,aACA,aACe;AACf,WAAO,KAAK,KAAK,WAAW,MAAM,aAAa,WAAW;AAAA,EAC5D;AAAA,EAEA,OAAO,SAAqE;AAC1E,WAAO,IAAI,qBAAqB,KAAK,MAAM,KAAK,oBAAoB,mCAAS,WAAW;AAAA,EAC1F;AACF;AAEO,MAAM,6BAA6B,iBAAiB;AAAA,EACzD;AAAA,EACA;AAAA,EACA;AAAA,EAEA,YAAY,KAAU,mBAAsC,aAAiC;AAC3F,UAAM,KAAK,WAAW;AACtB,SAAK,OAAO;AACZ,SAAK,kBAAkB,kBAAkB,OAAO;AAChD,SAAK,QAAQ,4BAA4B,KAAK,KAAK,KAAK;AAAA,EAC1D;AAAA,EAEA,MAAgB,MAAM;AACpB,UAAM,eAAe,YAAY;AAC/B,uBAAiB,SAAS,KAAK,OAAO;AACpC,YAAI,KAAK,gBAAgB,OAAO,QAAS;AAEzC,YAAI,UAAU,iBAAiB,gBAAgB;AAC7C,eAAK,gBAAgB,MAAM;AAAA,QAC7B,OAAO;AACL,eAAK,gBAAgB,SAAS,KAAK;AAAA,QACrC;AAAA,MACF;AACA,WAAK,gBAAgB,SAAS;AAC9B,WAAK,gBAAgB,MAAM;AAAA,IAC7B;AAEA,UAAM,2BAA2B,YAAY;AAC3C,UAAI;AACJ,YAAM,uBAAqC,CAAC;AAE5C,uBAAiB,MAAM,KAAK,iBAAiB;AAC3C,YAAI,KAAK,gBAAgB,OAAO,QAAS;AAGzC,eAAO,KAAK;AAAA,UACV,CAAC,eAAe,WAAW,GAAG,OAAO,MAAM,UAAU;AAAA,UACrD,KAAK;AAAA,QACP;AAEA,6BAAqB,KAAK,IAAI;AAAA,MAChC;AAEA,YAAM,QAAQ,IAAI,qBAAqB,IAAI,CAAC,MAAM,EAAE,MAAM,CAAC;AAC3D,WAAK,MAAM,IAAI,iBAAiB,aAAa;AAAA,IAC/C;AAEA,UAAM,aAAa,OACjB,OACA,UACA,eACG;AACH,YAAM,cAAc,KAAK,KAAK,WAAW,OAAO,KAAK,aAAa,KAAK,WAAW;AAIlF,aAAM,qCAAU;AAChB,UAAI,WAAW,OAAO,QAAS;AAE/B,uBAAiB,SAAS,aAAa;AACrC,YAAI,WAAW,OAAO,QAAS;AAC/B,aAAK,MAAM,IAAI,KAAK;AAAA,MACtB;AAAA,IACF;AAEA,UAAM,QAAQ,IAAI,CAAC,aAAa,GAAG,yBAAyB,CAAC,CAAC;AAAA,EAChE;AACF;","names":[]}
|