@livekit/agents 1.0.5 → 1.1.0

This diff reflects the changes between publicly released package versions as they appear in their respective public registries, and is provided for informational purposes only.
Files changed (97)
  1. package/dist/index.cjs +3 -0
  2. package/dist/index.cjs.map +1 -1
  3. package/dist/index.d.cts +2 -1
  4. package/dist/index.d.ts +2 -1
  5. package/dist/index.d.ts.map +1 -1
  6. package/dist/index.js +2 -0
  7. package/dist/index.js.map +1 -1
  8. package/dist/inference/api_protos.cjs +104 -0
  9. package/dist/inference/api_protos.cjs.map +1 -0
  10. package/dist/inference/api_protos.d.cts +222 -0
  11. package/dist/inference/api_protos.d.ts +222 -0
  12. package/dist/inference/api_protos.d.ts.map +1 -0
  13. package/dist/inference/api_protos.js +70 -0
  14. package/dist/inference/api_protos.js.map +1 -0
  15. package/dist/inference/index.cjs +56 -0
  16. package/dist/inference/index.cjs.map +1 -0
  17. package/dist/inference/index.d.cts +9 -0
  18. package/dist/inference/index.d.ts +9 -0
  19. package/dist/inference/index.d.ts.map +1 -0
  20. package/dist/inference/index.js +16 -0
  21. package/dist/inference/index.js.map +1 -0
  22. package/dist/inference/llm.cjs +315 -0
  23. package/dist/inference/llm.cjs.map +1 -0
  24. package/dist/inference/llm.d.cts +92 -0
  25. package/dist/inference/llm.d.ts +92 -0
  26. package/dist/inference/llm.d.ts.map +1 -0
  27. package/dist/inference/llm.js +286 -0
  28. package/dist/inference/llm.js.map +1 -0
  29. package/dist/inference/stt.cjs +305 -0
  30. package/dist/inference/stt.cjs.map +1 -0
  31. package/dist/inference/stt.d.cts +79 -0
  32. package/dist/inference/stt.d.ts +79 -0
  33. package/dist/inference/stt.d.ts.map +1 -0
  34. package/dist/inference/stt.js +284 -0
  35. package/dist/inference/stt.js.map +1 -0
  36. package/dist/inference/tts.cjs +317 -0
  37. package/dist/inference/tts.cjs.map +1 -0
  38. package/dist/inference/tts.d.cts +75 -0
  39. package/dist/inference/tts.d.ts +75 -0
  40. package/dist/inference/tts.d.ts.map +1 -0
  41. package/dist/inference/tts.js +299 -0
  42. package/dist/inference/tts.js.map +1 -0
  43. package/dist/inference/utils.cjs +76 -0
  44. package/dist/inference/utils.cjs.map +1 -0
  45. package/dist/inference/utils.d.cts +5 -0
  46. package/dist/inference/utils.d.ts +5 -0
  47. package/dist/inference/utils.d.ts.map +1 -0
  48. package/dist/inference/utils.js +51 -0
  49. package/dist/inference/utils.js.map +1 -0
  50. package/dist/tts/tts.cjs +1 -1
  51. package/dist/tts/tts.cjs.map +1 -1
  52. package/dist/tts/tts.js +1 -1
  53. package/dist/tts/tts.js.map +1 -1
  54. package/dist/utils.cjs +11 -0
  55. package/dist/utils.cjs.map +1 -1
  56. package/dist/utils.d.cts +1 -0
  57. package/dist/utils.d.ts +1 -0
  58. package/dist/utils.d.ts.map +1 -1
  59. package/dist/utils.js +10 -0
  60. package/dist/utils.js.map +1 -1
  61. package/dist/voice/agent.cjs +16 -3
  62. package/dist/voice/agent.cjs.map +1 -1
  63. package/dist/voice/agent.d.cts +4 -3
  64. package/dist/voice/agent.d.ts +4 -3
  65. package/dist/voice/agent.d.ts.map +1 -1
  66. package/dist/voice/agent.js +20 -3
  67. package/dist/voice/agent.js.map +1 -1
  68. package/dist/voice/agent_session.cjs +16 -3
  69. package/dist/voice/agent_session.cjs.map +1 -1
  70. package/dist/voice/agent_session.d.cts +4 -3
  71. package/dist/voice/agent_session.d.ts +4 -3
  72. package/dist/voice/agent_session.d.ts.map +1 -1
  73. package/dist/voice/agent_session.js +20 -3
  74. package/dist/voice/agent_session.js.map +1 -1
  75. package/dist/voice/room_io/_input.cjs +9 -0
  76. package/dist/voice/room_io/_input.cjs.map +1 -1
  77. package/dist/voice/room_io/_input.d.ts.map +1 -1
  78. package/dist/voice/room_io/_input.js +10 -0
  79. package/dist/voice/room_io/_input.js.map +1 -1
  80. package/dist/worker.cjs.map +1 -1
  81. package/dist/worker.d.ts.map +1 -1
  82. package/dist/worker.js +1 -1
  83. package/dist/worker.js.map +1 -1
  84. package/package.json +3 -2
  85. package/src/index.ts +2 -1
  86. package/src/inference/api_protos.ts +82 -0
  87. package/src/inference/index.ts +12 -0
  88. package/src/inference/llm.ts +485 -0
  89. package/src/inference/stt.ts +414 -0
  90. package/src/inference/tts.ts +421 -0
  91. package/src/inference/utils.ts +66 -0
  92. package/src/tts/tts.ts +1 -1
  93. package/src/utils.ts +11 -0
  94. package/src/voice/agent.ts +30 -6
  95. package/src/voice/agent_session.ts +29 -6
  96. package/src/voice/room_io/_input.ts +12 -1
  97. package/src/worker.ts +2 -7
package/dist/inference/stt.js
@@ -0,0 +1,284 @@
+ import {} from "@livekit/rtc-node";
+ import { WebSocket } from "ws";
+ import { APIError, APIStatusError } from "../_exceptions.js";
+ import { AudioByteStream } from "../audio.js";
+ import { log } from "../log.js";
+ import {
+ STT as BaseSTT,
+ SpeechStream as BaseSpeechStream,
+ SpeechEventType
+ } from "../stt/index.js";
+ import { DEFAULT_API_CONNECT_OPTIONS } from "../types.js";
+ import { Event, Task, cancelAndWait, shortuuid, waitForAbort } from "../utils.js";
+ import { connectWs, createAccessToken } from "./utils.js";
+ const DEFAULT_ENCODING = "pcm_s16le";
+ const DEFAULT_SAMPLE_RATE = 16e3;
+ const DEFAULT_BASE_URL = "wss://agent-gateway.livekit.cloud/v1";
+ const DEFAULT_CANCEL_TIMEOUT = 5e3;
+ class STT extends BaseSTT {
+ opts;
+ streams = /* @__PURE__ */ new Set();
+ constructor(opts) {
+ super({ streaming: true, interimResults: true });
+ const {
+ model,
+ language,
+ baseURL,
+ encoding = DEFAULT_ENCODING,
+ sampleRate = DEFAULT_SAMPLE_RATE,
+ apiKey,
+ apiSecret,
+ extraKwargs = {}
+ } = opts || {};
+ const lkBaseURL = baseURL || process.env.LIVEKIT_INFERENCE_URL || DEFAULT_BASE_URL;
+ const lkApiKey = apiKey || process.env.LIVEKIT_INFERENCE_API_KEY || process.env.LIVEKIT_API_KEY;
+ if (!lkApiKey) {
+ throw new Error("apiKey is required: pass apiKey or set LIVEKIT_API_KEY");
+ }
+ const lkApiSecret = apiSecret || process.env.LIVEKIT_INFERENCE_API_SECRET || process.env.LIVEKIT_API_SECRET;
+ if (!lkApiSecret) {
+ throw new Error("apiSecret is required: pass apiSecret or set LIVEKIT_API_SECRET");
+ }
+ this.opts = {
+ model,
+ language,
+ encoding,
+ sampleRate,
+ baseURL: lkBaseURL,
+ apiKey: lkApiKey,
+ apiSecret: lkApiSecret,
+ extraKwargs
+ };
+ }
+ get label() {
+ return "inference.STT";
+ }
+ async _recognize(_) {
+ throw new Error("LiveKit STT does not support batch recognition, use stream() instead");
+ }
+ updateOptions(opts) {
+ this.opts = { ...this.opts, ...opts };
+ for (const stream of this.streams) {
+ stream.updateOptions(opts);
+ }
+ }
+ stream(options) {
+ const { language, connOptions = DEFAULT_API_CONNECT_OPTIONS } = options || {};
+ const streamOpts = {
+ ...this.opts,
+ language: language ?? this.opts.language
+ };
+ const stream = new SpeechStream(this, streamOpts, connOptions);
+ this.streams.add(stream);
+ return stream;
+ }
+ }
+ class SpeechStream extends BaseSpeechStream {
+ opts;
+ requestId = shortuuid("stt_request_");
+ speaking = false;
+ speechDuration = 0;
+ reconnectEvent = new Event();
+ #logger = log();
+ constructor(sttImpl, opts, connOptions) {
+ super(sttImpl, opts.sampleRate, connOptions);
+ this.opts = opts;
+ }
+ get label() {
+ return "inference.SpeechStream";
+ }
+ updateOptions(opts) {
+ this.opts = { ...this.opts, ...opts };
+ }
+ async run() {
+ let ws = null;
+ let closingWs = false;
+ this.reconnectEvent.set();
+ const connect = async () => {
+ const params = {
+ settings: {
+ sample_rate: String(this.opts.sampleRate),
+ encoding: this.opts.encoding,
+ extra: this.opts.extraKwargs
+ }
+ };
+ if (this.opts.model) {
+ params.model = this.opts.model;
+ }
+ if (this.opts.language) {
+ params.settings.language = this.opts.language;
+ }
+ let baseURL = this.opts.baseURL;
+ if (baseURL.startsWith("http://") || baseURL.startsWith("https://")) {
+ baseURL = baseURL.replace("http", "ws");
+ }
+ const token = await createAccessToken(this.opts.apiKey, this.opts.apiSecret);
+ const url = `${baseURL}/stt`;
+ const headers = { Authorization: `Bearer ${token}` };
+ const socket = await connectWs(url, headers, 1e4);
+ const msg = { ...params, type: "session.create" };
+ socket.send(JSON.stringify(msg));
+ return socket;
+ };
+ const send = async (socket, signal) => {
+ const audioStream = new AudioByteStream(
+ this.opts.sampleRate,
+ 1,
+ Math.floor(this.opts.sampleRate / 20)
+ // 50ms
+ );
+ for await (const ev of this.input) {
+ if (signal.aborted) break;
+ let frames;
+ if (ev === SpeechStream.FLUSH_SENTINEL) {
+ frames = audioStream.flush();
+ } else {
+ const frame = ev;
+ frames = audioStream.write(new Int16Array(frame.data).buffer);
+ }
+ for (const frame of frames) {
+ this.speechDuration += frame.samplesPerChannel / frame.sampleRate;
+ const base64 = Buffer.from(frame.data.buffer).toString("base64");
+ const msg = { type: "input_audio", audio: base64 };
+ socket.send(JSON.stringify(msg));
+ }
+ }
+ closingWs = true;
+ socket.send(JSON.stringify({ type: "session.finalize" }));
+ };
+ const recv = async (socket, signal) => {
+ while (!this.closed && !signal.aborted) {
+ const dataPromise = new Promise((resolve, reject) => {
+ const messageHandler = (d) => {
+ resolve(d.toString());
+ removeListeners();
+ };
+ const errorHandler = (e) => {
+ reject(e);
+ removeListeners();
+ };
+ const closeHandler = (code) => {
+ if (closingWs) {
+ resolve("");
+ } else {
+ reject(
+ new APIStatusError({
+ message: "LiveKit STT connection closed unexpectedly",
+ options: { statusCode: code }
+ })
+ );
+ }
+ removeListeners();
+ };
+ const removeListeners = () => {
+ socket.removeListener("message", messageHandler);
+ socket.removeListener("error", errorHandler);
+ socket.removeListener("close", closeHandler);
+ };
+ socket.once("message", messageHandler);
+ socket.once("error", errorHandler);
+ socket.once("close", closeHandler);
+ });
+ const data = await Promise.race([dataPromise, waitForAbort(signal)]);
+ if (!data || signal.aborted) return;
+ const json = JSON.parse(data);
+ const type = json.type;
+ switch (type) {
+ case "session.created":
+ case "session.finalized":
+ case "session.closed":
+ break;
+ case "interim_transcript":
+ this.processTranscript(json, false);
+ break;
+ case "final_transcript":
+ this.processTranscript(json, true);
+ break;
+ case "error":
+ this.#logger.error("received error from LiveKit STT: %o", json);
+ throw new APIError(`LiveKit STT returned error: ${JSON.stringify(json)}`);
+ default:
+ this.#logger.warn("received unexpected message from LiveKit STT: %o", json);
+ break;
+ }
+ }
+ };
+ while (true) {
+ try {
+ ws = await connect();
+ const sendTask = Task.from(async ({ signal }) => {
+ await send(ws, signal);
+ });
+ const recvTask = Task.from(async ({ signal }) => {
+ await recv(ws, signal);
+ });
+ const tasks = [sendTask, recvTask];
+ const waitReconnectTask = Task.from(async ({ signal }) => {
+ await Promise.race([this.reconnectEvent.wait(), waitForAbort(signal)]);
+ });
+ try {
+ await Promise.race([
+ Promise.all(tasks.map((task) => task.result)),
+ waitReconnectTask.result
+ ]);
+ if (!waitReconnectTask.done) break;
+ this.reconnectEvent.clear();
+ } finally {
+ await cancelAndWait([sendTask, recvTask, waitReconnectTask], DEFAULT_CANCEL_TIMEOUT);
+ }
+ } finally {
+ try {
+ if (ws) ws.close();
+ } catch {
+ }
+ }
+ }
+ }
+ processTranscript(data, isFinal) {
+ const requestId = data.request_id ?? this.requestId;
+ const text = data.transcript ?? "";
+ const language = data.language ?? this.opts.language ?? "en";
+ if (!text && !isFinal) return;
+ if (!this.speaking) {
+ this.speaking = true;
+ this.queue.put({ type: SpeechEventType.START_OF_SPEECH });
+ }
+ const speechData = {
+ language,
+ startTime: data.start ?? 0,
+ endTime: data.duration ?? 0,
+ confidence: data.confidence ?? 1,
+ text
+ };
+ if (isFinal) {
+ if (this.speechDuration > 0) {
+ this.queue.put({
+ type: SpeechEventType.RECOGNITION_USAGE,
+ requestId,
+ recognitionUsage: { audioDuration: this.speechDuration }
+ });
+ this.speechDuration = 0;
+ }
+ this.queue.put({
+ type: SpeechEventType.FINAL_TRANSCRIPT,
+ requestId,
+ alternatives: [speechData]
+ });
+ if (this.speaking) {
+ this.speaking = false;
+ this.queue.put({ type: SpeechEventType.END_OF_SPEECH });
+ }
+ } else {
+ this.queue.put({
+ type: SpeechEventType.INTERIM_TRANSCRIPT,
+ requestId,
+ alternatives: [speechData]
+ });
+ }
+ }
+ }
+ export {
+ STT,
+ SpeechStream
+ };
+ //# sourceMappingURL=stt.js.map
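The hunk above is the compiled ESM output for the new inference STT. For orientation, a minimal usage sketch follows, based only on the constructor, stream() and event types visible in that hunk; the import specifier, the pushFrame/endInput calls on the stream, and the model string are assumptions and are not confirmed by this diff.

// Hypothetical usage of the new inference STT (sketch only; inferred from the hunk above).
// The import path and the pushFrame/endInput methods are assumptions.
import { STT } from '@livekit/agents/inference';
import type { AudioFrame } from '@livekit/rtc-node';

const stt = new STT({
  model: 'deepgram/nova-3', // one of the DeepgramModels values declared in src/inference/stt.ts
  language: 'en',
  sampleRate: 16000,        // matches DEFAULT_SAMPLE_RATE in the compiled output
  // apiKey / apiSecret fall back to LIVEKIT_API_KEY / LIVEKIT_API_SECRET (or the
  // LIVEKIT_INFERENCE_* variants) when omitted, as in the constructor above.
});

// _recognize() throws ("use stream() instead"), so streaming is the only supported mode.
const stream = stt.stream({ language: 'en' });

async function feed(frames: AsyncIterable<AudioFrame>): Promise<void> {
  for await (const frame of frames) {
    stream.pushFrame(frame); // assumed base-class method for queuing input audio
  }
  stream.endInput(); // assumed; lets the send loop emit "session.finalize"
}

async function consume(): Promise<void> {
  for await (const event of stream) {
    // Events mirror the queue.put calls above: START_OF_SPEECH, INTERIM_TRANSCRIPT,
    // FINAL_TRANSCRIPT, RECOGNITION_USAGE, END_OF_SPEECH.
    console.log(event.type, event.alternatives?.[0]?.text);
  }
}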
package/dist/inference/stt.js.map
@@ -0,0 +1 @@
+ {"version":3,"sources":["../../src/inference/stt.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2025 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { type AudioFrame } from '@livekit/rtc-node';\nimport { type RawData, WebSocket } from 'ws';\nimport { APIError, APIStatusError } from '../_exceptions.js';\nimport { AudioByteStream } from '../audio.js';\nimport { log } from '../log.js';\nimport {\n STT as BaseSTT,\n SpeechStream as BaseSpeechStream,\n type SpeechData,\n type SpeechEvent,\n SpeechEventType,\n} from '../stt/index.js';\nimport { type APIConnectOptions, DEFAULT_API_CONNECT_OPTIONS } from '../types.js';\nimport { type AudioBuffer, Event, Task, cancelAndWait, shortuuid, waitForAbort } from '../utils.js';\nimport { type AnyModels, connectWs, createAccessToken } from './utils.js';\n\nexport type DeepgramModels =\n | 'deepgram'\n | 'deepgram/nova-3'\n | 'deepgram/nova-3-general'\n | 'deepgram/nova-3-medical'\n | 'deepgram/nova-2'\n | 'deepgram/nova-2-general'\n | 'deepgram/nova-2-medical'\n | 'deepgram/nova-2-phonecall';\n\nexport type CartesiaModels = 'cartesia' | 'cartesia/ink-whisper';\n\nexport type AssemblyaiModels = 'assemblyai' | 'assemblyai/universal-streaming';\n\nexport interface CartesiaOptions {\n min_volume?: number; // default: not specified\n max_silence_duration_secs?: number; // default: not specified\n}\n\nexport interface DeepgramOptions {\n filler_words?: boolean; // default: true\n interim_results?: boolean; // default: true\n endpointing?: number; // default: 25 (ms)\n punctuate?: boolean; // default: false\n smart_format?: boolean;\n keywords?: Array<[string, number]>;\n keyterms?: string[];\n profanity_filter?: boolean;\n numerals?: boolean;\n mip_opt_out?: boolean;\n}\n\nexport interface AssemblyaiOptions {\n format_turns?: boolean; // default: false\n end_of_turn_confidence_threshold?: number; // default: 0.01\n min_end_of_turn_silence_when_confident?: number; // default: 0\n max_turn_silence?: number; // default: not specified\n keyterms_prompt?: string[]; // default: not specified\n}\n\nexport type STTModels = DeepgramModels | CartesiaModels | AssemblyaiModels | AnyModels;\nexport type STTOptions<TModel extends STTModels> = TModel extends DeepgramModels\n ? DeepgramOptions\n : TModel extends CartesiaModels\n ? CartesiaOptions\n : TModel extends AssemblyaiModels\n ? 
AssemblyaiOptions\n : Record<string, unknown>;\n\nexport type STTLanguages = 'en' | 'de' | 'es' | 'fr' | 'ja' | 'pt' | 'zh';\nexport type STTEncoding = 'pcm_s16le';\n\nconst DEFAULT_ENCODING: STTEncoding = 'pcm_s16le';\nconst DEFAULT_SAMPLE_RATE = 16000;\nconst DEFAULT_BASE_URL = 'wss://agent-gateway.livekit.cloud/v1';\nconst DEFAULT_CANCEL_TIMEOUT = 5000;\n\nexport interface InferenceSTTOptions<TModel extends STTModels> {\n model: TModel;\n language?: STTLanguages | string;\n encoding: STTEncoding;\n sampleRate: number;\n baseURL: string;\n apiKey: string;\n apiSecret: string;\n extraKwargs: STTOptions<TModel>;\n}\n\nexport class STT<TModel extends STTModels> extends BaseSTT {\n private opts: InferenceSTTOptions<TModel>;\n private streams: Set<SpeechStream<TModel>> = new Set();\n\n constructor(opts: {\n model: TModel;\n language?: STTLanguages | string;\n baseURL?: string;\n encoding?: STTEncoding;\n sampleRate?: number;\n apiKey?: string;\n apiSecret?: string;\n extraKwargs?: STTOptions<TModel>;\n }) {\n super({ streaming: true, interimResults: true });\n\n const {\n model,\n language,\n baseURL,\n encoding = DEFAULT_ENCODING,\n sampleRate = DEFAULT_SAMPLE_RATE,\n apiKey,\n apiSecret,\n extraKwargs = {} as STTOptions<TModel>,\n } = opts || {};\n\n const lkBaseURL = baseURL || process.env.LIVEKIT_INFERENCE_URL || DEFAULT_BASE_URL;\n const lkApiKey = apiKey || process.env.LIVEKIT_INFERENCE_API_KEY || process.env.LIVEKIT_API_KEY;\n if (!lkApiKey) {\n throw new Error('apiKey is required: pass apiKey or set LIVEKIT_API_KEY');\n }\n\n const lkApiSecret =\n apiSecret || process.env.LIVEKIT_INFERENCE_API_SECRET || process.env.LIVEKIT_API_SECRET;\n if (!lkApiSecret) {\n throw new Error('apiSecret is required: pass apiSecret or set LIVEKIT_API_SECRET');\n }\n\n this.opts = {\n model,\n language,\n encoding,\n sampleRate,\n baseURL: lkBaseURL,\n apiKey: lkApiKey,\n apiSecret: lkApiSecret,\n extraKwargs,\n };\n }\n\n get label(): string {\n return 'inference.STT';\n }\n\n protected async _recognize(_: AudioBuffer): Promise<SpeechEvent> {\n throw new Error('LiveKit STT does not support batch recognition, use stream() instead');\n }\n\n updateOptions(opts: Partial<Pick<InferenceSTTOptions<TModel>, 'model' | 'language'>>): void {\n this.opts = { ...this.opts, ...opts };\n\n for (const stream of this.streams) {\n stream.updateOptions(opts);\n }\n }\n\n stream(options?: {\n language?: STTLanguages | string;\n connOptions?: APIConnectOptions;\n }): SpeechStream<TModel> {\n const { language, connOptions = DEFAULT_API_CONNECT_OPTIONS } = options || {};\n const streamOpts = {\n ...this.opts,\n language: language ?? 
this.opts.language,\n } as InferenceSTTOptions<TModel>;\n\n const stream = new SpeechStream(this, streamOpts, connOptions);\n this.streams.add(stream);\n\n return stream;\n }\n}\n\nexport class SpeechStream<TModel extends STTModels> extends BaseSpeechStream {\n private opts: InferenceSTTOptions<TModel>;\n private requestId = shortuuid('stt_request_');\n private speaking = false;\n private speechDuration = 0;\n private reconnectEvent = new Event();\n\n #logger = log();\n\n constructor(\n sttImpl: STT<TModel>,\n opts: InferenceSTTOptions<TModel>,\n connOptions: APIConnectOptions,\n ) {\n super(sttImpl, opts.sampleRate, connOptions);\n this.opts = opts;\n }\n\n get label(): string {\n return 'inference.SpeechStream';\n }\n\n updateOptions(opts: Partial<Pick<InferenceSTTOptions<TModel>, 'model' | 'language'>>): void {\n this.opts = { ...this.opts, ...opts };\n }\n\n protected async run(): Promise<void> {\n let ws: WebSocket | null = null;\n let closingWs = false;\n\n this.reconnectEvent.set();\n\n const connect = async () => {\n const params = {\n settings: {\n sample_rate: String(this.opts.sampleRate),\n encoding: this.opts.encoding,\n extra: this.opts.extraKwargs,\n },\n } as Record<string, unknown>;\n\n if (this.opts.model) {\n params.model = this.opts.model;\n }\n\n if (this.opts.language) {\n (params.settings as Record<string, unknown>).language = this.opts.language;\n }\n\n let baseURL = this.opts.baseURL;\n if (baseURL.startsWith('http://') || baseURL.startsWith('https://')) {\n baseURL = baseURL.replace('http', 'ws');\n }\n\n const token = await createAccessToken(this.opts.apiKey, this.opts.apiSecret);\n const url = `${baseURL}/stt`;\n const headers = { Authorization: `Bearer ${token}` } as Record<string, string>;\n\n const socket = await connectWs(url, headers, 10000);\n const msg = { ...params, type: 'session.create' };\n socket.send(JSON.stringify(msg));\n\n return socket;\n };\n\n const send = async (socket: WebSocket, signal: AbortSignal) => {\n const audioStream = new AudioByteStream(\n this.opts.sampleRate,\n 1,\n Math.floor(this.opts.sampleRate / 20), // 50ms\n );\n\n for await (const ev of this.input) {\n if (signal.aborted) break;\n let frames: AudioFrame[];\n\n if (ev === SpeechStream.FLUSH_SENTINEL) {\n frames = audioStream.flush();\n } else {\n const frame = ev as AudioFrame;\n frames = audioStream.write(new Int16Array(frame.data).buffer);\n }\n\n for (const frame of frames) {\n this.speechDuration += frame.samplesPerChannel / frame.sampleRate;\n const base64 = Buffer.from(frame.data.buffer).toString('base64');\n const msg = { type: 'input_audio', audio: base64 };\n socket.send(JSON.stringify(msg));\n }\n }\n\n closingWs = true;\n socket.send(JSON.stringify({ type: 'session.finalize' }));\n };\n\n const recv = async (socket: WebSocket, signal: AbortSignal) => {\n while (!this.closed && !signal.aborted) {\n const dataPromise = new Promise<string>((resolve, reject) => {\n const messageHandler = (d: RawData) => {\n resolve(d.toString());\n removeListeners();\n };\n const errorHandler = (e: Error) => {\n reject(e);\n removeListeners();\n };\n const closeHandler = (code: number) => {\n if (closingWs) {\n resolve('');\n } else {\n reject(\n new APIStatusError({\n message: 'LiveKit STT connection closed unexpectedly',\n options: { statusCode: code },\n }),\n );\n }\n removeListeners();\n };\n const removeListeners = () => {\n socket.removeListener('message', messageHandler);\n socket.removeListener('error', errorHandler);\n socket.removeListener('close', closeHandler);\n };\n 
socket.once('message', messageHandler);\n socket.once('error', errorHandler);\n socket.once('close', closeHandler);\n });\n\n const data = await Promise.race([dataPromise, waitForAbort(signal)]);\n\n if (!data || signal.aborted) return;\n\n const json = JSON.parse(data);\n const type = json.type as string | undefined;\n\n switch (type) {\n case 'session.created':\n case 'session.finalized':\n case 'session.closed':\n break;\n case 'interim_transcript':\n this.processTranscript(json, false);\n break;\n case 'final_transcript':\n this.processTranscript(json, true);\n break;\n case 'error':\n this.#logger.error('received error from LiveKit STT: %o', json);\n throw new APIError(`LiveKit STT returned error: ${JSON.stringify(json)}`);\n default:\n this.#logger.warn('received unexpected message from LiveKit STT: %o', json);\n break;\n }\n }\n };\n\n while (true) {\n try {\n ws = await connect();\n\n const sendTask = Task.from(async ({ signal }) => {\n await send(ws!, signal);\n });\n\n const recvTask = Task.from(async ({ signal }) => {\n await recv(ws!, signal);\n });\n\n const tasks = [sendTask, recvTask];\n const waitReconnectTask = Task.from(async ({ signal }) => {\n await Promise.race([this.reconnectEvent.wait(), waitForAbort(signal)]);\n });\n\n try {\n await Promise.race([\n Promise.all(tasks.map((task) => task.result)),\n waitReconnectTask.result,\n ]);\n\n if (!waitReconnectTask.done) break;\n this.reconnectEvent.clear();\n } finally {\n await cancelAndWait([sendTask, recvTask, waitReconnectTask], DEFAULT_CANCEL_TIMEOUT);\n }\n } finally {\n try {\n if (ws) ws.close();\n } catch {}\n }\n }\n }\n\n private processTranscript(data: Record<string, any>, isFinal: boolean) {\n const requestId = data.request_id ?? this.requestId;\n const text = data.transcript ?? '';\n const language = data.language ?? this.opts.language ?? 'en';\n\n if (!text && !isFinal) return;\n\n // We'll have a more accurate way of detecting when speech started when we have VAD\n if (!this.speaking) {\n this.speaking = true;\n this.queue.put({ type: SpeechEventType.START_OF_SPEECH });\n }\n\n const speechData: SpeechData = {\n language,\n startTime: data.start ?? 0,\n endTime: data.duration ?? 0,\n confidence: data.confidence ?? 
1.0,\n text,\n };\n\n if (isFinal) {\n if (this.speechDuration > 0) {\n this.queue.put({\n type: SpeechEventType.RECOGNITION_USAGE,\n requestId,\n recognitionUsage: { audioDuration: this.speechDuration },\n });\n this.speechDuration = 0;\n }\n\n this.queue.put({\n type: SpeechEventType.FINAL_TRANSCRIPT,\n requestId,\n alternatives: [speechData],\n });\n\n if (this.speaking) {\n this.speaking = false;\n this.queue.put({ type: SpeechEventType.END_OF_SPEECH });\n }\n } else {\n this.queue.put({\n type: SpeechEventType.INTERIM_TRANSCRIPT,\n requestId,\n alternatives: [speechData],\n });\n }\n }\n}\n"],"mappings":"AAGA,eAAgC;AAChC,SAAuB,iBAAiB;AACxC,SAAS,UAAU,sBAAsB;AACzC,SAAS,uBAAuB;AAChC,SAAS,WAAW;AACpB;AAAA,EACE,OAAO;AAAA,EACP,gBAAgB;AAAA,EAGhB;AAAA,OACK;AACP,SAAiC,mCAAmC;AACpE,SAA2B,OAAO,MAAM,eAAe,WAAW,oBAAoB;AACtF,SAAyB,WAAW,yBAAyB;AAsD7D,MAAM,mBAAgC;AACtC,MAAM,sBAAsB;AAC5B,MAAM,mBAAmB;AACzB,MAAM,yBAAyB;AAaxB,MAAM,YAAsC,QAAQ;AAAA,EACjD;AAAA,EACA,UAAqC,oBAAI,IAAI;AAAA,EAErD,YAAY,MAST;AACD,UAAM,EAAE,WAAW,MAAM,gBAAgB,KAAK,CAAC;AAE/C,UAAM;AAAA,MACJ;AAAA,MACA;AAAA,MACA;AAAA,MACA,WAAW;AAAA,MACX,aAAa;AAAA,MACb;AAAA,MACA;AAAA,MACA,cAAc,CAAC;AAAA,IACjB,IAAI,QAAQ,CAAC;AAEb,UAAM,YAAY,WAAW,QAAQ,IAAI,yBAAyB;AAClE,UAAM,WAAW,UAAU,QAAQ,IAAI,6BAA6B,QAAQ,IAAI;AAChF,QAAI,CAAC,UAAU;AACb,YAAM,IAAI,MAAM,wDAAwD;AAAA,IAC1E;AAEA,UAAM,cACJ,aAAa,QAAQ,IAAI,gCAAgC,QAAQ,IAAI;AACvE,QAAI,CAAC,aAAa;AAChB,YAAM,IAAI,MAAM,iEAAiE;AAAA,IACnF;AAEA,SAAK,OAAO;AAAA,MACV;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA,SAAS;AAAA,MACT,QAAQ;AAAA,MACR,WAAW;AAAA,MACX;AAAA,IACF;AAAA,EACF;AAAA,EAEA,IAAI,QAAgB;AAClB,WAAO;AAAA,EACT;AAAA,EAEA,MAAgB,WAAW,GAAsC;AAC/D,UAAM,IAAI,MAAM,sEAAsE;AAAA,EACxF;AAAA,EAEA,cAAc,MAA8E;AAC1F,SAAK,OAAO,EAAE,GAAG,KAAK,MAAM,GAAG,KAAK;AAEpC,eAAW,UAAU,KAAK,SAAS;AACjC,aAAO,cAAc,IAAI;AAAA,IAC3B;AAAA,EACF;AAAA,EAEA,OAAO,SAGkB;AACvB,UAAM,EAAE,UAAU,cAAc,4BAA4B,IAAI,WAAW,CAAC;AAC5E,UAAM,aAAa;AAAA,MACjB,GAAG,KAAK;AAAA,MACR,UAAU,YAAY,KAAK,KAAK;AAAA,IAClC;AAEA,UAAM,SAAS,IAAI,aAAa,MAAM,YAAY,WAAW;AAC7D,SAAK,QAAQ,IAAI,MAAM;AAEvB,WAAO;AAAA,EACT;AACF;AAEO,MAAM,qBAA+C,iBAAiB;AAAA,EACnE;AAAA,EACA,YAAY,UAAU,cAAc;AAAA,EACpC,WAAW;AAAA,EACX,iBAAiB;AAAA,EACjB,iBAAiB,IAAI,MAAM;AAAA,EAEnC,UAAU,IAAI;AAAA,EAEd,YACE,SACA,MACA,aACA;AACA,UAAM,SAAS,KAAK,YAAY,WAAW;AAC3C,SAAK,OAAO;AAAA,EACd;AAAA,EAEA,IAAI,QAAgB;AAClB,WAAO;AAAA,EACT;AAAA,EAEA,cAAc,MAA8E;AAC1F,SAAK,OAAO,EAAE,GAAG,KAAK,MAAM,GAAG,KAAK;AAAA,EACtC;AAAA,EAEA,MAAgB,MAAqB;AACnC,QAAI,KAAuB;AAC3B,QAAI,YAAY;AAEhB,SAAK,eAAe,IAAI;AAExB,UAAM,UAAU,YAAY;AAC1B,YAAM,SAAS;AAAA,QACb,UAAU;AAAA,UACR,aAAa,OAAO,KAAK,KAAK,UAAU;AAAA,UACxC,UAAU,KAAK,KAAK;AAAA,UACpB,OAAO,KAAK,KAAK;AAAA,QACnB;AAAA,MACF;AAEA,UAAI,KAAK,KAAK,OAAO;AACnB,eAAO,QAAQ,KAAK,KAAK;AAAA,MAC3B;AAEA,UAAI,KAAK,KAAK,UAAU;AACtB,QAAC,OAAO,SAAqC,WAAW,KAAK,KAAK;AAAA,MACpE;AAEA,UAAI,UAAU,KAAK,KAAK;AACxB,UAAI,QAAQ,WAAW,SAAS,KAAK,QAAQ,WAAW,UAAU,GAAG;AACnE,kBAAU,QAAQ,QAAQ,QAAQ,IAAI;AAAA,MACxC;AAEA,YAAM,QAAQ,MAAM,kBAAkB,KAAK,KAAK,QAAQ,KAAK,KAAK,SAAS;AAC3E,YAAM,MAAM,GAAG,OAAO;AACtB,YAAM,UAAU,EAAE,eAAe,UAAU,KAAK,GAAG;AAEnD,YAAM,SAAS,MAAM,UAAU,KAAK,SAAS,GAAK;AAClD,YAAM,MAAM,EAAE,GAAG,QAAQ,MAAM,iBAAiB;AAChD,aAAO,KAAK,KAAK,UAAU,GAAG,CAAC;AAE/B,aAAO;AAAA,IACT;AAEA,UAAM,OAAO,OAAO,QAAmB,WAAwB;AAC7D,YAAM,cAAc,IAAI;AAAA,QACtB,KAAK,KAAK;AAAA,QACV;AAAA,QACA,KAAK,MAAM,KAAK,KAAK,aAAa,EAAE;AAAA;AAAA,MACtC;AAEA,uBAAiB,MAAM,KAAK,OAAO;AACjC,YAAI,OAAO,QAAS;AACpB,YAAI;AAEJ,YAAI,OAAO,aAAa,gBAAgB;AACtC,mBAAS,YAAY,MAAM;AAAA,QAC7B,OAAO;AACL,gBAAM,QAAQ;AACd,mBAAS,YAAY,MAAM,IAAI,WAAW,MAAM,IAAI,EAAE,MAAM;AAAA,QAC9D;AAEA,mBAAW,SAAS,QAAQ;AAC1B,eAAK,kBAAkB,MAAM,oBAAoB,MAAM;AACvD,gBAAM,SA
AS,OAAO,KAAK,MAAM,KAAK,MAAM,EAAE,SAAS,QAAQ;AAC/D,gBAAM,MAAM,EAAE,MAAM,eAAe,OAAO,OAAO;AACjD,iBAAO,KAAK,KAAK,UAAU,GAAG,CAAC;AAAA,QACjC;AAAA,MACF;AAEA,kBAAY;AACZ,aAAO,KAAK,KAAK,UAAU,EAAE,MAAM,mBAAmB,CAAC,CAAC;AAAA,IAC1D;AAEA,UAAM,OAAO,OAAO,QAAmB,WAAwB;AAC7D,aAAO,CAAC,KAAK,UAAU,CAAC,OAAO,SAAS;AACtC,cAAM,cAAc,IAAI,QAAgB,CAAC,SAAS,WAAW;AAC3D,gBAAM,iBAAiB,CAAC,MAAe;AACrC,oBAAQ,EAAE,SAAS,CAAC;AACpB,4BAAgB;AAAA,UAClB;AACA,gBAAM,eAAe,CAAC,MAAa;AACjC,mBAAO,CAAC;AACR,4BAAgB;AAAA,UAClB;AACA,gBAAM,eAAe,CAAC,SAAiB;AACrC,gBAAI,WAAW;AACb,sBAAQ,EAAE;AAAA,YACZ,OAAO;AACL;AAAA,gBACE,IAAI,eAAe;AAAA,kBACjB,SAAS;AAAA,kBACT,SAAS,EAAE,YAAY,KAAK;AAAA,gBAC9B,CAAC;AAAA,cACH;AAAA,YACF;AACA,4BAAgB;AAAA,UAClB;AACA,gBAAM,kBAAkB,MAAM;AAC5B,mBAAO,eAAe,WAAW,cAAc;AAC/C,mBAAO,eAAe,SAAS,YAAY;AAC3C,mBAAO,eAAe,SAAS,YAAY;AAAA,UAC7C;AACA,iBAAO,KAAK,WAAW,cAAc;AACrC,iBAAO,KAAK,SAAS,YAAY;AACjC,iBAAO,KAAK,SAAS,YAAY;AAAA,QACnC,CAAC;AAED,cAAM,OAAO,MAAM,QAAQ,KAAK,CAAC,aAAa,aAAa,MAAM,CAAC,CAAC;AAEnE,YAAI,CAAC,QAAQ,OAAO,QAAS;AAE7B,cAAM,OAAO,KAAK,MAAM,IAAI;AAC5B,cAAM,OAAO,KAAK;AAElB,gBAAQ,MAAM;AAAA,UACZ,KAAK;AAAA,UACL,KAAK;AAAA,UACL,KAAK;AACH;AAAA,UACF,KAAK;AACH,iBAAK,kBAAkB,MAAM,KAAK;AAClC;AAAA,UACF,KAAK;AACH,iBAAK,kBAAkB,MAAM,IAAI;AACjC;AAAA,UACF,KAAK;AACH,iBAAK,QAAQ,MAAM,uCAAuC,IAAI;AAC9D,kBAAM,IAAI,SAAS,+BAA+B,KAAK,UAAU,IAAI,CAAC,EAAE;AAAA,UAC1E;AACE,iBAAK,QAAQ,KAAK,oDAAoD,IAAI;AAC1E;AAAA,QACJ;AAAA,MACF;AAAA,IACF;AAEA,WAAO,MAAM;AACX,UAAI;AACF,aAAK,MAAM,QAAQ;AAEnB,cAAM,WAAW,KAAK,KAAK,OAAO,EAAE,OAAO,MAAM;AAC/C,gBAAM,KAAK,IAAK,MAAM;AAAA,QACxB,CAAC;AAED,cAAM,WAAW,KAAK,KAAK,OAAO,EAAE,OAAO,MAAM;AAC/C,gBAAM,KAAK,IAAK,MAAM;AAAA,QACxB,CAAC;AAED,cAAM,QAAQ,CAAC,UAAU,QAAQ;AACjC,cAAM,oBAAoB,KAAK,KAAK,OAAO,EAAE,OAAO,MAAM;AACxD,gBAAM,QAAQ,KAAK,CAAC,KAAK,eAAe,KAAK,GAAG,aAAa,MAAM,CAAC,CAAC;AAAA,QACvE,CAAC;AAED,YAAI;AACF,gBAAM,QAAQ,KAAK;AAAA,YACjB,QAAQ,IAAI,MAAM,IAAI,CAAC,SAAS,KAAK,MAAM,CAAC;AAAA,YAC5C,kBAAkB;AAAA,UACpB,CAAC;AAED,cAAI,CAAC,kBAAkB,KAAM;AAC7B,eAAK,eAAe,MAAM;AAAA,QAC5B,UAAE;AACA,gBAAM,cAAc,CAAC,UAAU,UAAU,iBAAiB,GAAG,sBAAsB;AAAA,QACrF;AAAA,MACF,UAAE;AACA,YAAI;AACF,cAAI,GAAI,IAAG,MAAM;AAAA,QACnB,QAAQ;AAAA,QAAC;AAAA,MACX;AAAA,IACF;AAAA,EACF;AAAA,EAEQ,kBAAkB,MAA2B,SAAkB;AACrE,UAAM,YAAY,KAAK,cAAc,KAAK;AAC1C,UAAM,OAAO,KAAK,cAAc;AAChC,UAAM,WAAW,KAAK,YAAY,KAAK,KAAK,YAAY;AAExD,QAAI,CAAC,QAAQ,CAAC,QAAS;AAGvB,QAAI,CAAC,KAAK,UAAU;AAClB,WAAK,WAAW;AAChB,WAAK,MAAM,IAAI,EAAE,MAAM,gBAAgB,gBAAgB,CAAC;AAAA,IAC1D;AAEA,UAAM,aAAyB;AAAA,MAC7B;AAAA,MACA,WAAW,KAAK,SAAS;AAAA,MACzB,SAAS,KAAK,YAAY;AAAA,MAC1B,YAAY,KAAK,cAAc;AAAA,MAC/B;AAAA,IACF;AAEA,QAAI,SAAS;AACX,UAAI,KAAK,iBAAiB,GAAG;AAC3B,aAAK,MAAM,IAAI;AAAA,UACb,MAAM,gBAAgB;AAAA,UACtB;AAAA,UACA,kBAAkB,EAAE,eAAe,KAAK,eAAe;AAAA,QACzD,CAAC;AACD,aAAK,iBAAiB;AAAA,MACxB;AAEA,WAAK,MAAM,IAAI;AAAA,QACb,MAAM,gBAAgB;AAAA,QACtB;AAAA,QACA,cAAc,CAAC,UAAU;AAAA,MAC3B,CAAC;AAED,UAAI,KAAK,UAAU;AACjB,aAAK,WAAW;AAChB,aAAK,MAAM,IAAI,EAAE,MAAM,gBAAgB,cAAc,CAAC;AAAA,MACxD;AAAA,IACF,OAAO;AACL,WAAK,MAAM,IAAI;AAAA,QACb,MAAM,gBAAgB;AAAA,QACtB;AAAA,QACA,cAAc,CAAC,UAAU;AAAA,MAC3B,CAAC;AAAA,IACH;AAAA,EACF;AACF;","names":[]}
package/dist/inference/tts.cjs
@@ -0,0 +1,317 @@
+ "use strict";
+ var __defProp = Object.defineProperty;
+ var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
+ var __getOwnPropNames = Object.getOwnPropertyNames;
+ var __hasOwnProp = Object.prototype.hasOwnProperty;
+ var __export = (target, all) => {
+ for (var name in all)
+ __defProp(target, name, { get: all[name], enumerable: true });
+ };
+ var __copyProps = (to, from, except, desc) => {
+ if (from && typeof from === "object" || typeof from === "function") {
+ for (let key of __getOwnPropNames(from))
+ if (!__hasOwnProp.call(to, key) && key !== except)
+ __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
+ }
+ return to;
+ };
+ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
+ var tts_exports = {};
+ __export(tts_exports, {
+ SynthesizeStream: () => SynthesizeStream,
+ TTS: () => TTS
+ });
+ module.exports = __toCommonJS(tts_exports);
+ var import_ws = require("ws");
+ var import_exceptions = require("../_exceptions.cjs");
+ var import_audio = require("../audio.cjs");
+ var import_log = require("../log.cjs");
+ var import_stream_channel = require("../stream/stream_channel.cjs");
+ var import_tokenize = require("../tokenize/index.cjs");
+ var import_tts = require("../tts/index.cjs");
+ var import_types = require("../types.cjs");
+ var import_utils = require("../utils.cjs");
+ var import_api_protos = require("./api_protos.cjs");
+ var import_utils2 = require("./utils.cjs");
+ const DEFAULT_ENCODING = "pcm_s16le";
+ const DEFAULT_SAMPLE_RATE = 16e3;
+ const DEFAULT_BASE_URL = "https://agent-gateway.livekit.cloud/v1";
+ const NUM_CHANNELS = 1;
+ const DEFAULT_LANGUAGE = "en";
+ class TTS extends import_tts.TTS {
+ opts;
+ streams = /* @__PURE__ */ new Set();
+ #logger = (0, import_log.log)();
+ constructor(opts) {
+ const sampleRate = (opts == null ? void 0 : opts.sampleRate) ?? DEFAULT_SAMPLE_RATE;
+ super(sampleRate, 1, { streaming: true });
+ const {
+ model,
+ voice,
+ language = DEFAULT_LANGUAGE,
+ baseURL,
+ encoding = DEFAULT_ENCODING,
+ apiKey,
+ apiSecret,
+ extraKwargs = {}
+ } = opts || {};
+ const lkBaseURL = baseURL || process.env.LIVEKIT_INFERENCE_URL || DEFAULT_BASE_URL;
+ const lkApiKey = apiKey || process.env.LIVEKIT_INFERENCE_API_KEY || process.env.LIVEKIT_API_KEY;
+ if (!lkApiKey) {
+ throw new Error("apiKey is required: pass apiKey or set LIVEKIT_API_KEY");
+ }
+ const lkApiSecret = apiSecret || process.env.LIVEKIT_INFERENCE_API_SECRET || process.env.LIVEKIT_API_SECRET;
+ if (!lkApiSecret) {
+ throw new Error("apiSecret is required: pass apiSecret or set LIVEKIT_API_SECRET");
+ }
+ let nextModel = model;
+ let nextVoice = voice;
+ if (typeof nextModel === "string") {
+ const idx = nextModel.lastIndexOf(":");
+ if (idx !== -1) {
+ const voiceFromModel = nextModel.slice(idx + 1);
+ if (nextVoice && nextVoice !== voiceFromModel) {
+ this.#logger.warn(
+ "`voice` is provided via both argument and model, using the one from the argument",
+ { voice: nextVoice, model: nextModel }
+ );
+ } else {
+ nextVoice = voiceFromModel;
+ }
+ nextModel = nextModel.slice(0, idx);
+ }
+ }
+ this.opts = {
+ model: nextModel,
+ voice: nextVoice,
+ language,
+ encoding,
+ sampleRate,
+ baseURL: lkBaseURL,
+ apiKey: lkApiKey,
+ apiSecret: lkApiSecret,
+ extraKwargs
+ };
+ }
+ get label() {
+ return "inference.TTS";
+ }
+ updateOptions(opts) {
+ this.opts = { ...this.opts, ...opts };
+ for (const stream of this.streams) {
+ stream.updateOptions(opts);
+ }
+ }
+ synthesize(_) {
+ throw new Error("ChunkedStream is not implemented");
+ }
+ stream(options) {
+ const { connOptions = import_types.DEFAULT_API_CONNECT_OPTIONS } = options || {};
+ const stream = new SynthesizeStream(this, { ...this.opts }, connOptions);
+ this.streams.add(stream);
+ return stream;
+ }
+ async connectWs(timeout) {
+ let baseURL = this.opts.baseURL;
+ if (baseURL.startsWith("http://") || baseURL.startsWith("https://")) {
+ baseURL = baseURL.replace("http", "ws");
+ }
+ const token = await (0, import_utils2.createAccessToken)(this.opts.apiKey, this.opts.apiSecret);
+ const url = `${baseURL}/tts`;
+ const headers = { Authorization: `Bearer ${token}` };
+ const params = {
+ type: "session.create",
+ sample_rate: String(this.opts.sampleRate),
+ encoding: this.opts.encoding,
+ extra: this.opts.extraKwargs
+ };
+ if (this.opts.voice) params.voice = this.opts.voice;
+ if (this.opts.model) params.model = this.opts.model;
+ if (this.opts.language) params.language = this.opts.language;
+ const socket = await (0, import_utils2.connectWs)(url, headers, timeout);
+ socket.send(JSON.stringify(params));
+ return socket;
+ }
+ async closeWs(ws) {
+ await ws.close();
+ }
+ async close() {
+ for (const stream of this.streams) {
+ await stream.close();
+ }
+ this.streams.clear();
+ }
+ }
+ class SynthesizeStream extends import_tts.SynthesizeStream {
+ opts;
+ tts;
+ connOptions;
+ #logger = (0, import_log.log)();
+ constructor(tts, opts, connOptions) {
+ super(tts, connOptions);
+ this.opts = opts;
+ this.tts = tts;
+ this.connOptions = connOptions;
+ }
+ get label() {
+ return "inference.SynthesizeStream";
+ }
+ updateOptions(opts) {
+ this.opts = { ...this.opts, ...opts };
+ }
+ async run() {
+ let ws = null;
+ let closing = false;
+ let finalReceived = false;
+ let lastFrame;
+ const sendTokenizerStream = new import_tokenize.basic.SentenceTokenizer().stream();
+ const eventChannel = (0, import_stream_channel.createStreamChannel)();
+ const requestId = (0, import_utils.shortuuid)("tts_request_");
+ const resourceCleanup = () => {
+ if (closing) return;
+ closing = true;
+ sendTokenizerStream.close();
+ eventChannel.close();
+ ws == null ? void 0 : ws.removeAllListeners();
+ ws == null ? void 0 : ws.close();
+ };
+ const sendClientEvent = async (event) => {
+ const validatedEvent = await import_api_protos.ttsClientEventSchema.parseAsync(event);
+ if (!ws || ws.readyState !== import_ws.WebSocket.OPEN) {
+ this.#logger.warn("Trying to send client TTS event to a closed WebSocket");
+ return;
+ }
+ ws.send(JSON.stringify(validatedEvent));
+ };
+ const sendLastFrame = (segmentId, final) => {
+ if (lastFrame) {
+ this.queue.put({ requestId, segmentId, frame: lastFrame, final });
+ lastFrame = void 0;
+ }
+ };
+ const createInputTask = async () => {
+ for await (const data of this.input) {
+ if (this.abortController.signal.aborted) break;
+ if (data === SynthesizeStream.FLUSH_SENTINEL) {
+ sendTokenizerStream.flush();
+ continue;
+ }
+ sendTokenizerStream.pushText(data);
+ }
+ sendTokenizerStream.endInput();
+ };
+ const createSentenceStreamTask = async () => {
+ for await (const ev of sendTokenizerStream) {
+ if (this.abortController.signal.aborted) break;
+ sendClientEvent({
+ type: "input_transcript",
+ transcript: ev.token + " "
+ });
+ }
+ sendClientEvent({ type: "session.flush" });
+ };
+ const createWsListenerTask = async (ws2) => {
+ return new Promise((resolve, reject) => {
+ this.abortController.signal.addEventListener("abort", () => {
+ resourceCleanup();
+ reject(new Error("WebSocket connection aborted"));
+ });
+ ws2.on("message", async (data) => {
+ const eventJson = JSON.parse(data.toString());
+ const validatedEvent = import_api_protos.ttsServerEventSchema.parse(eventJson);
+ eventChannel.write(validatedEvent);
+ });
+ ws2.on("error", (e) => {
+ this.#logger.error({ error: e }, "WebSocket error");
+ resourceCleanup();
+ reject(e);
+ });
+ ws2.on("close", () => {
+ resourceCleanup();
+ if (!closing) return this.#logger.error("WebSocket closed unexpectedly");
+ if (finalReceived) return resolve();
+ reject(
+ new import_exceptions.APIStatusError({
+ message: "Gateway connection closed unexpectedly",
+ options: { requestId }
+ })
+ );
+ });
+ });
+ };
+ const createRecvTask = async () => {
+ let currentSessionId = null;
+ const bstream = new import_audio.AudioByteStream(this.opts.sampleRate, NUM_CHANNELS);
+ const serverEventStream = eventChannel.stream();
+ const reader = serverEventStream.getReader();
+ try {
+ while (!this.closed && !this.abortController.signal.aborted) {
+ const result = await reader.read();
+ if (this.abortController.signal.aborted) return;
+ if (result.done) return;
+ const serverEvent = result.value;
+ switch (serverEvent.type) {
+ case "session.created":
+ currentSessionId = serverEvent.session_id;
+ break;
+ case "output_audio":
+ const base64Data = new Int8Array(Buffer.from(serverEvent.audio, "base64"));
+ for (const frame of bstream.write(base64Data.buffer)) {
+ sendLastFrame(currentSessionId, false);
+ lastFrame = frame;
+ }
+ break;
+ case "done":
+ finalReceived = true;
+ for (const frame of bstream.flush()) {
+ sendLastFrame(currentSessionId, false);
+ lastFrame = frame;
+ }
+ sendLastFrame(currentSessionId, true);
+ this.queue.put(SynthesizeStream.END_OF_STREAM);
+ break;
+ case "session.closed":
+ resourceCleanup();
+ break;
+ case "error":
+ this.#logger.error(
+ { serverEvent },
+ "Received error message from LiveKit TTS WebSocket"
+ );
+ resourceCleanup();
+ throw new import_exceptions.APIError(`LiveKit TTS returned error: ${serverEvent.message}`);
+ default:
+ this.#logger.warn("Unexpected message %s", serverEvent);
+ break;
+ }
+ }
+ } finally {
+ reader.releaseLock();
+ try {
+ await serverEventStream.cancel();
+ } catch (e) {
+ this.#logger.debug("Error cancelling serverEventStream (may already be cancelled):", e);
+ }
+ }
+ };
+ try {
+ ws = await this.tts.connectWs(this.connOptions.timeoutMs);
+ await Promise.all([
+ createInputTask(),
+ createSentenceStreamTask(),
+ createWsListenerTask(ws),
+ createRecvTask()
+ ]);
+ } catch (e) {
+ this.#logger.error("Error in SynthesizeStream", { error: e });
+ } finally {
+ resourceCleanup();
+ }
+ }
+ }
+ // Annotate the CommonJS export names for ESM import in node:
+ 0 && (module.exports = {
+ SynthesizeStream,
+ TTS
+ });
+ //# sourceMappingURL=tts.cjs.map
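To round out the picture, a similar sketch for the new inference TTS shown in the compiled tts.cjs above. The constructor options, the ":voice" suffix handling and the env-var fallbacks come from the hunk; the import path, the model string and the pushText/flush/endInput calls on the stream are assumptions.

// Hypothetical usage of the new inference TTS (sketch only; inferred from the hunk above).
// The import path, the model string and the stream push/flush methods are assumptions.
import { SynthesizeStream, TTS } from '@livekit/agents/inference';

const tts = new TTS({
  // A ":voice" suffix on the model is split off and used as the voice, unless an explicit
  // `voice` option is also given (the constructor then warns and keeps the argument).
  model: 'cartesia/sonic-2:example-voice-id', // hypothetical model/voice pair
  language: 'en', // DEFAULT_LANGUAGE in the compiled output
  // apiKey / apiSecret fall back to LIVEKIT_API_KEY / LIVEKIT_API_SECRET when omitted.
});

// synthesize() throws ("ChunkedStream is not implemented"), so streaming is the only path.
const stream = tts.stream();
stream.pushText('Hello from the new inference gateway.'); // assumed base-class method
stream.flush();    // assumed
stream.endInput(); // assumed

for await (const audio of stream) {
  if (audio === SynthesizeStream.END_OF_STREAM) break; // sentinel queued on "done" above
  // Each item carries requestId, segmentId, an AudioFrame and a `final` flag, matching
  // the objects produced by sendLastFrame in the hunk.
  console.log(audio.segmentId, audio.frame.sampleRate, audio.final);
}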