@livekit/agents-plugin-baseten 1.0.31

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67)
  1. package/LICENSE +201 -0
  2. package/README.md +92 -0
  3. package/dist/index.cjs +48 -0
  4. package/dist/index.cjs.map +1 -0
  5. package/dist/index.d.cts +5 -0
  6. package/dist/index.d.ts +5 -0
  7. package/dist/index.d.ts.map +1 -0
  8. package/dist/index.js +21 -0
  9. package/dist/index.js.map +1 -0
  10. package/dist/llm.cjs +143 -0
  11. package/dist/llm.cjs.map +1 -0
  12. package/dist/llm.d.cts +44 -0
  13. package/dist/llm.d.ts +44 -0
  14. package/dist/llm.d.ts.map +1 -0
  15. package/dist/llm.js +117 -0
  16. package/dist/llm.js.map +1 -0
  17. package/dist/llm.test.cjs +14 -0
  18. package/dist/llm.test.cjs.map +1 -0
  19. package/dist/llm.test.d.cts +2 -0
  20. package/dist/llm.test.d.ts +2 -0
  21. package/dist/llm.test.d.ts.map +1 -0
  22. package/dist/llm.test.js +13 -0
  23. package/dist/llm.test.js.map +1 -0
  24. package/dist/stt.cjs +271 -0
  25. package/dist/stt.cjs.map +1 -0
  26. package/dist/stt.d.cts +18 -0
  27. package/dist/stt.d.ts +18 -0
  28. package/dist/stt.d.ts.map +1 -0
  29. package/dist/stt.js +246 -0
  30. package/dist/stt.js.map +1 -0
  31. package/dist/stt.test.cjs +9 -0
  32. package/dist/stt.test.cjs.map +1 -0
  33. package/dist/stt.test.d.cts +2 -0
  34. package/dist/stt.test.d.ts +2 -0
  35. package/dist/stt.test.d.ts.map +1 -0
  36. package/dist/stt.test.js +8 -0
  37. package/dist/stt.test.js.map +1 -0
  38. package/dist/tts.cjs +161 -0
  39. package/dist/tts.cjs.map +1 -0
  40. package/dist/tts.d.cts +45 -0
  41. package/dist/tts.d.ts +45 -0
  42. package/dist/tts.d.ts.map +1 -0
  43. package/dist/tts.js +141 -0
  44. package/dist/tts.js.map +1 -0
  45. package/dist/tts.test.cjs +9 -0
  46. package/dist/tts.test.cjs.map +1 -0
  47. package/dist/tts.test.d.cts +2 -0
  48. package/dist/tts.test.d.ts +2 -0
  49. package/dist/tts.test.d.ts.map +1 -0
  50. package/dist/tts.test.js +8 -0
  51. package/dist/tts.test.js.map +1 -0
  52. package/dist/types.cjs +17 -0
  53. package/dist/types.cjs.map +1 -0
  54. package/dist/types.d.cts +54 -0
  55. package/dist/types.d.ts +54 -0
  56. package/dist/types.d.ts.map +1 -0
  57. package/dist/types.js +1 -0
  58. package/dist/types.js.map +1 -0
  59. package/package.json +68 -0
  60. package/src/index.ts +20 -0
  61. package/src/llm.test.ts +16 -0
  62. package/src/llm.ts +172 -0
  63. package/src/stt.test.ts +11 -0
  64. package/src/stt.ts +298 -0
  65. package/src/tts.test.ts +11 -0
  66. package/src/tts.ts +202 -0
  67. package/src/types.ts +55 -0
package/dist/stt.js ADDED
@@ -0,0 +1,246 @@
1
+ import { AudioByteStream, Task, log, stt, waitForAbort } from "@livekit/agents";
2
+ import { WebSocket } from "ws";
3
+ const defaultSTTOptions = {
4
+ environment: "production",
5
+ encoding: "pcm_s16le",
6
+ sampleRate: 16e3,
7
+ bufferSizeSeconds: 0.032,
8
+ enablePartialTranscripts: true,
9
+ partialTranscriptIntervalS: 0.5,
10
+ finalTranscriptMaxDurationS: 5,
11
+ audioLanguage: "en",
12
+ languageDetectionOnly: false,
13
+ vadThreshold: 0.5,
14
+ vadMinSilenceDurationMs: 300,
15
+ vadSpeechPadMs: 30
16
+ };
17
+ class STT extends stt.STT {
18
+ #opts;
19
+ #logger = log();
20
+ label = "baseten.STT";
21
+ constructor(opts = {}) {
22
+ super({
23
+ streaming: true,
24
+ interimResults: opts.enablePartialTranscripts ?? defaultSTTOptions.enablePartialTranscripts
25
+ });
26
+ const apiKey = opts.apiKey ?? process.env.BASETEN_API_KEY;
27
+ const modelId = opts.modelId ?? process.env.BASETEN_STT_MODEL_ID;
28
+ if (!apiKey) {
29
+ throw new Error(
30
+ "Baseten API key is required, either pass it as `apiKey` or set $BASETEN_API_KEY"
31
+ );
32
+ }
33
+ if (!modelId) {
34
+ throw new Error(
35
+ "Baseten model ID is required, either pass it as `modelId` or set $BASETEN_STT_MODEL_ID"
36
+ );
37
+ }
38
+ this.#opts = {
39
+ ...defaultSTTOptions,
40
+ ...opts,
41
+ apiKey,
42
+ modelId
43
+ };
44
+ }
45
+ // eslint-disable-next-line
46
+ async _recognize(_) {
47
+ throw new Error("Recognize is not supported on Baseten STT");
48
+ }
49
+ updateOptions(opts) {
50
+ this.#opts = { ...this.#opts, ...opts };
51
+ }
52
+ stream() {
53
+ return new SpeechStream(this, this.#opts);
54
+ }
55
+ }
56
+ class SpeechStream extends stt.SpeechStream {
57
+ #opts;
58
+ #logger = log();
59
+ #speaking = false;
60
+ #requestId = "";
61
+ label = "baseten.SpeechStream";
62
+ constructor(stt2, opts) {
63
+ super(stt2, opts.sampleRate);
64
+ this.#opts = opts;
65
+ this.closed = false;
66
+ }
67
+ getWsUrl() {
68
+ return `wss://model-${this.#opts.modelId}.api.baseten.co/environments/${this.#opts.environment}/websocket`;
69
+ }
70
+ async run() {
71
+ const maxRetry = 32;
72
+ let retries = 0;
73
+ while (!this.input.closed && !this.closed) {
74
+ const url = this.getWsUrl();
75
+ const headers = {
76
+ Authorization: `Api-Key ${this.#opts.apiKey}`
77
+ };
78
+ const ws = new WebSocket(url, { headers });
79
+ try {
80
+ await new Promise((resolve, reject) => {
81
+ ws.on("open", resolve);
82
+ ws.on("error", (error) => reject(error));
83
+ ws.on("close", (code) => reject(`WebSocket returned ${code}`));
84
+ });
85
+ await this.#runWS(ws);
86
+ } catch (e) {
87
+ if (!this.closed && !this.input.closed) {
88
+ if (retries >= maxRetry) {
89
+ throw new Error(`failed to connect to Baseten after ${retries} attempts: ${e}`);
90
+ }
91
+ const delay = Math.min(retries * 5, 10);
92
+ retries++;
93
+ this.#logger.warn(
94
+ `failed to connect to Baseten, retrying in ${delay} seconds: ${e} (${retries}/${maxRetry})`
95
+ );
96
+ await new Promise((resolve) => setTimeout(resolve, delay * 1e3));
97
+ } else {
98
+ this.#logger.warn(
99
+ `Baseten disconnected, connection is closed: ${e} (inputClosed: ${this.input.closed}, isClosed: ${this.closed})`
100
+ );
101
+ }
102
+ }
103
+ }
104
+ this.closed = true;
105
+ }
106
+ async #runWS(ws) {
107
+ let closing = false;
108
+ const metadata = {
109
+ streaming_vad_config: {
110
+ threshold: this.#opts.vadThreshold,
111
+ min_silence_duration_ms: this.#opts.vadMinSilenceDurationMs,
112
+ speech_pad_ms: this.#opts.vadSpeechPadMs
113
+ },
114
+ streaming_params: {
115
+ encoding: this.#opts.encoding ?? "pcm_s16le",
116
+ sample_rate: this.#opts.sampleRate ?? 16e3,
117
+ enable_partial_transcripts: this.#opts.enablePartialTranscripts,
118
+ partial_transcript_interval_s: this.#opts.partialTranscriptIntervalS,
119
+ final_transcript_max_duration_s: this.#opts.finalTranscriptMaxDurationS
120
+ },
121
+ whisper_params: {
122
+ prompt: this.#opts.prompt,
123
+ audio_language: this.#opts.audioLanguage ?? "en",
124
+ language_detection_only: this.#opts.languageDetectionOnly ?? false
125
+ }
126
+ };
127
+ ws.send(JSON.stringify(metadata));
128
+ const sendTask = async () => {
129
+ const sampleRate = this.#opts.sampleRate ?? 16e3;
130
+ const samplesPerChunk = sampleRate === 16e3 ? 512 : 256;
131
+ const audioByteStream = new AudioByteStream(sampleRate, 1, samplesPerChunk);
132
+ try {
133
+ while (!this.closed) {
134
+ const result = await this.input.next();
135
+ if (result.done) {
136
+ break;
137
+ }
138
+ const data = result.value;
139
+ let frames;
140
+ if (data === SpeechStream.FLUSH_SENTINEL) {
141
+ frames = audioByteStream.flush();
142
+ } else {
143
+ if (data.sampleRate !== sampleRate || data.channels !== 1) {
144
+ throw new Error(
145
+ `sample rate or channel count mismatch: expected ${sampleRate}Hz/1ch, got ${data.sampleRate}Hz/${data.channels}ch`
146
+ );
147
+ }
148
+ frames = audioByteStream.write(data.data.buffer);
149
+ }
150
+ for (const frame of frames) {
151
+ const buffer = Buffer.from(
152
+ frame.data.buffer,
153
+ frame.data.byteOffset,
154
+ frame.data.byteLength
155
+ );
156
+ ws.send(buffer);
157
+ }
158
+ }
159
+ } finally {
160
+ closing = true;
161
+ ws.close();
162
+ }
163
+ };
164
+ const listenTask = Task.from(async (controller) => {
165
+ const listenMessage = new Promise((resolve, reject) => {
166
+ ws.on("message", (data) => {
167
+ try {
168
+ let jsonString;
169
+ if (typeof data === "string") {
170
+ jsonString = data;
171
+ } else if (data instanceof Buffer) {
172
+ jsonString = data.toString("utf-8");
173
+ } else if (Array.isArray(data)) {
174
+ jsonString = Buffer.concat(data).toString("utf-8");
175
+ } else {
176
+ return;
177
+ }
178
+ const msg = JSON.parse(jsonString);
179
+ const isFinal = msg.is_final ?? true;
180
+ const segments = msg.segments ?? [];
181
+ const transcript = msg.transcript ?? "";
182
+ const confidence = msg.confidence ?? 0;
183
+ const languageCode = msg.language_code ?? this.#opts.audioLanguage;
184
+ if (!transcript) {
185
+ this.#logger.debug("Received non-transcript message:", msg);
186
+ return;
187
+ }
188
+ if (!this.#speaking && !isFinal) {
189
+ this.#speaking = true;
190
+ this.queue.put({ type: stt.SpeechEventType.START_OF_SPEECH });
191
+ }
192
+ const startTime = segments.length > 0 ? segments[0].start ?? 0 : 0;
193
+ const endTime = segments.length > 0 ? segments[segments.length - 1].end ?? 0 : 0;
194
+ const speechData = {
195
+ language: languageCode,
196
+ text: transcript,
197
+ startTime,
198
+ endTime,
199
+ confidence
200
+ };
201
+ if (!isFinal) {
202
+ this.queue.put({
203
+ type: stt.SpeechEventType.INTERIM_TRANSCRIPT,
204
+ alternatives: [speechData]
205
+ });
206
+ } else {
207
+ this.queue.put({
208
+ type: stt.SpeechEventType.FINAL_TRANSCRIPT,
209
+ alternatives: [speechData]
210
+ });
211
+ if (this.#speaking) {
212
+ this.#speaking = false;
213
+ this.queue.put({ type: stt.SpeechEventType.END_OF_SPEECH });
214
+ }
215
+ }
216
+ if (this.closed || closing) {
217
+ resolve();
218
+ }
219
+ } catch (err) {
220
+ this.#logger.error(`STT: Error processing message: ${data}`);
221
+ reject(err);
222
+ }
223
+ });
224
+ ws.on("error", (err) => {
225
+ if (!closing) {
226
+ reject(err);
227
+ }
228
+ });
229
+ ws.on("close", () => {
230
+ if (!closing) {
231
+ resolve();
232
+ }
233
+ });
234
+ });
235
+ await Promise.race([listenMessage, waitForAbort(controller.signal)]);
236
+ }, this.abortController);
237
+ await Promise.all([sendTask(), listenTask.result]);
238
+ closing = true;
239
+ ws.close();
240
+ }
241
+ }
242
+ export {
243
+ STT,
244
+ SpeechStream
245
+ };
246
+ //# sourceMappingURL=stt.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../src/stt.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { type AudioBuffer, AudioByteStream, Task, log, stt, waitForAbort } from '@livekit/agents';\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { WebSocket } from 'ws';\nimport type { BasetenSttOptions } from './types.js';\n\nconst defaultSTTOptions: Partial<BasetenSttOptions> = {\n environment: 'production',\n encoding: 'pcm_s16le',\n sampleRate: 16000,\n bufferSizeSeconds: 0.032,\n enablePartialTranscripts: true,\n partialTranscriptIntervalS: 0.5,\n finalTranscriptMaxDurationS: 5,\n audioLanguage: 'en',\n languageDetectionOnly: false,\n vadThreshold: 0.5,\n vadMinSilenceDurationMs: 300,\n vadSpeechPadMs: 30,\n};\n\nexport class STT extends stt.STT {\n #opts: BasetenSttOptions;\n #logger = log();\n label = 'baseten.STT';\n\n constructor(opts: Partial<BasetenSttOptions> = {}) {\n super({\n streaming: true,\n interimResults: opts.enablePartialTranscripts ?? defaultSTTOptions.enablePartialTranscripts!,\n });\n\n const apiKey = opts.apiKey ?? process.env.BASETEN_API_KEY;\n const modelId = opts.modelId ?? 
process.env.BASETEN_STT_MODEL_ID;\n\n if (!apiKey) {\n throw new Error(\n 'Baseten API key is required, either pass it as `apiKey` or set $BASETEN_API_KEY',\n );\n }\n if (!modelId) {\n throw new Error(\n 'Baseten model ID is required, either pass it as `modelId` or set $BASETEN_STT_MODEL_ID',\n );\n }\n\n this.#opts = {\n ...defaultSTTOptions,\n ...opts,\n apiKey,\n modelId,\n } as BasetenSttOptions;\n }\n\n // eslint-disable-next-line\n async _recognize(_: AudioBuffer): Promise<stt.SpeechEvent> {\n throw new Error('Recognize is not supported on Baseten STT');\n }\n\n updateOptions(opts: Partial<BasetenSttOptions>) {\n this.#opts = { ...this.#opts, ...opts };\n }\n\n stream(): SpeechStream {\n return new SpeechStream(this, this.#opts);\n }\n}\n\nexport class SpeechStream extends stt.SpeechStream {\n #opts: BasetenSttOptions;\n #logger = log();\n #speaking = false;\n #requestId = '';\n label = 'baseten.SpeechStream';\n\n constructor(stt: STT, opts: BasetenSttOptions) {\n super(stt, opts.sampleRate);\n this.#opts = opts;\n this.closed = false;\n }\n\n private getWsUrl(): string {\n return `wss://model-${this.#opts.modelId}.api.baseten.co/environments/${this.#opts.environment}/websocket`;\n }\n\n protected async run() {\n const maxRetry = 32;\n let retries = 0;\n\n while (!this.input.closed && !this.closed) {\n const url = this.getWsUrl();\n const headers = {\n Authorization: `Api-Key ${this.#opts.apiKey}`,\n };\n\n const ws = new WebSocket(url, { headers });\n\n try {\n await new Promise((resolve, reject) => {\n ws.on('open', resolve);\n ws.on('error', (error) => reject(error));\n ws.on('close', (code) => reject(`WebSocket returned ${code}`));\n });\n\n await this.#runWS(ws);\n } catch (e) {\n if (!this.closed && !this.input.closed) {\n if (retries >= maxRetry) {\n throw new Error(`failed to connect to Baseten after ${retries} attempts: ${e}`);\n }\n\n const delay = Math.min(retries * 5, 10);\n retries++;\n\n this.#logger.warn(\n `failed to connect to Baseten, 
retrying in ${delay} seconds: ${e} (${retries}/${maxRetry})`,\n );\n await new Promise((resolve) => setTimeout(resolve, delay * 1000));\n } else {\n this.#logger.warn(\n `Baseten disconnected, connection is closed: ${e} (inputClosed: ${this.input.closed}, isClosed: ${this.closed})`,\n );\n }\n }\n }\n\n this.closed = true;\n }\n\n async #runWS(ws: WebSocket) {\n let closing = false;\n\n // Send initial metadata\n const metadata = {\n streaming_vad_config: {\n threshold: this.#opts.vadThreshold,\n min_silence_duration_ms: this.#opts.vadMinSilenceDurationMs,\n speech_pad_ms: this.#opts.vadSpeechPadMs,\n },\n streaming_params: {\n encoding: this.#opts.encoding ?? 'pcm_s16le',\n sample_rate: this.#opts.sampleRate ?? 16000,\n enable_partial_transcripts: this.#opts.enablePartialTranscripts,\n partial_transcript_interval_s: this.#opts.partialTranscriptIntervalS,\n final_transcript_max_duration_s: this.#opts.finalTranscriptMaxDurationS,\n },\n whisper_params: {\n prompt: this.#opts.prompt,\n audio_language: this.#opts.audioLanguage ?? 'en',\n language_detection_only: this.#opts.languageDetectionOnly ?? false,\n },\n };\n ws.send(JSON.stringify(metadata));\n\n const sendTask = async () => {\n const sampleRate = this.#opts.sampleRate ?? 16000;\n const samplesPerChunk = sampleRate === 16000 ? 
512 : 256;\n const audioByteStream = new AudioByteStream(sampleRate, 1, samplesPerChunk);\n\n try {\n while (!this.closed) {\n const result = await this.input.next();\n if (result.done) {\n break;\n }\n\n const data = result.value;\n\n let frames: AudioFrame[];\n if (data === SpeechStream.FLUSH_SENTINEL) {\n // Flush any remaining buffered audio\n frames = audioByteStream.flush();\n } else {\n if (data.sampleRate !== sampleRate || data.channels !== 1) {\n throw new Error(\n `sample rate or channel count mismatch: expected ${sampleRate}Hz/1ch, got ${data.sampleRate}Hz/${data.channels}ch`,\n );\n }\n frames = audioByteStream.write(data.data.buffer as ArrayBuffer);\n }\n\n for (const frame of frames) {\n const buffer = Buffer.from(\n frame.data.buffer,\n frame.data.byteOffset,\n frame.data.byteLength,\n );\n ws.send(buffer);\n }\n }\n } finally {\n closing = true;\n ws.close();\n }\n };\n\n const listenTask = Task.from(async (controller) => {\n const listenMessage = new Promise<void>((resolve, reject) => {\n ws.on('message', (data) => {\n try {\n let jsonString: string;\n\n if (typeof data === 'string') {\n jsonString = data;\n } else if (data instanceof Buffer) {\n jsonString = data.toString('utf-8');\n } else if (Array.isArray(data)) {\n jsonString = Buffer.concat(data).toString('utf-8');\n } else {\n return;\n }\n\n const msg = JSON.parse(jsonString);\n\n // Parse response format matching Python implementation\n const isFinal = msg.is_final ?? true;\n const segments = msg.segments ?? [];\n const transcript = msg.transcript ?? '';\n const confidence = msg.confidence ?? 0.0;\n const languageCode = msg.language_code ?? 
this.#opts.audioLanguage;\n\n // Skip if no transcript text\n if (!transcript) {\n this.#logger.debug('Received non-transcript message:', msg);\n return;\n }\n\n // Emit START_OF_SPEECH if not already speaking (only for interim or first final)\n if (!this.#speaking && !isFinal) {\n this.#speaking = true;\n this.queue.put({ type: stt.SpeechEventType.START_OF_SPEECH });\n }\n\n // Extract timing from segments\n const startTime = segments.length > 0 ? segments[0].start ?? 0.0 : 0.0;\n const endTime = segments.length > 0 ? segments[segments.length - 1].end ?? 0.0 : 0.0;\n\n const speechData: stt.SpeechData = {\n language: languageCode!,\n text: transcript,\n startTime,\n endTime,\n confidence,\n };\n\n // Handle interim vs final transcripts (matching Python implementation)\n if (!isFinal) {\n // Interim transcript\n this.queue.put({\n type: stt.SpeechEventType.INTERIM_TRANSCRIPT,\n alternatives: [speechData],\n });\n } else {\n // Final transcript\n this.queue.put({\n type: stt.SpeechEventType.FINAL_TRANSCRIPT,\n alternatives: [speechData],\n });\n\n // Emit END_OF_SPEECH after final transcript\n if (this.#speaking) {\n this.#speaking = false;\n this.queue.put({ type: stt.SpeechEventType.END_OF_SPEECH });\n }\n }\n\n if (this.closed || closing) {\n resolve();\n }\n } catch (err) {\n this.#logger.error(`STT: Error processing message: ${data}`);\n reject(err);\n }\n });\n\n ws.on('error', (err) => {\n if (!closing) {\n reject(err);\n }\n });\n\n ws.on('close', () => {\n if (!closing) {\n resolve();\n }\n });\n });\n\n await Promise.race([listenMessage, waitForAbort(controller.signal)]);\n }, this.abortController);\n\n await Promise.all([sendTask(), listenTask.result]);\n closing = true;\n ws.close();\n 
}\n}\n"],"mappings":"AAGA,SAA2B,iBAAiB,MAAM,KAAK,KAAK,oBAAoB;AAEhF,SAAS,iBAAiB;AAG1B,MAAM,oBAAgD;AAAA,EACpD,aAAa;AAAA,EACb,UAAU;AAAA,EACV,YAAY;AAAA,EACZ,mBAAmB;AAAA,EACnB,0BAA0B;AAAA,EAC1B,4BAA4B;AAAA,EAC5B,6BAA6B;AAAA,EAC7B,eAAe;AAAA,EACf,uBAAuB;AAAA,EACvB,cAAc;AAAA,EACd,yBAAyB;AAAA,EACzB,gBAAgB;AAClB;AAEO,MAAM,YAAY,IAAI,IAAI;AAAA,EAC/B;AAAA,EACA,UAAU,IAAI;AAAA,EACd,QAAQ;AAAA,EAER,YAAY,OAAmC,CAAC,GAAG;AACjD,UAAM;AAAA,MACJ,WAAW;AAAA,MACX,gBAAgB,KAAK,4BAA4B,kBAAkB;AAAA,IACrE,CAAC;AAED,UAAM,SAAS,KAAK,UAAU,QAAQ,IAAI;AAC1C,UAAM,UAAU,KAAK,WAAW,QAAQ,IAAI;AAE5C,QAAI,CAAC,QAAQ;AACX,YAAM,IAAI;AAAA,QACR;AAAA,MACF;AAAA,IACF;AACA,QAAI,CAAC,SAAS;AACZ,YAAM,IAAI;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAEA,SAAK,QAAQ;AAAA,MACX,GAAG;AAAA,MACH,GAAG;AAAA,MACH;AAAA,MACA;AAAA,IACF;AAAA,EACF;AAAA;AAAA,EAGA,MAAM,WAAW,GAA0C;AACzD,UAAM,IAAI,MAAM,2CAA2C;AAAA,EAC7D;AAAA,EAEA,cAAc,MAAkC;AAC9C,SAAK,QAAQ,EAAE,GAAG,KAAK,OAAO,GAAG,KAAK;AAAA,EACxC;AAAA,EAEA,SAAuB;AACrB,WAAO,IAAI,aAAa,MAAM,KAAK,KAAK;AAAA,EAC1C;AACF;AAEO,MAAM,qBAAqB,IAAI,aAAa;AAAA,EACjD;AAAA,EACA,UAAU,IAAI;AAAA,EACd,YAAY;AAAA,EACZ,aAAa;AAAA,EACb,QAAQ;AAAA,EAER,YAAYA,MAAU,MAAyB;AAC7C,UAAMA,MAAK,KAAK,UAAU;AAC1B,SAAK,QAAQ;AACb,SAAK,SAAS;AAAA,EAChB;AAAA,EAEQ,WAAmB;AACzB,WAAO,eAAe,KAAK,MAAM,OAAO,gCAAgC,KAAK,MAAM,WAAW;AAAA,EAChG;AAAA,EAEA,MAAgB,MAAM;AACpB,UAAM,WAAW;AACjB,QAAI,UAAU;AAEd,WAAO,CAAC,KAAK,MAAM,UAAU,CAAC,KAAK,QAAQ;AACzC,YAAM,MAAM,KAAK,SAAS;AAC1B,YAAM,UAAU;AAAA,QACd,eAAe,WAAW,KAAK,MAAM,MAAM;AAAA,MAC7C;AAEA,YAAM,KAAK,IAAI,UAAU,KAAK,EAAE,QAAQ,CAAC;AAEzC,UAAI;AACF,cAAM,IAAI,QAAQ,CAAC,SAAS,WAAW;AACrC,aAAG,GAAG,QAAQ,OAAO;AACrB,aAAG,GAAG,SAAS,CAAC,UAAU,OAAO,KAAK,CAAC;AACvC,aAAG,GAAG,SAAS,CAAC,SAAS,OAAO,sBAAsB,IAAI,EAAE,CAAC;AAAA,QAC/D,CAAC;AAED,cAAM,KAAK,OAAO,EAAE;AAAA,MACtB,SAAS,GAAG;AACV,YAAI,CAAC,KAAK,UAAU,CAAC,KAAK,MAAM,QAAQ;AACtC,cAAI,WAAW,UAAU;AACvB,kBAAM,IAAI,MAAM,sCAAsC,OAAO,cAAc,CAAC,EAAE;AAAA,UAChF;AAEA,gBAAM,QAAQ,KAAK,IAAI,UAAU,GAAG,EAAE;AACtC;AAEA,eAAK,QAAQ;AAAA,YACX,6CAA6C,KAAK,aAAa,CAAC,KAAK,OAAO,IAAI,QAAQ;AAAA,UAC1F;AACA,gBAAM,IAA
I,QAAQ,CAAC,YAAY,WAAW,SAAS,QAAQ,GAAI,CAAC;AAAA,QAClE,OAAO;AACL,eAAK,QAAQ;AAAA,YACX,+CAA+C,CAAC,kBAAkB,KAAK,MAAM,MAAM,eAAe,KAAK,MAAM;AAAA,UAC/G;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAEA,SAAK,SAAS;AAAA,EAChB;AAAA,EAEA,MAAM,OAAO,IAAe;AAC1B,QAAI,UAAU;AAGd,UAAM,WAAW;AAAA,MACf,sBAAsB;AAAA,QACpB,WAAW,KAAK,MAAM;AAAA,QACtB,yBAAyB,KAAK,MAAM;AAAA,QACpC,eAAe,KAAK,MAAM;AAAA,MAC5B;AAAA,MACA,kBAAkB;AAAA,QAChB,UAAU,KAAK,MAAM,YAAY;AAAA,QACjC,aAAa,KAAK,MAAM,cAAc;AAAA,QACtC,4BAA4B,KAAK,MAAM;AAAA,QACvC,+BAA+B,KAAK,MAAM;AAAA,QAC1C,iCAAiC,KAAK,MAAM;AAAA,MAC9C;AAAA,MACA,gBAAgB;AAAA,QACd,QAAQ,KAAK,MAAM;AAAA,QACnB,gBAAgB,KAAK,MAAM,iBAAiB;AAAA,QAC5C,yBAAyB,KAAK,MAAM,yBAAyB;AAAA,MAC/D;AAAA,IACF;AACA,OAAG,KAAK,KAAK,UAAU,QAAQ,CAAC;AAEhC,UAAM,WAAW,YAAY;AAC3B,YAAM,aAAa,KAAK,MAAM,cAAc;AAC5C,YAAM,kBAAkB,eAAe,OAAQ,MAAM;AACrD,YAAM,kBAAkB,IAAI,gBAAgB,YAAY,GAAG,eAAe;AAE1E,UAAI;AACF,eAAO,CAAC,KAAK,QAAQ;AACnB,gBAAM,SAAS,MAAM,KAAK,MAAM,KAAK;AACrC,cAAI,OAAO,MAAM;AACf;AAAA,UACF;AAEA,gBAAM,OAAO,OAAO;AAEpB,cAAI;AACJ,cAAI,SAAS,aAAa,gBAAgB;AAExC,qBAAS,gBAAgB,MAAM;AAAA,UACjC,OAAO;AACL,gBAAI,KAAK,eAAe,cAAc,KAAK,aAAa,GAAG;AACzD,oBAAM,IAAI;AAAA,gBACR,mDAAmD,UAAU,eAAe,KAAK,UAAU,MAAM,KAAK,QAAQ;AAAA,cAChH;AAAA,YACF;AACA,qBAAS,gBAAgB,MAAM,KAAK,KAAK,MAAqB;AAAA,UAChE;AAEA,qBAAW,SAAS,QAAQ;AAC1B,kBAAM,SAAS,OAAO;AAAA,cACpB,MAAM,KAAK;AAAA,cACX,MAAM,KAAK;AAAA,cACX,MAAM,KAAK;AAAA,YACb;AACA,eAAG,KAAK,MAAM;AAAA,UAChB;AAAA,QACF;AAAA,MACF,UAAE;AACA,kBAAU;AACV,WAAG,MAAM;AAAA,MACX;AAAA,IACF;AAEA,UAAM,aAAa,KAAK,KAAK,OAAO,eAAe;AACjD,YAAM,gBAAgB,IAAI,QAAc,CAAC,SAAS,WAAW;AAC3D,WAAG,GAAG,WAAW,CAAC,SAAS;AACzB,cAAI;AACF,gBAAI;AAEJ,gBAAI,OAAO,SAAS,UAAU;AAC5B,2BAAa;AAAA,YACf,WAAW,gBAAgB,QAAQ;AACjC,2BAAa,KAAK,SAAS,OAAO;AAAA,YACpC,WAAW,MAAM,QAAQ,IAAI,GAAG;AAC9B,2BAAa,OAAO,OAAO,IAAI,EAAE,SAAS,OAAO;AAAA,YACnD,OAAO;AACL;AAAA,YACF;AAEA,kBAAM,MAAM,KAAK,MAAM,UAAU;AAGjC,kBAAM,UAAU,IAAI,YAAY;AAChC,kBAAM,WAAW,IAAI,YAAY,CAAC;AAClC,kBAAM,aAAa,IAAI,cAAc;AACrC,kBAAM,aAAa,IAAI,cAAc;AACrC,kBAAM,eAAe,IAAI,iBAAiB,KAAK,MAAM;AAGrD,gBAAI,CAAC,YAAY;AACf,mBAAK,QAAQ,MAAM,o
CAAoC,GAAG;AAC1D;AAAA,YACF;AAGA,gBAAI,CAAC,KAAK,aAAa,CAAC,SAAS;AAC/B,mBAAK,YAAY;AACjB,mBAAK,MAAM,IAAI,EAAE,MAAM,IAAI,gBAAgB,gBAAgB,CAAC;AAAA,YAC9D;AAGA,kBAAM,YAAY,SAAS,SAAS,IAAI,SAAS,CAAC,EAAE,SAAS,IAAM;AACnE,kBAAM,UAAU,SAAS,SAAS,IAAI,SAAS,SAAS,SAAS,CAAC,EAAE,OAAO,IAAM;AAEjF,kBAAM,aAA6B;AAAA,cACjC,UAAU;AAAA,cACV,MAAM;AAAA,cACN;AAAA,cACA;AAAA,cACA;AAAA,YACF;AAGA,gBAAI,CAAC,SAAS;AAEZ,mBAAK,MAAM,IAAI;AAAA,gBACb,MAAM,IAAI,gBAAgB;AAAA,gBAC1B,cAAc,CAAC,UAAU;AAAA,cAC3B,CAAC;AAAA,YACH,OAAO;AAEL,mBAAK,MAAM,IAAI;AAAA,gBACb,MAAM,IAAI,gBAAgB;AAAA,gBAC1B,cAAc,CAAC,UAAU;AAAA,cAC3B,CAAC;AAGD,kBAAI,KAAK,WAAW;AAClB,qBAAK,YAAY;AACjB,qBAAK,MAAM,IAAI,EAAE,MAAM,IAAI,gBAAgB,cAAc,CAAC;AAAA,cAC5D;AAAA,YACF;AAEA,gBAAI,KAAK,UAAU,SAAS;AAC1B,sBAAQ;AAAA,YACV;AAAA,UACF,SAAS,KAAK;AACZ,iBAAK,QAAQ,MAAM,kCAAkC,IAAI,EAAE;AAC3D,mBAAO,GAAG;AAAA,UACZ;AAAA,QACF,CAAC;AAED,WAAG,GAAG,SAAS,CAAC,QAAQ;AACtB,cAAI,CAAC,SAAS;AACZ,mBAAO,GAAG;AAAA,UACZ;AAAA,QACF,CAAC;AAED,WAAG,GAAG,SAAS,MAAM;AACnB,cAAI,CAAC,SAAS;AACZ,oBAAQ;AAAA,UACV;AAAA,QACF,CAAC;AAAA,MACH,CAAC;AAED,YAAM,QAAQ,KAAK,CAAC,eAAe,aAAa,WAAW,MAAM,CAAC,CAAC;AAAA,IACrE,GAAG,KAAK,eAAe;AAEvB,UAAM,QAAQ,IAAI,CAAC,SAAS,GAAG,WAAW,MAAM,CAAC;AACjD,cAAU;AACV,OAAG,MAAM;AAAA,EACX;AACF;","names":["stt"]}
@@ -0,0 +1,9 @@
1
+ "use strict";
2
+ var import_agents_plugin_silero = require("@livekit/agents-plugin-silero");
3
+ var import_agents_plugins_test = require("@livekit/agents-plugins-test");
4
+ var import_vitest = require("vitest");
5
+ var import_stt = require("./stt.cjs");
6
+ (0, import_vitest.describe)("Baseten", async () => {
7
+ await (0, import_agents_plugins_test.stt)(new import_stt.STT(), await import_agents_plugin_silero.VAD.load(), { streaming: true });
8
+ });
9
+ //# sourceMappingURL=stt.test.cjs.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../src/stt.test.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { VAD } from '@livekit/agents-plugin-silero';\nimport { stt } from '@livekit/agents-plugins-test';\nimport { describe } from 'vitest';\nimport { STT } from './stt.js';\n\ndescribe('Baseten', async () => {\n await stt(new STT(), await VAD.load(), { streaming: true });\n});\n"],"mappings":";AAGA,kCAAoB;AACpB,iCAAoB;AACpB,oBAAyB;AACzB,iBAAoB;AAAA,IAEpB,wBAAS,WAAW,YAAY;AAC9B,YAAM,gCAAI,IAAI,eAAI,GAAG,MAAM,gCAAI,KAAK,GAAG,EAAE,WAAW,KAAK,CAAC;AAC5D,CAAC;","names":[]}
@@ -0,0 +1,2 @@
1
+ export {};
2
+ //# sourceMappingURL=stt.test.d.ts.map
@@ -0,0 +1,2 @@
1
+ export {};
2
+ //# sourceMappingURL=stt.test.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"stt.test.d.ts","sourceRoot":"","sources":["../src/stt.test.ts"],"names":[],"mappings":""}
@@ -0,0 +1,8 @@
1
+ import { VAD } from "@livekit/agents-plugin-silero";
2
+ import { stt } from "@livekit/agents-plugins-test";
3
+ import { describe } from "vitest";
4
+ import { STT } from "./stt.js";
5
+ describe("Baseten", async () => {
6
+ await stt(new STT(), await VAD.load(), { streaming: true });
7
+ });
8
+ //# sourceMappingURL=stt.test.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../src/stt.test.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { VAD } from '@livekit/agents-plugin-silero';\nimport { stt } from '@livekit/agents-plugins-test';\nimport { describe } from 'vitest';\nimport { STT } from './stt.js';\n\ndescribe('Baseten', async () => {\n await stt(new STT(), await VAD.load(), { streaming: true });\n});\n"],"mappings":"AAGA,SAAS,WAAW;AACpB,SAAS,WAAW;AACpB,SAAS,gBAAgB;AACzB,SAAS,WAAW;AAEpB,SAAS,WAAW,YAAY;AAC9B,QAAM,IAAI,IAAI,IAAI,GAAG,MAAM,IAAI,KAAK,GAAG,EAAE,WAAW,KAAK,CAAC;AAC5D,CAAC;","names":[]}
package/dist/tts.cjs ADDED
@@ -0,0 +1,161 @@
1
+ "use strict";
2
+ var __defProp = Object.defineProperty;
3
+ var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
4
+ var __getOwnPropNames = Object.getOwnPropertyNames;
5
+ var __hasOwnProp = Object.prototype.hasOwnProperty;
6
+ var __export = (target, all) => {
7
+ for (var name in all)
8
+ __defProp(target, name, { get: all[name], enumerable: true });
9
+ };
10
+ var __copyProps = (to, from, except, desc) => {
11
+ if (from && typeof from === "object" || typeof from === "function") {
12
+ for (let key of __getOwnPropNames(from))
13
+ if (!__hasOwnProp.call(to, key) && key !== except)
14
+ __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
15
+ }
16
+ return to;
17
+ };
18
+ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
19
+ var tts_exports = {};
20
+ __export(tts_exports, {
21
+ ChunkedStream: () => ChunkedStream,
22
+ TTS: () => TTS
23
+ });
24
+ module.exports = __toCommonJS(tts_exports);
25
+ var import_agents = require("@livekit/agents");
26
+ const defaultTTSOptions = {
27
+ voice: "tara",
28
+ language: "en",
29
+ temperature: 0.6
30
+ };
31
+ class TTS extends import_agents.tts.TTS {
32
+ opts;
33
+ label = "baseten.TTS";
34
+ abortController = new AbortController();
35
+ constructor(opts = {}) {
36
+ super(24e3, 1, { streaming: false });
37
+ const apiKey = opts.apiKey ?? process.env.BASETEN_API_KEY;
38
+ const modelEndpoint = opts.modelEndpoint ?? process.env.BASETEN_MODEL_ENDPOINT;
39
+ if (!apiKey) {
40
+ throw new Error(
41
+ "Baseten API key is required, either pass it as `apiKey` or set $BASETEN_API_KEY"
42
+ );
43
+ }
44
+ if (!modelEndpoint) {
45
+ throw new Error(
46
+ "Baseten model endpoint is required, either pass it as `modelEndpoint` or set $BASETEN_MODEL_ENDPOINT"
47
+ );
48
+ }
49
+ this.opts = {
50
+ ...defaultTTSOptions,
51
+ ...opts,
52
+ apiKey,
53
+ modelEndpoint
54
+ };
55
+ }
56
+ updateOptions(opts) {
57
+ this.opts = {
58
+ ...this.opts,
59
+ ...opts
60
+ };
61
+ }
62
+ /**
63
+ * Synthesize speech for a given piece of text. Returns a `ChunkedStream`
64
+ * which will asynchronously fetch audio from Baseten and push frames into
65
+ * LiveKit's playback pipeline. If you need to cancel synthesis you can
66
+ * call {@link ChunkedStream.stop} on the returned object.
67
+ */
68
+ synthesize(text, connOptions, abortSignal) {
69
+ return new ChunkedStream(this, text, this.opts, connOptions, abortSignal);
70
+ }
71
+ stream() {
72
+ throw new Error("Streaming is not supported on Baseten TTS");
73
+ }
74
+ async close() {
75
+ this.abortController.abort();
76
+ }
77
+ }
78
+ class ChunkedStream extends import_agents.tts.ChunkedStream {
79
+ label = "baseten.ChunkedStream";
80
+ opts;
81
+ constructor(tts2, text, opts, connOptions, abortSignal) {
82
+ super(text, tts2, connOptions, abortSignal);
83
+ this.opts = opts;
84
+ }
85
+ /**
86
+ * Execute the synthesis request. This method is automatically invoked
87
+ * by the base class when the stream starts. It performs a POST request
88
+ * to the configured `modelEndpoint` with the input text and optional
89
+ * parameters. Audio chunks are streamed as they arrive and transformed
90
+ * into a sequence of `AudioFrame` objects that are enqueued immediately
91
+ * for playback.
92
+ */
93
+ async run() {
94
+ const { apiKey, modelEndpoint, voice, language, temperature, maxTokens } = this.opts;
95
+ const payload = {
96
+ prompt: this.inputText
97
+ };
98
+ if (voice) payload.voice = voice;
99
+ if (language) payload.language = language;
100
+ if (temperature !== void 0) payload.temperature = temperature;
101
+ if (maxTokens !== void 0) payload.max_tokens = maxTokens;
102
+ const headers = {
103
+ Authorization: `Api-Key ${apiKey}`,
104
+ "Content-Type": "application/json"
105
+ };
106
+ const response = await fetch(modelEndpoint, {
107
+ method: "POST",
108
+ headers,
109
+ body: JSON.stringify(payload),
110
+ signal: this.abortSignal
111
+ });
112
+ if (!response.ok) {
113
+ let errText;
114
+ try {
115
+ errText = await response.text();
116
+ } catch {
117
+ errText = response.statusText;
118
+ }
119
+ throw new Error(`Baseten TTS request failed: ${response.status} ${errText}`);
120
+ }
121
+ if (!response.body) {
122
+ throw new Error("Response body is not available for streaming");
123
+ }
124
+ const requestId = (0, import_agents.shortuuid)();
125
+ const audioByteStream = new import_agents.AudioByteStream(24e3, 1);
126
+ const reader = response.body.getReader();
127
+ try {
128
+ let lastFrame;
129
+ const sendLastFrame = (segmentId, final) => {
130
+ if (lastFrame) {
131
+ this.queue.put({ requestId, segmentId, frame: lastFrame, final });
132
+ lastFrame = void 0;
133
+ }
134
+ };
135
+ const abortPromise = (0, import_agents.waitForAbort)(this.abortSignal);
136
+ while (!this.abortSignal.aborted) {
137
+ const result = await Promise.race([reader.read(), abortPromise]);
138
+ if (result === void 0) break;
139
+ const { done, value } = result;
140
+ if (done) {
141
+ break;
142
+ }
143
+ const frames = audioByteStream.write(value.buffer);
144
+ for (const frame of frames) {
145
+ sendLastFrame(requestId, false);
146
+ lastFrame = frame;
147
+ }
148
+ }
149
+ sendLastFrame(requestId, true);
150
+ } finally {
151
+ reader.releaseLock();
152
+ this.queue.close();
153
+ }
154
+ }
155
+ }
156
+ // Annotate the CommonJS export names for ESM import in node:
157
+ 0 && (module.exports = {
158
+ ChunkedStream,
159
+ TTS
160
+ });
161
+ //# sourceMappingURL=tts.cjs.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../src/tts.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport {\n type APIConnectOptions,\n AudioByteStream,\n shortuuid,\n tts,\n waitForAbort,\n} from '@livekit/agents';\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport type { BasetenTTSOptions } from './types.js';\n\nconst defaultTTSOptions: Partial<BasetenTTSOptions> = {\n voice: 'tara',\n language: 'en',\n temperature: 0.6,\n};\n\n/**\n * Baseten TTS implementation (streaming, 24kHz mono)\n */\nexport class TTS extends tts.TTS {\n private opts: BasetenTTSOptions;\n label = 'baseten.TTS';\n private abortController = new AbortController();\n constructor(opts: Partial<BasetenTTSOptions> = {}) {\n /**\n * Baseten audio is 24kHz mono.\n * The Orpheus model generates audio chunks that are processed as they arrive,\n * which reduces latency and improves agent responsiveness.\n */\n super(24000, 1, { streaming: false });\n\n // Apply defaults and environment fallbacks.\n const apiKey = opts.apiKey ?? process.env.BASETEN_API_KEY;\n const modelEndpoint = opts.modelEndpoint ?? process.env.BASETEN_MODEL_ENDPOINT;\n\n if (!apiKey) {\n throw new Error(\n 'Baseten API key is required, either pass it as `apiKey` or set $BASETEN_API_KEY',\n );\n }\n if (!modelEndpoint) {\n throw new Error(\n 'Baseten model endpoint is required, either pass it as `modelEndpoint` or set $BASETEN_MODEL_ENDPOINT',\n );\n }\n\n this.opts = {\n ...defaultTTSOptions,\n ...opts,\n apiKey,\n modelEndpoint,\n } as BasetenTTSOptions;\n }\n\n updateOptions(opts: Partial<Omit<BasetenTTSOptions, 'apiKey' | 'modelEndpoint'>>) {\n this.opts = {\n ...this.opts,\n ...opts,\n } as BasetenTTSOptions;\n }\n\n /**\n * Synthesize speech for a given piece of text. Returns a `ChunkedStream`\n * which will asynchronously fetch audio from Baseten and push frames into\n * LiveKit's playback pipeline. 
If you need to cancel synthesis you can\n * call {@link ChunkedStream.stop} on the returned object.\n */\n synthesize(\n text: string,\n connOptions?: APIConnectOptions,\n abortSignal?: AbortSignal,\n ): ChunkedStream {\n return new ChunkedStream(this, text, this.opts, connOptions, abortSignal);\n }\n\n stream(): tts.SynthesizeStream {\n throw new Error('Streaming is not supported on Baseten TTS');\n }\n\n async close(): Promise<void> {\n this.abortController.abort();\n }\n}\n\n/**\n * Internal helper that performs the actual HTTP request and converts the\n * response into audio frames. It inherits from `tts.ChunkedStream` to\n * integrate with LiveKit's event and cancellation framework.\n *\n * This implementation streams audio chunks as they arrive from the Baseten\n * model endpoint, processing them incrementally instead of waiting for the\n * complete response.\n */\nexport class ChunkedStream extends tts.ChunkedStream {\n label = 'baseten.ChunkedStream';\n private readonly opts: BasetenTTSOptions;\n\n constructor(\n tts: TTS,\n text: string,\n opts: BasetenTTSOptions,\n connOptions?: APIConnectOptions,\n abortSignal?: AbortSignal,\n ) {\n super(text, tts, connOptions, abortSignal);\n this.opts = opts;\n }\n\n /**\n * Execute the synthesis request. This method is automatically invoked\n * by the base class when the stream starts. It performs a POST request\n * to the configured `modelEndpoint` with the input text and optional\n * parameters. 
Audio chunks are streamed as they arrive and transformed\n * into a sequence of `AudioFrame` objects that are enqueued immediately\n * for playback.\n */\n protected async run() {\n const { apiKey, modelEndpoint, voice, language, temperature, maxTokens } = this.opts;\n const payload: Record<string, unknown> = {\n prompt: this.inputText,\n };\n if (voice) payload.voice = voice;\n if (language) payload.language = language;\n if (temperature !== undefined) payload.temperature = temperature;\n if (maxTokens !== undefined) payload.max_tokens = maxTokens;\n\n const headers: Record<string, string> = {\n Authorization: `Api-Key ${apiKey}`,\n 'Content-Type': 'application/json',\n };\n\n const response = await fetch(modelEndpoint, {\n method: 'POST',\n headers,\n body: JSON.stringify(payload),\n signal: this.abortSignal,\n });\n\n if (!response.ok) {\n let errText: string;\n try {\n errText = await response.text();\n } catch {\n errText = response.statusText;\n }\n throw new Error(`Baseten TTS request failed: ${response.status} ${errText}`);\n }\n\n // Stream the response body as chunks arrive\n if (!response.body) {\n throw new Error('Response body is not available for streaming');\n }\n\n const requestId = shortuuid();\n const audioByteStream = new AudioByteStream(24000, 1);\n const reader = response.body.getReader();\n\n try {\n let lastFrame: AudioFrame | undefined;\n const sendLastFrame = (segmentId: string, final: boolean) => {\n if (lastFrame) {\n this.queue.put({ requestId, segmentId, frame: lastFrame, final });\n lastFrame = undefined;\n }\n };\n\n // waitForAbort internally sets up an abort listener on the abort signal\n // we need to put it outside loop to avoid constant re-registration of the listener\n const abortPromise = waitForAbort(this.abortSignal);\n\n while (!this.abortSignal.aborted) {\n const result = await Promise.race([reader.read(), abortPromise]);\n\n if (result === undefined) break; // aborted\n\n const { done, value } = result;\n\n if (done) {\n 
break;\n }\n\n // Process the chunk and convert to audio frames\n // Convert Uint8Array to ArrayBuffer for AudioByteStream\n const frames = audioByteStream.write(value.buffer);\n\n for (const frame of frames) {\n sendLastFrame(requestId, false);\n lastFrame = frame;\n }\n }\n\n // Send the final frame\n sendLastFrame(requestId, true);\n } finally {\n reader.releaseLock();\n this.queue.close();\n }\n }\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAGA,oBAMO;AAIP,MAAM,oBAAgD;AAAA,EACpD,OAAO;AAAA,EACP,UAAU;AAAA,EACV,aAAa;AACf;AAKO,MAAM,YAAY,kBAAI,IAAI;AAAA,EACvB;AAAA,EACR,QAAQ;AAAA,EACA,kBAAkB,IAAI,gBAAgB;AAAA,EAC9C,YAAY,OAAmC,CAAC,GAAG;AAMjD,UAAM,MAAO,GAAG,EAAE,WAAW,MAAM,CAAC;AAGpC,UAAM,SAAS,KAAK,UAAU,QAAQ,IAAI;AAC1C,UAAM,gBAAgB,KAAK,iBAAiB,QAAQ,IAAI;AAExD,QAAI,CAAC,QAAQ;AACX,YAAM,IAAI;AAAA,QACR;AAAA,MACF;AAAA,IACF;AACA,QAAI,CAAC,eAAe;AAClB,YAAM,IAAI;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAEA,SAAK,OAAO;AAAA,MACV,GAAG;AAAA,MACH,GAAG;AAAA,MACH;AAAA,MACA;AAAA,IACF;AAAA,EACF;AAAA,EAEA,cAAc,MAAoE;AAChF,SAAK,OAAO;AAAA,MACV,GAAG,KAAK;AAAA,MACR,GAAG;AAAA,IACL;AAAA,EACF;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAQA,WACE,MACA,aACA,aACe;AACf,WAAO,IAAI,cAAc,MAAM,MAAM,KAAK,MAAM,aAAa,WAAW;AAAA,EAC1E;AAAA,EAEA,SAA+B;AAC7B,UAAM,IAAI,MAAM,2CAA2C;AAAA,EAC7D;AAAA,EAEA,MAAM,QAAuB;AAC3B,SAAK,gBAAgB,MAAM;AAAA,EAC7B;AACF;AAWO,MAAM,sBAAsB,kBAAI,cAAc;AAAA,EACnD,QAAQ;AAAA,EACS;AAAA,EAEjB,YACEA,MACA,MACA,MACA,aACA,aACA;AACA,UAAM,MAAMA,MAAK,aAAa,WAAW;AACzC,SAAK,OAAO;AAAA,EACd;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAUA,MAAgB,MAAM;AACpB,UAAM,EAAE,QAAQ,eAAe,OAAO,UAAU,aAAa,UAAU,IAAI,KAAK;AAChF,UAAM,UAAmC;AAAA,MACvC,QAAQ,KAAK;AAAA,IACf;AACA,QAAI,MAAO,SAAQ,QAAQ;AAC3B,QAAI,SAAU,SAAQ,WAAW;AACjC,QAAI,gBAAgB,OAAW,SAAQ,cAAc;AACrD,QAAI,cAAc,OAAW,SAAQ,aAAa;AAElD,UAAM,UAAkC;AAAA,MACtC,eAAe,WAAW,MAAM;AAAA,MAChC,gBAAgB;AAAA,IAClB;AAEA,UAAM,WAAW,MAAM,MAAM,eAAe;AAAA,MAC1C,QAAQ;AAAA,MACR;AAAA,MACA,MAAM,KAAK,UAAU,OAAO;AAAA,MAC5B,QAAQ,KAAK;AAAA,IACf,CAAC;AAED,QAAI,CAAC,SAAS,IAAI;AAChB,UAAI;AACJ,UAAI;A
ACF,kBAAU,MAAM,SAAS,KAAK;AAAA,MAChC,QAAQ;AACN,kBAAU,SAAS;AAAA,MACrB;AACA,YAAM,IAAI,MAAM,+BAA+B,SAAS,MAAM,IAAI,OAAO,EAAE;AAAA,IAC7E;AAGA,QAAI,CAAC,SAAS,MAAM;AAClB,YAAM,IAAI,MAAM,8CAA8C;AAAA,IAChE;AAEA,UAAM,gBAAY,yBAAU;AAC5B,UAAM,kBAAkB,IAAI,8BAAgB,MAAO,CAAC;AACpD,UAAM,SAAS,SAAS,KAAK,UAAU;AAEvC,QAAI;AACF,UAAI;AACJ,YAAM,gBAAgB,CAAC,WAAmB,UAAmB;AAC3D,YAAI,WAAW;AACb,eAAK,MAAM,IAAI,EAAE,WAAW,WAAW,OAAO,WAAW,MAAM,CAAC;AAChE,sBAAY;AAAA,QACd;AAAA,MACF;AAIA,YAAM,mBAAe,4BAAa,KAAK,WAAW;AAElD,aAAO,CAAC,KAAK,YAAY,SAAS;AAChC,cAAM,SAAS,MAAM,QAAQ,KAAK,CAAC,OAAO,KAAK,GAAG,YAAY,CAAC;AAE/D,YAAI,WAAW,OAAW;AAE1B,cAAM,EAAE,MAAM,MAAM,IAAI;AAExB,YAAI,MAAM;AACR;AAAA,QACF;AAIA,cAAM,SAAS,gBAAgB,MAAM,MAAM,MAAM;AAEjD,mBAAW,SAAS,QAAQ;AAC1B,wBAAc,WAAW,KAAK;AAC9B,sBAAY;AAAA,QACd;AAAA,MACF;AAGA,oBAAc,WAAW,IAAI;AAAA,IAC/B,UAAE;AACA,aAAO,YAAY;AACnB,WAAK,MAAM,MAAM;AAAA,IACnB;AAAA,EACF;AACF;","names":["tts"]}
package/dist/tts.d.cts ADDED
@@ -0,0 +1,45 @@
1
+ import { type APIConnectOptions, tts } from '@livekit/agents';
2
+ import type { BasetenTTSOptions } from './types.js';
3
/**
 * Baseten TTS implementation (24kHz mono).
 *
 * Synthesis is performed with a single HTTP POST per request; the response
 * body is consumed incrementally so playback can begin before the full clip
 * has arrived. Duplex streaming via {@link TTS.stream} is not supported and
 * always throws.
 */
export declare class TTS extends tts.TTS {
    /** Resolved Baseten options (API key, model endpoint, voice, language, ...). */
    private opts;
    label: string;
    /** Internal controller whose `abort()` is triggered by {@link close}. */
    private abortController;
    /**
     * @param opts - Partial options. `apiKey` falls back to $BASETEN_API_KEY
     *   and `modelEndpoint` to $BASETEN_MODEL_ENDPOINT; the constructor
     *   throws if either is still missing after the fallback.
     */
    constructor(opts?: Partial<BasetenTTSOptions>);
    /**
     * Merge new options into the current ones. The API key and model
     * endpoint are fixed at construction time and cannot be changed here.
     */
    updateOptions(opts: Partial<Omit<BasetenTTSOptions, 'apiKey' | 'modelEndpoint'>>): void;
    /**
     * Synthesize speech for a given piece of text. Returns a `ChunkedStream`
     * which will asynchronously fetch audio from Baseten and push frames into
     * LiveKit's playback pipeline. If you need to cancel synthesis you can
     * call {@link ChunkedStream.stop} on the returned object.
     */
    synthesize(text: string, connOptions?: APIConnectOptions, abortSignal?: AbortSignal): ChunkedStream;
    /** Not supported by this plugin — always throws. */
    stream(): tts.SynthesizeStream;
    /** Signal the internal abort controller; resolves immediately. */
    close(): Promise<void>;
}
22
/**
 * Internal helper that performs the actual HTTP request and converts the
 * response into audio frames. It inherits from `tts.ChunkedStream` to
 * integrate with LiveKit's event and cancellation framework.
 *
 * This implementation streams audio chunks as they arrive from the Baseten
 * model endpoint, processing them incrementally instead of waiting for the
 * complete response.
 */
export declare class ChunkedStream extends tts.ChunkedStream {
    label: string;
    /** Options captured at construction; includes credentials and endpoint. */
    private readonly opts;
    /**
     * @param tts - Owning {@link TTS} instance.
     * @param text - Text to synthesize.
     * @param opts - Fully-resolved Baseten options.
     * @param connOptions - Optional LiveKit API connection options.
     * @param abortSignal - Optional signal used to cancel the HTTP request.
     */
    constructor(tts: TTS, text: string, opts: BasetenTTSOptions, connOptions?: APIConnectOptions, abortSignal?: AbortSignal);
    /**
     * Execute the synthesis request. This method is automatically invoked
     * by the base class when the stream starts. It performs a POST request
     * to the configured `modelEndpoint` with the input text and optional
     * parameters. Audio chunks are streamed as they arrive and transformed
     * into a sequence of `AudioFrame` objects that are enqueued immediately
     * for playback.
     *
     * @throws Error when the HTTP response is not OK, or has no readable body.
     */
    protected run(): Promise<void>;
}
45
+ //# sourceMappingURL=tts.d.ts.map
package/dist/tts.d.ts ADDED
@@ -0,0 +1,45 @@
1
+ import { type APIConnectOptions, tts } from '@livekit/agents';
2
+ import type { BasetenTTSOptions } from './types.js';
3
/**
 * Baseten TTS implementation (24kHz mono).
 *
 * Synthesis is performed with a single HTTP POST per request; the response
 * body is consumed incrementally so playback can begin before the full clip
 * has arrived. Duplex streaming via {@link TTS.stream} is not supported and
 * always throws.
 */
export declare class TTS extends tts.TTS {
    /** Resolved Baseten options (API key, model endpoint, voice, language, ...). */
    private opts;
    label: string;
    /** Internal controller whose `abort()` is triggered by {@link close}. */
    private abortController;
    /**
     * @param opts - Partial options. `apiKey` falls back to $BASETEN_API_KEY
     *   and `modelEndpoint` to $BASETEN_MODEL_ENDPOINT; the constructor
     *   throws if either is still missing after the fallback.
     */
    constructor(opts?: Partial<BasetenTTSOptions>);
    /**
     * Merge new options into the current ones. The API key and model
     * endpoint are fixed at construction time and cannot be changed here.
     */
    updateOptions(opts: Partial<Omit<BasetenTTSOptions, 'apiKey' | 'modelEndpoint'>>): void;
    /**
     * Synthesize speech for a given piece of text. Returns a `ChunkedStream`
     * which will asynchronously fetch audio from Baseten and push frames into
     * LiveKit's playback pipeline. If you need to cancel synthesis you can
     * call {@link ChunkedStream.stop} on the returned object.
     */
    synthesize(text: string, connOptions?: APIConnectOptions, abortSignal?: AbortSignal): ChunkedStream;
    /** Not supported by this plugin — always throws. */
    stream(): tts.SynthesizeStream;
    /** Signal the internal abort controller; resolves immediately. */
    close(): Promise<void>;
}
22
/**
 * Internal helper that performs the actual HTTP request and converts the
 * response into audio frames. It inherits from `tts.ChunkedStream` to
 * integrate with LiveKit's event and cancellation framework.
 *
 * This implementation streams audio chunks as they arrive from the Baseten
 * model endpoint, processing them incrementally instead of waiting for the
 * complete response.
 */
export declare class ChunkedStream extends tts.ChunkedStream {
    label: string;
    /** Options captured at construction; includes credentials and endpoint. */
    private readonly opts;
    /**
     * @param tts - Owning {@link TTS} instance.
     * @param text - Text to synthesize.
     * @param opts - Fully-resolved Baseten options.
     * @param connOptions - Optional LiveKit API connection options.
     * @param abortSignal - Optional signal used to cancel the HTTP request.
     */
    constructor(tts: TTS, text: string, opts: BasetenTTSOptions, connOptions?: APIConnectOptions, abortSignal?: AbortSignal);
    /**
     * Execute the synthesis request. This method is automatically invoked
     * by the base class when the stream starts. It performs a POST request
     * to the configured `modelEndpoint` with the input text and optional
     * parameters. Audio chunks are streamed as they arrive and transformed
     * into a sequence of `AudioFrame` objects that are enqueued immediately
     * for playback.
     *
     * @throws Error when the HTTP response is not OK, or has no readable body.
     */
    protected run(): Promise<void>;
}
45
+ //# sourceMappingURL=tts.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"tts.d.ts","sourceRoot":"","sources":["../src/tts.ts"],"names":[],"mappings":"AAGA,OAAO,EACL,KAAK,iBAAiB,EAGtB,GAAG,EAEJ,MAAM,iBAAiB,CAAC;AAEzB,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,YAAY,CAAC;AAQpD;;GAEG;AACH,qBAAa,GAAI,SAAQ,GAAG,CAAC,GAAG;IAC9B,OAAO,CAAC,IAAI,CAAoB;IAChC,KAAK,SAAiB;IACtB,OAAO,CAAC,eAAe,CAAyB;gBACpC,IAAI,GAAE,OAAO,CAAC,iBAAiB,CAAM;IA+BjD,aAAa,CAAC,IAAI,EAAE,OAAO,CAAC,IAAI,CAAC,iBAAiB,EAAE,QAAQ,GAAG,eAAe,CAAC,CAAC;IAOhF;;;;;OAKG;IACH,UAAU,CACR,IAAI,EAAE,MAAM,EACZ,WAAW,CAAC,EAAE,iBAAiB,EAC/B,WAAW,CAAC,EAAE,WAAW,GACxB,aAAa;IAIhB,MAAM,IAAI,GAAG,CAAC,gBAAgB;IAIxB,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC;CAG7B;AAED;;;;;;;;GAQG;AACH,qBAAa,aAAc,SAAQ,GAAG,CAAC,aAAa;IAClD,KAAK,SAA2B;IAChC,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAoB;gBAGvC,GAAG,EAAE,GAAG,EACR,IAAI,EAAE,MAAM,EACZ,IAAI,EAAE,iBAAiB,EACvB,WAAW,CAAC,EAAE,iBAAiB,EAC/B,WAAW,CAAC,EAAE,WAAW;IAM3B;;;;;;;OAOG;cACa,GAAG;CAkFpB"}