@livekit/agents 1.0.25 → 1.0.27
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/inference/tts.cjs +0 -2
- package/dist/inference/tts.cjs.map +1 -1
- package/dist/inference/tts.d.cts +0 -1
- package/dist/inference/tts.d.ts +0 -1
- package/dist/inference/tts.d.ts.map +1 -1
- package/dist/inference/tts.js +0 -2
- package/dist/inference/tts.js.map +1 -1
- package/dist/tts/stream_adapter.cjs +6 -3
- package/dist/tts/stream_adapter.cjs.map +1 -1
- package/dist/tts/stream_adapter.d.cts +1 -1
- package/dist/tts/stream_adapter.d.ts +1 -1
- package/dist/tts/stream_adapter.d.ts.map +1 -1
- package/dist/tts/stream_adapter.js +6 -3
- package/dist/tts/stream_adapter.js.map +1 -1
- package/dist/tts/tts.cjs +26 -15
- package/dist/tts/tts.cjs.map +1 -1
- package/dist/tts/tts.d.cts +7 -4
- package/dist/tts/tts.d.ts +7 -4
- package/dist/tts/tts.d.ts.map +1 -1
- package/dist/tts/tts.js +26 -15
- package/dist/tts/tts.js.map +1 -1
- package/dist/voice/agent_session.cjs +2 -0
- package/dist/voice/agent_session.cjs.map +1 -1
- package/dist/voice/agent_session.d.ts.map +1 -1
- package/dist/voice/agent_session.js +2 -0
- package/dist/voice/agent_session.js.map +1 -1
- package/package.json +1 -1
- package/src/inference/tts.ts +0 -2
- package/src/tts/stream_adapter.ts +10 -3
- package/src/tts/tts.ts +41 -18
- package/src/voice/agent_session.ts +2 -0
package/dist/inference/tts.cjs
CHANGED
|
@@ -152,13 +152,11 @@ class TTS extends import_tts.TTS {
|
|
|
152
152
|
class SynthesizeStream extends import_tts.SynthesizeStream {
|
|
153
153
|
opts;
|
|
154
154
|
tts;
|
|
155
|
-
connOptions;
|
|
156
155
|
#logger = (0, import_log.log)();
|
|
157
156
|
constructor(tts, opts, connOptions) {
|
|
158
157
|
super(tts, connOptions);
|
|
159
158
|
this.opts = opts;
|
|
160
159
|
this.tts = tts;
|
|
161
|
-
this.connOptions = connOptions;
|
|
162
160
|
}
|
|
163
161
|
get label() {
|
|
164
162
|
return "inference.SynthesizeStream";
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../../src/inference/tts.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2025 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { WebSocket } from 'ws';\nimport { APIError, APIStatusError } from '../_exceptions.js';\nimport { AudioByteStream } from '../audio.js';\nimport { log } from '../log.js';\nimport { createStreamChannel } from '../stream/stream_channel.js';\nimport { basic as tokenizeBasic } from '../tokenize/index.js';\nimport type { ChunkedStream } from '../tts/index.js';\nimport { SynthesizeStream as BaseSynthesizeStream, TTS as BaseTTS } from '../tts/index.js';\nimport { type APIConnectOptions, DEFAULT_API_CONNECT_OPTIONS } from '../types.js';\nimport { shortuuid } from '../utils.js';\nimport {\n type TtsClientEvent,\n type TtsServerEvent,\n type TtsSessionCreateEvent,\n ttsClientEventSchema,\n ttsServerEventSchema,\n} from './api_protos.js';\nimport { type AnyString, connectWs, createAccessToken } from './utils.js';\n\nexport type CartesiaModels =\n | 'cartesia'\n | 'cartesia/sonic'\n | 'cartesia/sonic-2'\n | 'cartesia/sonic-turbo';\n\nexport type ElevenlabsModels =\n | 'elevenlabs'\n | 'elevenlabs/eleven_flash_v2'\n | 'elevenlabs/eleven_flash_v2_5'\n | 'elevenlabs/eleven_turbo_v2'\n | 'elevenlabs/eleven_turbo_v2_5'\n | 'elevenlabs/eleven_multilingual_v2';\n\nexport type RimeModels = 'rime' | 'rime/mist' | 'rime/mistv2' | 'rime/arcana';\n\nexport type InworldModels = 'inworld' | 'inworld/inworld-tts-1';\n\nexport interface CartesiaOptions {\n duration?: number; // max duration of audio in seconds\n speed?: 'slow' | 'normal' | 'fast'; // default: not specified\n}\n\nexport interface ElevenlabsOptions {\n inactivity_timeout?: number; // default: 60\n apply_text_normalization?: 'auto' | 'off' | 'on'; // default: \"auto\"\n}\n\nexport interface RimeOptions {}\n\nexport interface InworldOptions {}\n\ntype _TTSModels = CartesiaModels | ElevenlabsModels | RimeModels | InworldModels;\n\nexport type TTSModels = CartesiaModels | ElevenlabsModels | RimeModels | InworldModels | AnyString;\n\nexport type ModelWithVoice = `${_TTSModels}:${string}` | TTSModels;\n\nexport type TTSOptions<TModel extends TTSModels> = TModel extends CartesiaModels\n ? CartesiaOptions\n : TModel extends ElevenlabsModels\n ? ElevenlabsOptions\n : TModel extends RimeOptions\n ? RimeOptions\n : TModel extends InworldOptions\n ? InworldOptions\n : Record<string, unknown>;\n\ntype TTSEncoding = 'pcm_s16le';\n\nconst DEFAULT_ENCODING: TTSEncoding = 'pcm_s16le';\nconst DEFAULT_SAMPLE_RATE = 16000;\nconst DEFAULT_BASE_URL = 'https://agent-gateway.livekit.cloud/v1';\nconst NUM_CHANNELS = 1;\nconst DEFAULT_LANGUAGE = 'en';\n\nexport interface InferenceTTSOptions<TModel extends TTSModels> {\n model?: TModel;\n voice?: string;\n language?: string;\n encoding: TTSEncoding;\n sampleRate: number;\n baseURL: string;\n apiKey: string;\n apiSecret: string;\n modelOptions: TTSOptions<TModel>;\n}\n\n/**\n * Livekit Cloud Inference TTS\n */\nexport class TTS<TModel extends TTSModels> extends BaseTTS {\n private opts: InferenceTTSOptions<TModel>;\n private streams: Set<SynthesizeStream<TModel>> = new Set();\n\n #logger = log();\n\n constructor(opts: {\n model: TModel;\n voice?: string;\n language?: string;\n baseURL?: string;\n encoding?: TTSEncoding;\n sampleRate?: number;\n apiKey?: string;\n apiSecret?: string;\n modelOptions?: TTSOptions<TModel>;\n }) {\n const sampleRate = opts?.sampleRate ?? DEFAULT_SAMPLE_RATE;\n super(sampleRate, 1, { streaming: true });\n\n const {\n model,\n voice,\n language = DEFAULT_LANGUAGE,\n baseURL,\n encoding = DEFAULT_ENCODING,\n apiKey,\n apiSecret,\n modelOptions = {} as TTSOptions<TModel>,\n } = opts || {};\n\n const lkBaseURL = baseURL || process.env.LIVEKIT_INFERENCE_URL || DEFAULT_BASE_URL;\n const lkApiKey = apiKey || process.env.LIVEKIT_INFERENCE_API_KEY || process.env.LIVEKIT_API_KEY;\n if (!lkApiKey) {\n throw new Error('apiKey is required: pass apiKey or set LIVEKIT_API_KEY');\n }\n\n const lkApiSecret =\n apiSecret || process.env.LIVEKIT_INFERENCE_API_SECRET || process.env.LIVEKIT_API_SECRET;\n if (!lkApiSecret) {\n throw new Error('apiSecret is required: pass apiSecret or set LIVEKIT_API_SECRET');\n }\n\n // read voice id from the model if provided: \"provider/model:voice_id\"\n let nextModel = model;\n let nextVoice = voice;\n if (typeof nextModel === 'string') {\n const idx = nextModel.lastIndexOf(':');\n if (idx !== -1) {\n const voiceFromModel = nextModel.slice(idx + 1);\n if (nextVoice && nextVoice !== voiceFromModel) {\n this.#logger.warn(\n '`voice` is provided via both argument and model, using the one from the argument',\n { voice: nextVoice, model: nextModel },\n );\n } else {\n nextVoice = voiceFromModel;\n }\n nextModel = nextModel.slice(0, idx) as TModel;\n }\n }\n\n this.opts = {\n model: nextModel,\n voice: nextVoice,\n language,\n encoding,\n sampleRate,\n baseURL: lkBaseURL,\n apiKey: lkApiKey,\n apiSecret: lkApiSecret,\n modelOptions,\n };\n }\n\n get label() {\n return 'inference.TTS';\n }\n\n static fromModelString(modelString: string): TTS<AnyString> {\n if (modelString.includes(':')) {\n const [model, voice] = modelString.split(':') as [TTSModels, string];\n return new TTS({ model, voice });\n }\n return new TTS({ model: modelString });\n }\n\n updateOptions(opts: Partial<Pick<InferenceTTSOptions<TModel>, 'model' | 'voice' | 'language'>>) {\n this.opts = { ...this.opts, ...opts };\n for (const stream of this.streams) {\n stream.updateOptions(opts);\n }\n }\n\n synthesize(_: string): ChunkedStream {\n throw new Error('ChunkedStream is not implemented');\n }\n\n stream(options?: { connOptions?: APIConnectOptions }): SynthesizeStream<TModel> {\n const { connOptions = DEFAULT_API_CONNECT_OPTIONS } = options || {};\n const stream = new SynthesizeStream(this, { ...this.opts }, connOptions);\n this.streams.add(stream);\n return stream;\n }\n\n async connectWs(timeout: number): Promise<WebSocket> {\n let baseURL = this.opts.baseURL;\n if (baseURL.startsWith('http://') || baseURL.startsWith('https://')) {\n baseURL = baseURL.replace('http', 'ws');\n }\n\n const token = await createAccessToken(this.opts.apiKey, this.opts.apiSecret);\n const url = `${baseURL}/tts`;\n const headers = { Authorization: `Bearer ${token}` } as Record<string, string>;\n\n const params = {\n type: 'session.create',\n sample_rate: String(this.opts.sampleRate),\n encoding: this.opts.encoding,\n extra: this.opts.modelOptions,\n } as TtsSessionCreateEvent;\n\n if (this.opts.voice) params.voice = this.opts.voice;\n if (this.opts.model) params.model = this.opts.model;\n if (this.opts.language) params.language = this.opts.language;\n\n const socket = await connectWs(url, headers, timeout);\n socket.send(JSON.stringify(params));\n return socket;\n }\n\n async closeWs(ws: WebSocket) {\n await ws.close();\n }\n\n async close() {\n for (const stream of this.streams) {\n await stream.close();\n }\n this.streams.clear();\n }\n}\n\nexport class SynthesizeStream<TModel extends TTSModels> extends BaseSynthesizeStream {\n private opts: InferenceTTSOptions<TModel>;\n private tts: TTS<TModel>;\n private connOptions: APIConnectOptions;\n\n #logger = log();\n\n constructor(tts: TTS<TModel>, opts: InferenceTTSOptions<TModel>, connOptions: APIConnectOptions) {\n super(tts, connOptions);\n this.opts = opts;\n this.tts = tts;\n this.connOptions = connOptions;\n }\n\n get label() {\n return 'inference.SynthesizeStream';\n }\n\n updateOptions(opts: Partial<Pick<InferenceTTSOptions<TModel>, 'model' | 'voice' | 'language'>>) {\n this.opts = { ...this.opts, ...opts };\n }\n\n protected async run(): Promise<void> {\n let ws: WebSocket | null = null;\n let closing = false;\n let finalReceived = false;\n let lastFrame: AudioFrame | undefined;\n\n const sendTokenizerStream = new tokenizeBasic.SentenceTokenizer().stream();\n const eventChannel = createStreamChannel<TtsServerEvent>();\n const requestId = shortuuid('tts_request_');\n\n const resourceCleanup = () => {\n if (closing) return;\n closing = true;\n sendTokenizerStream.close();\n eventChannel.close();\n ws?.removeAllListeners();\n ws?.close();\n };\n\n const sendClientEvent = async (event: TtsClientEvent) => {\n // Don't send events to a closed WebSocket or aborted controller\n if (this.abortController.signal.aborted || closing) return;\n\n const validatedEvent = await ttsClientEventSchema.parseAsync(event);\n if (!ws || ws.readyState !== WebSocket.OPEN) {\n this.#logger.warn('Trying to send client TTS event to a closed WebSocket');\n return;\n }\n ws.send(JSON.stringify(validatedEvent));\n };\n\n const sendLastFrame = (segmentId: string, final: boolean) => {\n if (lastFrame) {\n this.queue.put({ requestId, segmentId, frame: lastFrame, final });\n lastFrame = undefined;\n }\n };\n\n const createInputTask = async () => {\n for await (const data of this.input) {\n if (this.abortController.signal.aborted || closing) break;\n if (data === SynthesizeStream.FLUSH_SENTINEL) {\n sendTokenizerStream.flush();\n continue;\n }\n sendTokenizerStream.pushText(data);\n }\n // Only call endInput if the stream hasn't been closed by cleanup\n if (!closing) {\n sendTokenizerStream.endInput();\n }\n };\n\n const createSentenceStreamTask = async () => {\n for await (const ev of sendTokenizerStream) {\n if (this.abortController.signal.aborted) break;\n\n sendClientEvent({\n type: 'input_transcript',\n transcript: ev.token + ' ',\n });\n }\n\n sendClientEvent({ type: 'session.flush' });\n };\n\n const createWsListenerTask = async (ws: WebSocket) => {\n return new Promise<void>((resolve, reject) => {\n this.abortController.signal.addEventListener('abort', () => {\n resourceCleanup();\n resolve(); // Abort is triggered by close(), which is a normal shutdown, not an error\n });\n\n ws.on('message', async (data) => {\n const eventJson = JSON.parse(data.toString()) as Record<string, unknown>;\n const validatedEvent = ttsServerEventSchema.parse(eventJson);\n eventChannel.write(validatedEvent);\n });\n\n ws.on('error', (e) => {\n this.#logger.error({ error: e }, 'WebSocket error');\n resourceCleanup();\n reject(e);\n });\n\n ws.on('close', () => {\n resourceCleanup();\n\n if (!closing) return this.#logger.error('WebSocket closed unexpectedly');\n if (finalReceived) return resolve();\n\n reject(\n new APIStatusError({\n message: 'Gateway connection closed unexpectedly',\n options: { requestId },\n }),\n );\n });\n });\n };\n\n const createRecvTask = async () => {\n let currentSessionId: string | null = null;\n\n const bstream = new AudioByteStream(this.opts.sampleRate, NUM_CHANNELS);\n const serverEventStream = eventChannel.stream();\n const reader = serverEventStream.getReader();\n\n try {\n while (!this.closed && !this.abortController.signal.aborted) {\n const result = await reader.read();\n if (this.abortController.signal.aborted) return;\n if (result.done) return;\n\n const serverEvent = result.value;\n switch (serverEvent.type) {\n case 'session.created':\n currentSessionId = serverEvent.session_id;\n break;\n case 'output_audio':\n const base64Data = new Int8Array(Buffer.from(serverEvent.audio, 'base64'));\n for (const frame of bstream.write(base64Data.buffer)) {\n sendLastFrame(currentSessionId!, false);\n lastFrame = frame;\n }\n break;\n case 'done':\n finalReceived = true;\n for (const frame of bstream.flush()) {\n sendLastFrame(currentSessionId!, false);\n lastFrame = frame;\n }\n sendLastFrame(currentSessionId!, true);\n this.queue.put(SynthesizeStream.END_OF_STREAM);\n break;\n case 'session.closed':\n resourceCleanup();\n break;\n case 'error':\n this.#logger.error(\n { serverEvent },\n 'Received error message from LiveKit TTS WebSocket',\n );\n resourceCleanup();\n throw new APIError(`LiveKit TTS returned error: ${serverEvent.message}`);\n default:\n this.#logger.warn('Unexpected message %s', serverEvent);\n break;\n }\n }\n } finally {\n reader.releaseLock();\n try {\n await serverEventStream.cancel();\n } catch (e) {\n this.#logger.debug('Error cancelling serverEventStream (may already be cancelled):', e);\n }\n }\n };\n\n try {\n ws = await this.tts.connectWs(this.connOptions.timeoutMs);\n\n await Promise.all([\n createInputTask(),\n createSentenceStreamTask(),\n createWsListenerTask(ws),\n createRecvTask(),\n ]);\n } finally {\n resourceCleanup();\n }\n }\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAIA,gBAA0B;AAC1B,wBAAyC;AACzC,mBAAgC;AAChC,iBAAoB;AACpB,4BAAoC;AACpC,sBAAuC;AAEvC,iBAAyE;AACzE,mBAAoE;AACpE,mBAA0B;AAC1B,wBAMO;AACP,IAAAA,gBAA6D;AAoD7D,MAAM,mBAAgC;AACtC,MAAM,sBAAsB;AAC5B,MAAM,mBAAmB;AACzB,MAAM,eAAe;AACrB,MAAM,mBAAmB;AAiBlB,MAAM,YAAsC,WAAAC,IAAQ;AAAA,EACjD;AAAA,EACA,UAAyC,oBAAI,IAAI;AAAA,EAEzD,cAAU,gBAAI;AAAA,EAEd,YAAY,MAUT;AACD,UAAM,cAAa,6BAAM,eAAc;AACvC,UAAM,YAAY,GAAG,EAAE,WAAW,KAAK,CAAC;AAExC,UAAM;AAAA,MACJ;AAAA,MACA;AAAA,MACA,WAAW;AAAA,MACX;AAAA,MACA,WAAW;AAAA,MACX;AAAA,MACA;AAAA,MACA,eAAe,CAAC;AAAA,IAClB,IAAI,QAAQ,CAAC;AAEb,UAAM,YAAY,WAAW,QAAQ,IAAI,yBAAyB;AAClE,UAAM,WAAW,UAAU,QAAQ,IAAI,6BAA6B,QAAQ,IAAI;AAChF,QAAI,CAAC,UAAU;AACb,YAAM,IAAI,MAAM,wDAAwD;AAAA,IAC1E;AAEA,UAAM,cACJ,aAAa,QAAQ,IAAI,gCAAgC,QAAQ,IAAI;AACvE,QAAI,CAAC,aAAa;AAChB,YAAM,IAAI,MAAM,iEAAiE;AAAA,IACnF;AAGA,QAAI,YAAY;AAChB,QAAI,YAAY;AAChB,QAAI,OAAO,cAAc,UAAU;AACjC,YAAM,MAAM,UAAU,YAAY,GAAG;AACrC,UAAI,QAAQ,IAAI;AACd,cAAM,iBAAiB,UAAU,MAAM,MAAM,CAAC;AAC9C,YAAI,aAAa,cAAc,gBAAgB;AAC7C,eAAK,QAAQ;AAAA,YACX;AAAA,YACA,EAAE,OAAO,WAAW,OAAO,UAAU;AAAA,UACvC;AAAA,QACF,OAAO;AACL,sBAAY;AAAA,QACd;AACA,oBAAY,UAAU,MAAM,GAAG,GAAG;AAAA,MACpC;AAAA,IACF;AAEA,SAAK,OAAO;AAAA,MACV,OAAO;AAAA,MACP,OAAO;AAAA,MACP;AAAA,MACA;AAAA,MACA;AAAA,MACA,SAAS;AAAA,MACT,QAAQ;AAAA,MACR,WAAW;AAAA,MACX;AAAA,IACF;AAAA,EACF;AAAA,EAEA,IAAI,QAAQ;AACV,WAAO;AAAA,EACT;AAAA,EAEA,OAAO,gBAAgB,aAAqC;AAC1D,QAAI,YAAY,SAAS,GAAG,GAAG;AAC7B,YAAM,CAAC,OAAO,KAAK,IAAI,YAAY,MAAM,GAAG;AAC5C,aAAO,IAAI,IAAI,EAAE,OAAO,MAAM,CAAC;AAAA,IACjC;AACA,WAAO,IAAI,IAAI,EAAE,OAAO,YAAY,CAAC;AAAA,EACvC;AAAA,EAEA,cAAc,MAAkF;AAC9F,SAAK,OAAO,EAAE,GAAG,KAAK,MAAM,GAAG,KAAK;AACpC,eAAW,UAAU,KAAK,SAAS;AACjC,aAAO,cAAc,IAAI;AAAA,IAC3B;AAAA,EACF;AAAA,EAEA,WAAW,GAA0B;AACnC,UAAM,IAAI,MAAM,kCAAkC;AAAA,EACpD;AAAA,EAEA,OAAO,SAAyE;AAC9E,UAAM,EAAE,cAAc,yCAA4B,IAAI,WAAW,CAAC;AAClE,UAAM,SAAS,IAAI,iBAAiB,MAAM,EAAE,GAAG,KAAK,KAAK,GAAG,WAAW;AACvE,SAAK,QAAQ,IAAI,MAAM;AACvB,WAAO;AAAA,EACT;AAAA,EAEA,MAAM,UAAU,SAAqC;AACnD,QAAI,UAAU,KAAK,KAAK;AACxB,QAAI,QAAQ,WAAW,SAAS,KAAK,QAAQ,WAAW,UAAU,GAAG;AACnE,gBAAU,QAAQ,QAAQ,QAAQ,IAAI;AAAA,IACxC;AAEA,UAAM,QAAQ,UAAM,iCAAkB,KAAK,KAAK,QAAQ,KAAK,KAAK,SAAS;AAC3E,UAAM,MAAM,GAAG,OAAO;AACtB,UAAM,UAAU,EAAE,eAAe,UAAU,KAAK,GAAG;AAEnD,UAAM,SAAS;AAAA,MACb,MAAM;AAAA,MACN,aAAa,OAAO,KAAK,KAAK,UAAU;AAAA,MACxC,UAAU,KAAK,KAAK;AAAA,MACpB,OAAO,KAAK,KAAK;AAAA,IACnB;AAEA,QAAI,KAAK,KAAK,MAAO,QAAO,QAAQ,KAAK,KAAK;AAC9C,QAAI,KAAK,KAAK,MAAO,QAAO,QAAQ,KAAK,KAAK;AAC9C,QAAI,KAAK,KAAK,SAAU,QAAO,WAAW,KAAK,KAAK;AAEpD,UAAM,SAAS,UAAM,yBAAU,KAAK,SAAS,OAAO;AACpD,WAAO,KAAK,KAAK,UAAU,MAAM,CAAC;AAClC,WAAO;AAAA,EACT;AAAA,EAEA,MAAM,QAAQ,IAAe;AAC3B,UAAM,GAAG,MAAM;AAAA,EACjB;AAAA,EAEA,MAAM,QAAQ;AACZ,eAAW,UAAU,KAAK,SAAS;AACjC,YAAM,OAAO,MAAM;AAAA,IACrB;AACA,SAAK,QAAQ,MAAM;AAAA,EACrB;AACF;AAEO,MAAM,yBAAmD,WAAAC,iBAAqB;AAAA,EAC3E;AAAA,EACA;AAAA,EACA;AAAA,EAER,cAAU,gBAAI;AAAA,EAEd,YAAY,KAAkB,MAAmC,aAAgC;AAC/F,UAAM,KAAK,WAAW;AACtB,SAAK,OAAO;AACZ,SAAK,MAAM;AACX,SAAK,cAAc;AAAA,EACrB;AAAA,EAEA,IAAI,QAAQ;AACV,WAAO;AAAA,EACT;AAAA,EAEA,cAAc,MAAkF;AAC9F,SAAK,OAAO,EAAE,GAAG,KAAK,MAAM,GAAG,KAAK;AAAA,EACtC;AAAA,EAEA,MAAgB,MAAqB;AACnC,QAAI,KAAuB;AAC3B,QAAI,UAAU;AACd,QAAI,gBAAgB;AACpB,QAAI;AAEJ,UAAM,sBAAsB,IAAI,gBAAAC,MAAc,kBAAkB,EAAE,OAAO;AACzE,UAAM,mBAAe,2CAAoC;AACzD,UAAM,gBAAY,wBAAU,cAAc;AAE1C,UAAM,kBAAkB,MAAM;AAC5B,UAAI,QAAS;AACb,gBAAU;AACV,0BAAoB,MAAM;AAC1B,mBAAa,MAAM;AACnB,+BAAI;AACJ,+BAAI;AAAA,IACN;AAEA,UAAM,kBAAkB,OAAO,UAA0B;AAEvD,UAAI,KAAK,gBAAgB,OAAO,WAAW,QAAS;AAEpD,YAAM,iBAAiB,MAAM,uCAAqB,WAAW,KAAK;AAClE,UAAI,CAAC,MAAM,GAAG,eAAe,oBAAU,MAAM;AAC3C,aAAK,QAAQ,KAAK,uDAAuD;AACzE;AAAA,MACF;AACA,SAAG,KAAK,KAAK,UAAU,cAAc,CAAC;AAAA,IACxC;AAEA,UAAM,gBAAgB,CAAC,WAAmB,UAAmB;AAC3D,UAAI,WAAW;AACb,aAAK,MAAM,IAAI,EAAE,WAAW,WAAW,OAAO,WAAW,MAAM,CAAC;AAChE,oBAAY;AAAA,MACd;AAAA,IACF;AAEA,UAAM,kBAAkB,YAAY;AAClC,uBAAiB,QAAQ,KAAK,OAAO;AACnC,YAAI,KAAK,gBAAgB,OAAO,WAAW,QAAS;AACpD,YAAI,SAAS,iBAAiB,gBAAgB;AAC5C,8BAAoB,MAAM;AAC1B;AAAA,QACF;AACA,4BAAoB,SAAS,IAAI;AAAA,MACnC;AAEA,UAAI,CAAC,SAAS;AACZ,4BAAoB,SAAS;AAAA,MAC/B;AAAA,IACF;AAEA,UAAM,2BAA2B,YAAY;AAC3C,uBAAiB,MAAM,qBAAqB;AAC1C,YAAI,KAAK,gBAAgB,OAAO,QAAS;AAEzC,wBAAgB;AAAA,UACd,MAAM;AAAA,UACN,YAAY,GAAG,QAAQ;AAAA,QACzB,CAAC;AAAA,MACH;AAEA,sBAAgB,EAAE,MAAM,gBAAgB,CAAC;AAAA,IAC3C;AAEA,UAAM,uBAAuB,OAAOC,QAAkB;AACpD,aAAO,IAAI,QAAc,CAAC,SAAS,WAAW;AAC5C,aAAK,gBAAgB,OAAO,iBAAiB,SAAS,MAAM;AAC1D,0BAAgB;AAChB,kBAAQ;AAAA,QACV,CAAC;AAED,QAAAA,IAAG,GAAG,WAAW,OAAO,SAAS;AAC/B,gBAAM,YAAY,KAAK,MAAM,KAAK,SAAS,CAAC;AAC5C,gBAAM,iBAAiB,uCAAqB,MAAM,SAAS;AAC3D,uBAAa,MAAM,cAAc;AAAA,QACnC,CAAC;AAED,QAAAA,IAAG,GAAG,SAAS,CAAC,MAAM;AACpB,eAAK,QAAQ,MAAM,EAAE,OAAO,EAAE,GAAG,iBAAiB;AAClD,0BAAgB;AAChB,iBAAO,CAAC;AAAA,QACV,CAAC;AAED,QAAAA,IAAG,GAAG,SAAS,MAAM;AACnB,0BAAgB;AAEhB,cAAI,CAAC,QAAS,QAAO,KAAK,QAAQ,MAAM,+BAA+B;AACvE,cAAI,cAAe,QAAO,QAAQ;AAElC;AAAA,YACE,IAAI,iCAAe;AAAA,cACjB,SAAS;AAAA,cACT,SAAS,EAAE,UAAU;AAAA,YACvB,CAAC;AAAA,UACH;AAAA,QACF,CAAC;AAAA,MACH,CAAC;AAAA,IACH;AAEA,UAAM,iBAAiB,YAAY;AACjC,UAAI,mBAAkC;AAEtC,YAAM,UAAU,IAAI,6BAAgB,KAAK,KAAK,YAAY,YAAY;AACtE,YAAM,oBAAoB,aAAa,OAAO;AAC9C,YAAM,SAAS,kBAAkB,UAAU;AAE3C,UAAI;AACF,eAAO,CAAC,KAAK,UAAU,CAAC,KAAK,gBAAgB,OAAO,SAAS;AAC3D,gBAAM,SAAS,MAAM,OAAO,KAAK;AACjC,cAAI,KAAK,gBAAgB,OAAO,QAAS;AACzC,cAAI,OAAO,KAAM;AAEjB,gBAAM,cAAc,OAAO;AAC3B,kBAAQ,YAAY,MAAM;AAAA,YACxB,KAAK;AACH,iCAAmB,YAAY;AAC/B;AAAA,YACF,KAAK;AACH,oBAAM,aAAa,IAAI,UAAU,OAAO,KAAK,YAAY,OAAO,QAAQ,CAAC;AACzE,yBAAW,SAAS,QAAQ,MAAM,WAAW,MAAM,GAAG;AACpD,8BAAc,kBAAmB,KAAK;AACtC,4BAAY;AAAA,cACd;AACA;AAAA,YACF,KAAK;AACH,8BAAgB;AAChB,yBAAW,SAAS,QAAQ,MAAM,GAAG;AACnC,8BAAc,kBAAmB,KAAK;AACtC,4BAAY;AAAA,cACd;AACA,4BAAc,kBAAmB,IAAI;AACrC,mBAAK,MAAM,IAAI,iBAAiB,aAAa;AAC7C;AAAA,YACF,KAAK;AACH,8BAAgB;AAChB;AAAA,YACF,KAAK;AACH,mBAAK,QAAQ;AAAA,gBACX,EAAE,YAAY;AAAA,gBACd;AAAA,cACF;AACA,8BAAgB;AAChB,oBAAM,IAAI,2BAAS,+BAA+B,YAAY,OAAO,EAAE;AAAA,YACzE;AACE,mBAAK,QAAQ,KAAK,yBAAyB,WAAW;AACtD;AAAA,UACJ;AAAA,QACF;AAAA,MACF,UAAE;AACA,eAAO,YAAY;AACnB,YAAI;AACF,gBAAM,kBAAkB,OAAO;AAAA,QACjC,SAAS,GAAG;AACV,eAAK,QAAQ,MAAM,kEAAkE,CAAC;AAAA,QACxF;AAAA,MACF;AAAA,IACF;AAEA,QAAI;AACF,WAAK,MAAM,KAAK,IAAI,UAAU,KAAK,YAAY,SAAS;AAExD,YAAM,QAAQ,IAAI;AAAA,QAChB,gBAAgB;AAAA,QAChB,yBAAyB;AAAA,QACzB,qBAAqB,EAAE;AAAA,QACvB,eAAe;AAAA,MACjB,CAAC;AAAA,IACH,UAAE;AACA,sBAAgB;AAAA,IAClB;AAAA,EACF;AACF;","names":["import_utils","BaseTTS","BaseSynthesizeStream","tokenizeBasic","ws"]}
|
|
1
|
+
{"version":3,"sources":["../../src/inference/tts.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2025 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { WebSocket } from 'ws';\nimport { APIError, APIStatusError } from '../_exceptions.js';\nimport { AudioByteStream } from '../audio.js';\nimport { log } from '../log.js';\nimport { createStreamChannel } from '../stream/stream_channel.js';\nimport { basic as tokenizeBasic } from '../tokenize/index.js';\nimport type { ChunkedStream } from '../tts/index.js';\nimport { SynthesizeStream as BaseSynthesizeStream, TTS as BaseTTS } from '../tts/index.js';\nimport { type APIConnectOptions, DEFAULT_API_CONNECT_OPTIONS } from '../types.js';\nimport { shortuuid } from '../utils.js';\nimport {\n type TtsClientEvent,\n type TtsServerEvent,\n type TtsSessionCreateEvent,\n ttsClientEventSchema,\n ttsServerEventSchema,\n} from './api_protos.js';\nimport { type AnyString, connectWs, createAccessToken } from './utils.js';\n\nexport type CartesiaModels =\n | 'cartesia'\n | 'cartesia/sonic'\n | 'cartesia/sonic-2'\n | 'cartesia/sonic-turbo';\n\nexport type ElevenlabsModels =\n | 'elevenlabs'\n | 'elevenlabs/eleven_flash_v2'\n | 'elevenlabs/eleven_flash_v2_5'\n | 'elevenlabs/eleven_turbo_v2'\n | 'elevenlabs/eleven_turbo_v2_5'\n | 'elevenlabs/eleven_multilingual_v2';\n\nexport type RimeModels = 'rime' | 'rime/mist' | 'rime/mistv2' | 'rime/arcana';\n\nexport type InworldModels = 'inworld' | 'inworld/inworld-tts-1';\n\nexport interface CartesiaOptions {\n duration?: number; // max duration of audio in seconds\n speed?: 'slow' | 'normal' | 'fast'; // default: not specified\n}\n\nexport interface ElevenlabsOptions {\n inactivity_timeout?: number; // default: 60\n apply_text_normalization?: 'auto' | 'off' | 'on'; // default: \"auto\"\n}\n\nexport interface RimeOptions {}\n\nexport interface InworldOptions {}\n\ntype _TTSModels = CartesiaModels | ElevenlabsModels | RimeModels | InworldModels;\n\nexport type TTSModels = CartesiaModels | ElevenlabsModels | RimeModels | InworldModels | AnyString;\n\nexport type ModelWithVoice = `${_TTSModels}:${string}` | TTSModels;\n\nexport type TTSOptions<TModel extends TTSModels> = TModel extends CartesiaModels\n ? CartesiaOptions\n : TModel extends ElevenlabsModels\n ? ElevenlabsOptions\n : TModel extends RimeOptions\n ? RimeOptions\n : TModel extends InworldOptions\n ? InworldOptions\n : Record<string, unknown>;\n\ntype TTSEncoding = 'pcm_s16le';\n\nconst DEFAULT_ENCODING: TTSEncoding = 'pcm_s16le';\nconst DEFAULT_SAMPLE_RATE = 16000;\nconst DEFAULT_BASE_URL = 'https://agent-gateway.livekit.cloud/v1';\nconst NUM_CHANNELS = 1;\nconst DEFAULT_LANGUAGE = 'en';\n\nexport interface InferenceTTSOptions<TModel extends TTSModels> {\n model?: TModel;\n voice?: string;\n language?: string;\n encoding: TTSEncoding;\n sampleRate: number;\n baseURL: string;\n apiKey: string;\n apiSecret: string;\n modelOptions: TTSOptions<TModel>;\n}\n\n/**\n * Livekit Cloud Inference TTS\n */\nexport class TTS<TModel extends TTSModels> extends BaseTTS {\n private opts: InferenceTTSOptions<TModel>;\n private streams: Set<SynthesizeStream<TModel>> = new Set();\n\n #logger = log();\n\n constructor(opts: {\n model: TModel;\n voice?: string;\n language?: string;\n baseURL?: string;\n encoding?: TTSEncoding;\n sampleRate?: number;\n apiKey?: string;\n apiSecret?: string;\n modelOptions?: TTSOptions<TModel>;\n }) {\n const sampleRate = opts?.sampleRate ?? DEFAULT_SAMPLE_RATE;\n super(sampleRate, 1, { streaming: true });\n\n const {\n model,\n voice,\n language = DEFAULT_LANGUAGE,\n baseURL,\n encoding = DEFAULT_ENCODING,\n apiKey,\n apiSecret,\n modelOptions = {} as TTSOptions<TModel>,\n } = opts || {};\n\n const lkBaseURL = baseURL || process.env.LIVEKIT_INFERENCE_URL || DEFAULT_BASE_URL;\n const lkApiKey = apiKey || process.env.LIVEKIT_INFERENCE_API_KEY || process.env.LIVEKIT_API_KEY;\n if (!lkApiKey) {\n throw new Error('apiKey is required: pass apiKey or set LIVEKIT_API_KEY');\n }\n\n const lkApiSecret =\n apiSecret || process.env.LIVEKIT_INFERENCE_API_SECRET || process.env.LIVEKIT_API_SECRET;\n if (!lkApiSecret) {\n throw new Error('apiSecret is required: pass apiSecret or set LIVEKIT_API_SECRET');\n }\n\n // read voice id from the model if provided: \"provider/model:voice_id\"\n let nextModel = model;\n let nextVoice = voice;\n if (typeof nextModel === 'string') {\n const idx = nextModel.lastIndexOf(':');\n if (idx !== -1) {\n const voiceFromModel = nextModel.slice(idx + 1);\n if (nextVoice && nextVoice !== voiceFromModel) {\n this.#logger.warn(\n '`voice` is provided via both argument and model, using the one from the argument',\n { voice: nextVoice, model: nextModel },\n );\n } else {\n nextVoice = voiceFromModel;\n }\n nextModel = nextModel.slice(0, idx) as TModel;\n }\n }\n\n this.opts = {\n model: nextModel,\n voice: nextVoice,\n language,\n encoding,\n sampleRate,\n baseURL: lkBaseURL,\n apiKey: lkApiKey,\n apiSecret: lkApiSecret,\n modelOptions,\n };\n }\n\n get label() {\n return 'inference.TTS';\n }\n\n static fromModelString(modelString: string): TTS<AnyString> {\n if (modelString.includes(':')) {\n const [model, voice] = modelString.split(':') as [TTSModels, string];\n return new TTS({ model, voice });\n }\n return new TTS({ model: modelString });\n }\n\n updateOptions(opts: Partial<Pick<InferenceTTSOptions<TModel>, 'model' | 'voice' | 'language'>>) {\n this.opts = { ...this.opts, ...opts };\n for (const stream of this.streams) {\n stream.updateOptions(opts);\n }\n }\n\n synthesize(_: string): ChunkedStream {\n throw new Error('ChunkedStream is not implemented');\n }\n\n stream(options?: { connOptions?: APIConnectOptions }): SynthesizeStream<TModel> {\n const { connOptions = DEFAULT_API_CONNECT_OPTIONS } = options || {};\n const stream = new SynthesizeStream(this, { ...this.opts }, connOptions);\n this.streams.add(stream);\n return stream;\n }\n\n async connectWs(timeout: number): Promise<WebSocket> {\n let baseURL = this.opts.baseURL;\n if (baseURL.startsWith('http://') || baseURL.startsWith('https://')) {\n baseURL = baseURL.replace('http', 'ws');\n }\n\n const token = await createAccessToken(this.opts.apiKey, this.opts.apiSecret);\n const url = `${baseURL}/tts`;\n const headers = { Authorization: `Bearer ${token}` } as Record<string, string>;\n\n const params = {\n type: 'session.create',\n sample_rate: String(this.opts.sampleRate),\n encoding: this.opts.encoding,\n extra: this.opts.modelOptions,\n } as TtsSessionCreateEvent;\n\n if (this.opts.voice) params.voice = this.opts.voice;\n if (this.opts.model) params.model = this.opts.model;\n if (this.opts.language) params.language = this.opts.language;\n\n const socket = await connectWs(url, headers, timeout);\n socket.send(JSON.stringify(params));\n return socket;\n }\n\n async closeWs(ws: WebSocket) {\n await ws.close();\n }\n\n async close() {\n for (const stream of this.streams) {\n await stream.close();\n }\n this.streams.clear();\n }\n}\n\nexport class SynthesizeStream<TModel extends TTSModels> extends BaseSynthesizeStream {\n private opts: InferenceTTSOptions<TModel>;\n private tts: TTS<TModel>;\n\n #logger = log();\n\n constructor(tts: TTS<TModel>, opts: InferenceTTSOptions<TModel>, connOptions: APIConnectOptions) {\n super(tts, connOptions);\n this.opts = opts;\n this.tts = tts;\n }\n\n get label() {\n return 'inference.SynthesizeStream';\n }\n\n updateOptions(opts: Partial<Pick<InferenceTTSOptions<TModel>, 'model' | 'voice' | 'language'>>) {\n this.opts = { ...this.opts, ...opts };\n }\n\n protected async run(): Promise<void> {\n let ws: WebSocket | null = null;\n let closing = false;\n let finalReceived = false;\n let lastFrame: AudioFrame | undefined;\n\n const sendTokenizerStream = new tokenizeBasic.SentenceTokenizer().stream();\n const eventChannel = createStreamChannel<TtsServerEvent>();\n const requestId = shortuuid('tts_request_');\n\n const resourceCleanup = () => {\n if (closing) return;\n closing = true;\n sendTokenizerStream.close();\n eventChannel.close();\n ws?.removeAllListeners();\n ws?.close();\n };\n\n const sendClientEvent = async (event: TtsClientEvent) => {\n // Don't send events to a closed WebSocket or aborted controller\n if (this.abortController.signal.aborted || closing) return;\n\n const validatedEvent = await ttsClientEventSchema.parseAsync(event);\n if (!ws || ws.readyState !== WebSocket.OPEN) {\n this.#logger.warn('Trying to send client TTS event to a closed WebSocket');\n return;\n }\n ws.send(JSON.stringify(validatedEvent));\n };\n\n const sendLastFrame = (segmentId: string, final: boolean) => {\n if (lastFrame) {\n this.queue.put({ requestId, segmentId, frame: lastFrame, final });\n lastFrame = undefined;\n }\n };\n\n const createInputTask = async () => {\n for await (const data of this.input) {\n if (this.abortController.signal.aborted || closing) break;\n if (data === SynthesizeStream.FLUSH_SENTINEL) {\n sendTokenizerStream.flush();\n continue;\n }\n sendTokenizerStream.pushText(data);\n }\n // Only call endInput if the stream hasn't been closed by cleanup\n if (!closing) {\n sendTokenizerStream.endInput();\n }\n };\n\n const createSentenceStreamTask = async () => {\n for await (const ev of sendTokenizerStream) {\n if (this.abortController.signal.aborted) break;\n\n sendClientEvent({\n type: 'input_transcript',\n transcript: ev.token + ' ',\n });\n }\n\n sendClientEvent({ type: 'session.flush' });\n };\n\n const createWsListenerTask = async (ws: WebSocket) => {\n return new Promise<void>((resolve, reject) => {\n this.abortController.signal.addEventListener('abort', () => {\n resourceCleanup();\n resolve(); // Abort is triggered by close(), which is a normal shutdown, not an error\n });\n\n ws.on('message', async (data) => {\n const eventJson = JSON.parse(data.toString()) as Record<string, unknown>;\n const validatedEvent = ttsServerEventSchema.parse(eventJson);\n eventChannel.write(validatedEvent);\n });\n\n ws.on('error', (e) => {\n this.#logger.error({ error: e }, 'WebSocket error');\n resourceCleanup();\n reject(e);\n });\n\n ws.on('close', () => {\n resourceCleanup();\n\n if (!closing) return this.#logger.error('WebSocket closed unexpectedly');\n if (finalReceived) return resolve();\n\n reject(\n new APIStatusError({\n message: 'Gateway connection closed unexpectedly',\n options: { requestId },\n }),\n );\n });\n });\n };\n\n const createRecvTask = async () => {\n let currentSessionId: string | null = null;\n\n const bstream = new AudioByteStream(this.opts.sampleRate, NUM_CHANNELS);\n const serverEventStream = eventChannel.stream();\n const reader = serverEventStream.getReader();\n\n try {\n while (!this.closed && !this.abortController.signal.aborted) {\n const result = await reader.read();\n if (this.abortController.signal.aborted) return;\n if (result.done) return;\n\n const serverEvent = result.value;\n switch (serverEvent.type) {\n case 'session.created':\n currentSessionId = serverEvent.session_id;\n break;\n case 'output_audio':\n const base64Data = new Int8Array(Buffer.from(serverEvent.audio, 'base64'));\n for (const frame of bstream.write(base64Data.buffer)) {\n sendLastFrame(currentSessionId!, false);\n lastFrame = frame;\n }\n break;\n case 'done':\n finalReceived = true;\n for (const frame of bstream.flush()) {\n sendLastFrame(currentSessionId!, false);\n lastFrame = frame;\n }\n sendLastFrame(currentSessionId!, true);\n this.queue.put(SynthesizeStream.END_OF_STREAM);\n break;\n case 'session.closed':\n resourceCleanup();\n break;\n case 'error':\n this.#logger.error(\n { serverEvent },\n 'Received error message from LiveKit TTS WebSocket',\n );\n resourceCleanup();\n throw new APIError(`LiveKit TTS returned error: ${serverEvent.message}`);\n default:\n this.#logger.warn('Unexpected message %s', serverEvent);\n break;\n }\n }\n } finally {\n reader.releaseLock();\n try {\n await serverEventStream.cancel();\n } catch (e) {\n this.#logger.debug('Error cancelling serverEventStream (may already be cancelled):', e);\n }\n }\n };\n\n try {\n ws = await this.tts.connectWs(this.connOptions.timeoutMs);\n\n await Promise.all([\n createInputTask(),\n createSentenceStreamTask(),\n createWsListenerTask(ws),\n createRecvTask(),\n ]);\n } finally {\n resourceCleanup();\n }\n }\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAIA,gBAA0B;AAC1B,wBAAyC;AACzC,mBAAgC;AAChC,iBAAoB;AACpB,4BAAoC;AACpC,sBAAuC;AAEvC,iBAAyE;AACzE,mBAAoE;AACpE,mBAA0B;AAC1B,wBAMO;AACP,IAAAA,gBAA6D;AAoD7D,MAAM,mBAAgC;AACtC,MAAM,sBAAsB;AAC5B,MAAM,mBAAmB;AACzB,MAAM,eAAe;AACrB,MAAM,mBAAmB;AAiBlB,MAAM,YAAsC,WAAAC,IAAQ;AAAA,EACjD;AAAA,EACA,UAAyC,oBAAI,IAAI;AAAA,EAEzD,cAAU,gBAAI;AAAA,EAEd,YAAY,MAUT;AACD,UAAM,cAAa,6BAAM,eAAc;AACvC,UAAM,YAAY,GAAG,EAAE,WAAW,KAAK,CAAC;AAExC,UAAM;AAAA,MACJ;AAAA,MACA;AAAA,MACA,WAAW;AAAA,MACX;AAAA,MACA,WAAW;AAAA,MACX;AAAA,MACA;AAAA,MACA,eAAe,CAAC;AAAA,IAClB,IAAI,QAAQ,CAAC;AAEb,UAAM,YAAY,WAAW,QAAQ,IAAI,yBAAyB;AAClE,UAAM,WAAW,UAAU,QAAQ,IAAI,6BAA6B,QAAQ,IAAI;AAChF,QAAI,CAAC,UAAU;AACb,YAAM,IAAI,MAAM,wDAAwD;AAAA,IAC1E;AAEA,UAAM,cACJ,aAAa,QAAQ,IAAI,gCAAgC,QAAQ,IAAI;AACvE,QAAI,CAAC,aAAa;AAChB,YAAM,IAAI,MAAM,iEAAiE;AAAA,IACnF;AAGA,QAAI,YAAY;AAChB,QAAI,YAAY;AAChB,QAAI,OAAO,cAAc,UAAU;AACjC,YAAM,MAAM,UAAU,YAAY,GAAG;AACrC,UAAI,QAAQ,IAAI;AACd,cAAM,iBAAiB,UAAU,MAAM,MAAM,CAAC;AAC9C,YAAI,aAAa,cAAc,gBAAgB;AAC7C,eAAK,QAAQ;AAAA,YACX;AAAA,YACA,EAAE,OAAO,WAAW,OAAO,UAAU;AAAA,UACvC;AAAA,QACF,OAAO;AACL,sBAAY;AAAA,QACd;AACA,oBAAY,UAAU,MAAM,GAAG,GAAG;AAAA,MACpC;AAAA,IACF;AAEA,SAAK,OAAO;AAAA,MACV,OAAO;AAAA,MACP,OAAO;AAAA,MACP;AAAA,MACA;AAAA,MACA;AAAA,MACA,SAAS;AAAA,MACT,QAAQ;AAAA,MACR,WAAW;AAAA,MACX;AAAA,IACF;AAAA,EACF;AAAA,EAEA,IAAI,QAAQ;AACV,WAAO;AAAA,EACT;AAAA,EAEA,OAAO,gBAAgB,aAAqC;AAC1D,QAAI,YAAY,SAAS,GAAG,GAAG;AAC7B,YAAM,CAAC,OAAO,KAAK,IAAI,YAAY,MAAM,GAAG;AAC5C,aAAO,IAAI,IAAI,EAAE,OAAO,MAAM,CAAC;AAAA,IACjC;AACA,WAAO,IAAI,IAAI,EAAE,OAAO,YAAY,CAAC;AAAA,EACvC;AAAA,EAEA,cAAc,MAAkF;AAC9F,SAAK,OAAO,EAAE,GAAG,KAAK,MAAM,GAAG,KAAK;AACpC,eAAW,UAAU,KAAK,SAAS;AACjC,aAAO,cAAc,IAAI;AAAA,IAC3B;AAAA,EACF;AAAA,EAEA,WAAW,GAA0B;AACnC,UAAM,IAAI,MAAM,kCAAkC;AAAA,EACpD;AAAA,EAEA,OAAO,SAAyE;AAC9E,UAAM,EAAE,cAAc,yCAA4B,IAAI,WAAW,CAAC;AAClE,UAAM,SAAS,IAAI,iBAAiB,MAAM,EAAE,GAAG,KAAK,KAAK,GAAG,WAAW;AACvE,SAAK,QAAQ,IAAI,MAAM;AACvB,WAAO;AAAA,EACT;AAAA,EAEA,MAAM,UAAU,SAAqC;AACnD,QAAI,UAAU,KAAK,KAAK;AACxB,QAAI,QAAQ,WAAW,SAAS,KAAK,QAAQ,WAAW,UAAU,GAAG;AACnE,gBAAU,QAAQ,QAAQ,QAAQ,IAAI;AAAA,IACxC;AAEA,UAAM,QAAQ,UAAM,iCAAkB,KAAK,KAAK,QAAQ,KAAK,KAAK,SAAS;AAC3E,UAAM,MAAM,GAAG,OAAO;AACtB,UAAM,UAAU,EAAE,eAAe,UAAU,KAAK,GAAG;AAEnD,UAAM,SAAS;AAAA,MACb,MAAM;AAAA,MACN,aAAa,OAAO,KAAK,KAAK,UAAU;AAAA,MACxC,UAAU,KAAK,KAAK;AAAA,MACpB,OAAO,KAAK,KAAK;AAAA,IACnB;AAEA,QAAI,KAAK,KAAK,MAAO,QAAO,QAAQ,KAAK,KAAK;AAC9C,QAAI,KAAK,KAAK,MAAO,QAAO,QAAQ,KAAK,KAAK;AAC9C,QAAI,KAAK,KAAK,SAAU,QAAO,WAAW,KAAK,KAAK;AAEpD,UAAM,SAAS,UAAM,yBAAU,KAAK,SAAS,OAAO;AACpD,WAAO,KAAK,KAAK,UAAU,MAAM,CAAC;AAClC,WAAO;AAAA,EACT;AAAA,EAEA,MAAM,QAAQ,IAAe;AAC3B,UAAM,GAAG,MAAM;AAAA,EACjB;AAAA,EAEA,MAAM,QAAQ;AACZ,eAAW,UAAU,KAAK,SAAS;AACjC,YAAM,OAAO,MAAM;AAAA,IACrB;AACA,SAAK,QAAQ,MAAM;AAAA,EACrB;AACF;AAEO,MAAM,yBAAmD,WAAAC,iBAAqB;AAAA,EAC3E;AAAA,EACA;AAAA,EAER,cAAU,gBAAI;AAAA,EAEd,YAAY,KAAkB,MAAmC,aAAgC;AAC/F,UAAM,KAAK,WAAW;AACtB,SAAK,OAAO;AACZ,SAAK,MAAM;AAAA,EACb;AAAA,EAEA,IAAI,QAAQ;AACV,WAAO;AAAA,EACT;AAAA,EAEA,cAAc,MAAkF;AAC9F,SAAK,OAAO,EAAE,GAAG,KAAK,MAAM,GAAG,KAAK;AAAA,EACtC;AAAA,EAEA,MAAgB,MAAqB;AACnC,QAAI,KAAuB;AAC3B,QAAI,UAAU;AACd,QAAI,gBAAgB;AACpB,QAAI;AAEJ,UAAM,sBAAsB,IAAI,gBAAAC,MAAc,kBAAkB,EAAE,OAAO;AACzE,UAAM,mBAAe,2CAAoC;AACzD,UAAM,gBAAY,wBAAU,cAAc;AAE1C,UAAM,kBAAkB,MAAM;AAC5B,UAAI,QAAS;AACb,gBAAU;AACV,0BAAoB,MAAM;AAC1B,mBAAa,MAAM;AACnB,+BAAI;AACJ,+BAAI;AAAA,IACN;AAEA,UAAM,kBAAkB,OAAO,UAA0B;AAEvD,UAAI,KAAK,gBAAgB,OAAO,WAAW,QAAS;AAEpD,YAAM,iBAAiB,MAAM,uCAAqB,WAAW,KAAK;AAClE,UAAI,CAAC,MAAM,GAAG,eAAe,oBAAU,MAAM;AAC3C,aAAK,QAAQ,KAAK,uDAAuD;AACzE;AAAA,MACF;AACA,SAAG,KAAK,KAAK,UAAU,cAAc,CAAC;AAAA,IACxC;AAEA,UAAM,gBAAgB,CAAC,WAAmB,UAAmB;AAC3D,UAAI,WAAW;AACb,aAAK,MAAM,IAAI,EAAE,WAAW,WAAW,OAAO,WAAW,MAAM,CAAC;AAChE,oBAAY;AAAA,MACd;AAAA,IACF;AAEA,UAAM,kBAAkB,YAAY;AAClC,uBAAiB,QAAQ,KAAK,OAAO;AACnC,YAAI,KAAK,gBAAgB,OAAO,WAAW,QAAS;AACpD,YAAI,SAAS,iBAAiB,gBAAgB;AAC5C,8BAAoB,MAAM;AAC1B;AAAA,QACF;AACA,4BAAoB,SAAS,IAAI;AAAA,MACnC;AAEA,UAAI,CAAC,SAAS;AACZ,4BAAoB,SAAS;AAAA,MAC/B;AAAA,IACF;AAEA,UAAM,2BAA2B,YAAY;AAC3C,uBAAiB,MAAM,qBAAqB;AAC1C,YAAI,KAAK,gBAAgB,OAAO,QAAS;AAEzC,wBAAgB;AAAA,UACd,MAAM;AAAA,UACN,YAAY,GAAG,QAAQ;AAAA,QACzB,CAAC;AAAA,MACH;AAEA,sBAAgB,EAAE,MAAM,gBAAgB,CAAC;AAAA,IAC3C;AAEA,UAAM,uBAAuB,OAAOC,QAAkB;AACpD,aAAO,IAAI,QAAc,CAAC,SAAS,WAAW;AAC5C,aAAK,gBAAgB,OAAO,iBAAiB,SAAS,MAAM;AAC1D,0BAAgB;AAChB,kBAAQ;AAAA,QACV,CAAC;AAED,QAAAA,IAAG,GAAG,WAAW,OAAO,SAAS;AAC/B,gBAAM,YAAY,KAAK,MAAM,KAAK,SAAS,CAAC;AAC5C,gBAAM,iBAAiB,uCAAqB,MAAM,SAAS;AAC3D,uBAAa,MAAM,cAAc;AAAA,QACnC,CAAC;AAED,QAAAA,IAAG,GAAG,SAAS,CAAC,MAAM;AACpB,eAAK,QAAQ,MAAM,EAAE,OAAO,EAAE,GAAG,iBAAiB;AAClD,0BAAgB;AAChB,iBAAO,CAAC;AAAA,QACV,CAAC;AAED,QAAAA,IAAG,GAAG,SAAS,MAAM;AACnB,0BAAgB;AAEhB,cAAI,CAAC,QAAS,QAAO,KAAK,QAAQ,MAAM,+BAA+B;AACvE,cAAI,cAAe,QAAO,QAAQ;AAElC;AAAA,YACE,IAAI,iCAAe;AAAA,cACjB,SAAS;AAAA,cACT,SAAS,EAAE,UAAU;AAAA,YACvB,CAAC;AAAA,UACH;AAAA,QACF,CAAC;AAAA,MACH,CAAC;AAAA,IACH;AAEA,UAAM,iBAAiB,YAAY;AACjC,UAAI,mBAAkC;AAEtC,YAAM,UAAU,IAAI,6BAAgB,KAAK,KAAK,YAAY,YAAY;AACtE,YAAM,oBAAoB,aAAa,OAAO;AAC9C,YAAM,SAAS,kBAAkB,UAAU;AAE3C,UAAI;AACF,eAAO,CAAC,KAAK,UAAU,CAAC,KAAK,gBAAgB,OAAO,SAAS;AAC3D,gBAAM,SAAS,MAAM,OAAO,KAAK;AACjC,cAAI,KAAK,gBAAgB,OAAO,QAAS;AACzC,cAAI,OAAO,KAAM;AAEjB,gBAAM,cAAc,OAAO;AAC3B,kBAAQ,YAAY,MAAM;AAAA,YACxB,KAAK;AACH,iCAAmB,YAAY;AAC/B;AAAA,YACF,KAAK;AACH,oBAAM,aAAa,IAAI,UAAU,OAAO,KAAK,YAAY,OAAO,QAAQ,CAAC;AACzE,yBAAW,SAAS,QAAQ,MAAM,WAAW,MAAM,GAAG;AACpD,8BAAc,kBAAmB,KAAK;AACtC,4BAAY;AAAA,cACd;AACA;AAAA,YACF,KAAK;AACH,8BAAgB;AAChB,yBAAW,SAAS,QAAQ,MAAM,GAAG;AACnC,8BAAc,kBAAmB,KAAK;AACtC,4BAAY;AAAA,cACd;AACA,4BAAc,kBAAmB,IAAI;AACrC,mBAAK,MAAM,IAAI,iBAAiB,aAAa;AAC7C;AAAA,YACF,KAAK;AACH,8BAAgB;AAChB;AAAA,YACF,KAAK;AACH,mBAAK,QAAQ;AAAA,gBACX,EAAE,YAAY;AAAA,gBACd;AAAA,cACF;AACA,8BAAgB;AAChB,oBAAM,IAAI,2BAAS,+BAA+B,YAAY,OAAO,EAAE;AAAA,YACzE;AACE,mBAAK,QAAQ,KAAK,yBAAyB,WAAW;AACtD;AAAA,UACJ;AAAA,QACF;AAAA,MACF,UAAE;AACA,eAAO,YAAY;AACnB,YAAI;AACF,gBAAM,kBAAkB,OAAO;AAAA,QACjC,SAAS,GAAG;AACV,eAAK,QAAQ,MAAM,kEAAkE,CAAC;AAAA,QACxF;AAAA,MACF;AAAA,IACF;AAEA,QAAI;AACF,WAAK,MAAM,KAAK,IAAI,UAAU,KAAK,YAAY,SAAS;AAExD,YAAM,QAAQ,IAAI;AAAA,QAChB,gBAAgB;AAAA,QAChB,yBAAyB;AAAA,QACzB,qBAAqB,EAAE;AAAA,QACvB,eAAe;AAAA,MACjB,CAAC;AAAA,IACH,UAAE;AACA,sBAAgB;AAAA,IAClB;AAAA,EACF;AACF;","names":["import_utils","BaseTTS","BaseSynthesizeStream","tokenizeBasic","ws"]}
|
package/dist/inference/tts.d.cts
CHANGED
|
@@ -68,7 +68,6 @@ export declare class SynthesizeStream<TModel extends TTSModels> extends BaseSynt
|
|
|
68
68
|
#private;
|
|
69
69
|
private opts;
|
|
70
70
|
private tts;
|
|
71
|
-
private connOptions;
|
|
72
71
|
constructor(tts: TTS<TModel>, opts: InferenceTTSOptions<TModel>, connOptions: APIConnectOptions);
|
|
73
72
|
get label(): string;
|
|
74
73
|
updateOptions(opts: Partial<Pick<InferenceTTSOptions<TModel>, 'model' | 'voice' | 'language'>>): void;
|
package/dist/inference/tts.d.ts
CHANGED
|
@@ -68,7 +68,6 @@ export declare class SynthesizeStream<TModel extends TTSModels> extends BaseSynt
|
|
|
68
68
|
#private;
|
|
69
69
|
private opts;
|
|
70
70
|
private tts;
|
|
71
|
-
private connOptions;
|
|
72
71
|
constructor(tts: TTS<TModel>, opts: InferenceTTSOptions<TModel>, connOptions: APIConnectOptions);
|
|
73
72
|
get label(): string;
|
|
74
73
|
updateOptions(opts: Partial<Pick<InferenceTTSOptions<TModel>, 'model' | 'voice' | 'language'>>): void;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"tts.d.ts","sourceRoot":"","sources":["../../src/inference/tts.ts"],"names":[],"mappings":"AAIA,OAAO,EAAE,SAAS,EAAE,MAAM,IAAI,CAAC;AAM/B,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,iBAAiB,CAAC;AACrD,OAAO,EAAE,gBAAgB,IAAI,oBAAoB,EAAE,GAAG,IAAI,OAAO,EAAE,MAAM,iBAAiB,CAAC;AAC3F,OAAO,EAAE,KAAK,iBAAiB,EAA+B,MAAM,aAAa,CAAC;AASlF,OAAO,EAAE,KAAK,SAAS,EAAgC,MAAM,YAAY,CAAC;AAE1E,MAAM,MAAM,cAAc,GACtB,UAAU,GACV,gBAAgB,GAChB,kBAAkB,GAClB,sBAAsB,CAAC;AAE3B,MAAM,MAAM,gBAAgB,GACxB,YAAY,GACZ,4BAA4B,GAC5B,8BAA8B,GAC9B,4BAA4B,GAC5B,8BAA8B,GAC9B,mCAAmC,CAAC;AAExC,MAAM,MAAM,UAAU,GAAG,MAAM,GAAG,WAAW,GAAG,aAAa,GAAG,aAAa,CAAC;AAE9E,MAAM,MAAM,aAAa,GAAG,SAAS,GAAG,uBAAuB,CAAC;AAEhE,MAAM,WAAW,eAAe;IAC9B,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,KAAK,CAAC,EAAE,MAAM,GAAG,QAAQ,GAAG,MAAM,CAAC;CACpC;AAED,MAAM,WAAW,iBAAiB;IAChC,kBAAkB,CAAC,EAAE,MAAM,CAAC;IAC5B,wBAAwB,CAAC,EAAE,MAAM,GAAG,KAAK,GAAG,IAAI,CAAC;CAClD;AAED,MAAM,WAAW,WAAW;CAAG;AAE/B,MAAM,WAAW,cAAc;CAAG;AAElC,KAAK,UAAU,GAAG,cAAc,GAAG,gBAAgB,GAAG,UAAU,GAAG,aAAa,CAAC;AAEjF,MAAM,MAAM,SAAS,GAAG,cAAc,GAAG,gBAAgB,GAAG,UAAU,GAAG,aAAa,GAAG,SAAS,CAAC;AAEnG,MAAM,MAAM,cAAc,GAAG,GAAG,UAAU,IAAI,MAAM,EAAE,GAAG,SAAS,CAAC;AAEnE,MAAM,MAAM,UAAU,CAAC,MAAM,SAAS,SAAS,IAAI,MAAM,SAAS,cAAc,GAC5E,eAAe,GACf,MAAM,SAAS,gBAAgB,GAC7B,iBAAiB,GACjB,MAAM,SAAS,WAAW,GACxB,WAAW,GACX,MAAM,SAAS,cAAc,GAC3B,cAAc,GACd,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;AAElC,KAAK,WAAW,GAAG,WAAW,CAAC;AAQ/B,MAAM,WAAW,mBAAmB,CAAC,MAAM,SAAS,SAAS;IAC3D,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,QAAQ,EAAE,WAAW,CAAC;IACtB,UAAU,EAAE,MAAM,CAAC;IACnB,OAAO,EAAE,MAAM,CAAC;IAChB,MAAM,EAAE,MAAM,CAAC;IACf,SAAS,EAAE,MAAM,CAAC;IAClB,YAAY,EAAE,UAAU,CAAC,MAAM,CAAC,CAAC;CAClC;AAED;;GAEG;AACH,qBAAa,GAAG,CAAC,MAAM,SAAS,SAAS,CAAE,SAAQ,OAAO;;IACxD,OAAO,CAAC,IAAI,CAA8B;IAC1C,OAAO,CAAC,OAAO,CAA4C;gBAI/C,IAAI,EAAE;QAChB,KAAK,EAAE,MAAM,CAAC;QACd,KAAK,CAAC,EAAE,MAAM,CAAC;QACf,QAAQ,CAAC,EAAE,MAAM,CAAC;QAClB,OAAO,CAAC,EAAE,MAAM,CAAC;QACjB,QAAQ,CAAC,EAAE,WAAW,CAAC;QACvB,UAAU,CAAC,EAAE,MAAM,CAAC;QACpB,MAAM,CAAC,EAAE,MAAM,CAAC;QAChB,SAAS,CAAC,EAAE,MAAM,CAAC;QACnB,YAAY,CAAC,EAAE,UAAU,CAAC,MAAM,CAAC,CAAC;KACnC;IA2DD,IAAI,KAAK,WAER;IAED,MAAM,CAAC,eAAe,CAAC,WAAW,EAAE,MAAM,GAAG,GAAG,CAAC,SAAS,CAAC;IAQ3D,aAAa,CAAC,IAAI,EAAE,OAAO,CAAC,IAAI,CAAC,mBAAmB,CAAC,MAAM,CAAC,EAAE,OAAO,GAAG,OAAO,GAAG,UAAU,CAAC,CAAC;IAO9F,UAAU,CAAC,CAAC,EAAE,MAAM,GAAG,aAAa;IAIpC,MAAM,CAAC,OAAO,CAAC,EAAE;QAAE,WAAW,CAAC,EAAE,iBAAiB,CAAA;KAAE,GAAG,gBAAgB,CAAC,MAAM,CAAC;IAOzE,SAAS,CAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC,SAAS,CAAC;IA0B9C,OAAO,CAAC,EAAE,EAAE,SAAS;IAIrB,KAAK;CAMZ;AAED,qBAAa,gBAAgB,CAAC,MAAM,SAAS,SAAS,CAAE,SAAQ,oBAAoB;;IAClF,OAAO,CAAC,IAAI,CAA8B;IAC1C,OAAO,CAAC,GAAG,CAAc;
|
|
1
|
+
{"version":3,"file":"tts.d.ts","sourceRoot":"","sources":["../../src/inference/tts.ts"],"names":[],"mappings":"AAIA,OAAO,EAAE,SAAS,EAAE,MAAM,IAAI,CAAC;AAM/B,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,iBAAiB,CAAC;AACrD,OAAO,EAAE,gBAAgB,IAAI,oBAAoB,EAAE,GAAG,IAAI,OAAO,EAAE,MAAM,iBAAiB,CAAC;AAC3F,OAAO,EAAE,KAAK,iBAAiB,EAA+B,MAAM,aAAa,CAAC;AASlF,OAAO,EAAE,KAAK,SAAS,EAAgC,MAAM,YAAY,CAAC;AAE1E,MAAM,MAAM,cAAc,GACtB,UAAU,GACV,gBAAgB,GAChB,kBAAkB,GAClB,sBAAsB,CAAC;AAE3B,MAAM,MAAM,gBAAgB,GACxB,YAAY,GACZ,4BAA4B,GAC5B,8BAA8B,GAC9B,4BAA4B,GAC5B,8BAA8B,GAC9B,mCAAmC,CAAC;AAExC,MAAM,MAAM,UAAU,GAAG,MAAM,GAAG,WAAW,GAAG,aAAa,GAAG,aAAa,CAAC;AAE9E,MAAM,MAAM,aAAa,GAAG,SAAS,GAAG,uBAAuB,CAAC;AAEhE,MAAM,WAAW,eAAe;IAC9B,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,KAAK,CAAC,EAAE,MAAM,GAAG,QAAQ,GAAG,MAAM,CAAC;CACpC;AAED,MAAM,WAAW,iBAAiB;IAChC,kBAAkB,CAAC,EAAE,MAAM,CAAC;IAC5B,wBAAwB,CAAC,EAAE,MAAM,GAAG,KAAK,GAAG,IAAI,CAAC;CAClD;AAED,MAAM,WAAW,WAAW;CAAG;AAE/B,MAAM,WAAW,cAAc;CAAG;AAElC,KAAK,UAAU,GAAG,cAAc,GAAG,gBAAgB,GAAG,UAAU,GAAG,aAAa,CAAC;AAEjF,MAAM,MAAM,SAAS,GAAG,cAAc,GAAG,gBAAgB,GAAG,UAAU,GAAG,aAAa,GAAG,SAAS,CAAC;AAEnG,MAAM,MAAM,cAAc,GAAG,GAAG,UAAU,IAAI,MAAM,EAAE,GAAG,SAAS,CAAC;AAEnE,MAAM,MAAM,UAAU,CAAC,MAAM,SAAS,SAAS,IAAI,MAAM,SAAS,cAAc,GAC5E,eAAe,GACf,MAAM,SAAS,gBAAgB,GAC7B,iBAAiB,GACjB,MAAM,SAAS,WAAW,GACxB,WAAW,GACX,MAAM,SAAS,cAAc,GAC3B,cAAc,GACd,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;AAElC,KAAK,WAAW,GAAG,WAAW,CAAC;AAQ/B,MAAM,WAAW,mBAAmB,CAAC,MAAM,SAAS,SAAS;IAC3D,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,QAAQ,EAAE,WAAW,CAAC;IACtB,UAAU,EAAE,MAAM,CAAC;IACnB,OAAO,EAAE,MAAM,CAAC;IAChB,MAAM,EAAE,MAAM,CAAC;IACf,SAAS,EAAE,MAAM,CAAC;IAClB,YAAY,EAAE,UAAU,CAAC,MAAM,CAAC,CAAC;CAClC;AAED;;GAEG;AACH,qBAAa,GAAG,CAAC,MAAM,SAAS,SAAS,CAAE,SAAQ,OAAO;;IACxD,OAAO,CAAC,IAAI,CAA8B;IAC1C,OAAO,CAAC,OAAO,CAA4C;gBAI/C,IAAI,EAAE;QAChB,KAAK,EAAE,MAAM,CAAC;QACd,KAAK,CAAC,EAAE,MAAM,CAAC;QACf,QAAQ,CAAC,EAAE,MAAM,CAAC;QAClB,OAAO,CAAC,EAAE,MAAM,CAAC;QACjB,QAAQ,CAAC,EAAE,WAAW,CAAC;QACvB,UAAU,CAAC,EAAE,MAAM,CAAC;QACpB,MAAM,CAAC,EAAE,MAAM,CAAC;QAChB,SAAS,CAAC,EAAE,MAAM,CAAC;QACnB,YAAY,CAAC,EAAE,UAAU,CAAC,MAAM,CAAC,CAAC;KACnC;IA2DD,IAAI,KAAK,WAER;IAED,MAAM,CAAC,eAAe,CAAC,WAAW,EAAE,MAAM,GAAG,GAAG,CAAC,SAAS,CAAC;IAQ3D,aAAa,CAAC,IAAI,EAAE,OAAO,CAAC,IAAI,CAAC,mBAAmB,CAAC,MAAM,CAAC,EAAE,OAAO,GAAG,OAAO,GAAG,UAAU,CAAC,CAAC;IAO9F,UAAU,CAAC,CAAC,EAAE,MAAM,GAAG,aAAa;IAIpC,MAAM,CAAC,OAAO,CAAC,EAAE;QAAE,WAAW,CAAC,EAAE,iBAAiB,CAAA;KAAE,GAAG,gBAAgB,CAAC,MAAM,CAAC;IAOzE,SAAS,CAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC,SAAS,CAAC;IA0B9C,OAAO,CAAC,EAAE,EAAE,SAAS;IAIrB,KAAK;CAMZ;AAED,qBAAa,gBAAgB,CAAC,MAAM,SAAS,SAAS,CAAE,SAAQ,oBAAoB;;IAClF,OAAO,CAAC,IAAI,CAA8B;IAC1C,OAAO,CAAC,GAAG,CAAc;gBAIb,GAAG,EAAE,GAAG,CAAC,MAAM,CAAC,EAAE,IAAI,EAAE,mBAAmB,CAAC,MAAM,CAAC,EAAE,WAAW,EAAE,iBAAiB;IAM/F,IAAI,KAAK,WAER;IAED,aAAa,CAAC,IAAI,EAAE,OAAO,CAAC,IAAI,CAAC,mBAAmB,CAAC,MAAM,CAAC,EAAE,OAAO,GAAG,OAAO,GAAG,UAAU,CAAC,CAAC;cAI9E,GAAG,IAAI,OAAO,CAAC,IAAI,CAAC;CA6KrC"}
|
package/dist/inference/tts.js
CHANGED
|
@@ -131,13 +131,11 @@ class TTS extends BaseTTS {
|
|
|
131
131
|
class SynthesizeStream extends BaseSynthesizeStream {
|
|
132
132
|
opts;
|
|
133
133
|
tts;
|
|
134
|
-
connOptions;
|
|
135
134
|
#logger = log();
|
|
136
135
|
constructor(tts, opts, connOptions) {
|
|
137
136
|
super(tts, connOptions);
|
|
138
137
|
this.opts = opts;
|
|
139
138
|
this.tts = tts;
|
|
140
|
-
this.connOptions = connOptions;
|
|
141
139
|
}
|
|
142
140
|
get label() {
|
|
143
141
|
return "inference.SynthesizeStream";
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../../src/inference/tts.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2025 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { WebSocket } from 'ws';\nimport { APIError, APIStatusError } from '../_exceptions.js';\nimport { AudioByteStream } from '../audio.js';\nimport { log } from '../log.js';\nimport { createStreamChannel } from '../stream/stream_channel.js';\nimport { basic as tokenizeBasic } from '../tokenize/index.js';\nimport type { ChunkedStream } from '../tts/index.js';\nimport { SynthesizeStream as BaseSynthesizeStream, TTS as BaseTTS } from '../tts/index.js';\nimport { type APIConnectOptions, DEFAULT_API_CONNECT_OPTIONS } from '../types.js';\nimport { shortuuid } from '../utils.js';\nimport {\n type TtsClientEvent,\n type TtsServerEvent,\n type TtsSessionCreateEvent,\n ttsClientEventSchema,\n ttsServerEventSchema,\n} from './api_protos.js';\nimport { type AnyString, connectWs, createAccessToken } from './utils.js';\n\nexport type CartesiaModels =\n | 'cartesia'\n | 'cartesia/sonic'\n | 'cartesia/sonic-2'\n | 'cartesia/sonic-turbo';\n\nexport type ElevenlabsModels =\n | 'elevenlabs'\n | 'elevenlabs/eleven_flash_v2'\n | 'elevenlabs/eleven_flash_v2_5'\n | 'elevenlabs/eleven_turbo_v2'\n | 'elevenlabs/eleven_turbo_v2_5'\n | 'elevenlabs/eleven_multilingual_v2';\n\nexport type RimeModels = 'rime' | 'rime/mist' | 'rime/mistv2' | 'rime/arcana';\n\nexport type InworldModels = 'inworld' | 'inworld/inworld-tts-1';\n\nexport interface CartesiaOptions {\n duration?: number; // max duration of audio in seconds\n speed?: 'slow' | 'normal' | 'fast'; // default: not specified\n}\n\nexport interface ElevenlabsOptions {\n inactivity_timeout?: number; // default: 60\n apply_text_normalization?: 'auto' | 'off' | 'on'; // default: \"auto\"\n}\n\nexport interface RimeOptions {}\n\nexport interface InworldOptions {}\n\ntype _TTSModels = CartesiaModels | ElevenlabsModels | RimeModels | InworldModels;\n\nexport type TTSModels = CartesiaModels | ElevenlabsModels | RimeModels | InworldModels | AnyString;\n\nexport type ModelWithVoice = `${_TTSModels}:${string}` | TTSModels;\n\nexport type TTSOptions<TModel extends TTSModels> = TModel extends CartesiaModels\n ? CartesiaOptions\n : TModel extends ElevenlabsModels\n ? ElevenlabsOptions\n : TModel extends RimeOptions\n ? RimeOptions\n : TModel extends InworldOptions\n ? InworldOptions\n : Record<string, unknown>;\n\ntype TTSEncoding = 'pcm_s16le';\n\nconst DEFAULT_ENCODING: TTSEncoding = 'pcm_s16le';\nconst DEFAULT_SAMPLE_RATE = 16000;\nconst DEFAULT_BASE_URL = 'https://agent-gateway.livekit.cloud/v1';\nconst NUM_CHANNELS = 1;\nconst DEFAULT_LANGUAGE = 'en';\n\nexport interface InferenceTTSOptions<TModel extends TTSModels> {\n model?: TModel;\n voice?: string;\n language?: string;\n encoding: TTSEncoding;\n sampleRate: number;\n baseURL: string;\n apiKey: string;\n apiSecret: string;\n modelOptions: TTSOptions<TModel>;\n}\n\n/**\n * Livekit Cloud Inference TTS\n */\nexport class TTS<TModel extends TTSModels> extends BaseTTS {\n private opts: InferenceTTSOptions<TModel>;\n private streams: Set<SynthesizeStream<TModel>> = new Set();\n\n #logger = log();\n\n constructor(opts: {\n model: TModel;\n voice?: string;\n language?: string;\n baseURL?: string;\n encoding?: TTSEncoding;\n sampleRate?: number;\n apiKey?: string;\n apiSecret?: string;\n modelOptions?: TTSOptions<TModel>;\n }) {\n const sampleRate = opts?.sampleRate ?? DEFAULT_SAMPLE_RATE;\n super(sampleRate, 1, { streaming: true });\n\n const {\n model,\n voice,\n language = DEFAULT_LANGUAGE,\n baseURL,\n encoding = DEFAULT_ENCODING,\n apiKey,\n apiSecret,\n modelOptions = {} as TTSOptions<TModel>,\n } = opts || {};\n\n const lkBaseURL = baseURL || process.env.LIVEKIT_INFERENCE_URL || DEFAULT_BASE_URL;\n const lkApiKey = apiKey || process.env.LIVEKIT_INFERENCE_API_KEY || process.env.LIVEKIT_API_KEY;\n if (!lkApiKey) {\n throw new Error('apiKey is required: pass apiKey or set LIVEKIT_API_KEY');\n }\n\n const lkApiSecret =\n apiSecret || process.env.LIVEKIT_INFERENCE_API_SECRET || process.env.LIVEKIT_API_SECRET;\n if (!lkApiSecret) {\n throw new Error('apiSecret is required: pass apiSecret or set LIVEKIT_API_SECRET');\n }\n\n // read voice id from the model if provided: \"provider/model:voice_id\"\n let nextModel = model;\n let nextVoice = voice;\n if (typeof nextModel === 'string') {\n const idx = nextModel.lastIndexOf(':');\n if (idx !== -1) {\n const voiceFromModel = nextModel.slice(idx + 1);\n if (nextVoice && nextVoice !== voiceFromModel) {\n this.#logger.warn(\n '`voice` is provided via both argument and model, using the one from the argument',\n { voice: nextVoice, model: nextModel },\n );\n } else {\n nextVoice = voiceFromModel;\n }\n nextModel = nextModel.slice(0, idx) as TModel;\n }\n }\n\n this.opts = {\n model: nextModel,\n voice: nextVoice,\n language,\n encoding,\n sampleRate,\n baseURL: lkBaseURL,\n apiKey: lkApiKey,\n apiSecret: lkApiSecret,\n modelOptions,\n };\n }\n\n get label() {\n return 'inference.TTS';\n }\n\n static fromModelString(modelString: string): TTS<AnyString> {\n if (modelString.includes(':')) {\n const [model, voice] = modelString.split(':') as [TTSModels, string];\n return new TTS({ model, voice });\n }\n return new TTS({ model: modelString });\n }\n\n updateOptions(opts: Partial<Pick<InferenceTTSOptions<TModel>, 'model' | 'voice' | 'language'>>) {\n this.opts = { ...this.opts, ...opts };\n for (const stream of this.streams) {\n stream.updateOptions(opts);\n }\n }\n\n synthesize(_: string): ChunkedStream {\n throw new Error('ChunkedStream is not implemented');\n }\n\n stream(options?: { connOptions?: APIConnectOptions }): SynthesizeStream<TModel> {\n const { connOptions = DEFAULT_API_CONNECT_OPTIONS } = options || {};\n const stream = new SynthesizeStream(this, { ...this.opts }, connOptions);\n this.streams.add(stream);\n return stream;\n }\n\n async connectWs(timeout: number): Promise<WebSocket> {\n let baseURL = this.opts.baseURL;\n if (baseURL.startsWith('http://') || baseURL.startsWith('https://')) {\n baseURL = baseURL.replace('http', 'ws');\n }\n\n const token = await createAccessToken(this.opts.apiKey, this.opts.apiSecret);\n const url = `${baseURL}/tts`;\n const headers = { Authorization: `Bearer ${token}` } as Record<string, string>;\n\n const params = {\n type: 'session.create',\n sample_rate: String(this.opts.sampleRate),\n encoding: this.opts.encoding,\n extra: this.opts.modelOptions,\n } as TtsSessionCreateEvent;\n\n if (this.opts.voice) params.voice = this.opts.voice;\n if (this.opts.model) params.model = this.opts.model;\n if (this.opts.language) params.language = this.opts.language;\n\n const socket = await connectWs(url, headers, timeout);\n socket.send(JSON.stringify(params));\n return socket;\n }\n\n async closeWs(ws: WebSocket) {\n await ws.close();\n }\n\n async close() {\n for (const stream of this.streams) {\n await stream.close();\n }\n this.streams.clear();\n }\n}\n\nexport class SynthesizeStream<TModel extends TTSModels> extends BaseSynthesizeStream {\n private opts: InferenceTTSOptions<TModel>;\n private tts: TTS<TModel>;\n private connOptions: APIConnectOptions;\n\n #logger = log();\n\n constructor(tts: TTS<TModel>, opts: InferenceTTSOptions<TModel>, connOptions: APIConnectOptions) {\n super(tts, connOptions);\n this.opts = opts;\n this.tts = tts;\n this.connOptions = connOptions;\n }\n\n get label() {\n return 'inference.SynthesizeStream';\n }\n\n updateOptions(opts: Partial<Pick<InferenceTTSOptions<TModel>, 'model' | 'voice' | 'language'>>) {\n this.opts = { ...this.opts, ...opts };\n }\n\n protected async run(): Promise<void> {\n let ws: WebSocket | null = null;\n let closing = false;\n let finalReceived = false;\n let lastFrame: AudioFrame | undefined;\n\n const sendTokenizerStream = new tokenizeBasic.SentenceTokenizer().stream();\n const eventChannel = createStreamChannel<TtsServerEvent>();\n const requestId = shortuuid('tts_request_');\n\n const resourceCleanup = () => {\n if (closing) return;\n closing = true;\n sendTokenizerStream.close();\n eventChannel.close();\n ws?.removeAllListeners();\n ws?.close();\n };\n\n const sendClientEvent = async (event: TtsClientEvent) => {\n // Don't send events to a closed WebSocket or aborted controller\n if (this.abortController.signal.aborted || closing) return;\n\n const validatedEvent = await ttsClientEventSchema.parseAsync(event);\n if (!ws || ws.readyState !== WebSocket.OPEN) {\n this.#logger.warn('Trying to send client TTS event to a closed WebSocket');\n return;\n }\n ws.send(JSON.stringify(validatedEvent));\n };\n\n const sendLastFrame = (segmentId: string, final: boolean) => {\n if (lastFrame) {\n this.queue.put({ requestId, segmentId, frame: lastFrame, final });\n lastFrame = undefined;\n }\n };\n\n const createInputTask = async () => {\n for await (const data of this.input) {\n if (this.abortController.signal.aborted || closing) break;\n if (data === SynthesizeStream.FLUSH_SENTINEL) {\n sendTokenizerStream.flush();\n continue;\n }\n sendTokenizerStream.pushText(data);\n }\n // Only call endInput if the stream hasn't been closed by cleanup\n if (!closing) {\n sendTokenizerStream.endInput();\n }\n };\n\n const createSentenceStreamTask = async () => {\n for await (const ev of sendTokenizerStream) {\n if (this.abortController.signal.aborted) break;\n\n sendClientEvent({\n type: 'input_transcript',\n transcript: ev.token + ' ',\n });\n }\n\n sendClientEvent({ type: 'session.flush' });\n };\n\n const createWsListenerTask = async (ws: WebSocket) => {\n return new Promise<void>((resolve, reject) => {\n this.abortController.signal.addEventListener('abort', () => {\n resourceCleanup();\n resolve(); // Abort is triggered by close(), which is a normal shutdown, not an error\n });\n\n ws.on('message', async (data) => {\n const eventJson = JSON.parse(data.toString()) as Record<string, unknown>;\n const validatedEvent = ttsServerEventSchema.parse(eventJson);\n eventChannel.write(validatedEvent);\n });\n\n ws.on('error', (e) => {\n this.#logger.error({ error: e }, 'WebSocket error');\n resourceCleanup();\n reject(e);\n });\n\n ws.on('close', () => {\n resourceCleanup();\n\n if (!closing) return this.#logger.error('WebSocket closed unexpectedly');\n if (finalReceived) return resolve();\n\n reject(\n new APIStatusError({\n message: 'Gateway connection closed unexpectedly',\n options: { requestId },\n }),\n );\n });\n });\n };\n\n const createRecvTask = async () => {\n let currentSessionId: string | null = null;\n\n const bstream = new AudioByteStream(this.opts.sampleRate, NUM_CHANNELS);\n const serverEventStream = eventChannel.stream();\n const reader = serverEventStream.getReader();\n\n try {\n while (!this.closed && !this.abortController.signal.aborted) {\n const result = await reader.read();\n if (this.abortController.signal.aborted) return;\n if (result.done) return;\n\n const serverEvent = result.value;\n switch (serverEvent.type) {\n case 'session.created':\n currentSessionId = serverEvent.session_id;\n break;\n case 'output_audio':\n const base64Data = new Int8Array(Buffer.from(serverEvent.audio, 'base64'));\n for (const frame of bstream.write(base64Data.buffer)) {\n sendLastFrame(currentSessionId!, false);\n lastFrame = frame;\n }\n break;\n case 'done':\n finalReceived = true;\n for (const frame of bstream.flush()) {\n sendLastFrame(currentSessionId!, false);\n lastFrame = frame;\n }\n sendLastFrame(currentSessionId!, true);\n this.queue.put(SynthesizeStream.END_OF_STREAM);\n break;\n case 'session.closed':\n resourceCleanup();\n break;\n case 'error':\n this.#logger.error(\n { serverEvent },\n 'Received error message from LiveKit TTS WebSocket',\n );\n resourceCleanup();\n throw new APIError(`LiveKit TTS returned error: ${serverEvent.message}`);\n default:\n this.#logger.warn('Unexpected message %s', serverEvent);\n break;\n }\n }\n } finally {\n reader.releaseLock();\n try {\n await serverEventStream.cancel();\n } catch (e) {\n this.#logger.debug('Error cancelling serverEventStream (may already be cancelled):', e);\n }\n }\n };\n\n try {\n ws = await this.tts.connectWs(this.connOptions.timeoutMs);\n\n await Promise.all([\n createInputTask(),\n createSentenceStreamTask(),\n createWsListenerTask(ws),\n createRecvTask(),\n ]);\n } finally {\n resourceCleanup();\n }\n }\n}\n"],"mappings":"AAIA,SAAS,iBAAiB;AAC1B,SAAS,UAAU,sBAAsB;AACzC,SAAS,uBAAuB;AAChC,SAAS,WAAW;AACpB,SAAS,2BAA2B;AACpC,SAAS,SAAS,qBAAqB;AAEvC,SAAS,oBAAoB,sBAAsB,OAAO,eAAe;AACzE,SAAiC,mCAAmC;AACpE,SAAS,iBAAiB;AAC1B;AAAA,EAIE;AAAA,EACA;AAAA,OACK;AACP,SAAyB,WAAW,yBAAyB;AAoD7D,MAAM,mBAAgC;AACtC,MAAM,sBAAsB;AAC5B,MAAM,mBAAmB;AACzB,MAAM,eAAe;AACrB,MAAM,mBAAmB;AAiBlB,MAAM,YAAsC,QAAQ;AAAA,EACjD;AAAA,EACA,UAAyC,oBAAI,IAAI;AAAA,EAEzD,UAAU,IAAI;AAAA,EAEd,YAAY,MAUT;AACD,UAAM,cAAa,6BAAM,eAAc;AACvC,UAAM,YAAY,GAAG,EAAE,WAAW,KAAK,CAAC;AAExC,UAAM;AAAA,MACJ;AAAA,MACA;AAAA,MACA,WAAW;AAAA,MACX;AAAA,MACA,WAAW;AAAA,MACX;AAAA,MACA;AAAA,MACA,eAAe,CAAC;AAAA,IAClB,IAAI,QAAQ,CAAC;AAEb,UAAM,YAAY,WAAW,QAAQ,IAAI,yBAAyB;AAClE,UAAM,WAAW,UAAU,QAAQ,IAAI,6BAA6B,QAAQ,IAAI;AAChF,QAAI,CAAC,UAAU;AACb,YAAM,IAAI,MAAM,wDAAwD;AAAA,IAC1E;AAEA,UAAM,cACJ,aAAa,QAAQ,IAAI,gCAAgC,QAAQ,IAAI;AACvE,QAAI,CAAC,aAAa;AAChB,YAAM,IAAI,MAAM,iEAAiE;AAAA,IACnF;AAGA,QAAI,YAAY;AAChB,QAAI,YAAY;AAChB,QAAI,OAAO,cAAc,UAAU;AACjC,YAAM,MAAM,UAAU,YAAY,GAAG;AACrC,UAAI,QAAQ,IAAI;AACd,cAAM,iBAAiB,UAAU,MAAM,MAAM,CAAC;AAC9C,YAAI,aAAa,cAAc,gBAAgB;AAC7C,eAAK,QAAQ;AAAA,YACX;AAAA,YACA,EAAE,OAAO,WAAW,OAAO,UAAU;AAAA,UACvC;AAAA,QACF,OAAO;AACL,sBAAY;AAAA,QACd;AACA,oBAAY,UAAU,MAAM,GAAG,GAAG;AAAA,MACpC;AAAA,IACF;AAEA,SAAK,OAAO;AAAA,MACV,OAAO;AAAA,MACP,OAAO;AAAA,MACP;AAAA,MACA;AAAA,MACA;AAAA,MACA,SAAS;AAAA,MACT,QAAQ;AAAA,MACR,WAAW;AAAA,MACX;AAAA,IACF;AAAA,EACF;AAAA,EAEA,IAAI,QAAQ;AACV,WAAO;AAAA,EACT;AAAA,EAEA,OAAO,gBAAgB,aAAqC;AAC1D,QAAI,YAAY,SAAS,GAAG,GAAG;AAC7B,YAAM,CAAC,OAAO,KAAK,IAAI,YAAY,MAAM,GAAG;AAC5C,aAAO,IAAI,IAAI,EAAE,OAAO,MAAM,CAAC;AAAA,IACjC;AACA,WAAO,IAAI,IAAI,EAAE,OAAO,YAAY,CAAC;AAAA,EACvC;AAAA,EAEA,cAAc,MAAkF;AAC9F,SAAK,OAAO,EAAE,GAAG,KAAK,MAAM,GAAG,KAAK;AACpC,eAAW,UAAU,KAAK,SAAS;AACjC,aAAO,cAAc,IAAI;AAAA,IAC3B;AAAA,EACF;AAAA,EAEA,WAAW,GAA0B;AACnC,UAAM,IAAI,MAAM,kCAAkC;AAAA,EACpD;AAAA,EAEA,OAAO,SAAyE;AAC9E,UAAM,EAAE,cAAc,4BAA4B,IAAI,WAAW,CAAC;AAClE,UAAM,SAAS,IAAI,iBAAiB,MAAM,EAAE,GAAG,KAAK,KAAK,GAAG,WAAW;AACvE,SAAK,QAAQ,IAAI,MAAM;AACvB,WAAO;AAAA,EACT;AAAA,EAEA,MAAM,UAAU,SAAqC;AACnD,QAAI,UAAU,KAAK,KAAK;AACxB,QAAI,QAAQ,WAAW,SAAS,KAAK,QAAQ,WAAW,UAAU,GAAG;AACnE,gBAAU,QAAQ,QAAQ,QAAQ,IAAI;AAAA,IACxC;AAEA,UAAM,QAAQ,MAAM,kBAAkB,KAAK,KAAK,QAAQ,KAAK,KAAK,SAAS;AAC3E,UAAM,MAAM,GAAG,OAAO;AACtB,UAAM,UAAU,EAAE,eAAe,UAAU,KAAK,GAAG;AAEnD,UAAM,SAAS;AAAA,MACb,MAAM;AAAA,MACN,aAAa,OAAO,KAAK,KAAK,UAAU;AAAA,MACxC,UAAU,KAAK,KAAK;AAAA,MACpB,OAAO,KAAK,KAAK;AAAA,IACnB;AAEA,QAAI,KAAK,KAAK,MAAO,QAAO,QAAQ,KAAK,KAAK;AAC9C,QAAI,KAAK,KAAK,MAAO,QAAO,QAAQ,KAAK,KAAK;AAC9C,QAAI,KAAK,KAAK,SAAU,QAAO,WAAW,KAAK,KAAK;AAEpD,UAAM,SAAS,MAAM,UAAU,KAAK,SAAS,OAAO;AACpD,WAAO,KAAK,KAAK,UAAU,MAAM,CAAC;AAClC,WAAO;AAAA,EACT;AAAA,EAEA,MAAM,QAAQ,IAAe;AAC3B,UAAM,GAAG,MAAM;AAAA,EACjB;AAAA,EAEA,MAAM,QAAQ;AACZ,eAAW,UAAU,KAAK,SAAS;AACjC,YAAM,OAAO,MAAM;AAAA,IACrB;AACA,SAAK,QAAQ,MAAM;AAAA,EACrB;AACF;AAEO,MAAM,yBAAmD,qBAAqB;AAAA,EAC3E;AAAA,EACA;AAAA,EACA;AAAA,EAER,UAAU,IAAI;AAAA,EAEd,YAAY,KAAkB,MAAmC,aAAgC;AAC/F,UAAM,KAAK,WAAW;AACtB,SAAK,OAAO;AACZ,SAAK,MAAM;AACX,SAAK,cAAc;AAAA,EACrB;AAAA,EAEA,IAAI,QAAQ;AACV,WAAO;AAAA,EACT;AAAA,EAEA,cAAc,MAAkF;AAC9F,SAAK,OAAO,EAAE,GAAG,KAAK,MAAM,GAAG,KAAK;AAAA,EACtC;AAAA,EAEA,MAAgB,MAAqB;AACnC,QAAI,KAAuB;AAC3B,QAAI,UAAU;AACd,QAAI,gBAAgB;AACpB,QAAI;AAEJ,UAAM,sBAAsB,IAAI,cAAc,kBAAkB,EAAE,OAAO;AACzE,UAAM,eAAe,oBAAoC;AACzD,UAAM,YAAY,UAAU,cAAc;AAE1C,UAAM,kBAAkB,MAAM;AAC5B,UAAI,QAAS;AACb,gBAAU;AACV,0BAAoB,MAAM;AAC1B,mBAAa,MAAM;AACnB,+BAAI;AACJ,+BAAI;AAAA,IACN;AAEA,UAAM,kBAAkB,OAAO,UAA0B;AAEvD,UAAI,KAAK,gBAAgB,OAAO,WAAW,QAAS;AAEpD,YAAM,iBAAiB,MAAM,qBAAqB,WAAW,KAAK;AAClE,UAAI,CAAC,MAAM,GAAG,eAAe,UAAU,MAAM;AAC3C,aAAK,QAAQ,KAAK,uDAAuD;AACzE;AAAA,MACF;AACA,SAAG,KAAK,KAAK,UAAU,cAAc,CAAC;AAAA,IACxC;AAEA,UAAM,gBAAgB,CAAC,WAAmB,UAAmB;AAC3D,UAAI,WAAW;AACb,aAAK,MAAM,IAAI,EAAE,WAAW,WAAW,OAAO,WAAW,MAAM,CAAC;AAChE,oBAAY;AAAA,MACd;AAAA,IACF;AAEA,UAAM,kBAAkB,YAAY;AAClC,uBAAiB,QAAQ,KAAK,OAAO;AACnC,YAAI,KAAK,gBAAgB,OAAO,WAAW,QAAS;AACpD,YAAI,SAAS,iBAAiB,gBAAgB;AAC5C,8BAAoB,MAAM;AAC1B;AAAA,QACF;AACA,4BAAoB,SAAS,IAAI;AAAA,MACnC;AAEA,UAAI,CAAC,SAAS;AACZ,4BAAoB,SAAS;AAAA,MAC/B;AAAA,IACF;AAEA,UAAM,2BAA2B,YAAY;AAC3C,uBAAiB,MAAM,qBAAqB;AAC1C,YAAI,KAAK,gBAAgB,OAAO,QAAS;AAEzC,wBAAgB;AAAA,UACd,MAAM;AAAA,UACN,YAAY,GAAG,QAAQ;AAAA,QACzB,CAAC;AAAA,MACH;AAEA,sBAAgB,EAAE,MAAM,gBAAgB,CAAC;AAAA,IAC3C;AAEA,UAAM,uBAAuB,OAAOA,QAAkB;AACpD,aAAO,IAAI,QAAc,CAAC,SAAS,WAAW;AAC5C,aAAK,gBAAgB,OAAO,iBAAiB,SAAS,MAAM;AAC1D,0BAAgB;AAChB,kBAAQ;AAAA,QACV,CAAC;AAED,QAAAA,IAAG,GAAG,WAAW,OAAO,SAAS;AAC/B,gBAAM,YAAY,KAAK,MAAM,KAAK,SAAS,CAAC;AAC5C,gBAAM,iBAAiB,qBAAqB,MAAM,SAAS;AAC3D,uBAAa,MAAM,cAAc;AAAA,QACnC,CAAC;AAED,QAAAA,IAAG,GAAG,SAAS,CAAC,MAAM;AACpB,eAAK,QAAQ,MAAM,EAAE,OAAO,EAAE,GAAG,iBAAiB;AAClD,0BAAgB;AAChB,iBAAO,CAAC;AAAA,QACV,CAAC;AAED,QAAAA,IAAG,GAAG,SAAS,MAAM;AACnB,0BAAgB;AAEhB,cAAI,CAAC,QAAS,QAAO,KAAK,QAAQ,MAAM,+BAA+B;AACvE,cAAI,cAAe,QAAO,QAAQ;AAElC;AAAA,YACE,IAAI,eAAe;AAAA,cACjB,SAAS;AAAA,cACT,SAAS,EAAE,UAAU;AAAA,YACvB,CAAC;AAAA,UACH;AAAA,QACF,CAAC;AAAA,MACH,CAAC;AAAA,IACH;AAEA,UAAM,iBAAiB,YAAY;AACjC,UAAI,mBAAkC;AAEtC,YAAM,UAAU,IAAI,gBAAgB,KAAK,KAAK,YAAY,YAAY;AACtE,YAAM,oBAAoB,aAAa,OAAO;AAC9C,YAAM,SAAS,kBAAkB,UAAU;AAE3C,UAAI;AACF,eAAO,CAAC,KAAK,UAAU,CAAC,KAAK,gBAAgB,OAAO,SAAS;AAC3D,gBAAM,SAAS,MAAM,OAAO,KAAK;AACjC,cAAI,KAAK,gBAAgB,OAAO,QAAS;AACzC,cAAI,OAAO,KAAM;AAEjB,gBAAM,cAAc,OAAO;AAC3B,kBAAQ,YAAY,MAAM;AAAA,YACxB,KAAK;AACH,iCAAmB,YAAY;AAC/B;AAAA,YACF,KAAK;AACH,oBAAM,aAAa,IAAI,UAAU,OAAO,KAAK,YAAY,OAAO,QAAQ,CAAC;AACzE,yBAAW,SAAS,QAAQ,MAAM,WAAW,MAAM,GAAG;AACpD,8BAAc,kBAAmB,KAAK;AACtC,4BAAY;AAAA,cACd;AACA;AAAA,YACF,KAAK;AACH,8BAAgB;AAChB,yBAAW,SAAS,QAAQ,MAAM,GAAG;AACnC,8BAAc,kBAAmB,KAAK;AACtC,4BAAY;AAAA,cACd;AACA,4BAAc,kBAAmB,IAAI;AACrC,mBAAK,MAAM,IAAI,iBAAiB,aAAa;AAC7C;AAAA,YACF,KAAK;AACH,8BAAgB;AAChB;AAAA,YACF,KAAK;AACH,mBAAK,QAAQ;AAAA,gBACX,EAAE,YAAY;AAAA,gBACd;AAAA,cACF;AACA,8BAAgB;AAChB,oBAAM,IAAI,SAAS,+BAA+B,YAAY,OAAO,EAAE;AAAA,YACzE;AACE,mBAAK,QAAQ,KAAK,yBAAyB,WAAW;AACtD;AAAA,UACJ;AAAA,QACF;AAAA,MACF,UAAE;AACA,eAAO,YAAY;AACnB,YAAI;AACF,gBAAM,kBAAkB,OAAO;AAAA,QACjC,SAAS,GAAG;AACV,eAAK,QAAQ,MAAM,kEAAkE,CAAC;AAAA,QACxF;AAAA,MACF;AAAA,IACF;AAEA,QAAI;AACF,WAAK,MAAM,KAAK,IAAI,UAAU,KAAK,YAAY,SAAS;AAExD,YAAM,QAAQ,IAAI;AAAA,QAChB,gBAAgB;AAAA,QAChB,yBAAyB;AAAA,QACzB,qBAAqB,EAAE;AAAA,QACvB,eAAe;AAAA,MACjB,CAAC;AAAA,IACH,UAAE;AACA,sBAAgB;AAAA,IAClB;AAAA,EACF;AACF;","names":["ws"]}
|
|
1
|
+
{"version":3,"sources":["../../src/inference/tts.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2025 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { WebSocket } from 'ws';\nimport { APIError, APIStatusError } from '../_exceptions.js';\nimport { AudioByteStream } from '../audio.js';\nimport { log } from '../log.js';\nimport { createStreamChannel } from '../stream/stream_channel.js';\nimport { basic as tokenizeBasic } from '../tokenize/index.js';\nimport type { ChunkedStream } from '../tts/index.js';\nimport { SynthesizeStream as BaseSynthesizeStream, TTS as BaseTTS } from '../tts/index.js';\nimport { type APIConnectOptions, DEFAULT_API_CONNECT_OPTIONS } from '../types.js';\nimport { shortuuid } from '../utils.js';\nimport {\n type TtsClientEvent,\n type TtsServerEvent,\n type TtsSessionCreateEvent,\n ttsClientEventSchema,\n ttsServerEventSchema,\n} from './api_protos.js';\nimport { type AnyString, connectWs, createAccessToken } from './utils.js';\n\nexport type CartesiaModels =\n | 'cartesia'\n | 'cartesia/sonic'\n | 'cartesia/sonic-2'\n | 'cartesia/sonic-turbo';\n\nexport type ElevenlabsModels =\n | 'elevenlabs'\n | 'elevenlabs/eleven_flash_v2'\n | 'elevenlabs/eleven_flash_v2_5'\n | 'elevenlabs/eleven_turbo_v2'\n | 'elevenlabs/eleven_turbo_v2_5'\n | 'elevenlabs/eleven_multilingual_v2';\n\nexport type RimeModels = 'rime' | 'rime/mist' | 'rime/mistv2' | 'rime/arcana';\n\nexport type InworldModels = 'inworld' | 'inworld/inworld-tts-1';\n\nexport interface CartesiaOptions {\n duration?: number; // max duration of audio in seconds\n speed?: 'slow' | 'normal' | 'fast'; // default: not specified\n}\n\nexport interface ElevenlabsOptions {\n inactivity_timeout?: number; // default: 60\n apply_text_normalization?: 'auto' | 'off' | 'on'; // default: \"auto\"\n}\n\nexport interface RimeOptions {}\n\nexport interface InworldOptions {}\n\ntype _TTSModels = CartesiaModels | ElevenlabsModels | RimeModels | InworldModels;\n\nexport type TTSModels = CartesiaModels | ElevenlabsModels | RimeModels | InworldModels | AnyString;\n\nexport type ModelWithVoice = `${_TTSModels}:${string}` | TTSModels;\n\nexport type TTSOptions<TModel extends TTSModels> = TModel extends CartesiaModels\n ? CartesiaOptions\n : TModel extends ElevenlabsModels\n ? ElevenlabsOptions\n : TModel extends RimeOptions\n ? RimeOptions\n : TModel extends InworldOptions\n ? InworldOptions\n : Record<string, unknown>;\n\ntype TTSEncoding = 'pcm_s16le';\n\nconst DEFAULT_ENCODING: TTSEncoding = 'pcm_s16le';\nconst DEFAULT_SAMPLE_RATE = 16000;\nconst DEFAULT_BASE_URL = 'https://agent-gateway.livekit.cloud/v1';\nconst NUM_CHANNELS = 1;\nconst DEFAULT_LANGUAGE = 'en';\n\nexport interface InferenceTTSOptions<TModel extends TTSModels> {\n model?: TModel;\n voice?: string;\n language?: string;\n encoding: TTSEncoding;\n sampleRate: number;\n baseURL: string;\n apiKey: string;\n apiSecret: string;\n modelOptions: TTSOptions<TModel>;\n}\n\n/**\n * Livekit Cloud Inference TTS\n */\nexport class TTS<TModel extends TTSModels> extends BaseTTS {\n private opts: InferenceTTSOptions<TModel>;\n private streams: Set<SynthesizeStream<TModel>> = new Set();\n\n #logger = log();\n\n constructor(opts: {\n model: TModel;\n voice?: string;\n language?: string;\n baseURL?: string;\n encoding?: TTSEncoding;\n sampleRate?: number;\n apiKey?: string;\n apiSecret?: string;\n modelOptions?: TTSOptions<TModel>;\n }) {\n const sampleRate = opts?.sampleRate ?? DEFAULT_SAMPLE_RATE;\n super(sampleRate, 1, { streaming: true });\n\n const {\n model,\n voice,\n language = DEFAULT_LANGUAGE,\n baseURL,\n encoding = DEFAULT_ENCODING,\n apiKey,\n apiSecret,\n modelOptions = {} as TTSOptions<TModel>,\n } = opts || {};\n\n const lkBaseURL = baseURL || process.env.LIVEKIT_INFERENCE_URL || DEFAULT_BASE_URL;\n const lkApiKey = apiKey || process.env.LIVEKIT_INFERENCE_API_KEY || process.env.LIVEKIT_API_KEY;\n if (!lkApiKey) {\n throw new Error('apiKey is required: pass apiKey or set LIVEKIT_API_KEY');\n }\n\n const lkApiSecret =\n apiSecret || process.env.LIVEKIT_INFERENCE_API_SECRET || process.env.LIVEKIT_API_SECRET;\n if (!lkApiSecret) {\n throw new Error('apiSecret is required: pass apiSecret or set LIVEKIT_API_SECRET');\n }\n\n // read voice id from the model if provided: \"provider/model:voice_id\"\n let nextModel = model;\n let nextVoice = voice;\n if (typeof nextModel === 'string') {\n const idx = nextModel.lastIndexOf(':');\n if (idx !== -1) {\n const voiceFromModel = nextModel.slice(idx + 1);\n if (nextVoice && nextVoice !== voiceFromModel) {\n this.#logger.warn(\n '`voice` is provided via both argument and model, using the one from the argument',\n { voice: nextVoice, model: nextModel },\n );\n } else {\n nextVoice = voiceFromModel;\n }\n nextModel = nextModel.slice(0, idx) as TModel;\n }\n }\n\n this.opts = {\n model: nextModel,\n voice: nextVoice,\n language,\n encoding,\n sampleRate,\n baseURL: lkBaseURL,\n apiKey: lkApiKey,\n apiSecret: lkApiSecret,\n modelOptions,\n };\n }\n\n get label() {\n return 'inference.TTS';\n }\n\n static fromModelString(modelString: string): TTS<AnyString> {\n if (modelString.includes(':')) {\n const [model, voice] = modelString.split(':') as [TTSModels, string];\n return new TTS({ model, voice });\n }\n return new TTS({ model: modelString });\n }\n\n updateOptions(opts: Partial<Pick<InferenceTTSOptions<TModel>, 'model' | 'voice' | 'language'>>) {\n this.opts = { ...this.opts, ...opts };\n for (const stream of this.streams) {\n stream.updateOptions(opts);\n }\n }\n\n synthesize(_: string): ChunkedStream {\n throw new Error('ChunkedStream is not implemented');\n }\n\n stream(options?: { connOptions?: APIConnectOptions }): SynthesizeStream<TModel> {\n const { connOptions = DEFAULT_API_CONNECT_OPTIONS } = options || {};\n const stream = new SynthesizeStream(this, { ...this.opts }, connOptions);\n this.streams.add(stream);\n return stream;\n }\n\n async connectWs(timeout: number): Promise<WebSocket> {\n let baseURL = this.opts.baseURL;\n if (baseURL.startsWith('http://') || baseURL.startsWith('https://')) {\n baseURL = baseURL.replace('http', 'ws');\n }\n\n const token = await createAccessToken(this.opts.apiKey, this.opts.apiSecret);\n const url = `${baseURL}/tts`;\n const headers = { Authorization: `Bearer ${token}` } as Record<string, string>;\n\n const params = {\n type: 'session.create',\n sample_rate: String(this.opts.sampleRate),\n encoding: this.opts.encoding,\n extra: this.opts.modelOptions,\n } as TtsSessionCreateEvent;\n\n if (this.opts.voice) params.voice = this.opts.voice;\n if (this.opts.model) params.model = this.opts.model;\n if (this.opts.language) params.language = this.opts.language;\n\n const socket = await connectWs(url, headers, timeout);\n socket.send(JSON.stringify(params));\n return socket;\n }\n\n async closeWs(ws: WebSocket) {\n await ws.close();\n }\n\n async close() {\n for (const stream of this.streams) {\n await stream.close();\n }\n this.streams.clear();\n }\n}\n\nexport class SynthesizeStream<TModel extends TTSModels> extends BaseSynthesizeStream {\n private opts: InferenceTTSOptions<TModel>;\n private tts: TTS<TModel>;\n\n #logger = log();\n\n constructor(tts: TTS<TModel>, opts: InferenceTTSOptions<TModel>, connOptions: APIConnectOptions) {\n super(tts, connOptions);\n this.opts = opts;\n this.tts = tts;\n }\n\n get label() {\n return 'inference.SynthesizeStream';\n }\n\n updateOptions(opts: Partial<Pick<InferenceTTSOptions<TModel>, 'model' | 'voice' | 'language'>>) {\n this.opts = { ...this.opts, ...opts };\n }\n\n protected async run(): Promise<void> {\n let ws: WebSocket | null = null;\n let closing = false;\n let finalReceived = false;\n let lastFrame: AudioFrame | undefined;\n\n const sendTokenizerStream = new tokenizeBasic.SentenceTokenizer().stream();\n const eventChannel = createStreamChannel<TtsServerEvent>();\n const requestId = shortuuid('tts_request_');\n\n const resourceCleanup = () => {\n if (closing) return;\n closing = true;\n sendTokenizerStream.close();\n eventChannel.close();\n ws?.removeAllListeners();\n ws?.close();\n };\n\n const sendClientEvent = async (event: TtsClientEvent) => {\n // Don't send events to a closed WebSocket or aborted controller\n if (this.abortController.signal.aborted || closing) return;\n\n const validatedEvent = await ttsClientEventSchema.parseAsync(event);\n if (!ws || ws.readyState !== WebSocket.OPEN) {\n this.#logger.warn('Trying to send client TTS event to a closed WebSocket');\n return;\n }\n ws.send(JSON.stringify(validatedEvent));\n };\n\n const sendLastFrame = (segmentId: string, final: boolean) => {\n if (lastFrame) {\n this.queue.put({ requestId, segmentId, frame: lastFrame, final });\n lastFrame = undefined;\n }\n };\n\n const createInputTask = async () => {\n for await (const data of this.input) {\n if (this.abortController.signal.aborted || closing) break;\n if (data === SynthesizeStream.FLUSH_SENTINEL) {\n sendTokenizerStream.flush();\n continue;\n }\n sendTokenizerStream.pushText(data);\n }\n // Only call endInput if the stream hasn't been closed by cleanup\n if (!closing) {\n sendTokenizerStream.endInput();\n }\n };\n\n const createSentenceStreamTask = async () => {\n for await (const ev of sendTokenizerStream) {\n if (this.abortController.signal.aborted) break;\n\n sendClientEvent({\n type: 'input_transcript',\n transcript: ev.token + ' ',\n });\n }\n\n sendClientEvent({ type: 'session.flush' });\n };\n\n const createWsListenerTask = async (ws: WebSocket) => {\n return new Promise<void>((resolve, reject) => {\n this.abortController.signal.addEventListener('abort', () => {\n resourceCleanup();\n resolve(); // Abort is triggered by close(), which is a normal shutdown, not an error\n });\n\n ws.on('message', async (data) => {\n const eventJson = JSON.parse(data.toString()) as Record<string, unknown>;\n const validatedEvent = ttsServerEventSchema.parse(eventJson);\n eventChannel.write(validatedEvent);\n });\n\n ws.on('error', (e) => {\n this.#logger.error({ error: e }, 'WebSocket error');\n resourceCleanup();\n reject(e);\n });\n\n ws.on('close', () => {\n resourceCleanup();\n\n if (!closing) return this.#logger.error('WebSocket closed unexpectedly');\n if (finalReceived) return resolve();\n\n reject(\n new APIStatusError({\n message: 'Gateway connection closed unexpectedly',\n options: { requestId },\n }),\n );\n });\n });\n };\n\n const createRecvTask = async () => {\n let currentSessionId: string | null = null;\n\n const bstream = new AudioByteStream(this.opts.sampleRate, NUM_CHANNELS);\n const serverEventStream = eventChannel.stream();\n const reader = serverEventStream.getReader();\n\n try {\n while (!this.closed && !this.abortController.signal.aborted) {\n const result = await reader.read();\n if (this.abortController.signal.aborted) return;\n if (result.done) return;\n\n const serverEvent = result.value;\n switch (serverEvent.type) {\n case 'session.created':\n currentSessionId = serverEvent.session_id;\n break;\n case 'output_audio':\n const base64Data = new Int8Array(Buffer.from(serverEvent.audio, 'base64'));\n for (const frame of bstream.write(base64Data.buffer)) {\n sendLastFrame(currentSessionId!, false);\n lastFrame = frame;\n }\n break;\n case 'done':\n finalReceived = true;\n for (const frame of bstream.flush()) {\n sendLastFrame(currentSessionId!, false);\n lastFrame = frame;\n }\n sendLastFrame(currentSessionId!, true);\n this.queue.put(SynthesizeStream.END_OF_STREAM);\n break;\n case 'session.closed':\n resourceCleanup();\n break;\n case 'error':\n this.#logger.error(\n { serverEvent },\n 'Received error message from LiveKit TTS WebSocket',\n );\n resourceCleanup();\n throw new APIError(`LiveKit TTS returned error: ${serverEvent.message}`);\n default:\n this.#logger.warn('Unexpected message %s', serverEvent);\n break;\n }\n }\n } finally {\n reader.releaseLock();\n try {\n await serverEventStream.cancel();\n } catch (e) {\n this.#logger.debug('Error cancelling serverEventStream (may already be cancelled):', e);\n }\n }\n };\n\n try {\n ws = await this.tts.connectWs(this.connOptions.timeoutMs);\n\n await Promise.all([\n createInputTask(),\n createSentenceStreamTask(),\n createWsListenerTask(ws),\n createRecvTask(),\n ]);\n } finally {\n resourceCleanup();\n }\n }\n}\n"],"mappings":"AAIA,SAAS,iBAAiB;AAC1B,SAAS,UAAU,sBAAsB;AACzC,SAAS,uBAAuB;AAChC,SAAS,WAAW;AACpB,SAAS,2BAA2B;AACpC,SAAS,SAAS,qBAAqB;AAEvC,SAAS,oBAAoB,sBAAsB,OAAO,eAAe;AACzE,SAAiC,mCAAmC;AACpE,SAAS,iBAAiB;AAC1B;AAAA,EAIE;AAAA,EACA;AAAA,OACK;AACP,SAAyB,WAAW,yBAAyB;AAoD7D,MAAM,mBAAgC;AACtC,MAAM,sBAAsB;AAC5B,MAAM,mBAAmB;AACzB,MAAM,eAAe;AACrB,MAAM,mBAAmB;AAiBlB,MAAM,YAAsC,QAAQ;AAAA,EACjD;AAAA,EACA,UAAyC,oBAAI,IAAI;AAAA,EAEzD,UAAU,IAAI;AAAA,EAEd,YAAY,MAUT;AACD,UAAM,cAAa,6BAAM,eAAc;AACvC,UAAM,YAAY,GAAG,EAAE,WAAW,KAAK,CAAC;AAExC,UAAM;AAAA,MACJ;AAAA,MACA;AAAA,MACA,WAAW;AAAA,MACX;AAAA,MACA,WAAW;AAAA,MACX;AAAA,MACA;AAAA,MACA,eAAe,CAAC;AAAA,IAClB,IAAI,QAAQ,CAAC;AAEb,UAAM,YAAY,WAAW,QAAQ,IAAI,yBAAyB;AAClE,UAAM,WAAW,UAAU,QAAQ,IAAI,6BAA6B,QAAQ,IAAI;AAChF,QAAI,CAAC,UAAU;AACb,YAAM,IAAI,MAAM,wDAAwD;AAAA,IAC1E;AAEA,UAAM,cACJ,aAAa,QAAQ,IAAI,gCAAgC,QAAQ,IAAI;AACvE,QAAI,CAAC,aAAa;AAChB,YAAM,IAAI,MAAM,iEAAiE;AAAA,IACnF;AAGA,QAAI,YAAY;AAChB,QAAI,YAAY;AAChB,QAAI,OAAO,cAAc,UAAU;AACjC,YAAM,MAAM,UAAU,YAAY,GAAG;AACrC,UAAI,QAAQ,IAAI;AACd,cAAM,iBAAiB,UAAU,MAAM,MAAM,CAAC;AAC9C,YAAI,aAAa,cAAc,gBAAgB;AAC7C,eAAK,QAAQ;AAAA,YACX;AAAA,YACA,EAAE,OAAO,WAAW,OAAO,UAAU;AAAA,UACvC;AAAA,QACF,OAAO;AACL,sBAAY;AAAA,QACd;AACA,oBAAY,UAAU,MAAM,GAAG,GAAG;AAAA,MACpC;AAAA,IACF;AAEA,SAAK,OAAO;AAAA,MACV,OAAO;AAAA,MACP,OAAO;AAAA,MACP;AAAA,MACA;AAAA,MACA;AAAA,MACA,SAAS;AAAA,MACT,QAAQ;AAAA,MACR,WAAW;AAAA,MACX;AAAA,IACF;AAAA,EACF;AAAA,EAEA,IAAI,QAAQ;AACV,WAAO;AAAA,EACT;AAAA,EAEA,OAAO,gBAAgB,aAAqC;AAC1D,QAAI,YAAY,SAAS,GAAG,GAAG;AAC7B,YAAM,CAAC,OAAO,KAAK,IAAI,YAAY,MAAM,GAAG;AAC5C,aAAO,IAAI,IAAI,EAAE,OAAO,MAAM,CAAC;AAAA,IACjC;AACA,WAAO,IAAI,IAAI,EAAE,OAAO,YAAY,CAAC;AAAA,EACvC;AAAA,EAEA,cAAc,MAAkF;AAC9F,SAAK,OAAO,EAAE,GAAG,KAAK,MAAM,GAAG,KAAK;AACpC,eAAW,UAAU,KAAK,SAAS;AACjC,aAAO,cAAc,IAAI;AAAA,IAC3B;AAAA,EACF;AAAA,EAEA,WAAW,GAA0B;AACnC,UAAM,IAAI,MAAM,kCAAkC;AAAA,EACpD;AAAA,EAEA,OAAO,SAAyE;AAC9E,UAAM,EAAE,cAAc,4BAA4B,IAAI,WAAW,CAAC;AAClE,UAAM,SAAS,IAAI,iBAAiB,MAAM,EAAE,GAAG,KAAK,KAAK,GAAG,WAAW;AACvE,SAAK,QAAQ,IAAI,MAAM;AACvB,WAAO;AAAA,EACT;AAAA,EAEA,MAAM,UAAU,SAAqC;AACnD,QAAI,UAAU,KAAK,KAAK;AACxB,QAAI,QAAQ,WAAW,SAAS,KAAK,QAAQ,WAAW,UAAU,GAAG;AACnE,gBAAU,QAAQ,QAAQ,QAAQ,IAAI;AAAA,IACxC;AAEA,UAAM,QAAQ,MAAM,kBAAkB,KAAK,KAAK,QAAQ,KAAK,KAAK,SAAS;AAC3E,UAAM,MAAM,GAAG,OAAO;AACtB,UAAM,UAAU,EAAE,eAAe,UAAU,KAAK,GAAG;AAEnD,UAAM,SAAS;AAAA,MACb,MAAM;AAAA,MACN,aAAa,OAAO,KAAK,KAAK,UAAU;AAAA,MACxC,UAAU,KAAK,KAAK;AAAA,MACpB,OAAO,KAAK,KAAK;AAAA,IACnB;AAEA,QAAI,KAAK,KAAK,MAAO,QAAO,QAAQ,KAAK,KAAK;AAC9C,QAAI,KAAK,KAAK,MAAO,QAAO,QAAQ,KAAK,KAAK;AAC9C,QAAI,KAAK,KAAK,SAAU,QAAO,WAAW,KAAK,KAAK;AAEpD,UAAM,SAAS,MAAM,UAAU,KAAK,SAAS,OAAO;AACpD,WAAO,KAAK,KAAK,UAAU,MAAM,CAAC;AAClC,WAAO;AAAA,EACT;AAAA,EAEA,MAAM,QAAQ,IAAe;AAC3B,UAAM,GAAG,MAAM;AAAA,EACjB;AAAA,EAEA,MAAM,QAAQ;AACZ,eAAW,UAAU,KAAK,SAAS;AACjC,YAAM,OAAO,MAAM;AAAA,IACrB;AACA,SAAK,QAAQ,MAAM;AAAA,EACrB;AACF;AAEO,MAAM,yBAAmD,qBAAqB;AAAA,EAC3E;AAAA,EACA;AAAA,EAER,UAAU,IAAI;AAAA,EAEd,YAAY,KAAkB,MAAmC,aAAgC;AAC/F,UAAM,KAAK,WAAW;AACtB,SAAK,OAAO;AACZ,SAAK,MAAM;AAAA,EACb;AAAA,EAEA,IAAI,QAAQ;AACV,WAAO;AAAA,EACT;AAAA,EAEA,cAAc,MAAkF;AAC9F,SAAK,OAAO,EAAE,GAAG,KAAK,MAAM,GAAG,KAAK;AAAA,EACtC;AAAA,EAEA,MAAgB,MAAqB;AACnC,QAAI,KAAuB;AAC3B,QAAI,UAAU;AACd,QAAI,gBAAgB;AACpB,QAAI;AAEJ,UAAM,sBAAsB,IAAI,cAAc,kBAAkB,EAAE,OAAO;AACzE,UAAM,eAAe,oBAAoC;AACzD,UAAM,YAAY,UAAU,cAAc;AAE1C,UAAM,kBAAkB,MAAM;AAC5B,UAAI,QAAS;AACb,gBAAU;AACV,0BAAoB,MAAM;AAC1B,mBAAa,MAAM;AACnB,+BAAI;AACJ,+BAAI;AAAA,IACN;AAEA,UAAM,kBAAkB,OAAO,UAA0B;AAEvD,UAAI,KAAK,gBAAgB,OAAO,WAAW,QAAS;AAEpD,YAAM,iBAAiB,MAAM,qBAAqB,WAAW,KAAK;AAClE,UAAI,CAAC,MAAM,GAAG,eAAe,UAAU,MAAM;AAC3C,aAAK,QAAQ,KAAK,uDAAuD;AACzE;AAAA,MACF;AACA,SAAG,KAAK,KAAK,UAAU,cAAc,CAAC;AAAA,IACxC;AAEA,UAAM,gBAAgB,CAAC,WAAmB,UAAmB;AAC3D,UAAI,WAAW;AACb,aAAK,MAAM,IAAI,EAAE,WAAW,WAAW,OAAO,WAAW,MAAM,CAAC;AAChE,oBAAY;AAAA,MACd;AAAA,IACF;AAEA,UAAM,kBAAkB,YAAY;AAClC,uBAAiB,QAAQ,KAAK,OAAO;AACnC,YAAI,KAAK,gBAAgB,OAAO,WAAW,QAAS;AACpD,YAAI,SAAS,iBAAiB,gBAAgB;AAC5C,8BAAoB,MAAM;AAC1B;AAAA,QACF;AACA,4BAAoB,SAAS,IAAI;AAAA,MACnC;AAEA,UAAI,CAAC,SAAS;AACZ,4BAAoB,SAAS;AAAA,MAC/B;AAAA,IACF;AAEA,UAAM,2BAA2B,YAAY;AAC3C,uBAAiB,MAAM,qBAAqB;AAC1C,YAAI,KAAK,gBAAgB,OAAO,QAAS;AAEzC,wBAAgB;AAAA,UACd,MAAM;AAAA,UACN,YAAY,GAAG,QAAQ;AAAA,QACzB,CAAC;AAAA,MACH;AAEA,sBAAgB,EAAE,MAAM,gBAAgB,CAAC;AAAA,IAC3C;AAEA,UAAM,uBAAuB,OAAOA,QAAkB;AACpD,aAAO,IAAI,QAAc,CAAC,SAAS,WAAW;AAC5C,aAAK,gBAAgB,OAAO,iBAAiB,SAAS,MAAM;AAC1D,0BAAgB;AAChB,kBAAQ;AAAA,QACV,CAAC;AAED,QAAAA,IAAG,GAAG,WAAW,OAAO,SAAS;AAC/B,gBAAM,YAAY,KAAK,MAAM,KAAK,SAAS,CAAC;AAC5C,gBAAM,iBAAiB,qBAAqB,MAAM,SAAS;AAC3D,uBAAa,MAAM,cAAc;AAAA,QACnC,CAAC;AAED,QAAAA,IAAG,GAAG,SAAS,CAAC,MAAM;AACpB,eAAK,QAAQ,MAAM,EAAE,OAAO,EAAE,GAAG,iBAAiB;AAClD,0BAAgB;AAChB,iBAAO,CAAC;AAAA,QACV,CAAC;AAED,QAAAA,IAAG,GAAG,SAAS,MAAM;AACnB,0BAAgB;AAEhB,cAAI,CAAC,QAAS,QAAO,KAAK,QAAQ,MAAM,+BAA+B;AACvE,cAAI,cAAe,QAAO,QAAQ;AAElC;AAAA,YACE,IAAI,eAAe;AAAA,cACjB,SAAS;AAAA,cACT,SAAS,EAAE,UAAU;AAAA,YACvB,CAAC;AAAA,UACH;AAAA,QACF,CAAC;AAAA,MACH,CAAC;AAAA,IACH;AAEA,UAAM,iBAAiB,YAAY;AACjC,UAAI,mBAAkC;AAEtC,YAAM,UAAU,IAAI,gBAAgB,KAAK,KAAK,YAAY,YAAY;AACtE,YAAM,oBAAoB,aAAa,OAAO;AAC9C,YAAM,SAAS,kBAAkB,UAAU;AAE3C,UAAI;AACF,eAAO,CAAC,KAAK,UAAU,CAAC,KAAK,gBAAgB,OAAO,SAAS;AAC3D,gBAAM,SAAS,MAAM,OAAO,KAAK;AACjC,cAAI,KAAK,gBAAgB,OAAO,QAAS;AACzC,cAAI,OAAO,KAAM;AAEjB,gBAAM,cAAc,OAAO;AAC3B,kBAAQ,YAAY,MAAM;AAAA,YACxB,KAAK;AACH,iCAAmB,YAAY;AAC/B;AAAA,YACF,KAAK;AACH,oBAAM,aAAa,IAAI,UAAU,OAAO,KAAK,YAAY,OAAO,QAAQ,CAAC;AACzE,yBAAW,SAAS,QAAQ,MAAM,WAAW,MAAM,GAAG;AACpD,8BAAc,kBAAmB,KAAK;AACtC,4BAAY;AAAA,cACd;AACA;AAAA,YACF,KAAK;AACH,8BAAgB;AAChB,yBAAW,SAAS,QAAQ,MAAM,GAAG;AACnC,8BAAc,kBAAmB,KAAK;AACtC,4BAAY;AAAA,cACd;AACA,4BAAc,kBAAmB,IAAI;AACrC,mBAAK,MAAM,IAAI,iBAAiB,aAAa;AAC7C;AAAA,YACF,KAAK;AACH,8BAAgB;AAChB;AAAA,YACF,KAAK;AACH,mBAAK,QAAQ;AAAA,gBACX,EAAE,YAAY;AAAA,gBACd;AAAA,cACF;AACA,8BAAgB;AAChB,oBAAM,IAAI,SAAS,+BAA+B,YAAY,OAAO,EAAE;AAAA,YACzE;AACE,mBAAK,QAAQ,KAAK,yBAAyB,WAAW;AACtD;AAAA,UACJ;AAAA,QACF;AAAA,MACF,UAAE;AACA,eAAO,YAAY;AACnB,YAAI;AACF,gBAAM,kBAAkB,OAAO;AAAA,QACjC,SAAS,GAAG;AACV,eAAK,QAAQ,MAAM,kEAAkE,CAAC;AAAA,QACxF;AAAA,MACF;AAAA,IACF;AAEA,QAAI;AACF,WAAK,MAAM,KAAK,IAAI,UAAU,KAAK,YAAY,SAAS;AAExD,YAAM,QAAQ,IAAI;AAAA,QAChB,gBAAgB;AAAA,QAChB,yBAAyB;AAAA,QACzB,qBAAqB,EAAE;AAAA,QACvB,eAAe;AAAA,MACjB,CAAC;AAAA,IACH,UAAE;AACA,sBAAgB;AAAA,IAClB;AAAA,EACF;AACF;","names":["ws"]}
|
|
@@ -37,9 +37,12 @@ class StreamAdapter extends import_tts.TTS {
|
|
|
37
37
|
this.#tts.on("metrics_collected", (metrics) => {
|
|
38
38
|
this.emit("metrics_collected", metrics);
|
|
39
39
|
});
|
|
40
|
+
this.#tts.on("error", (error) => {
|
|
41
|
+
this.emit("error", error);
|
|
42
|
+
});
|
|
40
43
|
}
|
|
41
|
-
synthesize(text) {
|
|
42
|
-
return this.#tts.synthesize(text);
|
|
44
|
+
synthesize(text, connOptions, abortSignal) {
|
|
45
|
+
return this.#tts.synthesize(text, connOptions, abortSignal);
|
|
43
46
|
}
|
|
44
47
|
stream(options) {
|
|
45
48
|
return new StreamAdapterWrapper(this.#tts, this.#sentenceTokenizer, options == null ? void 0 : options.connOptions);
|
|
@@ -83,7 +86,7 @@ class StreamAdapterWrapper extends import_tts.SynthesizeStream {
|
|
|
83
86
|
this.queue.put(import_tts.SynthesizeStream.END_OF_STREAM);
|
|
84
87
|
};
|
|
85
88
|
const synthesize = async (token, prevTask, controller) => {
|
|
86
|
-
const audioStream = this.#tts.synthesize(token);
|
|
89
|
+
const audioStream = this.#tts.synthesize(token, this.connOptions, this.abortSignal);
|
|
87
90
|
await (prevTask == null ? void 0 : prevTask.result);
|
|
88
91
|
if (controller.signal.aborted) return;
|
|
89
92
|
for await (const audio of audioStream) {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../../src/tts/stream_adapter.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type { SentenceStream, SentenceTokenizer } from '../tokenize/index.js';\nimport type { APIConnectOptions } from '../types.js';\nimport { Task } from '../utils.js';\nimport type { ChunkedStream } from './tts.js';\nimport { SynthesizeStream, TTS } from './tts.js';\n\nexport class StreamAdapter extends TTS {\n #tts: TTS;\n #sentenceTokenizer: SentenceTokenizer;\n label: string;\n\n constructor(tts: TTS, sentenceTokenizer: SentenceTokenizer) {\n super(tts.sampleRate, tts.numChannels, { streaming: true });\n this.#tts = tts;\n this.#sentenceTokenizer = sentenceTokenizer;\n this.label = this.#tts.label;\n this.label = `tts.StreamAdapter<${this.#tts.label}>`;\n\n this.#tts.on('metrics_collected', (metrics) => {\n this.emit('metrics_collected', metrics);\n });\n }\n\n synthesize(text: string): ChunkedStream {\n return this.#tts.synthesize(text);\n }\n\n stream(options?: { connOptions?: APIConnectOptions }): StreamAdapterWrapper {\n return new StreamAdapterWrapper(this.#tts, this.#sentenceTokenizer, options?.connOptions);\n }\n}\n\nexport class StreamAdapterWrapper extends SynthesizeStream {\n #tts: TTS;\n #sentenceStream: SentenceStream;\n label: string;\n\n constructor(tts: TTS, sentenceTokenizer: SentenceTokenizer, connOptions?: APIConnectOptions) {\n super(tts, connOptions);\n this.#tts = tts;\n this.#sentenceStream = sentenceTokenizer.stream();\n this.label = `tts.StreamAdapterWrapper<${this.#tts.label}>`;\n }\n\n protected async run() {\n const forwardInput = async () => {\n for await (const input of this.input) {\n if (this.abortController.signal.aborted) break;\n\n if (input === SynthesizeStream.FLUSH_SENTINEL) {\n this.#sentenceStream.flush();\n } else {\n this.#sentenceStream.pushText(input);\n }\n }\n this.#sentenceStream.endInput();\n this.#sentenceStream.close();\n };\n\n const synthesizeSentenceStream = async () => {\n let task: Task<void> | undefined;\n const tokenCompletionTasks: Task<void>[] = [];\n\n for await (const ev of this.#sentenceStream) {\n if (this.abortController.signal.aborted) break;\n\n // this will enable non-blocking synthesis of the stream of tokens\n task = Task.from(\n (controller) => synthesize(ev.token, task, controller),\n this.abortController,\n );\n\n tokenCompletionTasks.push(task);\n }\n\n await Promise.all(tokenCompletionTasks.map((t) => t.result));\n this.queue.put(SynthesizeStream.END_OF_STREAM);\n };\n\n const synthesize = async (\n token: string,\n prevTask: Task<void> | undefined,\n controller: AbortController,\n ) => {\n const audioStream = this.#tts.synthesize(token);\n\n // wait for previous audio transcription to complete before starting\n // to queuing audio frames of the current token\n await prevTask?.result;\n if (controller.signal.aborted) return;\n\n for await (const audio of audioStream) {\n if (controller.signal.aborted) break;\n this.queue.put(audio);\n }\n };\n\n await Promise.all([forwardInput(), synthesizeSentenceStream()]);\n }\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAKA,mBAAqB;AAErB,iBAAsC;AAE/B,MAAM,sBAAsB,eAAI;AAAA,EACrC;AAAA,EACA;AAAA,EACA;AAAA,EAEA,YAAY,KAAU,mBAAsC;AAC1D,UAAM,IAAI,YAAY,IAAI,aAAa,EAAE,WAAW,KAAK,CAAC;AAC1D,SAAK,OAAO;AACZ,SAAK,qBAAqB;AAC1B,SAAK,QAAQ,KAAK,KAAK;AACvB,SAAK,QAAQ,qBAAqB,KAAK,KAAK,KAAK;AAEjD,SAAK,KAAK,GAAG,qBAAqB,CAAC,YAAY;AAC7C,WAAK,KAAK,qBAAqB,OAAO;AAAA,IACxC,CAAC;AAAA,EACH;AAAA,EAEA,
|
|
1
|
+
{"version":3,"sources":["../../src/tts/stream_adapter.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type { SentenceStream, SentenceTokenizer } from '../tokenize/index.js';\nimport type { APIConnectOptions } from '../types.js';\nimport { Task } from '../utils.js';\nimport type { ChunkedStream } from './tts.js';\nimport { SynthesizeStream, TTS } from './tts.js';\n\nexport class StreamAdapter extends TTS {\n #tts: TTS;\n #sentenceTokenizer: SentenceTokenizer;\n label: string;\n\n constructor(tts: TTS, sentenceTokenizer: SentenceTokenizer) {\n super(tts.sampleRate, tts.numChannels, { streaming: true });\n this.#tts = tts;\n this.#sentenceTokenizer = sentenceTokenizer;\n this.label = this.#tts.label;\n this.label = `tts.StreamAdapter<${this.#tts.label}>`;\n\n this.#tts.on('metrics_collected', (metrics) => {\n this.emit('metrics_collected', metrics);\n });\n this.#tts.on('error', (error) => {\n this.emit('error', error);\n });\n }\n\n synthesize(\n text: string,\n connOptions?: APIConnectOptions,\n abortSignal?: AbortSignal,\n ): ChunkedStream {\n return this.#tts.synthesize(text, connOptions, abortSignal);\n }\n\n stream(options?: { connOptions?: APIConnectOptions }): StreamAdapterWrapper {\n return new StreamAdapterWrapper(this.#tts, this.#sentenceTokenizer, options?.connOptions);\n }\n}\n\nexport class StreamAdapterWrapper extends SynthesizeStream {\n #tts: TTS;\n #sentenceStream: SentenceStream;\n label: string;\n\n constructor(tts: TTS, sentenceTokenizer: SentenceTokenizer, connOptions?: APIConnectOptions) {\n super(tts, connOptions);\n this.#tts = tts;\n this.#sentenceStream = sentenceTokenizer.stream();\n this.label = `tts.StreamAdapterWrapper<${this.#tts.label}>`;\n }\n\n protected async run() {\n const forwardInput = async () => {\n for await (const input of this.input) {\n if (this.abortController.signal.aborted) break;\n\n if (input === SynthesizeStream.FLUSH_SENTINEL) {\n this.#sentenceStream.flush();\n } else {\n this.#sentenceStream.pushText(input);\n }\n }\n this.#sentenceStream.endInput();\n this.#sentenceStream.close();\n };\n\n const synthesizeSentenceStream = async () => {\n let task: Task<void> | undefined;\n const tokenCompletionTasks: Task<void>[] = [];\n\n for await (const ev of this.#sentenceStream) {\n if (this.abortController.signal.aborted) break;\n\n // this will enable non-blocking synthesis of the stream of tokens\n task = Task.from(\n (controller) => synthesize(ev.token, task, controller),\n this.abortController,\n );\n\n tokenCompletionTasks.push(task);\n }\n\n await Promise.all(tokenCompletionTasks.map((t) => t.result));\n this.queue.put(SynthesizeStream.END_OF_STREAM);\n };\n\n const synthesize = async (\n token: string,\n prevTask: Task<void> | undefined,\n controller: AbortController,\n ) => {\n const audioStream = this.#tts.synthesize(token, this.connOptions, this.abortSignal);\n\n // wait for previous audio transcription to complete before starting\n // to queuing audio frames of the current token\n await prevTask?.result;\n if (controller.signal.aborted) return;\n\n for await (const audio of audioStream) {\n if (controller.signal.aborted) break;\n this.queue.put(audio);\n }\n };\n\n await Promise.all([forwardInput(), synthesizeSentenceStream()]);\n }\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAKA,mBAAqB;AAErB,iBAAsC;AAE/B,MAAM,sBAAsB,eAAI;AAAA,EACrC;AAAA,EACA;AAAA,EACA;AAAA,EAEA,YAAY,KAAU,mBAAsC;AAC1D,UAAM,IAAI,YAAY,IAAI,aAAa,EAAE,WAAW,KAAK,CAAC;AAC1D,SAAK,OAAO;AACZ,SAAK,qBAAqB;AAC1B,SAAK,QAAQ,KAAK,KAAK;AACvB,SAAK,QAAQ,qBAAqB,KAAK,KAAK,KAAK;AAEjD,SAAK,KAAK,GAAG,qBAAqB,CAAC,YAAY;AAC7C,WAAK,KAAK,qBAAqB,OAAO;AAAA,IACxC,CAAC;AACD,SAAK,KAAK,GAAG,SAAS,CAAC,UAAU;AAC/B,WAAK,KAAK,SAAS,KAAK;AAAA,IAC1B,CAAC;AAAA,EACH;AAAA,EAEA,WACE,MACA,aACA,aACe;AACf,WAAO,KAAK,KAAK,WAAW,MAAM,aAAa,WAAW;AAAA,EAC5D;AAAA,EAEA,OAAO,SAAqE;AAC1E,WAAO,IAAI,qBAAqB,KAAK,MAAM,KAAK,oBAAoB,mCAAS,WAAW;AAAA,EAC1F;AACF;AAEO,MAAM,6BAA6B,4BAAiB;AAAA,EACzD;AAAA,EACA;AAAA,EACA;AAAA,EAEA,YAAY,KAAU,mBAAsC,aAAiC;AAC3F,UAAM,KAAK,WAAW;AACtB,SAAK,OAAO;AACZ,SAAK,kBAAkB,kBAAkB,OAAO;AAChD,SAAK,QAAQ,4BAA4B,KAAK,KAAK,KAAK;AAAA,EAC1D;AAAA,EAEA,MAAgB,MAAM;AACpB,UAAM,eAAe,YAAY;AAC/B,uBAAiB,SAAS,KAAK,OAAO;AACpC,YAAI,KAAK,gBAAgB,OAAO,QAAS;AAEzC,YAAI,UAAU,4BAAiB,gBAAgB;AAC7C,eAAK,gBAAgB,MAAM;AAAA,QAC7B,OAAO;AACL,eAAK,gBAAgB,SAAS,KAAK;AAAA,QACrC;AAAA,MACF;AACA,WAAK,gBAAgB,SAAS;AAC9B,WAAK,gBAAgB,MAAM;AAAA,IAC7B;AAEA,UAAM,2BAA2B,YAAY;AAC3C,UAAI;AACJ,YAAM,uBAAqC,CAAC;AAE5C,uBAAiB,MAAM,KAAK,iBAAiB;AAC3C,YAAI,KAAK,gBAAgB,OAAO,QAAS;AAGzC,eAAO,kBAAK;AAAA,UACV,CAAC,eAAe,WAAW,GAAG,OAAO,MAAM,UAAU;AAAA,UACrD,KAAK;AAAA,QACP;AAEA,6BAAqB,KAAK,IAAI;AAAA,MAChC;AAEA,YAAM,QAAQ,IAAI,qBAAqB,IAAI,CAAC,MAAM,EAAE,MAAM,CAAC;AAC3D,WAAK,MAAM,IAAI,4BAAiB,aAAa;AAAA,IAC/C;AAEA,UAAM,aAAa,OACjB,OACA,UACA,eACG;AACH,YAAM,cAAc,KAAK,KAAK,WAAW,OAAO,KAAK,aAAa,KAAK,WAAW;AAIlF,aAAM,qCAAU;AAChB,UAAI,WAAW,OAAO,QAAS;AAE/B,uBAAiB,SAAS,aAAa;AACrC,YAAI,WAAW,OAAO,QAAS;AAC/B,aAAK,MAAM,IAAI,KAAK;AAAA,MACtB;AAAA,IACF;AAEA,UAAM,QAAQ,IAAI,CAAC,aAAa,GAAG,yBAAyB,CAAC,CAAC;AAAA,EAChE;AACF;","names":[]}
|
|
@@ -6,7 +6,7 @@ export declare class StreamAdapter extends TTS {
|
|
|
6
6
|
#private;
|
|
7
7
|
label: string;
|
|
8
8
|
constructor(tts: TTS, sentenceTokenizer: SentenceTokenizer);
|
|
9
|
-
synthesize(text: string): ChunkedStream;
|
|
9
|
+
synthesize(text: string, connOptions?: APIConnectOptions, abortSignal?: AbortSignal): ChunkedStream;
|
|
10
10
|
stream(options?: {
|
|
11
11
|
connOptions?: APIConnectOptions;
|
|
12
12
|
}): StreamAdapterWrapper;
|
|
@@ -6,7 +6,7 @@ export declare class StreamAdapter extends TTS {
|
|
|
6
6
|
#private;
|
|
7
7
|
label: string;
|
|
8
8
|
constructor(tts: TTS, sentenceTokenizer: SentenceTokenizer);
|
|
9
|
-
synthesize(text: string): ChunkedStream;
|
|
9
|
+
synthesize(text: string, connOptions?: APIConnectOptions, abortSignal?: AbortSignal): ChunkedStream;
|
|
10
10
|
stream(options?: {
|
|
11
11
|
connOptions?: APIConnectOptions;
|
|
12
12
|
}): StreamAdapterWrapper;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"stream_adapter.d.ts","sourceRoot":"","sources":["../../src/tts/stream_adapter.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAkB,iBAAiB,EAAE,MAAM,sBAAsB,CAAC;AAC9E,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,aAAa,CAAC;AAErD,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,UAAU,CAAC;AAC9C,OAAO,EAAE,gBAAgB,EAAE,GAAG,EAAE,MAAM,UAAU,CAAC;AAEjD,qBAAa,aAAc,SAAQ,GAAG;;IAGpC,KAAK,EAAE,MAAM,CAAC;gBAEF,GAAG,EAAE,GAAG,EAAE,iBAAiB,EAAE,iBAAiB;
|
|
1
|
+
{"version":3,"file":"stream_adapter.d.ts","sourceRoot":"","sources":["../../src/tts/stream_adapter.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAkB,iBAAiB,EAAE,MAAM,sBAAsB,CAAC;AAC9E,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,aAAa,CAAC;AAErD,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,UAAU,CAAC;AAC9C,OAAO,EAAE,gBAAgB,EAAE,GAAG,EAAE,MAAM,UAAU,CAAC;AAEjD,qBAAa,aAAc,SAAQ,GAAG;;IAGpC,KAAK,EAAE,MAAM,CAAC;gBAEF,GAAG,EAAE,GAAG,EAAE,iBAAiB,EAAE,iBAAiB;IAe1D,UAAU,CACR,IAAI,EAAE,MAAM,EACZ,WAAW,CAAC,EAAE,iBAAiB,EAC/B,WAAW,CAAC,EAAE,WAAW,GACxB,aAAa;IAIhB,MAAM,CAAC,OAAO,CAAC,EAAE;QAAE,WAAW,CAAC,EAAE,iBAAiB,CAAA;KAAE,GAAG,oBAAoB;CAG5E;AAED,qBAAa,oBAAqB,SAAQ,gBAAgB;;IAGxD,KAAK,EAAE,MAAM,CAAC;gBAEF,GAAG,EAAE,GAAG,EAAE,iBAAiB,EAAE,iBAAiB,EAAE,WAAW,CAAC,EAAE,iBAAiB;cAO3E,GAAG;CAuDpB"}
|
|
@@ -13,9 +13,12 @@ class StreamAdapter extends TTS {
|
|
|
13
13
|
this.#tts.on("metrics_collected", (metrics) => {
|
|
14
14
|
this.emit("metrics_collected", metrics);
|
|
15
15
|
});
|
|
16
|
+
this.#tts.on("error", (error) => {
|
|
17
|
+
this.emit("error", error);
|
|
18
|
+
});
|
|
16
19
|
}
|
|
17
|
-
synthesize(text) {
|
|
18
|
-
return this.#tts.synthesize(text);
|
|
20
|
+
synthesize(text, connOptions, abortSignal) {
|
|
21
|
+
return this.#tts.synthesize(text, connOptions, abortSignal);
|
|
19
22
|
}
|
|
20
23
|
stream(options) {
|
|
21
24
|
return new StreamAdapterWrapper(this.#tts, this.#sentenceTokenizer, options == null ? void 0 : options.connOptions);
|
|
@@ -59,7 +62,7 @@ class StreamAdapterWrapper extends SynthesizeStream {
|
|
|
59
62
|
this.queue.put(SynthesizeStream.END_OF_STREAM);
|
|
60
63
|
};
|
|
61
64
|
const synthesize = async (token, prevTask, controller) => {
|
|
62
|
-
const audioStream = this.#tts.synthesize(token);
|
|
65
|
+
const audioStream = this.#tts.synthesize(token, this.connOptions, this.abortSignal);
|
|
63
66
|
await (prevTask == null ? void 0 : prevTask.result);
|
|
64
67
|
if (controller.signal.aborted) return;
|
|
65
68
|
for await (const audio of audioStream) {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../../src/tts/stream_adapter.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type { SentenceStream, SentenceTokenizer } from '../tokenize/index.js';\nimport type { APIConnectOptions } from '../types.js';\nimport { Task } from '../utils.js';\nimport type { ChunkedStream } from './tts.js';\nimport { SynthesizeStream, TTS } from './tts.js';\n\nexport class StreamAdapter extends TTS {\n #tts: TTS;\n #sentenceTokenizer: SentenceTokenizer;\n label: string;\n\n constructor(tts: TTS, sentenceTokenizer: SentenceTokenizer) {\n super(tts.sampleRate, tts.numChannels, { streaming: true });\n this.#tts = tts;\n this.#sentenceTokenizer = sentenceTokenizer;\n this.label = this.#tts.label;\n this.label = `tts.StreamAdapter<${this.#tts.label}>`;\n\n this.#tts.on('metrics_collected', (metrics) => {\n this.emit('metrics_collected', metrics);\n });\n }\n\n synthesize(text: string): ChunkedStream {\n return this.#tts.synthesize(text);\n }\n\n stream(options?: { connOptions?: APIConnectOptions }): StreamAdapterWrapper {\n return new StreamAdapterWrapper(this.#tts, this.#sentenceTokenizer, options?.connOptions);\n }\n}\n\nexport class StreamAdapterWrapper extends SynthesizeStream {\n #tts: TTS;\n #sentenceStream: SentenceStream;\n label: string;\n\n constructor(tts: TTS, sentenceTokenizer: SentenceTokenizer, connOptions?: APIConnectOptions) {\n super(tts, connOptions);\n this.#tts = tts;\n this.#sentenceStream = sentenceTokenizer.stream();\n this.label = `tts.StreamAdapterWrapper<${this.#tts.label}>`;\n }\n\n protected async run() {\n const forwardInput = async () => {\n for await (const input of this.input) {\n if (this.abortController.signal.aborted) break;\n\n if (input === SynthesizeStream.FLUSH_SENTINEL) {\n this.#sentenceStream.flush();\n } else {\n this.#sentenceStream.pushText(input);\n }\n }\n this.#sentenceStream.endInput();\n this.#sentenceStream.close();\n };\n\n const synthesizeSentenceStream = async () => {\n let task: Task<void> | undefined;\n const tokenCompletionTasks: Task<void>[] = [];\n\n for await (const ev of this.#sentenceStream) {\n if (this.abortController.signal.aborted) break;\n\n // this will enable non-blocking synthesis of the stream of tokens\n task = Task.from(\n (controller) => synthesize(ev.token, task, controller),\n this.abortController,\n );\n\n tokenCompletionTasks.push(task);\n }\n\n await Promise.all(tokenCompletionTasks.map((t) => t.result));\n this.queue.put(SynthesizeStream.END_OF_STREAM);\n };\n\n const synthesize = async (\n token: string,\n prevTask: Task<void> | undefined,\n controller: AbortController,\n ) => {\n const audioStream = this.#tts.synthesize(token);\n\n // wait for previous audio transcription to complete before starting\n // to queuing audio frames of the current token\n await prevTask?.result;\n if (controller.signal.aborted) return;\n\n for await (const audio of audioStream) {\n if (controller.signal.aborted) break;\n this.queue.put(audio);\n }\n };\n\n await Promise.all([forwardInput(), synthesizeSentenceStream()]);\n }\n}\n"],"mappings":"AAKA,SAAS,YAAY;AAErB,SAAS,kBAAkB,WAAW;AAE/B,MAAM,sBAAsB,IAAI;AAAA,EACrC;AAAA,EACA;AAAA,EACA;AAAA,EAEA,YAAY,KAAU,mBAAsC;AAC1D,UAAM,IAAI,YAAY,IAAI,aAAa,EAAE,WAAW,KAAK,CAAC;AAC1D,SAAK,OAAO;AACZ,SAAK,qBAAqB;AAC1B,SAAK,QAAQ,KAAK,KAAK;AACvB,SAAK,QAAQ,qBAAqB,KAAK,KAAK,KAAK;AAEjD,SAAK,KAAK,GAAG,qBAAqB,CAAC,YAAY;AAC7C,WAAK,KAAK,qBAAqB,OAAO;AAAA,IACxC,CAAC;AAAA,EACH;AAAA,EAEA,
|
|
1
|
+
{"version":3,"sources":["../../src/tts/stream_adapter.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type { SentenceStream, SentenceTokenizer } from '../tokenize/index.js';\nimport type { APIConnectOptions } from '../types.js';\nimport { Task } from '../utils.js';\nimport type { ChunkedStream } from './tts.js';\nimport { SynthesizeStream, TTS } from './tts.js';\n\nexport class StreamAdapter extends TTS {\n #tts: TTS;\n #sentenceTokenizer: SentenceTokenizer;\n label: string;\n\n constructor(tts: TTS, sentenceTokenizer: SentenceTokenizer) {\n super(tts.sampleRate, tts.numChannels, { streaming: true });\n this.#tts = tts;\n this.#sentenceTokenizer = sentenceTokenizer;\n this.label = this.#tts.label;\n this.label = `tts.StreamAdapter<${this.#tts.label}>`;\n\n this.#tts.on('metrics_collected', (metrics) => {\n this.emit('metrics_collected', metrics);\n });\n this.#tts.on('error', (error) => {\n this.emit('error', error);\n });\n }\n\n synthesize(\n text: string,\n connOptions?: APIConnectOptions,\n abortSignal?: AbortSignal,\n ): ChunkedStream {\n return this.#tts.synthesize(text, connOptions, abortSignal);\n }\n\n stream(options?: { connOptions?: APIConnectOptions }): StreamAdapterWrapper {\n return new StreamAdapterWrapper(this.#tts, this.#sentenceTokenizer, options?.connOptions);\n }\n}\n\nexport class StreamAdapterWrapper extends SynthesizeStream {\n #tts: TTS;\n #sentenceStream: SentenceStream;\n label: string;\n\n constructor(tts: TTS, sentenceTokenizer: SentenceTokenizer, connOptions?: APIConnectOptions) {\n super(tts, connOptions);\n this.#tts = tts;\n this.#sentenceStream = sentenceTokenizer.stream();\n this.label = `tts.StreamAdapterWrapper<${this.#tts.label}>`;\n }\n\n protected async run() {\n const forwardInput = async () => {\n for await (const input of this.input) {\n if (this.abortController.signal.aborted) break;\n\n if (input === SynthesizeStream.FLUSH_SENTINEL) {\n this.#sentenceStream.flush();\n } else {\n this.#sentenceStream.pushText(input);\n }\n }\n this.#sentenceStream.endInput();\n this.#sentenceStream.close();\n };\n\n const synthesizeSentenceStream = async () => {\n let task: Task<void> | undefined;\n const tokenCompletionTasks: Task<void>[] = [];\n\n for await (const ev of this.#sentenceStream) {\n if (this.abortController.signal.aborted) break;\n\n // this will enable non-blocking synthesis of the stream of tokens\n task = Task.from(\n (controller) => synthesize(ev.token, task, controller),\n this.abortController,\n );\n\n tokenCompletionTasks.push(task);\n }\n\n await Promise.all(tokenCompletionTasks.map((t) => t.result));\n this.queue.put(SynthesizeStream.END_OF_STREAM);\n };\n\n const synthesize = async (\n token: string,\n prevTask: Task<void> | undefined,\n controller: AbortController,\n ) => {\n const audioStream = this.#tts.synthesize(token, this.connOptions, this.abortSignal);\n\n // wait for previous audio transcription to complete before starting\n // to queuing audio frames of the current token\n await prevTask?.result;\n if (controller.signal.aborted) return;\n\n for await (const audio of audioStream) {\n if (controller.signal.aborted) break;\n this.queue.put(audio);\n }\n };\n\n await Promise.all([forwardInput(), synthesizeSentenceStream()]);\n }\n}\n"],"mappings":"AAKA,SAAS,YAAY;AAErB,SAAS,kBAAkB,WAAW;AAE/B,MAAM,sBAAsB,IAAI;AAAA,EACrC;AAAA,EACA;AAAA,EACA;AAAA,EAEA,YAAY,KAAU,mBAAsC;AAC1D,UAAM,IAAI,YAAY,IAAI,aAAa,EAAE,WAAW,KAAK,CAAC;AAC1D,SAAK,OAAO;AACZ,SAAK,qBAAqB;AAC1B,SAAK,QAAQ,KAAK,KAAK;AACvB,SAAK,QAAQ,qBAAqB,KAAK,KAAK,KAAK;AAEjD,SAAK,KAAK,GAAG,qBAAqB,CAAC,YAAY;AAC7C,WAAK,KAAK,qBAAqB,OAAO;AAAA,IACxC,CAAC;AACD,SAAK,KAAK,GAAG,SAAS,CAAC,UAAU;AAC/B,WAAK,KAAK,SAAS,KAAK;AAAA,IAC1B,CAAC;AAAA,EACH;AAAA,EAEA,WACE,MACA,aACA,aACe;AACf,WAAO,KAAK,KAAK,WAAW,MAAM,aAAa,WAAW;AAAA,EAC5D;AAAA,EAEA,OAAO,SAAqE;AAC1E,WAAO,IAAI,qBAAqB,KAAK,MAAM,KAAK,oBAAoB,mCAAS,WAAW;AAAA,EAC1F;AACF;AAEO,MAAM,6BAA6B,iBAAiB;AAAA,EACzD;AAAA,EACA;AAAA,EACA;AAAA,EAEA,YAAY,KAAU,mBAAsC,aAAiC;AAC3F,UAAM,KAAK,WAAW;AACtB,SAAK,OAAO;AACZ,SAAK,kBAAkB,kBAAkB,OAAO;AAChD,SAAK,QAAQ,4BAA4B,KAAK,KAAK,KAAK;AAAA,EAC1D;AAAA,EAEA,MAAgB,MAAM;AACpB,UAAM,eAAe,YAAY;AAC/B,uBAAiB,SAAS,KAAK,OAAO;AACpC,YAAI,KAAK,gBAAgB,OAAO,QAAS;AAEzC,YAAI,UAAU,iBAAiB,gBAAgB;AAC7C,eAAK,gBAAgB,MAAM;AAAA,QAC7B,OAAO;AACL,eAAK,gBAAgB,SAAS,KAAK;AAAA,QACrC;AAAA,MACF;AACA,WAAK,gBAAgB,SAAS;AAC9B,WAAK,gBAAgB,MAAM;AAAA,IAC7B;AAEA,UAAM,2BAA2B,YAAY;AAC3C,UAAI;AACJ,YAAM,uBAAqC,CAAC;AAE5C,uBAAiB,MAAM,KAAK,iBAAiB;AAC3C,YAAI,KAAK,gBAAgB,OAAO,QAAS;AAGzC,eAAO,KAAK;AAAA,UACV,CAAC,eAAe,WAAW,GAAG,OAAO,MAAM,UAAU;AAAA,UACrD,KAAK;AAAA,QACP;AAEA,6BAAqB,KAAK,IAAI;AAAA,MAChC;AAEA,YAAM,QAAQ,IAAI,qBAAqB,IAAI,CAAC,MAAM,EAAE,MAAM,CAAC;AAC3D,WAAK,MAAM,IAAI,iBAAiB,aAAa;AAAA,IAC/C;AAEA,UAAM,aAAa,OACjB,OACA,UACA,eACG;AACH,YAAM,cAAc,KAAK,KAAK,WAAW,OAAO,KAAK,aAAa,KAAK,WAAW;AAIlF,aAAM,qCAAU;AAChB,UAAI,WAAW,OAAO,QAAS;AAE/B,uBAAiB,SAAS,aAAa;AACrC,YAAI,WAAW,OAAO,QAAS;AAC/B,aAAK,MAAM,IAAI,KAAK;AAAA,MACtB;AAAA,IACF;AAEA,UAAM,QAAQ,IAAI,CAAC,aAAa,GAAG,yBAAyB,CAAC,CAAC;AAAA,EAChE;AACF;","names":[]}
|
package/dist/tts/tts.cjs
CHANGED
|
@@ -63,24 +63,24 @@ class SynthesizeStream {
|
|
|
63
63
|
queue = new import_utils.AsyncIterableQueue();
|
|
64
64
|
output = new import_utils.AsyncIterableQueue();
|
|
65
65
|
closed = false;
|
|
66
|
+
connOptions;
|
|
67
|
+
abortController = new AbortController();
|
|
68
|
+
deferredInputStream;
|
|
69
|
+
logger = (0, import_log.log)();
|
|
66
70
|
#tts;
|
|
67
71
|
#metricsPendingTexts = [];
|
|
68
72
|
#metricsText = "";
|
|
69
73
|
#monitorMetricsTask;
|
|
70
|
-
_connOptions;
|
|
71
|
-
abortController = new AbortController();
|
|
72
74
|
#ttsRequestSpan;
|
|
73
|
-
deferredInputStream;
|
|
74
|
-
logger = (0, import_log.log)();
|
|
75
75
|
constructor(tts, connOptions = import_types.DEFAULT_API_CONNECT_OPTIONS) {
|
|
76
76
|
this.#tts = tts;
|
|
77
|
-
this.
|
|
77
|
+
this.connOptions = connOptions;
|
|
78
78
|
this.deferredInputStream = new import_deferred_stream.DeferredReadableStream();
|
|
79
79
|
this.pumpInput();
|
|
80
80
|
this.abortController.signal.addEventListener("abort", () => {
|
|
81
81
|
this.deferredInputStream.detachSource();
|
|
82
|
-
this.input.close();
|
|
83
|
-
this.output.close();
|
|
82
|
+
if (!this.input.closed) this.input.close();
|
|
83
|
+
if (!this.output.closed) this.output.close();
|
|
84
84
|
this.closed = true;
|
|
85
85
|
});
|
|
86
86
|
(0, import_utils.startSoon)(() => this.mainTask().then(() => this.queue.close()));
|
|
@@ -91,7 +91,7 @@ class SynthesizeStream {
|
|
|
91
91
|
[import_telemetry.traceTypes.ATTR_TTS_STREAMING]: true,
|
|
92
92
|
[import_telemetry.traceTypes.ATTR_TTS_LABEL]: this.#tts.label
|
|
93
93
|
});
|
|
94
|
-
for (let i = 0; i < this.
|
|
94
|
+
for (let i = 0; i < this.connOptions.maxRetry + 1; i++) {
|
|
95
95
|
try {
|
|
96
96
|
return await import_telemetry.tracer.startActiveSpan(
|
|
97
97
|
async (attemptSpan) => {
|
|
@@ -107,14 +107,14 @@ class SynthesizeStream {
|
|
|
107
107
|
);
|
|
108
108
|
} catch (error) {
|
|
109
109
|
if (error instanceof import_exceptions.APIError) {
|
|
110
|
-
const retryInterval = (0, import_types.intervalForRetry)(this.
|
|
111
|
-
if (this.
|
|
110
|
+
const retryInterval = (0, import_types.intervalForRetry)(this.connOptions, i);
|
|
111
|
+
if (this.connOptions.maxRetry === 0 || !error.retryable) {
|
|
112
112
|
this.emitError({ error, recoverable: false });
|
|
113
113
|
throw error;
|
|
114
|
-
} else if (i === this.
|
|
114
|
+
} else if (i === this.connOptions.maxRetry) {
|
|
115
115
|
this.emitError({ error, recoverable: false });
|
|
116
116
|
throw new import_exceptions.APIConnectionError({
|
|
117
|
-
message: `failed to generate TTS completion after ${this.
|
|
117
|
+
message: `failed to generate TTS completion after ${this.connOptions.maxRetry + 1} attempts`,
|
|
118
118
|
options: { retryable: false }
|
|
119
119
|
});
|
|
120
120
|
} else {
|
|
@@ -265,6 +265,9 @@ class SynthesizeStream {
|
|
|
265
265
|
next() {
|
|
266
266
|
return this.output.next();
|
|
267
267
|
}
|
|
268
|
+
get abortSignal() {
|
|
269
|
+
return this.abortController.signal;
|
|
270
|
+
}
|
|
268
271
|
/** Close both the input and output of the TTS stream */
|
|
269
272
|
close() {
|
|
270
273
|
this.abortController.abort();
|
|
@@ -282,10 +285,14 @@ class ChunkedStream {
|
|
|
282
285
|
#ttsRequestSpan;
|
|
283
286
|
_connOptions;
|
|
284
287
|
logger = (0, import_log.log)();
|
|
285
|
-
|
|
288
|
+
abortController = new AbortController();
|
|
289
|
+
constructor(text, tts, connOptions = import_types.DEFAULT_API_CONNECT_OPTIONS, abortSignal) {
|
|
286
290
|
this.#text = text;
|
|
287
291
|
this.#tts = tts;
|
|
288
292
|
this._connOptions = connOptions;
|
|
293
|
+
if (abortSignal) {
|
|
294
|
+
abortSignal.addEventListener("abort", () => this.abortController.abort(), { once: true });
|
|
295
|
+
}
|
|
289
296
|
this.monitorMetrics();
|
|
290
297
|
Promise.resolve().then(() => this.mainTask().then(() => this.queue.close()));
|
|
291
298
|
}
|
|
@@ -355,6 +362,9 @@ class ChunkedStream {
|
|
|
355
362
|
get inputText() {
|
|
356
363
|
return this.#text;
|
|
357
364
|
}
|
|
365
|
+
get abortSignal() {
|
|
366
|
+
return this.abortController.signal;
|
|
367
|
+
}
|
|
358
368
|
async monitorMetrics() {
|
|
359
369
|
const startTime = process.hrtime.bigint();
|
|
360
370
|
let audioDurationMs = 0;
|
|
@@ -403,8 +413,9 @@ class ChunkedStream {
|
|
|
403
413
|
}
|
|
404
414
|
/** Close both the input and output of the TTS stream */
|
|
405
415
|
close() {
|
|
406
|
-
this.queue.close();
|
|
407
|
-
this.output.close();
|
|
416
|
+
if (!this.queue.closed) this.queue.close();
|
|
417
|
+
if (!this.output.closed) this.output.close();
|
|
418
|
+
if (!this.abortController.signal.aborted) this.abortController.abort();
|
|
408
419
|
this.closed = true;
|
|
409
420
|
}
|
|
410
421
|
[Symbol.asyncIterator]() {
|