@livekit/agents 1.0.21 → 1.0.22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1 +1 @@
1
- {"version":3,"sources":["../../src/inference/tts.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2025 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { WebSocket } from 'ws';\nimport { APIError, APIStatusError } from '../_exceptions.js';\nimport { AudioByteStream } from '../audio.js';\nimport { log } from '../log.js';\nimport { createStreamChannel } from '../stream/stream_channel.js';\nimport { basic as tokenizeBasic } from '../tokenize/index.js';\nimport type { ChunkedStream } from '../tts/index.js';\nimport { SynthesizeStream as BaseSynthesizeStream, TTS as BaseTTS } from '../tts/index.js';\nimport { type APIConnectOptions, DEFAULT_API_CONNECT_OPTIONS } from '../types.js';\nimport { shortuuid } from '../utils.js';\nimport {\n type TtsClientEvent,\n type TtsServerEvent,\n type TtsSessionCreateEvent,\n ttsClientEventSchema,\n ttsServerEventSchema,\n} from './api_protos.js';\nimport { type AnyString, connectWs, createAccessToken } from './utils.js';\n\nexport type CartesiaModels =\n | 'cartesia'\n | 'cartesia/sonic'\n | 'cartesia/sonic-2'\n | 'cartesia/sonic-turbo';\n\nexport type ElevenlabsModels =\n | 'elevenlabs'\n | 'elevenlabs/eleven_flash_v2'\n | 'elevenlabs/eleven_flash_v2_5'\n | 'elevenlabs/eleven_turbo_v2'\n | 'elevenlabs/eleven_turbo_v2_5'\n | 'elevenlabs/eleven_multilingual_v2';\n\nexport type RimeModels = 'rime' | 'rime/mist' | 'rime/mistv2' | 'rime/arcana';\n\nexport type InworldModels = 'inworld' | 'inworld/inworld-tts-1';\n\nexport interface CartesiaOptions {\n duration?: number; // max duration of audio in seconds\n speed?: 'slow' | 'normal' | 'fast'; // default: not specified\n}\n\nexport interface ElevenlabsOptions {\n inactivity_timeout?: number; // default: 60\n apply_text_normalization?: 'auto' | 'off' | 'on'; // default: \"auto\"\n}\n\nexport interface RimeOptions {}\n\nexport interface InworldOptions {}\n\ntype _TTSModels = CartesiaModels | ElevenlabsModels | RimeModels | InworldModels;\n\nexport type TTSModels = CartesiaModels | ElevenlabsModels | RimeModels | InworldModels | AnyString;\n\nexport type ModelWithVoice = `${_TTSModels}:${string}` | TTSModels;\n\nexport type TTSOptions<TModel extends TTSModels> = TModel extends CartesiaModels\n ? CartesiaOptions\n : TModel extends ElevenlabsModels\n ? ElevenlabsOptions\n : TModel extends RimeOptions\n ? RimeOptions\n : TModel extends InworldOptions\n ? InworldOptions\n : Record<string, unknown>;\n\ntype TTSEncoding = 'pcm_s16le';\n\nconst DEFAULT_ENCODING: TTSEncoding = 'pcm_s16le';\nconst DEFAULT_SAMPLE_RATE = 16000;\nconst DEFAULT_BASE_URL = 'https://agent-gateway.livekit.cloud/v1';\nconst NUM_CHANNELS = 1;\nconst DEFAULT_LANGUAGE = 'en';\n\nexport interface InferenceTTSOptions<TModel extends TTSModels> {\n model?: TModel;\n voice?: string;\n language?: string;\n encoding: TTSEncoding;\n sampleRate: number;\n baseURL: string;\n apiKey: string;\n apiSecret: string;\n modelOptions: TTSOptions<TModel>;\n}\n\n/**\n * Livekit Cloud Inference TTS\n */\nexport class TTS<TModel extends TTSModels> extends BaseTTS {\n private opts: InferenceTTSOptions<TModel>;\n private streams: Set<SynthesizeStream<TModel>> = new Set();\n\n #logger = log();\n\n constructor(opts: {\n model: TModel;\n voice?: string;\n language?: string;\n baseURL?: string;\n encoding?: TTSEncoding;\n sampleRate?: number;\n apiKey?: string;\n apiSecret?: string;\n modelOptions?: TTSOptions<TModel>;\n }) {\n const sampleRate = opts?.sampleRate ?? DEFAULT_SAMPLE_RATE;\n super(sampleRate, 1, { streaming: true });\n\n const {\n model,\n voice,\n language = DEFAULT_LANGUAGE,\n baseURL,\n encoding = DEFAULT_ENCODING,\n apiKey,\n apiSecret,\n modelOptions = {} as TTSOptions<TModel>,\n } = opts || {};\n\n const lkBaseURL = baseURL || process.env.LIVEKIT_INFERENCE_URL || DEFAULT_BASE_URL;\n const lkApiKey = apiKey || process.env.LIVEKIT_INFERENCE_API_KEY || process.env.LIVEKIT_API_KEY;\n if (!lkApiKey) {\n throw new Error('apiKey is required: pass apiKey or set LIVEKIT_API_KEY');\n }\n\n const lkApiSecret =\n apiSecret || process.env.LIVEKIT_INFERENCE_API_SECRET || process.env.LIVEKIT_API_SECRET;\n if (!lkApiSecret) {\n throw new Error('apiSecret is required: pass apiSecret or set LIVEKIT_API_SECRET');\n }\n\n // read voice id from the model if provided: \"provider/model:voice_id\"\n let nextModel = model;\n let nextVoice = voice;\n if (typeof nextModel === 'string') {\n const idx = nextModel.lastIndexOf(':');\n if (idx !== -1) {\n const voiceFromModel = nextModel.slice(idx + 1);\n if (nextVoice && nextVoice !== voiceFromModel) {\n this.#logger.warn(\n '`voice` is provided via both argument and model, using the one from the argument',\n { voice: nextVoice, model: nextModel },\n );\n } else {\n nextVoice = voiceFromModel;\n }\n nextModel = nextModel.slice(0, idx) as TModel;\n }\n }\n\n this.opts = {\n model: nextModel,\n voice: nextVoice,\n language,\n encoding,\n sampleRate,\n baseURL: lkBaseURL,\n apiKey: lkApiKey,\n apiSecret: lkApiSecret,\n modelOptions,\n };\n }\n\n get label() {\n return 'inference.TTS';\n }\n\n static fromModelString(modelString: string): TTS<AnyString> {\n if (modelString.includes(':')) {\n const [model, voice] = modelString.split(':') as [TTSModels, string];\n return new TTS({ model, voice });\n }\n return new TTS({ model: modelString });\n }\n\n updateOptions(opts: Partial<Pick<InferenceTTSOptions<TModel>, 'model' | 'voice' | 'language'>>) {\n this.opts = { ...this.opts, ...opts };\n for (const stream of this.streams) {\n stream.updateOptions(opts);\n }\n }\n\n synthesize(_: string): ChunkedStream {\n throw new Error('ChunkedStream is not implemented');\n }\n\n stream(options?: { connOptions?: APIConnectOptions }): SynthesizeStream<TModel> {\n const { connOptions = DEFAULT_API_CONNECT_OPTIONS } = options || {};\n const stream = new SynthesizeStream(this, { ...this.opts }, connOptions);\n this.streams.add(stream);\n return stream;\n }\n\n async connectWs(timeout: number): Promise<WebSocket> {\n let baseURL = this.opts.baseURL;\n if (baseURL.startsWith('http://') || baseURL.startsWith('https://')) {\n baseURL = baseURL.replace('http', 'ws');\n }\n\n const token = await createAccessToken(this.opts.apiKey, this.opts.apiSecret);\n const url = `${baseURL}/tts`;\n const headers = { Authorization: `Bearer ${token}` } as Record<string, string>;\n\n const params = {\n type: 'session.create',\n sample_rate: String(this.opts.sampleRate),\n encoding: this.opts.encoding,\n extra: this.opts.modelOptions,\n } as TtsSessionCreateEvent;\n\n if (this.opts.voice) params.voice = this.opts.voice;\n if (this.opts.model) params.model = this.opts.model;\n if (this.opts.language) params.language = this.opts.language;\n\n const socket = await connectWs(url, headers, timeout);\n socket.send(JSON.stringify(params));\n return socket;\n }\n\n async closeWs(ws: WebSocket) {\n await ws.close();\n }\n\n async close() {\n for (const stream of this.streams) {\n await stream.close();\n }\n this.streams.clear();\n }\n}\n\nexport class SynthesizeStream<TModel extends TTSModels> extends BaseSynthesizeStream {\n private opts: InferenceTTSOptions<TModel>;\n private tts: TTS<TModel>;\n private connOptions: APIConnectOptions;\n\n #logger = log();\n\n constructor(tts: TTS<TModel>, opts: InferenceTTSOptions<TModel>, connOptions: APIConnectOptions) {\n super(tts, connOptions);\n this.opts = opts;\n this.tts = tts;\n this.connOptions = connOptions;\n }\n\n get label() {\n return 'inference.SynthesizeStream';\n }\n\n updateOptions(opts: Partial<Pick<InferenceTTSOptions<TModel>, 'model' | 'voice' | 'language'>>) {\n this.opts = { ...this.opts, ...opts };\n }\n\n protected async run(): Promise<void> {\n let ws: WebSocket | null = null;\n let closing = false;\n let finalReceived = false;\n let lastFrame: AudioFrame | undefined;\n\n const sendTokenizerStream = new tokenizeBasic.SentenceTokenizer().stream();\n const eventChannel = createStreamChannel<TtsServerEvent>();\n const requestId = shortuuid('tts_request_');\n\n const resourceCleanup = () => {\n if (closing) return;\n closing = true;\n sendTokenizerStream.close();\n eventChannel.close();\n ws?.removeAllListeners();\n ws?.close();\n };\n\n const sendClientEvent = async (event: TtsClientEvent) => {\n const validatedEvent = await ttsClientEventSchema.parseAsync(event);\n if (!ws || ws.readyState !== WebSocket.OPEN) {\n this.#logger.warn('Trying to send client TTS event to a closed WebSocket');\n return;\n }\n ws.send(JSON.stringify(validatedEvent));\n };\n\n const sendLastFrame = (segmentId: string, final: boolean) => {\n if (lastFrame) {\n this.queue.put({ requestId, segmentId, frame: lastFrame, final });\n lastFrame = undefined;\n }\n };\n\n const createInputTask = async () => {\n for await (const data of this.input) {\n if (this.abortController.signal.aborted) break;\n if (data === SynthesizeStream.FLUSH_SENTINEL) {\n sendTokenizerStream.flush();\n continue;\n }\n sendTokenizerStream.pushText(data);\n }\n sendTokenizerStream.endInput();\n };\n\n const createSentenceStreamTask = async () => {\n for await (const ev of sendTokenizerStream) {\n if (this.abortController.signal.aborted) break;\n\n sendClientEvent({\n type: 'input_transcript',\n transcript: ev.token + ' ',\n });\n }\n\n sendClientEvent({ type: 'session.flush' });\n };\n\n const createWsListenerTask = async (ws: WebSocket) => {\n return new Promise<void>((resolve, reject) => {\n this.abortController.signal.addEventListener('abort', () => {\n resourceCleanup();\n reject(new Error('WebSocket connection aborted'));\n });\n\n ws.on('message', async (data) => {\n const eventJson = JSON.parse(data.toString()) as Record<string, unknown>;\n const validatedEvent = ttsServerEventSchema.parse(eventJson);\n eventChannel.write(validatedEvent);\n });\n\n ws.on('error', (e) => {\n this.#logger.error({ error: e }, 'WebSocket error');\n resourceCleanup();\n reject(e);\n });\n\n ws.on('close', () => {\n resourceCleanup();\n\n if (!closing) return this.#logger.error('WebSocket closed unexpectedly');\n if (finalReceived) return resolve();\n\n reject(\n new APIStatusError({\n message: 'Gateway connection closed unexpectedly',\n options: { requestId },\n }),\n );\n });\n });\n };\n\n const createRecvTask = async () => {\n let currentSessionId: string | null = null;\n\n const bstream = new AudioByteStream(this.opts.sampleRate, NUM_CHANNELS);\n const serverEventStream = eventChannel.stream();\n const reader = serverEventStream.getReader();\n\n try {\n while (!this.closed && !this.abortController.signal.aborted) {\n const result = await reader.read();\n if (this.abortController.signal.aborted) return;\n if (result.done) return;\n\n const serverEvent = result.value;\n switch (serverEvent.type) {\n case 'session.created':\n currentSessionId = serverEvent.session_id;\n break;\n case 'output_audio':\n const base64Data = new Int8Array(Buffer.from(serverEvent.audio, 'base64'));\n for (const frame of bstream.write(base64Data.buffer)) {\n sendLastFrame(currentSessionId!, false);\n lastFrame = frame;\n }\n break;\n case 'done':\n finalReceived = true;\n for (const frame of bstream.flush()) {\n sendLastFrame(currentSessionId!, false);\n lastFrame = frame;\n }\n sendLastFrame(currentSessionId!, true);\n this.queue.put(SynthesizeStream.END_OF_STREAM);\n break;\n case 'session.closed':\n resourceCleanup();\n break;\n case 'error':\n this.#logger.error(\n { serverEvent },\n 'Received error message from LiveKit TTS WebSocket',\n );\n resourceCleanup();\n throw new APIError(`LiveKit TTS returned error: ${serverEvent.message}`);\n default:\n this.#logger.warn('Unexpected message %s', serverEvent);\n break;\n }\n }\n } finally {\n reader.releaseLock();\n try {\n await serverEventStream.cancel();\n } catch (e) {\n this.#logger.debug('Error cancelling serverEventStream (may already be cancelled):', e);\n }\n }\n };\n\n try {\n ws = await this.tts.connectWs(this.connOptions.timeoutMs);\n\n await Promise.all([\n createInputTask(),\n createSentenceStreamTask(),\n createWsListenerTask(ws),\n createRecvTask(),\n ]);\n } catch (e) {\n this.#logger.error({ error: e }, 'Error in SynthesizeStream');\n } finally {\n resourceCleanup();\n }\n }\n}\n"],"mappings":"AAIA,SAAS,iBAAiB;AAC1B,SAAS,UAAU,sBAAsB;AACzC,SAAS,uBAAuB;AAChC,SAAS,WAAW;AACpB,SAAS,2BAA2B;AACpC,SAAS,SAAS,qBAAqB;AAEvC,SAAS,oBAAoB,sBAAsB,OAAO,eAAe;AACzE,SAAiC,mCAAmC;AACpE,SAAS,iBAAiB;AAC1B;AAAA,EAIE;AAAA,EACA;AAAA,OACK;AACP,SAAyB,WAAW,yBAAyB;AAoD7D,MAAM,mBAAgC;AACtC,MAAM,sBAAsB;AAC5B,MAAM,mBAAmB;AACzB,MAAM,eAAe;AACrB,MAAM,mBAAmB;AAiBlB,MAAM,YAAsC,QAAQ;AAAA,EACjD;AAAA,EACA,UAAyC,oBAAI,IAAI;AAAA,EAEzD,UAAU,IAAI;AAAA,EAEd,YAAY,MAUT;AACD,UAAM,cAAa,6BAAM,eAAc;AACvC,UAAM,YAAY,GAAG,EAAE,WAAW,KAAK,CAAC;AAExC,UAAM;AAAA,MACJ;AAAA,MACA;AAAA,MACA,WAAW;AAAA,MACX;AAAA,MACA,WAAW;AAAA,MACX;AAAA,MACA;AAAA,MACA,eAAe,CAAC;AAAA,IAClB,IAAI,QAAQ,CAAC;AAEb,UAAM,YAAY,WAAW,QAAQ,IAAI,yBAAyB;AAClE,UAAM,WAAW,UAAU,QAAQ,IAAI,6BAA6B,QAAQ,IAAI;AAChF,QAAI,CAAC,UAAU;AACb,YAAM,IAAI,MAAM,wDAAwD;AAAA,IAC1E;AAEA,UAAM,cACJ,aAAa,QAAQ,IAAI,gCAAgC,QAAQ,IAAI;AACvE,QAAI,CAAC,aAAa;AAChB,YAAM,IAAI,MAAM,iEAAiE;AAAA,IACnF;AAGA,QAAI,YAAY;AAChB,QAAI,YAAY;AAChB,QAAI,OAAO,cAAc,UAAU;AACjC,YAAM,MAAM,UAAU,YAAY,GAAG;AACrC,UAAI,QAAQ,IAAI;AACd,cAAM,iBAAiB,UAAU,MAAM,MAAM,CAAC;AAC9C,YAAI,aAAa,cAAc,gBAAgB;AAC7C,eAAK,QAAQ;AAAA,YACX;AAAA,YACA,EAAE,OAAO,WAAW,OAAO,UAAU;AAAA,UACvC;AAAA,QACF,OAAO;AACL,sBAAY;AAAA,QACd;AACA,oBAAY,UAAU,MAAM,GAAG,GAAG;AAAA,MACpC;AAAA,IACF;AAEA,SAAK,OAAO;AAAA,MACV,OAAO;AAAA,MACP,OAAO;AAAA,MACP;AAAA,MACA;AAAA,MACA;AAAA,MACA,SAAS;AAAA,MACT,QAAQ;AAAA,MACR,WAAW;AAAA,MACX;AAAA,IACF;AAAA,EACF;AAAA,EAEA,IAAI,QAAQ;AACV,WAAO;AAAA,EACT;AAAA,EAEA,OAAO,gBAAgB,aAAqC;AAC1D,QAAI,YAAY,SAAS,GAAG,GAAG;AAC7B,YAAM,CAAC,OAAO,KAAK,IAAI,YAAY,MAAM,GAAG;AAC5C,aAAO,IAAI,IAAI,EAAE,OAAO,MAAM,CAAC;AAAA,IACjC;AACA,WAAO,IAAI,IAAI,EAAE,OAAO,YAAY,CAAC;AAAA,EACvC;AAAA,EAEA,cAAc,MAAkF;AAC9F,SAAK,OAAO,EAAE,GAAG,KAAK,MAAM,GAAG,KAAK;AACpC,eAAW,UAAU,KAAK,SAAS;AACjC,aAAO,cAAc,IAAI;AAAA,IAC3B;AAAA,EACF;AAAA,EAEA,WAAW,GAA0B;AACnC,UAAM,IAAI,MAAM,kCAAkC;AAAA,EACpD;AAAA,EAEA,OAAO,SAAyE;AAC9E,UAAM,EAAE,cAAc,4BAA4B,IAAI,WAAW,CAAC;AAClE,UAAM,SAAS,IAAI,iBAAiB,MAAM,EAAE,GAAG,KAAK,KAAK,GAAG,WAAW;AACvE,SAAK,QAAQ,IAAI,MAAM;AACvB,WAAO;AAAA,EACT;AAAA,EAEA,MAAM,UAAU,SAAqC;AACnD,QAAI,UAAU,KAAK,KAAK;AACxB,QAAI,QAAQ,WAAW,SAAS,KAAK,QAAQ,WAAW,UAAU,GAAG;AACnE,gBAAU,QAAQ,QAAQ,QAAQ,IAAI;AAAA,IACxC;AAEA,UAAM,QAAQ,MAAM,kBAAkB,KAAK,KAAK,QAAQ,KAAK,KAAK,SAAS;AAC3E,UAAM,MAAM,GAAG,OAAO;AACtB,UAAM,UAAU,EAAE,eAAe,UAAU,KAAK,GAAG;AAEnD,UAAM,SAAS;AAAA,MACb,MAAM;AAAA,MACN,aAAa,OAAO,KAAK,KAAK,UAAU;AAAA,MACxC,UAAU,KAAK,KAAK;AAAA,MACpB,OAAO,KAAK,KAAK;AAAA,IACnB;AAEA,QAAI,KAAK,KAAK,MAAO,QAAO,QAAQ,KAAK,KAAK;AAC9C,QAAI,KAAK,KAAK,MAAO,QAAO,QAAQ,KAAK,KAAK;AAC9C,QAAI,KAAK,KAAK,SAAU,QAAO,WAAW,KAAK,KAAK;AAEpD,UAAM,SAAS,MAAM,UAAU,KAAK,SAAS,OAAO;AACpD,WAAO,KAAK,KAAK,UAAU,MAAM,CAAC;AAClC,WAAO;AAAA,EACT;AAAA,EAEA,MAAM,QAAQ,IAAe;AAC3B,UAAM,GAAG,MAAM;AAAA,EACjB;AAAA,EAEA,MAAM,QAAQ;AACZ,eAAW,UAAU,KAAK,SAAS;AACjC,YAAM,OAAO,MAAM;AAAA,IACrB;AACA,SAAK,QAAQ,MAAM;AAAA,EACrB;AACF;AAEO,MAAM,yBAAmD,qBAAqB;AAAA,EAC3E;AAAA,EACA;AAAA,EACA;AAAA,EAER,UAAU,IAAI;AAAA,EAEd,YAAY,KAAkB,MAAmC,aAAgC;AAC/F,UAAM,KAAK,WAAW;AACtB,SAAK,OAAO;AACZ,SAAK,MAAM;AACX,SAAK,cAAc;AAAA,EACrB;AAAA,EAEA,IAAI,QAAQ;AACV,WAAO;AAAA,EACT;AAAA,EAEA,cAAc,MAAkF;AAC9F,SAAK,OAAO,EAAE,GAAG,KAAK,MAAM,GAAG,KAAK;AAAA,EACtC;AAAA,EAEA,MAAgB,MAAqB;AACnC,QAAI,KAAuB;AAC3B,QAAI,UAAU;AACd,QAAI,gBAAgB;AACpB,QAAI;AAEJ,UAAM,sBAAsB,IAAI,cAAc,kBAAkB,EAAE,OAAO;AACzE,UAAM,eAAe,oBAAoC;AACzD,UAAM,YAAY,UAAU,cAAc;AAE1C,UAAM,kBAAkB,MAAM;AAC5B,UAAI,QAAS;AACb,gBAAU;AACV,0BAAoB,MAAM;AAC1B,mBAAa,MAAM;AACnB,+BAAI;AACJ,+BAAI;AAAA,IACN;AAEA,UAAM,kBAAkB,OAAO,UAA0B;AACvD,YAAM,iBAAiB,MAAM,qBAAqB,WAAW,KAAK;AAClE,UAAI,CAAC,MAAM,GAAG,eAAe,UAAU,MAAM;AAC3C,aAAK,QAAQ,KAAK,uDAAuD;AACzE;AAAA,MACF;AACA,SAAG,KAAK,KAAK,UAAU,cAAc,CAAC;AAAA,IACxC;AAEA,UAAM,gBAAgB,CAAC,WAAmB,UAAmB;AAC3D,UAAI,WAAW;AACb,aAAK,MAAM,IAAI,EAAE,WAAW,WAAW,OAAO,WAAW,MAAM,CAAC;AAChE,oBAAY;AAAA,MACd;AAAA,IACF;AAEA,UAAM,kBAAkB,YAAY;AAClC,uBAAiB,QAAQ,KAAK,OAAO;AACnC,YAAI,KAAK,gBAAgB,OAAO,QAAS;AACzC,YAAI,SAAS,iBAAiB,gBAAgB;AAC5C,8BAAoB,MAAM;AAC1B;AAAA,QACF;AACA,4BAAoB,SAAS,IAAI;AAAA,MACnC;AACA,0BAAoB,SAAS;AAAA,IAC/B;AAEA,UAAM,2BAA2B,YAAY;AAC3C,uBAAiB,MAAM,qBAAqB;AAC1C,YAAI,KAAK,gBAAgB,OAAO,QAAS;AAEzC,wBAAgB;AAAA,UACd,MAAM;AAAA,UACN,YAAY,GAAG,QAAQ;AAAA,QACzB,CAAC;AAAA,MACH;AAEA,sBAAgB,EAAE,MAAM,gBAAgB,CAAC;AAAA,IAC3C;AAEA,UAAM,uBAAuB,OAAOA,QAAkB;AACpD,aAAO,IAAI,QAAc,CAAC,SAAS,WAAW;AAC5C,aAAK,gBAAgB,OAAO,iBAAiB,SAAS,MAAM;AAC1D,0BAAgB;AAChB,iBAAO,IAAI,MAAM,8BAA8B,CAAC;AAAA,QAClD,CAAC;AAED,QAAAA,IAAG,GAAG,WAAW,OAAO,SAAS;AAC/B,gBAAM,YAAY,KAAK,MAAM,KAAK,SAAS,CAAC;AAC5C,gBAAM,iBAAiB,qBAAqB,MAAM,SAAS;AAC3D,uBAAa,MAAM,cAAc;AAAA,QACnC,CAAC;AAED,QAAAA,IAAG,GAAG,SAAS,CAAC,MAAM;AACpB,eAAK,QAAQ,MAAM,EAAE,OAAO,EAAE,GAAG,iBAAiB;AAClD,0BAAgB;AAChB,iBAAO,CAAC;AAAA,QACV,CAAC;AAED,QAAAA,IAAG,GAAG,SAAS,MAAM;AACnB,0BAAgB;AAEhB,cAAI,CAAC,QAAS,QAAO,KAAK,QAAQ,MAAM,+BAA+B;AACvE,cAAI,cAAe,QAAO,QAAQ;AAElC;AAAA,YACE,IAAI,eAAe;AAAA,cACjB,SAAS;AAAA,cACT,SAAS,EAAE,UAAU;AAAA,YACvB,CAAC;AAAA,UACH;AAAA,QACF,CAAC;AAAA,MACH,CAAC;AAAA,IACH;AAEA,UAAM,iBAAiB,YAAY;AACjC,UAAI,mBAAkC;AAEtC,YAAM,UAAU,IAAI,gBAAgB,KAAK,KAAK,YAAY,YAAY;AACtE,YAAM,oBAAoB,aAAa,OAAO;AAC9C,YAAM,SAAS,kBAAkB,UAAU;AAE3C,UAAI;AACF,eAAO,CAAC,KAAK,UAAU,CAAC,KAAK,gBAAgB,OAAO,SAAS;AAC3D,gBAAM,SAAS,MAAM,OAAO,KAAK;AACjC,cAAI,KAAK,gBAAgB,OAAO,QAAS;AACzC,cAAI,OAAO,KAAM;AAEjB,gBAAM,cAAc,OAAO;AAC3B,kBAAQ,YAAY,MAAM;AAAA,YACxB,KAAK;AACH,iCAAmB,YAAY;AAC/B;AAAA,YACF,KAAK;AACH,oBAAM,aAAa,IAAI,UAAU,OAAO,KAAK,YAAY,OAAO,QAAQ,CAAC;AACzE,yBAAW,SAAS,QAAQ,MAAM,WAAW,MAAM,GAAG;AACpD,8BAAc,kBAAmB,KAAK;AACtC,4BAAY;AAAA,cACd;AACA;AAAA,YACF,KAAK;AACH,8BAAgB;AAChB,yBAAW,SAAS,QAAQ,MAAM,GAAG;AACnC,8BAAc,kBAAmB,KAAK;AACtC,4BAAY;AAAA,cACd;AACA,4BAAc,kBAAmB,IAAI;AACrC,mBAAK,MAAM,IAAI,iBAAiB,aAAa;AAC7C;AAAA,YACF,KAAK;AACH,8BAAgB;AAChB;AAAA,YACF,KAAK;AACH,mBAAK,QAAQ;AAAA,gBACX,EAAE,YAAY;AAAA,gBACd;AAAA,cACF;AACA,8BAAgB;AAChB,oBAAM,IAAI,SAAS,+BAA+B,YAAY,OAAO,EAAE;AAAA,YACzE;AACE,mBAAK,QAAQ,KAAK,yBAAyB,WAAW;AACtD;AAAA,UACJ;AAAA,QACF;AAAA,MACF,UAAE;AACA,eAAO,YAAY;AACnB,YAAI;AACF,gBAAM,kBAAkB,OAAO;AAAA,QACjC,SAAS,GAAG;AACV,eAAK,QAAQ,MAAM,kEAAkE,CAAC;AAAA,QACxF;AAAA,MACF;AAAA,IACF;AAEA,QAAI;AACF,WAAK,MAAM,KAAK,IAAI,UAAU,KAAK,YAAY,SAAS;AAExD,YAAM,QAAQ,IAAI;AAAA,QAChB,gBAAgB;AAAA,QAChB,yBAAyB;AAAA,QACzB,qBAAqB,EAAE;AAAA,QACvB,eAAe;AAAA,MACjB,CAAC;AAAA,IACH,SAAS,GAAG;AACV,WAAK,QAAQ,MAAM,EAAE,OAAO,EAAE,GAAG,2BAA2B;AAAA,IAC9D,UAAE;AACA,sBAAgB;AAAA,IAClB;AAAA,EACF;AACF;","names":["ws"]}
1
+ {"version":3,"sources":["../../src/inference/tts.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2025 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { WebSocket } from 'ws';\nimport { APIError, APIStatusError } from '../_exceptions.js';\nimport { AudioByteStream } from '../audio.js';\nimport { log } from '../log.js';\nimport { createStreamChannel } from '../stream/stream_channel.js';\nimport { basic as tokenizeBasic } from '../tokenize/index.js';\nimport type { ChunkedStream } from '../tts/index.js';\nimport { SynthesizeStream as BaseSynthesizeStream, TTS as BaseTTS } from '../tts/index.js';\nimport { type APIConnectOptions, DEFAULT_API_CONNECT_OPTIONS } from '../types.js';\nimport { shortuuid } from '../utils.js';\nimport {\n type TtsClientEvent,\n type TtsServerEvent,\n type TtsSessionCreateEvent,\n ttsClientEventSchema,\n ttsServerEventSchema,\n} from './api_protos.js';\nimport { type AnyString, connectWs, createAccessToken } from './utils.js';\n\nexport type CartesiaModels =\n | 'cartesia'\n | 'cartesia/sonic'\n | 'cartesia/sonic-2'\n | 'cartesia/sonic-turbo';\n\nexport type ElevenlabsModels =\n | 'elevenlabs'\n | 'elevenlabs/eleven_flash_v2'\n | 'elevenlabs/eleven_flash_v2_5'\n | 'elevenlabs/eleven_turbo_v2'\n | 'elevenlabs/eleven_turbo_v2_5'\n | 'elevenlabs/eleven_multilingual_v2';\n\nexport type RimeModels = 'rime' | 'rime/mist' | 'rime/mistv2' | 'rime/arcana';\n\nexport type InworldModels = 'inworld' | 'inworld/inworld-tts-1';\n\nexport interface CartesiaOptions {\n duration?: number; // max duration of audio in seconds\n speed?: 'slow' | 'normal' | 'fast'; // default: not specified\n}\n\nexport interface ElevenlabsOptions {\n inactivity_timeout?: number; // default: 60\n apply_text_normalization?: 'auto' | 'off' | 'on'; // default: \"auto\"\n}\n\nexport interface RimeOptions {}\n\nexport interface InworldOptions {}\n\ntype _TTSModels = CartesiaModels | ElevenlabsModels | RimeModels | InworldModels;\n\nexport type TTSModels = CartesiaModels | ElevenlabsModels | RimeModels | InworldModels | AnyString;\n\nexport type ModelWithVoice = `${_TTSModels}:${string}` | TTSModels;\n\nexport type TTSOptions<TModel extends TTSModels> = TModel extends CartesiaModels\n ? CartesiaOptions\n : TModel extends ElevenlabsModels\n ? ElevenlabsOptions\n : TModel extends RimeOptions\n ? RimeOptions\n : TModel extends InworldOptions\n ? InworldOptions\n : Record<string, unknown>;\n\ntype TTSEncoding = 'pcm_s16le';\n\nconst DEFAULT_ENCODING: TTSEncoding = 'pcm_s16le';\nconst DEFAULT_SAMPLE_RATE = 16000;\nconst DEFAULT_BASE_URL = 'https://agent-gateway.livekit.cloud/v1';\nconst NUM_CHANNELS = 1;\nconst DEFAULT_LANGUAGE = 'en';\n\nexport interface InferenceTTSOptions<TModel extends TTSModels> {\n model?: TModel;\n voice?: string;\n language?: string;\n encoding: TTSEncoding;\n sampleRate: number;\n baseURL: string;\n apiKey: string;\n apiSecret: string;\n modelOptions: TTSOptions<TModel>;\n}\n\n/**\n * Livekit Cloud Inference TTS\n */\nexport class TTS<TModel extends TTSModels> extends BaseTTS {\n private opts: InferenceTTSOptions<TModel>;\n private streams: Set<SynthesizeStream<TModel>> = new Set();\n\n #logger = log();\n\n constructor(opts: {\n model: TModel;\n voice?: string;\n language?: string;\n baseURL?: string;\n encoding?: TTSEncoding;\n sampleRate?: number;\n apiKey?: string;\n apiSecret?: string;\n modelOptions?: TTSOptions<TModel>;\n }) {\n const sampleRate = opts?.sampleRate ?? DEFAULT_SAMPLE_RATE;\n super(sampleRate, 1, { streaming: true });\n\n const {\n model,\n voice,\n language = DEFAULT_LANGUAGE,\n baseURL,\n encoding = DEFAULT_ENCODING,\n apiKey,\n apiSecret,\n modelOptions = {} as TTSOptions<TModel>,\n } = opts || {};\n\n const lkBaseURL = baseURL || process.env.LIVEKIT_INFERENCE_URL || DEFAULT_BASE_URL;\n const lkApiKey = apiKey || process.env.LIVEKIT_INFERENCE_API_KEY || process.env.LIVEKIT_API_KEY;\n if (!lkApiKey) {\n throw new Error('apiKey is required: pass apiKey or set LIVEKIT_API_KEY');\n }\n\n const lkApiSecret =\n apiSecret || process.env.LIVEKIT_INFERENCE_API_SECRET || process.env.LIVEKIT_API_SECRET;\n if (!lkApiSecret) {\n throw new Error('apiSecret is required: pass apiSecret or set LIVEKIT_API_SECRET');\n }\n\n // read voice id from the model if provided: \"provider/model:voice_id\"\n let nextModel = model;\n let nextVoice = voice;\n if (typeof nextModel === 'string') {\n const idx = nextModel.lastIndexOf(':');\n if (idx !== -1) {\n const voiceFromModel = nextModel.slice(idx + 1);\n if (nextVoice && nextVoice !== voiceFromModel) {\n this.#logger.warn(\n '`voice` is provided via both argument and model, using the one from the argument',\n { voice: nextVoice, model: nextModel },\n );\n } else {\n nextVoice = voiceFromModel;\n }\n nextModel = nextModel.slice(0, idx) as TModel;\n }\n }\n\n this.opts = {\n model: nextModel,\n voice: nextVoice,\n language,\n encoding,\n sampleRate,\n baseURL: lkBaseURL,\n apiKey: lkApiKey,\n apiSecret: lkApiSecret,\n modelOptions,\n };\n }\n\n get label() {\n return 'inference.TTS';\n }\n\n static fromModelString(modelString: string): TTS<AnyString> {\n if (modelString.includes(':')) {\n const [model, voice] = modelString.split(':') as [TTSModels, string];\n return new TTS({ model, voice });\n }\n return new TTS({ model: modelString });\n }\n\n updateOptions(opts: Partial<Pick<InferenceTTSOptions<TModel>, 'model' | 'voice' | 'language'>>) {\n this.opts = { ...this.opts, ...opts };\n for (const stream of this.streams) {\n stream.updateOptions(opts);\n }\n }\n\n synthesize(_: string): ChunkedStream {\n throw new Error('ChunkedStream is not implemented');\n }\n\n stream(options?: { connOptions?: APIConnectOptions }): SynthesizeStream<TModel> {\n const { connOptions = DEFAULT_API_CONNECT_OPTIONS } = options || {};\n const stream = new SynthesizeStream(this, { ...this.opts }, connOptions);\n this.streams.add(stream);\n return stream;\n }\n\n async connectWs(timeout: number): Promise<WebSocket> {\n let baseURL = this.opts.baseURL;\n if (baseURL.startsWith('http://') || baseURL.startsWith('https://')) {\n baseURL = baseURL.replace('http', 'ws');\n }\n\n const token = await createAccessToken(this.opts.apiKey, this.opts.apiSecret);\n const url = `${baseURL}/tts`;\n const headers = { Authorization: `Bearer ${token}` } as Record<string, string>;\n\n const params = {\n type: 'session.create',\n sample_rate: String(this.opts.sampleRate),\n encoding: this.opts.encoding,\n extra: this.opts.modelOptions,\n } as TtsSessionCreateEvent;\n\n if (this.opts.voice) params.voice = this.opts.voice;\n if (this.opts.model) params.model = this.opts.model;\n if (this.opts.language) params.language = this.opts.language;\n\n const socket = await connectWs(url, headers, timeout);\n socket.send(JSON.stringify(params));\n return socket;\n }\n\n async closeWs(ws: WebSocket) {\n await ws.close();\n }\n\n async close() {\n for (const stream of this.streams) {\n await stream.close();\n }\n this.streams.clear();\n }\n}\n\nexport class SynthesizeStream<TModel extends TTSModels> extends BaseSynthesizeStream {\n private opts: InferenceTTSOptions<TModel>;\n private tts: TTS<TModel>;\n private connOptions: APIConnectOptions;\n\n #logger = log();\n\n constructor(tts: TTS<TModel>, opts: InferenceTTSOptions<TModel>, connOptions: APIConnectOptions) {\n super(tts, connOptions);\n this.opts = opts;\n this.tts = tts;\n this.connOptions = connOptions;\n }\n\n get label() {\n return 'inference.SynthesizeStream';\n }\n\n updateOptions(opts: Partial<Pick<InferenceTTSOptions<TModel>, 'model' | 'voice' | 'language'>>) {\n this.opts = { ...this.opts, ...opts };\n }\n\n protected async run(): Promise<void> {\n let ws: WebSocket | null = null;\n let closing = false;\n let finalReceived = false;\n let lastFrame: AudioFrame | undefined;\n\n const sendTokenizerStream = new tokenizeBasic.SentenceTokenizer().stream();\n const eventChannel = createStreamChannel<TtsServerEvent>();\n const requestId = shortuuid('tts_request_');\n\n const resourceCleanup = () => {\n if (closing) return;\n closing = true;\n sendTokenizerStream.close();\n eventChannel.close();\n ws?.removeAllListeners();\n ws?.close();\n };\n\n const sendClientEvent = async (event: TtsClientEvent) => {\n // Don't send events to a closed WebSocket or aborted controller\n if (this.abortController.signal.aborted || closing) return;\n\n const validatedEvent = await ttsClientEventSchema.parseAsync(event);\n if (!ws || ws.readyState !== WebSocket.OPEN) {\n this.#logger.warn('Trying to send client TTS event to a closed WebSocket');\n return;\n }\n ws.send(JSON.stringify(validatedEvent));\n };\n\n const sendLastFrame = (segmentId: string, final: boolean) => {\n if (lastFrame) {\n this.queue.put({ requestId, segmentId, frame: lastFrame, final });\n lastFrame = undefined;\n }\n };\n\n const createInputTask = async () => {\n for await (const data of this.input) {\n if (this.abortController.signal.aborted) break;\n if (data === SynthesizeStream.FLUSH_SENTINEL) {\n sendTokenizerStream.flush();\n continue;\n }\n sendTokenizerStream.pushText(data);\n }\n sendTokenizerStream.endInput();\n };\n\n const createSentenceStreamTask = async () => {\n for await (const ev of sendTokenizerStream) {\n if (this.abortController.signal.aborted) break;\n\n sendClientEvent({\n type: 'input_transcript',\n transcript: ev.token + ' ',\n });\n }\n\n sendClientEvent({ type: 'session.flush' });\n };\n\n const createWsListenerTask = async (ws: WebSocket) => {\n return new Promise<void>((resolve, reject) => {\n this.abortController.signal.addEventListener('abort', () => {\n resourceCleanup();\n resolve(); // Abort is triggered by close(), which is a normal shutdown, not an error\n });\n\n ws.on('message', async (data) => {\n const eventJson = JSON.parse(data.toString()) as Record<string, unknown>;\n const validatedEvent = ttsServerEventSchema.parse(eventJson);\n eventChannel.write(validatedEvent);\n });\n\n ws.on('error', (e) => {\n this.#logger.error({ error: e }, 'WebSocket error');\n resourceCleanup();\n reject(e);\n });\n\n ws.on('close', () => {\n resourceCleanup();\n\n if (!closing) return this.#logger.error('WebSocket closed unexpectedly');\n if (finalReceived) return resolve();\n\n reject(\n new APIStatusError({\n message: 'Gateway connection closed unexpectedly',\n options: { requestId },\n }),\n );\n });\n });\n };\n\n const createRecvTask = async () => {\n let currentSessionId: string | null = null;\n\n const bstream = new AudioByteStream(this.opts.sampleRate, NUM_CHANNELS);\n const serverEventStream = eventChannel.stream();\n const reader = serverEventStream.getReader();\n\n try {\n while (!this.closed && !this.abortController.signal.aborted) {\n const result = await reader.read();\n if (this.abortController.signal.aborted) return;\n if (result.done) return;\n\n const serverEvent = result.value;\n switch (serverEvent.type) {\n case 'session.created':\n currentSessionId = serverEvent.session_id;\n break;\n case 'output_audio':\n const base64Data = new Int8Array(Buffer.from(serverEvent.audio, 'base64'));\n for (const frame of bstream.write(base64Data.buffer)) {\n sendLastFrame(currentSessionId!, false);\n lastFrame = frame;\n }\n break;\n case 'done':\n finalReceived = true;\n for (const frame of bstream.flush()) {\n sendLastFrame(currentSessionId!, false);\n lastFrame = frame;\n }\n sendLastFrame(currentSessionId!, true);\n this.queue.put(SynthesizeStream.END_OF_STREAM);\n break;\n case 'session.closed':\n resourceCleanup();\n break;\n case 'error':\n this.#logger.error(\n { serverEvent },\n 'Received error message from LiveKit TTS WebSocket',\n );\n resourceCleanup();\n throw new APIError(`LiveKit TTS returned error: ${serverEvent.message}`);\n default:\n this.#logger.warn('Unexpected message %s', serverEvent);\n break;\n }\n }\n } finally {\n reader.releaseLock();\n try {\n await serverEventStream.cancel();\n } catch (e) {\n this.#logger.debug('Error cancelling serverEventStream (may already be cancelled):', e);\n }\n }\n };\n\n try {\n ws = await this.tts.connectWs(this.connOptions.timeoutMs);\n\n await Promise.all([\n createInputTask(),\n createSentenceStreamTask(),\n createWsListenerTask(ws),\n createRecvTask(),\n ]);\n } finally {\n resourceCleanup();\n }\n }\n}\n"],"mappings":"AAIA,SAAS,iBAAiB;AAC1B,SAAS,UAAU,sBAAsB;AACzC,SAAS,uBAAuB;AAChC,SAAS,WAAW;AACpB,SAAS,2BAA2B;AACpC,SAAS,SAAS,qBAAqB;AAEvC,SAAS,oBAAoB,sBAAsB,OAAO,eAAe;AACzE,SAAiC,mCAAmC;AACpE,SAAS,iBAAiB;AAC1B;AAAA,EAIE;AAAA,EACA;AAAA,OACK;AACP,SAAyB,WAAW,yBAAyB;AAoD7D,MAAM,mBAAgC;AACtC,MAAM,sBAAsB;AAC5B,MAAM,mBAAmB;AACzB,MAAM,eAAe;AACrB,MAAM,mBAAmB;AAiBlB,MAAM,YAAsC,QAAQ;AAAA,EACjD;AAAA,EACA,UAAyC,oBAAI,IAAI;AAAA,EAEzD,UAAU,IAAI;AAAA,EAEd,YAAY,MAUT;AACD,UAAM,cAAa,6BAAM,eAAc;AACvC,UAAM,YAAY,GAAG,EAAE,WAAW,KAAK,CAAC;AAExC,UAAM;AAAA,MACJ;AAAA,MACA;AAAA,MACA,WAAW;AAAA,MACX;AAAA,MACA,WAAW;AAAA,MACX;AAAA,MACA;AAAA,MACA,eAAe,CAAC;AAAA,IAClB,IAAI,QAAQ,CAAC;AAEb,UAAM,YAAY,WAAW,QAAQ,IAAI,yBAAyB;AAClE,UAAM,WAAW,UAAU,QAAQ,IAAI,6BAA6B,QAAQ,IAAI;AAChF,QAAI,CAAC,UAAU;AACb,YAAM,IAAI,MAAM,wDAAwD;AAAA,IAC1E;AAEA,UAAM,cACJ,aAAa,QAAQ,IAAI,gCAAgC,QAAQ,IAAI;AACvE,QAAI,CAAC,aAAa;AAChB,YAAM,IAAI,MAAM,iEAAiE;AAAA,IACnF;AAGA,QAAI,YAAY;AAChB,QAAI,YAAY;AAChB,QAAI,OAAO,cAAc,UAAU;AACjC,YAAM,MAAM,UAAU,YAAY,GAAG;AACrC,UAAI,QAAQ,IAAI;AACd,cAAM,iBAAiB,UAAU,MAAM,MAAM,CAAC;AAC9C,YAAI,aAAa,cAAc,gBAAgB;AAC7C,eAAK,QAAQ;AAAA,YACX;AAAA,YACA,EAAE,OAAO,WAAW,OAAO,UAAU;AAAA,UACvC;AAAA,QACF,OAAO;AACL,sBAAY;AAAA,QACd;AACA,oBAAY,UAAU,MAAM,GAAG,GAAG;AAAA,MACpC;AAAA,IACF;AAEA,SAAK,OAAO;AAAA,MACV,OAAO;AAAA,MACP,OAAO;AAAA,MACP;AAAA,MACA;AAAA,MACA;AAAA,MACA,SAAS;AAAA,MACT,QAAQ;AAAA,MACR,WAAW;AAAA,MACX;AAAA,IACF;AAAA,EACF;AAAA,EAEA,IAAI,QAAQ;AACV,WAAO;AAAA,EACT;AAAA,EAEA,OAAO,gBAAgB,aAAqC;AAC1D,QAAI,YAAY,SAAS,GAAG,GAAG;AAC7B,YAAM,CAAC,OAAO,KAAK,IAAI,YAAY,MAAM,GAAG;AAC5C,aAAO,IAAI,IAAI,EAAE,OAAO,MAAM,CAAC;AAAA,IACjC;AACA,WAAO,IAAI,IAAI,EAAE,OAAO,YAAY,CAAC;AAAA,EACvC;AAAA,EAEA,cAAc,MAAkF;AAC9F,SAAK,OAAO,EAAE,GAAG,KAAK,MAAM,GAAG,KAAK;AACpC,eAAW,UAAU,KAAK,SAAS;AACjC,aAAO,cAAc,IAAI;AAAA,IAC3B;AAAA,EACF;AAAA,EAEA,WAAW,GAA0B;AACnC,UAAM,IAAI,MAAM,kCAAkC;AAAA,EACpD;AAAA,EAEA,OAAO,SAAyE;AAC9E,UAAM,EAAE,cAAc,4BAA4B,IAAI,WAAW,CAAC;AAClE,UAAM,SAAS,IAAI,iBAAiB,MAAM,EAAE,GAAG,KAAK,KAAK,GAAG,WAAW;AACvE,SAAK,QAAQ,IAAI,MAAM;AACvB,WAAO;AAAA,EACT;AAAA,EAEA,MAAM,UAAU,SAAqC;AACnD,QAAI,UAAU,KAAK,KAAK;AACxB,QAAI,QAAQ,WAAW,SAAS,KAAK,QAAQ,WAAW,UAAU,GAAG;AACnE,gBAAU,QAAQ,QAAQ,QAAQ,IAAI;AAAA,IACxC;AAEA,UAAM,QAAQ,MAAM,kBAAkB,KAAK,KAAK,QAAQ,KAAK,KAAK,SAAS;AAC3E,UAAM,MAAM,GAAG,OAAO;AACtB,UAAM,UAAU,EAAE,eAAe,UAAU,KAAK,GAAG;AAEnD,UAAM,SAAS;AAAA,MACb,MAAM;AAAA,MACN,aAAa,OAAO,KAAK,KAAK,UAAU;AAAA,MACxC,UAAU,KAAK,KAAK;AAAA,MACpB,OAAO,KAAK,KAAK;AAAA,IACnB;AAEA,QAAI,KAAK,KAAK,MAAO,QAAO,QAAQ,KAAK,KAAK;AAC9C,QAAI,KAAK,KAAK,MAAO,QAAO,QAAQ,KAAK,KAAK;AAC9C,QAAI,KAAK,KAAK,SAAU,QAAO,WAAW,KAAK,KAAK;AAEpD,UAAM,SAAS,MAAM,UAAU,KAAK,SAAS,OAAO;AACpD,WAAO,KAAK,KAAK,UAAU,MAAM,CAAC;AAClC,WAAO;AAAA,EACT;AAAA,EAEA,MAAM,QAAQ,IAAe;AAC3B,UAAM,GAAG,MAAM;AAAA,EACjB;AAAA,EAEA,MAAM,QAAQ;AACZ,eAAW,UAAU,KAAK,SAAS;AACjC,YAAM,OAAO,MAAM;AAAA,IACrB;AACA,SAAK,QAAQ,MAAM;AAAA,EACrB;AACF;AAEO,MAAM,yBAAmD,qBAAqB;AAAA,EAC3E;AAAA,EACA;AAAA,EACA;AAAA,EAER,UAAU,IAAI;AAAA,EAEd,YAAY,KAAkB,MAAmC,aAAgC;AAC/F,UAAM,KAAK,WAAW;AACtB,SAAK,OAAO;AACZ,SAAK,MAAM;AACX,SAAK,cAAc;AAAA,EACrB;AAAA,EAEA,IAAI,QAAQ;AACV,WAAO;AAAA,EACT;AAAA,EAEA,cAAc,MAAkF;AAC9F,SAAK,OAAO,EAAE,GAAG,KAAK,MAAM,GAAG,KAAK;AAAA,EACtC;AAAA,EAEA,MAAgB,MAAqB;AACnC,QAAI,KAAuB;AAC3B,QAAI,UAAU;AACd,QAAI,gBAAgB;AACpB,QAAI;AAEJ,UAAM,sBAAsB,IAAI,cAAc,kBAAkB,EAAE,OAAO;AACzE,UAAM,eAAe,oBAAoC;AACzD,UAAM,YAAY,UAAU,cAAc;AAE1C,UAAM,kBAAkB,MAAM;AAC5B,UAAI,QAAS;AACb,gBAAU;AACV,0BAAoB,MAAM;AAC1B,mBAAa,MAAM;AACnB,+BAAI;AACJ,+BAAI;AAAA,IACN;AAEA,UAAM,kBAAkB,OAAO,UAA0B;AAEvD,UAAI,KAAK,gBAAgB,OAAO,WAAW,QAAS;AAEpD,YAAM,iBAAiB,MAAM,qBAAqB,WAAW,KAAK;AAClE,UAAI,CAAC,MAAM,GAAG,eAAe,UAAU,MAAM;AAC3C,aAAK,QAAQ,KAAK,uDAAuD;AACzE;AAAA,MACF;AACA,SAAG,KAAK,KAAK,UAAU,cAAc,CAAC;AAAA,IACxC;AAEA,UAAM,gBAAgB,CAAC,WAAmB,UAAmB;AAC3D,UAAI,WAAW;AACb,aAAK,MAAM,IAAI,EAAE,WAAW,WAAW,OAAO,WAAW,MAAM,CAAC;AAChE,oBAAY;AAAA,MACd;AAAA,IACF;AAEA,UAAM,kBAAkB,YAAY;AAClC,uBAAiB,QAAQ,KAAK,OAAO;AACnC,YAAI,KAAK,gBAAgB,OAAO,QAAS;AACzC,YAAI,SAAS,iBAAiB,gBAAgB;AAC5C,8BAAoB,MAAM;AAC1B;AAAA,QACF;AACA,4BAAoB,SAAS,IAAI;AAAA,MACnC;AACA,0BAAoB,SAAS;AAAA,IAC/B;AAEA,UAAM,2BAA2B,YAAY;AAC3C,uBAAiB,MAAM,qBAAqB;AAC1C,YAAI,KAAK,gBAAgB,OAAO,QAAS;AAEzC,wBAAgB;AAAA,UACd,MAAM;AAAA,UACN,YAAY,GAAG,QAAQ;AAAA,QACzB,CAAC;AAAA,MACH;AAEA,sBAAgB,EAAE,MAAM,gBAAgB,CAAC;AAAA,IAC3C;AAEA,UAAM,uBAAuB,OAAOA,QAAkB;AACpD,aAAO,IAAI,QAAc,CAAC,SAAS,WAAW;AAC5C,aAAK,gBAAgB,OAAO,iBAAiB,SAAS,MAAM;AAC1D,0BAAgB;AAChB,kBAAQ;AAAA,QACV,CAAC;AAED,QAAAA,IAAG,GAAG,WAAW,OAAO,SAAS;AAC/B,gBAAM,YAAY,KAAK,MAAM,KAAK,SAAS,CAAC;AAC5C,gBAAM,iBAAiB,qBAAqB,MAAM,SAAS;AAC3D,uBAAa,MAAM,cAAc;AAAA,QACnC,CAAC;AAED,QAAAA,IAAG,GAAG,SAAS,CAAC,MAAM;AACpB,eAAK,QAAQ,MAAM,EAAE,OAAO,EAAE,GAAG,iBAAiB;AAClD,0BAAgB;AAChB,iBAAO,CAAC;AAAA,QACV,CAAC;AAED,QAAAA,IAAG,GAAG,SAAS,MAAM;AACnB,0BAAgB;AAEhB,cAAI,CAAC,QAAS,QAAO,KAAK,QAAQ,MAAM,+BAA+B;AACvE,cAAI,cAAe,QAAO,QAAQ;AAElC;AAAA,YACE,IAAI,eAAe;AAAA,cACjB,SAAS;AAAA,cACT,SAAS,EAAE,UAAU;AAAA,YACvB,CAAC;AAAA,UACH;AAAA,QACF,CAAC;AAAA,MACH,CAAC;AAAA,IACH;AAEA,UAAM,iBAAiB,YAAY;AACjC,UAAI,mBAAkC;AAEtC,YAAM,UAAU,IAAI,gBAAgB,KAAK,KAAK,YAAY,YAAY;AACtE,YAAM,oBAAoB,aAAa,OAAO;AAC9C,YAAM,SAAS,kBAAkB,UAAU;AAE3C,UAAI;AACF,eAAO,CAAC,KAAK,UAAU,CAAC,KAAK,gBAAgB,OAAO,SAAS;AAC3D,gBAAM,SAAS,MAAM,OAAO,KAAK;AACjC,cAAI,KAAK,gBAAgB,OAAO,QAAS;AACzC,cAAI,OAAO,KAAM;AAEjB,gBAAM,cAAc,OAAO;AAC3B,kBAAQ,YAAY,MAAM;AAAA,YACxB,KAAK;AACH,iCAAmB,YAAY;AAC/B;AAAA,YACF,KAAK;AACH,oBAAM,aAAa,IAAI,UAAU,OAAO,KAAK,YAAY,OAAO,QAAQ,CAAC;AACzE,yBAAW,SAAS,QAAQ,MAAM,WAAW,MAAM,GAAG;AACpD,8BAAc,kBAAmB,KAAK;AACtC,4BAAY;AAAA,cACd;AACA;AAAA,YACF,KAAK;AACH,8BAAgB;AAChB,yBAAW,SAAS,QAAQ,MAAM,GAAG;AACnC,8BAAc,kBAAmB,KAAK;AACtC,4BAAY;AAAA,cACd;AACA,4BAAc,kBAAmB,IAAI;AACrC,mBAAK,MAAM,IAAI,iBAAiB,aAAa;AAC7C;AAAA,YACF,KAAK;AACH,8BAAgB;AAChB;AAAA,YACF,KAAK;AACH,mBAAK,QAAQ;AAAA,gBACX,EAAE,YAAY;AAAA,gBACd;AAAA,cACF;AACA,8BAAgB;AAChB,oBAAM,IAAI,SAAS,+BAA+B,YAAY,OAAO,EAAE;AAAA,YACzE;AACE,mBAAK,QAAQ,KAAK,yBAAyB,WAAW;AACtD;AAAA,UACJ;AAAA,QACF;AAAA,MACF,UAAE;AACA,eAAO,YAAY;AACnB,YAAI;AACF,gBAAM,kBAAkB,OAAO;AAAA,QACjC,SAAS,GAAG;AACV,eAAK,QAAQ,MAAM,kEAAkE,CAAC;AAAA,QACxF;AAAA,MACF;AAAA,IACF;AAEA,QAAI;AACF,WAAK,MAAM,KAAK,IAAI,UAAU,KAAK,YAAY,SAAS;AAExD,YAAM,QAAQ,IAAI;AAAA,QAChB,gBAAgB;AAAA,QAChB,yBAAyB;AAAA,QACzB,qBAAqB,EAAE;AAAA,QACvB,eAAe;AAAA,MACjB,CAAC;AAAA,IACH,UAAE;AACA,sBAAgB;AAAA,IAClB;AAAA,EACF;AACF;","names":["ws"]}
package/dist/stt/stt.cjs CHANGED
@@ -149,7 +149,18 @@ class SpeechStream {
149
149
  }
150
150
  async monitorMetrics() {
151
151
  for await (const event of this.queue) {
152
- this.output.put(event);
152
+ if (!this.output.closed) {
153
+ try {
154
+ this.output.put(event);
155
+ } catch (e) {
156
+ if (e instanceof Error && e.message.includes("Queue is closed")) {
157
+ this.logger.warn(
158
+ { err: e },
159
+ "Queue closed during transcript processing (expected during disconnect)"
160
+ );
161
+ }
162
+ }
163
+ }
153
164
  if (event.type !== 4 /* RECOGNITION_USAGE */) continue;
154
165
  const metrics = {
155
166
  type: "stt_metrics",
@@ -162,7 +173,9 @@ class SpeechStream {
162
173
  };
163
174
  this.#stt.emit("metrics_collected", metrics);
164
175
  }
165
- this.output.close();
176
+ if (!this.output.closed) {
177
+ this.output.close();
178
+ }
166
179
  }
167
180
  updateInputStream(audioStream) {
168
181
  this.deferredInputStream.setSource(audioStream);
@@ -217,9 +230,9 @@ class SpeechStream {
217
230
  }
218
231
  /** Close both the input and output of the STT stream */
219
232
  close() {
220
- this.input.close();
221
- this.queue.close();
222
- this.output.close();
233
+ if (!this.input.closed) this.input.close();
234
+ if (!this.queue.closed) this.queue.close();
235
+ if (!this.output.closed) this.output.close();
223
236
  this.closed = true;
224
237
  }
225
238
  [Symbol.asyncIterator]() {
@@ -1 +1 @@
1
- {"version":3,"sources":["../../src/stt/stt.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { type AudioFrame, AudioResampler } from '@livekit/rtc-node';\nimport type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';\nimport { EventEmitter } from 'node:events';\nimport type { ReadableStream } from 'node:stream/web';\nimport { APIConnectionError, APIError } from '../_exceptions.js';\nimport { calculateAudioDurationSeconds } from '../audio.js';\nimport { log } from '../log.js';\nimport type { STTMetrics } from '../metrics/base.js';\nimport { DeferredReadableStream } from '../stream/deferred_stream.js';\nimport { type APIConnectOptions, DEFAULT_API_CONNECT_OPTIONS } from '../types.js';\nimport type { AudioBuffer } from '../utils.js';\nimport { AsyncIterableQueue, delay, startSoon, toError } from '../utils.js';\n\n/** Indicates start/middle/end of speech */\nexport enum SpeechEventType {\n /**\n * Indicate the start of speech.\n * If the STT doesn't support this event, this will be emitted at the same time\n * as the first INTERIM_TRANSCRIPT.\n */\n START_OF_SPEECH = 0,\n /**\n * Interim transcript, useful for real-time transcription.\n */\n INTERIM_TRANSCRIPT = 1,\n /**\n * Final transcript, emitted when the STT is confident enough that a certain\n * portion of the speech will not change.\n */\n FINAL_TRANSCRIPT = 2,\n /**\n * Indicate the end of speech, emitted when the user stops speaking.\n * The first alternative is a combination of all the previous FINAL_TRANSCRIPT events.\n */\n END_OF_SPEECH = 3,\n /** Usage event, emitted periodically to indicate usage metrics. */\n RECOGNITION_USAGE = 4,\n /**\n * Preflight transcript, emitted before final transcript when STT has high confidence\n * but hasn't fully committed yet. Includes all pre-committed transcripts including\n * final transcript from the previous STT run.\n */\n PREFLIGHT_TRANSCRIPT = 5,\n}\n\n/** SpeechData contains metadata about this {@link SpeechEvent}. */\nexport interface SpeechData {\n language: string;\n text: string;\n startTime: number;\n endTime: number;\n confidence: number;\n}\n\nexport interface RecognitionUsage {\n audioDuration: number;\n}\n\n/** SpeechEvent is a packet of speech-to-text data. */\nexport interface SpeechEvent {\n type: SpeechEventType;\n alternatives?: [SpeechData, ...SpeechData[]];\n requestId?: string;\n recognitionUsage?: RecognitionUsage;\n}\n\n/**\n * Describes the capabilities of the STT provider.\n *\n * @remarks\n * At present, the framework only supports providers that have a streaming endpoint.\n */\nexport interface STTCapabilities {\n streaming: boolean;\n interimResults: boolean;\n}\n\nexport interface STTError {\n type: 'stt_error';\n timestamp: number;\n label: string;\n error: Error;\n recoverable: boolean;\n}\n\nexport type STTCallbacks = {\n ['metrics_collected']: (metrics: STTMetrics) => void;\n ['error']: (error: STTError) => void;\n};\n\n/**\n * An instance of a speech-to-text adapter.\n *\n * @remarks\n * This class is abstract, and as such cannot be used directly. Instead, use a provider plugin that\n * exports its own child STT class, which inherits this class's methods.\n */\nexport abstract class STT extends (EventEmitter as new () => TypedEmitter<STTCallbacks>) {\n abstract label: string;\n #capabilities: STTCapabilities;\n\n constructor(capabilities: STTCapabilities) {\n super();\n this.#capabilities = capabilities;\n }\n\n /** Returns this STT's capabilities */\n get capabilities(): STTCapabilities {\n return this.#capabilities;\n }\n\n /** Receives an audio buffer and returns transcription in the form of a {@link SpeechEvent} */\n async recognize(frame: AudioBuffer): Promise<SpeechEvent> {\n const startTime = process.hrtime.bigint();\n const event = await this._recognize(frame);\n const durationMs = Number((process.hrtime.bigint() - startTime) / BigInt(1000000));\n this.emit('metrics_collected', {\n type: 'stt_metrics',\n requestId: event.requestId ?? '',\n timestamp: Date.now(),\n durationMs,\n label: this.label,\n audioDurationMs: Math.round(calculateAudioDurationSeconds(frame) * 1000),\n streamed: false,\n });\n return event;\n }\n protected abstract _recognize(frame: AudioBuffer): Promise<SpeechEvent>;\n\n /**\n * Returns a {@link SpeechStream} that can be used to push audio frames and receive\n * transcriptions\n */\n abstract stream(): SpeechStream;\n\n async close(): Promise<void> {\n return;\n }\n}\n\n/**\n * An instance of a speech-to-text stream, as an asynchronous iterable iterator.\n *\n * @example Looping through frames\n * ```ts\n * for await (const event of stream) {\n * if (event.type === SpeechEventType.FINAL_TRANSCRIPT) {\n * console.log(event.alternatives[0].text)\n * }\n * }\n * ```\n *\n * @remarks\n * This class is abstract, and as such cannot be used directly. Instead, use a provider plugin that\n * exports its own child SpeechStream class, which inherits this class's methods.\n */\nexport abstract class SpeechStream implements AsyncIterableIterator<SpeechEvent> {\n protected static readonly FLUSH_SENTINEL = Symbol('FLUSH_SENTINEL');\n protected input = new AsyncIterableQueue<AudioFrame | typeof SpeechStream.FLUSH_SENTINEL>();\n protected output = new AsyncIterableQueue<SpeechEvent>();\n protected queue = new AsyncIterableQueue<SpeechEvent>();\n protected neededSampleRate?: number;\n protected resampler?: AudioResampler;\n abstract label: string;\n protected closed = false;\n #stt: STT;\n private deferredInputStream: DeferredReadableStream<AudioFrame>;\n private logger = log();\n private _connOptions: APIConnectOptions;\n\n constructor(\n stt: STT,\n sampleRate?: number,\n connectionOptions: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,\n ) {\n this.#stt = stt;\n this._connOptions = connectionOptions;\n this.deferredInputStream = new DeferredReadableStream<AudioFrame>();\n this.neededSampleRate = sampleRate;\n this.monitorMetrics();\n this.pumpInput();\n\n // this is a hack to immitate asyncio.create_task so that mainTask\n // is run **after** the constructor has finished. Otherwise we get\n // runtime error when trying to access class variables in the\n // `run` method.\n startSoon(() => this.mainTask().then(() => this.queue.close()));\n }\n\n private async mainTask() {\n for (let i = 0; i < this._connOptions.maxRetry + 1; i++) {\n try {\n return await this.run();\n } catch (error) {\n if (error instanceof APIError) {\n const retryInterval = this._connOptions._intervalForRetry(i);\n\n if (this._connOptions.maxRetry === 0 || !error.retryable) {\n this.emitError({ error, recoverable: false });\n throw error;\n } else if (i === this._connOptions.maxRetry) {\n this.emitError({ error, recoverable: false });\n throw new APIConnectionError({\n message: `failed to recognize speech after ${this._connOptions.maxRetry + 1} attempts`,\n options: { retryable: false },\n });\n } else {\n // Don't emit error event for recoverable errors during retry loop\n // to avoid ERR_UNHANDLED_ERROR or premature session termination\n this.logger.warn(\n { tts: this.#stt.label, attempt: i + 1, error },\n `failed to recognize speech, retrying in ${retryInterval}s`,\n );\n }\n\n if (retryInterval > 0) {\n await delay(retryInterval);\n }\n } else {\n this.emitError({ error: toError(error), recoverable: false });\n throw error;\n }\n }\n }\n }\n\n private emitError({ error, recoverable }: { error: Error; recoverable: boolean }) {\n this.#stt.emit('error', {\n type: 'stt_error',\n timestamp: Date.now(),\n label: this.#stt.label,\n error,\n recoverable,\n });\n }\n\n protected async pumpInput() {\n // TODO(AJS-35): Implement STT with webstreams API\n const inputStream = this.deferredInputStream.stream;\n const reader = inputStream.getReader();\n\n try {\n while (true) {\n const { done, value } = await reader.read();\n if (done) break;\n this.pushFrame(value);\n }\n } catch (error) {\n this.logger.error('Error in STTStream mainTask:', error);\n } finally {\n reader.releaseLock();\n }\n }\n\n protected async monitorMetrics() {\n for await (const event of this.queue) {\n this.output.put(event);\n if (event.type !== SpeechEventType.RECOGNITION_USAGE) continue;\n const metrics: STTMetrics = {\n type: 'stt_metrics',\n timestamp: Date.now(),\n requestId: event.requestId!,\n durationMs: 0,\n label: this.#stt.label,\n audioDurationMs: Math.round(event.recognitionUsage!.audioDuration * 1000),\n streamed: true,\n };\n this.#stt.emit('metrics_collected', metrics);\n }\n this.output.close();\n }\n\n protected abstract run(): Promise<void>;\n\n updateInputStream(audioStream: ReadableStream<AudioFrame>) {\n this.deferredInputStream.setSource(audioStream);\n }\n\n detachInputStream() {\n this.deferredInputStream.detachSource();\n }\n\n /** Push an audio frame to the STT */\n pushFrame(frame: AudioFrame) {\n if (this.input.closed) {\n throw new Error('Input is closed');\n }\n if (this.closed) {\n throw new Error('Stream is closed');\n }\n\n if (this.neededSampleRate && frame.sampleRate !== this.neededSampleRate) {\n if (!this.resampler) {\n this.resampler = new AudioResampler(frame.sampleRate, this.neededSampleRate);\n }\n }\n\n if (this.resampler) {\n const frames = this.resampler.push(frame);\n for (const frame of frames) {\n this.input.put(frame);\n }\n } else {\n this.input.put(frame);\n }\n }\n\n /** Flush the STT, causing it to process all pending text */\n flush() {\n if (this.input.closed) {\n throw new Error('Input is closed');\n }\n if (this.closed) {\n throw new Error('Stream is closed');\n }\n this.input.put(SpeechStream.FLUSH_SENTINEL);\n }\n\n /** Mark the input as ended and forbid additional pushes */\n endInput() {\n if (this.input.closed) {\n throw new Error('Input is closed');\n }\n if (this.closed) {\n throw new Error('Stream is closed');\n }\n this.input.close();\n }\n\n next(): Promise<IteratorResult<SpeechEvent>> {\n return this.output.next();\n }\n\n /** Close both the input and output of the STT stream */\n close() {\n this.input.close();\n this.queue.close();\n this.output.close();\n this.closed = true;\n }\n\n [Symbol.asyncIterator](): SpeechStream {\n return this;\n }\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAGA,sBAAgD;AAEhD,yBAA6B;AAE7B,wBAA6C;AAC7C,mBAA8C;AAC9C,iBAAoB;AAEpB,6BAAuC;AACvC,mBAAoE;AAEpE,mBAA8D;AAGvD,IAAK,kBAAL,kBAAKA,qBAAL;AAML,EAAAA,kCAAA,qBAAkB,KAAlB;AAIA,EAAAA,kCAAA,wBAAqB,KAArB;AAKA,EAAAA,kCAAA,sBAAmB,KAAnB;AAKA,EAAAA,kCAAA,mBAAgB,KAAhB;AAEA,EAAAA,kCAAA,uBAAoB,KAApB;AAMA,EAAAA,kCAAA,0BAAuB,KAAvB;AA5BU,SAAAA;AAAA,GAAA;AAmFL,MAAe,YAAa,gCAAsD;AAAA,EAEvF;AAAA,EAEA,YAAY,cAA+B;AACzC,UAAM;AACN,SAAK,gBAAgB;AAAA,EACvB;AAAA;AAAA,EAGA,IAAI,eAAgC;AAClC,WAAO,KAAK;AAAA,EACd;AAAA;AAAA,EAGA,MAAM,UAAU,OAA0C;AACxD,UAAM,YAAY,QAAQ,OAAO,OAAO;AACxC,UAAM,QAAQ,MAAM,KAAK,WAAW,KAAK;AACzC,UAAM,aAAa,QAAQ,QAAQ,OAAO,OAAO,IAAI,aAAa,OAAO,GAAO,CAAC;AACjF,SAAK,KAAK,qBAAqB;AAAA,MAC7B,MAAM;AAAA,MACN,WAAW,MAAM,aAAa;AAAA,MAC9B,WAAW,KAAK,IAAI;AAAA,MACpB;AAAA,MACA,OAAO,KAAK;AAAA,MACZ,iBAAiB,KAAK,UAAM,4CAA8B,KAAK,IAAI,GAAI;AAAA,MACvE,UAAU;AAAA,IACZ,CAAC;AACD,WAAO;AAAA,EACT;AAAA,EASA,MAAM,QAAuB;AAC3B;AAAA,EACF;AACF;AAkBO,MAAe,aAA2D;AAAA,EAC/E,OAA0B,iBAAiB,OAAO,gBAAgB;AAAA,EACxD,QAAQ,IAAI,gCAAoE;AAAA,EAChF,SAAS,IAAI,gCAAgC;AAAA,EAC7C,QAAQ,IAAI,gCAAgC;AAAA,EAC5C;AAAA,EACA;AAAA,EAEA,SAAS;AAAA,EACnB;AAAA,EACQ;AAAA,EACA,aAAS,gBAAI;AAAA,EACb;AAAA,EAER,YACE,KACA,YACA,oBAAuC,0CACvC;AACA,SAAK,OAAO;AACZ,SAAK,eAAe;AACpB,SAAK,sBAAsB,IAAI,8CAAmC;AAClE,SAAK,mBAAmB;AACxB,SAAK,eAAe;AACpB,SAAK,UAAU;AAMf,gCAAU,MAAM,KAAK,SAAS,EAAE,KAAK,MAAM,KAAK,MAAM,MAAM,CAAC,CAAC;AAAA,EAChE;AAAA,EAEA,MAAc,WAAW;AACvB,aAAS,IAAI,GAAG,IAAI,KAAK,aAAa,WAAW,GAAG,KAAK;AACvD,UAAI;AACF,eAAO,MAAM,KAAK,IAAI;AAAA,MACxB,SAAS,OAAO;AACd,YAAI,iBAAiB,4BAAU;AAC7B,gBAAM,gBAAgB,KAAK,aAAa,kBAAkB,CAAC;AAE3D,cAAI,KAAK,aAAa,aAAa,KAAK,CAAC,MAAM,WAAW;AACxD,iBAAK,UAAU,EAAE,OAAO,aAAa,MAAM,CAAC;AAC5C,kBAAM;AAAA,UACR,WAAW,MAAM,KAAK,aAAa,UAAU;AAC3C,iBAAK,UAAU,EAAE,OAAO,aAAa,MAAM,CAAC;AAC5C,kBAAM,IAAI,qCAAmB;AAAA,cAC3B,SAAS,oCAAoC,KAAK,aAAa,WAAW,CAAC;AAAA,cAC3E,SAAS,EAAE,WAAW,MAAM;AAAA,YAC9B,CAAC;AAAA,UACH,OAAO;AAGL,iBAAK,OAAO;AAAA,cACV,EAAE,KAAK,KAAK,KAAK,OAAO,SAAS,IAAI,GAAG,MAAM;AAAA,cAC9C,2CAA2C,aAAa;AAAA,YAC1D;AAAA,UACF;AAEA,cAAI,gBAAgB,GAAG;AACrB,sBAAM,oBAAM,aAAa;AAAA,UAC3B;AAAA,QACF,OAAO;AACL,eAAK,UAAU,EAAE,WAAO,sBAAQ,KAAK,GAAG,aAAa,MAAM,CAAC;AAC5D,gBAAM;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEQ,UAAU,EAAE,OAAO,YAAY,GAA2C;AAChF,SAAK,KAAK,KAAK,SAAS;AAAA,MACtB,MAAM;AAAA,MACN,WAAW,KAAK,IAAI;AAAA,MACpB,OAAO,KAAK,KAAK;AAAA,MACjB;AAAA,MACA;AAAA,IACF,CAAC;AAAA,EACH;AAAA,EAEA,MAAgB,YAAY;AAE1B,UAAM,cAAc,KAAK,oBAAoB;AAC7C,UAAM,SAAS,YAAY,UAAU;AAErC,QAAI;AACF,aAAO,MAAM;AACX,cAAM,EAAE,MAAM,MAAM,IAAI,MAAM,OAAO,KAAK;AAC1C,YAAI,KAAM;AACV,aAAK,UAAU,KAAK;AAAA,MACtB;AAAA,IACF,SAAS,OAAO;AACd,WAAK,OAAO,MAAM,gCAAgC,KAAK;AAAA,IACzD,UAAE;AACA,aAAO,YAAY;AAAA,IACrB;AAAA,EACF;AAAA,EAEA,MAAgB,iBAAiB;AAC/B,qBAAiB,SAAS,KAAK,OAAO;AACpC,WAAK,OAAO,IAAI,KAAK;AACrB,UAAI,MAAM,SAAS,0BAAmC;AACtD,YAAM,UAAsB;AAAA,QAC1B,MAAM;AAAA,QACN,WAAW,KAAK,IAAI;AAAA,QACpB,WAAW,MAAM;AAAA,QACjB,YAAY;AAAA,QACZ,OAAO,KAAK,KAAK;AAAA,QACjB,iBAAiB,KAAK,MAAM,MAAM,iBAAkB,gBAAgB,GAAI;AAAA,QACxE,UAAU;AAAA,MACZ;AACA,WAAK,KAAK,KAAK,qBAAqB,OAAO;AAAA,IAC7C;AACA,SAAK,OAAO,MAAM;AAAA,EACpB;AAAA,EAIA,kBAAkB,aAAyC;AACzD,SAAK,oBAAoB,UAAU,WAAW;AAAA,EAChD;AAAA,EAEA,oBAAoB;AAClB,SAAK,oBAAoB,aAAa;AAAA,EACxC;AAAA;AAAA,EAGA,UAAU,OAAmB;AAC3B,QAAI,KAAK,MAAM,QAAQ;AACrB,YAAM,IAAI,MAAM,iBAAiB;AAAA,IACnC;AACA,QAAI,KAAK,QAAQ;AACf,YAAM,IAAI,MAAM,kBAAkB;AAAA,IACpC;AAEA,QAAI,KAAK,oBAAoB,MAAM,eAAe,KAAK,kBAAkB;AACvE,UAAI,CAAC,KAAK,WAAW;AACnB,aAAK,YAAY,IAAI,+BAAe,MAAM,YAAY,KAAK,gBAAgB;AAAA,MAC7E;AAAA,IACF;AAEA,QAAI,KAAK,WAAW;AAClB,YAAM,SAAS,KAAK,UAAU,KAAK,KAAK;AACxC,iBAAWC,UAAS,QAAQ;AAC1B,aAAK,MAAM,IAAIA,MAAK;AAAA,MACtB;AAAA,IACF,OAAO;AACL,WAAK,MAAM,IAAI,KAAK;AAAA,IACtB;AAAA,EACF;AAAA;AAAA,EAGA,QAAQ;AACN,QAAI,KAAK,MAAM,QAAQ;AACrB,YAAM,IAAI,MAAM,iBAAiB;AAAA,IACnC;AACA,QAAI,KAAK,QAAQ;AACf,YAAM,IAAI,MAAM,kBAAkB;AAAA,IACpC;AACA,SAAK,MAAM,IAAI,aAAa,cAAc;AAAA,EAC5C;AAAA;AAAA,EAGA,WAAW;AACT,QAAI,KAAK,MAAM,QAAQ;AACrB,YAAM,IAAI,MAAM,iBAAiB;AAAA,IACnC;AACA,QAAI,KAAK,QAAQ;AACf,YAAM,IAAI,MAAM,kBAAkB;AAAA,IACpC;AACA,SAAK,MAAM,MAAM;AAAA,EACnB;AAAA,EAEA,OAA6C;AAC3C,WAAO,KAAK,OAAO,KAAK;AAAA,EAC1B;AAAA;AAAA,EAGA,QAAQ;AACN,SAAK,MAAM,MAAM;AACjB,SAAK,MAAM,MAAM;AACjB,SAAK,OAAO,MAAM;AAClB,SAAK,SAAS;AAAA,EAChB;AAAA,EAEA,CAAC,OAAO,aAAa,IAAkB;AACrC,WAAO;AAAA,EACT;AACF;","names":["SpeechEventType","frame"]}
1
+ {"version":3,"sources":["../../src/stt/stt.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { type AudioFrame, AudioResampler } from '@livekit/rtc-node';\nimport type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';\nimport { EventEmitter } from 'node:events';\nimport type { ReadableStream } from 'node:stream/web';\nimport { APIConnectionError, APIError } from '../_exceptions.js';\nimport { calculateAudioDurationSeconds } from '../audio.js';\nimport { log } from '../log.js';\nimport type { STTMetrics } from '../metrics/base.js';\nimport { DeferredReadableStream } from '../stream/deferred_stream.js';\nimport { type APIConnectOptions, DEFAULT_API_CONNECT_OPTIONS } from '../types.js';\nimport type { AudioBuffer } from '../utils.js';\nimport { AsyncIterableQueue, delay, startSoon, toError } from '../utils.js';\n\n/** Indicates start/middle/end of speech */\nexport enum SpeechEventType {\n /**\n * Indicate the start of speech.\n * If the STT doesn't support this event, this will be emitted at the same time\n * as the first INTERIM_TRANSCRIPT.\n */\n START_OF_SPEECH = 0,\n /**\n * Interim transcript, useful for real-time transcription.\n */\n INTERIM_TRANSCRIPT = 1,\n /**\n * Final transcript, emitted when the STT is confident enough that a certain\n * portion of the speech will not change.\n */\n FINAL_TRANSCRIPT = 2,\n /**\n * Indicate the end of speech, emitted when the user stops speaking.\n * The first alternative is a combination of all the previous FINAL_TRANSCRIPT events.\n */\n END_OF_SPEECH = 3,\n /** Usage event, emitted periodically to indicate usage metrics. */\n RECOGNITION_USAGE = 4,\n /**\n * Preflight transcript, emitted before final transcript when STT has high confidence\n * but hasn't fully committed yet. Includes all pre-committed transcripts including\n * final transcript from the previous STT run.\n */\n PREFLIGHT_TRANSCRIPT = 5,\n}\n\n/** SpeechData contains metadata about this {@link SpeechEvent}. */\nexport interface SpeechData {\n language: string;\n text: string;\n startTime: number;\n endTime: number;\n confidence: number;\n}\n\nexport interface RecognitionUsage {\n audioDuration: number;\n}\n\n/** SpeechEvent is a packet of speech-to-text data. */\nexport interface SpeechEvent {\n type: SpeechEventType;\n alternatives?: [SpeechData, ...SpeechData[]];\n requestId?: string;\n recognitionUsage?: RecognitionUsage;\n}\n\n/**\n * Describes the capabilities of the STT provider.\n *\n * @remarks\n * At present, the framework only supports providers that have a streaming endpoint.\n */\nexport interface STTCapabilities {\n streaming: boolean;\n interimResults: boolean;\n}\n\nexport interface STTError {\n type: 'stt_error';\n timestamp: number;\n label: string;\n error: Error;\n recoverable: boolean;\n}\n\nexport type STTCallbacks = {\n ['metrics_collected']: (metrics: STTMetrics) => void;\n ['error']: (error: STTError) => void;\n};\n\n/**\n * An instance of a speech-to-text adapter.\n *\n * @remarks\n * This class is abstract, and as such cannot be used directly. Instead, use a provider plugin that\n * exports its own child STT class, which inherits this class's methods.\n */\nexport abstract class STT extends (EventEmitter as new () => TypedEmitter<STTCallbacks>) {\n abstract label: string;\n #capabilities: STTCapabilities;\n\n constructor(capabilities: STTCapabilities) {\n super();\n this.#capabilities = capabilities;\n }\n\n /** Returns this STT's capabilities */\n get capabilities(): STTCapabilities {\n return this.#capabilities;\n }\n\n /** Receives an audio buffer and returns transcription in the form of a {@link SpeechEvent} */\n async recognize(frame: AudioBuffer): Promise<SpeechEvent> {\n const startTime = process.hrtime.bigint();\n const event = await this._recognize(frame);\n const durationMs = Number((process.hrtime.bigint() - startTime) / BigInt(1000000));\n this.emit('metrics_collected', {\n type: 'stt_metrics',\n requestId: event.requestId ?? '',\n timestamp: Date.now(),\n durationMs,\n label: this.label,\n audioDurationMs: Math.round(calculateAudioDurationSeconds(frame) * 1000),\n streamed: false,\n });\n return event;\n }\n protected abstract _recognize(frame: AudioBuffer): Promise<SpeechEvent>;\n\n /**\n * Returns a {@link SpeechStream} that can be used to push audio frames and receive\n * transcriptions\n */\n abstract stream(): SpeechStream;\n\n async close(): Promise<void> {\n return;\n }\n}\n\n/**\n * An instance of a speech-to-text stream, as an asynchronous iterable iterator.\n *\n * @example Looping through frames\n * ```ts\n * for await (const event of stream) {\n * if (event.type === SpeechEventType.FINAL_TRANSCRIPT) {\n * console.log(event.alternatives[0].text)\n * }\n * }\n * ```\n *\n * @remarks\n * This class is abstract, and as such cannot be used directly. Instead, use a provider plugin that\n * exports its own child SpeechStream class, which inherits this class's methods.\n */\nexport abstract class SpeechStream implements AsyncIterableIterator<SpeechEvent> {\n protected static readonly FLUSH_SENTINEL = Symbol('FLUSH_SENTINEL');\n protected input = new AsyncIterableQueue<AudioFrame | typeof SpeechStream.FLUSH_SENTINEL>();\n protected output = new AsyncIterableQueue<SpeechEvent>();\n protected queue = new AsyncIterableQueue<SpeechEvent>();\n protected neededSampleRate?: number;\n protected resampler?: AudioResampler;\n abstract label: string;\n protected closed = false;\n #stt: STT;\n private deferredInputStream: DeferredReadableStream<AudioFrame>;\n private logger = log();\n private _connOptions: APIConnectOptions;\n\n constructor(\n stt: STT,\n sampleRate?: number,\n connectionOptions: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,\n ) {\n this.#stt = stt;\n this._connOptions = connectionOptions;\n this.deferredInputStream = new DeferredReadableStream<AudioFrame>();\n this.neededSampleRate = sampleRate;\n this.monitorMetrics();\n this.pumpInput();\n\n // this is a hack to immitate asyncio.create_task so that mainTask\n // is run **after** the constructor has finished. Otherwise we get\n // runtime error when trying to access class variables in the\n // `run` method.\n startSoon(() => this.mainTask().then(() => this.queue.close()));\n }\n\n private async mainTask() {\n for (let i = 0; i < this._connOptions.maxRetry + 1; i++) {\n try {\n return await this.run();\n } catch (error) {\n if (error instanceof APIError) {\n const retryInterval = this._connOptions._intervalForRetry(i);\n\n if (this._connOptions.maxRetry === 0 || !error.retryable) {\n this.emitError({ error, recoverable: false });\n throw error;\n } else if (i === this._connOptions.maxRetry) {\n this.emitError({ error, recoverable: false });\n throw new APIConnectionError({\n message: `failed to recognize speech after ${this._connOptions.maxRetry + 1} attempts`,\n options: { retryable: false },\n });\n } else {\n // Don't emit error event for recoverable errors during retry loop\n // to avoid ERR_UNHANDLED_ERROR or premature session termination\n this.logger.warn(\n { tts: this.#stt.label, attempt: i + 1, error },\n `failed to recognize speech, retrying in ${retryInterval}s`,\n );\n }\n\n if (retryInterval > 0) {\n await delay(retryInterval);\n }\n } else {\n this.emitError({ error: toError(error), recoverable: false });\n throw error;\n }\n }\n }\n }\n\n private emitError({ error, recoverable }: { error: Error; recoverable: boolean }) {\n this.#stt.emit('error', {\n type: 'stt_error',\n timestamp: Date.now(),\n label: this.#stt.label,\n error,\n recoverable,\n });\n }\n\n protected async pumpInput() {\n // TODO(AJS-35): Implement STT with webstreams API\n const inputStream = this.deferredInputStream.stream;\n const reader = inputStream.getReader();\n\n try {\n while (true) {\n const { done, value } = await reader.read();\n if (done) break;\n this.pushFrame(value);\n }\n } catch (error) {\n this.logger.error('Error in STTStream mainTask:', error);\n } finally {\n reader.releaseLock();\n }\n }\n\n protected async monitorMetrics() {\n for await (const event of this.queue) {\n if (!this.output.closed) {\n try {\n this.output.put(event);\n } catch (e) {\n if (e instanceof Error && e.message.includes('Queue is closed')) {\n this.logger.warn(\n { err: e },\n 'Queue closed during transcript processing (expected during disconnect)',\n );\n }\n }\n }\n if (event.type !== SpeechEventType.RECOGNITION_USAGE) continue;\n const metrics: STTMetrics = {\n type: 'stt_metrics',\n timestamp: Date.now(),\n requestId: event.requestId!,\n durationMs: 0,\n label: this.#stt.label,\n audioDurationMs: Math.round(event.recognitionUsage!.audioDuration * 1000),\n streamed: true,\n };\n this.#stt.emit('metrics_collected', metrics);\n }\n if (!this.output.closed) {\n this.output.close();\n }\n }\n\n protected abstract run(): Promise<void>;\n\n updateInputStream(audioStream: ReadableStream<AudioFrame>) {\n this.deferredInputStream.setSource(audioStream);\n }\n\n detachInputStream() {\n this.deferredInputStream.detachSource();\n }\n\n /** Push an audio frame to the STT */\n pushFrame(frame: AudioFrame) {\n if (this.input.closed) {\n throw new Error('Input is closed');\n }\n if (this.closed) {\n throw new Error('Stream is closed');\n }\n\n if (this.neededSampleRate && frame.sampleRate !== this.neededSampleRate) {\n if (!this.resampler) {\n this.resampler = new AudioResampler(frame.sampleRate, this.neededSampleRate);\n }\n }\n\n if (this.resampler) {\n const frames = this.resampler.push(frame);\n for (const frame of frames) {\n this.input.put(frame);\n }\n } else {\n this.input.put(frame);\n }\n }\n\n /** Flush the STT, causing it to process all pending text */\n flush() {\n if (this.input.closed) {\n throw new Error('Input is closed');\n }\n if (this.closed) {\n throw new Error('Stream is closed');\n }\n this.input.put(SpeechStream.FLUSH_SENTINEL);\n }\n\n /** Mark the input as ended and forbid additional pushes */\n endInput() {\n if (this.input.closed) {\n throw new Error('Input is closed');\n }\n if (this.closed) {\n throw new Error('Stream is closed');\n }\n this.input.close();\n }\n\n next(): Promise<IteratorResult<SpeechEvent>> {\n return this.output.next();\n }\n\n /** Close both the input and output of the STT stream */\n close() {\n if (!this.input.closed) this.input.close();\n if (!this.queue.closed) this.queue.close();\n if (!this.output.closed) this.output.close();\n this.closed = true;\n }\n\n [Symbol.asyncIterator](): SpeechStream {\n return this;\n }\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAGA,sBAAgD;AAEhD,yBAA6B;AAE7B,wBAA6C;AAC7C,mBAA8C;AAC9C,iBAAoB;AAEpB,6BAAuC;AACvC,mBAAoE;AAEpE,mBAA8D;AAGvD,IAAK,kBAAL,kBAAKA,qBAAL;AAML,EAAAA,kCAAA,qBAAkB,KAAlB;AAIA,EAAAA,kCAAA,wBAAqB,KAArB;AAKA,EAAAA,kCAAA,sBAAmB,KAAnB;AAKA,EAAAA,kCAAA,mBAAgB,KAAhB;AAEA,EAAAA,kCAAA,uBAAoB,KAApB;AAMA,EAAAA,kCAAA,0BAAuB,KAAvB;AA5BU,SAAAA;AAAA,GAAA;AAmFL,MAAe,YAAa,gCAAsD;AAAA,EAEvF;AAAA,EAEA,YAAY,cAA+B;AACzC,UAAM;AACN,SAAK,gBAAgB;AAAA,EACvB;AAAA;AAAA,EAGA,IAAI,eAAgC;AAClC,WAAO,KAAK;AAAA,EACd;AAAA;AAAA,EAGA,MAAM,UAAU,OAA0C;AACxD,UAAM,YAAY,QAAQ,OAAO,OAAO;AACxC,UAAM,QAAQ,MAAM,KAAK,WAAW,KAAK;AACzC,UAAM,aAAa,QAAQ,QAAQ,OAAO,OAAO,IAAI,aAAa,OAAO,GAAO,CAAC;AACjF,SAAK,KAAK,qBAAqB;AAAA,MAC7B,MAAM;AAAA,MACN,WAAW,MAAM,aAAa;AAAA,MAC9B,WAAW,KAAK,IAAI;AAAA,MACpB;AAAA,MACA,OAAO,KAAK;AAAA,MACZ,iBAAiB,KAAK,UAAM,4CAA8B,KAAK,IAAI,GAAI;AAAA,MACvE,UAAU;AAAA,IACZ,CAAC;AACD,WAAO;AAAA,EACT;AAAA,EASA,MAAM,QAAuB;AAC3B;AAAA,EACF;AACF;AAkBO,MAAe,aAA2D;AAAA,EAC/E,OAA0B,iBAAiB,OAAO,gBAAgB;AAAA,EACxD,QAAQ,IAAI,gCAAoE;AAAA,EAChF,SAAS,IAAI,gCAAgC;AAAA,EAC7C,QAAQ,IAAI,gCAAgC;AAAA,EAC5C;AAAA,EACA;AAAA,EAEA,SAAS;AAAA,EACnB;AAAA,EACQ;AAAA,EACA,aAAS,gBAAI;AAAA,EACb;AAAA,EAER,YACE,KACA,YACA,oBAAuC,0CACvC;AACA,SAAK,OAAO;AACZ,SAAK,eAAe;AACpB,SAAK,sBAAsB,IAAI,8CAAmC;AAClE,SAAK,mBAAmB;AACxB,SAAK,eAAe;AACpB,SAAK,UAAU;AAMf,gCAAU,MAAM,KAAK,SAAS,EAAE,KAAK,MAAM,KAAK,MAAM,MAAM,CAAC,CAAC;AAAA,EAChE;AAAA,EAEA,MAAc,WAAW;AACvB,aAAS,IAAI,GAAG,IAAI,KAAK,aAAa,WAAW,GAAG,KAAK;AACvD,UAAI;AACF,eAAO,MAAM,KAAK,IAAI;AAAA,MACxB,SAAS,OAAO;AACd,YAAI,iBAAiB,4BAAU;AAC7B,gBAAM,gBAAgB,KAAK,aAAa,kBAAkB,CAAC;AAE3D,cAAI,KAAK,aAAa,aAAa,KAAK,CAAC,MAAM,WAAW;AACxD,iBAAK,UAAU,EAAE,OAAO,aAAa,MAAM,CAAC;AAC5C,kBAAM;AAAA,UACR,WAAW,MAAM,KAAK,aAAa,UAAU;AAC3C,iBAAK,UAAU,EAAE,OAAO,aAAa,MAAM,CAAC;AAC5C,kBAAM,IAAI,qCAAmB;AAAA,cAC3B,SAAS,oCAAoC,KAAK,aAAa,WAAW,CAAC;AAAA,cAC3E,SAAS,EAAE,WAAW,MAAM;AAAA,YAC9B,CAAC;AAAA,UACH,OAAO;AAGL,iBAAK,OAAO;AAAA,cACV,EAAE,KAAK,KAAK,KAAK,OAAO,SAAS,IAAI,GAAG,MAAM;AAAA,cAC9C,2CAA2C,aAAa;AAAA,YAC1D;AAAA,UACF;AAEA,cAAI,gBAAgB,GAAG;AACrB,sBAAM,oBAAM,aAAa;AAAA,UAC3B;AAAA,QACF,OAAO;AACL,eAAK,UAAU,EAAE,WAAO,sBAAQ,KAAK,GAAG,aAAa,MAAM,CAAC;AAC5D,gBAAM;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEQ,UAAU,EAAE,OAAO,YAAY,GAA2C;AAChF,SAAK,KAAK,KAAK,SAAS;AAAA,MACtB,MAAM;AAAA,MACN,WAAW,KAAK,IAAI;AAAA,MACpB,OAAO,KAAK,KAAK;AAAA,MACjB;AAAA,MACA;AAAA,IACF,CAAC;AAAA,EACH;AAAA,EAEA,MAAgB,YAAY;AAE1B,UAAM,cAAc,KAAK,oBAAoB;AAC7C,UAAM,SAAS,YAAY,UAAU;AAErC,QAAI;AACF,aAAO,MAAM;AACX,cAAM,EAAE,MAAM,MAAM,IAAI,MAAM,OAAO,KAAK;AAC1C,YAAI,KAAM;AACV,aAAK,UAAU,KAAK;AAAA,MACtB;AAAA,IACF,SAAS,OAAO;AACd,WAAK,OAAO,MAAM,gCAAgC,KAAK;AAAA,IACzD,UAAE;AACA,aAAO,YAAY;AAAA,IACrB;AAAA,EACF;AAAA,EAEA,MAAgB,iBAAiB;AAC/B,qBAAiB,SAAS,KAAK,OAAO;AACpC,UAAI,CAAC,KAAK,OAAO,QAAQ;AACvB,YAAI;AACF,eAAK,OAAO,IAAI,KAAK;AAAA,QACvB,SAAS,GAAG;AACV,cAAI,aAAa,SAAS,EAAE,QAAQ,SAAS,iBAAiB,GAAG;AAC/D,iBAAK,OAAO;AAAA,cACV,EAAE,KAAK,EAAE;AAAA,cACT;AAAA,YACF;AAAA,UACF;AAAA,QACF;AAAA,MACF;AACA,UAAI,MAAM,SAAS,0BAAmC;AACtD,YAAM,UAAsB;AAAA,QAC1B,MAAM;AAAA,QACN,WAAW,KAAK,IAAI;AAAA,QACpB,WAAW,MAAM;AAAA,QACjB,YAAY;AAAA,QACZ,OAAO,KAAK,KAAK;AAAA,QACjB,iBAAiB,KAAK,MAAM,MAAM,iBAAkB,gBAAgB,GAAI;AAAA,QACxE,UAAU;AAAA,MACZ;AACA,WAAK,KAAK,KAAK,qBAAqB,OAAO;AAAA,IAC7C;AACA,QAAI,CAAC,KAAK,OAAO,QAAQ;AACvB,WAAK,OAAO,MAAM;AAAA,IACpB;AAAA,EACF;AAAA,EAIA,kBAAkB,aAAyC;AACzD,SAAK,oBAAoB,UAAU,WAAW;AAAA,EAChD;AAAA,EAEA,oBAAoB;AAClB,SAAK,oBAAoB,aAAa;AAAA,EACxC;AAAA;AAAA,EAGA,UAAU,OAAmB;AAC3B,QAAI,KAAK,MAAM,QAAQ;AACrB,YAAM,IAAI,MAAM,iBAAiB;AAAA,IACnC;AACA,QAAI,KAAK,QAAQ;AACf,YAAM,IAAI,MAAM,kBAAkB;AAAA,IACpC;AAEA,QAAI,KAAK,oBAAoB,MAAM,eAAe,KAAK,kBAAkB;AACvE,UAAI,CAAC,KAAK,WAAW;AACnB,aAAK,YAAY,IAAI,+BAAe,MAAM,YAAY,KAAK,gBAAgB;AAAA,MAC7E;AAAA,IACF;AAEA,QAAI,KAAK,WAAW;AAClB,YAAM,SAAS,KAAK,UAAU,KAAK,KAAK;AACxC,iBAAWC,UAAS,QAAQ;AAC1B,aAAK,MAAM,IAAIA,MAAK;AAAA,MACtB;AAAA,IACF,OAAO;AACL,WAAK,MAAM,IAAI,KAAK;AAAA,IACtB;AAAA,EACF;AAAA;AAAA,EAGA,QAAQ;AACN,QAAI,KAAK,MAAM,QAAQ;AACrB,YAAM,IAAI,MAAM,iBAAiB;AAAA,IACnC;AACA,QAAI,KAAK,QAAQ;AACf,YAAM,IAAI,MAAM,kBAAkB;AAAA,IACpC;AACA,SAAK,MAAM,IAAI,aAAa,cAAc;AAAA,EAC5C;AAAA;AAAA,EAGA,WAAW;AACT,QAAI,KAAK,MAAM,QAAQ;AACrB,YAAM,IAAI,MAAM,iBAAiB;AAAA,IACnC;AACA,QAAI,KAAK,QAAQ;AACf,YAAM,IAAI,MAAM,kBAAkB;AAAA,IACpC;AACA,SAAK,MAAM,MAAM;AAAA,EACnB;AAAA,EAEA,OAA6C;AAC3C,WAAO,KAAK,OAAO,KAAK;AAAA,EAC1B;AAAA;AAAA,EAGA,QAAQ;AACN,QAAI,CAAC,KAAK,MAAM,OAAQ,MAAK,MAAM,MAAM;AACzC,QAAI,CAAC,KAAK,MAAM,OAAQ,MAAK,MAAM,MAAM;AACzC,QAAI,CAAC,KAAK,OAAO,OAAQ,MAAK,OAAO,MAAM;AAC3C,SAAK,SAAS;AAAA,EAChB;AAAA,EAEA,CAAC,OAAO,aAAa,IAAkB;AACrC,WAAO;AAAA,EACT;AACF;","names":["SpeechEventType","frame"]}
@@ -1 +1 @@
1
- {"version":3,"file":"stt.d.ts","sourceRoot":"","sources":["../../src/stt/stt.ts"],"names":[],"mappings":";AAGA,OAAO,EAAE,KAAK,UAAU,EAAE,cAAc,EAAE,MAAM,mBAAmB,CAAC;AACpE,OAAO,KAAK,EAAE,iBAAiB,IAAI,YAAY,EAAE,MAAM,wBAAwB,CAAC;AAEhF,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAC;AAItD,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,oBAAoB,CAAC;AAErD,OAAO,EAAE,KAAK,iBAAiB,EAA+B,MAAM,aAAa,CAAC;AAClF,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,aAAa,CAAC;AAC/C,OAAO,EAAE,kBAAkB,EAA6B,MAAM,aAAa,CAAC;AAE5E,2CAA2C;AAC3C,oBAAY,eAAe;IACzB;;;;OAIG;IACH,eAAe,IAAI;IACnB;;OAEG;IACH,kBAAkB,IAAI;IACtB;;;OAGG;IACH,gBAAgB,IAAI;IACpB;;;OAGG;IACH,aAAa,IAAI;IACjB,mEAAmE;IACnE,iBAAiB,IAAI;IACrB;;;;OAIG;IACH,oBAAoB,IAAI;CACzB;AAED,mEAAmE;AACnE,MAAM,WAAW,UAAU;IACzB,QAAQ,EAAE,MAAM,CAAC;IACjB,IAAI,EAAE,MAAM,CAAC;IACb,SAAS,EAAE,MAAM,CAAC;IAClB,OAAO,EAAE,MAAM,CAAC;IAChB,UAAU,EAAE,MAAM,CAAC;CACpB;AAED,MAAM,WAAW,gBAAgB;IAC/B,aAAa,EAAE,MAAM,CAAC;CACvB;AAED,sDAAsD;AACtD,MAAM,WAAW,WAAW;IAC1B,IAAI,EAAE,eAAe,CAAC;IACtB,YAAY,CAAC,EAAE,CAAC,UAAU,EAAE,GAAG,UAAU,EAAE,CAAC,CAAC;IAC7C,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,gBAAgB,CAAC,EAAE,gBAAgB,CAAC;CACrC;AAED;;;;;GAKG;AACH,MAAM,WAAW,eAAe;IAC9B,SAAS,EAAE,OAAO,CAAC;IACnB,cAAc,EAAE,OAAO,CAAC;CACzB;AAED,MAAM,WAAW,QAAQ;IACvB,IAAI,EAAE,WAAW,CAAC;IAClB,SAAS,EAAE,MAAM,CAAC;IAClB,KAAK,EAAE,MAAM,CAAC;IACd,KAAK,EAAE,KAAK,CAAC;IACb,WAAW,EAAE,OAAO,CAAC;CACtB;AAED,MAAM,MAAM,YAAY,GAAG;IACzB,CAAC,mBAAmB,CAAC,EAAE,CAAC,OAAO,EAAE,UAAU,KAAK,IAAI,CAAC;IACrD,CAAC,OAAO,CAAC,EAAE,CAAC,KAAK,EAAE,QAAQ,KAAK,IAAI,CAAC;CACtC,CAAC;kCAS2D,aAAa,YAAY,CAAC;AAPvF;;;;;;GAMG;AACH,8BAAsB,GAAI,SAAQ,QAAsD;;IACtF,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;gBAGX,YAAY,EAAE,eAAe;IAKzC,sCAAsC;IACtC,IAAI,YAAY,IAAI,eAAe,CAElC;IAED,8FAA8F;IACxF,SAAS,CAAC,KAAK,EAAE,WAAW,GAAG,OAAO,CAAC,WAAW,CAAC;IAezD,SAAS,CAAC,QAAQ,CAAC,UAAU,CAAC,KAAK,EAAE,WAAW,GAAG,OAAO,CAAC,WAAW,CAAC;IAEvE;;;OAGG;IACH,QAAQ,CAAC,MAAM,IAAI,YAAY;IAEzB,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC;CAG7B;AAED;;;;;;;;;;;;;;;GAeG;AACH,8BAAsB,YAAa,YAAW,qBAAqB,CAAC,WAAW,CAAC;;IAC9E,SAAS,CAAC,MAAM,CAAC,QAAQ,CAAC,cAAc,gBAA4B;IACpE,SAAS,CAAC,KAAK,sEAA6E;IAC5F,SAAS,CAAC,MAAM,kCAAyC;IACzD,SAAS,CAAC,KAAK,kCAAyC;IACxD,SAAS,CAAC,gBAAgB,CAAC,EAAE,MAAM,CAAC;IACpC,SAAS,CAAC,SAAS,CAAC,EAAE,cAAc,CAAC;IACrC,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;IACvB,SAAS,CAAC,MAAM,UAAS;IAEzB,OAAO,CAAC,mBAAmB,CAAqC;IAChE,OAAO,CAAC,MAAM,CAAS;IACvB,OAAO,CAAC,YAAY,CAAoB;gBAGtC,GAAG,EAAE,GAAG,EACR,UAAU,CAAC,EAAE,MAAM,EACnB,iBAAiB,GAAE,iBAA+C;YAgBtD,QAAQ;IAqCtB,OAAO,CAAC,SAAS;cAUD,SAAS;cAkBT,cAAc;IAkB9B,SAAS,CAAC,QAAQ,CAAC,GAAG,IAAI,OAAO,CAAC,IAAI,CAAC;IAEvC,iBAAiB,CAAC,WAAW,EAAE,cAAc,CAAC,UAAU,CAAC;IAIzD,iBAAiB;IAIjB,qCAAqC;IACrC,SAAS,CAAC,KAAK,EAAE,UAAU;IAwB3B,4DAA4D;IAC5D,KAAK;IAUL,2DAA2D;IAC3D,QAAQ;IAUR,IAAI,IAAI,OAAO,CAAC,cAAc,CAAC,WAAW,CAAC,CAAC;IAI5C,wDAAwD;IACxD,KAAK;IAOL,CAAC,MAAM,CAAC,aAAa,CAAC,IAAI,YAAY;CAGvC"}
1
+ {"version":3,"file":"stt.d.ts","sourceRoot":"","sources":["../../src/stt/stt.ts"],"names":[],"mappings":";AAGA,OAAO,EAAE,KAAK,UAAU,EAAE,cAAc,EAAE,MAAM,mBAAmB,CAAC;AACpE,OAAO,KAAK,EAAE,iBAAiB,IAAI,YAAY,EAAE,MAAM,wBAAwB,CAAC;AAEhF,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAC;AAItD,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,oBAAoB,CAAC;AAErD,OAAO,EAAE,KAAK,iBAAiB,EAA+B,MAAM,aAAa,CAAC;AAClF,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,aAAa,CAAC;AAC/C,OAAO,EAAE,kBAAkB,EAA6B,MAAM,aAAa,CAAC;AAE5E,2CAA2C;AAC3C,oBAAY,eAAe;IACzB;;;;OAIG;IACH,eAAe,IAAI;IACnB;;OAEG;IACH,kBAAkB,IAAI;IACtB;;;OAGG;IACH,gBAAgB,IAAI;IACpB;;;OAGG;IACH,aAAa,IAAI;IACjB,mEAAmE;IACnE,iBAAiB,IAAI;IACrB;;;;OAIG;IACH,oBAAoB,IAAI;CACzB;AAED,mEAAmE;AACnE,MAAM,WAAW,UAAU;IACzB,QAAQ,EAAE,MAAM,CAAC;IACjB,IAAI,EAAE,MAAM,CAAC;IACb,SAAS,EAAE,MAAM,CAAC;IAClB,OAAO,EAAE,MAAM,CAAC;IAChB,UAAU,EAAE,MAAM,CAAC;CACpB;AAED,MAAM,WAAW,gBAAgB;IAC/B,aAAa,EAAE,MAAM,CAAC;CACvB;AAED,sDAAsD;AACtD,MAAM,WAAW,WAAW;IAC1B,IAAI,EAAE,eAAe,CAAC;IACtB,YAAY,CAAC,EAAE,CAAC,UAAU,EAAE,GAAG,UAAU,EAAE,CAAC,CAAC;IAC7C,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,gBAAgB,CAAC,EAAE,gBAAgB,CAAC;CACrC;AAED;;;;;GAKG;AACH,MAAM,WAAW,eAAe;IAC9B,SAAS,EAAE,OAAO,CAAC;IACnB,cAAc,EAAE,OAAO,CAAC;CACzB;AAED,MAAM,WAAW,QAAQ;IACvB,IAAI,EAAE,WAAW,CAAC;IAClB,SAAS,EAAE,MAAM,CAAC;IAClB,KAAK,EAAE,MAAM,CAAC;IACd,KAAK,EAAE,KAAK,CAAC;IACb,WAAW,EAAE,OAAO,CAAC;CACtB;AAED,MAAM,MAAM,YAAY,GAAG;IACzB,CAAC,mBAAmB,CAAC,EAAE,CAAC,OAAO,EAAE,UAAU,KAAK,IAAI,CAAC;IACrD,CAAC,OAAO,CAAC,EAAE,CAAC,KAAK,EAAE,QAAQ,KAAK,IAAI,CAAC;CACtC,CAAC;kCAS2D,aAAa,YAAY,CAAC;AAPvF;;;;;;GAMG;AACH,8BAAsB,GAAI,SAAQ,QAAsD;;IACtF,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;gBAGX,YAAY,EAAE,eAAe;IAKzC,sCAAsC;IACtC,IAAI,YAAY,IAAI,eAAe,CAElC;IAED,8FAA8F;IACxF,SAAS,CAAC,KAAK,EAAE,WAAW,GAAG,OAAO,CAAC,WAAW,CAAC;IAezD,SAAS,CAAC,QAAQ,CAAC,UAAU,CAAC,KAAK,EAAE,WAAW,GAAG,OAAO,CAAC,WAAW,CAAC;IAEvE;;;OAGG;IACH,QAAQ,CAAC,MAAM,IAAI,YAAY;IAEzB,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC;CAG7B;AAED;;;;;;;;;;;;;;;GAeG;AACH,8BAAsB,YAAa,YAAW,qBAAqB,CAAC,WAAW,CAAC;;IAC9E,SAAS,CAAC,MAAM,CAAC,QAAQ,CAAC,cAAc,gBAA4B;IACpE,SAAS,CAAC,KAAK,sEAA6E;IAC5F,SAAS,CAAC,MAAM,kCAAyC;IACzD,SAAS,CAAC,KAAK,kCAAyC;IACxD,SAAS,CAAC,gBAAgB,CAAC,EAAE,MAAM,CAAC;IACpC,SAAS,CAAC,SAAS,CAAC,EAAE,cAAc,CAAC;IACrC,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;IACvB,SAAS,CAAC,MAAM,UAAS;IAEzB,OAAO,CAAC,mBAAmB,CAAqC;IAChE,OAAO,CAAC,MAAM,CAAS;IACvB,OAAO,CAAC,YAAY,CAAoB;gBAGtC,GAAG,EAAE,GAAG,EACR,UAAU,CAAC,EAAE,MAAM,EACnB,iBAAiB,GAAE,iBAA+C;YAgBtD,QAAQ;IAqCtB,OAAO,CAAC,SAAS;cAUD,SAAS;cAkBT,cAAc;IA+B9B,SAAS,CAAC,QAAQ,CAAC,GAAG,IAAI,OAAO,CAAC,IAAI,CAAC;IAEvC,iBAAiB,CAAC,WAAW,EAAE,cAAc,CAAC,UAAU,CAAC;IAIzD,iBAAiB;IAIjB,qCAAqC;IACrC,SAAS,CAAC,KAAK,EAAE,UAAU;IAwB3B,4DAA4D;IAC5D,KAAK;IAUL,2DAA2D;IAC3D,QAAQ;IAUR,IAAI,IAAI,OAAO,CAAC,cAAc,CAAC,WAAW,CAAC,CAAC;IAI5C,wDAAwD;IACxD,KAAK;IAOL,CAAC,MAAM,CAAC,aAAa,CAAC,IAAI,YAAY;CAGvC"}
package/dist/stt/stt.js CHANGED
@@ -124,7 +124,18 @@ class SpeechStream {
124
124
  }
125
125
  async monitorMetrics() {
126
126
  for await (const event of this.queue) {
127
- this.output.put(event);
127
+ if (!this.output.closed) {
128
+ try {
129
+ this.output.put(event);
130
+ } catch (e) {
131
+ if (e instanceof Error && e.message.includes("Queue is closed")) {
132
+ this.logger.warn(
133
+ { err: e },
134
+ "Queue closed during transcript processing (expected during disconnect)"
135
+ );
136
+ }
137
+ }
138
+ }
128
139
  if (event.type !== 4 /* RECOGNITION_USAGE */) continue;
129
140
  const metrics = {
130
141
  type: "stt_metrics",
@@ -137,7 +148,9 @@ class SpeechStream {
137
148
  };
138
149
  this.#stt.emit("metrics_collected", metrics);
139
150
  }
140
- this.output.close();
151
+ if (!this.output.closed) {
152
+ this.output.close();
153
+ }
141
154
  }
142
155
  updateInputStream(audioStream) {
143
156
  this.deferredInputStream.setSource(audioStream);
@@ -192,9 +205,9 @@ class SpeechStream {
192
205
  }
193
206
  /** Close both the input and output of the STT stream */
194
207
  close() {
195
- this.input.close();
196
- this.queue.close();
197
- this.output.close();
208
+ if (!this.input.closed) this.input.close();
209
+ if (!this.queue.closed) this.queue.close();
210
+ if (!this.output.closed) this.output.close();
198
211
  this.closed = true;
199
212
  }
200
213
  [Symbol.asyncIterator]() {
@@ -1 +1 @@
1
- {"version":3,"sources":["../../src/stt/stt.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { type AudioFrame, AudioResampler } from '@livekit/rtc-node';\nimport type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';\nimport { EventEmitter } from 'node:events';\nimport type { ReadableStream } from 'node:stream/web';\nimport { APIConnectionError, APIError } from '../_exceptions.js';\nimport { calculateAudioDurationSeconds } from '../audio.js';\nimport { log } from '../log.js';\nimport type { STTMetrics } from '../metrics/base.js';\nimport { DeferredReadableStream } from '../stream/deferred_stream.js';\nimport { type APIConnectOptions, DEFAULT_API_CONNECT_OPTIONS } from '../types.js';\nimport type { AudioBuffer } from '../utils.js';\nimport { AsyncIterableQueue, delay, startSoon, toError } from '../utils.js';\n\n/** Indicates start/middle/end of speech */\nexport enum SpeechEventType {\n /**\n * Indicate the start of speech.\n * If the STT doesn't support this event, this will be emitted at the same time\n * as the first INTERIM_TRANSCRIPT.\n */\n START_OF_SPEECH = 0,\n /**\n * Interim transcript, useful for real-time transcription.\n */\n INTERIM_TRANSCRIPT = 1,\n /**\n * Final transcript, emitted when the STT is confident enough that a certain\n * portion of the speech will not change.\n */\n FINAL_TRANSCRIPT = 2,\n /**\n * Indicate the end of speech, emitted when the user stops speaking.\n * The first alternative is a combination of all the previous FINAL_TRANSCRIPT events.\n */\n END_OF_SPEECH = 3,\n /** Usage event, emitted periodically to indicate usage metrics. */\n RECOGNITION_USAGE = 4,\n /**\n * Preflight transcript, emitted before final transcript when STT has high confidence\n * but hasn't fully committed yet. Includes all pre-committed transcripts including\n * final transcript from the previous STT run.\n */\n PREFLIGHT_TRANSCRIPT = 5,\n}\n\n/** SpeechData contains metadata about this {@link SpeechEvent}. */\nexport interface SpeechData {\n language: string;\n text: string;\n startTime: number;\n endTime: number;\n confidence: number;\n}\n\nexport interface RecognitionUsage {\n audioDuration: number;\n}\n\n/** SpeechEvent is a packet of speech-to-text data. */\nexport interface SpeechEvent {\n type: SpeechEventType;\n alternatives?: [SpeechData, ...SpeechData[]];\n requestId?: string;\n recognitionUsage?: RecognitionUsage;\n}\n\n/**\n * Describes the capabilities of the STT provider.\n *\n * @remarks\n * At present, the framework only supports providers that have a streaming endpoint.\n */\nexport interface STTCapabilities {\n streaming: boolean;\n interimResults: boolean;\n}\n\nexport interface STTError {\n type: 'stt_error';\n timestamp: number;\n label: string;\n error: Error;\n recoverable: boolean;\n}\n\nexport type STTCallbacks = {\n ['metrics_collected']: (metrics: STTMetrics) => void;\n ['error']: (error: STTError) => void;\n};\n\n/**\n * An instance of a speech-to-text adapter.\n *\n * @remarks\n * This class is abstract, and as such cannot be used directly. Instead, use a provider plugin that\n * exports its own child STT class, which inherits this class's methods.\n */\nexport abstract class STT extends (EventEmitter as new () => TypedEmitter<STTCallbacks>) {\n abstract label: string;\n #capabilities: STTCapabilities;\n\n constructor(capabilities: STTCapabilities) {\n super();\n this.#capabilities = capabilities;\n }\n\n /** Returns this STT's capabilities */\n get capabilities(): STTCapabilities {\n return this.#capabilities;\n }\n\n /** Receives an audio buffer and returns transcription in the form of a {@link SpeechEvent} */\n async recognize(frame: AudioBuffer): Promise<SpeechEvent> {\n const startTime = process.hrtime.bigint();\n const event = await this._recognize(frame);\n const durationMs = Number((process.hrtime.bigint() - startTime) / BigInt(1000000));\n this.emit('metrics_collected', {\n type: 'stt_metrics',\n requestId: event.requestId ?? '',\n timestamp: Date.now(),\n durationMs,\n label: this.label,\n audioDurationMs: Math.round(calculateAudioDurationSeconds(frame) * 1000),\n streamed: false,\n });\n return event;\n }\n protected abstract _recognize(frame: AudioBuffer): Promise<SpeechEvent>;\n\n /**\n * Returns a {@link SpeechStream} that can be used to push audio frames and receive\n * transcriptions\n */\n abstract stream(): SpeechStream;\n\n async close(): Promise<void> {\n return;\n }\n}\n\n/**\n * An instance of a speech-to-text stream, as an asynchronous iterable iterator.\n *\n * @example Looping through frames\n * ```ts\n * for await (const event of stream) {\n * if (event.type === SpeechEventType.FINAL_TRANSCRIPT) {\n * console.log(event.alternatives[0].text)\n * }\n * }\n * ```\n *\n * @remarks\n * This class is abstract, and as such cannot be used directly. Instead, use a provider plugin that\n * exports its own child SpeechStream class, which inherits this class's methods.\n */\nexport abstract class SpeechStream implements AsyncIterableIterator<SpeechEvent> {\n protected static readonly FLUSH_SENTINEL = Symbol('FLUSH_SENTINEL');\n protected input = new AsyncIterableQueue<AudioFrame | typeof SpeechStream.FLUSH_SENTINEL>();\n protected output = new AsyncIterableQueue<SpeechEvent>();\n protected queue = new AsyncIterableQueue<SpeechEvent>();\n protected neededSampleRate?: number;\n protected resampler?: AudioResampler;\n abstract label: string;\n protected closed = false;\n #stt: STT;\n private deferredInputStream: DeferredReadableStream<AudioFrame>;\n private logger = log();\n private _connOptions: APIConnectOptions;\n\n constructor(\n stt: STT,\n sampleRate?: number,\n connectionOptions: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,\n ) {\n this.#stt = stt;\n this._connOptions = connectionOptions;\n this.deferredInputStream = new DeferredReadableStream<AudioFrame>();\n this.neededSampleRate = sampleRate;\n this.monitorMetrics();\n this.pumpInput();\n\n // this is a hack to immitate asyncio.create_task so that mainTask\n // is run **after** the constructor has finished. Otherwise we get\n // runtime error when trying to access class variables in the\n // `run` method.\n startSoon(() => this.mainTask().then(() => this.queue.close()));\n }\n\n private async mainTask() {\n for (let i = 0; i < this._connOptions.maxRetry + 1; i++) {\n try {\n return await this.run();\n } catch (error) {\n if (error instanceof APIError) {\n const retryInterval = this._connOptions._intervalForRetry(i);\n\n if (this._connOptions.maxRetry === 0 || !error.retryable) {\n this.emitError({ error, recoverable: false });\n throw error;\n } else if (i === this._connOptions.maxRetry) {\n this.emitError({ error, recoverable: false });\n throw new APIConnectionError({\n message: `failed to recognize speech after ${this._connOptions.maxRetry + 1} attempts`,\n options: { retryable: false },\n });\n } else {\n // Don't emit error event for recoverable errors during retry loop\n // to avoid ERR_UNHANDLED_ERROR or premature session termination\n this.logger.warn(\n { tts: this.#stt.label, attempt: i + 1, error },\n `failed to recognize speech, retrying in ${retryInterval}s`,\n );\n }\n\n if (retryInterval > 0) {\n await delay(retryInterval);\n }\n } else {\n this.emitError({ error: toError(error), recoverable: false });\n throw error;\n }\n }\n }\n }\n\n private emitError({ error, recoverable }: { error: Error; recoverable: boolean }) {\n this.#stt.emit('error', {\n type: 'stt_error',\n timestamp: Date.now(),\n label: this.#stt.label,\n error,\n recoverable,\n });\n }\n\n protected async pumpInput() {\n // TODO(AJS-35): Implement STT with webstreams API\n const inputStream = this.deferredInputStream.stream;\n const reader = inputStream.getReader();\n\n try {\n while (true) {\n const { done, value } = await reader.read();\n if (done) break;\n this.pushFrame(value);\n }\n } catch (error) {\n this.logger.error('Error in STTStream mainTask:', error);\n } finally {\n reader.releaseLock();\n }\n }\n\n protected async monitorMetrics() {\n for await (const event of this.queue) {\n this.output.put(event);\n if (event.type !== SpeechEventType.RECOGNITION_USAGE) continue;\n const metrics: STTMetrics = {\n type: 'stt_metrics',\n timestamp: Date.now(),\n requestId: event.requestId!,\n durationMs: 0,\n label: this.#stt.label,\n audioDurationMs: Math.round(event.recognitionUsage!.audioDuration * 1000),\n streamed: true,\n };\n this.#stt.emit('metrics_collected', metrics);\n }\n this.output.close();\n }\n\n protected abstract run(): Promise<void>;\n\n updateInputStream(audioStream: ReadableStream<AudioFrame>) {\n this.deferredInputStream.setSource(audioStream);\n }\n\n detachInputStream() {\n this.deferredInputStream.detachSource();\n }\n\n /** Push an audio frame to the STT */\n pushFrame(frame: AudioFrame) {\n if (this.input.closed) {\n throw new Error('Input is closed');\n }\n if (this.closed) {\n throw new Error('Stream is closed');\n }\n\n if (this.neededSampleRate && frame.sampleRate !== this.neededSampleRate) {\n if (!this.resampler) {\n this.resampler = new AudioResampler(frame.sampleRate, this.neededSampleRate);\n }\n }\n\n if (this.resampler) {\n const frames = this.resampler.push(frame);\n for (const frame of frames) {\n this.input.put(frame);\n }\n } else {\n this.input.put(frame);\n }\n }\n\n /** Flush the STT, causing it to process all pending text */\n flush() {\n if (this.input.closed) {\n throw new Error('Input is closed');\n }\n if (this.closed) {\n throw new Error('Stream is closed');\n }\n this.input.put(SpeechStream.FLUSH_SENTINEL);\n }\n\n /** Mark the input as ended and forbid additional pushes */\n endInput() {\n if (this.input.closed) {\n throw new Error('Input is closed');\n }\n if (this.closed) {\n throw new Error('Stream is closed');\n }\n this.input.close();\n }\n\n next(): Promise<IteratorResult<SpeechEvent>> {\n return this.output.next();\n }\n\n /** Close both the input and output of the STT stream */\n close() {\n this.input.close();\n this.queue.close();\n this.output.close();\n this.closed = true;\n }\n\n [Symbol.asyncIterator](): SpeechStream {\n return this;\n }\n}\n"],"mappings":"AAGA,SAA0B,sBAAsB;AAEhD,SAAS,oBAAoB;AAE7B,SAAS,oBAAoB,gBAAgB;AAC7C,SAAS,qCAAqC;AAC9C,SAAS,WAAW;AAEpB,SAAS,8BAA8B;AACvC,SAAiC,mCAAmC;AAEpE,SAAS,oBAAoB,OAAO,WAAW,eAAe;AAGvD,IAAK,kBAAL,kBAAKA,qBAAL;AAML,EAAAA,kCAAA,qBAAkB,KAAlB;AAIA,EAAAA,kCAAA,wBAAqB,KAArB;AAKA,EAAAA,kCAAA,sBAAmB,KAAnB;AAKA,EAAAA,kCAAA,mBAAgB,KAAhB;AAEA,EAAAA,kCAAA,uBAAoB,KAApB;AAMA,EAAAA,kCAAA,0BAAuB,KAAvB;AA5BU,SAAAA;AAAA,GAAA;AAmFL,MAAe,YAAa,aAAsD;AAAA,EAEvF;AAAA,EAEA,YAAY,cAA+B;AACzC,UAAM;AACN,SAAK,gBAAgB;AAAA,EACvB;AAAA;AAAA,EAGA,IAAI,eAAgC;AAClC,WAAO,KAAK;AAAA,EACd;AAAA;AAAA,EAGA,MAAM,UAAU,OAA0C;AACxD,UAAM,YAAY,QAAQ,OAAO,OAAO;AACxC,UAAM,QAAQ,MAAM,KAAK,WAAW,KAAK;AACzC,UAAM,aAAa,QAAQ,QAAQ,OAAO,OAAO,IAAI,aAAa,OAAO,GAAO,CAAC;AACjF,SAAK,KAAK,qBAAqB;AAAA,MAC7B,MAAM;AAAA,MACN,WAAW,MAAM,aAAa;AAAA,MAC9B,WAAW,KAAK,IAAI;AAAA,MACpB;AAAA,MACA,OAAO,KAAK;AAAA,MACZ,iBAAiB,KAAK,MAAM,8BAA8B,KAAK,IAAI,GAAI;AAAA,MACvE,UAAU;AAAA,IACZ,CAAC;AACD,WAAO;AAAA,EACT;AAAA,EASA,MAAM,QAAuB;AAC3B;AAAA,EACF;AACF;AAkBO,MAAe,aAA2D;AAAA,EAC/E,OAA0B,iBAAiB,OAAO,gBAAgB;AAAA,EACxD,QAAQ,IAAI,mBAAoE;AAAA,EAChF,SAAS,IAAI,mBAAgC;AAAA,EAC7C,QAAQ,IAAI,mBAAgC;AAAA,EAC5C;AAAA,EACA;AAAA,EAEA,SAAS;AAAA,EACnB;AAAA,EACQ;AAAA,EACA,SAAS,IAAI;AAAA,EACb;AAAA,EAER,YACE,KACA,YACA,oBAAuC,6BACvC;AACA,SAAK,OAAO;AACZ,SAAK,eAAe;AACpB,SAAK,sBAAsB,IAAI,uBAAmC;AAClE,SAAK,mBAAmB;AACxB,SAAK,eAAe;AACpB,SAAK,UAAU;AAMf,cAAU,MAAM,KAAK,SAAS,EAAE,KAAK,MAAM,KAAK,MAAM,MAAM,CAAC,CAAC;AAAA,EAChE;AAAA,EAEA,MAAc,WAAW;AACvB,aAAS,IAAI,GAAG,IAAI,KAAK,aAAa,WAAW,GAAG,KAAK;AACvD,UAAI;AACF,eAAO,MAAM,KAAK,IAAI;AAAA,MACxB,SAAS,OAAO;AACd,YAAI,iBAAiB,UAAU;AAC7B,gBAAM,gBAAgB,KAAK,aAAa,kBAAkB,CAAC;AAE3D,cAAI,KAAK,aAAa,aAAa,KAAK,CAAC,MAAM,WAAW;AACxD,iBAAK,UAAU,EAAE,OAAO,aAAa,MAAM,CAAC;AAC5C,kBAAM;AAAA,UACR,WAAW,MAAM,KAAK,aAAa,UAAU;AAC3C,iBAAK,UAAU,EAAE,OAAO,aAAa,MAAM,CAAC;AAC5C,kBAAM,IAAI,mBAAmB;AAAA,cAC3B,SAAS,oCAAoC,KAAK,aAAa,WAAW,CAAC;AAAA,cAC3E,SAAS,EAAE,WAAW,MAAM;AAAA,YAC9B,CAAC;AAAA,UACH,OAAO;AAGL,iBAAK,OAAO;AAAA,cACV,EAAE,KAAK,KAAK,KAAK,OAAO,SAAS,IAAI,GAAG,MAAM;AAAA,cAC9C,2CAA2C,aAAa;AAAA,YAC1D;AAAA,UACF;AAEA,cAAI,gBAAgB,GAAG;AACrB,kBAAM,MAAM,aAAa;AAAA,UAC3B;AAAA,QACF,OAAO;AACL,eAAK,UAAU,EAAE,OAAO,QAAQ,KAAK,GAAG,aAAa,MAAM,CAAC;AAC5D,gBAAM;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEQ,UAAU,EAAE,OAAO,YAAY,GAA2C;AAChF,SAAK,KAAK,KAAK,SAAS;AAAA,MACtB,MAAM;AAAA,MACN,WAAW,KAAK,IAAI;AAAA,MACpB,OAAO,KAAK,KAAK;AAAA,MACjB;AAAA,MACA;AAAA,IACF,CAAC;AAAA,EACH;AAAA,EAEA,MAAgB,YAAY;AAE1B,UAAM,cAAc,KAAK,oBAAoB;AAC7C,UAAM,SAAS,YAAY,UAAU;AAErC,QAAI;AACF,aAAO,MAAM;AACX,cAAM,EAAE,MAAM,MAAM,IAAI,MAAM,OAAO,KAAK;AAC1C,YAAI,KAAM;AACV,aAAK,UAAU,KAAK;AAAA,MACtB;AAAA,IACF,SAAS,OAAO;AACd,WAAK,OAAO,MAAM,gCAAgC,KAAK;AAAA,IACzD,UAAE;AACA,aAAO,YAAY;AAAA,IACrB;AAAA,EACF;AAAA,EAEA,MAAgB,iBAAiB;AAC/B,qBAAiB,SAAS,KAAK,OAAO;AACpC,WAAK,OAAO,IAAI,KAAK;AACrB,UAAI,MAAM,SAAS,0BAAmC;AACtD,YAAM,UAAsB;AAAA,QAC1B,MAAM;AAAA,QACN,WAAW,KAAK,IAAI;AAAA,QACpB,WAAW,MAAM;AAAA,QACjB,YAAY;AAAA,QACZ,OAAO,KAAK,KAAK;AAAA,QACjB,iBAAiB,KAAK,MAAM,MAAM,iBAAkB,gBAAgB,GAAI;AAAA,QACxE,UAAU;AAAA,MACZ;AACA,WAAK,KAAK,KAAK,qBAAqB,OAAO;AAAA,IAC7C;AACA,SAAK,OAAO,MAAM;AAAA,EACpB;AAAA,EAIA,kBAAkB,aAAyC;AACzD,SAAK,oBAAoB,UAAU,WAAW;AAAA,EAChD;AAAA,EAEA,oBAAoB;AAClB,SAAK,oBAAoB,aAAa;AAAA,EACxC;AAAA;AAAA,EAGA,UAAU,OAAmB;AAC3B,QAAI,KAAK,MAAM,QAAQ;AACrB,YAAM,IAAI,MAAM,iBAAiB;AAAA,IACnC;AACA,QAAI,KAAK,QAAQ;AACf,YAAM,IAAI,MAAM,kBAAkB;AAAA,IACpC;AAEA,QAAI,KAAK,oBAAoB,MAAM,eAAe,KAAK,kBAAkB;AACvE,UAAI,CAAC,KAAK,WAAW;AACnB,aAAK,YAAY,IAAI,eAAe,MAAM,YAAY,KAAK,gBAAgB;AAAA,MAC7E;AAAA,IACF;AAEA,QAAI,KAAK,WAAW;AAClB,YAAM,SAAS,KAAK,UAAU,KAAK,KAAK;AACxC,iBAAWC,UAAS,QAAQ;AAC1B,aAAK,MAAM,IAAIA,MAAK;AAAA,MACtB;AAAA,IACF,OAAO;AACL,WAAK,MAAM,IAAI,KAAK;AAAA,IACtB;AAAA,EACF;AAAA;AAAA,EAGA,QAAQ;AACN,QAAI,KAAK,MAAM,QAAQ;AACrB,YAAM,IAAI,MAAM,iBAAiB;AAAA,IACnC;AACA,QAAI,KAAK,QAAQ;AACf,YAAM,IAAI,MAAM,kBAAkB;AAAA,IACpC;AACA,SAAK,MAAM,IAAI,aAAa,cAAc;AAAA,EAC5C;AAAA;AAAA,EAGA,WAAW;AACT,QAAI,KAAK,MAAM,QAAQ;AACrB,YAAM,IAAI,MAAM,iBAAiB;AAAA,IACnC;AACA,QAAI,KAAK,QAAQ;AACf,YAAM,IAAI,MAAM,kBAAkB;AAAA,IACpC;AACA,SAAK,MAAM,MAAM;AAAA,EACnB;AAAA,EAEA,OAA6C;AAC3C,WAAO,KAAK,OAAO,KAAK;AAAA,EAC1B;AAAA;AAAA,EAGA,QAAQ;AACN,SAAK,MAAM,MAAM;AACjB,SAAK,MAAM,MAAM;AACjB,SAAK,OAAO,MAAM;AAClB,SAAK,SAAS;AAAA,EAChB;AAAA,EAEA,CAAC,OAAO,aAAa,IAAkB;AACrC,WAAO;AAAA,EACT;AACF;","names":["SpeechEventType","frame"]}
1
+ {"version":3,"sources":["../../src/stt/stt.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { type AudioFrame, AudioResampler } from '@livekit/rtc-node';\nimport type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';\nimport { EventEmitter } from 'node:events';\nimport type { ReadableStream } from 'node:stream/web';\nimport { APIConnectionError, APIError } from '../_exceptions.js';\nimport { calculateAudioDurationSeconds } from '../audio.js';\nimport { log } from '../log.js';\nimport type { STTMetrics } from '../metrics/base.js';\nimport { DeferredReadableStream } from '../stream/deferred_stream.js';\nimport { type APIConnectOptions, DEFAULT_API_CONNECT_OPTIONS } from '../types.js';\nimport type { AudioBuffer } from '../utils.js';\nimport { AsyncIterableQueue, delay, startSoon, toError } from '../utils.js';\n\n/** Indicates start/middle/end of speech */\nexport enum SpeechEventType {\n /**\n * Indicate the start of speech.\n * If the STT doesn't support this event, this will be emitted at the same time\n * as the first INTERIM_TRANSCRIPT.\n */\n START_OF_SPEECH = 0,\n /**\n * Interim transcript, useful for real-time transcription.\n */\n INTERIM_TRANSCRIPT = 1,\n /**\n * Final transcript, emitted when the STT is confident enough that a certain\n * portion of the speech will not change.\n */\n FINAL_TRANSCRIPT = 2,\n /**\n * Indicate the end of speech, emitted when the user stops speaking.\n * The first alternative is a combination of all the previous FINAL_TRANSCRIPT events.\n */\n END_OF_SPEECH = 3,\n /** Usage event, emitted periodically to indicate usage metrics. */\n RECOGNITION_USAGE = 4,\n /**\n * Preflight transcript, emitted before final transcript when STT has high confidence\n * but hasn't fully committed yet. Includes all pre-committed transcripts including\n * final transcript from the previous STT run.\n */\n PREFLIGHT_TRANSCRIPT = 5,\n}\n\n/** SpeechData contains metadata about this {@link SpeechEvent}. */\nexport interface SpeechData {\n language: string;\n text: string;\n startTime: number;\n endTime: number;\n confidence: number;\n}\n\nexport interface RecognitionUsage {\n audioDuration: number;\n}\n\n/** SpeechEvent is a packet of speech-to-text data. */\nexport interface SpeechEvent {\n type: SpeechEventType;\n alternatives?: [SpeechData, ...SpeechData[]];\n requestId?: string;\n recognitionUsage?: RecognitionUsage;\n}\n\n/**\n * Describes the capabilities of the STT provider.\n *\n * @remarks\n * At present, the framework only supports providers that have a streaming endpoint.\n */\nexport interface STTCapabilities {\n streaming: boolean;\n interimResults: boolean;\n}\n\nexport interface STTError {\n type: 'stt_error';\n timestamp: number;\n label: string;\n error: Error;\n recoverable: boolean;\n}\n\nexport type STTCallbacks = {\n ['metrics_collected']: (metrics: STTMetrics) => void;\n ['error']: (error: STTError) => void;\n};\n\n/**\n * An instance of a speech-to-text adapter.\n *\n * @remarks\n * This class is abstract, and as such cannot be used directly. Instead, use a provider plugin that\n * exports its own child STT class, which inherits this class's methods.\n */\nexport abstract class STT extends (EventEmitter as new () => TypedEmitter<STTCallbacks>) {\n abstract label: string;\n #capabilities: STTCapabilities;\n\n constructor(capabilities: STTCapabilities) {\n super();\n this.#capabilities = capabilities;\n }\n\n /** Returns this STT's capabilities */\n get capabilities(): STTCapabilities {\n return this.#capabilities;\n }\n\n /** Receives an audio buffer and returns transcription in the form of a {@link SpeechEvent} */\n async recognize(frame: AudioBuffer): Promise<SpeechEvent> {\n const startTime = process.hrtime.bigint();\n const event = await this._recognize(frame);\n const durationMs = Number((process.hrtime.bigint() - startTime) / BigInt(1000000));\n this.emit('metrics_collected', {\n type: 'stt_metrics',\n requestId: event.requestId ?? '',\n timestamp: Date.now(),\n durationMs,\n label: this.label,\n audioDurationMs: Math.round(calculateAudioDurationSeconds(frame) * 1000),\n streamed: false,\n });\n return event;\n }\n protected abstract _recognize(frame: AudioBuffer): Promise<SpeechEvent>;\n\n /**\n * Returns a {@link SpeechStream} that can be used to push audio frames and receive\n * transcriptions\n */\n abstract stream(): SpeechStream;\n\n async close(): Promise<void> {\n return;\n }\n}\n\n/**\n * An instance of a speech-to-text stream, as an asynchronous iterable iterator.\n *\n * @example Looping through frames\n * ```ts\n * for await (const event of stream) {\n * if (event.type === SpeechEventType.FINAL_TRANSCRIPT) {\n * console.log(event.alternatives[0].text)\n * }\n * }\n * ```\n *\n * @remarks\n * This class is abstract, and as such cannot be used directly. Instead, use a provider plugin that\n * exports its own child SpeechStream class, which inherits this class's methods.\n */\nexport abstract class SpeechStream implements AsyncIterableIterator<SpeechEvent> {\n protected static readonly FLUSH_SENTINEL = Symbol('FLUSH_SENTINEL');\n protected input = new AsyncIterableQueue<AudioFrame | typeof SpeechStream.FLUSH_SENTINEL>();\n protected output = new AsyncIterableQueue<SpeechEvent>();\n protected queue = new AsyncIterableQueue<SpeechEvent>();\n protected neededSampleRate?: number;\n protected resampler?: AudioResampler;\n abstract label: string;\n protected closed = false;\n #stt: STT;\n private deferredInputStream: DeferredReadableStream<AudioFrame>;\n private logger = log();\n private _connOptions: APIConnectOptions;\n\n constructor(\n stt: STT,\n sampleRate?: number,\n connectionOptions: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,\n ) {\n this.#stt = stt;\n this._connOptions = connectionOptions;\n this.deferredInputStream = new DeferredReadableStream<AudioFrame>();\n this.neededSampleRate = sampleRate;\n this.monitorMetrics();\n this.pumpInput();\n\n // this is a hack to immitate asyncio.create_task so that mainTask\n // is run **after** the constructor has finished. Otherwise we get\n // runtime error when trying to access class variables in the\n // `run` method.\n startSoon(() => this.mainTask().then(() => this.queue.close()));\n }\n\n private async mainTask() {\n for (let i = 0; i < this._connOptions.maxRetry + 1; i++) {\n try {\n return await this.run();\n } catch (error) {\n if (error instanceof APIError) {\n const retryInterval = this._connOptions._intervalForRetry(i);\n\n if (this._connOptions.maxRetry === 0 || !error.retryable) {\n this.emitError({ error, recoverable: false });\n throw error;\n } else if (i === this._connOptions.maxRetry) {\n this.emitError({ error, recoverable: false });\n throw new APIConnectionError({\n message: `failed to recognize speech after ${this._connOptions.maxRetry + 1} attempts`,\n options: { retryable: false },\n });\n } else {\n // Don't emit error event for recoverable errors during retry loop\n // to avoid ERR_UNHANDLED_ERROR or premature session termination\n this.logger.warn(\n { tts: this.#stt.label, attempt: i + 1, error },\n `failed to recognize speech, retrying in ${retryInterval}s`,\n );\n }\n\n if (retryInterval > 0) {\n await delay(retryInterval);\n }\n } else {\n this.emitError({ error: toError(error), recoverable: false });\n throw error;\n }\n }\n }\n }\n\n private emitError({ error, recoverable }: { error: Error; recoverable: boolean }) {\n this.#stt.emit('error', {\n type: 'stt_error',\n timestamp: Date.now(),\n label: this.#stt.label,\n error,\n recoverable,\n });\n }\n\n protected async pumpInput() {\n // TODO(AJS-35): Implement STT with webstreams API\n const inputStream = this.deferredInputStream.stream;\n const reader = inputStream.getReader();\n\n try {\n while (true) {\n const { done, value } = await reader.read();\n if (done) break;\n this.pushFrame(value);\n }\n } catch (error) {\n this.logger.error('Error in STTStream mainTask:', error);\n } finally {\n reader.releaseLock();\n }\n }\n\n protected async monitorMetrics() {\n for await (const event of this.queue) {\n if (!this.output.closed) {\n try {\n this.output.put(event);\n } catch (e) {\n if (e instanceof Error && e.message.includes('Queue is closed')) {\n this.logger.warn(\n { err: e },\n 'Queue closed during transcript processing (expected during disconnect)',\n );\n }\n }\n }\n if (event.type !== SpeechEventType.RECOGNITION_USAGE) continue;\n const metrics: STTMetrics = {\n type: 'stt_metrics',\n timestamp: Date.now(),\n requestId: event.requestId!,\n durationMs: 0,\n label: this.#stt.label,\n audioDurationMs: Math.round(event.recognitionUsage!.audioDuration * 1000),\n streamed: true,\n };\n this.#stt.emit('metrics_collected', metrics);\n }\n if (!this.output.closed) {\n this.output.close();\n }\n }\n\n protected abstract run(): Promise<void>;\n\n updateInputStream(audioStream: ReadableStream<AudioFrame>) {\n this.deferredInputStream.setSource(audioStream);\n }\n\n detachInputStream() {\n this.deferredInputStream.detachSource();\n }\n\n /** Push an audio frame to the STT */\n pushFrame(frame: AudioFrame) {\n if (this.input.closed) {\n throw new Error('Input is closed');\n }\n if (this.closed) {\n throw new Error('Stream is closed');\n }\n\n if (this.neededSampleRate && frame.sampleRate !== this.neededSampleRate) {\n if (!this.resampler) {\n this.resampler = new AudioResampler(frame.sampleRate, this.neededSampleRate);\n }\n }\n\n if (this.resampler) {\n const frames = this.resampler.push(frame);\n for (const frame of frames) {\n this.input.put(frame);\n }\n } else {\n this.input.put(frame);\n }\n }\n\n /** Flush the STT, causing it to process all pending text */\n flush() {\n if (this.input.closed) {\n throw new Error('Input is closed');\n }\n if (this.closed) {\n throw new Error('Stream is closed');\n }\n this.input.put(SpeechStream.FLUSH_SENTINEL);\n }\n\n /** Mark the input as ended and forbid additional pushes */\n endInput() {\n if (this.input.closed) {\n throw new Error('Input is closed');\n }\n if (this.closed) {\n throw new Error('Stream is closed');\n }\n this.input.close();\n }\n\n next(): Promise<IteratorResult<SpeechEvent>> {\n return this.output.next();\n }\n\n /** Close both the input and output of the STT stream */\n close() {\n if (!this.input.closed) this.input.close();\n if (!this.queue.closed) this.queue.close();\n if (!this.output.closed) this.output.close();\n this.closed = true;\n }\n\n [Symbol.asyncIterator](): SpeechStream {\n return this;\n }\n}\n"],"mappings":"AAGA,SAA0B,sBAAsB;AAEhD,SAAS,oBAAoB;AAE7B,SAAS,oBAAoB,gBAAgB;AAC7C,SAAS,qCAAqC;AAC9C,SAAS,WAAW;AAEpB,SAAS,8BAA8B;AACvC,SAAiC,mCAAmC;AAEpE,SAAS,oBAAoB,OAAO,WAAW,eAAe;AAGvD,IAAK,kBAAL,kBAAKA,qBAAL;AAML,EAAAA,kCAAA,qBAAkB,KAAlB;AAIA,EAAAA,kCAAA,wBAAqB,KAArB;AAKA,EAAAA,kCAAA,sBAAmB,KAAnB;AAKA,EAAAA,kCAAA,mBAAgB,KAAhB;AAEA,EAAAA,kCAAA,uBAAoB,KAApB;AAMA,EAAAA,kCAAA,0BAAuB,KAAvB;AA5BU,SAAAA;AAAA,GAAA;AAmFL,MAAe,YAAa,aAAsD;AAAA,EAEvF;AAAA,EAEA,YAAY,cAA+B;AACzC,UAAM;AACN,SAAK,gBAAgB;AAAA,EACvB;AAAA;AAAA,EAGA,IAAI,eAAgC;AAClC,WAAO,KAAK;AAAA,EACd;AAAA;AAAA,EAGA,MAAM,UAAU,OAA0C;AACxD,UAAM,YAAY,QAAQ,OAAO,OAAO;AACxC,UAAM,QAAQ,MAAM,KAAK,WAAW,KAAK;AACzC,UAAM,aAAa,QAAQ,QAAQ,OAAO,OAAO,IAAI,aAAa,OAAO,GAAO,CAAC;AACjF,SAAK,KAAK,qBAAqB;AAAA,MAC7B,MAAM;AAAA,MACN,WAAW,MAAM,aAAa;AAAA,MAC9B,WAAW,KAAK,IAAI;AAAA,MACpB;AAAA,MACA,OAAO,KAAK;AAAA,MACZ,iBAAiB,KAAK,MAAM,8BAA8B,KAAK,IAAI,GAAI;AAAA,MACvE,UAAU;AAAA,IACZ,CAAC;AACD,WAAO;AAAA,EACT;AAAA,EASA,MAAM,QAAuB;AAC3B;AAAA,EACF;AACF;AAkBO,MAAe,aAA2D;AAAA,EAC/E,OAA0B,iBAAiB,OAAO,gBAAgB;AAAA,EACxD,QAAQ,IAAI,mBAAoE;AAAA,EAChF,SAAS,IAAI,mBAAgC;AAAA,EAC7C,QAAQ,IAAI,mBAAgC;AAAA,EAC5C;AAAA,EACA;AAAA,EAEA,SAAS;AAAA,EACnB;AAAA,EACQ;AAAA,EACA,SAAS,IAAI;AAAA,EACb;AAAA,EAER,YACE,KACA,YACA,oBAAuC,6BACvC;AACA,SAAK,OAAO;AACZ,SAAK,eAAe;AACpB,SAAK,sBAAsB,IAAI,uBAAmC;AAClE,SAAK,mBAAmB;AACxB,SAAK,eAAe;AACpB,SAAK,UAAU;AAMf,cAAU,MAAM,KAAK,SAAS,EAAE,KAAK,MAAM,KAAK,MAAM,MAAM,CAAC,CAAC;AAAA,EAChE;AAAA,EAEA,MAAc,WAAW;AACvB,aAAS,IAAI,GAAG,IAAI,KAAK,aAAa,WAAW,GAAG,KAAK;AACvD,UAAI;AACF,eAAO,MAAM,KAAK,IAAI;AAAA,MACxB,SAAS,OAAO;AACd,YAAI,iBAAiB,UAAU;AAC7B,gBAAM,gBAAgB,KAAK,aAAa,kBAAkB,CAAC;AAE3D,cAAI,KAAK,aAAa,aAAa,KAAK,CAAC,MAAM,WAAW;AACxD,iBAAK,UAAU,EAAE,OAAO,aAAa,MAAM,CAAC;AAC5C,kBAAM;AAAA,UACR,WAAW,MAAM,KAAK,aAAa,UAAU;AAC3C,iBAAK,UAAU,EAAE,OAAO,aAAa,MAAM,CAAC;AAC5C,kBAAM,IAAI,mBAAmB;AAAA,cAC3B,SAAS,oCAAoC,KAAK,aAAa,WAAW,CAAC;AAAA,cAC3E,SAAS,EAAE,WAAW,MAAM;AAAA,YAC9B,CAAC;AAAA,UACH,OAAO;AAGL,iBAAK,OAAO;AAAA,cACV,EAAE,KAAK,KAAK,KAAK,OAAO,SAAS,IAAI,GAAG,MAAM;AAAA,cAC9C,2CAA2C,aAAa;AAAA,YAC1D;AAAA,UACF;AAEA,cAAI,gBAAgB,GAAG;AACrB,kBAAM,MAAM,aAAa;AAAA,UAC3B;AAAA,QACF,OAAO;AACL,eAAK,UAAU,EAAE,OAAO,QAAQ,KAAK,GAAG,aAAa,MAAM,CAAC;AAC5D,gBAAM;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEQ,UAAU,EAAE,OAAO,YAAY,GAA2C;AAChF,SAAK,KAAK,KAAK,SAAS;AAAA,MACtB,MAAM;AAAA,MACN,WAAW,KAAK,IAAI;AAAA,MACpB,OAAO,KAAK,KAAK;AAAA,MACjB;AAAA,MACA;AAAA,IACF,CAAC;AAAA,EACH;AAAA,EAEA,MAAgB,YAAY;AAE1B,UAAM,cAAc,KAAK,oBAAoB;AAC7C,UAAM,SAAS,YAAY,UAAU;AAErC,QAAI;AACF,aAAO,MAAM;AACX,cAAM,EAAE,MAAM,MAAM,IAAI,MAAM,OAAO,KAAK;AAC1C,YAAI,KAAM;AACV,aAAK,UAAU,KAAK;AAAA,MACtB;AAAA,IACF,SAAS,OAAO;AACd,WAAK,OAAO,MAAM,gCAAgC,KAAK;AAAA,IACzD,UAAE;AACA,aAAO,YAAY;AAAA,IACrB;AAAA,EACF;AAAA,EAEA,MAAgB,iBAAiB;AAC/B,qBAAiB,SAAS,KAAK,OAAO;AACpC,UAAI,CAAC,KAAK,OAAO,QAAQ;AACvB,YAAI;AACF,eAAK,OAAO,IAAI,KAAK;AAAA,QACvB,SAAS,GAAG;AACV,cAAI,aAAa,SAAS,EAAE,QAAQ,SAAS,iBAAiB,GAAG;AAC/D,iBAAK,OAAO;AAAA,cACV,EAAE,KAAK,EAAE;AAAA,cACT;AAAA,YACF;AAAA,UACF;AAAA,QACF;AAAA,MACF;AACA,UAAI,MAAM,SAAS,0BAAmC;AACtD,YAAM,UAAsB;AAAA,QAC1B,MAAM;AAAA,QACN,WAAW,KAAK,IAAI;AAAA,QACpB,WAAW,MAAM;AAAA,QACjB,YAAY;AAAA,QACZ,OAAO,KAAK,KAAK;AAAA,QACjB,iBAAiB,KAAK,MAAM,MAAM,iBAAkB,gBAAgB,GAAI;AAAA,QACxE,UAAU;AAAA,MACZ;AACA,WAAK,KAAK,KAAK,qBAAqB,OAAO;AAAA,IAC7C;AACA,QAAI,CAAC,KAAK,OAAO,QAAQ;AACvB,WAAK,OAAO,MAAM;AAAA,IACpB;AAAA,EACF;AAAA,EAIA,kBAAkB,aAAyC;AACzD,SAAK,oBAAoB,UAAU,WAAW;AAAA,EAChD;AAAA,EAEA,oBAAoB;AAClB,SAAK,oBAAoB,aAAa;AAAA,EACxC;AAAA;AAAA,EAGA,UAAU,OAAmB;AAC3B,QAAI,KAAK,MAAM,QAAQ;AACrB,YAAM,IAAI,MAAM,iBAAiB;AAAA,IACnC;AACA,QAAI,KAAK,QAAQ;AACf,YAAM,IAAI,MAAM,kBAAkB;AAAA,IACpC;AAEA,QAAI,KAAK,oBAAoB,MAAM,eAAe,KAAK,kBAAkB;AACvE,UAAI,CAAC,KAAK,WAAW;AACnB,aAAK,YAAY,IAAI,eAAe,MAAM,YAAY,KAAK,gBAAgB;AAAA,MAC7E;AAAA,IACF;AAEA,QAAI,KAAK,WAAW;AAClB,YAAM,SAAS,KAAK,UAAU,KAAK,KAAK;AACxC,iBAAWC,UAAS,QAAQ;AAC1B,aAAK,MAAM,IAAIA,MAAK;AAAA,MACtB;AAAA,IACF,OAAO;AACL,WAAK,MAAM,IAAI,KAAK;AAAA,IACtB;AAAA,EACF;AAAA;AAAA,EAGA,QAAQ;AACN,QAAI,KAAK,MAAM,QAAQ;AACrB,YAAM,IAAI,MAAM,iBAAiB;AAAA,IACnC;AACA,QAAI,KAAK,QAAQ;AACf,YAAM,IAAI,MAAM,kBAAkB;AAAA,IACpC;AACA,SAAK,MAAM,IAAI,aAAa,cAAc;AAAA,EAC5C;AAAA;AAAA,EAGA,WAAW;AACT,QAAI,KAAK,MAAM,QAAQ;AACrB,YAAM,IAAI,MAAM,iBAAiB;AAAA,IACnC;AACA,QAAI,KAAK,QAAQ;AACf,YAAM,IAAI,MAAM,kBAAkB;AAAA,IACpC;AACA,SAAK,MAAM,MAAM;AAAA,EACnB;AAAA,EAEA,OAA6C;AAC3C,WAAO,KAAK,OAAO,KAAK;AAAA,EAC1B;AAAA;AAAA,EAGA,QAAQ;AACN,QAAI,CAAC,KAAK,MAAM,OAAQ,MAAK,MAAM,MAAM;AACzC,QAAI,CAAC,KAAK,MAAM,OAAQ,MAAK,MAAM,MAAM;AACzC,QAAI,CAAC,KAAK,OAAO,OAAQ,MAAK,OAAO,MAAM;AAC3C,SAAK,SAAS;AAAA,EAChB;AAAA,EAEA,CAAC,OAAO,aAAa,IAAkB;AACrC,WAAO;AAAA,EACT;AACF;","names":["SpeechEventType","frame"]}
package/dist/tts/tts.cjs CHANGED
@@ -209,11 +209,8 @@ class SynthesizeStream {
209
209
  this.#monitorMetricsTask.finally(() => this.output.close());
210
210
  }
211
211
  this.#metricsText += text;
212
- if (this.input.closed) {
213
- throw new Error("Input is closed");
214
- }
215
- if (this.closed) {
216
- throw new Error("Stream is closed");
212
+ if (this.input.closed || this.closed) {
213
+ return;
217
214
  }
218
215
  this.input.put(text);
219
216
  }
@@ -223,22 +220,16 @@ class SynthesizeStream {
223
220
  this.#metricsPendingTexts.push(this.#metricsText);
224
221
  this.#metricsText = "";
225
222
  }
226
- if (this.input.closed) {
227
- throw new Error("Input is closed");
228
- }
229
- if (this.closed) {
230
- throw new Error("Stream is closed");
223
+ if (this.input.closed || this.closed) {
224
+ return;
231
225
  }
232
226
  this.input.put(SynthesizeStream.FLUSH_SENTINEL);
233
227
  }
234
228
  /** Mark the input as ended and forbid additional pushes */
235
229
  endInput() {
236
230
  this.flush();
237
- if (this.input.closed) {
238
- throw new Error("Input is closed");
239
- }
240
- if (this.closed) {
241
- throw new Error("Stream is closed");
231
+ if (this.input.closed || this.closed) {
232
+ return;
242
233
  }
243
234
  this.input.close();
244
235
  }
@@ -1 +1 @@
1
- {"version":3,"sources":["../../src/tts/tts.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';\nimport { EventEmitter } from 'node:events';\nimport type { ReadableStream } from 'node:stream/web';\nimport { APIConnectionError, APIError } from '../_exceptions.js';\nimport { log } from '../log.js';\nimport type { TTSMetrics } from '../metrics/base.js';\nimport { DeferredReadableStream } from '../stream/deferred_stream.js';\nimport { type APIConnectOptions, DEFAULT_API_CONNECT_OPTIONS } from '../types.js';\nimport { AsyncIterableQueue, delay, mergeFrames, startSoon, toError } from '../utils.js';\n\n/** SynthesizedAudio is a packet of speech synthesis as returned by the TTS. */\nexport interface SynthesizedAudio {\n /** Request ID (one segment could be made up of multiple requests) */\n requestId: string;\n /** Segment ID, each segment is separated by a flush */\n segmentId: string;\n /** Synthesized audio frame */\n frame: AudioFrame;\n /** Current segment of the synthesized audio */\n deltaText?: string;\n /** Whether this is the last frame of the segment (streaming only) */\n final: boolean;\n}\n\n/**\n * Describes the capabilities of the TTS provider.\n *\n * @remarks\n * At present, only `streaming` is supplied to this interface, and the framework only supports\n * providers that do have a streaming endpoint.\n */\nexport interface TTSCapabilities {\n streaming: boolean;\n}\n\nexport interface TTSError {\n type: 'tts_error';\n timestamp: number;\n label: string;\n error: Error;\n recoverable: boolean;\n}\n\nexport type TTSCallbacks = {\n ['metrics_collected']: (metrics: TTSMetrics) => void;\n ['error']: (error: TTSError) => void;\n};\n\n/**\n * An instance of a text-to-speech adapter.\n *\n * @remarks\n * This class is abstract, and as such cannot be used directly. Instead, use a provider plugin that\n * exports its own child TTS class, which inherits this class's methods.\n */\nexport abstract class TTS extends (EventEmitter as new () => TypedEmitter<TTSCallbacks>) {\n #capabilities: TTSCapabilities;\n #sampleRate: number;\n #numChannels: number;\n abstract label: string;\n\n constructor(sampleRate: number, numChannels: number, capabilities: TTSCapabilities) {\n super();\n this.#capabilities = capabilities;\n this.#sampleRate = sampleRate;\n this.#numChannels = numChannels;\n }\n\n /** Returns this TTS's capabilities */\n get capabilities(): TTSCapabilities {\n return this.#capabilities;\n }\n\n /** Returns the sample rate of audio frames returned by this TTS */\n get sampleRate(): number {\n return this.#sampleRate;\n }\n\n /** Returns the channel count of audio frames returned by this TTS */\n get numChannels(): number {\n return this.#numChannels;\n }\n\n /**\n * Receives text and returns synthesis in the form of a {@link ChunkedStream}\n */\n abstract synthesize(text: string): ChunkedStream;\n\n /**\n * Returns a {@link SynthesizeStream} that can be used to push text and receive audio data\n */\n abstract stream(): SynthesizeStream;\n\n async close(): Promise<void> {\n return;\n }\n}\n\n/**\n * An instance of a text-to-speech stream, as an asynchronous iterable iterator.\n *\n * @example Looping through frames\n * ```ts\n * for await (const event of stream) {\n * await source.captureFrame(event.frame);\n * }\n * ```\n *\n * @remarks\n * This class is abstract, and as such cannot be used directly. Instead, use a provider plugin that\n * exports its own child SynthesizeStream class, which inherits this class's methods.\n */\nexport abstract class SynthesizeStream\n implements AsyncIterableIterator<SynthesizedAudio | typeof SynthesizeStream.END_OF_STREAM>\n{\n protected static readonly FLUSH_SENTINEL = Symbol('FLUSH_SENTINEL');\n static readonly END_OF_STREAM = Symbol('END_OF_STREAM');\n protected input = new AsyncIterableQueue<string | typeof SynthesizeStream.FLUSH_SENTINEL>();\n protected queue = new AsyncIterableQueue<\n SynthesizedAudio | typeof SynthesizeStream.END_OF_STREAM\n >();\n protected output = new AsyncIterableQueue<\n SynthesizedAudio | typeof SynthesizeStream.END_OF_STREAM\n >();\n protected closed = false;\n abstract label: string;\n #tts: TTS;\n #metricsPendingTexts: string[] = [];\n #metricsText = '';\n #monitorMetricsTask?: Promise<void>;\n private _connOptions: APIConnectOptions;\n protected abortController = new AbortController();\n\n private deferredInputStream: DeferredReadableStream<\n string | typeof SynthesizeStream.FLUSH_SENTINEL\n >;\n private logger = log();\n\n constructor(tts: TTS, connOptions: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS) {\n this.#tts = tts;\n this._connOptions = connOptions;\n this.deferredInputStream = new DeferredReadableStream();\n this.pumpInput();\n this.abortController.signal.addEventListener('abort', () => {\n this.deferredInputStream.detachSource();\n // TODO (AJS-36) clean this up when we refactor with streams\n this.input.close();\n this.output.close();\n this.closed = true;\n });\n\n // this is a hack to immitate asyncio.create_task so that mainTask\n // is run **after** the constructor has finished. Otherwise we get\n // runtime error when trying to access class variables in the\n // `run` method.\n startSoon(() => this.mainTask().then(() => this.queue.close()));\n }\n\n private async mainTask() {\n // TODO(brian): PR3 - Add span wrapping: tracer.startActiveSpan('tts_request', ..., { endOnExit: false })\n for (let i = 0; i < this._connOptions.maxRetry + 1; i++) {\n try {\n // TODO(brian): PR3 - Add span for retry attempts: tracer.startActiveSpan('tts_request_run', ...)\n return await this.run();\n } catch (error) {\n if (error instanceof APIError) {\n const retryInterval = this._connOptions._intervalForRetry(i);\n\n if (this._connOptions.maxRetry === 0 || !error.retryable) {\n this.emitError({ error, recoverable: false });\n throw error;\n } else if (i === this._connOptions.maxRetry) {\n this.emitError({ error, recoverable: false });\n throw new APIConnectionError({\n message: `failed to generate TTS completion after ${this._connOptions.maxRetry + 1} attempts`,\n options: { retryable: false },\n });\n } else {\n // Don't emit error event for recoverable errors during retry loop\n // to avoid ERR_UNHANDLED_ERROR or premature session termination\n this.logger.warn(\n { tts: this.#tts.label, attempt: i + 1, error },\n `failed to synthesize speech, retrying in ${retryInterval}s`,\n );\n }\n\n if (retryInterval > 0) {\n await delay(retryInterval);\n }\n } else {\n this.emitError({ error: toError(error), recoverable: false });\n throw error;\n }\n }\n }\n }\n\n private emitError({ error, recoverable }: { error: Error; recoverable: boolean }) {\n this.#tts.emit('error', {\n type: 'tts_error',\n timestamp: Date.now(),\n label: this.#tts.label,\n error,\n recoverable,\n });\n }\n\n // NOTE(AJS-37): The implementation below uses an AsyncIterableQueue (`this.input`)\n // bridged from a DeferredReadableStream (`this.deferredInputStream`) rather than\n // consuming the stream directly.\n //\n // A full refactor to native Web Streams was considered but is currently deferred.\n // The primary reason is to maintain architectural parity with the Python SDK,\n // which is a key design goal for the project. This ensures a consistent developer\n // experience across both platforms.\n //\n // For more context, see the discussion in GitHub issue # 844.\n protected async pumpInput() {\n const reader = this.deferredInputStream.stream.getReader();\n try {\n while (true) {\n const { done, value } = await reader.read();\n if (done || value === SynthesizeStream.FLUSH_SENTINEL) {\n break;\n }\n this.pushText(value);\n }\n this.endInput();\n } catch (error) {\n this.logger.error(error, 'Error reading deferred input stream');\n } finally {\n reader.releaseLock();\n // Ensure output is closed when the stream ends\n if (!this.#monitorMetricsTask) {\n // No text was received, close the output directly\n this.output.close();\n }\n }\n }\n\n protected async monitorMetrics() {\n const startTime = process.hrtime.bigint();\n let audioDurationMs = 0;\n let ttfb: bigint = BigInt(-1);\n let requestId = '';\n\n const emit = () => {\n if (this.#metricsPendingTexts.length) {\n const text = this.#metricsPendingTexts.shift()!;\n const duration = process.hrtime.bigint() - startTime;\n const roundedAudioDurationMs = Math.round(audioDurationMs);\n const metrics: TTSMetrics = {\n type: 'tts_metrics',\n timestamp: Date.now(),\n requestId,\n ttfbMs: ttfb === BigInt(-1) ? -1 : Math.trunc(Number(ttfb / BigInt(1000000))),\n durationMs: Math.trunc(Number(duration / BigInt(1000000))),\n charactersCount: text.length,\n audioDurationMs: roundedAudioDurationMs,\n cancelled: this.abortController.signal.aborted,\n label: this.#tts.label,\n streamed: false,\n };\n this.#tts.emit('metrics_collected', metrics);\n }\n };\n\n for await (const audio of this.queue) {\n if (this.abortController.signal.aborted) {\n break;\n }\n this.output.put(audio);\n if (audio === SynthesizeStream.END_OF_STREAM) continue;\n requestId = audio.requestId;\n if (ttfb === BigInt(-1)) {\n ttfb = process.hrtime.bigint() - startTime;\n }\n // TODO(AJS-102): use frame.durationMs once available in rtc-node\n audioDurationMs += (audio.frame.samplesPerChannel / audio.frame.sampleRate) * 1000;\n if (audio.final) {\n emit();\n }\n }\n\n if (requestId) {\n emit();\n }\n }\n\n protected abstract run(): Promise<void>;\n\n updateInputStream(text: ReadableStream<string>) {\n this.deferredInputStream.setSource(text);\n }\n\n /** Push a string of text to the TTS */\n /** @deprecated Use `updateInputStream` instead */\n pushText(text: string) {\n if (!this.#monitorMetricsTask) {\n this.#monitorMetricsTask = this.monitorMetrics();\n // Close output when metrics task completes\n this.#monitorMetricsTask.finally(() => this.output.close());\n }\n this.#metricsText += text;\n\n if (this.input.closed) {\n throw new Error('Input is closed');\n }\n if (this.closed) {\n throw new Error('Stream is closed');\n }\n this.input.put(text);\n }\n\n /** Flush the TTS, causing it to process all pending text */\n flush() {\n if (this.#metricsText) {\n this.#metricsPendingTexts.push(this.#metricsText);\n this.#metricsText = '';\n }\n if (this.input.closed) {\n throw new Error('Input is closed');\n }\n if (this.closed) {\n throw new Error('Stream is closed');\n }\n this.input.put(SynthesizeStream.FLUSH_SENTINEL);\n }\n\n /** Mark the input as ended and forbid additional pushes */\n endInput() {\n this.flush();\n if (this.input.closed) {\n throw new Error('Input is closed');\n }\n if (this.closed) {\n throw new Error('Stream is closed');\n }\n this.input.close();\n }\n\n next(): Promise<IteratorResult<SynthesizedAudio | typeof SynthesizeStream.END_OF_STREAM>> {\n return this.output.next();\n }\n\n /** Close both the input and output of the TTS stream */\n close() {\n this.abortController.abort();\n }\n\n [Symbol.asyncIterator](): SynthesizeStream {\n return this;\n }\n}\n\n/**\n * An instance of a text-to-speech response, as an asynchronous iterable iterator.\n *\n * @example Looping through frames\n * ```ts\n * for await (const event of stream) {\n * await source.captureFrame(event.frame);\n * }\n * ```\n *\n * @remarks\n * This class is abstract, and as such cannot be used directly. Instead, use a provider plugin that\n * exports its own child ChunkedStream class, which inherits this class's methods.\n */\nexport abstract class ChunkedStream implements AsyncIterableIterator<SynthesizedAudio> {\n protected queue = new AsyncIterableQueue<SynthesizedAudio>();\n protected output = new AsyncIterableQueue<SynthesizedAudio>();\n protected closed = false;\n abstract label: string;\n #text: string;\n #tts: TTS;\n private _connOptions: APIConnectOptions;\n private logger = log();\n\n constructor(\n text: string,\n tts: TTS,\n connOptions: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,\n ) {\n this.#text = text;\n this.#tts = tts;\n this._connOptions = connOptions;\n\n this.monitorMetrics();\n\n // this is a hack to immitate asyncio.create_task so that mainTask\n // is run **after** the constructor has finished. Otherwise we get\n // runtime error when trying to access class variables in the\n // `run` method.\n Promise.resolve().then(() => this.mainTask().then(() => this.queue.close()));\n }\n\n private async mainTask() {\n // TODO(brian): PR3 - Add span wrapping: tracer.startActiveSpan('tts_request', ..., { endOnExit: false })\n for (let i = 0; i < this._connOptions.maxRetry + 1; i++) {\n try {\n // TODO(brian): PR3 - Add span for retry attempts: tracer.startActiveSpan('tts_request_run', ...)\n return await this.run();\n } catch (error) {\n if (error instanceof APIError) {\n const retryInterval = this._connOptions._intervalForRetry(i);\n\n if (this._connOptions.maxRetry === 0 || !error.retryable) {\n this.emitError({ error, recoverable: false });\n throw error;\n } else if (i === this._connOptions.maxRetry) {\n this.emitError({ error, recoverable: false });\n throw new APIConnectionError({\n message: `failed to generate TTS completion after ${this._connOptions.maxRetry + 1} attempts`,\n options: { retryable: false },\n });\n } else {\n // Don't emit error event for recoverable errors during retry loop\n // to avoid ERR_UNHANDLED_ERROR or premature session termination\n this.logger.warn(\n { tts: this.#tts.label, attempt: i + 1, error },\n `failed to generate TTS completion, retrying in ${retryInterval}s`,\n );\n }\n\n if (retryInterval > 0) {\n await delay(retryInterval);\n }\n } else {\n this.emitError({ error: toError(error), recoverable: false });\n throw error;\n }\n }\n }\n }\n\n private emitError({ error, recoverable }: { error: Error; recoverable: boolean }) {\n this.#tts.emit('error', {\n type: 'tts_error',\n timestamp: Date.now(),\n label: this.#tts.label,\n error,\n recoverable,\n });\n }\n\n protected abstract run(): Promise<void>;\n\n get inputText(): string {\n return this.#text;\n }\n\n protected async monitorMetrics() {\n const startTime = process.hrtime.bigint();\n let audioDurationMs = 0;\n let ttfb: bigint = BigInt(-1);\n let requestId = '';\n\n for await (const audio of this.queue) {\n this.output.put(audio);\n requestId = audio.requestId;\n if (ttfb === BigInt(-1)) {\n ttfb = process.hrtime.bigint() - startTime;\n }\n audioDurationMs += (audio.frame.samplesPerChannel / audio.frame.sampleRate) * 1000;\n }\n this.output.close();\n\n const duration = process.hrtime.bigint() - startTime;\n const metrics: TTSMetrics = {\n type: 'tts_metrics',\n timestamp: Date.now(),\n requestId,\n ttfbMs: ttfb === BigInt(-1) ? -1 : Math.trunc(Number(ttfb / BigInt(1000000))),\n durationMs: Math.trunc(Number(duration / BigInt(1000000))),\n charactersCount: this.#text.length,\n audioDurationMs: Math.round(audioDurationMs),\n cancelled: false, // TODO(AJS-186): support ChunkedStream with 1.0 - add this.abortController.signal.aborted here\n label: this.#tts.label,\n streamed: false,\n };\n this.#tts.emit('metrics_collected', metrics);\n }\n\n /** Collect every frame into one in a single call */\n async collect(): Promise<AudioFrame> {\n const frames = [];\n for await (const event of this) {\n frames.push(event.frame);\n }\n return mergeFrames(frames);\n }\n\n next(): Promise<IteratorResult<SynthesizedAudio>> {\n return this.output.next();\n }\n\n /** Close both the input and output of the TTS stream */\n close() {\n this.queue.close();\n this.output.close();\n this.closed = true;\n }\n\n [Symbol.asyncIterator](): ChunkedStream {\n return this;\n }\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAKA,yBAA6B;AAE7B,wBAA6C;AAC7C,iBAAoB;AAEpB,6BAAuC;AACvC,mBAAoE;AACpE,mBAA2E;AA+CpE,MAAe,YAAa,gCAAsD;AAAA,EACvF;AAAA,EACA;AAAA,EACA;AAAA,EAGA,YAAY,YAAoB,aAAqB,cAA+B;AAClF,UAAM;AACN,SAAK,gBAAgB;AACrB,SAAK,cAAc;AACnB,SAAK,eAAe;AAAA,EACtB;AAAA;AAAA,EAGA,IAAI,eAAgC;AAClC,WAAO,KAAK;AAAA,EACd;AAAA;AAAA,EAGA,IAAI,aAAqB;AACvB,WAAO,KAAK;AAAA,EACd;AAAA;AAAA,EAGA,IAAI,cAAsB;AACxB,WAAO,KAAK;AAAA,EACd;AAAA,EAYA,MAAM,QAAuB;AAC3B;AAAA,EACF;AACF;AAgBO,MAAe,iBAEtB;AAAA,EACE,OAA0B,iBAAiB,OAAO,gBAAgB;AAAA,EAClE,OAAgB,gBAAgB,OAAO,eAAe;AAAA,EAC5C,QAAQ,IAAI,gCAAoE;AAAA,EAChF,QAAQ,IAAI,gCAEpB;AAAA,EACQ,SAAS,IAAI,gCAErB;AAAA,EACQ,SAAS;AAAA,EAEnB;AAAA,EACA,uBAAiC,CAAC;AAAA,EAClC,eAAe;AAAA,EACf;AAAA,EACQ;AAAA,EACE,kBAAkB,IAAI,gBAAgB;AAAA,EAExC;AAAA,EAGA,aAAS,gBAAI;AAAA,EAErB,YAAY,KAAU,cAAiC,0CAA6B;AAClF,SAAK,OAAO;AACZ,SAAK,eAAe;AACpB,SAAK,sBAAsB,IAAI,8CAAuB;AACtD,SAAK,UAAU;AACf,SAAK,gBAAgB,OAAO,iBAAiB,SAAS,MAAM;AAC1D,WAAK,oBAAoB,aAAa;AAEtC,WAAK,MAAM,MAAM;AACjB,WAAK,OAAO,MAAM;AAClB,WAAK,SAAS;AAAA,IAChB,CAAC;AAMD,gCAAU,MAAM,KAAK,SAAS,EAAE,KAAK,MAAM,KAAK,MAAM,MAAM,CAAC,CAAC;AAAA,EAChE;AAAA,EAEA,MAAc,WAAW;AAEvB,aAAS,IAAI,GAAG,IAAI,KAAK,aAAa,WAAW,GAAG,KAAK;AACvD,UAAI;AAEF,eAAO,MAAM,KAAK,IAAI;AAAA,MACxB,SAAS,OAAO;AACd,YAAI,iBAAiB,4BAAU;AAC7B,gBAAM,gBAAgB,KAAK,aAAa,kBAAkB,CAAC;AAE3D,cAAI,KAAK,aAAa,aAAa,KAAK,CAAC,MAAM,WAAW;AACxD,iBAAK,UAAU,EAAE,OAAO,aAAa,MAAM,CAAC;AAC5C,kBAAM;AAAA,UACR,WAAW,MAAM,KAAK,aAAa,UAAU;AAC3C,iBAAK,UAAU,EAAE,OAAO,aAAa,MAAM,CAAC;AAC5C,kBAAM,IAAI,qCAAmB;AAAA,cAC3B,SAAS,2CAA2C,KAAK,aAAa,WAAW,CAAC;AAAA,cAClF,SAAS,EAAE,WAAW,MAAM;AAAA,YAC9B,CAAC;AAAA,UACH,OAAO;AAGL,iBAAK,OAAO;AAAA,cACV,EAAE,KAAK,KAAK,KAAK,OAAO,SAAS,IAAI,GAAG,MAAM;AAAA,cAC9C,6CAA6C,aAAa;AAAA,YAC5D;AAAA,UACF;AAEA,cAAI,gBAAgB,GAAG;AACrB,sBAAM,oBAAM,aAAa;AAAA,UAC3B;AAAA,QACF,OAAO;AACL,eAAK,UAAU,EAAE,WAAO,sBAAQ,KAAK,GAAG,aAAa,MAAM,CAAC;AAC5D,gBAAM;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEQ,UAAU,EAAE,OAAO,YAAY,GAA2C;AAChF,SAAK,KAAK,KAAK,SAAS;AAAA,MACtB,MAAM;AAAA,MACN,WAAW,KAAK,IAAI;AAAA,MACpB,OAAO,KAAK,KAAK;AAAA,MACjB;AAAA,MACA;AAAA,IACF,CAAC;AAAA,EACH;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAYA,MAAgB,YAAY;AAC1B,UAAM,SAAS,KAAK,oBAAoB,OAAO,UAAU;AACzD,QAAI;AACF,aAAO,MAAM;AACX,cAAM,EAAE,MAAM,MAAM,IAAI,MAAM,OAAO,KAAK;AAC1C,YAAI,QAAQ,UAAU,iBAAiB,gBAAgB;AACrD;AAAA,QACF;AACA,aAAK,SAAS,KAAK;AAAA,MACrB;AACA,WAAK,SAAS;AAAA,IAChB,SAAS,OAAO;AACd,WAAK,OAAO,MAAM,OAAO,qCAAqC;AAAA,IAChE,UAAE;AACA,aAAO,YAAY;AAEnB,UAAI,CAAC,KAAK,qBAAqB;AAE7B,aAAK,OAAO,MAAM;AAAA,MACpB;AAAA,IACF;AAAA,EACF;AAAA,EAEA,MAAgB,iBAAiB;AAC/B,UAAM,YAAY,QAAQ,OAAO,OAAO;AACxC,QAAI,kBAAkB;AACtB,QAAI,OAAe,OAAO,EAAE;AAC5B,QAAI,YAAY;AAEhB,UAAM,OAAO,MAAM;AACjB,UAAI,KAAK,qBAAqB,QAAQ;AACpC,cAAM,OAAO,KAAK,qBAAqB,MAAM;AAC7C,cAAM,WAAW,QAAQ,OAAO,OAAO,IAAI;AAC3C,cAAM,yBAAyB,KAAK,MAAM,eAAe;AACzD,cAAM,UAAsB;AAAA,UAC1B,MAAM;AAAA,UACN,WAAW,KAAK,IAAI;AAAA,UACpB;AAAA,UACA,QAAQ,SAAS,OAAO,EAAE,IAAI,KAAK,KAAK,MAAM,OAAO,OAAO,OAAO,GAAO,CAAC,CAAC;AAAA,UAC5E,YAAY,KAAK,MAAM,OAAO,WAAW,OAAO,GAAO,CAAC,CAAC;AAAA,UACzD,iBAAiB,KAAK;AAAA,UACtB,iBAAiB;AAAA,UACjB,WAAW,KAAK,gBAAgB,OAAO;AAAA,UACvC,OAAO,KAAK,KAAK;AAAA,UACjB,UAAU;AAAA,QACZ;AACA,aAAK,KAAK,KAAK,qBAAqB,OAAO;AAAA,MAC7C;AAAA,IACF;AAEA,qBAAiB,SAAS,KAAK,OAAO;AACpC,UAAI,KAAK,gBAAgB,OAAO,SAAS;AACvC;AAAA,MACF;AACA,WAAK,OAAO,IAAI,KAAK;AACrB,UAAI,UAAU,iBAAiB,cAAe;AAC9C,kBAAY,MAAM;AAClB,UAAI,SAAS,OAAO,EAAE,GAAG;AACvB,eAAO,QAAQ,OAAO,OAAO,IAAI;AAAA,MACnC;AAEA,yBAAoB,MAAM,MAAM,oBAAoB,MAAM,MAAM,aAAc;AAC9E,UAAI,MAAM,OAAO;AACf,aAAK;AAAA,MACP;AAAA,IACF;AAEA,QAAI,WAAW;AACb,WAAK;AAAA,IACP;AAAA,EACF;AAAA,EAIA,kBAAkB,MAA8B;AAC9C,SAAK,oBAAoB,UAAU,IAAI;AAAA,EACzC;AAAA;AAAA;AAAA,EAIA,SAAS,MAAc;AACrB,QAAI,CAAC,KAAK,qBAAqB;AAC7B,WAAK,sBAAsB,KAAK,eAAe;AAE/C,WAAK,oBAAoB,QAAQ,MAAM,KAAK,OAAO,MAAM,CAAC;AAAA,IAC5D;AACA,SAAK,gBAAgB;AAErB,QAAI,KAAK,MAAM,QAAQ;AACrB,YAAM,IAAI,MAAM,iBAAiB;AAAA,IACnC;AACA,QAAI,KAAK,QAAQ;AACf,YAAM,IAAI,MAAM,kBAAkB;AAAA,IACpC;AACA,SAAK,MAAM,IAAI,IAAI;AAAA,EACrB;AAAA;AAAA,EAGA,QAAQ;AACN,QAAI,KAAK,cAAc;AACrB,WAAK,qBAAqB,KAAK,KAAK,YAAY;AAChD,WAAK,eAAe;AAAA,IACtB;AACA,QAAI,KAAK,MAAM,QAAQ;AACrB,YAAM,IAAI,MAAM,iBAAiB;AAAA,IACnC;AACA,QAAI,KAAK,QAAQ;AACf,YAAM,IAAI,MAAM,kBAAkB;AAAA,IACpC;AACA,SAAK,MAAM,IAAI,iBAAiB,cAAc;AAAA,EAChD;AAAA;AAAA,EAGA,WAAW;AACT,SAAK,MAAM;AACX,QAAI,KAAK,MAAM,QAAQ;AACrB,YAAM,IAAI,MAAM,iBAAiB;AAAA,IACnC;AACA,QAAI,KAAK,QAAQ;AACf,YAAM,IAAI,MAAM,kBAAkB;AAAA,IACpC;AACA,SAAK,MAAM,MAAM;AAAA,EACnB;AAAA,EAEA,OAA0F;AACxF,WAAO,KAAK,OAAO,KAAK;AAAA,EAC1B;AAAA;AAAA,EAGA,QAAQ;AACN,SAAK,gBAAgB,MAAM;AAAA,EAC7B;AAAA,EAEA,CAAC,OAAO,aAAa,IAAsB;AACzC,WAAO;AAAA,EACT;AACF;AAgBO,MAAe,cAAiE;AAAA,EAC3E,QAAQ,IAAI,gCAAqC;AAAA,EACjD,SAAS,IAAI,gCAAqC;AAAA,EAClD,SAAS;AAAA,EAEnB;AAAA,EACA;AAAA,EACQ;AAAA,EACA,aAAS,gBAAI;AAAA,EAErB,YACE,MACA,KACA,cAAiC,0CACjC;AACA,SAAK,QAAQ;AACb,SAAK,OAAO;AACZ,SAAK,eAAe;AAEpB,SAAK,eAAe;AAMpB,YAAQ,QAAQ,EAAE,KAAK,MAAM,KAAK,SAAS,EAAE,KAAK,MAAM,KAAK,MAAM,MAAM,CAAC,CAAC;AAAA,EAC7E;AAAA,EAEA,MAAc,WAAW;AAEvB,aAAS,IAAI,GAAG,IAAI,KAAK,aAAa,WAAW,GAAG,KAAK;AACvD,UAAI;AAEF,eAAO,MAAM,KAAK,IAAI;AAAA,MACxB,SAAS,OAAO;AACd,YAAI,iBAAiB,4BAAU;AAC7B,gBAAM,gBAAgB,KAAK,aAAa,kBAAkB,CAAC;AAE3D,cAAI,KAAK,aAAa,aAAa,KAAK,CAAC,MAAM,WAAW;AACxD,iBAAK,UAAU,EAAE,OAAO,aAAa,MAAM,CAAC;AAC5C,kBAAM;AAAA,UACR,WAAW,MAAM,KAAK,aAAa,UAAU;AAC3C,iBAAK,UAAU,EAAE,OAAO,aAAa,MAAM,CAAC;AAC5C,kBAAM,IAAI,qCAAmB;AAAA,cAC3B,SAAS,2CAA2C,KAAK,aAAa,WAAW,CAAC;AAAA,cAClF,SAAS,EAAE,WAAW,MAAM;AAAA,YAC9B,CAAC;AAAA,UACH,OAAO;AAGL,iBAAK,OAAO;AAAA,cACV,EAAE,KAAK,KAAK,KAAK,OAAO,SAAS,IAAI,GAAG,MAAM;AAAA,cAC9C,kDAAkD,aAAa;AAAA,YACjE;AAAA,UACF;AAEA,cAAI,gBAAgB,GAAG;AACrB,sBAAM,oBAAM,aAAa;AAAA,UAC3B;AAAA,QACF,OAAO;AACL,eAAK,UAAU,EAAE,WAAO,sBAAQ,KAAK,GAAG,aAAa,MAAM,CAAC;AAC5D,gBAAM;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEQ,UAAU,EAAE,OAAO,YAAY,GAA2C;AAChF,SAAK,KAAK,KAAK,SAAS;AAAA,MACtB,MAAM;AAAA,MACN,WAAW,KAAK,IAAI;AAAA,MACpB,OAAO,KAAK,KAAK;AAAA,MACjB;AAAA,MACA;AAAA,IACF,CAAC;AAAA,EACH;AAAA,EAIA,IAAI,YAAoB;AACtB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,MAAgB,iBAAiB;AAC/B,UAAM,YAAY,QAAQ,OAAO,OAAO;AACxC,QAAI,kBAAkB;AACtB,QAAI,OAAe,OAAO,EAAE;AAC5B,QAAI,YAAY;AAEhB,qBAAiB,SAAS,KAAK,OAAO;AACpC,WAAK,OAAO,IAAI,KAAK;AACrB,kBAAY,MAAM;AAClB,UAAI,SAAS,OAAO,EAAE,GAAG;AACvB,eAAO,QAAQ,OAAO,OAAO,IAAI;AAAA,MACnC;AACA,yBAAoB,MAAM,MAAM,oBAAoB,MAAM,MAAM,aAAc;AAAA,IAChF;AACA,SAAK,OAAO,MAAM;AAElB,UAAM,WAAW,QAAQ,OAAO,OAAO,IAAI;AAC3C,UAAM,UAAsB;AAAA,MAC1B,MAAM;AAAA,MACN,WAAW,KAAK,IAAI;AAAA,MACpB;AAAA,MACA,QAAQ,SAAS,OAAO,EAAE,IAAI,KAAK,KAAK,MAAM,OAAO,OAAO,OAAO,GAAO,CAAC,CAAC;AAAA,MAC5E,YAAY,KAAK,MAAM,OAAO,WAAW,OAAO,GAAO,CAAC,CAAC;AAAA,MACzD,iBAAiB,KAAK,MAAM;AAAA,MAC5B,iBAAiB,KAAK,MAAM,eAAe;AAAA,MAC3C,WAAW;AAAA;AAAA,MACX,OAAO,KAAK,KAAK;AAAA,MACjB,UAAU;AAAA,IACZ;AACA,SAAK,KAAK,KAAK,qBAAqB,OAAO;AAAA,EAC7C;AAAA;AAAA,EAGA,MAAM,UAA+B;AACnC,UAAM,SAAS,CAAC;AAChB,qBAAiB,SAAS,MAAM;AAC9B,aAAO,KAAK,MAAM,KAAK;AAAA,IACzB;AACA,eAAO,0BAAY,MAAM;AAAA,EAC3B;AAAA,EAEA,OAAkD;AAChD,WAAO,KAAK,OAAO,KAAK;AAAA,EAC1B;AAAA;AAAA,EAGA,QAAQ;AACN,SAAK,MAAM,MAAM;AACjB,SAAK,OAAO,MAAM;AAClB,SAAK,SAAS;AAAA,EAChB;AAAA,EAEA,CAAC,OAAO,aAAa,IAAmB;AACtC,WAAO;AAAA,EACT;AACF;","names":[]}
1
+ {"version":3,"sources":["../../src/tts/tts.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';\nimport { EventEmitter } from 'node:events';\nimport type { ReadableStream } from 'node:stream/web';\nimport { APIConnectionError, APIError } from '../_exceptions.js';\nimport { log } from '../log.js';\nimport type { TTSMetrics } from '../metrics/base.js';\nimport { DeferredReadableStream } from '../stream/deferred_stream.js';\nimport { type APIConnectOptions, DEFAULT_API_CONNECT_OPTIONS } from '../types.js';\nimport { AsyncIterableQueue, delay, mergeFrames, startSoon, toError } from '../utils.js';\n\n/** SynthesizedAudio is a packet of speech synthesis as returned by the TTS. */\nexport interface SynthesizedAudio {\n /** Request ID (one segment could be made up of multiple requests) */\n requestId: string;\n /** Segment ID, each segment is separated by a flush */\n segmentId: string;\n /** Synthesized audio frame */\n frame: AudioFrame;\n /** Current segment of the synthesized audio */\n deltaText?: string;\n /** Whether this is the last frame of the segment (streaming only) */\n final: boolean;\n}\n\n/**\n * Describes the capabilities of the TTS provider.\n *\n * @remarks\n * At present, only `streaming` is supplied to this interface, and the framework only supports\n * providers that do have a streaming endpoint.\n */\nexport interface TTSCapabilities {\n streaming: boolean;\n}\n\nexport interface TTSError {\n type: 'tts_error';\n timestamp: number;\n label: string;\n error: Error;\n recoverable: boolean;\n}\n\nexport type TTSCallbacks = {\n ['metrics_collected']: (metrics: TTSMetrics) => void;\n ['error']: (error: TTSError) => void;\n};\n\n/**\n * An instance of a text-to-speech adapter.\n *\n * @remarks\n * This class is abstract, and as such cannot be used directly. Instead, use a provider plugin that\n * exports its own child TTS class, which inherits this class's methods.\n */\nexport abstract class TTS extends (EventEmitter as new () => TypedEmitter<TTSCallbacks>) {\n #capabilities: TTSCapabilities;\n #sampleRate: number;\n #numChannels: number;\n abstract label: string;\n\n constructor(sampleRate: number, numChannels: number, capabilities: TTSCapabilities) {\n super();\n this.#capabilities = capabilities;\n this.#sampleRate = sampleRate;\n this.#numChannels = numChannels;\n }\n\n /** Returns this TTS's capabilities */\n get capabilities(): TTSCapabilities {\n return this.#capabilities;\n }\n\n /** Returns the sample rate of audio frames returned by this TTS */\n get sampleRate(): number {\n return this.#sampleRate;\n }\n\n /** Returns the channel count of audio frames returned by this TTS */\n get numChannels(): number {\n return this.#numChannels;\n }\n\n /**\n * Receives text and returns synthesis in the form of a {@link ChunkedStream}\n */\n abstract synthesize(text: string): ChunkedStream;\n\n /**\n * Returns a {@link SynthesizeStream} that can be used to push text and receive audio data\n */\n abstract stream(): SynthesizeStream;\n\n async close(): Promise<void> {\n return;\n }\n}\n\n/**\n * An instance of a text-to-speech stream, as an asynchronous iterable iterator.\n *\n * @example Looping through frames\n * ```ts\n * for await (const event of stream) {\n * await source.captureFrame(event.frame);\n * }\n * ```\n *\n * @remarks\n * This class is abstract, and as such cannot be used directly. Instead, use a provider plugin that\n * exports its own child SynthesizeStream class, which inherits this class's methods.\n */\nexport abstract class SynthesizeStream\n implements AsyncIterableIterator<SynthesizedAudio | typeof SynthesizeStream.END_OF_STREAM>\n{\n protected static readonly FLUSH_SENTINEL = Symbol('FLUSH_SENTINEL');\n static readonly END_OF_STREAM = Symbol('END_OF_STREAM');\n protected input = new AsyncIterableQueue<string | typeof SynthesizeStream.FLUSH_SENTINEL>();\n protected queue = new AsyncIterableQueue<\n SynthesizedAudio | typeof SynthesizeStream.END_OF_STREAM\n >();\n protected output = new AsyncIterableQueue<\n SynthesizedAudio | typeof SynthesizeStream.END_OF_STREAM\n >();\n protected closed = false;\n abstract label: string;\n #tts: TTS;\n #metricsPendingTexts: string[] = [];\n #metricsText = '';\n #monitorMetricsTask?: Promise<void>;\n private _connOptions: APIConnectOptions;\n protected abortController = new AbortController();\n\n private deferredInputStream: DeferredReadableStream<\n string | typeof SynthesizeStream.FLUSH_SENTINEL\n >;\n private logger = log();\n\n constructor(tts: TTS, connOptions: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS) {\n this.#tts = tts;\n this._connOptions = connOptions;\n this.deferredInputStream = new DeferredReadableStream();\n this.pumpInput();\n this.abortController.signal.addEventListener('abort', () => {\n this.deferredInputStream.detachSource();\n // TODO (AJS-36) clean this up when we refactor with streams\n this.input.close();\n this.output.close();\n this.closed = true;\n });\n\n // this is a hack to immitate asyncio.create_task so that mainTask\n // is run **after** the constructor has finished. Otherwise we get\n // runtime error when trying to access class variables in the\n // `run` method.\n startSoon(() => this.mainTask().then(() => this.queue.close()));\n }\n\n private async mainTask() {\n // TODO(brian): PR3 - Add span wrapping: tracer.startActiveSpan('tts_request', ..., { endOnExit: false })\n for (let i = 0; i < this._connOptions.maxRetry + 1; i++) {\n try {\n // TODO(brian): PR3 - Add span for retry attempts: tracer.startActiveSpan('tts_request_run', ...)\n return await this.run();\n } catch (error) {\n if (error instanceof APIError) {\n const retryInterval = this._connOptions._intervalForRetry(i);\n\n if (this._connOptions.maxRetry === 0 || !error.retryable) {\n this.emitError({ error, recoverable: false });\n throw error;\n } else if (i === this._connOptions.maxRetry) {\n this.emitError({ error, recoverable: false });\n throw new APIConnectionError({\n message: `failed to generate TTS completion after ${this._connOptions.maxRetry + 1} attempts`,\n options: { retryable: false },\n });\n } else {\n // Don't emit error event for recoverable errors during retry loop\n // to avoid ERR_UNHANDLED_ERROR or premature session termination\n this.logger.warn(\n { tts: this.#tts.label, attempt: i + 1, error },\n `failed to synthesize speech, retrying in ${retryInterval}s`,\n );\n }\n\n if (retryInterval > 0) {\n await delay(retryInterval);\n }\n } else {\n this.emitError({ error: toError(error), recoverable: false });\n throw error;\n }\n }\n }\n }\n\n private emitError({ error, recoverable }: { error: Error; recoverable: boolean }) {\n this.#tts.emit('error', {\n type: 'tts_error',\n timestamp: Date.now(),\n label: this.#tts.label,\n error,\n recoverable,\n });\n }\n\n // NOTE(AJS-37): The implementation below uses an AsyncIterableQueue (`this.input`)\n // bridged from a DeferredReadableStream (`this.deferredInputStream`) rather than\n // consuming the stream directly.\n //\n // A full refactor to native Web Streams was considered but is currently deferred.\n // The primary reason is to maintain architectural parity with the Python SDK,\n // which is a key design goal for the project. This ensures a consistent developer\n // experience across both platforms.\n //\n // For more context, see the discussion in GitHub issue # 844.\n protected async pumpInput() {\n const reader = this.deferredInputStream.stream.getReader();\n try {\n while (true) {\n const { done, value } = await reader.read();\n if (done || value === SynthesizeStream.FLUSH_SENTINEL) {\n break;\n }\n this.pushText(value);\n }\n this.endInput();\n } catch (error) {\n this.logger.error(error, 'Error reading deferred input stream');\n } finally {\n reader.releaseLock();\n // Ensure output is closed when the stream ends\n if (!this.#monitorMetricsTask) {\n // No text was received, close the output directly\n this.output.close();\n }\n }\n }\n\n protected async monitorMetrics() {\n const startTime = process.hrtime.bigint();\n let audioDurationMs = 0;\n let ttfb: bigint = BigInt(-1);\n let requestId = '';\n\n const emit = () => {\n if (this.#metricsPendingTexts.length) {\n const text = this.#metricsPendingTexts.shift()!;\n const duration = process.hrtime.bigint() - startTime;\n const roundedAudioDurationMs = Math.round(audioDurationMs);\n const metrics: TTSMetrics = {\n type: 'tts_metrics',\n timestamp: Date.now(),\n requestId,\n ttfbMs: ttfb === BigInt(-1) ? -1 : Math.trunc(Number(ttfb / BigInt(1000000))),\n durationMs: Math.trunc(Number(duration / BigInt(1000000))),\n charactersCount: text.length,\n audioDurationMs: roundedAudioDurationMs,\n cancelled: this.abortController.signal.aborted,\n label: this.#tts.label,\n streamed: false,\n };\n this.#tts.emit('metrics_collected', metrics);\n }\n };\n\n for await (const audio of this.queue) {\n if (this.abortController.signal.aborted) {\n break;\n }\n this.output.put(audio);\n if (audio === SynthesizeStream.END_OF_STREAM) continue;\n requestId = audio.requestId;\n if (ttfb === BigInt(-1)) {\n ttfb = process.hrtime.bigint() - startTime;\n }\n // TODO(AJS-102): use frame.durationMs once available in rtc-node\n audioDurationMs += (audio.frame.samplesPerChannel / audio.frame.sampleRate) * 1000;\n if (audio.final) {\n emit();\n }\n }\n\n if (requestId) {\n emit();\n }\n }\n\n protected abstract run(): Promise<void>;\n\n updateInputStream(text: ReadableStream<string>) {\n this.deferredInputStream.setSource(text);\n }\n\n /** Push a string of text to the TTS */\n /** @deprecated Use `updateInputStream` instead */\n pushText(text: string) {\n if (!this.#monitorMetricsTask) {\n this.#monitorMetricsTask = this.monitorMetrics();\n // Close output when metrics task completes\n this.#monitorMetricsTask.finally(() => this.output.close());\n }\n this.#metricsText += text;\n\n if (this.input.closed || this.closed) {\n // Stream was aborted/closed, silently skip\n return;\n }\n\n this.input.put(text);\n }\n\n /** Flush the TTS, causing it to process all pending text */\n flush() {\n if (this.#metricsText) {\n this.#metricsPendingTexts.push(this.#metricsText);\n this.#metricsText = '';\n }\n\n if (this.input.closed || this.closed) {\n // Stream was aborted/closed, silently skip\n return;\n }\n\n this.input.put(SynthesizeStream.FLUSH_SENTINEL);\n }\n\n /** Mark the input as ended and forbid additional pushes */\n endInput() {\n this.flush();\n\n if (this.input.closed || this.closed) {\n // Stream was aborted/closed, silently skip\n return;\n }\n\n this.input.close();\n }\n\n next(): Promise<IteratorResult<SynthesizedAudio | typeof SynthesizeStream.END_OF_STREAM>> {\n return this.output.next();\n }\n\n /** Close both the input and output of the TTS stream */\n close() {\n this.abortController.abort();\n }\n\n [Symbol.asyncIterator](): SynthesizeStream {\n return this;\n }\n}\n\n/**\n * An instance of a text-to-speech response, as an asynchronous iterable iterator.\n *\n * @example Looping through frames\n * ```ts\n * for await (const event of stream) {\n * await source.captureFrame(event.frame);\n * }\n * ```\n *\n * @remarks\n * This class is abstract, and as such cannot be used directly. Instead, use a provider plugin that\n * exports its own child ChunkedStream class, which inherits this class's methods.\n */\nexport abstract class ChunkedStream implements AsyncIterableIterator<SynthesizedAudio> {\n protected queue = new AsyncIterableQueue<SynthesizedAudio>();\n protected output = new AsyncIterableQueue<SynthesizedAudio>();\n protected closed = false;\n abstract label: string;\n #text: string;\n #tts: TTS;\n private _connOptions: APIConnectOptions;\n private logger = log();\n\n constructor(\n text: string,\n tts: TTS,\n connOptions: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,\n ) {\n this.#text = text;\n this.#tts = tts;\n this._connOptions = connOptions;\n\n this.monitorMetrics();\n\n // this is a hack to immitate asyncio.create_task so that mainTask\n // is run **after** the constructor has finished. Otherwise we get\n // runtime error when trying to access class variables in the\n // `run` method.\n Promise.resolve().then(() => this.mainTask().then(() => this.queue.close()));\n }\n\n private async mainTask() {\n // TODO(brian): PR3 - Add span wrapping: tracer.startActiveSpan('tts_request', ..., { endOnExit: false })\n for (let i = 0; i < this._connOptions.maxRetry + 1; i++) {\n try {\n // TODO(brian): PR3 - Add span for retry attempts: tracer.startActiveSpan('tts_request_run', ...)\n return await this.run();\n } catch (error) {\n if (error instanceof APIError) {\n const retryInterval = this._connOptions._intervalForRetry(i);\n\n if (this._connOptions.maxRetry === 0 || !error.retryable) {\n this.emitError({ error, recoverable: false });\n throw error;\n } else if (i === this._connOptions.maxRetry) {\n this.emitError({ error, recoverable: false });\n throw new APIConnectionError({\n message: `failed to generate TTS completion after ${this._connOptions.maxRetry + 1} attempts`,\n options: { retryable: false },\n });\n } else {\n // Don't emit error event for recoverable errors during retry loop\n // to avoid ERR_UNHANDLED_ERROR or premature session termination\n this.logger.warn(\n { tts: this.#tts.label, attempt: i + 1, error },\n `failed to generate TTS completion, retrying in ${retryInterval}s`,\n );\n }\n\n if (retryInterval > 0) {\n await delay(retryInterval);\n }\n } else {\n this.emitError({ error: toError(error), recoverable: false });\n throw error;\n }\n }\n }\n }\n\n private emitError({ error, recoverable }: { error: Error; recoverable: boolean }) {\n this.#tts.emit('error', {\n type: 'tts_error',\n timestamp: Date.now(),\n label: this.#tts.label,\n error,\n recoverable,\n });\n }\n\n protected abstract run(): Promise<void>;\n\n get inputText(): string {\n return this.#text;\n }\n\n protected async monitorMetrics() {\n const startTime = process.hrtime.bigint();\n let audioDurationMs = 0;\n let ttfb: bigint = BigInt(-1);\n let requestId = '';\n\n for await (const audio of this.queue) {\n this.output.put(audio);\n requestId = audio.requestId;\n if (ttfb === BigInt(-1)) {\n ttfb = process.hrtime.bigint() - startTime;\n }\n audioDurationMs += (audio.frame.samplesPerChannel / audio.frame.sampleRate) * 1000;\n }\n this.output.close();\n\n const duration = process.hrtime.bigint() - startTime;\n const metrics: TTSMetrics = {\n type: 'tts_metrics',\n timestamp: Date.now(),\n requestId,\n ttfbMs: ttfb === BigInt(-1) ? -1 : Math.trunc(Number(ttfb / BigInt(1000000))),\n durationMs: Math.trunc(Number(duration / BigInt(1000000))),\n charactersCount: this.#text.length,\n audioDurationMs: Math.round(audioDurationMs),\n cancelled: false, // TODO(AJS-186): support ChunkedStream with 1.0 - add this.abortController.signal.aborted here\n label: this.#tts.label,\n streamed: false,\n };\n this.#tts.emit('metrics_collected', metrics);\n }\n\n /** Collect every frame into one in a single call */\n async collect(): Promise<AudioFrame> {\n const frames = [];\n for await (const event of this) {\n frames.push(event.frame);\n }\n return mergeFrames(frames);\n }\n\n next(): Promise<IteratorResult<SynthesizedAudio>> {\n return this.output.next();\n }\n\n /** Close both the input and output of the TTS stream */\n close() {\n this.queue.close();\n this.output.close();\n this.closed = true;\n }\n\n [Symbol.asyncIterator](): ChunkedStream {\n return this;\n }\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAKA,yBAA6B;AAE7B,wBAA6C;AAC7C,iBAAoB;AAEpB,6BAAuC;AACvC,mBAAoE;AACpE,mBAA2E;AA+CpE,MAAe,YAAa,gCAAsD;AAAA,EACvF;AAAA,EACA;AAAA,EACA;AAAA,EAGA,YAAY,YAAoB,aAAqB,cAA+B;AAClF,UAAM;AACN,SAAK,gBAAgB;AACrB,SAAK,cAAc;AACnB,SAAK,eAAe;AAAA,EACtB;AAAA;AAAA,EAGA,IAAI,eAAgC;AAClC,WAAO,KAAK;AAAA,EACd;AAAA;AAAA,EAGA,IAAI,aAAqB;AACvB,WAAO,KAAK;AAAA,EACd;AAAA;AAAA,EAGA,IAAI,cAAsB;AACxB,WAAO,KAAK;AAAA,EACd;AAAA,EAYA,MAAM,QAAuB;AAC3B;AAAA,EACF;AACF;AAgBO,MAAe,iBAEtB;AAAA,EACE,OAA0B,iBAAiB,OAAO,gBAAgB;AAAA,EAClE,OAAgB,gBAAgB,OAAO,eAAe;AAAA,EAC5C,QAAQ,IAAI,gCAAoE;AAAA,EAChF,QAAQ,IAAI,gCAEpB;AAAA,EACQ,SAAS,IAAI,gCAErB;AAAA,EACQ,SAAS;AAAA,EAEnB;AAAA,EACA,uBAAiC,CAAC;AAAA,EAClC,eAAe;AAAA,EACf;AAAA,EACQ;AAAA,EACE,kBAAkB,IAAI,gBAAgB;AAAA,EAExC;AAAA,EAGA,aAAS,gBAAI;AAAA,EAErB,YAAY,KAAU,cAAiC,0CAA6B;AAClF,SAAK,OAAO;AACZ,SAAK,eAAe;AACpB,SAAK,sBAAsB,IAAI,8CAAuB;AACtD,SAAK,UAAU;AACf,SAAK,gBAAgB,OAAO,iBAAiB,SAAS,MAAM;AAC1D,WAAK,oBAAoB,aAAa;AAEtC,WAAK,MAAM,MAAM;AACjB,WAAK,OAAO,MAAM;AAClB,WAAK,SAAS;AAAA,IAChB,CAAC;AAMD,gCAAU,MAAM,KAAK,SAAS,EAAE,KAAK,MAAM,KAAK,MAAM,MAAM,CAAC,CAAC;AAAA,EAChE;AAAA,EAEA,MAAc,WAAW;AAEvB,aAAS,IAAI,GAAG,IAAI,KAAK,aAAa,WAAW,GAAG,KAAK;AACvD,UAAI;AAEF,eAAO,MAAM,KAAK,IAAI;AAAA,MACxB,SAAS,OAAO;AACd,YAAI,iBAAiB,4BAAU;AAC7B,gBAAM,gBAAgB,KAAK,aAAa,kBAAkB,CAAC;AAE3D,cAAI,KAAK,aAAa,aAAa,KAAK,CAAC,MAAM,WAAW;AACxD,iBAAK,UAAU,EAAE,OAAO,aAAa,MAAM,CAAC;AAC5C,kBAAM;AAAA,UACR,WAAW,MAAM,KAAK,aAAa,UAAU;AAC3C,iBAAK,UAAU,EAAE,OAAO,aAAa,MAAM,CAAC;AAC5C,kBAAM,IAAI,qCAAmB;AAAA,cAC3B,SAAS,2CAA2C,KAAK,aAAa,WAAW,CAAC;AAAA,cAClF,SAAS,EAAE,WAAW,MAAM;AAAA,YAC9B,CAAC;AAAA,UACH,OAAO;AAGL,iBAAK,OAAO;AAAA,cACV,EAAE,KAAK,KAAK,KAAK,OAAO,SAAS,IAAI,GAAG,MAAM;AAAA,cAC9C,6CAA6C,aAAa;AAAA,YAC5D;AAAA,UACF;AAEA,cAAI,gBAAgB,GAAG;AACrB,sBAAM,oBAAM,aAAa;AAAA,UAC3B;AAAA,QACF,OAAO;AACL,eAAK,UAAU,EAAE,WAAO,sBAAQ,KAAK,GAAG,aAAa,MAAM,CAAC;AAC5D,gBAAM;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEQ,UAAU,EAAE,OAAO,YAAY,GAA2C;AAChF,SAAK,KAAK,KAAK,SAAS;AAAA,MACtB,MAAM;AAAA,MACN,WAAW,KAAK,IAAI;AAAA,MACpB,OAAO,KAAK,KAAK;AAAA,MACjB;AAAA,MACA;AAAA,IACF,CAAC;AAAA,EACH;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAYA,MAAgB,YAAY;AAC1B,UAAM,SAAS,KAAK,oBAAoB,OAAO,UAAU;AACzD,QAAI;AACF,aAAO,MAAM;AACX,cAAM,EAAE,MAAM,MAAM,IAAI,MAAM,OAAO,KAAK;AAC1C,YAAI,QAAQ,UAAU,iBAAiB,gBAAgB;AACrD;AAAA,QACF;AACA,aAAK,SAAS,KAAK;AAAA,MACrB;AACA,WAAK,SAAS;AAAA,IAChB,SAAS,OAAO;AACd,WAAK,OAAO,MAAM,OAAO,qCAAqC;AAAA,IAChE,UAAE;AACA,aAAO,YAAY;AAEnB,UAAI,CAAC,KAAK,qBAAqB;AAE7B,aAAK,OAAO,MAAM;AAAA,MACpB;AAAA,IACF;AAAA,EACF;AAAA,EAEA,MAAgB,iBAAiB;AAC/B,UAAM,YAAY,QAAQ,OAAO,OAAO;AACxC,QAAI,kBAAkB;AACtB,QAAI,OAAe,OAAO,EAAE;AAC5B,QAAI,YAAY;AAEhB,UAAM,OAAO,MAAM;AACjB,UAAI,KAAK,qBAAqB,QAAQ;AACpC,cAAM,OAAO,KAAK,qBAAqB,MAAM;AAC7C,cAAM,WAAW,QAAQ,OAAO,OAAO,IAAI;AAC3C,cAAM,yBAAyB,KAAK,MAAM,eAAe;AACzD,cAAM,UAAsB;AAAA,UAC1B,MAAM;AAAA,UACN,WAAW,KAAK,IAAI;AAAA,UACpB;AAAA,UACA,QAAQ,SAAS,OAAO,EAAE,IAAI,KAAK,KAAK,MAAM,OAAO,OAAO,OAAO,GAAO,CAAC,CAAC;AAAA,UAC5E,YAAY,KAAK,MAAM,OAAO,WAAW,OAAO,GAAO,CAAC,CAAC;AAAA,UACzD,iBAAiB,KAAK;AAAA,UACtB,iBAAiB;AAAA,UACjB,WAAW,KAAK,gBAAgB,OAAO;AAAA,UACvC,OAAO,KAAK,KAAK;AAAA,UACjB,UAAU;AAAA,QACZ;AACA,aAAK,KAAK,KAAK,qBAAqB,OAAO;AAAA,MAC7C;AAAA,IACF;AAEA,qBAAiB,SAAS,KAAK,OAAO;AACpC,UAAI,KAAK,gBAAgB,OAAO,SAAS;AACvC;AAAA,MACF;AACA,WAAK,OAAO,IAAI,KAAK;AACrB,UAAI,UAAU,iBAAiB,cAAe;AAC9C,kBAAY,MAAM;AAClB,UAAI,SAAS,OAAO,EAAE,GAAG;AACvB,eAAO,QAAQ,OAAO,OAAO,IAAI;AAAA,MACnC;AAEA,yBAAoB,MAAM,MAAM,oBAAoB,MAAM,MAAM,aAAc;AAC9E,UAAI,MAAM,OAAO;AACf,aAAK;AAAA,MACP;AAAA,IACF;AAEA,QAAI,WAAW;AACb,WAAK;AAAA,IACP;AAAA,EACF;AAAA,EAIA,kBAAkB,MAA8B;AAC9C,SAAK,oBAAoB,UAAU,IAAI;AAAA,EACzC;AAAA;AAAA;AAAA,EAIA,SAAS,MAAc;AACrB,QAAI,CAAC,KAAK,qBAAqB;AAC7B,WAAK,sBAAsB,KAAK,eAAe;AAE/C,WAAK,oBAAoB,QAAQ,MAAM,KAAK,OAAO,MAAM,CAAC;AAAA,IAC5D;AACA,SAAK,gBAAgB;AAErB,QAAI,KAAK,MAAM,UAAU,KAAK,QAAQ;AAEpC;AAAA,IACF;AAEA,SAAK,MAAM,IAAI,IAAI;AAAA,EACrB;AAAA;AAAA,EAGA,QAAQ;AACN,QAAI,KAAK,cAAc;AACrB,WAAK,qBAAqB,KAAK,KAAK,YAAY;AAChD,WAAK,eAAe;AAAA,IACtB;AAEA,QAAI,KAAK,MAAM,UAAU,KAAK,QAAQ;AAEpC;AAAA,IACF;AAEA,SAAK,MAAM,IAAI,iBAAiB,cAAc;AAAA,EAChD;AAAA;AAAA,EAGA,WAAW;AACT,SAAK,MAAM;AAEX,QAAI,KAAK,MAAM,UAAU,KAAK,QAAQ;AAEpC;AAAA,IACF;AAEA,SAAK,MAAM,MAAM;AAAA,EACnB;AAAA,EAEA,OAA0F;AACxF,WAAO,KAAK,OAAO,KAAK;AAAA,EAC1B;AAAA;AAAA,EAGA,QAAQ;AACN,SAAK,gBAAgB,MAAM;AAAA,EAC7B;AAAA,EAEA,CAAC,OAAO,aAAa,IAAsB;AACzC,WAAO;AAAA,EACT;AACF;AAgBO,MAAe,cAAiE;AAAA,EAC3E,QAAQ,IAAI,gCAAqC;AAAA,EACjD,SAAS,IAAI,gCAAqC;AAAA,EAClD,SAAS;AAAA,EAEnB;AAAA,EACA;AAAA,EACQ;AAAA,EACA,aAAS,gBAAI;AAAA,EAErB,YACE,MACA,KACA,cAAiC,0CACjC;AACA,SAAK,QAAQ;AACb,SAAK,OAAO;AACZ,SAAK,eAAe;AAEpB,SAAK,eAAe;AAMpB,YAAQ,QAAQ,EAAE,KAAK,MAAM,KAAK,SAAS,EAAE,KAAK,MAAM,KAAK,MAAM,MAAM,CAAC,CAAC;AAAA,EAC7E;AAAA,EAEA,MAAc,WAAW;AAEvB,aAAS,IAAI,GAAG,IAAI,KAAK,aAAa,WAAW,GAAG,KAAK;AACvD,UAAI;AAEF,eAAO,MAAM,KAAK,IAAI;AAAA,MACxB,SAAS,OAAO;AACd,YAAI,iBAAiB,4BAAU;AAC7B,gBAAM,gBAAgB,KAAK,aAAa,kBAAkB,CAAC;AAE3D,cAAI,KAAK,aAAa,aAAa,KAAK,CAAC,MAAM,WAAW;AACxD,iBAAK,UAAU,EAAE,OAAO,aAAa,MAAM,CAAC;AAC5C,kBAAM;AAAA,UACR,WAAW,MAAM,KAAK,aAAa,UAAU;AAC3C,iBAAK,UAAU,EAAE,OAAO,aAAa,MAAM,CAAC;AAC5C,kBAAM,IAAI,qCAAmB;AAAA,cAC3B,SAAS,2CAA2C,KAAK,aAAa,WAAW,CAAC;AAAA,cAClF,SAAS,EAAE,WAAW,MAAM;AAAA,YAC9B,CAAC;AAAA,UACH,OAAO;AAGL,iBAAK,OAAO;AAAA,cACV,EAAE,KAAK,KAAK,KAAK,OAAO,SAAS,IAAI,GAAG,MAAM;AAAA,cAC9C,kDAAkD,aAAa;AAAA,YACjE;AAAA,UACF;AAEA,cAAI,gBAAgB,GAAG;AACrB,sBAAM,oBAAM,aAAa;AAAA,UAC3B;AAAA,QACF,OAAO;AACL,eAAK,UAAU,EAAE,WAAO,sBAAQ,KAAK,GAAG,aAAa,MAAM,CAAC;AAC5D,gBAAM;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEQ,UAAU,EAAE,OAAO,YAAY,GAA2C;AAChF,SAAK,KAAK,KAAK,SAAS;AAAA,MACtB,MAAM;AAAA,MACN,WAAW,KAAK,IAAI;AAAA,MACpB,OAAO,KAAK,KAAK;AAAA,MACjB;AAAA,MACA;AAAA,IACF,CAAC;AAAA,EACH;AAAA,EAIA,IAAI,YAAoB;AACtB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,MAAgB,iBAAiB;AAC/B,UAAM,YAAY,QAAQ,OAAO,OAAO;AACxC,QAAI,kBAAkB;AACtB,QAAI,OAAe,OAAO,EAAE;AAC5B,QAAI,YAAY;AAEhB,qBAAiB,SAAS,KAAK,OAAO;AACpC,WAAK,OAAO,IAAI,KAAK;AACrB,kBAAY,MAAM;AAClB,UAAI,SAAS,OAAO,EAAE,GAAG;AACvB,eAAO,QAAQ,OAAO,OAAO,IAAI;AAAA,MACnC;AACA,yBAAoB,MAAM,MAAM,oBAAoB,MAAM,MAAM,aAAc;AAAA,IAChF;AACA,SAAK,OAAO,MAAM;AAElB,UAAM,WAAW,QAAQ,OAAO,OAAO,IAAI;AAC3C,UAAM,UAAsB;AAAA,MAC1B,MAAM;AAAA,MACN,WAAW,KAAK,IAAI;AAAA,MACpB;AAAA,MACA,QAAQ,SAAS,OAAO,EAAE,IAAI,KAAK,KAAK,MAAM,OAAO,OAAO,OAAO,GAAO,CAAC,CAAC;AAAA,MAC5E,YAAY,KAAK,MAAM,OAAO,WAAW,OAAO,GAAO,CAAC,CAAC;AAAA,MACzD,iBAAiB,KAAK,MAAM;AAAA,MAC5B,iBAAiB,KAAK,MAAM,eAAe;AAAA,MAC3C,WAAW;AAAA;AAAA,MACX,OAAO,KAAK,KAAK;AAAA,MACjB,UAAU;AAAA,IACZ;AACA,SAAK,KAAK,KAAK,qBAAqB,OAAO;AAAA,EAC7C;AAAA;AAAA,EAGA,MAAM,UAA+B;AACnC,UAAM,SAAS,CAAC;AAChB,qBAAiB,SAAS,MAAM;AAC9B,aAAO,KAAK,MAAM,KAAK;AAAA,IACzB;AACA,eAAO,0BAAY,MAAM;AAAA,EAC3B;AAAA,EAEA,OAAkD;AAChD,WAAO,KAAK,OAAO,KAAK;AAAA,EAC1B;AAAA;AAAA,EAGA,QAAQ;AACN,SAAK,MAAM,MAAM;AACjB,SAAK,OAAO,MAAM;AAClB,SAAK,SAAS;AAAA,EAChB;AAAA,EAEA,CAAC,OAAO,aAAa,IAAmB;AACtC,WAAO;AAAA,EACT;AACF;","names":[]}
@@ -1 +1 @@
1
- {"version":3,"file":"tts.d.ts","sourceRoot":"","sources":["../../src/tts/tts.ts"],"names":[],"mappings":";AAGA,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,mBAAmB,CAAC;AACpD,OAAO,KAAK,EAAE,iBAAiB,IAAI,YAAY,EAAE,MAAM,wBAAwB,CAAC;AAEhF,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAC;AAGtD,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,oBAAoB,CAAC;AAErD,OAAO,EAAE,KAAK,iBAAiB,EAA+B,MAAM,aAAa,CAAC;AAClF,OAAO,EAAE,kBAAkB,EAA0C,MAAM,aAAa,CAAC;AAEzF,+EAA+E;AAC/E,MAAM,WAAW,gBAAgB;IAC/B,qEAAqE;IACrE,SAAS,EAAE,MAAM,CAAC;IAClB,uDAAuD;IACvD,SAAS,EAAE,MAAM,CAAC;IAClB,8BAA8B;IAC9B,KAAK,EAAE,UAAU,CAAC;IAClB,+CAA+C;IAC/C,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,qEAAqE;IACrE,KAAK,EAAE,OAAO,CAAC;CAChB;AAED;;;;;;GAMG;AACH,MAAM,WAAW,eAAe;IAC9B,SAAS,EAAE,OAAO,CAAC;CACpB;AAED,MAAM,WAAW,QAAQ;IACvB,IAAI,EAAE,WAAW,CAAC;IAClB,SAAS,EAAE,MAAM,CAAC;IAClB,KAAK,EAAE,MAAM,CAAC;IACd,KAAK,EAAE,KAAK,CAAC;IACb,WAAW,EAAE,OAAO,CAAC;CACtB;AAED,MAAM,MAAM,YAAY,GAAG;IACzB,CAAC,mBAAmB,CAAC,EAAE,CAAC,OAAO,EAAE,UAAU,KAAK,IAAI,CAAC;IACrD,CAAC,OAAO,CAAC,EAAE,CAAC,KAAK,EAAE,QAAQ,KAAK,IAAI,CAAC;CACtC,CAAC;kCAS2D,aAAa,YAAY,CAAC;AAPvF;;;;;;GAMG;AACH,8BAAsB,GAAI,SAAQ,QAAsD;;IAItF,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;gBAEX,UAAU,EAAE,MAAM,EAAE,WAAW,EAAE,MAAM,EAAE,YAAY,EAAE,eAAe;IAOlF,sCAAsC;IACtC,IAAI,YAAY,IAAI,eAAe,CAElC;IAED,mEAAmE;IACnE,IAAI,UAAU,IAAI,MAAM,CAEvB;IAED,qEAAqE;IACrE,IAAI,WAAW,IAAI,MAAM,CAExB;IAED;;OAEG;IACH,QAAQ,CAAC,UAAU,CAAC,IAAI,EAAE,MAAM,GAAG,aAAa;IAEhD;;OAEG;IACH,QAAQ,CAAC,MAAM,IAAI,gBAAgB;IAE7B,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC;CAG7B;AAED;;;;;;;;;;;;;GAaG;AACH,8BAAsB,gBACpB,YAAW,qBAAqB,CAAC,gBAAgB,GAAG,OAAO,gBAAgB,CAAC,aAAa,CAAC;;IAE1F,SAAS,CAAC,MAAM,CAAC,QAAQ,CAAC,cAAc,gBAA4B;IACpE,MAAM,CAAC,QAAQ,CAAC,aAAa,gBAA2B;IACxD,SAAS,CAAC,KAAK,sEAA6E;IAC5F,SAAS,CAAC,KAAK,+EAEX;IACJ,SAAS,CAAC,MAAM,+EAEZ;IACJ,SAAS,CAAC,MAAM,UAAS;IACzB,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;IAKvB,OAAO,CAAC,YAAY,CAAoB;IACxC,SAAS,CAAC,eAAe,kBAAyB;IAElD,OAAO,CAAC,mBAAmB,CAEzB;IACF,OAAO,CAAC,MAAM,CAAS;gBAEX,GAAG,EAAE,GAAG,EAAE,WAAW,GAAE,iBAA+C;YAoBpE,QAAQ;IAuCtB,OAAO,CAAC,SAAS;cAoBD,SAAS;cAuBT,cAAc;IAiD9B,SAAS,CAAC,QAAQ,CAAC,GAAG,IAAI,OAAO,CAAC,IAAI,CAAC;IAEvC,iBAAiB,CAAC,IAAI,EAAE,cAAc,CAAC,MAAM,CAAC;IAI9C,uCAAuC;IACvC,kDAAkD;IAClD,QAAQ,CAAC,IAAI,EAAE,MAAM;IAiBrB,4DAA4D;IAC5D,KAAK;IAcL,2DAA2D;IAC3D,QAAQ;IAWR,IAAI,IAAI,OAAO,CAAC,cAAc,CAAC,gBAAgB,GAAG,OAAO,gBAAgB,CAAC,aAAa,CAAC,CAAC;IAIzF,wDAAwD;IACxD,KAAK;IAIL,CAAC,MAAM,CAAC,aAAa,CAAC,IAAI,gBAAgB;CAG3C;AAED;;;;;;;;;;;;;GAaG;AACH,8BAAsB,aAAc,YAAW,qBAAqB,CAAC,gBAAgB,CAAC;;IACpF,SAAS,CAAC,KAAK,uCAA8C;IAC7D,SAAS,CAAC,MAAM,uCAA8C;IAC9D,SAAS,CAAC,MAAM,UAAS;IACzB,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;IAGvB,OAAO,CAAC,YAAY,CAAoB;IACxC,OAAO,CAAC,MAAM,CAAS;gBAGrB,IAAI,EAAE,MAAM,EACZ,GAAG,EAAE,GAAG,EACR,WAAW,GAAE,iBAA+C;YAehD,QAAQ;IAuCtB,OAAO,CAAC,SAAS;IAUjB,SAAS,CAAC,QAAQ,CAAC,GAAG,IAAI,OAAO,CAAC,IAAI,CAAC;IAEvC,IAAI,SAAS,IAAI,MAAM,CAEtB;cAEe,cAAc;IAgC9B,oDAAoD;IAC9C,OAAO,IAAI,OAAO,CAAC,UAAU,CAAC;IAQpC,IAAI,IAAI,OAAO,CAAC,cAAc,CAAC,gBAAgB,CAAC,CAAC;IAIjD,wDAAwD;IACxD,KAAK;IAML,CAAC,MAAM,CAAC,aAAa,CAAC,IAAI,aAAa;CAGxC"}
1
+ {"version":3,"file":"tts.d.ts","sourceRoot":"","sources":["../../src/tts/tts.ts"],"names":[],"mappings":";AAGA,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,mBAAmB,CAAC;AACpD,OAAO,KAAK,EAAE,iBAAiB,IAAI,YAAY,EAAE,MAAM,wBAAwB,CAAC;AAEhF,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAC;AAGtD,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,oBAAoB,CAAC;AAErD,OAAO,EAAE,KAAK,iBAAiB,EAA+B,MAAM,aAAa,CAAC;AAClF,OAAO,EAAE,kBAAkB,EAA0C,MAAM,aAAa,CAAC;AAEzF,+EAA+E;AAC/E,MAAM,WAAW,gBAAgB;IAC/B,qEAAqE;IACrE,SAAS,EAAE,MAAM,CAAC;IAClB,uDAAuD;IACvD,SAAS,EAAE,MAAM,CAAC;IAClB,8BAA8B;IAC9B,KAAK,EAAE,UAAU,CAAC;IAClB,+CAA+C;IAC/C,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,qEAAqE;IACrE,KAAK,EAAE,OAAO,CAAC;CAChB;AAED;;;;;;GAMG;AACH,MAAM,WAAW,eAAe;IAC9B,SAAS,EAAE,OAAO,CAAC;CACpB;AAED,MAAM,WAAW,QAAQ;IACvB,IAAI,EAAE,WAAW,CAAC;IAClB,SAAS,EAAE,MAAM,CAAC;IAClB,KAAK,EAAE,MAAM,CAAC;IACd,KAAK,EAAE,KAAK,CAAC;IACb,WAAW,EAAE,OAAO,CAAC;CACtB;AAED,MAAM,MAAM,YAAY,GAAG;IACzB,CAAC,mBAAmB,CAAC,EAAE,CAAC,OAAO,EAAE,UAAU,KAAK,IAAI,CAAC;IACrD,CAAC,OAAO,CAAC,EAAE,CAAC,KAAK,EAAE,QAAQ,KAAK,IAAI,CAAC;CACtC,CAAC;kCAS2D,aAAa,YAAY,CAAC;AAPvF;;;;;;GAMG;AACH,8BAAsB,GAAI,SAAQ,QAAsD;;IAItF,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;gBAEX,UAAU,EAAE,MAAM,EAAE,WAAW,EAAE,MAAM,EAAE,YAAY,EAAE,eAAe;IAOlF,sCAAsC;IACtC,IAAI,YAAY,IAAI,eAAe,CAElC;IAED,mEAAmE;IACnE,IAAI,UAAU,IAAI,MAAM,CAEvB;IAED,qEAAqE;IACrE,IAAI,WAAW,IAAI,MAAM,CAExB;IAED;;OAEG;IACH,QAAQ,CAAC,UAAU,CAAC,IAAI,EAAE,MAAM,GAAG,aAAa;IAEhD;;OAEG;IACH,QAAQ,CAAC,MAAM,IAAI,gBAAgB;IAE7B,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC;CAG7B;AAED;;;;;;;;;;;;;GAaG;AACH,8BAAsB,gBACpB,YAAW,qBAAqB,CAAC,gBAAgB,GAAG,OAAO,gBAAgB,CAAC,aAAa,CAAC;;IAE1F,SAAS,CAAC,MAAM,CAAC,QAAQ,CAAC,cAAc,gBAA4B;IACpE,MAAM,CAAC,QAAQ,CAAC,aAAa,gBAA2B;IACxD,SAAS,CAAC,KAAK,sEAA6E;IAC5F,SAAS,CAAC,KAAK,+EAEX;IACJ,SAAS,CAAC,MAAM,+EAEZ;IACJ,SAAS,CAAC,MAAM,UAAS;IACzB,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;IAKvB,OAAO,CAAC,YAAY,CAAoB;IACxC,SAAS,CAAC,eAAe,kBAAyB;IAElD,OAAO,CAAC,mBAAmB,CAEzB;IACF,OAAO,CAAC,MAAM,CAAS;gBAEX,GAAG,EAAE,GAAG,EAAE,WAAW,GAAE,iBAA+C;YAoBpE,QAAQ;IAuCtB,OAAO,CAAC,SAAS;cAoBD,SAAS;cAuBT,cAAc;IAiD9B,SAAS,CAAC,QAAQ,CAAC,GAAG,IAAI,OAAO,CAAC,IAAI,CAAC;IAEvC,iBAAiB,CAAC,IAAI,EAAE,cAAc,CAAC,MAAM,CAAC;IAI9C,uCAAuC;IACvC,kDAAkD;IAClD,QAAQ,CAAC,IAAI,EAAE,MAAM;IAgBrB,4DAA4D;IAC5D,KAAK;IAcL,2DAA2D;IAC3D,QAAQ;IAWR,IAAI,IAAI,OAAO,CAAC,cAAc,CAAC,gBAAgB,GAAG,OAAO,gBAAgB,CAAC,aAAa,CAAC,CAAC;IAIzF,wDAAwD;IACxD,KAAK;IAIL,CAAC,MAAM,CAAC,aAAa,CAAC,IAAI,gBAAgB;CAG3C;AAED;;;;;;;;;;;;;GAaG;AACH,8BAAsB,aAAc,YAAW,qBAAqB,CAAC,gBAAgB,CAAC;;IACpF,SAAS,CAAC,KAAK,uCAA8C;IAC7D,SAAS,CAAC,MAAM,uCAA8C;IAC9D,SAAS,CAAC,MAAM,UAAS;IACzB,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;IAGvB,OAAO,CAAC,YAAY,CAAoB;IACxC,OAAO,CAAC,MAAM,CAAS;gBAGrB,IAAI,EAAE,MAAM,EACZ,GAAG,EAAE,GAAG,EACR,WAAW,GAAE,iBAA+C;YAehD,QAAQ;IAuCtB,OAAO,CAAC,SAAS;IAUjB,SAAS,CAAC,QAAQ,CAAC,GAAG,IAAI,OAAO,CAAC,IAAI,CAAC;IAEvC,IAAI,SAAS,IAAI,MAAM,CAEtB;cAEe,cAAc;IAgC9B,oDAAoD;IAC9C,OAAO,IAAI,OAAO,CAAC,UAAU,CAAC;IAQpC,IAAI,IAAI,OAAO,CAAC,cAAc,CAAC,gBAAgB,CAAC,CAAC;IAIjD,wDAAwD;IACxD,KAAK;IAML,CAAC,MAAM,CAAC,aAAa,CAAC,IAAI,aAAa;CAGxC"}
package/dist/tts/tts.js CHANGED
@@ -184,11 +184,8 @@ class SynthesizeStream {
184
184
  this.#monitorMetricsTask.finally(() => this.output.close());
185
185
  }
186
186
  this.#metricsText += text;
187
- if (this.input.closed) {
188
- throw new Error("Input is closed");
189
- }
190
- if (this.closed) {
191
- throw new Error("Stream is closed");
187
+ if (this.input.closed || this.closed) {
188
+ return;
192
189
  }
193
190
  this.input.put(text);
194
191
  }
@@ -198,22 +195,16 @@ class SynthesizeStream {
198
195
  this.#metricsPendingTexts.push(this.#metricsText);
199
196
  this.#metricsText = "";
200
197
  }
201
- if (this.input.closed) {
202
- throw new Error("Input is closed");
203
- }
204
- if (this.closed) {
205
- throw new Error("Stream is closed");
198
+ if (this.input.closed || this.closed) {
199
+ return;
206
200
  }
207
201
  this.input.put(SynthesizeStream.FLUSH_SENTINEL);
208
202
  }
209
203
  /** Mark the input as ended and forbid additional pushes */
210
204
  endInput() {
211
205
  this.flush();
212
- if (this.input.closed) {
213
- throw new Error("Input is closed");
214
- }
215
- if (this.closed) {
216
- throw new Error("Stream is closed");
206
+ if (this.input.closed || this.closed) {
207
+ return;
217
208
  }
218
209
  this.input.close();
219
210
  }