@livekit/agents 1.0.49 → 1.0.51
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +12 -10
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +13 -13
- package/dist/index.d.ts +13 -13
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +11 -10
- package/dist/index.js.map +1 -1
- package/dist/inference/api_protos.d.cts +67 -67
- package/dist/inference/api_protos.d.ts +67 -67
- package/dist/inference/llm.cjs +10 -8
- package/dist/inference/llm.cjs.map +1 -1
- package/dist/inference/llm.d.cts +1 -1
- package/dist/inference/llm.d.ts +1 -1
- package/dist/inference/llm.d.ts.map +1 -1
- package/dist/inference/llm.js +3 -7
- package/dist/inference/llm.js.map +1 -1
- package/dist/inference/stt.cjs +20 -12
- package/dist/inference/stt.cjs.map +1 -1
- package/dist/inference/stt.d.cts +3 -2
- package/dist/inference/stt.d.ts +3 -2
- package/dist/inference/stt.d.ts.map +1 -1
- package/dist/inference/stt.js +20 -12
- package/dist/inference/stt.js.map +1 -1
- package/dist/inference/stt.test.cjs +14 -0
- package/dist/inference/stt.test.cjs.map +1 -1
- package/dist/inference/stt.test.js +14 -0
- package/dist/inference/stt.test.js.map +1 -1
- package/dist/inference/tts.cjs +13 -4
- package/dist/inference/tts.cjs.map +1 -1
- package/dist/inference/tts.d.cts +2 -1
- package/dist/inference/tts.d.ts +2 -1
- package/dist/inference/tts.d.ts.map +1 -1
- package/dist/inference/tts.js +13 -4
- package/dist/inference/tts.js.map +1 -1
- package/dist/inference/tts.test.cjs +10 -0
- package/dist/inference/tts.test.cjs.map +1 -1
- package/dist/inference/tts.test.js +10 -0
- package/dist/inference/tts.test.js.map +1 -1
- package/dist/inference/utils.cjs +5 -5
- package/dist/inference/utils.cjs.map +1 -1
- package/dist/inference/utils.js +1 -1
- package/dist/inference/utils.js.map +1 -1
- package/dist/ipc/job_proc_lazy_main.cjs +13 -4
- package/dist/ipc/job_proc_lazy_main.cjs.map +1 -1
- package/dist/ipc/job_proc_lazy_main.js +13 -4
- package/dist/ipc/job_proc_lazy_main.js.map +1 -1
- package/dist/language.cjs +394 -0
- package/dist/language.cjs.map +1 -0
- package/dist/language.d.cts +15 -0
- package/dist/language.d.ts +15 -0
- package/dist/language.d.ts.map +1 -0
- package/dist/language.js +363 -0
- package/dist/language.js.map +1 -0
- package/dist/language.test.cjs +43 -0
- package/dist/language.test.cjs.map +1 -0
- package/dist/language.test.js +49 -0
- package/dist/language.test.js.map +1 -0
- package/dist/stream/deferred_stream.cjs +6 -2
- package/dist/stream/deferred_stream.cjs.map +1 -1
- package/dist/stream/deferred_stream.d.ts.map +1 -1
- package/dist/stream/deferred_stream.js +6 -2
- package/dist/stream/deferred_stream.js.map +1 -1
- package/dist/stt/stt.cjs.map +1 -1
- package/dist/stt/stt.d.cts +2 -1
- package/dist/stt/stt.d.ts +2 -1
- package/dist/stt/stt.d.ts.map +1 -1
- package/dist/stt/stt.js.map +1 -1
- package/dist/version.cjs +1 -1
- package/dist/version.js +1 -1
- package/dist/voice/agent_activity.cjs +4 -1
- package/dist/voice/agent_activity.cjs.map +1 -1
- package/dist/voice/agent_activity.d.ts.map +1 -1
- package/dist/voice/agent_activity.js +4 -1
- package/dist/voice/agent_activity.js.map +1 -1
- package/dist/voice/agent_activity.test.cjs +135 -0
- package/dist/voice/agent_activity.test.cjs.map +1 -0
- package/dist/voice/agent_activity.test.js +134 -0
- package/dist/voice/agent_activity.test.js.map +1 -0
- package/dist/voice/audio_recognition.cjs.map +1 -1
- package/dist/voice/audio_recognition.d.cts +3 -2
- package/dist/voice/audio_recognition.d.ts +3 -2
- package/dist/voice/audio_recognition.d.ts.map +1 -1
- package/dist/voice/audio_recognition.js.map +1 -1
- package/dist/voice/events.cjs.map +1 -1
- package/dist/voice/events.d.cts +3 -2
- package/dist/voice/events.d.ts +3 -2
- package/dist/voice/events.d.ts.map +1 -1
- package/dist/voice/events.js.map +1 -1
- package/package.json +1 -1
- package/src/index.ts +13 -15
- package/src/inference/llm.ts +3 -8
- package/src/inference/stt.test.ts +17 -0
- package/src/inference/stt.ts +22 -14
- package/src/inference/tts.test.ts +12 -0
- package/src/inference/tts.ts +14 -5
- package/src/inference/utils.ts +1 -1
- package/src/ipc/job_proc_lazy_main.ts +15 -4
- package/src/language.test.ts +62 -0
- package/src/language.ts +380 -0
- package/src/stream/deferred_stream.ts +5 -1
- package/src/stt/stt.ts +2 -1
- package/src/voice/agent_activity.test.ts +194 -0
- package/src/voice/agent_activity.ts +11 -1
- package/src/voice/audio_recognition.ts +4 -3
- package/src/voice/events.ts +3 -2
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../../src/inference/stt.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2025 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { type AudioFrame } from '@livekit/rtc-node';\nimport type { WebSocket } from 'ws';\nimport { APIError, APIStatusError } from '../_exceptions.js';\nimport { AudioByteStream } from '../audio.js';\nimport { log } from '../log.js';\nimport { createStreamChannel } from '../stream/stream_channel.js';\nimport {\n STT as BaseSTT,\n SpeechStream as BaseSpeechStream,\n type SpeechData,\n type SpeechEvent,\n SpeechEventType,\n} from '../stt/index.js';\nimport { type APIConnectOptions, DEFAULT_API_CONNECT_OPTIONS } from '../types.js';\nimport { type AudioBuffer, Event, Task, cancelAndWait, shortuuid, waitForAbort } from '../utils.js';\nimport { type TimedString, createTimedString } from '../voice/io.js';\nimport {\n type SttServerEvent,\n type SttTranscriptEvent,\n sttServerEventSchema,\n} from './api_protos.js';\nimport { type AnyString, connectWs, createAccessToken } from './utils.js';\n\nexport type DeepgramModels =\n | 'deepgram/flux-general'\n | 'deepgram/nova-3'\n | 'deepgram/nova-3-medical'\n | 'deepgram/nova-2'\n | 'deepgram/nova-2-medical'\n | 'deepgram/nova-2-conversationalai'\n | 'deepgram/nova-2-phonecall';\n\nexport type CartesiaModels = 'cartesia/ink-whisper';\n\nexport type AssemblyaiModels =\n | 'assemblyai/universal-streaming'\n | 'assemblyai/universal-streaming-multilingual';\n\nexport type ElevenlabsSTTModels = 'elevenlabs/scribe_v2_realtime';\n\nexport interface CartesiaOptions {\n /** Minimum volume threshold. Default: not specified. */\n min_volume?: number;\n /** Maximum silence duration in seconds. Default: not specified. */\n max_silence_duration_secs?: number;\n}\n\nexport interface DeepgramOptions {\n /** Enable filler words. Default: true. */\n filler_words?: boolean;\n /** Enable interim results. Default: true. */\n interim_results?: boolean;\n /** Endpointing timeout in milliseconds. Default: 25. */\n endpointing?: number;\n /** Enable punctuation. Default: false. */\n punctuate?: boolean;\n /** Enable smart formatting. */\n smart_format?: boolean;\n /** Keywords with boost values. */\n keywords?: Array<[string, number]>;\n /** Key terms for recognition. */\n keyterms?: string[];\n /** Enable profanity filter. */\n profanity_filter?: boolean;\n /** Convert spoken numbers to numerals. */\n numerals?: boolean;\n /** Opt out of model improvement program. */\n mip_opt_out?: boolean;\n}\n\nexport interface AssemblyAIOptions {\n /** Enable turn formatting. Default: false. */\n format_turns?: boolean;\n /** End of turn confidence threshold. Default: 0.01. */\n end_of_turn_confidence_threshold?: number;\n /** Minimum silence duration in milliseconds when confident about end of turn. Default: 0. */\n min_end_of_turn_silence_when_confident?: number;\n /** Maximum turn silence in milliseconds. Default: not specified. */\n max_turn_silence?: number;\n /** Key terms prompt for recognition. Default: not specified. */\n keyterms_prompt?: string[];\n}\n\nexport type STTLanguages =\n | 'multi'\n | 'en'\n | 'de'\n | 'es'\n | 'fr'\n | 'ja'\n | 'pt'\n | 'zh'\n | 'hi'\n | AnyString;\n\ntype _STTModels = DeepgramModels | CartesiaModels | AssemblyaiModels | ElevenlabsSTTModels;\n\nexport type STTModels = _STTModels | 'auto' | AnyString;\n\nexport type ModelWithLanguage = `${_STTModels}:${STTLanguages}` | STTModels;\n\nexport type STTOptions<TModel extends STTModels> = TModel extends DeepgramModels\n ? DeepgramOptions\n : TModel extends CartesiaModels\n ? CartesiaOptions\n : TModel extends AssemblyaiModels\n ? AssemblyAIOptions\n : Record<string, unknown>;\n\n/** A fallback model with optional extra configuration. Extra fields are passed through to the provider. */\nexport interface STTFallbackModel {\n /** Model name (e.g. \"deepgram/nova-3\", \"assemblyai/universal-streaming\", \"cartesia/ink-whisper\"). */\n model: string;\n /** Extra configuration for the model. */\n extraKwargs?: Record<string, unknown>;\n}\n\nexport type STTFallbackModelType = STTFallbackModel | string;\n\n/** Parse a model string into [model, language]. Language is undefined if not specified. */\nexport function parseSTTModelString(model: string): [string, string | undefined] {\n const idx = model.lastIndexOf(':');\n if (idx !== -1) {\n return [model.slice(0, idx), model.slice(idx + 1)];\n }\n return [model, undefined];\n}\n\n/** Normalize a single or list of FallbackModelType into STTFallbackModel[]. */\nexport function normalizeSTTFallback(\n fallback: STTFallbackModelType | STTFallbackModelType[],\n): STTFallbackModel[] {\n const makeFallback = (model: STTFallbackModelType): STTFallbackModel => {\n if (typeof model === 'string') {\n const [name] = parseSTTModelString(model);\n return { model: name };\n }\n return model;\n };\n\n if (Array.isArray(fallback)) {\n return fallback.map(makeFallback);\n }\n return [makeFallback(fallback)];\n}\n\nexport type STTEncoding = 'pcm_s16le';\n\nconst DEFAULT_ENCODING: STTEncoding = 'pcm_s16le';\nconst DEFAULT_SAMPLE_RATE = 16000;\nconst DEFAULT_BASE_URL = 'wss://agent-gateway.livekit.cloud/v1';\nconst DEFAULT_CANCEL_TIMEOUT = 5000;\n\nexport interface InferenceSTTOptions<TModel extends STTModels> {\n model?: TModel;\n language?: STTLanguages;\n encoding: STTEncoding;\n sampleRate: number;\n baseURL: string;\n apiKey: string;\n apiSecret: string;\n modelOptions: STTOptions<TModel>;\n fallback?: STTFallbackModel[];\n connOptions?: APIConnectOptions;\n}\n\n/**\n * Livekit Cloud Inference STT\n */\nexport class STT<TModel extends STTModels> extends BaseSTT {\n private opts: InferenceSTTOptions<TModel>;\n private streams: Set<SpeechStream<TModel>> = new Set();\n\n #logger = log();\n\n constructor(opts?: {\n model?: ModelWithLanguage;\n language?: STTLanguages;\n baseURL?: string;\n encoding?: STTEncoding;\n sampleRate?: number;\n apiKey?: string;\n apiSecret?: string;\n modelOptions?: STTOptions<TModel>;\n fallback?: STTFallbackModelType | STTFallbackModelType[];\n connOptions?: APIConnectOptions;\n }) {\n super({ streaming: true, interimResults: true, alignedTranscript: 'word' });\n\n const {\n model,\n language,\n baseURL,\n encoding = DEFAULT_ENCODING,\n sampleRate = DEFAULT_SAMPLE_RATE,\n apiKey,\n apiSecret,\n modelOptions = {} as STTOptions<TModel>,\n fallback,\n connOptions,\n } = opts || {};\n\n const lkBaseURL = baseURL || process.env.LIVEKIT_INFERENCE_URL || DEFAULT_BASE_URL;\n const lkApiKey = apiKey || process.env.LIVEKIT_INFERENCE_API_KEY || process.env.LIVEKIT_API_KEY;\n if (!lkApiKey) {\n throw new Error('apiKey is required: pass apiKey or set LIVEKIT_API_KEY');\n }\n\n const lkApiSecret =\n apiSecret || process.env.LIVEKIT_INFERENCE_API_SECRET || process.env.LIVEKIT_API_SECRET;\n if (!lkApiSecret) {\n throw new Error('apiSecret is required: pass apiSecret or set LIVEKIT_API_SECRET');\n }\n\n // Parse language from model string if provided: \"provider/model:language\"\n let nextModel = model;\n let nextLanguage = language;\n if (typeof nextModel === 'string') {\n const idx = nextModel.lastIndexOf(':');\n if (idx !== -1) {\n const languageFromModel = nextModel.slice(idx + 1) as STTLanguages;\n if (nextLanguage && nextLanguage !== languageFromModel) {\n this.#logger.warn(\n '`language` is provided via both argument and model, using the one from the argument',\n { language: nextLanguage, model: nextModel },\n );\n } else {\n nextLanguage = languageFromModel;\n }\n nextModel = nextModel.slice(0, idx) as TModel;\n }\n }\n const normalizedFallback = fallback ? normalizeSTTFallback(fallback) : undefined;\n\n this.opts = {\n model: nextModel as TModel,\n language: nextLanguage,\n encoding,\n sampleRate,\n baseURL: lkBaseURL,\n apiKey: lkApiKey,\n apiSecret: lkApiSecret,\n modelOptions,\n fallback: normalizedFallback,\n connOptions: connOptions ?? DEFAULT_API_CONNECT_OPTIONS,\n };\n }\n\n get label(): string {\n return 'inference.STT';\n }\n\n static fromModelString(modelString: string): STT<AnyString> {\n const [model, language] = parseSTTModelString(modelString);\n return new STT({ model, language });\n }\n\n protected async _recognize(_: AudioBuffer): Promise<SpeechEvent> {\n throw new Error('LiveKit STT does not support batch recognition, use stream() instead');\n }\n\n updateOptions(opts: Partial<Pick<InferenceSTTOptions<TModel>, 'model' | 'language'>>): void {\n this.opts = { ...this.opts, ...opts };\n\n for (const stream of this.streams) {\n stream.updateOptions(opts);\n }\n }\n\n stream(options?: {\n language?: STTLanguages | string;\n connOptions?: APIConnectOptions;\n }): SpeechStream<TModel> {\n const { language, connOptions = this.opts.connOptions ?? DEFAULT_API_CONNECT_OPTIONS } =\n options || {};\n const streamOpts = {\n ...this.opts,\n language: language ?? this.opts.language,\n } as InferenceSTTOptions<TModel>;\n\n const stream = new SpeechStream(this, streamOpts, connOptions);\n this.streams.add(stream);\n\n return stream;\n }\n\n async connectWs(timeout: number): Promise<WebSocket> {\n const params = {\n settings: {\n sample_rate: String(this.opts.sampleRate),\n encoding: this.opts.encoding,\n extra: this.opts.modelOptions,\n },\n } as Record<string, unknown>;\n\n if (this.opts.model && this.opts.model !== 'auto') {\n params.model = this.opts.model;\n }\n\n if (this.opts.language) {\n (params.settings as Record<string, unknown>).language = this.opts.language;\n }\n\n if (this.opts.fallback?.length) {\n params.fallback = {\n models: this.opts.fallback.map((m) => ({\n model: m.model,\n extra: m.extraKwargs ?? {},\n })),\n };\n }\n\n if (this.opts.connOptions) {\n params.connection = {\n timeout: this.opts.connOptions.timeoutMs / 1000,\n retries: this.opts.connOptions.maxRetry,\n };\n }\n\n let baseURL = this.opts.baseURL;\n if (baseURL.startsWith('http://') || baseURL.startsWith('https://')) {\n baseURL = baseURL.replace('http', 'ws');\n }\n\n const token = await createAccessToken(this.opts.apiKey, this.opts.apiSecret);\n const url = `${baseURL}/stt`;\n const headers = { Authorization: `Bearer ${token}` } as Record<string, string>;\n\n const socket = await connectWs(url, headers, timeout);\n const msg = { ...params, type: 'session.create' };\n socket.send(JSON.stringify(msg));\n\n return socket;\n }\n}\n\nexport class SpeechStream<TModel extends STTModels> extends BaseSpeechStream {\n private opts: InferenceSTTOptions<TModel>;\n private requestId = shortuuid('stt_request_');\n private speaking = false;\n private speechDuration = 0;\n private reconnectEvent = new Event();\n private stt: STT<TModel>;\n private connOptions: APIConnectOptions;\n\n #logger = log();\n\n constructor(\n sttImpl: STT<TModel>,\n opts: InferenceSTTOptions<TModel>,\n connOptions: APIConnectOptions,\n ) {\n super(sttImpl, opts.sampleRate, connOptions);\n this.opts = opts;\n this.stt = sttImpl;\n this.connOptions = connOptions;\n }\n\n get label(): string {\n return 'inference.SpeechStream';\n }\n\n updateOptions(opts: Partial<Pick<InferenceSTTOptions<TModel>, 'model' | 'language'>>): void {\n this.opts = { ...this.opts, ...opts };\n this.reconnectEvent.set();\n }\n\n protected async run(): Promise<void> {\n while (true) {\n // Create fresh resources for each connection attempt\n let ws: WebSocket | null = null;\n let closing = false;\n let finalReceived = false;\n\n const eventChannel = createStreamChannel<SttServerEvent>();\n\n const resourceCleanup = () => {\n if (closing) return;\n closing = true;\n eventChannel.close();\n ws?.removeAllListeners();\n ws?.close();\n };\n\n const createWsListener = async (ws: WebSocket, signal: AbortSignal) => {\n return new Promise<void>((resolve, reject) => {\n const onAbort = () => {\n resourceCleanup();\n reject(new Error('WebSocket connection aborted'));\n };\n\n signal.addEventListener('abort', onAbort, { once: true });\n\n ws.on('message', (data) => {\n const json = JSON.parse(data.toString()) as SttServerEvent;\n eventChannel.write(json);\n });\n\n ws.on('error', (e) => {\n this.#logger.error({ error: e }, 'WebSocket error');\n resourceCleanup();\n reject(e);\n });\n\n ws.on('close', (code: number) => {\n resourceCleanup();\n\n if (!closing) return this.#logger.error('WebSocket closed unexpectedly');\n if (finalReceived) return resolve();\n\n reject(\n new APIStatusError({\n message: 'LiveKit STT connection closed unexpectedly',\n options: { statusCode: code },\n }),\n );\n });\n });\n };\n\n const send = async (socket: WebSocket, signal: AbortSignal) => {\n const audioStream = new AudioByteStream(\n this.opts.sampleRate,\n 1,\n Math.floor(this.opts.sampleRate / 20), // 50ms\n );\n\n // Create abort promise once to avoid memory leak\n const abortPromise = new Promise<never>((_, reject) => {\n if (signal.aborted) {\n return reject(new Error('Send aborted'));\n }\n const onAbort = () => reject(new Error('Send aborted'));\n signal.addEventListener('abort', onAbort, { once: true });\n });\n\n // Manual iteration to support cancellation\n const iterator = this.input[Symbol.asyncIterator]();\n try {\n while (true) {\n const result = await Promise.race([iterator.next(), abortPromise]);\n\n if (result.done) break;\n const ev = result.value;\n\n let frames: AudioFrame[];\n if (ev === SpeechStream.FLUSH_SENTINEL) {\n frames = audioStream.flush();\n } else {\n const frame = ev as AudioFrame;\n frames = audioStream.write(new Int16Array(frame.data).buffer);\n }\n\n for (const frame of frames) {\n this.speechDuration += frame.samplesPerChannel / frame.sampleRate;\n const base64 = Buffer.from(frame.data.buffer).toString('base64');\n const msg = { type: 'input_audio', audio: base64 };\n socket.send(JSON.stringify(msg));\n }\n }\n\n closing = true;\n socket.send(JSON.stringify({ type: 'session.finalize' }));\n } catch (e) {\n if ((e as Error).message === 'Send aborted') {\n // Expected abort, don't log\n return;\n }\n throw e;\n }\n };\n\n const recv = async (signal: AbortSignal) => {\n const serverEventStream = eventChannel.stream();\n const reader = serverEventStream.getReader();\n\n try {\n while (!this.closed && !signal.aborted) {\n const result = await reader.read();\n if (signal.aborted) return;\n if (result.done) return;\n\n // Parse and validate with Zod schema\n const parseResult = await sttServerEventSchema.safeParseAsync(result.value);\n if (!parseResult.success) {\n this.#logger.warn(\n { error: parseResult.error, rawData: result.value },\n 'Failed to parse STT server event',\n );\n continue;\n }\n\n const event: SttServerEvent = parseResult.data;\n\n switch (event.type) {\n case 'session.created':\n case 'session.finalized':\n break;\n case 'session.closed':\n finalReceived = true;\n resourceCleanup();\n break;\n case 'interim_transcript':\n this.processTranscript(event, false);\n break;\n case 'final_transcript':\n this.processTranscript(event, true);\n break;\n case 'error':\n this.#logger.error({ error: event }, 'Received error from LiveKit STT');\n resourceCleanup();\n throw new APIError(`LiveKit STT returned error: ${JSON.stringify(event)}`);\n }\n }\n } finally {\n reader.releaseLock();\n try {\n await serverEventStream.cancel();\n } catch (e) {\n this.#logger.debug('Error cancelling serverEventStream (may already be cancelled):', e);\n }\n }\n };\n\n try {\n ws = await this.stt.connectWs(this.connOptions.timeoutMs);\n\n const controller = this.abortController; // Use base class abortController for proper cancellation\n const sendTask = Task.from(({ signal }) => send(ws!, signal), controller);\n const wsListenerTask = Task.from(({ signal }) => createWsListener(ws!, signal), controller);\n const recvTask = Task.from(({ signal }) => recv(signal), controller);\n const waitReconnectTask = Task.from(\n ({ signal }) => Promise.race([this.reconnectEvent.wait(), waitForAbort(signal)]),\n controller,\n );\n\n try {\n await Promise.race([\n Promise.all([sendTask.result, wsListenerTask.result, recvTask.result]),\n waitReconnectTask.result,\n ]);\n\n // If reconnect didn't trigger, tasks finished - exit loop\n if (!waitReconnectTask.done) break;\n\n // Reconnect triggered - clear event and continue loop\n this.reconnectEvent.clear();\n } finally {\n // Cancel all tasks to ensure cleanup\n await cancelAndWait(\n [sendTask, wsListenerTask, recvTask, waitReconnectTask],\n DEFAULT_CANCEL_TIMEOUT,\n );\n resourceCleanup();\n }\n } finally {\n // Ensure cleanup even if connectWs throws\n resourceCleanup();\n }\n }\n }\n\n private processTranscript(data: SttTranscriptEvent, isFinal: boolean) {\n // Check if queue is closed to avoid race condition during disconnect\n if (this.queue.closed) return;\n\n const requestId = data.session_id || this.requestId;\n const text = data.transcript;\n const language = data.language || this.opts.language || 'en';\n\n if (!text && !isFinal) return;\n\n try {\n // We'll have a more accurate way of detecting when speech started when we have VAD\n if (!this.speaking) {\n this.speaking = true;\n this.queue.put({ type: SpeechEventType.START_OF_SPEECH });\n }\n\n const speechData: SpeechData = {\n language,\n startTime: this.startTimeOffset + data.start,\n endTime: this.startTimeOffset + data.start + data.duration,\n confidence: data.confidence,\n text,\n words: data.words.map(\n (word): TimedString =>\n createTimedString({\n text: word.word,\n startTime: word.start + this.startTimeOffset,\n endTime: word.end + this.startTimeOffset,\n startTimeOffset: this.startTimeOffset,\n confidence: word.confidence,\n }),\n ),\n };\n\n if (isFinal) {\n if (this.speechDuration > 0) {\n this.queue.put({\n type: SpeechEventType.RECOGNITION_USAGE,\n requestId,\n recognitionUsage: { audioDuration: this.speechDuration },\n });\n this.speechDuration = 0;\n }\n\n this.queue.put({\n type: SpeechEventType.FINAL_TRANSCRIPT,\n requestId,\n alternatives: [speechData],\n });\n\n if (this.speaking) {\n this.speaking = false;\n this.queue.put({ type: SpeechEventType.END_OF_SPEECH });\n }\n } else {\n this.queue.put({\n type: SpeechEventType.INTERIM_TRANSCRIPT,\n requestId,\n alternatives: [speechData],\n });\n }\n } catch (e) {\n if (e instanceof Error && e.message.includes('Queue is closed')) {\n // Expected behavior on disconnect, log as warning\n this.#logger.warn(\n { err: e },\n 'Queue closed during transcript processing (expected during disconnect)',\n );\n } else {\n this.#logger.error({ err: e }, 'Error putting transcript to queue');\n }\n }\n }\n}\n"],"mappings":"AAGA,eAAgC;AAEhC,SAAS,UAAU,sBAAsB;AACzC,SAAS,uBAAuB;AAChC,SAAS,WAAW;AACpB,SAAS,2BAA2B;AACpC;AAAA,EACE,OAAO;AAAA,EACP,gBAAgB;AAAA,EAGhB;AAAA,OACK;AACP,SAAiC,mCAAmC;AACpE,SAA2B,OAAO,MAAM,eAAe,WAAW,oBAAoB;AACtF,SAA2B,yBAAyB;AACpD;AAAA,EAGE;AAAA,OACK;AACP,SAAyB,WAAW,yBAAyB;AAmGtD,SAAS,oBAAoB,OAA6C;AAC/E,QAAM,MAAM,MAAM,YAAY,GAAG;AACjC,MAAI,QAAQ,IAAI;AACd,WAAO,CAAC,MAAM,MAAM,GAAG,GAAG,GAAG,MAAM,MAAM,MAAM,CAAC,CAAC;AAAA,EACnD;AACA,SAAO,CAAC,OAAO,MAAS;AAC1B;AAGO,SAAS,qBACd,UACoB;AACpB,QAAM,eAAe,CAAC,UAAkD;AACtE,QAAI,OAAO,UAAU,UAAU;AAC7B,YAAM,CAAC,IAAI,IAAI,oBAAoB,KAAK;AACxC,aAAO,EAAE,OAAO,KAAK;AAAA,IACvB;AACA,WAAO;AAAA,EACT;AAEA,MAAI,MAAM,QAAQ,QAAQ,GAAG;AAC3B,WAAO,SAAS,IAAI,YAAY;AAAA,EAClC;AACA,SAAO,CAAC,aAAa,QAAQ,CAAC;AAChC;AAIA,MAAM,mBAAgC;AACtC,MAAM,sBAAsB;AAC5B,MAAM,mBAAmB;AACzB,MAAM,yBAAyB;AAkBxB,MAAM,YAAsC,QAAQ;AAAA,EACjD;AAAA,EACA,UAAqC,oBAAI,IAAI;AAAA,EAErD,UAAU,IAAI;AAAA,EAEd,YAAY,MAWT;AACD,UAAM,EAAE,WAAW,MAAM,gBAAgB,MAAM,mBAAmB,OAAO,CAAC;AAE1E,UAAM;AAAA,MACJ;AAAA,MACA;AAAA,MACA;AAAA,MACA,WAAW;AAAA,MACX,aAAa;AAAA,MACb;AAAA,MACA;AAAA,MACA,eAAe,CAAC;AAAA,MAChB;AAAA,MACA;AAAA,IACF,IAAI,QAAQ,CAAC;AAEb,UAAM,YAAY,WAAW,QAAQ,IAAI,yBAAyB;AAClE,UAAM,WAAW,UAAU,QAAQ,IAAI,6BAA6B,QAAQ,IAAI;AAChF,QAAI,CAAC,UAAU;AACb,YAAM,IAAI,MAAM,wDAAwD;AAAA,IAC1E;AAEA,UAAM,cACJ,aAAa,QAAQ,IAAI,gCAAgC,QAAQ,IAAI;AACvE,QAAI,CAAC,aAAa;AAChB,YAAM,IAAI,MAAM,iEAAiE;AAAA,IACnF;AAGA,QAAI,YAAY;AAChB,QAAI,eAAe;AACnB,QAAI,OAAO,cAAc,UAAU;AACjC,YAAM,MAAM,UAAU,YAAY,GAAG;AACrC,UAAI,QAAQ,IAAI;AACd,cAAM,oBAAoB,UAAU,MAAM,MAAM,CAAC;AACjD,YAAI,gBAAgB,iBAAiB,mBAAmB;AACtD,eAAK,QAAQ;AAAA,YACX;AAAA,YACA,EAAE,UAAU,cAAc,OAAO,UAAU;AAAA,UAC7C;AAAA,QACF,OAAO;AACL,yBAAe;AAAA,QACjB;AACA,oBAAY,UAAU,MAAM,GAAG,GAAG;AAAA,MACpC;AAAA,IACF;AACA,UAAM,qBAAqB,WAAW,qBAAqB,QAAQ,IAAI;AAEvE,SAAK,OAAO;AAAA,MACV,OAAO;AAAA,MACP,UAAU;AAAA,MACV;AAAA,MACA;AAAA,MACA,SAAS;AAAA,MACT,QAAQ;AAAA,MACR,WAAW;AAAA,MACX;AAAA,MACA,UAAU;AAAA,MACV,aAAa,eAAe;AAAA,IAC9B;AAAA,EACF;AAAA,EAEA,IAAI,QAAgB;AAClB,WAAO;AAAA,EACT;AAAA,EAEA,OAAO,gBAAgB,aAAqC;AAC1D,UAAM,CAAC,OAAO,QAAQ,IAAI,oBAAoB,WAAW;AACzD,WAAO,IAAI,IAAI,EAAE,OAAO,SAAS,CAAC;AAAA,EACpC;AAAA,EAEA,MAAgB,WAAW,GAAsC;AAC/D,UAAM,IAAI,MAAM,sEAAsE;AAAA,EACxF;AAAA,EAEA,cAAc,MAA8E;AAC1F,SAAK,OAAO,EAAE,GAAG,KAAK,MAAM,GAAG,KAAK;AAEpC,eAAW,UAAU,KAAK,SAAS;AACjC,aAAO,cAAc,IAAI;AAAA,IAC3B;AAAA,EACF;AAAA,EAEA,OAAO,SAGkB;AACvB,UAAM,EAAE,UAAU,cAAc,KAAK,KAAK,eAAe,4BAA4B,IACnF,WAAW,CAAC;AACd,UAAM,aAAa;AAAA,MACjB,GAAG,KAAK;AAAA,MACR,UAAU,YAAY,KAAK,KAAK;AAAA,IAClC;AAEA,UAAM,SAAS,IAAI,aAAa,MAAM,YAAY,WAAW;AAC7D,SAAK,QAAQ,IAAI,MAAM;AAEvB,WAAO;AAAA,EACT;AAAA,EAEA,MAAM,UAAU,SAAqC;AAjSvD;AAkSI,UAAM,SAAS;AAAA,MACb,UAAU;AAAA,QACR,aAAa,OAAO,KAAK,KAAK,UAAU;AAAA,QACxC,UAAU,KAAK,KAAK;AAAA,QACpB,OAAO,KAAK,KAAK;AAAA,MACnB;AAAA,IACF;AAEA,QAAI,KAAK,KAAK,SAAS,KAAK,KAAK,UAAU,QAAQ;AACjD,aAAO,QAAQ,KAAK,KAAK;AAAA,IAC3B;AAEA,QAAI,KAAK,KAAK,UAAU;AACtB,MAAC,OAAO,SAAqC,WAAW,KAAK,KAAK;AAAA,IACpE;AAEA,SAAI,UAAK,KAAK,aAAV,mBAAoB,QAAQ;AAC9B,aAAO,WAAW;AAAA,QAChB,QAAQ,KAAK,KAAK,SAAS,IAAI,CAAC,OAAO;AAAA,UACrC,OAAO,EAAE;AAAA,UACT,OAAO,EAAE,eAAe,CAAC;AAAA,QAC3B,EAAE;AAAA,MACJ;AAAA,IACF;AAEA,QAAI,KAAK,KAAK,aAAa;AACzB,aAAO,aAAa;AAAA,QAClB,SAAS,KAAK,KAAK,YAAY,YAAY;AAAA,QAC3C,SAAS,KAAK,KAAK,YAAY;AAAA,MACjC;AAAA,IACF;AAEA,QAAI,UAAU,KAAK,KAAK;AACxB,QAAI,QAAQ,WAAW,SAAS,KAAK,QAAQ,WAAW,UAAU,GAAG;AACnE,gBAAU,QAAQ,QAAQ,QAAQ,IAAI;AAAA,IACxC;AAEA,UAAM,QAAQ,MAAM,kBAAkB,KAAK,KAAK,QAAQ,KAAK,KAAK,SAAS;AAC3E,UAAM,MAAM,GAAG,OAAO;AACtB,UAAM,UAAU,EAAE,eAAe,UAAU,KAAK,GAAG;AAEnD,UAAM,SAAS,MAAM,UAAU,KAAK,SAAS,OAAO;AACpD,UAAM,MAAM,EAAE,GAAG,QAAQ,MAAM,iBAAiB;AAChD,WAAO,KAAK,KAAK,UAAU,GAAG,CAAC;AAE/B,WAAO;AAAA,EACT;AACF;AAEO,MAAM,qBAA+C,iBAAiB;AAAA,EACnE;AAAA,EACA,YAAY,UAAU,cAAc;AAAA,EACpC,WAAW;AAAA,EACX,iBAAiB;AAAA,EACjB,iBAAiB,IAAI,MAAM;AAAA,EAC3B;AAAA,EACA;AAAA,EAER,UAAU,IAAI;AAAA,EAEd,YACE,SACA,MACA,aACA;AACA,UAAM,SAAS,KAAK,YAAY,WAAW;AAC3C,SAAK,OAAO;AACZ,SAAK,MAAM;AACX,SAAK,cAAc;AAAA,EACrB;AAAA,EAEA,IAAI,QAAgB;AAClB,WAAO;AAAA,EACT;AAAA,EAEA,cAAc,MAA8E;AAC1F,SAAK,OAAO,EAAE,GAAG,KAAK,MAAM,GAAG,KAAK;AACpC,SAAK,eAAe,IAAI;AAAA,EAC1B;AAAA,EAEA,MAAgB,MAAqB;AACnC,WAAO,MAAM;AAEX,UAAI,KAAuB;AAC3B,UAAI,UAAU;AACd,UAAI,gBAAgB;AAEpB,YAAM,eAAe,oBAAoC;AAEzD,YAAM,kBAAkB,MAAM;AAC5B,YAAI,QAAS;AACb,kBAAU;AACV,qBAAa,MAAM;AACnB,iCAAI;AACJ,iCAAI;AAAA,MACN;AAEA,YAAM,mBAAmB,OAAOA,KAAe,WAAwB;AACrE,eAAO,IAAI,QAAc,CAAC,SAAS,WAAW;AAC5C,gBAAM,UAAU,MAAM;AACpB,4BAAgB;AAChB,mBAAO,IAAI,MAAM,8BAA8B,CAAC;AAAA,UAClD;AAEA,iBAAO,iBAAiB,SAAS,SAAS,EAAE,MAAM,KAAK,CAAC;AAExD,UAAAA,IAAG,GAAG,WAAW,CAAC,SAAS;AACzB,kBAAM,OAAO,KAAK,MAAM,KAAK,SAAS,CAAC;AACvC,yBAAa,MAAM,IAAI;AAAA,UACzB,CAAC;AAED,UAAAA,IAAG,GAAG,SAAS,CAAC,MAAM;AACpB,iBAAK,QAAQ,MAAM,EAAE,OAAO,EAAE,GAAG,iBAAiB;AAClD,4BAAgB;AAChB,mBAAO,CAAC;AAAA,UACV,CAAC;AAED,UAAAA,IAAG,GAAG,SAAS,CAAC,SAAiB;AAC/B,4BAAgB;AAEhB,gBAAI,CAAC,QAAS,QAAO,KAAK,QAAQ,MAAM,+BAA+B;AACvE,gBAAI,cAAe,QAAO,QAAQ;AAElC;AAAA,cACE,IAAI,eAAe;AAAA,gBACjB,SAAS;AAAA,gBACT,SAAS,EAAE,YAAY,KAAK;AAAA,cAC9B,CAAC;AAAA,YACH;AAAA,UACF,CAAC;AAAA,QACH,CAAC;AAAA,MACH;AAEA,YAAM,OAAO,OAAO,QAAmB,WAAwB;AAC7D,cAAM,cAAc,IAAI;AAAA,UACtB,KAAK,KAAK;AAAA,UACV;AAAA,UACA,KAAK,MAAM,KAAK,KAAK,aAAa,EAAE;AAAA;AAAA,QACtC;AAGA,cAAM,eAAe,IAAI,QAAe,CAAC,GAAG,WAAW;AACrD,cAAI,OAAO,SAAS;AAClB,mBAAO,OAAO,IAAI,MAAM,cAAc,CAAC;AAAA,UACzC;AACA,gBAAM,UAAU,MAAM,OAAO,IAAI,MAAM,cAAc,CAAC;AACtD,iBAAO,iBAAiB,SAAS,SAAS,EAAE,MAAM,KAAK,CAAC;AAAA,QAC1D,CAAC;AAGD,cAAM,WAAW,KAAK,MAAM,OAAO,aAAa,EAAE;AAClD,YAAI;AACF,iBAAO,MAAM;AACX,kBAAM,SAAS,MAAM,QAAQ,KAAK,CAAC,SAAS,KAAK,GAAG,YAAY,CAAC;AAEjE,gBAAI,OAAO,KAAM;AACjB,kBAAM,KAAK,OAAO;AAElB,gBAAI;AACJ,gBAAI,OAAO,aAAa,gBAAgB;AACtC,uBAAS,YAAY,MAAM;AAAA,YAC7B,OAAO;AACL,oBAAM,QAAQ;AACd,uBAAS,YAAY,MAAM,IAAI,WAAW,MAAM,IAAI,EAAE,MAAM;AAAA,YAC9D;AAEA,uBAAW,SAAS,QAAQ;AAC1B,mBAAK,kBAAkB,MAAM,oBAAoB,MAAM;AACvD,oBAAM,SAAS,OAAO,KAAK,MAAM,KAAK,MAAM,EAAE,SAAS,QAAQ;AAC/D,oBAAM,MAAM,EAAE,MAAM,eAAe,OAAO,OAAO;AACjD,qBAAO,KAAK,KAAK,UAAU,GAAG,CAAC;AAAA,YACjC;AAAA,UACF;AAEA,oBAAU;AACV,iBAAO,KAAK,KAAK,UAAU,EAAE,MAAM,mBAAmB,CAAC,CAAC;AAAA,QAC1D,SAAS,GAAG;AACV,cAAK,EAAY,YAAY,gBAAgB;AAE3C;AAAA,UACF;AACA,gBAAM;AAAA,QACR;AAAA,MACF;AAEA,YAAM,OAAO,OAAO,WAAwB;AAC1C,cAAM,oBAAoB,aAAa,OAAO;AAC9C,cAAM,SAAS,kBAAkB,UAAU;AAE3C,YAAI;AACF,iBAAO,CAAC,KAAK,UAAU,CAAC,OAAO,SAAS;AACtC,kBAAM,SAAS,MAAM,OAAO,KAAK;AACjC,gBAAI,OAAO,QAAS;AACpB,gBAAI,OAAO,KAAM;AAGjB,kBAAM,cAAc,MAAM,qBAAqB,eAAe,OAAO,KAAK;AAC1E,gBAAI,CAAC,YAAY,SAAS;AACxB,mBAAK,QAAQ;AAAA,gBACX,EAAE,OAAO,YAAY,OAAO,SAAS,OAAO,MAAM;AAAA,gBAClD;AAAA,cACF;AACA;AAAA,YACF;AAEA,kBAAM,QAAwB,YAAY;AAE1C,oBAAQ,MAAM,MAAM;AAAA,cAClB,KAAK;AAAA,cACL,KAAK;AACH;AAAA,cACF,KAAK;AACH,gCAAgB;AAChB,gCAAgB;AAChB;AAAA,cACF,KAAK;AACH,qBAAK,kBAAkB,OAAO,KAAK;AACnC;AAAA,cACF,KAAK;AACH,qBAAK,kBAAkB,OAAO,IAAI;AAClC;AAAA,cACF,KAAK;AACH,qBAAK,QAAQ,MAAM,EAAE,OAAO,MAAM,GAAG,iCAAiC;AACtE,gCAAgB;AAChB,sBAAM,IAAI,SAAS,+BAA+B,KAAK,UAAU,KAAK,CAAC,EAAE;AAAA,YAC7E;AAAA,UACF;AAAA,QACF,UAAE;AACA,iBAAO,YAAY;AACnB,cAAI;AACF,kBAAM,kBAAkB,OAAO;AAAA,UACjC,SAAS,GAAG;AACV,iBAAK,QAAQ,MAAM,kEAAkE,CAAC;AAAA,UACxF;AAAA,QACF;AAAA,MACF;AAEA,UAAI;AACF,aAAK,MAAM,KAAK,IAAI,UAAU,KAAK,YAAY,SAAS;AAExD,cAAM,aAAa,KAAK;AACxB,cAAM,WAAW,KAAK,KAAK,CAAC,EAAE,OAAO,MAAM,KAAK,IAAK,MAAM,GAAG,UAAU;AACxE,cAAM,iBAAiB,KAAK,KAAK,CAAC,EAAE,OAAO,MAAM,iBAAiB,IAAK,MAAM,GAAG,UAAU;AAC1F,cAAM,WAAW,KAAK,KAAK,CAAC,EAAE,OAAO,MAAM,KAAK,MAAM,GAAG,UAAU;AACnE,cAAM,oBAAoB,KAAK;AAAA,UAC7B,CAAC,EAAE,OAAO,MAAM,QAAQ,KAAK,CAAC,KAAK,eAAe,KAAK,GAAG,aAAa,MAAM,CAAC,CAAC;AAAA,UAC/E;AAAA,QACF;AAEA,YAAI;AACF,gBAAM,QAAQ,KAAK;AAAA,YACjB,QAAQ,IAAI,CAAC,SAAS,QAAQ,eAAe,QAAQ,SAAS,MAAM,CAAC;AAAA,YACrE,kBAAkB;AAAA,UACpB,CAAC;AAGD,cAAI,CAAC,kBAAkB,KAAM;AAG7B,eAAK,eAAe,MAAM;AAAA,QAC5B,UAAE;AAEA,gBAAM;AAAA,YACJ,CAAC,UAAU,gBAAgB,UAAU,iBAAiB;AAAA,YACtD;AAAA,UACF;AACA,0BAAgB;AAAA,QAClB;AAAA,MACF,UAAE;AAEA,wBAAgB;AAAA,MAClB;AAAA,IACF;AAAA,EACF;AAAA,EAEQ,kBAAkB,MAA0B,SAAkB;AAEpE,QAAI,KAAK,MAAM,OAAQ;AAEvB,UAAM,YAAY,KAAK,cAAc,KAAK;AAC1C,UAAM,OAAO,KAAK;AAClB,UAAM,WAAW,KAAK,YAAY,KAAK,KAAK,YAAY;AAExD,QAAI,CAAC,QAAQ,CAAC,QAAS;AAEvB,QAAI;AAEF,UAAI,CAAC,KAAK,UAAU;AAClB,aAAK,WAAW;AAChB,aAAK,MAAM,IAAI,EAAE,MAAM,gBAAgB,gBAAgB,CAAC;AAAA,MAC1D;AAEA,YAAM,aAAyB;AAAA,QAC7B;AAAA,QACA,WAAW,KAAK,kBAAkB,KAAK;AAAA,QACvC,SAAS,KAAK,kBAAkB,KAAK,QAAQ,KAAK;AAAA,QAClD,YAAY,KAAK;AAAA,QACjB;AAAA,QACA,OAAO,KAAK,MAAM;AAAA,UAChB,CAAC,SACC,kBAAkB;AAAA,YAChB,MAAM,KAAK;AAAA,YACX,WAAW,KAAK,QAAQ,KAAK;AAAA,YAC7B,SAAS,KAAK,MAAM,KAAK;AAAA,YACzB,iBAAiB,KAAK;AAAA,YACtB,YAAY,KAAK;AAAA,UACnB,CAAC;AAAA,QACL;AAAA,MACF;AAEA,UAAI,SAAS;AACX,YAAI,KAAK,iBAAiB,GAAG;AAC3B,eAAK,MAAM,IAAI;AAAA,YACb,MAAM,gBAAgB;AAAA,YACtB;AAAA,YACA,kBAAkB,EAAE,eAAe,KAAK,eAAe;AAAA,UACzD,CAAC;AACD,eAAK,iBAAiB;AAAA,QACxB;AAEA,aAAK,MAAM,IAAI;AAAA,UACb,MAAM,gBAAgB;AAAA,UACtB;AAAA,UACA,cAAc,CAAC,UAAU;AAAA,QAC3B,CAAC;AAED,YAAI,KAAK,UAAU;AACjB,eAAK,WAAW;AAChB,eAAK,MAAM,IAAI,EAAE,MAAM,gBAAgB,cAAc,CAAC;AAAA,QACxD;AAAA,MACF,OAAO;AACL,aAAK,MAAM,IAAI;AAAA,UACb,MAAM,gBAAgB;AAAA,UACtB;AAAA,UACA,cAAc,CAAC,UAAU;AAAA,QAC3B,CAAC;AAAA,MACH;AAAA,IACF,SAAS,GAAG;AACV,UAAI,aAAa,SAAS,EAAE,QAAQ,SAAS,iBAAiB,GAAG;AAE/D,aAAK,QAAQ;AAAA,UACX,EAAE,KAAK,EAAE;AAAA,UACT;AAAA,QACF;AAAA,MACF,OAAO;AACL,aAAK,QAAQ,MAAM,EAAE,KAAK,EAAE,GAAG,mCAAmC;AAAA,MACpE;AAAA,IACF;AAAA,EACF;AACF;","names":["ws"]}
|
|
1
|
+
{"version":3,"sources":["../../src/inference/stt.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2025 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { type AudioFrame } from '@livekit/rtc-node';\nimport type { WebSocket } from 'ws';\nimport { APIError, APIStatusError } from '../_exceptions.js';\nimport { AudioByteStream } from '../audio.js';\nimport { type LanguageCode, areLanguagesEquivalent, normalizeLanguage } from '../language.js';\nimport { log } from '../log.js';\nimport { createStreamChannel } from '../stream/stream_channel.js';\nimport {\n STT as BaseSTT,\n SpeechStream as BaseSpeechStream,\n type SpeechData,\n type SpeechEvent,\n SpeechEventType,\n} from '../stt/index.js';\nimport { type APIConnectOptions, DEFAULT_API_CONNECT_OPTIONS } from '../types.js';\nimport { type AudioBuffer, Event, Task, cancelAndWait, shortuuid, waitForAbort } from '../utils.js';\nimport { type TimedString, createTimedString } from '../voice/io.js';\nimport {\n type SttServerEvent,\n type SttTranscriptEvent,\n sttServerEventSchema,\n} from './api_protos.js';\nimport { type AnyString, connectWs, createAccessToken } from './utils.js';\n\nexport type DeepgramModels =\n | 'deepgram/flux-general'\n | 'deepgram/nova-3'\n | 'deepgram/nova-3-medical'\n | 'deepgram/nova-2'\n | 'deepgram/nova-2-medical'\n | 'deepgram/nova-2-conversationalai'\n | 'deepgram/nova-2-phonecall';\n\nexport type CartesiaModels = 'cartesia/ink-whisper';\n\nexport type AssemblyaiModels =\n | 'assemblyai/universal-streaming'\n | 'assemblyai/universal-streaming-multilingual';\n\nexport type ElevenlabsSTTModels = 'elevenlabs/scribe_v2_realtime';\n\nexport interface CartesiaOptions {\n /** Minimum volume threshold. Default: not specified. */\n min_volume?: number;\n /** Maximum silence duration in seconds. Default: not specified. */\n max_silence_duration_secs?: number;\n}\n\nexport interface DeepgramOptions {\n /** Enable filler words. Default: true. */\n filler_words?: boolean;\n /** Enable interim results. Default: true. */\n interim_results?: boolean;\n /** Endpointing timeout in milliseconds. Default: 25. */\n endpointing?: number;\n /** Enable punctuation. Default: false. */\n punctuate?: boolean;\n /** Enable smart formatting. */\n smart_format?: boolean;\n /** Keywords with boost values. */\n keywords?: Array<[string, number]>;\n /** Key terms for recognition. */\n keyterms?: string[];\n /** Enable profanity filter. */\n profanity_filter?: boolean;\n /** Convert spoken numbers to numerals. */\n numerals?: boolean;\n /** Opt out of model improvement program. */\n mip_opt_out?: boolean;\n}\n\nexport interface AssemblyAIOptions {\n /** Enable turn formatting. Default: false. */\n format_turns?: boolean;\n /** End of turn confidence threshold. Default: 0.01. */\n end_of_turn_confidence_threshold?: number;\n /** Minimum silence duration in milliseconds when confident about end of turn. Default: 0. */\n min_end_of_turn_silence_when_confident?: number;\n /** Maximum turn silence in milliseconds. Default: not specified. */\n max_turn_silence?: number;\n /** Key terms prompt for recognition. Default: not specified. */\n keyterms_prompt?: string[];\n}\n\nexport type STTLanguages =\n | 'multi'\n | 'en'\n | 'de'\n | 'es'\n | 'fr'\n | 'ja'\n | 'pt'\n | 'zh'\n | 'hi'\n | AnyString;\n\ntype _STTModels = DeepgramModels | CartesiaModels | AssemblyaiModels | ElevenlabsSTTModels;\n\nexport type STTModels = _STTModels | 'auto' | AnyString;\n\nexport type ModelWithLanguage = `${_STTModels}:${STTLanguages}` | STTModels;\n\nexport type STTOptions<TModel extends STTModels> = TModel extends DeepgramModels\n ? DeepgramOptions\n : TModel extends CartesiaModels\n ? CartesiaOptions\n : TModel extends AssemblyaiModels\n ? AssemblyAIOptions\n : Record<string, unknown>;\n\n/** A fallback model with optional extra configuration. Extra fields are passed through to the provider. */\nexport interface STTFallbackModel {\n /** Model name (e.g. \"deepgram/nova-3\", \"assemblyai/universal-streaming\", \"cartesia/ink-whisper\"). */\n model: string;\n /** Extra configuration for the model. */\n extraKwargs?: Record<string, unknown>;\n}\n\nexport type STTFallbackModelType = STTFallbackModel | string;\n\n/** Parse a model string into [model, language]. Language is undefined if not specified. */\nexport function parseSTTModelString(model: string): [string, LanguageCode | undefined] {\n const idx = model.lastIndexOf(':');\n if (idx !== -1) {\n return [model.slice(0, idx), normalizeLanguage(model.slice(idx + 1))];\n }\n return [model, undefined];\n}\n\n/** Normalize a single or list of FallbackModelType into STTFallbackModel[]. */\nexport function normalizeSTTFallback(\n fallback: STTFallbackModelType | STTFallbackModelType[],\n): STTFallbackModel[] {\n const makeFallback = (model: STTFallbackModelType): STTFallbackModel => {\n if (typeof model === 'string') {\n const [name] = parseSTTModelString(model);\n return { model: name };\n }\n return model;\n };\n\n if (Array.isArray(fallback)) {\n return fallback.map(makeFallback);\n }\n return [makeFallback(fallback)];\n}\n\nexport type STTEncoding = 'pcm_s16le';\n\nconst DEFAULT_ENCODING: STTEncoding = 'pcm_s16le';\nconst DEFAULT_SAMPLE_RATE = 16000;\nconst DEFAULT_BASE_URL = 'wss://agent-gateway.livekit.cloud/v1';\nconst DEFAULT_CANCEL_TIMEOUT = 5000;\n\nexport interface InferenceSTTOptions<TModel extends STTModels> {\n model?: TModel;\n language?: LanguageCode;\n encoding: STTEncoding;\n sampleRate: number;\n baseURL: string;\n apiKey: string;\n apiSecret: string;\n modelOptions: STTOptions<TModel>;\n fallback?: STTFallbackModel[];\n connOptions?: APIConnectOptions;\n}\n\n/**\n * Livekit Cloud Inference STT\n */\nexport class STT<TModel extends STTModels> extends BaseSTT {\n private opts: InferenceSTTOptions<TModel>;\n private streams: Set<SpeechStream<TModel>> = new Set();\n\n #logger = log();\n\n constructor(opts?: {\n model?: ModelWithLanguage;\n language?: STTLanguages;\n baseURL?: string;\n encoding?: STTEncoding;\n sampleRate?: number;\n apiKey?: string;\n apiSecret?: string;\n modelOptions?: STTOptions<TModel>;\n fallback?: STTFallbackModelType | STTFallbackModelType[];\n connOptions?: APIConnectOptions;\n }) {\n super({ streaming: true, interimResults: true, alignedTranscript: 'word' });\n\n const {\n model,\n language,\n baseURL,\n encoding = DEFAULT_ENCODING,\n sampleRate = DEFAULT_SAMPLE_RATE,\n apiKey,\n apiSecret,\n modelOptions = {} as STTOptions<TModel>,\n fallback,\n connOptions,\n } = opts || {};\n\n const lkBaseURL = baseURL || process.env.LIVEKIT_INFERENCE_URL || DEFAULT_BASE_URL;\n const lkApiKey = apiKey || process.env.LIVEKIT_INFERENCE_API_KEY || process.env.LIVEKIT_API_KEY;\n if (!lkApiKey) {\n throw new Error('apiKey is required: pass apiKey or set LIVEKIT_API_KEY');\n }\n\n const lkApiSecret =\n apiSecret || process.env.LIVEKIT_INFERENCE_API_SECRET || process.env.LIVEKIT_API_SECRET;\n if (!lkApiSecret) {\n throw new Error('apiSecret is required: pass apiSecret or set LIVEKIT_API_SECRET');\n }\n\n // Parse language from model string if provided: \"provider/model:language\"\n let nextModel = model;\n let nextLanguage = language;\n if (typeof nextModel === 'string') {\n const [parsedModel, parsedLanguage] = parseSTTModelString(nextModel);\n if (parsedLanguage !== undefined) {\n if (nextLanguage && !areLanguagesEquivalent(nextLanguage, parsedLanguage)) {\n this.#logger.warn(\n '`language` is provided via both argument and model, using the one from the argument',\n { language: nextLanguage, model: nextModel },\n );\n } else {\n nextLanguage = parsedLanguage as STTLanguages;\n }\n nextModel = parsedModel as TModel;\n }\n }\n const normalizedFallback = fallback ? normalizeSTTFallback(fallback) : undefined;\n\n this.opts = {\n model: nextModel as TModel,\n language: nextLanguage ? normalizeLanguage(nextLanguage) : undefined,\n encoding,\n sampleRate,\n baseURL: lkBaseURL,\n apiKey: lkApiKey,\n apiSecret: lkApiSecret,\n modelOptions,\n fallback: normalizedFallback,\n connOptions: connOptions ?? DEFAULT_API_CONNECT_OPTIONS,\n };\n }\n\n get label(): string {\n return 'inference.STT';\n }\n\n static fromModelString(modelString: string): STT<AnyString> {\n const [model, language] = parseSTTModelString(modelString);\n return new STT({ model, language });\n }\n\n protected async _recognize(_: AudioBuffer): Promise<SpeechEvent> {\n throw new Error('LiveKit STT does not support batch recognition, use stream() instead');\n }\n\n updateOptions(opts: Partial<Pick<InferenceSTTOptions<TModel>, 'model' | 'language'>>): void {\n this.opts = {\n ...this.opts,\n ...opts,\n language: opts.language !== undefined ? normalizeLanguage(opts.language) : this.opts.language,\n };\n\n for (const stream of this.streams) {\n stream.updateOptions(opts);\n }\n }\n\n stream(options?: {\n language?: STTLanguages | string;\n connOptions?: APIConnectOptions;\n }): SpeechStream<TModel> {\n const { language, connOptions = this.opts.connOptions ?? DEFAULT_API_CONNECT_OPTIONS } =\n options || {};\n const streamOpts = {\n ...this.opts,\n language: language !== undefined ? normalizeLanguage(language) : this.opts.language,\n } as InferenceSTTOptions<TModel>;\n\n const stream = new SpeechStream(this, streamOpts, connOptions);\n this.streams.add(stream);\n\n return stream;\n }\n\n async connectWs(timeout: number): Promise<WebSocket> {\n const params = {\n settings: {\n sample_rate: String(this.opts.sampleRate),\n encoding: this.opts.encoding,\n extra: this.opts.modelOptions,\n },\n } as Record<string, unknown>;\n\n if (this.opts.model && this.opts.model !== 'auto') {\n params.model = this.opts.model;\n }\n\n if (this.opts.language) {\n (params.settings as Record<string, unknown>).language = this.opts.language;\n }\n\n if (this.opts.fallback?.length) {\n params.fallback = {\n models: this.opts.fallback.map((m) => ({\n model: m.model,\n extra: m.extraKwargs ?? {},\n })),\n };\n }\n\n if (this.opts.connOptions) {\n params.connection = {\n timeout: this.opts.connOptions.timeoutMs / 1000,\n retries: this.opts.connOptions.maxRetry,\n };\n }\n\n let baseURL = this.opts.baseURL;\n if (baseURL.startsWith('http://') || baseURL.startsWith('https://')) {\n baseURL = baseURL.replace('http', 'ws');\n }\n\n const token = await createAccessToken(this.opts.apiKey, this.opts.apiSecret);\n const url = `${baseURL}/stt`;\n const headers = { Authorization: `Bearer ${token}` } as Record<string, string>;\n\n const socket = await connectWs(url, headers, timeout);\n const msg = { ...params, type: 'session.create' };\n socket.send(JSON.stringify(msg));\n\n return socket;\n }\n}\n\nexport class SpeechStream<TModel extends STTModels> extends BaseSpeechStream {\n private opts: InferenceSTTOptions<TModel>;\n private requestId = shortuuid('stt_request_');\n private speaking = false;\n private speechDuration = 0;\n private reconnectEvent = new Event();\n private stt: STT<TModel>;\n private connOptions: APIConnectOptions;\n\n #logger = log();\n\n constructor(\n sttImpl: STT<TModel>,\n opts: InferenceSTTOptions<TModel>,\n connOptions: APIConnectOptions,\n ) {\n super(sttImpl, opts.sampleRate, connOptions);\n this.opts = opts;\n this.stt = sttImpl;\n this.connOptions = connOptions;\n }\n\n get label(): string {\n return 'inference.SpeechStream';\n }\n\n updateOptions(opts: Partial<Pick<InferenceSTTOptions<TModel>, 'model' | 'language'>>): void {\n this.opts = {\n ...this.opts,\n ...opts,\n language: opts.language !== undefined ? normalizeLanguage(opts.language) : this.opts.language,\n };\n this.reconnectEvent.set();\n }\n\n protected async run(): Promise<void> {\n while (true) {\n // Create fresh resources for each connection attempt\n let ws: WebSocket | null = null;\n let closing = false;\n let finalReceived = false;\n\n const eventChannel = createStreamChannel<SttServerEvent>();\n\n const resourceCleanup = () => {\n if (closing) return;\n closing = true;\n eventChannel.close();\n ws?.removeAllListeners();\n ws?.close();\n };\n\n const createWsListener = async (ws: WebSocket, signal: AbortSignal) => {\n return new Promise<void>((resolve, reject) => {\n const onAbort = () => {\n resourceCleanup();\n reject(new Error('WebSocket connection aborted'));\n };\n\n signal.addEventListener('abort', onAbort, { once: true });\n\n ws.on('message', (data) => {\n const json = JSON.parse(data.toString()) as SttServerEvent;\n eventChannel.write(json);\n });\n\n ws.on('error', (e) => {\n this.#logger.error({ error: e }, 'WebSocket error');\n resourceCleanup();\n reject(e);\n });\n\n ws.on('close', (code: number) => {\n resourceCleanup();\n\n if (!closing) return this.#logger.error('WebSocket closed unexpectedly');\n if (finalReceived) return resolve();\n\n reject(\n new APIStatusError({\n message: 'LiveKit STT connection closed unexpectedly',\n options: { statusCode: code },\n }),\n );\n });\n });\n };\n\n const send = async (socket: WebSocket, signal: AbortSignal) => {\n const audioStream = new AudioByteStream(\n this.opts.sampleRate,\n 1,\n Math.floor(this.opts.sampleRate / 20), // 50ms\n );\n\n // Create abort promise once to avoid memory leak\n const abortPromise = new Promise<never>((_, reject) => {\n if (signal.aborted) {\n return reject(new Error('Send aborted'));\n }\n const onAbort = () => reject(new Error('Send aborted'));\n signal.addEventListener('abort', onAbort, { once: true });\n });\n\n // Manual iteration to support cancellation\n const iterator = this.input[Symbol.asyncIterator]();\n try {\n while (true) {\n const result = await Promise.race([iterator.next(), abortPromise]);\n\n if (result.done) break;\n const ev = result.value;\n\n let frames: AudioFrame[];\n if (ev === SpeechStream.FLUSH_SENTINEL) {\n frames = audioStream.flush();\n } else {\n const frame = ev as AudioFrame;\n frames = audioStream.write(new Int16Array(frame.data).buffer);\n }\n\n for (const frame of frames) {\n this.speechDuration += frame.samplesPerChannel / frame.sampleRate;\n const base64 = Buffer.from(frame.data.buffer).toString('base64');\n const msg = { type: 'input_audio', audio: base64 };\n socket.send(JSON.stringify(msg));\n }\n }\n\n closing = true;\n socket.send(JSON.stringify({ type: 'session.finalize' }));\n } catch (e) {\n if ((e as Error).message === 'Send aborted') {\n // Expected abort, don't log\n return;\n }\n throw e;\n }\n };\n\n const recv = async (signal: AbortSignal) => {\n const serverEventStream = eventChannel.stream();\n const reader = serverEventStream.getReader();\n\n try {\n while (!this.closed && !signal.aborted) {\n const result = await reader.read();\n if (signal.aborted) return;\n if (result.done) return;\n\n // Parse and validate with Zod schema\n const parseResult = await sttServerEventSchema.safeParseAsync(result.value);\n if (!parseResult.success) {\n this.#logger.warn(\n { error: parseResult.error, rawData: result.value },\n 'Failed to parse STT server event',\n );\n continue;\n }\n\n const event: SttServerEvent = parseResult.data;\n\n switch (event.type) {\n case 'session.created':\n case 'session.finalized':\n break;\n case 'session.closed':\n finalReceived = true;\n resourceCleanup();\n break;\n case 'interim_transcript':\n this.processTranscript(event, false);\n break;\n case 'final_transcript':\n this.processTranscript(event, true);\n break;\n case 'error':\n this.#logger.error({ error: event }, 'Received error from LiveKit STT');\n resourceCleanup();\n throw new APIError(`LiveKit STT returned error: ${JSON.stringify(event)}`);\n }\n }\n } finally {\n reader.releaseLock();\n try {\n await serverEventStream.cancel();\n } catch (e) {\n this.#logger.debug('Error cancelling serverEventStream (may already be cancelled):', e);\n }\n }\n };\n\n try {\n ws = await this.stt.connectWs(this.connOptions.timeoutMs);\n\n const controller = this.abortController; // Use base class abortController for proper cancellation\n const sendTask = Task.from(({ signal }) => send(ws!, signal), controller);\n const wsListenerTask = Task.from(({ signal }) => createWsListener(ws!, signal), controller);\n const recvTask = Task.from(({ signal }) => recv(signal), controller);\n const waitReconnectTask = Task.from(\n ({ signal }) => Promise.race([this.reconnectEvent.wait(), waitForAbort(signal)]),\n controller,\n );\n\n try {\n await Promise.race([\n Promise.all([sendTask.result, wsListenerTask.result, recvTask.result]),\n waitReconnectTask.result,\n ]);\n\n // If reconnect didn't trigger, tasks finished - exit loop\n if (!waitReconnectTask.done) break;\n\n // Reconnect triggered - clear event and continue loop\n this.reconnectEvent.clear();\n } finally {\n // Cancel all tasks to ensure cleanup\n await cancelAndWait(\n [sendTask, wsListenerTask, recvTask, waitReconnectTask],\n DEFAULT_CANCEL_TIMEOUT,\n );\n resourceCleanup();\n }\n } finally {\n // Ensure cleanup even if connectWs throws\n resourceCleanup();\n }\n }\n }\n\n private processTranscript(data: SttTranscriptEvent, isFinal: boolean) {\n // Check if queue is closed to avoid race condition during disconnect\n if (this.queue.closed) return;\n\n const requestId = data.session_id || this.requestId;\n const text = data.transcript;\n const language = normalizeLanguage(data.language || this.opts.language || 'en');\n\n if (!text && !isFinal) return;\n\n try {\n // We'll have a more accurate way of detecting when speech started when we have VAD\n if (!this.speaking) {\n this.speaking = true;\n this.queue.put({ type: SpeechEventType.START_OF_SPEECH });\n }\n\n const speechData: SpeechData = {\n language,\n startTime: this.startTimeOffset + data.start,\n endTime: this.startTimeOffset + data.start + data.duration,\n confidence: data.confidence,\n text,\n words: data.words.map(\n (word): TimedString =>\n createTimedString({\n text: word.word,\n startTime: word.start + this.startTimeOffset,\n endTime: word.end + this.startTimeOffset,\n startTimeOffset: this.startTimeOffset,\n confidence: word.confidence,\n }),\n ),\n };\n\n if (isFinal) {\n if (this.speechDuration > 0) {\n this.queue.put({\n type: SpeechEventType.RECOGNITION_USAGE,\n requestId,\n recognitionUsage: { audioDuration: this.speechDuration },\n });\n this.speechDuration = 0;\n }\n\n this.queue.put({\n type: SpeechEventType.FINAL_TRANSCRIPT,\n requestId,\n alternatives: [speechData],\n });\n\n if (this.speaking) {\n this.speaking = false;\n this.queue.put({ type: SpeechEventType.END_OF_SPEECH });\n }\n } else {\n this.queue.put({\n type: SpeechEventType.INTERIM_TRANSCRIPT,\n requestId,\n alternatives: [speechData],\n });\n }\n } catch (e) {\n if (e instanceof Error && e.message.includes('Queue is closed')) {\n // Expected behavior on disconnect, log as warning\n this.#logger.warn(\n { err: e },\n 'Queue closed during transcript processing (expected during disconnect)',\n );\n } else {\n this.#logger.error({ err: e }, 'Error putting transcript to queue');\n }\n }\n }\n}\n"],"mappings":"AAGA,eAAgC;AAEhC,SAAS,UAAU,sBAAsB;AACzC,SAAS,uBAAuB;AAChC,SAA4B,wBAAwB,yBAAyB;AAC7E,SAAS,WAAW;AACpB,SAAS,2BAA2B;AACpC;AAAA,EACE,OAAO;AAAA,EACP,gBAAgB;AAAA,EAGhB;AAAA,OACK;AACP,SAAiC,mCAAmC;AACpE,SAA2B,OAAO,MAAM,eAAe,WAAW,oBAAoB;AACtF,SAA2B,yBAAyB;AACpD;AAAA,EAGE;AAAA,OACK;AACP,SAAyB,WAAW,yBAAyB;AAmGtD,SAAS,oBAAoB,OAAmD;AACrF,QAAM,MAAM,MAAM,YAAY,GAAG;AACjC,MAAI,QAAQ,IAAI;AACd,WAAO,CAAC,MAAM,MAAM,GAAG,GAAG,GAAG,kBAAkB,MAAM,MAAM,MAAM,CAAC,CAAC,CAAC;AAAA,EACtE;AACA,SAAO,CAAC,OAAO,MAAS;AAC1B;AAGO,SAAS,qBACd,UACoB;AACpB,QAAM,eAAe,CAAC,UAAkD;AACtE,QAAI,OAAO,UAAU,UAAU;AAC7B,YAAM,CAAC,IAAI,IAAI,oBAAoB,KAAK;AACxC,aAAO,EAAE,OAAO,KAAK;AAAA,IACvB;AACA,WAAO;AAAA,EACT;AAEA,MAAI,MAAM,QAAQ,QAAQ,GAAG;AAC3B,WAAO,SAAS,IAAI,YAAY;AAAA,EAClC;AACA,SAAO,CAAC,aAAa,QAAQ,CAAC;AAChC;AAIA,MAAM,mBAAgC;AACtC,MAAM,sBAAsB;AAC5B,MAAM,mBAAmB;AACzB,MAAM,yBAAyB;AAkBxB,MAAM,YAAsC,QAAQ;AAAA,EACjD;AAAA,EACA,UAAqC,oBAAI,IAAI;AAAA,EAErD,UAAU,IAAI;AAAA,EAEd,YAAY,MAWT;AACD,UAAM,EAAE,WAAW,MAAM,gBAAgB,MAAM,mBAAmB,OAAO,CAAC;AAE1E,UAAM;AAAA,MACJ;AAAA,MACA;AAAA,MACA;AAAA,MACA,WAAW;AAAA,MACX,aAAa;AAAA,MACb;AAAA,MACA;AAAA,MACA,eAAe,CAAC;AAAA,MAChB;AAAA,MACA;AAAA,IACF,IAAI,QAAQ,CAAC;AAEb,UAAM,YAAY,WAAW,QAAQ,IAAI,yBAAyB;AAClE,UAAM,WAAW,UAAU,QAAQ,IAAI,6BAA6B,QAAQ,IAAI;AAChF,QAAI,CAAC,UAAU;AACb,YAAM,IAAI,MAAM,wDAAwD;AAAA,IAC1E;AAEA,UAAM,cACJ,aAAa,QAAQ,IAAI,gCAAgC,QAAQ,IAAI;AACvE,QAAI,CAAC,aAAa;AAChB,YAAM,IAAI,MAAM,iEAAiE;AAAA,IACnF;AAGA,QAAI,YAAY;AAChB,QAAI,eAAe;AACnB,QAAI,OAAO,cAAc,UAAU;AACjC,YAAM,CAAC,aAAa,cAAc,IAAI,oBAAoB,SAAS;AACnE,UAAI,mBAAmB,QAAW;AAChC,YAAI,gBAAgB,CAAC,uBAAuB,cAAc,cAAc,GAAG;AACzE,eAAK,QAAQ;AAAA,YACX;AAAA,YACA,EAAE,UAAU,cAAc,OAAO,UAAU;AAAA,UAC7C;AAAA,QACF,OAAO;AACL,yBAAe;AAAA,QACjB;AACA,oBAAY;AAAA,MACd;AAAA,IACF;AACA,UAAM,qBAAqB,WAAW,qBAAqB,QAAQ,IAAI;AAEvE,SAAK,OAAO;AAAA,MACV,OAAO;AAAA,MACP,UAAU,eAAe,kBAAkB,YAAY,IAAI;AAAA,MAC3D;AAAA,MACA;AAAA,MACA,SAAS;AAAA,MACT,QAAQ;AAAA,MACR,WAAW;AAAA,MACX;AAAA,MACA,UAAU;AAAA,MACV,aAAa,eAAe;AAAA,IAC9B;AAAA,EACF;AAAA,EAEA,IAAI,QAAgB;AAClB,WAAO;AAAA,EACT;AAAA,EAEA,OAAO,gBAAgB,aAAqC;AAC1D,UAAM,CAAC,OAAO,QAAQ,IAAI,oBAAoB,WAAW;AACzD,WAAO,IAAI,IAAI,EAAE,OAAO,SAAS,CAAC;AAAA,EACpC;AAAA,EAEA,MAAgB,WAAW,GAAsC;AAC/D,UAAM,IAAI,MAAM,sEAAsE;AAAA,EACxF;AAAA,EAEA,cAAc,MAA8E;AAC1F,SAAK,OAAO;AAAA,MACV,GAAG,KAAK;AAAA,MACR,GAAG;AAAA,MACH,UAAU,KAAK,aAAa,SAAY,kBAAkB,KAAK,QAAQ,IAAI,KAAK,KAAK;AAAA,IACvF;AAEA,eAAW,UAAU,KAAK,SAAS;AACjC,aAAO,cAAc,IAAI;AAAA,IAC3B;AAAA,EACF;AAAA,EAEA,OAAO,SAGkB;AACvB,UAAM,EAAE,UAAU,cAAc,KAAK,KAAK,eAAe,4BAA4B,IACnF,WAAW,CAAC;AACd,UAAM,aAAa;AAAA,MACjB,GAAG,KAAK;AAAA,MACR,UAAU,aAAa,SAAY,kBAAkB,QAAQ,IAAI,KAAK,KAAK;AAAA,IAC7E;AAEA,UAAM,SAAS,IAAI,aAAa,MAAM,YAAY,WAAW;AAC7D,SAAK,QAAQ,IAAI,MAAM;AAEvB,WAAO;AAAA,EACT;AAAA,EAEA,MAAM,UAAU,SAAqC;AArSvD;AAsSI,UAAM,SAAS;AAAA,MACb,UAAU;AAAA,QACR,aAAa,OAAO,KAAK,KAAK,UAAU;AAAA,QACxC,UAAU,KAAK,KAAK;AAAA,QACpB,OAAO,KAAK,KAAK;AAAA,MACnB;AAAA,IACF;AAEA,QAAI,KAAK,KAAK,SAAS,KAAK,KAAK,UAAU,QAAQ;AACjD,aAAO,QAAQ,KAAK,KAAK;AAAA,IAC3B;AAEA,QAAI,KAAK,KAAK,UAAU;AACtB,MAAC,OAAO,SAAqC,WAAW,KAAK,KAAK;AAAA,IACpE;AAEA,SAAI,UAAK,KAAK,aAAV,mBAAoB,QAAQ;AAC9B,aAAO,WAAW;AAAA,QAChB,QAAQ,KAAK,KAAK,SAAS,IAAI,CAAC,OAAO;AAAA,UACrC,OAAO,EAAE;AAAA,UACT,OAAO,EAAE,eAAe,CAAC;AAAA,QAC3B,EAAE;AAAA,MACJ;AAAA,IACF;AAEA,QAAI,KAAK,KAAK,aAAa;AACzB,aAAO,aAAa;AAAA,QAClB,SAAS,KAAK,KAAK,YAAY,YAAY;AAAA,QAC3C,SAAS,KAAK,KAAK,YAAY;AAAA,MACjC;AAAA,IACF;AAEA,QAAI,UAAU,KAAK,KAAK;AACxB,QAAI,QAAQ,WAAW,SAAS,KAAK,QAAQ,WAAW,UAAU,GAAG;AACnE,gBAAU,QAAQ,QAAQ,QAAQ,IAAI;AAAA,IACxC;AAEA,UAAM,QAAQ,MAAM,kBAAkB,KAAK,KAAK,QAAQ,KAAK,KAAK,SAAS;AAC3E,UAAM,MAAM,GAAG,OAAO;AACtB,UAAM,UAAU,EAAE,eAAe,UAAU,KAAK,GAAG;AAEnD,UAAM,SAAS,MAAM,UAAU,KAAK,SAAS,OAAO;AACpD,UAAM,MAAM,EAAE,GAAG,QAAQ,MAAM,iBAAiB;AAChD,WAAO,KAAK,KAAK,UAAU,GAAG,CAAC;AAE/B,WAAO;AAAA,EACT;AACF;AAEO,MAAM,qBAA+C,iBAAiB;AAAA,EACnE;AAAA,EACA,YAAY,UAAU,cAAc;AAAA,EACpC,WAAW;AAAA,EACX,iBAAiB;AAAA,EACjB,iBAAiB,IAAI,MAAM;AAAA,EAC3B;AAAA,EACA;AAAA,EAER,UAAU,IAAI;AAAA,EAEd,YACE,SACA,MACA,aACA;AACA,UAAM,SAAS,KAAK,YAAY,WAAW;AAC3C,SAAK,OAAO;AACZ,SAAK,MAAM;AACX,SAAK,cAAc;AAAA,EACrB;AAAA,EAEA,IAAI,QAAgB;AAClB,WAAO;AAAA,EACT;AAAA,EAEA,cAAc,MAA8E;AAC1F,SAAK,OAAO;AAAA,MACV,GAAG,KAAK;AAAA,MACR,GAAG;AAAA,MACH,UAAU,KAAK,aAAa,SAAY,kBAAkB,KAAK,QAAQ,IAAI,KAAK,KAAK;AAAA,IACvF;AACA,SAAK,eAAe,IAAI;AAAA,EAC1B;AAAA,EAEA,MAAgB,MAAqB;AACnC,WAAO,MAAM;AAEX,UAAI,KAAuB;AAC3B,UAAI,UAAU;AACd,UAAI,gBAAgB;AAEpB,YAAM,eAAe,oBAAoC;AAEzD,YAAM,kBAAkB,MAAM;AAC5B,YAAI,QAAS;AACb,kBAAU;AACV,qBAAa,MAAM;AACnB,iCAAI;AACJ,iCAAI;AAAA,MACN;AAEA,YAAM,mBAAmB,OAAOA,KAAe,WAAwB;AACrE,eAAO,IAAI,QAAc,CAAC,SAAS,WAAW;AAC5C,gBAAM,UAAU,MAAM;AACpB,4BAAgB;AAChB,mBAAO,IAAI,MAAM,8BAA8B,CAAC;AAAA,UAClD;AAEA,iBAAO,iBAAiB,SAAS,SAAS,EAAE,MAAM,KAAK,CAAC;AAExD,UAAAA,IAAG,GAAG,WAAW,CAAC,SAAS;AACzB,kBAAM,OAAO,KAAK,MAAM,KAAK,SAAS,CAAC;AACvC,yBAAa,MAAM,IAAI;AAAA,UACzB,CAAC;AAED,UAAAA,IAAG,GAAG,SAAS,CAAC,MAAM;AACpB,iBAAK,QAAQ,MAAM,EAAE,OAAO,EAAE,GAAG,iBAAiB;AAClD,4BAAgB;AAChB,mBAAO,CAAC;AAAA,UACV,CAAC;AAED,UAAAA,IAAG,GAAG,SAAS,CAAC,SAAiB;AAC/B,4BAAgB;AAEhB,gBAAI,CAAC,QAAS,QAAO,KAAK,QAAQ,MAAM,+BAA+B;AACvE,gBAAI,cAAe,QAAO,QAAQ;AAElC;AAAA,cACE,IAAI,eAAe;AAAA,gBACjB,SAAS;AAAA,gBACT,SAAS,EAAE,YAAY,KAAK;AAAA,cAC9B,CAAC;AAAA,YACH;AAAA,UACF,CAAC;AAAA,QACH,CAAC;AAAA,MACH;AAEA,YAAM,OAAO,OAAO,QAAmB,WAAwB;AAC7D,cAAM,cAAc,IAAI;AAAA,UACtB,KAAK,KAAK;AAAA,UACV;AAAA,UACA,KAAK,MAAM,KAAK,KAAK,aAAa,EAAE;AAAA;AAAA,QACtC;AAGA,cAAM,eAAe,IAAI,QAAe,CAAC,GAAG,WAAW;AACrD,cAAI,OAAO,SAAS;AAClB,mBAAO,OAAO,IAAI,MAAM,cAAc,CAAC;AAAA,UACzC;AACA,gBAAM,UAAU,MAAM,OAAO,IAAI,MAAM,cAAc,CAAC;AACtD,iBAAO,iBAAiB,SAAS,SAAS,EAAE,MAAM,KAAK,CAAC;AAAA,QAC1D,CAAC;AAGD,cAAM,WAAW,KAAK,MAAM,OAAO,aAAa,EAAE;AAClD,YAAI;AACF,iBAAO,MAAM;AACX,kBAAM,SAAS,MAAM,QAAQ,KAAK,CAAC,SAAS,KAAK,GAAG,YAAY,CAAC;AAEjE,gBAAI,OAAO,KAAM;AACjB,kBAAM,KAAK,OAAO;AAElB,gBAAI;AACJ,gBAAI,OAAO,aAAa,gBAAgB;AACtC,uBAAS,YAAY,MAAM;AAAA,YAC7B,OAAO;AACL,oBAAM,QAAQ;AACd,uBAAS,YAAY,MAAM,IAAI,WAAW,MAAM,IAAI,EAAE,MAAM;AAAA,YAC9D;AAEA,uBAAW,SAAS,QAAQ;AAC1B,mBAAK,kBAAkB,MAAM,oBAAoB,MAAM;AACvD,oBAAM,SAAS,OAAO,KAAK,MAAM,KAAK,MAAM,EAAE,SAAS,QAAQ;AAC/D,oBAAM,MAAM,EAAE,MAAM,eAAe,OAAO,OAAO;AACjD,qBAAO,KAAK,KAAK,UAAU,GAAG,CAAC;AAAA,YACjC;AAAA,UACF;AAEA,oBAAU;AACV,iBAAO,KAAK,KAAK,UAAU,EAAE,MAAM,mBAAmB,CAAC,CAAC;AAAA,QAC1D,SAAS,GAAG;AACV,cAAK,EAAY,YAAY,gBAAgB;AAE3C;AAAA,UACF;AACA,gBAAM;AAAA,QACR;AAAA,MACF;AAEA,YAAM,OAAO,OAAO,WAAwB;AAC1C,cAAM,oBAAoB,aAAa,OAAO;AAC9C,cAAM,SAAS,kBAAkB,UAAU;AAE3C,YAAI;AACF,iBAAO,CAAC,KAAK,UAAU,CAAC,OAAO,SAAS;AACtC,kBAAM,SAAS,MAAM,OAAO,KAAK;AACjC,gBAAI,OAAO,QAAS;AACpB,gBAAI,OAAO,KAAM;AAGjB,kBAAM,cAAc,MAAM,qBAAqB,eAAe,OAAO,KAAK;AAC1E,gBAAI,CAAC,YAAY,SAAS;AACxB,mBAAK,QAAQ;AAAA,gBACX,EAAE,OAAO,YAAY,OAAO,SAAS,OAAO,MAAM;AAAA,gBAClD;AAAA,cACF;AACA;AAAA,YACF;AAEA,kBAAM,QAAwB,YAAY;AAE1C,oBAAQ,MAAM,MAAM;AAAA,cAClB,KAAK;AAAA,cACL,KAAK;AACH;AAAA,cACF,KAAK;AACH,gCAAgB;AAChB,gCAAgB;AAChB;AAAA,cACF,KAAK;AACH,qBAAK,kBAAkB,OAAO,KAAK;AACnC;AAAA,cACF,KAAK;AACH,qBAAK,kBAAkB,OAAO,IAAI;AAClC;AAAA,cACF,KAAK;AACH,qBAAK,QAAQ,MAAM,EAAE,OAAO,MAAM,GAAG,iCAAiC;AACtE,gCAAgB;AAChB,sBAAM,IAAI,SAAS,+BAA+B,KAAK,UAAU,KAAK,CAAC,EAAE;AAAA,YAC7E;AAAA,UACF;AAAA,QACF,UAAE;AACA,iBAAO,YAAY;AACnB,cAAI;AACF,kBAAM,kBAAkB,OAAO;AAAA,UACjC,SAAS,GAAG;AACV,iBAAK,QAAQ,MAAM,kEAAkE,CAAC;AAAA,UACxF;AAAA,QACF;AAAA,MACF;AAEA,UAAI;AACF,aAAK,MAAM,KAAK,IAAI,UAAU,KAAK,YAAY,SAAS;AAExD,cAAM,aAAa,KAAK;AACxB,cAAM,WAAW,KAAK,KAAK,CAAC,EAAE,OAAO,MAAM,KAAK,IAAK,MAAM,GAAG,UAAU;AACxE,cAAM,iBAAiB,KAAK,KAAK,CAAC,EAAE,OAAO,MAAM,iBAAiB,IAAK,MAAM,GAAG,UAAU;AAC1F,cAAM,WAAW,KAAK,KAAK,CAAC,EAAE,OAAO,MAAM,KAAK,MAAM,GAAG,UAAU;AACnE,cAAM,oBAAoB,KAAK;AAAA,UAC7B,CAAC,EAAE,OAAO,MAAM,QAAQ,KAAK,CAAC,KAAK,eAAe,KAAK,GAAG,aAAa,MAAM,CAAC,CAAC;AAAA,UAC/E;AAAA,QACF;AAEA,YAAI;AACF,gBAAM,QAAQ,KAAK;AAAA,YACjB,QAAQ,IAAI,CAAC,SAAS,QAAQ,eAAe,QAAQ,SAAS,MAAM,CAAC;AAAA,YACrE,kBAAkB;AAAA,UACpB,CAAC;AAGD,cAAI,CAAC,kBAAkB,KAAM;AAG7B,eAAK,eAAe,MAAM;AAAA,QAC5B,UAAE;AAEA,gBAAM;AAAA,YACJ,CAAC,UAAU,gBAAgB,UAAU,iBAAiB;AAAA,YACtD;AAAA,UACF;AACA,0BAAgB;AAAA,QAClB;AAAA,MACF,UAAE;AAEA,wBAAgB;AAAA,MAClB;AAAA,IACF;AAAA,EACF;AAAA,EAEQ,kBAAkB,MAA0B,SAAkB;AAEpE,QAAI,KAAK,MAAM,OAAQ;AAEvB,UAAM,YAAY,KAAK,cAAc,KAAK;AAC1C,UAAM,OAAO,KAAK;AAClB,UAAM,WAAW,kBAAkB,KAAK,YAAY,KAAK,KAAK,YAAY,IAAI;AAE9E,QAAI,CAAC,QAAQ,CAAC,QAAS;AAEvB,QAAI;AAEF,UAAI,CAAC,KAAK,UAAU;AAClB,aAAK,WAAW;AAChB,aAAK,MAAM,IAAI,EAAE,MAAM,gBAAgB,gBAAgB,CAAC;AAAA,MAC1D;AAEA,YAAM,aAAyB;AAAA,QAC7B;AAAA,QACA,WAAW,KAAK,kBAAkB,KAAK;AAAA,QACvC,SAAS,KAAK,kBAAkB,KAAK,QAAQ,KAAK;AAAA,QAClD,YAAY,KAAK;AAAA,QACjB;AAAA,QACA,OAAO,KAAK,MAAM;AAAA,UAChB,CAAC,SACC,kBAAkB;AAAA,YAChB,MAAM,KAAK;AAAA,YACX,WAAW,KAAK,QAAQ,KAAK;AAAA,YAC7B,SAAS,KAAK,MAAM,KAAK;AAAA,YACzB,iBAAiB,KAAK;AAAA,YACtB,YAAY,KAAK;AAAA,UACnB,CAAC;AAAA,QACL;AAAA,MACF;AAEA,UAAI,SAAS;AACX,YAAI,KAAK,iBAAiB,GAAG;AAC3B,eAAK,MAAM,IAAI;AAAA,YACb,MAAM,gBAAgB;AAAA,YACtB;AAAA,YACA,kBAAkB,EAAE,eAAe,KAAK,eAAe;AAAA,UACzD,CAAC;AACD,eAAK,iBAAiB;AAAA,QACxB;AAEA,aAAK,MAAM,IAAI;AAAA,UACb,MAAM,gBAAgB;AAAA,UACtB;AAAA,UACA,cAAc,CAAC,UAAU;AAAA,QAC3B,CAAC;AAED,YAAI,KAAK,UAAU;AACjB,eAAK,WAAW;AAChB,eAAK,MAAM,IAAI,EAAE,MAAM,gBAAgB,cAAc,CAAC;AAAA,QACxD;AAAA,MACF,OAAO;AACL,aAAK,MAAM,IAAI;AAAA,UACb,MAAM,gBAAgB;AAAA,UACtB;AAAA,UACA,cAAc,CAAC,UAAU;AAAA,QAC3B,CAAC;AAAA,MACH;AAAA,IACF,SAAS,GAAG;AACV,UAAI,aAAa,SAAS,EAAE,QAAQ,SAAS,iBAAiB,GAAG;AAE/D,aAAK,QAAQ;AAAA,UACX,EAAE,KAAK,EAAE;AAAA,UACT;AAAA,QACF;AAAA,MACF,OAAO;AACL,aAAK,QAAQ,MAAM,EAAE,KAAK,EAAE,GAAG,mCAAmC;AAAA,MACpE;AAAA,IACF;AAAA,EACF;AACF;","names":["ws"]}
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
"use strict";
|
|
2
2
|
var import_vitest = require("vitest");
|
|
3
|
+
var import_language = require("../language.cjs");
|
|
3
4
|
var import_log = require("../log.cjs");
|
|
4
5
|
var import_types = require("../types.cjs");
|
|
5
6
|
var import_stt = require("./stt.cjs");
|
|
@@ -26,6 +27,11 @@ function makeStt(overrides = {}) {
|
|
|
26
27
|
(0, import_vitest.expect)(model).toBe("deepgram");
|
|
27
28
|
(0, import_vitest.expect)(language).toBe("en");
|
|
28
29
|
});
|
|
30
|
+
(0, import_vitest.it)("normalizes language suffixes", () => {
|
|
31
|
+
const [model, language] = (0, import_stt.parseSTTModelString)("deepgram:english");
|
|
32
|
+
(0, import_vitest.expect)(model).toBe("deepgram");
|
|
33
|
+
(0, import_vitest.expect)(language).toBe("en");
|
|
34
|
+
});
|
|
29
35
|
(0, import_vitest.it)("provider/model format without language", () => {
|
|
30
36
|
const [model, language] = (0, import_stt.parseSTTModelString)("deepgram/nova-3");
|
|
31
37
|
(0, import_vitest.expect)(model).toBe("deepgram/nova-3");
|
|
@@ -128,6 +134,14 @@ function makeStt(overrides = {}) {
|
|
|
128
134
|
});
|
|
129
135
|
});
|
|
130
136
|
(0, import_vitest.describe)("STT constructor fallback and connOptions", () => {
|
|
137
|
+
(0, import_vitest.it)("normalizes language in constructor and model string", () => {
|
|
138
|
+
const stt = makeStt({ model: "deepgram/nova-3:english" });
|
|
139
|
+
(0, import_vitest.expect)(stt["opts"].language).toBe("en");
|
|
140
|
+
});
|
|
141
|
+
(0, import_vitest.it)("prefers explicit normalized language over model suffix", () => {
|
|
142
|
+
const stt = makeStt({ model: "deepgram/nova-3:english", language: "en_US" });
|
|
143
|
+
(0, import_vitest.expect)(stt["opts"].language).toBe((0, import_language.normalizeLanguage)("en_US"));
|
|
144
|
+
});
|
|
131
145
|
(0, import_vitest.it)("fallback not given defaults to undefined", () => {
|
|
132
146
|
const stt = makeStt();
|
|
133
147
|
(0, import_vitest.expect)(stt["opts"].fallback).toBeUndefined();
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../../src/inference/stt.test.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2025 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { beforeAll, describe, expect, it } from 'vitest';\nimport { initializeLogger } from '../log.js';\nimport { type APIConnectOptions, DEFAULT_API_CONNECT_OPTIONS } from '../types.js';\nimport { STT, type STTFallbackModel, normalizeSTTFallback, parseSTTModelString } from './stt.js';\n\nbeforeAll(() => {\n initializeLogger({ level: 'silent', pretty: false });\n});\n\n/** Helper to create STT with required credentials. */\nfunction makeStt(overrides: Record<string, unknown> = {}) {\n const defaults = {\n model: 'deepgram' as const,\n apiKey: 'test-key',\n apiSecret: 'test-secret',\n baseURL: 'https://example.livekit.cloud',\n };\n return new STT({ ...defaults, ...overrides });\n}\n\ndescribe('parseSTTModelString', () => {\n it('simple model without language', () => {\n const [model, language] = parseSTTModelString('deepgram');\n expect(model).toBe('deepgram');\n expect(language).toBeUndefined();\n });\n\n it('model with language suffix', () => {\n const [model, language] = parseSTTModelString('deepgram:en');\n expect(model).toBe('deepgram');\n expect(language).toBe('en');\n });\n\n it('provider/model format without language', () => {\n const [model, language] = parseSTTModelString('deepgram/nova-3');\n expect(model).toBe('deepgram/nova-3');\n expect(language).toBeUndefined();\n });\n\n it('provider/model format with language', () => {\n const [model, language] = parseSTTModelString('deepgram/nova-3:en');\n expect(model).toBe('deepgram/nova-3');\n expect(language).toBe('en');\n });\n\n it.each([\n ['cartesia/ink-whisper:de', 'cartesia/ink-whisper', 'de'],\n ['assemblyai:es', 'assemblyai', 'es'],\n ['deepgram/nova-2-medical:ja', 'deepgram/nova-2-medical', 'ja'],\n ['deepgram/nova-3:multi', 'deepgram/nova-3', 'multi'],\n ['cartesia:zh', 'cartesia', 'zh'],\n ])('various providers and languages: %s', (modelStr, expectedModel, expectedLang) => {\n const [model, language] = parseSTTModelString(modelStr);\n expect(model).toBe(expectedModel);\n expect(language).toBe(expectedLang);\n });\n\n it('auto model without language', () => {\n const [model, language] = parseSTTModelString('auto');\n expect(model).toBe('auto');\n expect(language).toBeUndefined();\n });\n\n it('auto model with language', () => {\n const [model, language] = parseSTTModelString('auto:pt');\n expect(model).toBe('auto');\n expect(language).toBe('pt');\n });\n});\n\ndescribe('normalizeSTTFallback', () => {\n it('single string model', () => {\n const result = normalizeSTTFallback('deepgram/nova-3');\n expect(result).toEqual([{ model: 'deepgram/nova-3' }]);\n });\n\n it('single FallbackModel dict', () => {\n const fallback: STTFallbackModel = { model: 'deepgram/nova-3' };\n const result = normalizeSTTFallback(fallback);\n expect(result).toEqual([{ model: 'deepgram/nova-3' }]);\n });\n\n it('list of string models', () => {\n const result = normalizeSTTFallback(['deepgram/nova-3', 'cartesia/ink-whisper']);\n expect(result).toEqual([{ model: 'deepgram/nova-3' }, { model: 'cartesia/ink-whisper' }]);\n });\n\n it('list of FallbackModel dicts', () => {\n const fallbacks: STTFallbackModel[] = [{ model: 'deepgram/nova-3' }, { model: 'assemblyai' }];\n const result = normalizeSTTFallback(fallbacks);\n expect(result).toEqual([{ model: 'deepgram/nova-3' }, { model: 'assemblyai' }]);\n });\n\n it('mixed list of strings and dicts', () => {\n const result = normalizeSTTFallback([\n 'deepgram/nova-3',\n { model: 'cartesia/ink-whisper' } as STTFallbackModel,\n 'assemblyai',\n ]);\n expect(result).toEqual([\n { model: 'deepgram/nova-3' },\n { model: 'cartesia/ink-whisper' },\n { model: 'assemblyai' },\n ]);\n });\n\n it('string with language suffix discards language', () => {\n const result = normalizeSTTFallback('deepgram/nova-3:en');\n expect(result).toEqual([{ model: 'deepgram/nova-3' }]);\n });\n\n it('FallbackModel with extraKwargs is preserved', () => {\n const fallback: STTFallbackModel = {\n model: 'deepgram/nova-3',\n extraKwargs: { keywords: [['livekit', 1.5]], punctuate: true },\n };\n const result = normalizeSTTFallback(fallback);\n expect(result).toEqual([\n {\n model: 'deepgram/nova-3',\n extraKwargs: { keywords: [['livekit', 1.5]], punctuate: true },\n },\n ]);\n });\n\n it('list with extraKwargs preserved', () => {\n const result = normalizeSTTFallback([\n { model: 'deepgram/nova-3', extraKwargs: { punctuate: true } } as STTFallbackModel,\n 'cartesia/ink-whisper',\n { model: 'assemblyai', extraKwargs: { format_turns: true } } as STTFallbackModel,\n ]);\n expect(result).toEqual([\n { model: 'deepgram/nova-3', extraKwargs: { punctuate: true } },\n { model: 'cartesia/ink-whisper' },\n { model: 'assemblyai', extraKwargs: { format_turns: true } },\n ]);\n });\n\n it('empty list returns empty list', () => {\n const result = normalizeSTTFallback([]);\n expect(result).toEqual([]);\n });\n\n it('multiple colons in model string splits on last', () => {\n const result = normalizeSTTFallback('some:model:part:fr');\n expect(result).toEqual([{ model: 'some:model:part' }]);\n });\n});\n\ndescribe('STT constructor fallback and connOptions', () => {\n it('fallback not given defaults to undefined', () => {\n const stt = makeStt();\n expect(stt['opts'].fallback).toBeUndefined();\n });\n\n it('fallback single string is normalized', () => {\n const stt = makeStt({ fallback: 'cartesia/ink-whisper' });\n expect(stt['opts'].fallback).toEqual([{ model: 'cartesia/ink-whisper' }]);\n });\n\n it('fallback list of strings is normalized', () => {\n const stt = makeStt({ fallback: ['deepgram/nova-3', 'assemblyai'] });\n expect(stt['opts'].fallback).toEqual([{ model: 'deepgram/nova-3' }, { model: 'assemblyai' }]);\n });\n\n it('fallback single FallbackModel is normalized to list', () => {\n const stt = makeStt({ fallback: { model: 'deepgram/nova-3' } });\n expect(stt['opts'].fallback).toEqual([{ model: 'deepgram/nova-3' }]);\n });\n\n it('fallback with extraKwargs is preserved', () => {\n const stt = makeStt({\n fallback: {\n model: 'deepgram/nova-3',\n extraKwargs: { punctuate: true, keywords: [['livekit', 1.5]] },\n },\n });\n expect(stt['opts'].fallback).toEqual([\n {\n model: 'deepgram/nova-3',\n extraKwargs: { punctuate: true, keywords: [['livekit', 1.5]] },\n },\n ]);\n });\n\n it('fallback mixed list is normalized', () => {\n const stt = makeStt({\n fallback: [\n 'deepgram/nova-3',\n { model: 'cartesia', extraKwargs: { min_volume: 0.5 } },\n 'assemblyai',\n ],\n });\n expect(stt['opts'].fallback).toEqual([\n { model: 'deepgram/nova-3' },\n { model: 'cartesia', extraKwargs: { min_volume: 0.5 } },\n { model: 'assemblyai' },\n ]);\n });\n\n it('fallback string with language discards language', () => {\n const stt = makeStt({ fallback: 'deepgram/nova-3:en' });\n expect(stt['opts'].fallback).toEqual([{ model: 'deepgram/nova-3' }]);\n });\n\n it('connOptions not given uses default', () => {\n const stt = makeStt();\n expect(stt['opts'].connOptions).toEqual(DEFAULT_API_CONNECT_OPTIONS);\n });\n\n it('connOptions custom timeout', () => {\n const custom: APIConnectOptions = { timeoutMs: 30000, maxRetry: 3, retryIntervalMs: 2000 };\n const stt = makeStt({ connOptions: custom });\n expect(stt['opts'].connOptions).toEqual(custom);\n expect(stt['opts'].connOptions!.timeoutMs).toBe(30000);\n });\n\n it('connOptions custom maxRetry', () => {\n const custom: APIConnectOptions = { timeoutMs: 10000, maxRetry: 5, retryIntervalMs: 2000 };\n const stt = makeStt({ connOptions: custom });\n expect(stt['opts'].connOptions).toEqual(custom);\n expect(stt['opts'].connOptions!.maxRetry).toBe(5);\n });\n\n it('connOptions full custom', () => {\n const custom: APIConnectOptions = { timeoutMs: 60000, maxRetry: 10, retryIntervalMs: 2000 };\n const stt = makeStt({ connOptions: custom });\n expect(stt['opts'].connOptions).toEqual(custom);\n expect(stt['opts'].connOptions!.timeoutMs).toBe(60000);\n expect(stt['opts'].connOptions!.maxRetry).toBe(10);\n expect(stt['opts'].connOptions!.retryIntervalMs).toBe(2000);\n });\n});\n"],"mappings":";AAGA,oBAAgD;AAChD,iBAAiC;AACjC,mBAAoE;AACpE,iBAAsF;AAAA,IAEtF,yBAAU,MAAM;AACd,mCAAiB,EAAE,OAAO,UAAU,QAAQ,MAAM,CAAC;AACrD,CAAC;AAGD,SAAS,QAAQ,YAAqC,CAAC,GAAG;AACxD,QAAM,WAAW;AAAA,IACf,OAAO;AAAA,IACP,QAAQ;AAAA,IACR,WAAW;AAAA,IACX,SAAS;AAAA,EACX;AACA,SAAO,IAAI,eAAI,EAAE,GAAG,UAAU,GAAG,UAAU,CAAC;AAC9C;AAAA,IAEA,wBAAS,uBAAuB,MAAM;AACpC,wBAAG,iCAAiC,MAAM;AACxC,UAAM,CAAC,OAAO,QAAQ,QAAI,gCAAoB,UAAU;AACxD,8BAAO,KAAK,EAAE,KAAK,UAAU;AAC7B,8BAAO,QAAQ,EAAE,cAAc;AAAA,EACjC,CAAC;AAED,wBAAG,8BAA8B,MAAM;AACrC,UAAM,CAAC,OAAO,QAAQ,QAAI,gCAAoB,aAAa;AAC3D,8BAAO,KAAK,EAAE,KAAK,UAAU;AAC7B,8BAAO,QAAQ,EAAE,KAAK,IAAI;AAAA,EAC5B,CAAC;AAED,wBAAG,0CAA0C,MAAM;AACjD,UAAM,CAAC,OAAO,QAAQ,QAAI,gCAAoB,iBAAiB;AAC/D,8BAAO,KAAK,EAAE,KAAK,iBAAiB;AACpC,8BAAO,QAAQ,EAAE,cAAc;AAAA,EACjC,CAAC;AAED,wBAAG,uCAAuC,MAAM;AAC9C,UAAM,CAAC,OAAO,QAAQ,QAAI,gCAAoB,oBAAoB;AAClE,8BAAO,KAAK,EAAE,KAAK,iBAAiB;AACpC,8BAAO,QAAQ,EAAE,KAAK,IAAI;AAAA,EAC5B,CAAC;AAED,mBAAG,KAAK;AAAA,IACN,CAAC,2BAA2B,wBAAwB,IAAI;AAAA,IACxD,CAAC,iBAAiB,cAAc,IAAI;AAAA,IACpC,CAAC,8BAA8B,2BAA2B,IAAI;AAAA,IAC9D,CAAC,yBAAyB,mBAAmB,OAAO;AAAA,IACpD,CAAC,eAAe,YAAY,IAAI;AAAA,EAClC,CAAC,EAAE,uCAAuC,CAAC,UAAU,eAAe,iBAAiB;AACnF,UAAM,CAAC,OAAO,QAAQ,QAAI,gCAAoB,QAAQ;AACtD,8BAAO,KAAK,EAAE,KAAK,aAAa;AAChC,8BAAO,QAAQ,EAAE,KAAK,YAAY;AAAA,EACpC,CAAC;AAED,wBAAG,+BAA+B,MAAM;AACtC,UAAM,CAAC,OAAO,QAAQ,QAAI,gCAAoB,MAAM;AACpD,8BAAO,KAAK,EAAE,KAAK,MAAM;AACzB,8BAAO,QAAQ,EAAE,cAAc;AAAA,EACjC,CAAC;AAED,wBAAG,4BAA4B,MAAM;AACnC,UAAM,CAAC,OAAO,QAAQ,QAAI,gCAAoB,SAAS;AACvD,8BAAO,KAAK,EAAE,KAAK,MAAM;AACzB,8BAAO,QAAQ,EAAE,KAAK,IAAI;AAAA,EAC5B,CAAC;AACH,CAAC;AAAA,IAED,wBAAS,wBAAwB,MAAM;AACrC,wBAAG,uBAAuB,MAAM;AAC9B,UAAM,aAAS,iCAAqB,iBAAiB;AACrD,8BAAO,MAAM,EAAE,QAAQ,CAAC,EAAE,OAAO,kBAAkB,CAAC,CAAC;AAAA,EACvD,CAAC;AAED,wBAAG,6BAA6B,MAAM;AACpC,UAAM,WAA6B,EAAE,OAAO,kBAAkB;AAC9D,UAAM,aAAS,iCAAqB,QAAQ;AAC5C,8BAAO,MAAM,EAAE,QAAQ,CAAC,EAAE,OAAO,kBAAkB,CAAC,CAAC;AAAA,EACvD,CAAC;AAED,wBAAG,yBAAyB,MAAM;AAChC,UAAM,aAAS,iCAAqB,CAAC,mBAAmB,sBAAsB,CAAC;AAC/E,8BAAO,MAAM,EAAE,QAAQ,CAAC,EAAE,OAAO,kBAAkB,GAAG,EAAE,OAAO,uBAAuB,CAAC,CAAC;AAAA,EAC1F,CAAC;AAED,wBAAG,+BAA+B,MAAM;AACtC,UAAM,YAAgC,CAAC,EAAE,OAAO,kBAAkB,GAAG,EAAE,OAAO,aAAa,CAAC;AAC5F,UAAM,aAAS,iCAAqB,SAAS;AAC7C,8BAAO,MAAM,EAAE,QAAQ,CAAC,EAAE,OAAO,kBAAkB,GAAG,EAAE,OAAO,aAAa,CAAC,CAAC;AAAA,EAChF,CAAC;AAED,wBAAG,mCAAmC,MAAM;AAC1C,UAAM,aAAS,iCAAqB;AAAA,MAClC;AAAA,MACA,EAAE,OAAO,uBAAuB;AAAA,MAChC;AAAA,IACF,CAAC;AACD,8BAAO,MAAM,EAAE,QAAQ;AAAA,MACrB,EAAE,OAAO,kBAAkB;AAAA,MAC3B,EAAE,OAAO,uBAAuB;AAAA,MAChC,EAAE,OAAO,aAAa;AAAA,IACxB,CAAC;AAAA,EACH,CAAC;AAED,wBAAG,iDAAiD,MAAM;AACxD,UAAM,aAAS,iCAAqB,oBAAoB;AACxD,8BAAO,MAAM,EAAE,QAAQ,CAAC,EAAE,OAAO,kBAAkB,CAAC,CAAC;AAAA,EACvD,CAAC;AAED,wBAAG,+CAA+C,MAAM;AACtD,UAAM,WAA6B;AAAA,MACjC,OAAO;AAAA,MACP,aAAa,EAAE,UAAU,CAAC,CAAC,WAAW,GAAG,CAAC,GAAG,WAAW,KAAK;AAAA,IAC/D;AACA,UAAM,aAAS,iCAAqB,QAAQ;AAC5C,8BAAO,MAAM,EAAE,QAAQ;AAAA,MACrB;AAAA,QACE,OAAO;AAAA,QACP,aAAa,EAAE,UAAU,CAAC,CAAC,WAAW,GAAG,CAAC,GAAG,WAAW,KAAK;AAAA,MAC/D;AAAA,IACF,CAAC;AAAA,EACH,CAAC;AAED,wBAAG,mCAAmC,MAAM;AAC1C,UAAM,aAAS,iCAAqB;AAAA,MAClC,EAAE,OAAO,mBAAmB,aAAa,EAAE,WAAW,KAAK,EAAE;AAAA,MAC7D;AAAA,MACA,EAAE,OAAO,cAAc,aAAa,EAAE,cAAc,KAAK,EAAE;AAAA,IAC7D,CAAC;AACD,8BAAO,MAAM,EAAE,QAAQ;AAAA,MACrB,EAAE,OAAO,mBAAmB,aAAa,EAAE,WAAW,KAAK,EAAE;AAAA,MAC7D,EAAE,OAAO,uBAAuB;AAAA,MAChC,EAAE,OAAO,cAAc,aAAa,EAAE,cAAc,KAAK,EAAE;AAAA,IAC7D,CAAC;AAAA,EACH,CAAC;AAED,wBAAG,iCAAiC,MAAM;AACxC,UAAM,aAAS,iCAAqB,CAAC,CAAC;AACtC,8BAAO,MAAM,EAAE,QAAQ,CAAC,CAAC;AAAA,EAC3B,CAAC;AAED,wBAAG,kDAAkD,MAAM;AACzD,UAAM,aAAS,iCAAqB,oBAAoB;AACxD,8BAAO,MAAM,EAAE,QAAQ,CAAC,EAAE,OAAO,kBAAkB,CAAC,CAAC;AAAA,EACvD,CAAC;AACH,CAAC;AAAA,IAED,wBAAS,4CAA4C,MAAM;AACzD,wBAAG,4CAA4C,MAAM;AACnD,UAAM,MAAM,QAAQ;AACpB,8BAAO,IAAI,MAAM,EAAE,QAAQ,EAAE,cAAc;AAAA,EAC7C,CAAC;AAED,wBAAG,wCAAwC,MAAM;AAC/C,UAAM,MAAM,QAAQ,EAAE,UAAU,uBAAuB,CAAC;AACxD,8BAAO,IAAI,MAAM,EAAE,QAAQ,EAAE,QAAQ,CAAC,EAAE,OAAO,uBAAuB,CAAC,CAAC;AAAA,EAC1E,CAAC;AAED,wBAAG,0CAA0C,MAAM;AACjD,UAAM,MAAM,QAAQ,EAAE,UAAU,CAAC,mBAAmB,YAAY,EAAE,CAAC;AACnE,8BAAO,IAAI,MAAM,EAAE,QAAQ,EAAE,QAAQ,CAAC,EAAE,OAAO,kBAAkB,GAAG,EAAE,OAAO,aAAa,CAAC,CAAC;AAAA,EAC9F,CAAC;AAED,wBAAG,uDAAuD,MAAM;AAC9D,UAAM,MAAM,QAAQ,EAAE,UAAU,EAAE,OAAO,kBAAkB,EAAE,CAAC;AAC9D,8BAAO,IAAI,MAAM,EAAE,QAAQ,EAAE,QAAQ,CAAC,EAAE,OAAO,kBAAkB,CAAC,CAAC;AAAA,EACrE,CAAC;AAED,wBAAG,0CAA0C,MAAM;AACjD,UAAM,MAAM,QAAQ;AAAA,MAClB,UAAU;AAAA,QACR,OAAO;AAAA,QACP,aAAa,EAAE,WAAW,MAAM,UAAU,CAAC,CAAC,WAAW,GAAG,CAAC,EAAE;AAAA,MAC/D;AAAA,IACF,CAAC;AACD,8BAAO,IAAI,MAAM,EAAE,QAAQ,EAAE,QAAQ;AAAA,MACnC;AAAA,QACE,OAAO;AAAA,QACP,aAAa,EAAE,WAAW,MAAM,UAAU,CAAC,CAAC,WAAW,GAAG,CAAC,EAAE;AAAA,MAC/D;AAAA,IACF,CAAC;AAAA,EACH,CAAC;AAED,wBAAG,qCAAqC,MAAM;AAC5C,UAAM,MAAM,QAAQ;AAAA,MAClB,UAAU;AAAA,QACR;AAAA,QACA,EAAE,OAAO,YAAY,aAAa,EAAE,YAAY,IAAI,EAAE;AAAA,QACtD;AAAA,MACF;AAAA,IACF,CAAC;AACD,8BAAO,IAAI,MAAM,EAAE,QAAQ,EAAE,QAAQ;AAAA,MACnC,EAAE,OAAO,kBAAkB;AAAA,MAC3B,EAAE,OAAO,YAAY,aAAa,EAAE,YAAY,IAAI,EAAE;AAAA,MACtD,EAAE,OAAO,aAAa;AAAA,IACxB,CAAC;AAAA,EACH,CAAC;AAED,wBAAG,mDAAmD,MAAM;AAC1D,UAAM,MAAM,QAAQ,EAAE,UAAU,qBAAqB,CAAC;AACtD,8BAAO,IAAI,MAAM,EAAE,QAAQ,EAAE,QAAQ,CAAC,EAAE,OAAO,kBAAkB,CAAC,CAAC;AAAA,EACrE,CAAC;AAED,wBAAG,sCAAsC,MAAM;AAC7C,UAAM,MAAM,QAAQ;AACpB,8BAAO,IAAI,MAAM,EAAE,WAAW,EAAE,QAAQ,wCAA2B;AAAA,EACrE,CAAC;AAED,wBAAG,8BAA8B,MAAM;AACrC,UAAM,SAA4B,EAAE,WAAW,KAAO,UAAU,GAAG,iBAAiB,IAAK;AACzF,UAAM,MAAM,QAAQ,EAAE,aAAa,OAAO,CAAC;AAC3C,8BAAO,IAAI,MAAM,EAAE,WAAW,EAAE,QAAQ,MAAM;AAC9C,8BAAO,IAAI,MAAM,EAAE,YAAa,SAAS,EAAE,KAAK,GAAK;AAAA,EACvD,CAAC;AAED,wBAAG,+BAA+B,MAAM;AACtC,UAAM,SAA4B,EAAE,WAAW,KAAO,UAAU,GAAG,iBAAiB,IAAK;AACzF,UAAM,MAAM,QAAQ,EAAE,aAAa,OAAO,CAAC;AAC3C,8BAAO,IAAI,MAAM,EAAE,WAAW,EAAE,QAAQ,MAAM;AAC9C,8BAAO,IAAI,MAAM,EAAE,YAAa,QAAQ,EAAE,KAAK,CAAC;AAAA,EAClD,CAAC;AAED,wBAAG,2BAA2B,MAAM;AAClC,UAAM,SAA4B,EAAE,WAAW,KAAO,UAAU,IAAI,iBAAiB,IAAK;AAC1F,UAAM,MAAM,QAAQ,EAAE,aAAa,OAAO,CAAC;AAC3C,8BAAO,IAAI,MAAM,EAAE,WAAW,EAAE,QAAQ,MAAM;AAC9C,8BAAO,IAAI,MAAM,EAAE,YAAa,SAAS,EAAE,KAAK,GAAK;AACrD,8BAAO,IAAI,MAAM,EAAE,YAAa,QAAQ,EAAE,KAAK,EAAE;AACjD,8BAAO,IAAI,MAAM,EAAE,YAAa,eAAe,EAAE,KAAK,GAAI;AAAA,EAC5D,CAAC;AACH,CAAC;","names":[]}
|
|
1
|
+
{"version":3,"sources":["../../src/inference/stt.test.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2025 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { beforeAll, describe, expect, it } from 'vitest';\nimport { normalizeLanguage } from '../language.js';\nimport { initializeLogger } from '../log.js';\nimport { type APIConnectOptions, DEFAULT_API_CONNECT_OPTIONS } from '../types.js';\nimport { STT, type STTFallbackModel, normalizeSTTFallback, parseSTTModelString } from './stt.js';\n\nbeforeAll(() => {\n initializeLogger({ level: 'silent', pretty: false });\n});\n\n/** Helper to create STT with required credentials. */\nfunction makeStt(overrides: Record<string, unknown> = {}) {\n const defaults = {\n model: 'deepgram' as const,\n apiKey: 'test-key',\n apiSecret: 'test-secret',\n baseURL: 'https://example.livekit.cloud',\n };\n return new STT({ ...defaults, ...overrides });\n}\n\ndescribe('parseSTTModelString', () => {\n it('simple model without language', () => {\n const [model, language] = parseSTTModelString('deepgram');\n expect(model).toBe('deepgram');\n expect(language).toBeUndefined();\n });\n\n it('model with language suffix', () => {\n const [model, language] = parseSTTModelString('deepgram:en');\n expect(model).toBe('deepgram');\n expect(language).toBe('en');\n });\n\n it('normalizes language suffixes', () => {\n const [model, language] = parseSTTModelString('deepgram:english');\n expect(model).toBe('deepgram');\n expect(language).toBe('en');\n });\n\n it('provider/model format without language', () => {\n const [model, language] = parseSTTModelString('deepgram/nova-3');\n expect(model).toBe('deepgram/nova-3');\n expect(language).toBeUndefined();\n });\n\n it('provider/model format with language', () => {\n const [model, language] = parseSTTModelString('deepgram/nova-3:en');\n expect(model).toBe('deepgram/nova-3');\n expect(language).toBe('en');\n });\n\n it.each([\n ['cartesia/ink-whisper:de', 'cartesia/ink-whisper', 'de'],\n ['assemblyai:es', 'assemblyai', 'es'],\n ['deepgram/nova-2-medical:ja', 'deepgram/nova-2-medical', 'ja'],\n ['deepgram/nova-3:multi', 'deepgram/nova-3', 'multi'],\n ['cartesia:zh', 'cartesia', 'zh'],\n ])('various providers and languages: %s', (modelStr, expectedModel, expectedLang) => {\n const [model, language] = parseSTTModelString(modelStr);\n expect(model).toBe(expectedModel);\n expect(language).toBe(expectedLang);\n });\n\n it('auto model without language', () => {\n const [model, language] = parseSTTModelString('auto');\n expect(model).toBe('auto');\n expect(language).toBeUndefined();\n });\n\n it('auto model with language', () => {\n const [model, language] = parseSTTModelString('auto:pt');\n expect(model).toBe('auto');\n expect(language).toBe('pt');\n });\n});\n\ndescribe('normalizeSTTFallback', () => {\n it('single string model', () => {\n const result = normalizeSTTFallback('deepgram/nova-3');\n expect(result).toEqual([{ model: 'deepgram/nova-3' }]);\n });\n\n it('single FallbackModel dict', () => {\n const fallback: STTFallbackModel = { model: 'deepgram/nova-3' };\n const result = normalizeSTTFallback(fallback);\n expect(result).toEqual([{ model: 'deepgram/nova-3' }]);\n });\n\n it('list of string models', () => {\n const result = normalizeSTTFallback(['deepgram/nova-3', 'cartesia/ink-whisper']);\n expect(result).toEqual([{ model: 'deepgram/nova-3' }, { model: 'cartesia/ink-whisper' }]);\n });\n\n it('list of FallbackModel dicts', () => {\n const fallbacks: STTFallbackModel[] = [{ model: 'deepgram/nova-3' }, { model: 'assemblyai' }];\n const result = normalizeSTTFallback(fallbacks);\n expect(result).toEqual([{ model: 'deepgram/nova-3' }, { model: 'assemblyai' }]);\n });\n\n it('mixed list of strings and dicts', () => {\n const result = normalizeSTTFallback([\n 'deepgram/nova-3',\n { model: 'cartesia/ink-whisper' } as STTFallbackModel,\n 'assemblyai',\n ]);\n expect(result).toEqual([\n { model: 'deepgram/nova-3' },\n { model: 'cartesia/ink-whisper' },\n { model: 'assemblyai' },\n ]);\n });\n\n it('string with language suffix discards language', () => {\n const result = normalizeSTTFallback('deepgram/nova-3:en');\n expect(result).toEqual([{ model: 'deepgram/nova-3' }]);\n });\n\n it('FallbackModel with extraKwargs is preserved', () => {\n const fallback: STTFallbackModel = {\n model: 'deepgram/nova-3',\n extraKwargs: { keywords: [['livekit', 1.5]], punctuate: true },\n };\n const result = normalizeSTTFallback(fallback);\n expect(result).toEqual([\n {\n model: 'deepgram/nova-3',\n extraKwargs: { keywords: [['livekit', 1.5]], punctuate: true },\n },\n ]);\n });\n\n it('list with extraKwargs preserved', () => {\n const result = normalizeSTTFallback([\n { model: 'deepgram/nova-3', extraKwargs: { punctuate: true } } as STTFallbackModel,\n 'cartesia/ink-whisper',\n { model: 'assemblyai', extraKwargs: { format_turns: true } } as STTFallbackModel,\n ]);\n expect(result).toEqual([\n { model: 'deepgram/nova-3', extraKwargs: { punctuate: true } },\n { model: 'cartesia/ink-whisper' },\n { model: 'assemblyai', extraKwargs: { format_turns: true } },\n ]);\n });\n\n it('empty list returns empty list', () => {\n const result = normalizeSTTFallback([]);\n expect(result).toEqual([]);\n });\n\n it('multiple colons in model string splits on last', () => {\n const result = normalizeSTTFallback('some:model:part:fr');\n expect(result).toEqual([{ model: 'some:model:part' }]);\n });\n});\n\ndescribe('STT constructor fallback and connOptions', () => {\n it('normalizes language in constructor and model string', () => {\n const stt = makeStt({ model: 'deepgram/nova-3:english' });\n expect(stt['opts'].language).toBe('en');\n });\n\n it('prefers explicit normalized language over model suffix', () => {\n const stt = makeStt({ model: 'deepgram/nova-3:english', language: 'en_US' });\n expect(stt['opts'].language).toBe(normalizeLanguage('en_US'));\n });\n\n it('fallback not given defaults to undefined', () => {\n const stt = makeStt();\n expect(stt['opts'].fallback).toBeUndefined();\n });\n\n it('fallback single string is normalized', () => {\n const stt = makeStt({ fallback: 'cartesia/ink-whisper' });\n expect(stt['opts'].fallback).toEqual([{ model: 'cartesia/ink-whisper' }]);\n });\n\n it('fallback list of strings is normalized', () => {\n const stt = makeStt({ fallback: ['deepgram/nova-3', 'assemblyai'] });\n expect(stt['opts'].fallback).toEqual([{ model: 'deepgram/nova-3' }, { model: 'assemblyai' }]);\n });\n\n it('fallback single FallbackModel is normalized to list', () => {\n const stt = makeStt({ fallback: { model: 'deepgram/nova-3' } });\n expect(stt['opts'].fallback).toEqual([{ model: 'deepgram/nova-3' }]);\n });\n\n it('fallback with extraKwargs is preserved', () => {\n const stt = makeStt({\n fallback: {\n model: 'deepgram/nova-3',\n extraKwargs: { punctuate: true, keywords: [['livekit', 1.5]] },\n },\n });\n expect(stt['opts'].fallback).toEqual([\n {\n model: 'deepgram/nova-3',\n extraKwargs: { punctuate: true, keywords: [['livekit', 1.5]] },\n },\n ]);\n });\n\n it('fallback mixed list is normalized', () => {\n const stt = makeStt({\n fallback: [\n 'deepgram/nova-3',\n { model: 'cartesia', extraKwargs: { min_volume: 0.5 } },\n 'assemblyai',\n ],\n });\n expect(stt['opts'].fallback).toEqual([\n { model: 'deepgram/nova-3' },\n { model: 'cartesia', extraKwargs: { min_volume: 0.5 } },\n { model: 'assemblyai' },\n ]);\n });\n\n it('fallback string with language discards language', () => {\n const stt = makeStt({ fallback: 'deepgram/nova-3:en' });\n expect(stt['opts'].fallback).toEqual([{ model: 'deepgram/nova-3' }]);\n });\n\n it('connOptions not given uses default', () => {\n const stt = makeStt();\n expect(stt['opts'].connOptions).toEqual(DEFAULT_API_CONNECT_OPTIONS);\n });\n\n it('connOptions custom timeout', () => {\n const custom: APIConnectOptions = { timeoutMs: 30000, maxRetry: 3, retryIntervalMs: 2000 };\n const stt = makeStt({ connOptions: custom });\n expect(stt['opts'].connOptions).toEqual(custom);\n expect(stt['opts'].connOptions!.timeoutMs).toBe(30000);\n });\n\n it('connOptions custom maxRetry', () => {\n const custom: APIConnectOptions = { timeoutMs: 10000, maxRetry: 5, retryIntervalMs: 2000 };\n const stt = makeStt({ connOptions: custom });\n expect(stt['opts'].connOptions).toEqual(custom);\n expect(stt['opts'].connOptions!.maxRetry).toBe(5);\n });\n\n it('connOptions full custom', () => {\n const custom: APIConnectOptions = { timeoutMs: 60000, maxRetry: 10, retryIntervalMs: 2000 };\n const stt = makeStt({ connOptions: custom });\n expect(stt['opts'].connOptions).toEqual(custom);\n expect(stt['opts'].connOptions!.timeoutMs).toBe(60000);\n expect(stt['opts'].connOptions!.maxRetry).toBe(10);\n expect(stt['opts'].connOptions!.retryIntervalMs).toBe(2000);\n });\n});\n"],"mappings":";AAGA,oBAAgD;AAChD,sBAAkC;AAClC,iBAAiC;AACjC,mBAAoE;AACpE,iBAAsF;AAAA,IAEtF,yBAAU,MAAM;AACd,mCAAiB,EAAE,OAAO,UAAU,QAAQ,MAAM,CAAC;AACrD,CAAC;AAGD,SAAS,QAAQ,YAAqC,CAAC,GAAG;AACxD,QAAM,WAAW;AAAA,IACf,OAAO;AAAA,IACP,QAAQ;AAAA,IACR,WAAW;AAAA,IACX,SAAS;AAAA,EACX;AACA,SAAO,IAAI,eAAI,EAAE,GAAG,UAAU,GAAG,UAAU,CAAC;AAC9C;AAAA,IAEA,wBAAS,uBAAuB,MAAM;AACpC,wBAAG,iCAAiC,MAAM;AACxC,UAAM,CAAC,OAAO,QAAQ,QAAI,gCAAoB,UAAU;AACxD,8BAAO,KAAK,EAAE,KAAK,UAAU;AAC7B,8BAAO,QAAQ,EAAE,cAAc;AAAA,EACjC,CAAC;AAED,wBAAG,8BAA8B,MAAM;AACrC,UAAM,CAAC,OAAO,QAAQ,QAAI,gCAAoB,aAAa;AAC3D,8BAAO,KAAK,EAAE,KAAK,UAAU;AAC7B,8BAAO,QAAQ,EAAE,KAAK,IAAI;AAAA,EAC5B,CAAC;AAED,wBAAG,gCAAgC,MAAM;AACvC,UAAM,CAAC,OAAO,QAAQ,QAAI,gCAAoB,kBAAkB;AAChE,8BAAO,KAAK,EAAE,KAAK,UAAU;AAC7B,8BAAO,QAAQ,EAAE,KAAK,IAAI;AAAA,EAC5B,CAAC;AAED,wBAAG,0CAA0C,MAAM;AACjD,UAAM,CAAC,OAAO,QAAQ,QAAI,gCAAoB,iBAAiB;AAC/D,8BAAO,KAAK,EAAE,KAAK,iBAAiB;AACpC,8BAAO,QAAQ,EAAE,cAAc;AAAA,EACjC,CAAC;AAED,wBAAG,uCAAuC,MAAM;AAC9C,UAAM,CAAC,OAAO,QAAQ,QAAI,gCAAoB,oBAAoB;AAClE,8BAAO,KAAK,EAAE,KAAK,iBAAiB;AACpC,8BAAO,QAAQ,EAAE,KAAK,IAAI;AAAA,EAC5B,CAAC;AAED,mBAAG,KAAK;AAAA,IACN,CAAC,2BAA2B,wBAAwB,IAAI;AAAA,IACxD,CAAC,iBAAiB,cAAc,IAAI;AAAA,IACpC,CAAC,8BAA8B,2BAA2B,IAAI;AAAA,IAC9D,CAAC,yBAAyB,mBAAmB,OAAO;AAAA,IACpD,CAAC,eAAe,YAAY,IAAI;AAAA,EAClC,CAAC,EAAE,uCAAuC,CAAC,UAAU,eAAe,iBAAiB;AACnF,UAAM,CAAC,OAAO,QAAQ,QAAI,gCAAoB,QAAQ;AACtD,8BAAO,KAAK,EAAE,KAAK,aAAa;AAChC,8BAAO,QAAQ,EAAE,KAAK,YAAY;AAAA,EACpC,CAAC;AAED,wBAAG,+BAA+B,MAAM;AACtC,UAAM,CAAC,OAAO,QAAQ,QAAI,gCAAoB,MAAM;AACpD,8BAAO,KAAK,EAAE,KAAK,MAAM;AACzB,8BAAO,QAAQ,EAAE,cAAc;AAAA,EACjC,CAAC;AAED,wBAAG,4BAA4B,MAAM;AACnC,UAAM,CAAC,OAAO,QAAQ,QAAI,gCAAoB,SAAS;AACvD,8BAAO,KAAK,EAAE,KAAK,MAAM;AACzB,8BAAO,QAAQ,EAAE,KAAK,IAAI;AAAA,EAC5B,CAAC;AACH,CAAC;AAAA,IAED,wBAAS,wBAAwB,MAAM;AACrC,wBAAG,uBAAuB,MAAM;AAC9B,UAAM,aAAS,iCAAqB,iBAAiB;AACrD,8BAAO,MAAM,EAAE,QAAQ,CAAC,EAAE,OAAO,kBAAkB,CAAC,CAAC;AAAA,EACvD,CAAC;AAED,wBAAG,6BAA6B,MAAM;AACpC,UAAM,WAA6B,EAAE,OAAO,kBAAkB;AAC9D,UAAM,aAAS,iCAAqB,QAAQ;AAC5C,8BAAO,MAAM,EAAE,QAAQ,CAAC,EAAE,OAAO,kBAAkB,CAAC,CAAC;AAAA,EACvD,CAAC;AAED,wBAAG,yBAAyB,MAAM;AAChC,UAAM,aAAS,iCAAqB,CAAC,mBAAmB,sBAAsB,CAAC;AAC/E,8BAAO,MAAM,EAAE,QAAQ,CAAC,EAAE,OAAO,kBAAkB,GAAG,EAAE,OAAO,uBAAuB,CAAC,CAAC;AAAA,EAC1F,CAAC;AAED,wBAAG,+BAA+B,MAAM;AACtC,UAAM,YAAgC,CAAC,EAAE,OAAO,kBAAkB,GAAG,EAAE,OAAO,aAAa,CAAC;AAC5F,UAAM,aAAS,iCAAqB,SAAS;AAC7C,8BAAO,MAAM,EAAE,QAAQ,CAAC,EAAE,OAAO,kBAAkB,GAAG,EAAE,OAAO,aAAa,CAAC,CAAC;AAAA,EAChF,CAAC;AAED,wBAAG,mCAAmC,MAAM;AAC1C,UAAM,aAAS,iCAAqB;AAAA,MAClC;AAAA,MACA,EAAE,OAAO,uBAAuB;AAAA,MAChC;AAAA,IACF,CAAC;AACD,8BAAO,MAAM,EAAE,QAAQ;AAAA,MACrB,EAAE,OAAO,kBAAkB;AAAA,MAC3B,EAAE,OAAO,uBAAuB;AAAA,MAChC,EAAE,OAAO,aAAa;AAAA,IACxB,CAAC;AAAA,EACH,CAAC;AAED,wBAAG,iDAAiD,MAAM;AACxD,UAAM,aAAS,iCAAqB,oBAAoB;AACxD,8BAAO,MAAM,EAAE,QAAQ,CAAC,EAAE,OAAO,kBAAkB,CAAC,CAAC;AAAA,EACvD,CAAC;AAED,wBAAG,+CAA+C,MAAM;AACtD,UAAM,WAA6B;AAAA,MACjC,OAAO;AAAA,MACP,aAAa,EAAE,UAAU,CAAC,CAAC,WAAW,GAAG,CAAC,GAAG,WAAW,KAAK;AAAA,IAC/D;AACA,UAAM,aAAS,iCAAqB,QAAQ;AAC5C,8BAAO,MAAM,EAAE,QAAQ;AAAA,MACrB;AAAA,QACE,OAAO;AAAA,QACP,aAAa,EAAE,UAAU,CAAC,CAAC,WAAW,GAAG,CAAC,GAAG,WAAW,KAAK;AAAA,MAC/D;AAAA,IACF,CAAC;AAAA,EACH,CAAC;AAED,wBAAG,mCAAmC,MAAM;AAC1C,UAAM,aAAS,iCAAqB;AAAA,MAClC,EAAE,OAAO,mBAAmB,aAAa,EAAE,WAAW,KAAK,EAAE;AAAA,MAC7D;AAAA,MACA,EAAE,OAAO,cAAc,aAAa,EAAE,cAAc,KAAK,EAAE;AAAA,IAC7D,CAAC;AACD,8BAAO,MAAM,EAAE,QAAQ;AAAA,MACrB,EAAE,OAAO,mBAAmB,aAAa,EAAE,WAAW,KAAK,EAAE;AAAA,MAC7D,EAAE,OAAO,uBAAuB;AAAA,MAChC,EAAE,OAAO,cAAc,aAAa,EAAE,cAAc,KAAK,EAAE;AAAA,IAC7D,CAAC;AAAA,EACH,CAAC;AAED,wBAAG,iCAAiC,MAAM;AACxC,UAAM,aAAS,iCAAqB,CAAC,CAAC;AACtC,8BAAO,MAAM,EAAE,QAAQ,CAAC,CAAC;AAAA,EAC3B,CAAC;AAED,wBAAG,kDAAkD,MAAM;AACzD,UAAM,aAAS,iCAAqB,oBAAoB;AACxD,8BAAO,MAAM,EAAE,QAAQ,CAAC,EAAE,OAAO,kBAAkB,CAAC,CAAC;AAAA,EACvD,CAAC;AACH,CAAC;AAAA,IAED,wBAAS,4CAA4C,MAAM;AACzD,wBAAG,uDAAuD,MAAM;AAC9D,UAAM,MAAM,QAAQ,EAAE,OAAO,0BAA0B,CAAC;AACxD,8BAAO,IAAI,MAAM,EAAE,QAAQ,EAAE,KAAK,IAAI;AAAA,EACxC,CAAC;AAED,wBAAG,0DAA0D,MAAM;AACjE,UAAM,MAAM,QAAQ,EAAE,OAAO,2BAA2B,UAAU,QAAQ,CAAC;AAC3E,8BAAO,IAAI,MAAM,EAAE,QAAQ,EAAE,SAAK,mCAAkB,OAAO,CAAC;AAAA,EAC9D,CAAC;AAED,wBAAG,4CAA4C,MAAM;AACnD,UAAM,MAAM,QAAQ;AACpB,8BAAO,IAAI,MAAM,EAAE,QAAQ,EAAE,cAAc;AAAA,EAC7C,CAAC;AAED,wBAAG,wCAAwC,MAAM;AAC/C,UAAM,MAAM,QAAQ,EAAE,UAAU,uBAAuB,CAAC;AACxD,8BAAO,IAAI,MAAM,EAAE,QAAQ,EAAE,QAAQ,CAAC,EAAE,OAAO,uBAAuB,CAAC,CAAC;AAAA,EAC1E,CAAC;AAED,wBAAG,0CAA0C,MAAM;AACjD,UAAM,MAAM,QAAQ,EAAE,UAAU,CAAC,mBAAmB,YAAY,EAAE,CAAC;AACnE,8BAAO,IAAI,MAAM,EAAE,QAAQ,EAAE,QAAQ,CAAC,EAAE,OAAO,kBAAkB,GAAG,EAAE,OAAO,aAAa,CAAC,CAAC;AAAA,EAC9F,CAAC;AAED,wBAAG,uDAAuD,MAAM;AAC9D,UAAM,MAAM,QAAQ,EAAE,UAAU,EAAE,OAAO,kBAAkB,EAAE,CAAC;AAC9D,8BAAO,IAAI,MAAM,EAAE,QAAQ,EAAE,QAAQ,CAAC,EAAE,OAAO,kBAAkB,CAAC,CAAC;AAAA,EACrE,CAAC;AAED,wBAAG,0CAA0C,MAAM;AACjD,UAAM,MAAM,QAAQ;AAAA,MAClB,UAAU;AAAA,QACR,OAAO;AAAA,QACP,aAAa,EAAE,WAAW,MAAM,UAAU,CAAC,CAAC,WAAW,GAAG,CAAC,EAAE;AAAA,MAC/D;AAAA,IACF,CAAC;AACD,8BAAO,IAAI,MAAM,EAAE,QAAQ,EAAE,QAAQ;AAAA,MACnC;AAAA,QACE,OAAO;AAAA,QACP,aAAa,EAAE,WAAW,MAAM,UAAU,CAAC,CAAC,WAAW,GAAG,CAAC,EAAE;AAAA,MAC/D;AAAA,IACF,CAAC;AAAA,EACH,CAAC;AAED,wBAAG,qCAAqC,MAAM;AAC5C,UAAM,MAAM,QAAQ;AAAA,MAClB,UAAU;AAAA,QACR;AAAA,QACA,EAAE,OAAO,YAAY,aAAa,EAAE,YAAY,IAAI,EAAE;AAAA,QACtD;AAAA,MACF;AAAA,IACF,CAAC;AACD,8BAAO,IAAI,MAAM,EAAE,QAAQ,EAAE,QAAQ;AAAA,MACnC,EAAE,OAAO,kBAAkB;AAAA,MAC3B,EAAE,OAAO,YAAY,aAAa,EAAE,YAAY,IAAI,EAAE;AAAA,MACtD,EAAE,OAAO,aAAa;AAAA,IACxB,CAAC;AAAA,EACH,CAAC;AAED,wBAAG,mDAAmD,MAAM;AAC1D,UAAM,MAAM,QAAQ,EAAE,UAAU,qBAAqB,CAAC;AACtD,8BAAO,IAAI,MAAM,EAAE,QAAQ,EAAE,QAAQ,CAAC,EAAE,OAAO,kBAAkB,CAAC,CAAC;AAAA,EACrE,CAAC;AAED,wBAAG,sCAAsC,MAAM;AAC7C,UAAM,MAAM,QAAQ;AACpB,8BAAO,IAAI,MAAM,EAAE,WAAW,EAAE,QAAQ,wCAA2B;AAAA,EACrE,CAAC;AAED,wBAAG,8BAA8B,MAAM;AACrC,UAAM,SAA4B,EAAE,WAAW,KAAO,UAAU,GAAG,iBAAiB,IAAK;AACzF,UAAM,MAAM,QAAQ,EAAE,aAAa,OAAO,CAAC;AAC3C,8BAAO,IAAI,MAAM,EAAE,WAAW,EAAE,QAAQ,MAAM;AAC9C,8BAAO,IAAI,MAAM,EAAE,YAAa,SAAS,EAAE,KAAK,GAAK;AAAA,EACvD,CAAC;AAED,wBAAG,+BAA+B,MAAM;AACtC,UAAM,SAA4B,EAAE,WAAW,KAAO,UAAU,GAAG,iBAAiB,IAAK;AACzF,UAAM,MAAM,QAAQ,EAAE,aAAa,OAAO,CAAC;AAC3C,8BAAO,IAAI,MAAM,EAAE,WAAW,EAAE,QAAQ,MAAM;AAC9C,8BAAO,IAAI,MAAM,EAAE,YAAa,QAAQ,EAAE,KAAK,CAAC;AAAA,EAClD,CAAC;AAED,wBAAG,2BAA2B,MAAM;AAClC,UAAM,SAA4B,EAAE,WAAW,KAAO,UAAU,IAAI,iBAAiB,IAAK;AAC1F,UAAM,MAAM,QAAQ,EAAE,aAAa,OAAO,CAAC;AAC3C,8BAAO,IAAI,MAAM,EAAE,WAAW,EAAE,QAAQ,MAAM;AAC9C,8BAAO,IAAI,MAAM,EAAE,YAAa,SAAS,EAAE,KAAK,GAAK;AACrD,8BAAO,IAAI,MAAM,EAAE,YAAa,QAAQ,EAAE,KAAK,EAAE;AACjD,8BAAO,IAAI,MAAM,EAAE,YAAa,eAAe,EAAE,KAAK,GAAI;AAAA,EAC5D,CAAC;AACH,CAAC;","names":[]}
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import { beforeAll, describe, expect, it } from "vitest";
|
|
2
|
+
import { normalizeLanguage } from "../language.js";
|
|
2
3
|
import { initializeLogger } from "../log.js";
|
|
3
4
|
import { DEFAULT_API_CONNECT_OPTIONS } from "../types.js";
|
|
4
5
|
import { STT, normalizeSTTFallback, parseSTTModelString } from "./stt.js";
|
|
@@ -25,6 +26,11 @@ describe("parseSTTModelString", () => {
|
|
|
25
26
|
expect(model).toBe("deepgram");
|
|
26
27
|
expect(language).toBe("en");
|
|
27
28
|
});
|
|
29
|
+
it("normalizes language suffixes", () => {
|
|
30
|
+
const [model, language] = parseSTTModelString("deepgram:english");
|
|
31
|
+
expect(model).toBe("deepgram");
|
|
32
|
+
expect(language).toBe("en");
|
|
33
|
+
});
|
|
28
34
|
it("provider/model format without language", () => {
|
|
29
35
|
const [model, language] = parseSTTModelString("deepgram/nova-3");
|
|
30
36
|
expect(model).toBe("deepgram/nova-3");
|
|
@@ -127,6 +133,14 @@ describe("normalizeSTTFallback", () => {
|
|
|
127
133
|
});
|
|
128
134
|
});
|
|
129
135
|
describe("STT constructor fallback and connOptions", () => {
|
|
136
|
+
it("normalizes language in constructor and model string", () => {
|
|
137
|
+
const stt = makeStt({ model: "deepgram/nova-3:english" });
|
|
138
|
+
expect(stt["opts"].language).toBe("en");
|
|
139
|
+
});
|
|
140
|
+
it("prefers explicit normalized language over model suffix", () => {
|
|
141
|
+
const stt = makeStt({ model: "deepgram/nova-3:english", language: "en_US" });
|
|
142
|
+
expect(stt["opts"].language).toBe(normalizeLanguage("en_US"));
|
|
143
|
+
});
|
|
130
144
|
it("fallback not given defaults to undefined", () => {
|
|
131
145
|
const stt = makeStt();
|
|
132
146
|
expect(stt["opts"].fallback).toBeUndefined();
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../../src/inference/stt.test.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2025 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { beforeAll, describe, expect, it } from 'vitest';\nimport { initializeLogger } from '../log.js';\nimport { type APIConnectOptions, DEFAULT_API_CONNECT_OPTIONS } from '../types.js';\nimport { STT, type STTFallbackModel, normalizeSTTFallback, parseSTTModelString } from './stt.js';\n\nbeforeAll(() => {\n initializeLogger({ level: 'silent', pretty: false });\n});\n\n/** Helper to create STT with required credentials. */\nfunction makeStt(overrides: Record<string, unknown> = {}) {\n const defaults = {\n model: 'deepgram' as const,\n apiKey: 'test-key',\n apiSecret: 'test-secret',\n baseURL: 'https://example.livekit.cloud',\n };\n return new STT({ ...defaults, ...overrides });\n}\n\ndescribe('parseSTTModelString', () => {\n it('simple model without language', () => {\n const [model, language] = parseSTTModelString('deepgram');\n expect(model).toBe('deepgram');\n expect(language).toBeUndefined();\n });\n\n it('model with language suffix', () => {\n const [model, language] = parseSTTModelString('deepgram:en');\n expect(model).toBe('deepgram');\n expect(language).toBe('en');\n });\n\n it('provider/model format without language', () => {\n const [model, language] = parseSTTModelString('deepgram/nova-3');\n expect(model).toBe('deepgram/nova-3');\n expect(language).toBeUndefined();\n });\n\n it('provider/model format with language', () => {\n const [model, language] = parseSTTModelString('deepgram/nova-3:en');\n expect(model).toBe('deepgram/nova-3');\n expect(language).toBe('en');\n });\n\n it.each([\n ['cartesia/ink-whisper:de', 'cartesia/ink-whisper', 'de'],\n ['assemblyai:es', 'assemblyai', 'es'],\n ['deepgram/nova-2-medical:ja', 'deepgram/nova-2-medical', 'ja'],\n ['deepgram/nova-3:multi', 'deepgram/nova-3', 'multi'],\n ['cartesia:zh', 'cartesia', 'zh'],\n ])('various providers and languages: %s', (modelStr, expectedModel, expectedLang) => {\n const [model, language] = parseSTTModelString(modelStr);\n expect(model).toBe(expectedModel);\n expect(language).toBe(expectedLang);\n });\n\n it('auto model without language', () => {\n const [model, language] = parseSTTModelString('auto');\n expect(model).toBe('auto');\n expect(language).toBeUndefined();\n });\n\n it('auto model with language', () => {\n const [model, language] = parseSTTModelString('auto:pt');\n expect(model).toBe('auto');\n expect(language).toBe('pt');\n });\n});\n\ndescribe('normalizeSTTFallback', () => {\n it('single string model', () => {\n const result = normalizeSTTFallback('deepgram/nova-3');\n expect(result).toEqual([{ model: 'deepgram/nova-3' }]);\n });\n\n it('single FallbackModel dict', () => {\n const fallback: STTFallbackModel = { model: 'deepgram/nova-3' };\n const result = normalizeSTTFallback(fallback);\n expect(result).toEqual([{ model: 'deepgram/nova-3' }]);\n });\n\n it('list of string models', () => {\n const result = normalizeSTTFallback(['deepgram/nova-3', 'cartesia/ink-whisper']);\n expect(result).toEqual([{ model: 'deepgram/nova-3' }, { model: 'cartesia/ink-whisper' }]);\n });\n\n it('list of FallbackModel dicts', () => {\n const fallbacks: STTFallbackModel[] = [{ model: 'deepgram/nova-3' }, { model: 'assemblyai' }];\n const result = normalizeSTTFallback(fallbacks);\n expect(result).toEqual([{ model: 'deepgram/nova-3' }, { model: 'assemblyai' }]);\n });\n\n it('mixed list of strings and dicts', () => {\n const result = normalizeSTTFallback([\n 'deepgram/nova-3',\n { model: 'cartesia/ink-whisper' } as STTFallbackModel,\n 'assemblyai',\n ]);\n expect(result).toEqual([\n { model: 'deepgram/nova-3' },\n { model: 'cartesia/ink-whisper' },\n { model: 'assemblyai' },\n ]);\n });\n\n it('string with language suffix discards language', () => {\n const result = normalizeSTTFallback('deepgram/nova-3:en');\n expect(result).toEqual([{ model: 'deepgram/nova-3' }]);\n });\n\n it('FallbackModel with extraKwargs is preserved', () => {\n const fallback: STTFallbackModel = {\n model: 'deepgram/nova-3',\n extraKwargs: { keywords: [['livekit', 1.5]], punctuate: true },\n };\n const result = normalizeSTTFallback(fallback);\n expect(result).toEqual([\n {\n model: 'deepgram/nova-3',\n extraKwargs: { keywords: [['livekit', 1.5]], punctuate: true },\n },\n ]);\n });\n\n it('list with extraKwargs preserved', () => {\n const result = normalizeSTTFallback([\n { model: 'deepgram/nova-3', extraKwargs: { punctuate: true } } as STTFallbackModel,\n 'cartesia/ink-whisper',\n { model: 'assemblyai', extraKwargs: { format_turns: true } } as STTFallbackModel,\n ]);\n expect(result).toEqual([\n { model: 'deepgram/nova-3', extraKwargs: { punctuate: true } },\n { model: 'cartesia/ink-whisper' },\n { model: 'assemblyai', extraKwargs: { format_turns: true } },\n ]);\n });\n\n it('empty list returns empty list', () => {\n const result = normalizeSTTFallback([]);\n expect(result).toEqual([]);\n });\n\n it('multiple colons in model string splits on last', () => {\n const result = normalizeSTTFallback('some:model:part:fr');\n expect(result).toEqual([{ model: 'some:model:part' }]);\n });\n});\n\ndescribe('STT constructor fallback and connOptions', () => {\n it('fallback not given defaults to undefined', () => {\n const stt = makeStt();\n expect(stt['opts'].fallback).toBeUndefined();\n });\n\n it('fallback single string is normalized', () => {\n const stt = makeStt({ fallback: 'cartesia/ink-whisper' });\n expect(stt['opts'].fallback).toEqual([{ model: 'cartesia/ink-whisper' }]);\n });\n\n it('fallback list of strings is normalized', () => {\n const stt = makeStt({ fallback: ['deepgram/nova-3', 'assemblyai'] });\n expect(stt['opts'].fallback).toEqual([{ model: 'deepgram/nova-3' }, { model: 'assemblyai' }]);\n });\n\n it('fallback single FallbackModel is normalized to list', () => {\n const stt = makeStt({ fallback: { model: 'deepgram/nova-3' } });\n expect(stt['opts'].fallback).toEqual([{ model: 'deepgram/nova-3' }]);\n });\n\n it('fallback with extraKwargs is preserved', () => {\n const stt = makeStt({\n fallback: {\n model: 'deepgram/nova-3',\n extraKwargs: { punctuate: true, keywords: [['livekit', 1.5]] },\n },\n });\n expect(stt['opts'].fallback).toEqual([\n {\n model: 'deepgram/nova-3',\n extraKwargs: { punctuate: true, keywords: [['livekit', 1.5]] },\n },\n ]);\n });\n\n it('fallback mixed list is normalized', () => {\n const stt = makeStt({\n fallback: [\n 'deepgram/nova-3',\n { model: 'cartesia', extraKwargs: { min_volume: 0.5 } },\n 'assemblyai',\n ],\n });\n expect(stt['opts'].fallback).toEqual([\n { model: 'deepgram/nova-3' },\n { model: 'cartesia', extraKwargs: { min_volume: 0.5 } },\n { model: 'assemblyai' },\n ]);\n });\n\n it('fallback string with language discards language', () => {\n const stt = makeStt({ fallback: 'deepgram/nova-3:en' });\n expect(stt['opts'].fallback).toEqual([{ model: 'deepgram/nova-3' }]);\n });\n\n it('connOptions not given uses default', () => {\n const stt = makeStt();\n expect(stt['opts'].connOptions).toEqual(DEFAULT_API_CONNECT_OPTIONS);\n });\n\n it('connOptions custom timeout', () => {\n const custom: APIConnectOptions = { timeoutMs: 30000, maxRetry: 3, retryIntervalMs: 2000 };\n const stt = makeStt({ connOptions: custom });\n expect(stt['opts'].connOptions).toEqual(custom);\n expect(stt['opts'].connOptions!.timeoutMs).toBe(30000);\n });\n\n it('connOptions custom maxRetry', () => {\n const custom: APIConnectOptions = { timeoutMs: 10000, maxRetry: 5, retryIntervalMs: 2000 };\n const stt = makeStt({ connOptions: custom });\n expect(stt['opts'].connOptions).toEqual(custom);\n expect(stt['opts'].connOptions!.maxRetry).toBe(5);\n });\n\n it('connOptions full custom', () => {\n const custom: APIConnectOptions = { timeoutMs: 60000, maxRetry: 10, retryIntervalMs: 2000 };\n const stt = makeStt({ connOptions: custom });\n expect(stt['opts'].connOptions).toEqual(custom);\n expect(stt['opts'].connOptions!.timeoutMs).toBe(60000);\n expect(stt['opts'].connOptions!.maxRetry).toBe(10);\n expect(stt['opts'].connOptions!.retryIntervalMs).toBe(2000);\n });\n});\n"],"mappings":"AAGA,SAAS,WAAW,UAAU,QAAQ,UAAU;AAChD,SAAS,wBAAwB;AACjC,SAAiC,mCAAmC;AACpE,SAAS,KAA4B,sBAAsB,2BAA2B;AAEtF,UAAU,MAAM;AACd,mBAAiB,EAAE,OAAO,UAAU,QAAQ,MAAM,CAAC;AACrD,CAAC;AAGD,SAAS,QAAQ,YAAqC,CAAC,GAAG;AACxD,QAAM,WAAW;AAAA,IACf,OAAO;AAAA,IACP,QAAQ;AAAA,IACR,WAAW;AAAA,IACX,SAAS;AAAA,EACX;AACA,SAAO,IAAI,IAAI,EAAE,GAAG,UAAU,GAAG,UAAU,CAAC;AAC9C;AAEA,SAAS,uBAAuB,MAAM;AACpC,KAAG,iCAAiC,MAAM;AACxC,UAAM,CAAC,OAAO,QAAQ,IAAI,oBAAoB,UAAU;AACxD,WAAO,KAAK,EAAE,KAAK,UAAU;AAC7B,WAAO,QAAQ,EAAE,cAAc;AAAA,EACjC,CAAC;AAED,KAAG,8BAA8B,MAAM;AACrC,UAAM,CAAC,OAAO,QAAQ,IAAI,oBAAoB,aAAa;AAC3D,WAAO,KAAK,EAAE,KAAK,UAAU;AAC7B,WAAO,QAAQ,EAAE,KAAK,IAAI;AAAA,EAC5B,CAAC;AAED,KAAG,0CAA0C,MAAM;AACjD,UAAM,CAAC,OAAO,QAAQ,IAAI,oBAAoB,iBAAiB;AAC/D,WAAO,KAAK,EAAE,KAAK,iBAAiB;AACpC,WAAO,QAAQ,EAAE,cAAc;AAAA,EACjC,CAAC;AAED,KAAG,uCAAuC,MAAM;AAC9C,UAAM,CAAC,OAAO,QAAQ,IAAI,oBAAoB,oBAAoB;AAClE,WAAO,KAAK,EAAE,KAAK,iBAAiB;AACpC,WAAO,QAAQ,EAAE,KAAK,IAAI;AAAA,EAC5B,CAAC;AAED,KAAG,KAAK;AAAA,IACN,CAAC,2BAA2B,wBAAwB,IAAI;AAAA,IACxD,CAAC,iBAAiB,cAAc,IAAI;AAAA,IACpC,CAAC,8BAA8B,2BAA2B,IAAI;AAAA,IAC9D,CAAC,yBAAyB,mBAAmB,OAAO;AAAA,IACpD,CAAC,eAAe,YAAY,IAAI;AAAA,EAClC,CAAC,EAAE,uCAAuC,CAAC,UAAU,eAAe,iBAAiB;AACnF,UAAM,CAAC,OAAO,QAAQ,IAAI,oBAAoB,QAAQ;AACtD,WAAO,KAAK,EAAE,KAAK,aAAa;AAChC,WAAO,QAAQ,EAAE,KAAK,YAAY;AAAA,EACpC,CAAC;AAED,KAAG,+BAA+B,MAAM;AACtC,UAAM,CAAC,OAAO,QAAQ,IAAI,oBAAoB,MAAM;AACpD,WAAO,KAAK,EAAE,KAAK,MAAM;AACzB,WAAO,QAAQ,EAAE,cAAc;AAAA,EACjC,CAAC;AAED,KAAG,4BAA4B,MAAM;AACnC,UAAM,CAAC,OAAO,QAAQ,IAAI,oBAAoB,SAAS;AACvD,WAAO,KAAK,EAAE,KAAK,MAAM;AACzB,WAAO,QAAQ,EAAE,KAAK,IAAI;AAAA,EAC5B,CAAC;AACH,CAAC;AAED,SAAS,wBAAwB,MAAM;AACrC,KAAG,uBAAuB,MAAM;AAC9B,UAAM,SAAS,qBAAqB,iBAAiB;AACrD,WAAO,MAAM,EAAE,QAAQ,CAAC,EAAE,OAAO,kBAAkB,CAAC,CAAC;AAAA,EACvD,CAAC;AAED,KAAG,6BAA6B,MAAM;AACpC,UAAM,WAA6B,EAAE,OAAO,kBAAkB;AAC9D,UAAM,SAAS,qBAAqB,QAAQ;AAC5C,WAAO,MAAM,EAAE,QAAQ,CAAC,EAAE,OAAO,kBAAkB,CAAC,CAAC;AAAA,EACvD,CAAC;AAED,KAAG,yBAAyB,MAAM;AAChC,UAAM,SAAS,qBAAqB,CAAC,mBAAmB,sBAAsB,CAAC;AAC/E,WAAO,MAAM,EAAE,QAAQ,CAAC,EAAE,OAAO,kBAAkB,GAAG,EAAE,OAAO,uBAAuB,CAAC,CAAC;AAAA,EAC1F,CAAC;AAED,KAAG,+BAA+B,MAAM;AACtC,UAAM,YAAgC,CAAC,EAAE,OAAO,kBAAkB,GAAG,EAAE,OAAO,aAAa,CAAC;AAC5F,UAAM,SAAS,qBAAqB,SAAS;AAC7C,WAAO,MAAM,EAAE,QAAQ,CAAC,EAAE,OAAO,kBAAkB,GAAG,EAAE,OAAO,aAAa,CAAC,CAAC;AAAA,EAChF,CAAC;AAED,KAAG,mCAAmC,MAAM;AAC1C,UAAM,SAAS,qBAAqB;AAAA,MAClC;AAAA,MACA,EAAE,OAAO,uBAAuB;AAAA,MAChC;AAAA,IACF,CAAC;AACD,WAAO,MAAM,EAAE,QAAQ;AAAA,MACrB,EAAE,OAAO,kBAAkB;AAAA,MAC3B,EAAE,OAAO,uBAAuB;AAAA,MAChC,EAAE,OAAO,aAAa;AAAA,IACxB,CAAC;AAAA,EACH,CAAC;AAED,KAAG,iDAAiD,MAAM;AACxD,UAAM,SAAS,qBAAqB,oBAAoB;AACxD,WAAO,MAAM,EAAE,QAAQ,CAAC,EAAE,OAAO,kBAAkB,CAAC,CAAC;AAAA,EACvD,CAAC;AAED,KAAG,+CAA+C,MAAM;AACtD,UAAM,WAA6B;AAAA,MACjC,OAAO;AAAA,MACP,aAAa,EAAE,UAAU,CAAC,CAAC,WAAW,GAAG,CAAC,GAAG,WAAW,KAAK;AAAA,IAC/D;AACA,UAAM,SAAS,qBAAqB,QAAQ;AAC5C,WAAO,MAAM,EAAE,QAAQ;AAAA,MACrB;AAAA,QACE,OAAO;AAAA,QACP,aAAa,EAAE,UAAU,CAAC,CAAC,WAAW,GAAG,CAAC,GAAG,WAAW,KAAK;AAAA,MAC/D;AAAA,IACF,CAAC;AAAA,EACH,CAAC;AAED,KAAG,mCAAmC,MAAM;AAC1C,UAAM,SAAS,qBAAqB;AAAA,MAClC,EAAE,OAAO,mBAAmB,aAAa,EAAE,WAAW,KAAK,EAAE;AAAA,MAC7D;AAAA,MACA,EAAE,OAAO,cAAc,aAAa,EAAE,cAAc,KAAK,EAAE;AAAA,IAC7D,CAAC;AACD,WAAO,MAAM,EAAE,QAAQ;AAAA,MACrB,EAAE,OAAO,mBAAmB,aAAa,EAAE,WAAW,KAAK,EAAE;AAAA,MAC7D,EAAE,OAAO,uBAAuB;AAAA,MAChC,EAAE,OAAO,cAAc,aAAa,EAAE,cAAc,KAAK,EAAE;AAAA,IAC7D,CAAC;AAAA,EACH,CAAC;AAED,KAAG,iCAAiC,MAAM;AACxC,UAAM,SAAS,qBAAqB,CAAC,CAAC;AACtC,WAAO,MAAM,EAAE,QAAQ,CAAC,CAAC;AAAA,EAC3B,CAAC;AAED,KAAG,kDAAkD,MAAM;AACzD,UAAM,SAAS,qBAAqB,oBAAoB;AACxD,WAAO,MAAM,EAAE,QAAQ,CAAC,EAAE,OAAO,kBAAkB,CAAC,CAAC;AAAA,EACvD,CAAC;AACH,CAAC;AAED,SAAS,4CAA4C,MAAM;AACzD,KAAG,4CAA4C,MAAM;AACnD,UAAM,MAAM,QAAQ;AACpB,WAAO,IAAI,MAAM,EAAE,QAAQ,EAAE,cAAc;AAAA,EAC7C,CAAC;AAED,KAAG,wCAAwC,MAAM;AAC/C,UAAM,MAAM,QAAQ,EAAE,UAAU,uBAAuB,CAAC;AACxD,WAAO,IAAI,MAAM,EAAE,QAAQ,EAAE,QAAQ,CAAC,EAAE,OAAO,uBAAuB,CAAC,CAAC;AAAA,EAC1E,CAAC;AAED,KAAG,0CAA0C,MAAM;AACjD,UAAM,MAAM,QAAQ,EAAE,UAAU,CAAC,mBAAmB,YAAY,EAAE,CAAC;AACnE,WAAO,IAAI,MAAM,EAAE,QAAQ,EAAE,QAAQ,CAAC,EAAE,OAAO,kBAAkB,GAAG,EAAE,OAAO,aAAa,CAAC,CAAC;AAAA,EAC9F,CAAC;AAED,KAAG,uDAAuD,MAAM;AAC9D,UAAM,MAAM,QAAQ,EAAE,UAAU,EAAE,OAAO,kBAAkB,EAAE,CAAC;AAC9D,WAAO,IAAI,MAAM,EAAE,QAAQ,EAAE,QAAQ,CAAC,EAAE,OAAO,kBAAkB,CAAC,CAAC;AAAA,EACrE,CAAC;AAED,KAAG,0CAA0C,MAAM;AACjD,UAAM,MAAM,QAAQ;AAAA,MAClB,UAAU;AAAA,QACR,OAAO;AAAA,QACP,aAAa,EAAE,WAAW,MAAM,UAAU,CAAC,CAAC,WAAW,GAAG,CAAC,EAAE;AAAA,MAC/D;AAAA,IACF,CAAC;AACD,WAAO,IAAI,MAAM,EAAE,QAAQ,EAAE,QAAQ;AAAA,MACnC;AAAA,QACE,OAAO;AAAA,QACP,aAAa,EAAE,WAAW,MAAM,UAAU,CAAC,CAAC,WAAW,GAAG,CAAC,EAAE;AAAA,MAC/D;AAAA,IACF,CAAC;AAAA,EACH,CAAC;AAED,KAAG,qCAAqC,MAAM;AAC5C,UAAM,MAAM,QAAQ;AAAA,MAClB,UAAU;AAAA,QACR;AAAA,QACA,EAAE,OAAO,YAAY,aAAa,EAAE,YAAY,IAAI,EAAE;AAAA,QACtD;AAAA,MACF;AAAA,IACF,CAAC;AACD,WAAO,IAAI,MAAM,EAAE,QAAQ,EAAE,QAAQ;AAAA,MACnC,EAAE,OAAO,kBAAkB;AAAA,MAC3B,EAAE,OAAO,YAAY,aAAa,EAAE,YAAY,IAAI,EAAE;AAAA,MACtD,EAAE,OAAO,aAAa;AAAA,IACxB,CAAC;AAAA,EACH,CAAC;AAED,KAAG,mDAAmD,MAAM;AAC1D,UAAM,MAAM,QAAQ,EAAE,UAAU,qBAAqB,CAAC;AACtD,WAAO,IAAI,MAAM,EAAE,QAAQ,EAAE,QAAQ,CAAC,EAAE,OAAO,kBAAkB,CAAC,CAAC;AAAA,EACrE,CAAC;AAED,KAAG,sCAAsC,MAAM;AAC7C,UAAM,MAAM,QAAQ;AACpB,WAAO,IAAI,MAAM,EAAE,WAAW,EAAE,QAAQ,2BAA2B;AAAA,EACrE,CAAC;AAED,KAAG,8BAA8B,MAAM;AACrC,UAAM,SAA4B,EAAE,WAAW,KAAO,UAAU,GAAG,iBAAiB,IAAK;AACzF,UAAM,MAAM,QAAQ,EAAE,aAAa,OAAO,CAAC;AAC3C,WAAO,IAAI,MAAM,EAAE,WAAW,EAAE,QAAQ,MAAM;AAC9C,WAAO,IAAI,MAAM,EAAE,YAAa,SAAS,EAAE,KAAK,GAAK;AAAA,EACvD,CAAC;AAED,KAAG,+BAA+B,MAAM;AACtC,UAAM,SAA4B,EAAE,WAAW,KAAO,UAAU,GAAG,iBAAiB,IAAK;AACzF,UAAM,MAAM,QAAQ,EAAE,aAAa,OAAO,CAAC;AAC3C,WAAO,IAAI,MAAM,EAAE,WAAW,EAAE,QAAQ,MAAM;AAC9C,WAAO,IAAI,MAAM,EAAE,YAAa,QAAQ,EAAE,KAAK,CAAC;AAAA,EAClD,CAAC;AAED,KAAG,2BAA2B,MAAM;AAClC,UAAM,SAA4B,EAAE,WAAW,KAAO,UAAU,IAAI,iBAAiB,IAAK;AAC1F,UAAM,MAAM,QAAQ,EAAE,aAAa,OAAO,CAAC;AAC3C,WAAO,IAAI,MAAM,EAAE,WAAW,EAAE,QAAQ,MAAM;AAC9C,WAAO,IAAI,MAAM,EAAE,YAAa,SAAS,EAAE,KAAK,GAAK;AACrD,WAAO,IAAI,MAAM,EAAE,YAAa,QAAQ,EAAE,KAAK,EAAE;AACjD,WAAO,IAAI,MAAM,EAAE,YAAa,eAAe,EAAE,KAAK,GAAI;AAAA,EAC5D,CAAC;AACH,CAAC;","names":[]}
|
|
1
|
+
{"version":3,"sources":["../../src/inference/stt.test.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2025 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { beforeAll, describe, expect, it } from 'vitest';\nimport { normalizeLanguage } from '../language.js';\nimport { initializeLogger } from '../log.js';\nimport { type APIConnectOptions, DEFAULT_API_CONNECT_OPTIONS } from '../types.js';\nimport { STT, type STTFallbackModel, normalizeSTTFallback, parseSTTModelString } from './stt.js';\n\nbeforeAll(() => {\n initializeLogger({ level: 'silent', pretty: false });\n});\n\n/** Helper to create STT with required credentials. */\nfunction makeStt(overrides: Record<string, unknown> = {}) {\n const defaults = {\n model: 'deepgram' as const,\n apiKey: 'test-key',\n apiSecret: 'test-secret',\n baseURL: 'https://example.livekit.cloud',\n };\n return new STT({ ...defaults, ...overrides });\n}\n\ndescribe('parseSTTModelString', () => {\n it('simple model without language', () => {\n const [model, language] = parseSTTModelString('deepgram');\n expect(model).toBe('deepgram');\n expect(language).toBeUndefined();\n });\n\n it('model with language suffix', () => {\n const [model, language] = parseSTTModelString('deepgram:en');\n expect(model).toBe('deepgram');\n expect(language).toBe('en');\n });\n\n it('normalizes language suffixes', () => {\n const [model, language] = parseSTTModelString('deepgram:english');\n expect(model).toBe('deepgram');\n expect(language).toBe('en');\n });\n\n it('provider/model format without language', () => {\n const [model, language] = parseSTTModelString('deepgram/nova-3');\n expect(model).toBe('deepgram/nova-3');\n expect(language).toBeUndefined();\n });\n\n it('provider/model format with language', () => {\n const [model, language] = parseSTTModelString('deepgram/nova-3:en');\n expect(model).toBe('deepgram/nova-3');\n expect(language).toBe('en');\n });\n\n it.each([\n ['cartesia/ink-whisper:de', 'cartesia/ink-whisper', 'de'],\n ['assemblyai:es', 'assemblyai', 'es'],\n ['deepgram/nova-2-medical:ja', 'deepgram/nova-2-medical', 'ja'],\n ['deepgram/nova-3:multi', 'deepgram/nova-3', 'multi'],\n ['cartesia:zh', 'cartesia', 'zh'],\n ])('various providers and languages: %s', (modelStr, expectedModel, expectedLang) => {\n const [model, language] = parseSTTModelString(modelStr);\n expect(model).toBe(expectedModel);\n expect(language).toBe(expectedLang);\n });\n\n it('auto model without language', () => {\n const [model, language] = parseSTTModelString('auto');\n expect(model).toBe('auto');\n expect(language).toBeUndefined();\n });\n\n it('auto model with language', () => {\n const [model, language] = parseSTTModelString('auto:pt');\n expect(model).toBe('auto');\n expect(language).toBe('pt');\n });\n});\n\ndescribe('normalizeSTTFallback', () => {\n it('single string model', () => {\n const result = normalizeSTTFallback('deepgram/nova-3');\n expect(result).toEqual([{ model: 'deepgram/nova-3' }]);\n });\n\n it('single FallbackModel dict', () => {\n const fallback: STTFallbackModel = { model: 'deepgram/nova-3' };\n const result = normalizeSTTFallback(fallback);\n expect(result).toEqual([{ model: 'deepgram/nova-3' }]);\n });\n\n it('list of string models', () => {\n const result = normalizeSTTFallback(['deepgram/nova-3', 'cartesia/ink-whisper']);\n expect(result).toEqual([{ model: 'deepgram/nova-3' }, { model: 'cartesia/ink-whisper' }]);\n });\n\n it('list of FallbackModel dicts', () => {\n const fallbacks: STTFallbackModel[] = [{ model: 'deepgram/nova-3' }, { model: 'assemblyai' }];\n const result = normalizeSTTFallback(fallbacks);\n expect(result).toEqual([{ model: 'deepgram/nova-3' }, { model: 'assemblyai' }]);\n });\n\n it('mixed list of strings and dicts', () => {\n const result = normalizeSTTFallback([\n 'deepgram/nova-3',\n { model: 'cartesia/ink-whisper' } as STTFallbackModel,\n 'assemblyai',\n ]);\n expect(result).toEqual([\n { model: 'deepgram/nova-3' },\n { model: 'cartesia/ink-whisper' },\n { model: 'assemblyai' },\n ]);\n });\n\n it('string with language suffix discards language', () => {\n const result = normalizeSTTFallback('deepgram/nova-3:en');\n expect(result).toEqual([{ model: 'deepgram/nova-3' }]);\n });\n\n it('FallbackModel with extraKwargs is preserved', () => {\n const fallback: STTFallbackModel = {\n model: 'deepgram/nova-3',\n extraKwargs: { keywords: [['livekit', 1.5]], punctuate: true },\n };\n const result = normalizeSTTFallback(fallback);\n expect(result).toEqual([\n {\n model: 'deepgram/nova-3',\n extraKwargs: { keywords: [['livekit', 1.5]], punctuate: true },\n },\n ]);\n });\n\n it('list with extraKwargs preserved', () => {\n const result = normalizeSTTFallback([\n { model: 'deepgram/nova-3', extraKwargs: { punctuate: true } } as STTFallbackModel,\n 'cartesia/ink-whisper',\n { model: 'assemblyai', extraKwargs: { format_turns: true } } as STTFallbackModel,\n ]);\n expect(result).toEqual([\n { model: 'deepgram/nova-3', extraKwargs: { punctuate: true } },\n { model: 'cartesia/ink-whisper' },\n { model: 'assemblyai', extraKwargs: { format_turns: true } },\n ]);\n });\n\n it('empty list returns empty list', () => {\n const result = normalizeSTTFallback([]);\n expect(result).toEqual([]);\n });\n\n it('multiple colons in model string splits on last', () => {\n const result = normalizeSTTFallback('some:model:part:fr');\n expect(result).toEqual([{ model: 'some:model:part' }]);\n });\n});\n\ndescribe('STT constructor fallback and connOptions', () => {\n it('normalizes language in constructor and model string', () => {\n const stt = makeStt({ model: 'deepgram/nova-3:english' });\n expect(stt['opts'].language).toBe('en');\n });\n\n it('prefers explicit normalized language over model suffix', () => {\n const stt = makeStt({ model: 'deepgram/nova-3:english', language: 'en_US' });\n expect(stt['opts'].language).toBe(normalizeLanguage('en_US'));\n });\n\n it('fallback not given defaults to undefined', () => {\n const stt = makeStt();\n expect(stt['opts'].fallback).toBeUndefined();\n });\n\n it('fallback single string is normalized', () => {\n const stt = makeStt({ fallback: 'cartesia/ink-whisper' });\n expect(stt['opts'].fallback).toEqual([{ model: 'cartesia/ink-whisper' }]);\n });\n\n it('fallback list of strings is normalized', () => {\n const stt = makeStt({ fallback: ['deepgram/nova-3', 'assemblyai'] });\n expect(stt['opts'].fallback).toEqual([{ model: 'deepgram/nova-3' }, { model: 'assemblyai' }]);\n });\n\n it('fallback single FallbackModel is normalized to list', () => {\n const stt = makeStt({ fallback: { model: 'deepgram/nova-3' } });\n expect(stt['opts'].fallback).toEqual([{ model: 'deepgram/nova-3' }]);\n });\n\n it('fallback with extraKwargs is preserved', () => {\n const stt = makeStt({\n fallback: {\n model: 'deepgram/nova-3',\n extraKwargs: { punctuate: true, keywords: [['livekit', 1.5]] },\n },\n });\n expect(stt['opts'].fallback).toEqual([\n {\n model: 'deepgram/nova-3',\n extraKwargs: { punctuate: true, keywords: [['livekit', 1.5]] },\n },\n ]);\n });\n\n it('fallback mixed list is normalized', () => {\n const stt = makeStt({\n fallback: [\n 'deepgram/nova-3',\n { model: 'cartesia', extraKwargs: { min_volume: 0.5 } },\n 'assemblyai',\n ],\n });\n expect(stt['opts'].fallback).toEqual([\n { model: 'deepgram/nova-3' },\n { model: 'cartesia', extraKwargs: { min_volume: 0.5 } },\n { model: 'assemblyai' },\n ]);\n });\n\n it('fallback string with language discards language', () => {\n const stt = makeStt({ fallback: 'deepgram/nova-3:en' });\n expect(stt['opts'].fallback).toEqual([{ model: 'deepgram/nova-3' }]);\n });\n\n it('connOptions not given uses default', () => {\n const stt = makeStt();\n expect(stt['opts'].connOptions).toEqual(DEFAULT_API_CONNECT_OPTIONS);\n });\n\n it('connOptions custom timeout', () => {\n const custom: APIConnectOptions = { timeoutMs: 30000, maxRetry: 3, retryIntervalMs: 2000 };\n const stt = makeStt({ connOptions: custom });\n expect(stt['opts'].connOptions).toEqual(custom);\n expect(stt['opts'].connOptions!.timeoutMs).toBe(30000);\n });\n\n it('connOptions custom maxRetry', () => {\n const custom: APIConnectOptions = { timeoutMs: 10000, maxRetry: 5, retryIntervalMs: 2000 };\n const stt = makeStt({ connOptions: custom });\n expect(stt['opts'].connOptions).toEqual(custom);\n expect(stt['opts'].connOptions!.maxRetry).toBe(5);\n });\n\n it('connOptions full custom', () => {\n const custom: APIConnectOptions = { timeoutMs: 60000, maxRetry: 10, retryIntervalMs: 2000 };\n const stt = makeStt({ connOptions: custom });\n expect(stt['opts'].connOptions).toEqual(custom);\n expect(stt['opts'].connOptions!.timeoutMs).toBe(60000);\n expect(stt['opts'].connOptions!.maxRetry).toBe(10);\n expect(stt['opts'].connOptions!.retryIntervalMs).toBe(2000);\n });\n});\n"],"mappings":"AAGA,SAAS,WAAW,UAAU,QAAQ,UAAU;AAChD,SAAS,yBAAyB;AAClC,SAAS,wBAAwB;AACjC,SAAiC,mCAAmC;AACpE,SAAS,KAA4B,sBAAsB,2BAA2B;AAEtF,UAAU,MAAM;AACd,mBAAiB,EAAE,OAAO,UAAU,QAAQ,MAAM,CAAC;AACrD,CAAC;AAGD,SAAS,QAAQ,YAAqC,CAAC,GAAG;AACxD,QAAM,WAAW;AAAA,IACf,OAAO;AAAA,IACP,QAAQ;AAAA,IACR,WAAW;AAAA,IACX,SAAS;AAAA,EACX;AACA,SAAO,IAAI,IAAI,EAAE,GAAG,UAAU,GAAG,UAAU,CAAC;AAC9C;AAEA,SAAS,uBAAuB,MAAM;AACpC,KAAG,iCAAiC,MAAM;AACxC,UAAM,CAAC,OAAO,QAAQ,IAAI,oBAAoB,UAAU;AACxD,WAAO,KAAK,EAAE,KAAK,UAAU;AAC7B,WAAO,QAAQ,EAAE,cAAc;AAAA,EACjC,CAAC;AAED,KAAG,8BAA8B,MAAM;AACrC,UAAM,CAAC,OAAO,QAAQ,IAAI,oBAAoB,aAAa;AAC3D,WAAO,KAAK,EAAE,KAAK,UAAU;AAC7B,WAAO,QAAQ,EAAE,KAAK,IAAI;AAAA,EAC5B,CAAC;AAED,KAAG,gCAAgC,MAAM;AACvC,UAAM,CAAC,OAAO,QAAQ,IAAI,oBAAoB,kBAAkB;AAChE,WAAO,KAAK,EAAE,KAAK,UAAU;AAC7B,WAAO,QAAQ,EAAE,KAAK,IAAI;AAAA,EAC5B,CAAC;AAED,KAAG,0CAA0C,MAAM;AACjD,UAAM,CAAC,OAAO,QAAQ,IAAI,oBAAoB,iBAAiB;AAC/D,WAAO,KAAK,EAAE,KAAK,iBAAiB;AACpC,WAAO,QAAQ,EAAE,cAAc;AAAA,EACjC,CAAC;AAED,KAAG,uCAAuC,MAAM;AAC9C,UAAM,CAAC,OAAO,QAAQ,IAAI,oBAAoB,oBAAoB;AAClE,WAAO,KAAK,EAAE,KAAK,iBAAiB;AACpC,WAAO,QAAQ,EAAE,KAAK,IAAI;AAAA,EAC5B,CAAC;AAED,KAAG,KAAK;AAAA,IACN,CAAC,2BAA2B,wBAAwB,IAAI;AAAA,IACxD,CAAC,iBAAiB,cAAc,IAAI;AAAA,IACpC,CAAC,8BAA8B,2BAA2B,IAAI;AAAA,IAC9D,CAAC,yBAAyB,mBAAmB,OAAO;AAAA,IACpD,CAAC,eAAe,YAAY,IAAI;AAAA,EAClC,CAAC,EAAE,uCAAuC,CAAC,UAAU,eAAe,iBAAiB;AACnF,UAAM,CAAC,OAAO,QAAQ,IAAI,oBAAoB,QAAQ;AACtD,WAAO,KAAK,EAAE,KAAK,aAAa;AAChC,WAAO,QAAQ,EAAE,KAAK,YAAY;AAAA,EACpC,CAAC;AAED,KAAG,+BAA+B,MAAM;AACtC,UAAM,CAAC,OAAO,QAAQ,IAAI,oBAAoB,MAAM;AACpD,WAAO,KAAK,EAAE,KAAK,MAAM;AACzB,WAAO,QAAQ,EAAE,cAAc;AAAA,EACjC,CAAC;AAED,KAAG,4BAA4B,MAAM;AACnC,UAAM,CAAC,OAAO,QAAQ,IAAI,oBAAoB,SAAS;AACvD,WAAO,KAAK,EAAE,KAAK,MAAM;AACzB,WAAO,QAAQ,EAAE,KAAK,IAAI;AAAA,EAC5B,CAAC;AACH,CAAC;AAED,SAAS,wBAAwB,MAAM;AACrC,KAAG,uBAAuB,MAAM;AAC9B,UAAM,SAAS,qBAAqB,iBAAiB;AACrD,WAAO,MAAM,EAAE,QAAQ,CAAC,EAAE,OAAO,kBAAkB,CAAC,CAAC;AAAA,EACvD,CAAC;AAED,KAAG,6BAA6B,MAAM;AACpC,UAAM,WAA6B,EAAE,OAAO,kBAAkB;AAC9D,UAAM,SAAS,qBAAqB,QAAQ;AAC5C,WAAO,MAAM,EAAE,QAAQ,CAAC,EAAE,OAAO,kBAAkB,CAAC,CAAC;AAAA,EACvD,CAAC;AAED,KAAG,yBAAyB,MAAM;AAChC,UAAM,SAAS,qBAAqB,CAAC,mBAAmB,sBAAsB,CAAC;AAC/E,WAAO,MAAM,EAAE,QAAQ,CAAC,EAAE,OAAO,kBAAkB,GAAG,EAAE,OAAO,uBAAuB,CAAC,CAAC;AAAA,EAC1F,CAAC;AAED,KAAG,+BAA+B,MAAM;AACtC,UAAM,YAAgC,CAAC,EAAE,OAAO,kBAAkB,GAAG,EAAE,OAAO,aAAa,CAAC;AAC5F,UAAM,SAAS,qBAAqB,SAAS;AAC7C,WAAO,MAAM,EAAE,QAAQ,CAAC,EAAE,OAAO,kBAAkB,GAAG,EAAE,OAAO,aAAa,CAAC,CAAC;AAAA,EAChF,CAAC;AAED,KAAG,mCAAmC,MAAM;AAC1C,UAAM,SAAS,qBAAqB;AAAA,MAClC;AAAA,MACA,EAAE,OAAO,uBAAuB;AAAA,MAChC;AAAA,IACF,CAAC;AACD,WAAO,MAAM,EAAE,QAAQ;AAAA,MACrB,EAAE,OAAO,kBAAkB;AAAA,MAC3B,EAAE,OAAO,uBAAuB;AAAA,MAChC,EAAE,OAAO,aAAa;AAAA,IACxB,CAAC;AAAA,EACH,CAAC;AAED,KAAG,iDAAiD,MAAM;AACxD,UAAM,SAAS,qBAAqB,oBAAoB;AACxD,WAAO,MAAM,EAAE,QAAQ,CAAC,EAAE,OAAO,kBAAkB,CAAC,CAAC;AAAA,EACvD,CAAC;AAED,KAAG,+CAA+C,MAAM;AACtD,UAAM,WAA6B;AAAA,MACjC,OAAO;AAAA,MACP,aAAa,EAAE,UAAU,CAAC,CAAC,WAAW,GAAG,CAAC,GAAG,WAAW,KAAK;AAAA,IAC/D;AACA,UAAM,SAAS,qBAAqB,QAAQ;AAC5C,WAAO,MAAM,EAAE,QAAQ;AAAA,MACrB;AAAA,QACE,OAAO;AAAA,QACP,aAAa,EAAE,UAAU,CAAC,CAAC,WAAW,GAAG,CAAC,GAAG,WAAW,KAAK;AAAA,MAC/D;AAAA,IACF,CAAC;AAAA,EACH,CAAC;AAED,KAAG,mCAAmC,MAAM;AAC1C,UAAM,SAAS,qBAAqB;AAAA,MAClC,EAAE,OAAO,mBAAmB,aAAa,EAAE,WAAW,KAAK,EAAE;AAAA,MAC7D;AAAA,MACA,EAAE,OAAO,cAAc,aAAa,EAAE,cAAc,KAAK,EAAE;AAAA,IAC7D,CAAC;AACD,WAAO,MAAM,EAAE,QAAQ;AAAA,MACrB,EAAE,OAAO,mBAAmB,aAAa,EAAE,WAAW,KAAK,EAAE;AAAA,MAC7D,EAAE,OAAO,uBAAuB;AAAA,MAChC,EAAE,OAAO,cAAc,aAAa,EAAE,cAAc,KAAK,EAAE;AAAA,IAC7D,CAAC;AAAA,EACH,CAAC;AAED,KAAG,iCAAiC,MAAM;AACxC,UAAM,SAAS,qBAAqB,CAAC,CAAC;AACtC,WAAO,MAAM,EAAE,QAAQ,CAAC,CAAC;AAAA,EAC3B,CAAC;AAED,KAAG,kDAAkD,MAAM;AACzD,UAAM,SAAS,qBAAqB,oBAAoB;AACxD,WAAO,MAAM,EAAE,QAAQ,CAAC,EAAE,OAAO,kBAAkB,CAAC,CAAC;AAAA,EACvD,CAAC;AACH,CAAC;AAED,SAAS,4CAA4C,MAAM;AACzD,KAAG,uDAAuD,MAAM;AAC9D,UAAM,MAAM,QAAQ,EAAE,OAAO,0BAA0B,CAAC;AACxD,WAAO,IAAI,MAAM,EAAE,QAAQ,EAAE,KAAK,IAAI;AAAA,EACxC,CAAC;AAED,KAAG,0DAA0D,MAAM;AACjE,UAAM,MAAM,QAAQ,EAAE,OAAO,2BAA2B,UAAU,QAAQ,CAAC;AAC3E,WAAO,IAAI,MAAM,EAAE,QAAQ,EAAE,KAAK,kBAAkB,OAAO,CAAC;AAAA,EAC9D,CAAC;AAED,KAAG,4CAA4C,MAAM;AACnD,UAAM,MAAM,QAAQ;AACpB,WAAO,IAAI,MAAM,EAAE,QAAQ,EAAE,cAAc;AAAA,EAC7C,CAAC;AAED,KAAG,wCAAwC,MAAM;AAC/C,UAAM,MAAM,QAAQ,EAAE,UAAU,uBAAuB,CAAC;AACxD,WAAO,IAAI,MAAM,EAAE,QAAQ,EAAE,QAAQ,CAAC,EAAE,OAAO,uBAAuB,CAAC,CAAC;AAAA,EAC1E,CAAC;AAED,KAAG,0CAA0C,MAAM;AACjD,UAAM,MAAM,QAAQ,EAAE,UAAU,CAAC,mBAAmB,YAAY,EAAE,CAAC;AACnE,WAAO,IAAI,MAAM,EAAE,QAAQ,EAAE,QAAQ,CAAC,EAAE,OAAO,kBAAkB,GAAG,EAAE,OAAO,aAAa,CAAC,CAAC;AAAA,EAC9F,CAAC;AAED,KAAG,uDAAuD,MAAM;AAC9D,UAAM,MAAM,QAAQ,EAAE,UAAU,EAAE,OAAO,kBAAkB,EAAE,CAAC;AAC9D,WAAO,IAAI,MAAM,EAAE,QAAQ,EAAE,QAAQ,CAAC,EAAE,OAAO,kBAAkB,CAAC,CAAC;AAAA,EACrE,CAAC;AAED,KAAG,0CAA0C,MAAM;AACjD,UAAM,MAAM,QAAQ;AAAA,MAClB,UAAU;AAAA,QACR,OAAO;AAAA,QACP,aAAa,EAAE,WAAW,MAAM,UAAU,CAAC,CAAC,WAAW,GAAG,CAAC,EAAE;AAAA,MAC/D;AAAA,IACF,CAAC;AACD,WAAO,IAAI,MAAM,EAAE,QAAQ,EAAE,QAAQ;AAAA,MACnC;AAAA,QACE,OAAO;AAAA,QACP,aAAa,EAAE,WAAW,MAAM,UAAU,CAAC,CAAC,WAAW,GAAG,CAAC,EAAE;AAAA,MAC/D;AAAA,IACF,CAAC;AAAA,EACH,CAAC;AAED,KAAG,qCAAqC,MAAM;AAC5C,UAAM,MAAM,QAAQ;AAAA,MAClB,UAAU;AAAA,QACR;AAAA,QACA,EAAE,OAAO,YAAY,aAAa,EAAE,YAAY,IAAI,EAAE;AAAA,QACtD;AAAA,MACF;AAAA,IACF,CAAC;AACD,WAAO,IAAI,MAAM,EAAE,QAAQ,EAAE,QAAQ;AAAA,MACnC,EAAE,OAAO,kBAAkB;AAAA,MAC3B,EAAE,OAAO,YAAY,aAAa,EAAE,YAAY,IAAI,EAAE;AAAA,MACtD,EAAE,OAAO,aAAa;AAAA,IACxB,CAAC;AAAA,EACH,CAAC;AAED,KAAG,mDAAmD,MAAM;AAC1D,UAAM,MAAM,QAAQ,EAAE,UAAU,qBAAqB,CAAC;AACtD,WAAO,IAAI,MAAM,EAAE,QAAQ,EAAE,QAAQ,CAAC,EAAE,OAAO,kBAAkB,CAAC,CAAC;AAAA,EACrE,CAAC;AAED,KAAG,sCAAsC,MAAM;AAC7C,UAAM,MAAM,QAAQ;AACpB,WAAO,IAAI,MAAM,EAAE,WAAW,EAAE,QAAQ,2BAA2B;AAAA,EACrE,CAAC;AAED,KAAG,8BAA8B,MAAM;AACrC,UAAM,SAA4B,EAAE,WAAW,KAAO,UAAU,GAAG,iBAAiB,IAAK;AACzF,UAAM,MAAM,QAAQ,EAAE,aAAa,OAAO,CAAC;AAC3C,WAAO,IAAI,MAAM,EAAE,WAAW,EAAE,QAAQ,MAAM;AAC9C,WAAO,IAAI,MAAM,EAAE,YAAa,SAAS,EAAE,KAAK,GAAK;AAAA,EACvD,CAAC;AAED,KAAG,+BAA+B,MAAM;AACtC,UAAM,SAA4B,EAAE,WAAW,KAAO,UAAU,GAAG,iBAAiB,IAAK;AACzF,UAAM,MAAM,QAAQ,EAAE,aAAa,OAAO,CAAC;AAC3C,WAAO,IAAI,MAAM,EAAE,WAAW,EAAE,QAAQ,MAAM;AAC9C,WAAO,IAAI,MAAM,EAAE,YAAa,QAAQ,EAAE,KAAK,CAAC;AAAA,EAClD,CAAC;AAED,KAAG,2BAA2B,MAAM;AAClC,UAAM,SAA4B,EAAE,WAAW,KAAO,UAAU,IAAI,iBAAiB,IAAK;AAC1F,UAAM,MAAM,QAAQ,EAAE,aAAa,OAAO,CAAC;AAC3C,WAAO,IAAI,MAAM,EAAE,WAAW,EAAE,QAAQ,MAAM;AAC9C,WAAO,IAAI,MAAM,EAAE,YAAa,SAAS,EAAE,KAAK,GAAK;AACrD,WAAO,IAAI,MAAM,EAAE,YAAa,QAAQ,EAAE,KAAK,EAAE;AACjD,WAAO,IAAI,MAAM,EAAE,YAAa,eAAe,EAAE,KAAK,GAAI;AAAA,EAC5D,CAAC;AACH,CAAC;","names":[]}
|
package/dist/inference/tts.cjs
CHANGED
|
@@ -28,6 +28,7 @@ var import_ws = require("ws");
|
|
|
28
28
|
var import_exceptions = require("../_exceptions.cjs");
|
|
29
29
|
var import_audio = require("../audio.cjs");
|
|
30
30
|
var import_connection_pool = require("../connection_pool.cjs");
|
|
31
|
+
var import_language = require("../language.cjs");
|
|
31
32
|
var import_log = require("../log.cjs");
|
|
32
33
|
var import_stream_channel = require("../stream/stream_channel.cjs");
|
|
33
34
|
var import_tokenize = require("../tokenize/index.cjs");
|
|
@@ -111,7 +112,7 @@ class TTS extends import_tts.TTS {
|
|
|
111
112
|
this.opts = {
|
|
112
113
|
model: nextModel,
|
|
113
114
|
voice: nextVoice,
|
|
114
|
-
language,
|
|
115
|
+
language: (0, import_language.normalizeLanguage)(language),
|
|
115
116
|
encoding,
|
|
116
117
|
sampleRate,
|
|
117
118
|
baseURL: lkBaseURL,
|
|
@@ -138,9 +139,13 @@ class TTS extends import_tts.TTS {
|
|
|
138
139
|
return new TTS({ model, voice: voice || void 0 });
|
|
139
140
|
}
|
|
140
141
|
updateOptions(opts) {
|
|
141
|
-
this.opts = {
|
|
142
|
+
this.opts = {
|
|
143
|
+
...this.opts,
|
|
144
|
+
...opts,
|
|
145
|
+
language: opts.language !== void 0 ? (0, import_language.normalizeLanguage)(opts.language) : this.opts.language
|
|
146
|
+
};
|
|
142
147
|
for (const stream of this.streams) {
|
|
143
|
-
stream.updateOptions(opts);
|
|
148
|
+
stream.updateOptions(this.opts);
|
|
144
149
|
}
|
|
145
150
|
}
|
|
146
151
|
synthesize(_) {
|
|
@@ -217,7 +222,11 @@ class SynthesizeStream extends import_tts.SynthesizeStream {
|
|
|
217
222
|
return "inference.SynthesizeStream";
|
|
218
223
|
}
|
|
219
224
|
updateOptions(opts) {
|
|
220
|
-
this.opts = {
|
|
225
|
+
this.opts = {
|
|
226
|
+
...this.opts,
|
|
227
|
+
...opts,
|
|
228
|
+
language: opts.language !== void 0 ? (0, import_language.normalizeLanguage)(opts.language) : this.opts.language
|
|
229
|
+
};
|
|
221
230
|
}
|
|
222
231
|
async run() {
|
|
223
232
|
let closing = false;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../../src/inference/tts.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2025 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { WebSocket } from 'ws';\nimport { APIError, APIStatusError } from '../_exceptions.js';\nimport { AudioByteStream } from '../audio.js';\nimport { ConnectionPool } from '../connection_pool.js';\nimport { log } from '../log.js';\nimport { createStreamChannel } from '../stream/stream_channel.js';\nimport { basic as tokenizeBasic } from '../tokenize/index.js';\nimport type { ChunkedStream } from '../tts/index.js';\nimport { SynthesizeStream as BaseSynthesizeStream, TTS as BaseTTS } from '../tts/index.js';\nimport { type APIConnectOptions, DEFAULT_API_CONNECT_OPTIONS } from '../types.js';\nimport { Event, Future, Task, cancelAndWait, combineSignals, shortuuid } from '../utils.js';\nimport {\n type TtsClientEvent,\n type TtsServerEvent,\n ttsClientEventSchema,\n ttsServerEventSchema,\n} from './api_protos.js';\nimport { type AnyString, connectWs, createAccessToken } from './utils.js';\n\nexport type CartesiaModels =\n | 'cartesia/sonic-3'\n | 'cartesia/sonic-2'\n | 'cartesia/sonic-turbo'\n | 'cartesia/sonic';\n\nexport type DeepgramTTSModels = 'deepgram/aura' | 'deepgram/aura-2';\n\nexport type ElevenlabsModels =\n | 'elevenlabs/eleven_flash_v2'\n | 'elevenlabs/eleven_flash_v2_5'\n | 'elevenlabs/eleven_turbo_v2'\n | 'elevenlabs/eleven_turbo_v2_5'\n | 'elevenlabs/eleven_multilingual_v2';\n\nexport type InworldModels =\n | 'inworld/inworld-tts-1.5-max'\n | 'inworld/inworld-tts-1.5-mini'\n | 'inworld/inworld-tts-1-max'\n | 'inworld/inworld-tts-1';\n\nexport type RimeModels = 'rime/arcana' | 'rime/mistv2';\n\nexport interface CartesiaOptions {\n /** Maximum duration of audio in seconds. */\n duration?: number;\n /** Speech speed. Default: not specified. */\n speed?: 'slow' | 'normal' | 'fast';\n}\n\nexport interface ElevenlabsOptions {\n /** Inactivity timeout in seconds. Default: 60. */\n inactivity_timeout?: number;\n /** Text normalization mode. Default: \"auto\". */\n apply_text_normalization?: 'auto' | 'off' | 'on';\n}\n\nexport interface DeepgramTTSOptions {}\n\nexport interface RimeOptions {}\n\nexport interface InworldOptions {\n /** Controls how fast the voice speaks. 1.0 is normal speed, 0.5 is half, 1.5 is 1.5x. Default: 1.0. */\n speaking_rate?: number;\n /** Controls randomness in the output. Recommended between 0.6 and 1.1. Default: 1.1. */\n temperature?: number;\n /** Controls text normalization. \"ON\" expands numbers, dates, abbreviations. \"OFF\" reads text as written. Default: \"ON\". */\n text_normalization?: 'ON' | 'OFF';\n}\n\ntype _TTSModels =\n | CartesiaModels\n | DeepgramTTSModels\n | ElevenlabsModels\n | RimeModels\n | InworldModels;\n\nexport type TTSModels =\n | CartesiaModels\n | DeepgramTTSModels\n | ElevenlabsModels\n | RimeModels\n | InworldModels\n | AnyString;\n\nexport type ModelWithVoice = `${_TTSModels}:${string}` | TTSModels;\n\nexport type TTSOptions<TModel extends TTSModels> = TModel extends CartesiaModels\n ? CartesiaOptions\n : TModel extends DeepgramTTSModels\n ? DeepgramTTSOptions\n : TModel extends ElevenlabsModels\n ? ElevenlabsOptions\n : TModel extends RimeModels\n ? RimeOptions\n : TModel extends InworldModels\n ? InworldOptions\n : Record<string, unknown>;\n\n/** Parse a model string into [model, voice]. Voice is undefined if not specified. */\nexport function parseTTSModelString(model: string): [string, string | undefined] {\n const idx = model.lastIndexOf(':');\n if (idx !== -1) {\n return [model.slice(0, idx), model.slice(idx + 1)];\n }\n return [model, undefined];\n}\n\n/** A fallback model with optional extra configuration. Extra fields are passed through to the provider. */\nexport interface TTSFallbackModel {\n /** Model name (e.g. \"cartesia/sonic\", \"elevenlabs/eleven_flash_v2\", \"rime/arcana\"). */\n model: string;\n /** Voice to use for the model. */\n voice: string;\n /** Extra configuration for the model. */\n extraKwargs?: Record<string, unknown>;\n}\n\nexport type TTSFallbackModelType = TTSFallbackModel | string;\n\n/** Normalize a single or list of FallbackModelType into TTSFallbackModel[]. */\nexport function normalizeTTSFallback(\n fallback: TTSFallbackModelType | TTSFallbackModelType[],\n): TTSFallbackModel[] {\n const makeFallback = (model: TTSFallbackModelType): TTSFallbackModel => {\n if (typeof model === 'string') {\n const [name, voice] = parseTTSModelString(model);\n return { model: name, voice: voice ?? '' };\n }\n return model;\n };\n\n if (Array.isArray(fallback)) {\n return fallback.map(makeFallback);\n }\n return [makeFallback(fallback)];\n}\n\ntype TTSEncoding = 'pcm_s16le';\n\nconst DEFAULT_ENCODING: TTSEncoding = 'pcm_s16le';\nconst DEFAULT_SAMPLE_RATE = 16000;\nconst DEFAULT_BASE_URL = 'https://agent-gateway.livekit.cloud/v1';\nconst NUM_CHANNELS = 1;\nconst DEFAULT_LANGUAGE = 'en';\n\nexport interface InferenceTTSOptions<TModel extends TTSModels> {\n model?: TModel;\n voice?: string;\n language?: string;\n encoding: TTSEncoding;\n sampleRate: number;\n baseURL: string;\n apiKey: string;\n apiSecret: string;\n modelOptions: TTSOptions<TModel>;\n fallback?: TTSFallbackModel[];\n connOptions?: APIConnectOptions;\n}\n\n/**\n * Livekit Cloud Inference TTS\n */\nexport class TTS<TModel extends TTSModels> extends BaseTTS {\n private opts: InferenceTTSOptions<TModel>;\n private streams: Set<SynthesizeStream<TModel>> = new Set();\n pool: ConnectionPool<WebSocket>;\n\n #logger = log();\n\n constructor(opts: {\n model: TModel;\n voice?: string;\n language?: string;\n baseURL?: string;\n encoding?: TTSEncoding;\n sampleRate?: number;\n apiKey?: string;\n apiSecret?: string;\n modelOptions?: TTSOptions<TModel>;\n fallback?: TTSFallbackModelType | TTSFallbackModelType[];\n connOptions?: APIConnectOptions;\n }) {\n const sampleRate = opts?.sampleRate ?? DEFAULT_SAMPLE_RATE;\n super(sampleRate, 1, { streaming: true });\n\n const {\n model,\n voice,\n language = DEFAULT_LANGUAGE,\n baseURL,\n encoding = DEFAULT_ENCODING,\n apiKey,\n apiSecret,\n modelOptions = {} as TTSOptions<TModel>,\n fallback,\n connOptions,\n } = opts || {};\n\n const lkBaseURL = baseURL || process.env.LIVEKIT_INFERENCE_URL || DEFAULT_BASE_URL;\n const lkApiKey = apiKey || process.env.LIVEKIT_INFERENCE_API_KEY || process.env.LIVEKIT_API_KEY;\n if (!lkApiKey) {\n throw new Error('apiKey is required: pass apiKey or set LIVEKIT_API_KEY');\n }\n\n const lkApiSecret =\n apiSecret || process.env.LIVEKIT_INFERENCE_API_SECRET || process.env.LIVEKIT_API_SECRET;\n if (!lkApiSecret) {\n throw new Error('apiSecret is required: pass apiSecret or set LIVEKIT_API_SECRET');\n }\n\n // read voice id from the model if provided: \"provider/model:voice_id\"\n let nextModel = model;\n let nextVoice = voice;\n if (typeof nextModel === 'string') {\n const idx = nextModel.lastIndexOf(':');\n if (idx !== -1) {\n const voiceFromModel = nextModel.slice(idx + 1);\n if (nextVoice && nextVoice !== voiceFromModel) {\n this.#logger.warn(\n '`voice` is provided via both argument and model, using the one from the argument',\n { voice: nextVoice, model: nextModel },\n );\n } else {\n nextVoice = voiceFromModel;\n }\n nextModel = nextModel.slice(0, idx) as TModel;\n }\n }\n\n const normalizedFallback = fallback ? normalizeTTSFallback(fallback) : undefined;\n\n this.opts = {\n model: nextModel,\n voice: nextVoice,\n language,\n encoding,\n sampleRate,\n baseURL: lkBaseURL,\n apiKey: lkApiKey,\n apiSecret: lkApiSecret,\n modelOptions,\n fallback: normalizedFallback,\n connOptions: connOptions ?? DEFAULT_API_CONNECT_OPTIONS,\n };\n\n // Initialize connection pool\n this.pool = new ConnectionPool<WebSocket>({\n connectCb: (timeout) => this.connectWs(timeout),\n closeCb: (ws) => this.closeWs(ws),\n maxSessionDuration: 300_000,\n markRefreshedOnGet: true,\n connectTimeout: 10_000, // 10 seconds default\n });\n }\n\n get label() {\n return 'inference.TTS';\n }\n\n static fromModelString(modelString: string): TTS<AnyString> {\n const [model, voice] = parseTTSModelString(modelString);\n return new TTS({ model, voice: voice || undefined });\n }\n\n updateOptions(opts: Partial<Pick<InferenceTTSOptions<TModel>, 'model' | 'voice' | 'language'>>) {\n this.opts = { ...this.opts, ...opts };\n for (const stream of this.streams) {\n stream.updateOptions(opts);\n }\n }\n\n synthesize(_: string): ChunkedStream {\n throw new Error('ChunkedStream is not implemented');\n }\n\n stream(options?: { connOptions?: APIConnectOptions }): SynthesizeStream<TModel> {\n const { connOptions = this.opts.connOptions ?? DEFAULT_API_CONNECT_OPTIONS } = options || {};\n const stream = new SynthesizeStream(this, { ...this.opts }, connOptions);\n this.streams.add(stream);\n return stream;\n }\n\n async connectWs(timeout: number): Promise<WebSocket> {\n let baseURL = this.opts.baseURL;\n if (baseURL.startsWith('http://') || baseURL.startsWith('https://')) {\n baseURL = baseURL.replace('http', 'ws');\n }\n\n const token = await createAccessToken(this.opts.apiKey, this.opts.apiSecret);\n const url = `${baseURL}/tts`;\n const headers = { Authorization: `Bearer ${token}` } as Record<string, string>;\n\n const params = {\n type: 'session.create',\n sample_rate: String(this.opts.sampleRate),\n encoding: this.opts.encoding,\n extra: this.opts.modelOptions,\n } as Record<string, unknown>;\n\n if (this.opts.voice) (params as Record<string, unknown>).voice = this.opts.voice;\n if (this.opts.model) (params as Record<string, unknown>).model = this.opts.model;\n if (this.opts.language) (params as Record<string, unknown>).language = this.opts.language;\n\n if (this.opts.fallback?.length) {\n params.fallback = {\n models: this.opts.fallback.map((m) => ({\n model: m.model,\n voice: m.voice,\n extra: m.extraKwargs ?? {},\n })),\n };\n }\n\n if (this.opts.connOptions) {\n params.connection = {\n timeout: this.opts.connOptions.timeoutMs / 1000,\n retries: this.opts.connOptions.maxRetry,\n };\n }\n\n this.#logger.debug({ url }, 'inference.TTS creating new websocket connection (pool miss)');\n const socket = await connectWs(url, headers, timeout);\n socket.send(JSON.stringify(params));\n return socket;\n }\n\n async closeWs(ws: WebSocket) {\n await ws.close();\n }\n\n prewarm(): void {\n this.pool.prewarm();\n }\n\n async close() {\n for (const stream of this.streams) {\n await stream.close();\n }\n this.streams.clear();\n await this.pool.close();\n }\n}\n\nexport class SynthesizeStream<TModel extends TTSModels> extends BaseSynthesizeStream {\n private opts: InferenceTTSOptions<TModel>;\n private tts: TTS<TModel>;\n\n #logger = log();\n\n constructor(tts: TTS<TModel>, opts: InferenceTTSOptions<TModel>, connOptions: APIConnectOptions) {\n super(tts, connOptions);\n this.opts = opts;\n this.tts = tts;\n }\n\n get label() {\n return 'inference.SynthesizeStream';\n }\n\n updateOptions(opts: Partial<Pick<InferenceTTSOptions<TModel>, 'model' | 'voice' | 'language'>>) {\n this.opts = { ...this.opts, ...opts };\n }\n\n protected async run(): Promise<void> {\n let closing = false;\n let lastFrame: AudioFrame | undefined;\n\n const sendTokenizerStream = new tokenizeBasic.SentenceTokenizer().stream();\n const eventChannel = createStreamChannel<TtsServerEvent>();\n const requestId = shortuuid('tts_request_');\n const inputSentEvent = new Event();\n\n // Signal for protocol-driven completion (when 'done' message is received)\n const completionFuture = new Future<void>();\n\n const resourceCleanup = async () => {\n if (closing) return;\n closing = true;\n sendTokenizerStream.close();\n // close() returns a promise; don't leak it\n await eventChannel.close();\n };\n\n const sendClientEvent = async (event: TtsClientEvent, ws: WebSocket, signal: AbortSignal) => {\n // Don't send events to a closed WebSocket or aborted controller\n if (signal.aborted || closing) return;\n\n const validatedEvent = await ttsClientEventSchema.parseAsync(event);\n if (ws.readyState !== WebSocket.OPEN) {\n this.#logger.warn('Trying to send client TTS event to a closed WebSocket');\n return;\n }\n ws.send(JSON.stringify(validatedEvent));\n };\n\n const sendLastFrame = (segmentId: string, final: boolean) => {\n if (lastFrame) {\n this.queue.put({ requestId, segmentId, frame: lastFrame, final });\n lastFrame = undefined;\n }\n };\n\n const createInputTask = async (signal: AbortSignal) => {\n for await (const data of this.input) {\n if (signal.aborted || closing) break;\n if (data === SynthesizeStream.FLUSH_SENTINEL) {\n sendTokenizerStream.flush();\n continue;\n }\n sendTokenizerStream.pushText(data);\n }\n // Only call endInput if the stream hasn't been closed by cleanup\n if (!closing) {\n sendTokenizerStream.endInput();\n }\n };\n\n const createSentenceStreamTask = async (ws: WebSocket, signal: AbortSignal) => {\n for await (const ev of sendTokenizerStream) {\n if (signal.aborted || closing) break;\n\n await sendClientEvent(\n {\n type: 'input_transcript',\n transcript: ev.token + ' ',\n },\n ws,\n signal,\n );\n inputSentEvent.set();\n }\n\n await sendClientEvent({ type: 'session.flush' }, ws, signal);\n // needed in case empty input is sent\n inputSentEvent.set();\n };\n\n // Handles WebSocket message routing and error handling\n // Completes based on protocol messages, NOT on ws.close()\n const createWsListenerTask = async (ws: WebSocket, signal: AbortSignal) => {\n const onMessage = (data: Buffer) => {\n try {\n const eventJson = JSON.parse(data.toString()) as Record<string, unknown>;\n const validatedEvent = ttsServerEventSchema.parse(eventJson);\n // writer.write returns a promise; avoid unhandled rejections if stream is closed\n void eventChannel.write(validatedEvent).catch((error) => {\n this.#logger.debug(\n { error },\n 'Failed writing TTS event to stream channel (likely closed)',\n );\n });\n } catch (e) {\n this.#logger.error({ error: e }, 'Error parsing WebSocket message');\n }\n };\n\n const onError = (e: Error) => {\n this.#logger.error({ error: e }, 'WebSocket error');\n void resourceCleanup();\n try {\n // If the ws is misbehaving, hard-stop it immediately to avoid buffering.\n ws.terminate?.();\n } catch {\n // ignore\n }\n // Ensure this ws is not reused\n this.tts.pool.remove(ws);\n completionFuture.reject(e);\n };\n\n const onClose = () => {\n // WebSocket closed unexpectedly (not by us)\n if (!closing) {\n this.#logger.error('WebSocket closed unexpectedly');\n void resourceCleanup();\n // Ensure this ws is not reused\n this.tts.pool.remove(ws);\n completionFuture.reject(\n new APIStatusError({\n message: 'Gateway connection closed unexpectedly',\n options: { requestId },\n }),\n );\n }\n };\n\n const onAbort = () => {\n void resourceCleanup();\n try {\n // On interruption/abort, close the websocket immediately so the server stops streaming\n // and the ws library doesn't buffer unread frames in memory.\n ws.terminate?.();\n } catch {\n // ignore\n }\n this.tts.pool.remove(ws);\n inputSentEvent.set();\n completionFuture.resolve();\n };\n\n // Attach listeners\n ws.on('message', onMessage);\n ws.on('error', onError);\n ws.on('close', onClose);\n signal.addEventListener('abort', onAbort);\n\n try {\n // Wait for protocol-driven completion or error\n await completionFuture.await;\n } finally {\n // IMPORTANT: Remove listeners so connection can be reused\n ws.off('message', onMessage);\n ws.off('error', onError);\n ws.off('close', onClose);\n signal.removeEventListener('abort', onAbort);\n }\n };\n\n const createRecvTask = async (signal: AbortSignal) => {\n let currentSessionId: string | null = null;\n\n const bstream = new AudioByteStream(this.opts.sampleRate, NUM_CHANNELS);\n const serverEventStream = eventChannel.stream();\n const reader = serverEventStream.getReader();\n\n try {\n await inputSentEvent.wait();\n\n while (!this.closed && !signal.aborted) {\n const result = await reader.read();\n if (signal.aborted) return;\n if (result.done) return;\n\n const serverEvent = result.value;\n switch (serverEvent.type) {\n case 'session.created':\n currentSessionId = serverEvent.session_id;\n break;\n case 'output_audio':\n const base64Data = new Int8Array(Buffer.from(serverEvent.audio, 'base64'));\n for (const frame of bstream.write(base64Data.buffer)) {\n sendLastFrame(currentSessionId!, false);\n lastFrame = frame;\n }\n break;\n case 'done':\n for (const frame of bstream.flush()) {\n sendLastFrame(currentSessionId!, false);\n lastFrame = frame;\n }\n sendLastFrame(currentSessionId!, true);\n this.queue.put(SynthesizeStream.END_OF_STREAM);\n await resourceCleanup();\n completionFuture.resolve();\n return;\n case 'session.closed':\n await resourceCleanup();\n completionFuture.resolve();\n return;\n case 'error':\n this.#logger.error(\n { serverEvent },\n 'Received error message from LiveKit TTS WebSocket',\n );\n await resourceCleanup();\n completionFuture.reject(\n new APIError(`LiveKit TTS returned error: ${serverEvent.message}`),\n );\n return;\n default:\n this.#logger.warn('Unexpected message %s', serverEvent);\n break;\n }\n }\n } finally {\n reader.releaseLock();\n try {\n await serverEventStream.cancel();\n } catch (e) {\n this.#logger.debug('Error cancelling serverEventStream (may already be cancelled):', e);\n }\n }\n };\n\n try {\n await this.tts.pool.withConnection(\n async (ws: WebSocket) => {\n try {\n // IMPORTANT: don't cancel the stream's controller on normal completion,\n // otherwise the pool will remove+close the ws and every run becomes a pool miss.\n const runController = new AbortController();\n const onStreamAbort = () => runController.abort(this.abortController.signal.reason);\n this.abortController.signal.addEventListener('abort', onStreamAbort, { once: true });\n\n const tasks = [\n Task.from(\n async (controller) => {\n const combined = combineSignals(runController.signal, controller.signal);\n await createInputTask(combined);\n },\n undefined,\n 'inference-tts-input',\n ),\n Task.from(\n async (controller) => {\n const combined = combineSignals(runController.signal, controller.signal);\n await createSentenceStreamTask(ws, combined);\n },\n undefined,\n 'inference-tts-sentence',\n ),\n Task.from(\n async (controller) => {\n const combined = combineSignals(runController.signal, controller.signal);\n await createWsListenerTask(ws, combined);\n },\n undefined,\n 'inference-tts-ws-listener',\n ),\n Task.from(\n async (controller) => {\n const combined = combineSignals(runController.signal, controller.signal);\n await createRecvTask(combined);\n },\n undefined,\n 'inference-tts-recv',\n ),\n ];\n\n try {\n await Promise.all(tasks.map((t) => t.result));\n } finally {\n // Mirror python finally: unblock recv and cancel all tasks.\n inputSentEvent.set();\n await resourceCleanup();\n await cancelAndWait(tasks, 5000);\n this.abortController.signal.removeEventListener('abort', onStreamAbort);\n }\n } catch (e) {\n // If aborted, don't throw - let cleanup handle it\n if (e instanceof Error && e.name === 'AbortError') {\n return;\n }\n throw e;\n }\n },\n {\n timeout: this.connOptions.timeoutMs,\n },\n );\n } catch (e) {\n // Handle connection errors\n if (e instanceof Error && e.name === 'AbortError') {\n // Abort is expected during normal shutdown\n return;\n }\n throw e;\n } finally {\n // Ensure cleanup always runs (and don't leak the promise)\n await resourceCleanup();\n }\n }\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAIA,gBAA0B;AAC1B,wBAAyC;AACzC,mBAAgC;AAChC,6BAA+B;AAC/B,iBAAoB;AACpB,4BAAoC;AACpC,sBAAuC;AAEvC,iBAAyE;AACzE,mBAAoE;AACpE,mBAA8E;AAC9E,wBAKO;AACP,IAAAA,gBAA6D;AAkFtD,SAAS,oBAAoB,OAA6C;AAC/E,QAAM,MAAM,MAAM,YAAY,GAAG;AACjC,MAAI,QAAQ,IAAI;AACd,WAAO,CAAC,MAAM,MAAM,GAAG,GAAG,GAAG,MAAM,MAAM,MAAM,CAAC,CAAC;AAAA,EACnD;AACA,SAAO,CAAC,OAAO,MAAS;AAC1B;AAeO,SAAS,qBACd,UACoB;AACpB,QAAM,eAAe,CAAC,UAAkD;AACtE,QAAI,OAAO,UAAU,UAAU;AAC7B,YAAM,CAAC,MAAM,KAAK,IAAI,oBAAoB,KAAK;AAC/C,aAAO,EAAE,OAAO,MAAM,OAAO,SAAS,GAAG;AAAA,IAC3C;AACA,WAAO;AAAA,EACT;AAEA,MAAI,MAAM,QAAQ,QAAQ,GAAG;AAC3B,WAAO,SAAS,IAAI,YAAY;AAAA,EAClC;AACA,SAAO,CAAC,aAAa,QAAQ,CAAC;AAChC;AAIA,MAAM,mBAAgC;AACtC,MAAM,sBAAsB;AAC5B,MAAM,mBAAmB;AACzB,MAAM,eAAe;AACrB,MAAM,mBAAmB;AAmBlB,MAAM,YAAsC,WAAAC,IAAQ;AAAA,EACjD;AAAA,EACA,UAAyC,oBAAI,IAAI;AAAA,EACzD;AAAA,EAEA,cAAU,gBAAI;AAAA,EAEd,YAAY,MAYT;AACD,UAAM,cAAa,6BAAM,eAAc;AACvC,UAAM,YAAY,GAAG,EAAE,WAAW,KAAK,CAAC;AAExC,UAAM;AAAA,MACJ;AAAA,MACA;AAAA,MACA,WAAW;AAAA,MACX;AAAA,MACA,WAAW;AAAA,MACX;AAAA,MACA;AAAA,MACA,eAAe,CAAC;AAAA,MAChB;AAAA,MACA;AAAA,IACF,IAAI,QAAQ,CAAC;AAEb,UAAM,YAAY,WAAW,QAAQ,IAAI,yBAAyB;AAClE,UAAM,WAAW,UAAU,QAAQ,IAAI,6BAA6B,QAAQ,IAAI;AAChF,QAAI,CAAC,UAAU;AACb,YAAM,IAAI,MAAM,wDAAwD;AAAA,IAC1E;AAEA,UAAM,cACJ,aAAa,QAAQ,IAAI,gCAAgC,QAAQ,IAAI;AACvE,QAAI,CAAC,aAAa;AAChB,YAAM,IAAI,MAAM,iEAAiE;AAAA,IACnF;AAGA,QAAI,YAAY;AAChB,QAAI,YAAY;AAChB,QAAI,OAAO,cAAc,UAAU;AACjC,YAAM,MAAM,UAAU,YAAY,GAAG;AACrC,UAAI,QAAQ,IAAI;AACd,cAAM,iBAAiB,UAAU,MAAM,MAAM,CAAC;AAC9C,YAAI,aAAa,cAAc,gBAAgB;AAC7C,eAAK,QAAQ;AAAA,YACX;AAAA,YACA,EAAE,OAAO,WAAW,OAAO,UAAU;AAAA,UACvC;AAAA,QACF,OAAO;AACL,sBAAY;AAAA,QACd;AACA,oBAAY,UAAU,MAAM,GAAG,GAAG;AAAA,MACpC;AAAA,IACF;AAEA,UAAM,qBAAqB,WAAW,qBAAqB,QAAQ,IAAI;AAEvE,SAAK,OAAO;AAAA,MACV,OAAO;AAAA,MACP,OAAO;AAAA,MACP;AAAA,MACA;AAAA,MACA;AAAA,MACA,SAAS;AAAA,MACT,QAAQ;AAAA,MACR,WAAW;AAAA,MACX;AAAA,MACA,UAAU;AAAA,MACV,aAAa,eAAe;AAAA,IAC9B;AAGA,SAAK,OAAO,IAAI,sCAA0B;AAAA,MACxC,WAAW,CAAC,YAAY,KAAK,UAAU,OAAO;AAAA,MAC9C,SAAS,CAAC,OAAO,KAAK,QAAQ,EAAE;AAAA,MAChC,oBAAoB;AAAA,MACpB,oBAAoB;AAAA,MACpB,gBAAgB;AAAA;AAAA,IAClB,CAAC;AAAA,EACH;AAAA,EAEA,IAAI,QAAQ;AACV,WAAO;AAAA,EACT;AAAA,EAEA,OAAO,gBAAgB,aAAqC;AAC1D,UAAM,CAAC,OAAO,KAAK,IAAI,oBAAoB,WAAW;AACtD,WAAO,IAAI,IAAI,EAAE,OAAO,OAAO,SAAS,OAAU,CAAC;AAAA,EACrD;AAAA,EAEA,cAAc,MAAkF;AAC9F,SAAK,OAAO,EAAE,GAAG,KAAK,MAAM,GAAG,KAAK;AACpC,eAAW,UAAU,KAAK,SAAS;AACjC,aAAO,cAAc,IAAI;AAAA,IAC3B;AAAA,EACF;AAAA,EAEA,WAAW,GAA0B;AACnC,UAAM,IAAI,MAAM,kCAAkC;AAAA,EACpD;AAAA,EAEA,OAAO,SAAyE;AAC9E,UAAM,EAAE,cAAc,KAAK,KAAK,eAAe,yCAA4B,IAAI,WAAW,CAAC;AAC3F,UAAM,SAAS,IAAI,iBAAiB,MAAM,EAAE,GAAG,KAAK,KAAK,GAAG,WAAW;AACvE,SAAK,QAAQ,IAAI,MAAM;AACvB,WAAO;AAAA,EACT;AAAA,EAEA,MAAM,UAAU,SAAqC;AA9RvD;AA+RI,QAAI,UAAU,KAAK,KAAK;AACxB,QAAI,QAAQ,WAAW,SAAS,KAAK,QAAQ,WAAW,UAAU,GAAG;AACnE,gBAAU,QAAQ,QAAQ,QAAQ,IAAI;AAAA,IACxC;AAEA,UAAM,QAAQ,UAAM,iCAAkB,KAAK,KAAK,QAAQ,KAAK,KAAK,SAAS;AAC3E,UAAM,MAAM,GAAG,OAAO;AACtB,UAAM,UAAU,EAAE,eAAe,UAAU,KAAK,GAAG;AAEnD,UAAM,SAAS;AAAA,MACb,MAAM;AAAA,MACN,aAAa,OAAO,KAAK,KAAK,UAAU;AAAA,MACxC,UAAU,KAAK,KAAK;AAAA,MACpB,OAAO,KAAK,KAAK;AAAA,IACnB;AAEA,QAAI,KAAK,KAAK,MAAO,CAAC,OAAmC,QAAQ,KAAK,KAAK;AAC3E,QAAI,KAAK,KAAK,MAAO,CAAC,OAAmC,QAAQ,KAAK,KAAK;AAC3E,QAAI,KAAK,KAAK,SAAU,CAAC,OAAmC,WAAW,KAAK,KAAK;AAEjF,SAAI,UAAK,KAAK,aAAV,mBAAoB,QAAQ;AAC9B,aAAO,WAAW;AAAA,QAChB,QAAQ,KAAK,KAAK,SAAS,IAAI,CAAC,OAAO;AAAA,UACrC,OAAO,EAAE;AAAA,UACT,OAAO,EAAE;AAAA,UACT,OAAO,EAAE,eAAe,CAAC;AAAA,QAC3B,EAAE;AAAA,MACJ;AAAA,IACF;AAEA,QAAI,KAAK,KAAK,aAAa;AACzB,aAAO,aAAa;AAAA,QAClB,SAAS,KAAK,KAAK,YAAY,YAAY;AAAA,QAC3C,SAAS,KAAK,KAAK,YAAY;AAAA,MACjC;AAAA,IACF;AAEA,SAAK,QAAQ,MAAM,EAAE,IAAI,GAAG,6DAA6D;AACzF,UAAM,SAAS,UAAM,yBAAU,KAAK,SAAS,OAAO;AACpD,WAAO,KAAK,KAAK,UAAU,MAAM,CAAC;AAClC,WAAO;AAAA,EACT;AAAA,EAEA,MAAM,QAAQ,IAAe;AAC3B,UAAM,GAAG,MAAM;AAAA,EACjB;AAAA,EAEA,UAAgB;AACd,SAAK,KAAK,QAAQ;AAAA,EACpB;AAAA,EAEA,MAAM,QAAQ;AACZ,eAAW,UAAU,KAAK,SAAS;AACjC,YAAM,OAAO,MAAM;AAAA,IACrB;AACA,SAAK,QAAQ,MAAM;AACnB,UAAM,KAAK,KAAK,MAAM;AAAA,EACxB;AACF;AAEO,MAAM,yBAAmD,WAAAC,iBAAqB;AAAA,EAC3E;AAAA,EACA;AAAA,EAER,cAAU,gBAAI;AAAA,EAEd,YAAY,KAAkB,MAAmC,aAAgC;AAC/F,UAAM,KAAK,WAAW;AACtB,SAAK,OAAO;AACZ,SAAK,MAAM;AAAA,EACb;AAAA,EAEA,IAAI,QAAQ;AACV,WAAO;AAAA,EACT;AAAA,EAEA,cAAc,MAAkF;AAC9F,SAAK,OAAO,EAAE,GAAG,KAAK,MAAM,GAAG,KAAK;AAAA,EACtC;AAAA,EAEA,MAAgB,MAAqB;AACnC,QAAI,UAAU;AACd,QAAI;AAEJ,UAAM,sBAAsB,IAAI,gBAAAC,MAAc,kBAAkB,EAAE,OAAO;AACzE,UAAM,mBAAe,2CAAoC;AACzD,UAAM,gBAAY,wBAAU,cAAc;AAC1C,UAAM,iBAAiB,IAAI,mBAAM;AAGjC,UAAM,mBAAmB,IAAI,oBAAa;AAE1C,UAAM,kBAAkB,YAAY;AAClC,UAAI,QAAS;AACb,gBAAU;AACV,0BAAoB,MAAM;AAE1B,YAAM,aAAa,MAAM;AAAA,IAC3B;AAEA,UAAM,kBAAkB,OAAO,OAAuB,IAAe,WAAwB;AAE3F,UAAI,OAAO,WAAW,QAAS;AAE/B,YAAM,iBAAiB,MAAM,uCAAqB,WAAW,KAAK;AAClE,UAAI,GAAG,eAAe,oBAAU,MAAM;AACpC,aAAK,QAAQ,KAAK,uDAAuD;AACzE;AAAA,MACF;AACA,SAAG,KAAK,KAAK,UAAU,cAAc,CAAC;AAAA,IACxC;AAEA,UAAM,gBAAgB,CAAC,WAAmB,UAAmB;AAC3D,UAAI,WAAW;AACb,aAAK,MAAM,IAAI,EAAE,WAAW,WAAW,OAAO,WAAW,MAAM,CAAC;AAChE,oBAAY;AAAA,MACd;AAAA,IACF;AAEA,UAAM,kBAAkB,OAAO,WAAwB;AACrD,uBAAiB,QAAQ,KAAK,OAAO;AACnC,YAAI,OAAO,WAAW,QAAS;AAC/B,YAAI,SAAS,iBAAiB,gBAAgB;AAC5C,8BAAoB,MAAM;AAC1B;AAAA,QACF;AACA,4BAAoB,SAAS,IAAI;AAAA,MACnC;AAEA,UAAI,CAAC,SAAS;AACZ,4BAAoB,SAAS;AAAA,MAC/B;AAAA,IACF;AAEA,UAAM,2BAA2B,OAAO,IAAe,WAAwB;AAC7E,uBAAiB,MAAM,qBAAqB;AAC1C,YAAI,OAAO,WAAW,QAAS;AAE/B,cAAM;AAAA,UACJ;AAAA,YACE,MAAM;AAAA,YACN,YAAY,GAAG,QAAQ;AAAA,UACzB;AAAA,UACA;AAAA,UACA;AAAA,QACF;AACA,uBAAe,IAAI;AAAA,MACrB;AAEA,YAAM,gBAAgB,EAAE,MAAM,gBAAgB,GAAG,IAAI,MAAM;AAE3D,qBAAe,IAAI;AAAA,IACrB;AAIA,UAAM,uBAAuB,OAAO,IAAe,WAAwB;AACzE,YAAM,YAAY,CAAC,SAAiB;AAClC,YAAI;AACF,gBAAM,YAAY,KAAK,MAAM,KAAK,SAAS,CAAC;AAC5C,gBAAM,iBAAiB,uCAAqB,MAAM,SAAS;AAE3D,eAAK,aAAa,MAAM,cAAc,EAAE,MAAM,CAAC,UAAU;AACvD,iBAAK,QAAQ;AAAA,cACX,EAAE,MAAM;AAAA,cACR;AAAA,YACF;AAAA,UACF,CAAC;AAAA,QACH,SAAS,GAAG;AACV,eAAK,QAAQ,MAAM,EAAE,OAAO,EAAE,GAAG,iCAAiC;AAAA,QACpE;AAAA,MACF;AAEA,YAAM,UAAU,CAAC,MAAa;AA5cpC;AA6cQ,aAAK,QAAQ,MAAM,EAAE,OAAO,EAAE,GAAG,iBAAiB;AAClD,aAAK,gBAAgB;AACrB,YAAI;AAEF,mBAAG,cAAH;AAAA,QACF,QAAQ;AAAA,QAER;AAEA,aAAK,IAAI,KAAK,OAAO,EAAE;AACvB,yBAAiB,OAAO,CAAC;AAAA,MAC3B;AAEA,YAAM,UAAU,MAAM;AAEpB,YAAI,CAAC,SAAS;AACZ,eAAK,QAAQ,MAAM,+BAA+B;AAClD,eAAK,gBAAgB;AAErB,eAAK,IAAI,KAAK,OAAO,EAAE;AACvB,2BAAiB;AAAA,YACf,IAAI,iCAAe;AAAA,cACjB,SAAS;AAAA,cACT,SAAS,EAAE,UAAU;AAAA,YACvB,CAAC;AAAA,UACH;AAAA,QACF;AAAA,MACF;AAEA,YAAM,UAAU,MAAM;AA1e5B;AA2eQ,aAAK,gBAAgB;AACrB,YAAI;AAGF,mBAAG,cAAH;AAAA,QACF,QAAQ;AAAA,QAER;AACA,aAAK,IAAI,KAAK,OAAO,EAAE;AACvB,uBAAe,IAAI;AACnB,yBAAiB,QAAQ;AAAA,MAC3B;AAGA,SAAG,GAAG,WAAW,SAAS;AAC1B,SAAG,GAAG,SAAS,OAAO;AACtB,SAAG,GAAG,SAAS,OAAO;AACtB,aAAO,iBAAiB,SAAS,OAAO;AAExC,UAAI;AAEF,cAAM,iBAAiB;AAAA,MACzB,UAAE;AAEA,WAAG,IAAI,WAAW,SAAS;AAC3B,WAAG,IAAI,SAAS,OAAO;AACvB,WAAG,IAAI,SAAS,OAAO;AACvB,eAAO,oBAAoB,SAAS,OAAO;AAAA,MAC7C;AAAA,IACF;AAEA,UAAM,iBAAiB,OAAO,WAAwB;AACpD,UAAI,mBAAkC;AAEtC,YAAM,UAAU,IAAI,6BAAgB,KAAK,KAAK,YAAY,YAAY;AACtE,YAAM,oBAAoB,aAAa,OAAO;AAC9C,YAAM,SAAS,kBAAkB,UAAU;AAE3C,UAAI;AACF,cAAM,eAAe,KAAK;AAE1B,eAAO,CAAC,KAAK,UAAU,CAAC,OAAO,SAAS;AACtC,gBAAM,SAAS,MAAM,OAAO,KAAK;AACjC,cAAI,OAAO,QAAS;AACpB,cAAI,OAAO,KAAM;AAEjB,gBAAM,cAAc,OAAO;AAC3B,kBAAQ,YAAY,MAAM;AAAA,YACxB,KAAK;AACH,iCAAmB,YAAY;AAC/B;AAAA,YACF,KAAK;AACH,oBAAM,aAAa,IAAI,UAAU,OAAO,KAAK,YAAY,OAAO,QAAQ,CAAC;AACzE,yBAAW,SAAS,QAAQ,MAAM,WAAW,MAAM,GAAG;AACpD,8BAAc,kBAAmB,KAAK;AACtC,4BAAY;AAAA,cACd;AACA;AAAA,YACF,KAAK;AACH,yBAAW,SAAS,QAAQ,MAAM,GAAG;AACnC,8BAAc,kBAAmB,KAAK;AACtC,4BAAY;AAAA,cACd;AACA,4BAAc,kBAAmB,IAAI;AACrC,mBAAK,MAAM,IAAI,iBAAiB,aAAa;AAC7C,oBAAM,gBAAgB;AACtB,+BAAiB,QAAQ;AACzB;AAAA,YACF,KAAK;AACH,oBAAM,gBAAgB;AACtB,+BAAiB,QAAQ;AACzB;AAAA,YACF,KAAK;AACH,mBAAK,QAAQ;AAAA,gBACX,EAAE,YAAY;AAAA,gBACd;AAAA,cACF;AACA,oBAAM,gBAAgB;AACtB,+BAAiB;AAAA,gBACf,IAAI,2BAAS,+BAA+B,YAAY,OAAO,EAAE;AAAA,cACnE;AACA;AAAA,YACF;AACE,mBAAK,QAAQ,KAAK,yBAAyB,WAAW;AACtD;AAAA,UACJ;AAAA,QACF;AAAA,MACF,UAAE;AACA,eAAO,YAAY;AACnB,YAAI;AACF,gBAAM,kBAAkB,OAAO;AAAA,QACjC,SAAS,GAAG;AACV,eAAK,QAAQ,MAAM,kEAAkE,CAAC;AAAA,QACxF;AAAA,MACF;AAAA,IACF;AAEA,QAAI;AACF,YAAM,KAAK,IAAI,KAAK;AAAA,QAClB,OAAO,OAAkB;AACvB,cAAI;AAGF,kBAAM,gBAAgB,IAAI,gBAAgB;AAC1C,kBAAM,gBAAgB,MAAM,cAAc,MAAM,KAAK,gBAAgB,OAAO,MAAM;AAClF,iBAAK,gBAAgB,OAAO,iBAAiB,SAAS,eAAe,EAAE,MAAM,KAAK,CAAC;AAEnF,kBAAM,QAAQ;AAAA,cACZ,kBAAK;AAAA,gBACH,OAAO,eAAe;AACpB,wBAAM,eAAW,6BAAe,cAAc,QAAQ,WAAW,MAAM;AACvE,wBAAM,gBAAgB,QAAQ;AAAA,gBAChC;AAAA,gBACA;AAAA,gBACA;AAAA,cACF;AAAA,cACA,kBAAK;AAAA,gBACH,OAAO,eAAe;AACpB,wBAAM,eAAW,6BAAe,cAAc,QAAQ,WAAW,MAAM;AACvE,wBAAM,yBAAyB,IAAI,QAAQ;AAAA,gBAC7C;AAAA,gBACA;AAAA,gBACA;AAAA,cACF;AAAA,cACA,kBAAK;AAAA,gBACH,OAAO,eAAe;AACpB,wBAAM,eAAW,6BAAe,cAAc,QAAQ,WAAW,MAAM;AACvE,wBAAM,qBAAqB,IAAI,QAAQ;AAAA,gBACzC;AAAA,gBACA;AAAA,gBACA;AAAA,cACF;AAAA,cACA,kBAAK;AAAA,gBACH,OAAO,eAAe;AACpB,wBAAM,eAAW,6BAAe,cAAc,QAAQ,WAAW,MAAM;AACvE,wBAAM,eAAe,QAAQ;AAAA,gBAC/B;AAAA,gBACA;AAAA,gBACA;AAAA,cACF;AAAA,YACF;AAEA,gBAAI;AACF,oBAAM,QAAQ,IAAI,MAAM,IAAI,CAAC,MAAM,EAAE,MAAM,CAAC;AAAA,YAC9C,UAAE;AAEA,6BAAe,IAAI;AACnB,oBAAM,gBAAgB;AACtB,wBAAM,4BAAc,OAAO,GAAI;AAC/B,mBAAK,gBAAgB,OAAO,oBAAoB,SAAS,aAAa;AAAA,YACxE;AAAA,UACF,SAAS,GAAG;AAEV,gBAAI,aAAa,SAAS,EAAE,SAAS,cAAc;AACjD;AAAA,YACF;AACA,kBAAM;AAAA,UACR;AAAA,QACF;AAAA,QACA;AAAA,UACE,SAAS,KAAK,YAAY;AAAA,QAC5B;AAAA,MACF;AAAA,IACF,SAAS,GAAG;AAEV,UAAI,aAAa,SAAS,EAAE,SAAS,cAAc;AAEjD;AAAA,MACF;AACA,YAAM;AAAA,IACR,UAAE;AAEA,YAAM,gBAAgB;AAAA,IACxB;AAAA,EACF;AACF;","names":["import_utils","BaseTTS","BaseSynthesizeStream","tokenizeBasic"]}
|
|
1
|
+
{"version":3,"sources":["../../src/inference/tts.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2025 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { WebSocket } from 'ws';\nimport { APIError, APIStatusError } from '../_exceptions.js';\nimport { AudioByteStream } from '../audio.js';\nimport { ConnectionPool } from '../connection_pool.js';\nimport { type LanguageCode, normalizeLanguage } from '../language.js';\nimport { log } from '../log.js';\nimport { createStreamChannel } from '../stream/stream_channel.js';\nimport { basic as tokenizeBasic } from '../tokenize/index.js';\nimport type { ChunkedStream } from '../tts/index.js';\nimport { SynthesizeStream as BaseSynthesizeStream, TTS as BaseTTS } from '../tts/index.js';\nimport { type APIConnectOptions, DEFAULT_API_CONNECT_OPTIONS } from '../types.js';\nimport { Event, Future, Task, cancelAndWait, combineSignals, shortuuid } from '../utils.js';\nimport {\n type TtsClientEvent,\n type TtsServerEvent,\n ttsClientEventSchema,\n ttsServerEventSchema,\n} from './api_protos.js';\nimport { type AnyString, connectWs, createAccessToken } from './utils.js';\n\nexport type CartesiaModels =\n | 'cartesia/sonic-3'\n | 'cartesia/sonic-2'\n | 'cartesia/sonic-turbo'\n | 'cartesia/sonic';\n\nexport type DeepgramTTSModels = 'deepgram/aura' | 'deepgram/aura-2';\n\nexport type ElevenlabsModels =\n | 'elevenlabs/eleven_flash_v2'\n | 'elevenlabs/eleven_flash_v2_5'\n | 'elevenlabs/eleven_turbo_v2'\n | 'elevenlabs/eleven_turbo_v2_5'\n | 'elevenlabs/eleven_multilingual_v2';\n\nexport type InworldModels =\n | 'inworld/inworld-tts-1.5-max'\n | 'inworld/inworld-tts-1.5-mini'\n | 'inworld/inworld-tts-1-max'\n | 'inworld/inworld-tts-1';\n\nexport type RimeModels = 'rime/arcana' | 'rime/mistv2';\n\nexport interface CartesiaOptions {\n /** Maximum duration of audio in seconds. */\n duration?: number;\n /** Speech speed. Default: not specified. */\n speed?: 'slow' | 'normal' | 'fast';\n}\n\nexport interface ElevenlabsOptions {\n /** Inactivity timeout in seconds. Default: 60. */\n inactivity_timeout?: number;\n /** Text normalization mode. Default: \"auto\". */\n apply_text_normalization?: 'auto' | 'off' | 'on';\n}\n\nexport interface DeepgramTTSOptions {}\n\nexport interface RimeOptions {}\n\nexport interface InworldOptions {\n /** Controls how fast the voice speaks. 1.0 is normal speed, 0.5 is half, 1.5 is 1.5x. Default: 1.0. */\n speaking_rate?: number;\n /** Controls randomness in the output. Recommended between 0.6 and 1.1. Default: 1.1. */\n temperature?: number;\n /** Controls text normalization. \"ON\" expands numbers, dates, abbreviations. \"OFF\" reads text as written. Default: \"ON\". */\n text_normalization?: 'ON' | 'OFF';\n}\n\ntype _TTSModels =\n | CartesiaModels\n | DeepgramTTSModels\n | ElevenlabsModels\n | RimeModels\n | InworldModels;\n\nexport type TTSModels =\n | CartesiaModels\n | DeepgramTTSModels\n | ElevenlabsModels\n | RimeModels\n | InworldModels\n | AnyString;\n\nexport type ModelWithVoice = `${_TTSModels}:${string}` | TTSModels;\n\nexport type TTSOptions<TModel extends TTSModels> = TModel extends CartesiaModels\n ? CartesiaOptions\n : TModel extends DeepgramTTSModels\n ? DeepgramTTSOptions\n : TModel extends ElevenlabsModels\n ? ElevenlabsOptions\n : TModel extends RimeModels\n ? RimeOptions\n : TModel extends InworldModels\n ? InworldOptions\n : Record<string, unknown>;\n\n/** Parse a model string into [model, voice]. Voice is undefined if not specified. */\nexport function parseTTSModelString(model: string): [string, string | undefined] {\n const idx = model.lastIndexOf(':');\n if (idx !== -1) {\n return [model.slice(0, idx), model.slice(idx + 1)];\n }\n return [model, undefined];\n}\n\n/** A fallback model with optional extra configuration. Extra fields are passed through to the provider. */\nexport interface TTSFallbackModel {\n /** Model name (e.g. \"cartesia/sonic\", \"elevenlabs/eleven_flash_v2\", \"rime/arcana\"). */\n model: string;\n /** Voice to use for the model. */\n voice: string;\n /** Extra configuration for the model. */\n extraKwargs?: Record<string, unknown>;\n}\n\nexport type TTSFallbackModelType = TTSFallbackModel | string;\n\n/** Normalize a single or list of FallbackModelType into TTSFallbackModel[]. */\nexport function normalizeTTSFallback(\n fallback: TTSFallbackModelType | TTSFallbackModelType[],\n): TTSFallbackModel[] {\n const makeFallback = (model: TTSFallbackModelType): TTSFallbackModel => {\n if (typeof model === 'string') {\n const [name, voice] = parseTTSModelString(model);\n return { model: name, voice: voice ?? '' };\n }\n return model;\n };\n\n if (Array.isArray(fallback)) {\n return fallback.map(makeFallback);\n }\n return [makeFallback(fallback)];\n}\n\ntype TTSEncoding = 'pcm_s16le';\n\nconst DEFAULT_ENCODING: TTSEncoding = 'pcm_s16le';\nconst DEFAULT_SAMPLE_RATE = 16000;\nconst DEFAULT_BASE_URL = 'https://agent-gateway.livekit.cloud/v1';\nconst NUM_CHANNELS = 1;\nconst DEFAULT_LANGUAGE = 'en';\n\nexport interface InferenceTTSOptions<TModel extends TTSModels> {\n model?: TModel;\n voice?: string;\n language?: LanguageCode;\n encoding: TTSEncoding;\n sampleRate: number;\n baseURL: string;\n apiKey: string;\n apiSecret: string;\n modelOptions: TTSOptions<TModel>;\n fallback?: TTSFallbackModel[];\n connOptions?: APIConnectOptions;\n}\n\n/**\n * Livekit Cloud Inference TTS\n */\nexport class TTS<TModel extends TTSModels> extends BaseTTS {\n private opts: InferenceTTSOptions<TModel>;\n private streams: Set<SynthesizeStream<TModel>> = new Set();\n pool: ConnectionPool<WebSocket>;\n\n #logger = log();\n\n constructor(opts: {\n model: TModel;\n voice?: string;\n language?: string;\n baseURL?: string;\n encoding?: TTSEncoding;\n sampleRate?: number;\n apiKey?: string;\n apiSecret?: string;\n modelOptions?: TTSOptions<TModel>;\n fallback?: TTSFallbackModelType | TTSFallbackModelType[];\n connOptions?: APIConnectOptions;\n }) {\n const sampleRate = opts?.sampleRate ?? DEFAULT_SAMPLE_RATE;\n super(sampleRate, 1, { streaming: true });\n\n const {\n model,\n voice,\n language = DEFAULT_LANGUAGE,\n baseURL,\n encoding = DEFAULT_ENCODING,\n apiKey,\n apiSecret,\n modelOptions = {} as TTSOptions<TModel>,\n fallback,\n connOptions,\n } = opts || {};\n\n const lkBaseURL = baseURL || process.env.LIVEKIT_INFERENCE_URL || DEFAULT_BASE_URL;\n const lkApiKey = apiKey || process.env.LIVEKIT_INFERENCE_API_KEY || process.env.LIVEKIT_API_KEY;\n if (!lkApiKey) {\n throw new Error('apiKey is required: pass apiKey or set LIVEKIT_API_KEY');\n }\n\n const lkApiSecret =\n apiSecret || process.env.LIVEKIT_INFERENCE_API_SECRET || process.env.LIVEKIT_API_SECRET;\n if (!lkApiSecret) {\n throw new Error('apiSecret is required: pass apiSecret or set LIVEKIT_API_SECRET');\n }\n\n // read voice id from the model if provided: \"provider/model:voice_id\"\n let nextModel = model;\n let nextVoice = voice;\n if (typeof nextModel === 'string') {\n const idx = nextModel.lastIndexOf(':');\n if (idx !== -1) {\n const voiceFromModel = nextModel.slice(idx + 1);\n if (nextVoice && nextVoice !== voiceFromModel) {\n this.#logger.warn(\n '`voice` is provided via both argument and model, using the one from the argument',\n { voice: nextVoice, model: nextModel },\n );\n } else {\n nextVoice = voiceFromModel;\n }\n nextModel = nextModel.slice(0, idx) as TModel;\n }\n }\n\n const normalizedFallback = fallback ? normalizeTTSFallback(fallback) : undefined;\n\n this.opts = {\n model: nextModel,\n voice: nextVoice,\n language: normalizeLanguage(language),\n encoding,\n sampleRate,\n baseURL: lkBaseURL,\n apiKey: lkApiKey,\n apiSecret: lkApiSecret,\n modelOptions,\n fallback: normalizedFallback,\n connOptions: connOptions ?? DEFAULT_API_CONNECT_OPTIONS,\n };\n\n // Initialize connection pool\n this.pool = new ConnectionPool<WebSocket>({\n connectCb: (timeout) => this.connectWs(timeout),\n closeCb: (ws) => this.closeWs(ws),\n maxSessionDuration: 300_000,\n markRefreshedOnGet: true,\n connectTimeout: 10_000, // 10 seconds default\n });\n }\n\n get label() {\n return 'inference.TTS';\n }\n\n static fromModelString(modelString: string): TTS<AnyString> {\n const [model, voice] = parseTTSModelString(modelString);\n return new TTS({ model, voice: voice || undefined });\n }\n\n updateOptions(opts: Partial<Pick<InferenceTTSOptions<TModel>, 'model' | 'voice' | 'language'>>) {\n this.opts = {\n ...this.opts,\n ...opts,\n language: opts.language !== undefined ? normalizeLanguage(opts.language) : this.opts.language,\n };\n for (const stream of this.streams) {\n stream.updateOptions(this.opts);\n }\n }\n\n synthesize(_: string): ChunkedStream {\n throw new Error('ChunkedStream is not implemented');\n }\n\n stream(options?: { connOptions?: APIConnectOptions }): SynthesizeStream<TModel> {\n const { connOptions = this.opts.connOptions ?? DEFAULT_API_CONNECT_OPTIONS } = options || {};\n const stream = new SynthesizeStream(this, { ...this.opts }, connOptions);\n this.streams.add(stream);\n return stream;\n }\n\n async connectWs(timeout: number): Promise<WebSocket> {\n let baseURL = this.opts.baseURL;\n if (baseURL.startsWith('http://') || baseURL.startsWith('https://')) {\n baseURL = baseURL.replace('http', 'ws');\n }\n\n const token = await createAccessToken(this.opts.apiKey, this.opts.apiSecret);\n const url = `${baseURL}/tts`;\n const headers = { Authorization: `Bearer ${token}` } as Record<string, string>;\n\n const params = {\n type: 'session.create',\n sample_rate: String(this.opts.sampleRate),\n encoding: this.opts.encoding,\n extra: this.opts.modelOptions,\n } as Record<string, unknown>;\n\n if (this.opts.voice) (params as Record<string, unknown>).voice = this.opts.voice;\n if (this.opts.model) (params as Record<string, unknown>).model = this.opts.model;\n if (this.opts.language) (params as Record<string, unknown>).language = this.opts.language;\n\n if (this.opts.fallback?.length) {\n params.fallback = {\n models: this.opts.fallback.map((m) => ({\n model: m.model,\n voice: m.voice,\n extra: m.extraKwargs ?? {},\n })),\n };\n }\n\n if (this.opts.connOptions) {\n params.connection = {\n timeout: this.opts.connOptions.timeoutMs / 1000,\n retries: this.opts.connOptions.maxRetry,\n };\n }\n\n this.#logger.debug({ url }, 'inference.TTS creating new websocket connection (pool miss)');\n const socket = await connectWs(url, headers, timeout);\n socket.send(JSON.stringify(params));\n return socket;\n }\n\n async closeWs(ws: WebSocket) {\n await ws.close();\n }\n\n prewarm(): void {\n this.pool.prewarm();\n }\n\n async close() {\n for (const stream of this.streams) {\n await stream.close();\n }\n this.streams.clear();\n await this.pool.close();\n }\n}\n\nexport class SynthesizeStream<TModel extends TTSModels> extends BaseSynthesizeStream {\n private opts: InferenceTTSOptions<TModel>;\n private tts: TTS<TModel>;\n\n #logger = log();\n\n constructor(tts: TTS<TModel>, opts: InferenceTTSOptions<TModel>, connOptions: APIConnectOptions) {\n super(tts, connOptions);\n this.opts = opts;\n this.tts = tts;\n }\n\n get label() {\n return 'inference.SynthesizeStream';\n }\n\n updateOptions(opts: Partial<Pick<InferenceTTSOptions<TModel>, 'model' | 'voice' | 'language'>>) {\n this.opts = {\n ...this.opts,\n ...opts,\n language: opts.language !== undefined ? normalizeLanguage(opts.language) : this.opts.language,\n };\n }\n\n protected async run(): Promise<void> {\n let closing = false;\n let lastFrame: AudioFrame | undefined;\n\n const sendTokenizerStream = new tokenizeBasic.SentenceTokenizer().stream();\n const eventChannel = createStreamChannel<TtsServerEvent>();\n const requestId = shortuuid('tts_request_');\n const inputSentEvent = new Event();\n\n // Signal for protocol-driven completion (when 'done' message is received)\n const completionFuture = new Future<void>();\n\n const resourceCleanup = async () => {\n if (closing) return;\n closing = true;\n sendTokenizerStream.close();\n // close() returns a promise; don't leak it\n await eventChannel.close();\n };\n\n const sendClientEvent = async (event: TtsClientEvent, ws: WebSocket, signal: AbortSignal) => {\n // Don't send events to a closed WebSocket or aborted controller\n if (signal.aborted || closing) return;\n\n const validatedEvent = await ttsClientEventSchema.parseAsync(event);\n if (ws.readyState !== WebSocket.OPEN) {\n this.#logger.warn('Trying to send client TTS event to a closed WebSocket');\n return;\n }\n ws.send(JSON.stringify(validatedEvent));\n };\n\n const sendLastFrame = (segmentId: string, final: boolean) => {\n if (lastFrame) {\n this.queue.put({ requestId, segmentId, frame: lastFrame, final });\n lastFrame = undefined;\n }\n };\n\n const createInputTask = async (signal: AbortSignal) => {\n for await (const data of this.input) {\n if (signal.aborted || closing) break;\n if (data === SynthesizeStream.FLUSH_SENTINEL) {\n sendTokenizerStream.flush();\n continue;\n }\n sendTokenizerStream.pushText(data);\n }\n // Only call endInput if the stream hasn't been closed by cleanup\n if (!closing) {\n sendTokenizerStream.endInput();\n }\n };\n\n const createSentenceStreamTask = async (ws: WebSocket, signal: AbortSignal) => {\n for await (const ev of sendTokenizerStream) {\n if (signal.aborted || closing) break;\n\n await sendClientEvent(\n {\n type: 'input_transcript',\n transcript: ev.token + ' ',\n },\n ws,\n signal,\n );\n inputSentEvent.set();\n }\n\n await sendClientEvent({ type: 'session.flush' }, ws, signal);\n // needed in case empty input is sent\n inputSentEvent.set();\n };\n\n // Handles WebSocket message routing and error handling\n // Completes based on protocol messages, NOT on ws.close()\n const createWsListenerTask = async (ws: WebSocket, signal: AbortSignal) => {\n const onMessage = (data: Buffer) => {\n try {\n const eventJson = JSON.parse(data.toString()) as Record<string, unknown>;\n const validatedEvent = ttsServerEventSchema.parse(eventJson);\n // writer.write returns a promise; avoid unhandled rejections if stream is closed\n void eventChannel.write(validatedEvent).catch((error) => {\n this.#logger.debug(\n { error },\n 'Failed writing TTS event to stream channel (likely closed)',\n );\n });\n } catch (e) {\n this.#logger.error({ error: e }, 'Error parsing WebSocket message');\n }\n };\n\n const onError = (e: Error) => {\n this.#logger.error({ error: e }, 'WebSocket error');\n void resourceCleanup();\n try {\n // If the ws is misbehaving, hard-stop it immediately to avoid buffering.\n ws.terminate?.();\n } catch {\n // ignore\n }\n // Ensure this ws is not reused\n this.tts.pool.remove(ws);\n completionFuture.reject(e);\n };\n\n const onClose = () => {\n // WebSocket closed unexpectedly (not by us)\n if (!closing) {\n this.#logger.error('WebSocket closed unexpectedly');\n void resourceCleanup();\n // Ensure this ws is not reused\n this.tts.pool.remove(ws);\n completionFuture.reject(\n new APIStatusError({\n message: 'Gateway connection closed unexpectedly',\n options: { requestId },\n }),\n );\n }\n };\n\n const onAbort = () => {\n void resourceCleanup();\n try {\n // On interruption/abort, close the websocket immediately so the server stops streaming\n // and the ws library doesn't buffer unread frames in memory.\n ws.terminate?.();\n } catch {\n // ignore\n }\n this.tts.pool.remove(ws);\n inputSentEvent.set();\n completionFuture.resolve();\n };\n\n // Attach listeners\n ws.on('message', onMessage);\n ws.on('error', onError);\n ws.on('close', onClose);\n signal.addEventListener('abort', onAbort);\n\n try {\n // Wait for protocol-driven completion or error\n await completionFuture.await;\n } finally {\n // IMPORTANT: Remove listeners so connection can be reused\n ws.off('message', onMessage);\n ws.off('error', onError);\n ws.off('close', onClose);\n signal.removeEventListener('abort', onAbort);\n }\n };\n\n const createRecvTask = async (signal: AbortSignal) => {\n let currentSessionId: string | null = null;\n\n const bstream = new AudioByteStream(this.opts.sampleRate, NUM_CHANNELS);\n const serverEventStream = eventChannel.stream();\n const reader = serverEventStream.getReader();\n\n try {\n await inputSentEvent.wait();\n\n while (!this.closed && !signal.aborted) {\n const result = await reader.read();\n if (signal.aborted) return;\n if (result.done) return;\n\n const serverEvent = result.value;\n switch (serverEvent.type) {\n case 'session.created':\n currentSessionId = serverEvent.session_id;\n break;\n case 'output_audio':\n const base64Data = new Int8Array(Buffer.from(serverEvent.audio, 'base64'));\n for (const frame of bstream.write(base64Data.buffer)) {\n sendLastFrame(currentSessionId!, false);\n lastFrame = frame;\n }\n break;\n case 'done':\n for (const frame of bstream.flush()) {\n sendLastFrame(currentSessionId!, false);\n lastFrame = frame;\n }\n sendLastFrame(currentSessionId!, true);\n this.queue.put(SynthesizeStream.END_OF_STREAM);\n await resourceCleanup();\n completionFuture.resolve();\n return;\n case 'session.closed':\n await resourceCleanup();\n completionFuture.resolve();\n return;\n case 'error':\n this.#logger.error(\n { serverEvent },\n 'Received error message from LiveKit TTS WebSocket',\n );\n await resourceCleanup();\n completionFuture.reject(\n new APIError(`LiveKit TTS returned error: ${serverEvent.message}`),\n );\n return;\n default:\n this.#logger.warn('Unexpected message %s', serverEvent);\n break;\n }\n }\n } finally {\n reader.releaseLock();\n try {\n await serverEventStream.cancel();\n } catch (e) {\n this.#logger.debug('Error cancelling serverEventStream (may already be cancelled):', e);\n }\n }\n };\n\n try {\n await this.tts.pool.withConnection(\n async (ws: WebSocket) => {\n try {\n // IMPORTANT: don't cancel the stream's controller on normal completion,\n // otherwise the pool will remove+close the ws and every run becomes a pool miss.\n const runController = new AbortController();\n const onStreamAbort = () => runController.abort(this.abortController.signal.reason);\n this.abortController.signal.addEventListener('abort', onStreamAbort, { once: true });\n\n const tasks = [\n Task.from(\n async (controller) => {\n const combined = combineSignals(runController.signal, controller.signal);\n await createInputTask(combined);\n },\n undefined,\n 'inference-tts-input',\n ),\n Task.from(\n async (controller) => {\n const combined = combineSignals(runController.signal, controller.signal);\n await createSentenceStreamTask(ws, combined);\n },\n undefined,\n 'inference-tts-sentence',\n ),\n Task.from(\n async (controller) => {\n const combined = combineSignals(runController.signal, controller.signal);\n await createWsListenerTask(ws, combined);\n },\n undefined,\n 'inference-tts-ws-listener',\n ),\n Task.from(\n async (controller) => {\n const combined = combineSignals(runController.signal, controller.signal);\n await createRecvTask(combined);\n },\n undefined,\n 'inference-tts-recv',\n ),\n ];\n\n try {\n await Promise.all(tasks.map((t) => t.result));\n } finally {\n // Mirror python finally: unblock recv and cancel all tasks.\n inputSentEvent.set();\n await resourceCleanup();\n await cancelAndWait(tasks, 5000);\n this.abortController.signal.removeEventListener('abort', onStreamAbort);\n }\n } catch (e) {\n // If aborted, don't throw - let cleanup handle it\n if (e instanceof Error && e.name === 'AbortError') {\n return;\n }\n throw e;\n }\n },\n {\n timeout: this.connOptions.timeoutMs,\n },\n );\n } catch (e) {\n // Handle connection errors\n if (e instanceof Error && e.name === 'AbortError') {\n // Abort is expected during normal shutdown\n return;\n }\n throw e;\n } finally {\n // Ensure cleanup always runs (and don't leak the promise)\n await resourceCleanup();\n }\n }\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAIA,gBAA0B;AAC1B,wBAAyC;AACzC,mBAAgC;AAChC,6BAA+B;AAC/B,sBAAqD;AACrD,iBAAoB;AACpB,4BAAoC;AACpC,sBAAuC;AAEvC,iBAAyE;AACzE,mBAAoE;AACpE,mBAA8E;AAC9E,wBAKO;AACP,IAAAA,gBAA6D;AAkFtD,SAAS,oBAAoB,OAA6C;AAC/E,QAAM,MAAM,MAAM,YAAY,GAAG;AACjC,MAAI,QAAQ,IAAI;AACd,WAAO,CAAC,MAAM,MAAM,GAAG,GAAG,GAAG,MAAM,MAAM,MAAM,CAAC,CAAC;AAAA,EACnD;AACA,SAAO,CAAC,OAAO,MAAS;AAC1B;AAeO,SAAS,qBACd,UACoB;AACpB,QAAM,eAAe,CAAC,UAAkD;AACtE,QAAI,OAAO,UAAU,UAAU;AAC7B,YAAM,CAAC,MAAM,KAAK,IAAI,oBAAoB,KAAK;AAC/C,aAAO,EAAE,OAAO,MAAM,OAAO,SAAS,GAAG;AAAA,IAC3C;AACA,WAAO;AAAA,EACT;AAEA,MAAI,MAAM,QAAQ,QAAQ,GAAG;AAC3B,WAAO,SAAS,IAAI,YAAY;AAAA,EAClC;AACA,SAAO,CAAC,aAAa,QAAQ,CAAC;AAChC;AAIA,MAAM,mBAAgC;AACtC,MAAM,sBAAsB;AAC5B,MAAM,mBAAmB;AACzB,MAAM,eAAe;AACrB,MAAM,mBAAmB;AAmBlB,MAAM,YAAsC,WAAAC,IAAQ;AAAA,EACjD;AAAA,EACA,UAAyC,oBAAI,IAAI;AAAA,EACzD;AAAA,EAEA,cAAU,gBAAI;AAAA,EAEd,YAAY,MAYT;AACD,UAAM,cAAa,6BAAM,eAAc;AACvC,UAAM,YAAY,GAAG,EAAE,WAAW,KAAK,CAAC;AAExC,UAAM;AAAA,MACJ;AAAA,MACA;AAAA,MACA,WAAW;AAAA,MACX;AAAA,MACA,WAAW;AAAA,MACX;AAAA,MACA;AAAA,MACA,eAAe,CAAC;AAAA,MAChB;AAAA,MACA;AAAA,IACF,IAAI,QAAQ,CAAC;AAEb,UAAM,YAAY,WAAW,QAAQ,IAAI,yBAAyB;AAClE,UAAM,WAAW,UAAU,QAAQ,IAAI,6BAA6B,QAAQ,IAAI;AAChF,QAAI,CAAC,UAAU;AACb,YAAM,IAAI,MAAM,wDAAwD;AAAA,IAC1E;AAEA,UAAM,cACJ,aAAa,QAAQ,IAAI,gCAAgC,QAAQ,IAAI;AACvE,QAAI,CAAC,aAAa;AAChB,YAAM,IAAI,MAAM,iEAAiE;AAAA,IACnF;AAGA,QAAI,YAAY;AAChB,QAAI,YAAY;AAChB,QAAI,OAAO,cAAc,UAAU;AACjC,YAAM,MAAM,UAAU,YAAY,GAAG;AACrC,UAAI,QAAQ,IAAI;AACd,cAAM,iBAAiB,UAAU,MAAM,MAAM,CAAC;AAC9C,YAAI,aAAa,cAAc,gBAAgB;AAC7C,eAAK,QAAQ;AAAA,YACX;AAAA,YACA,EAAE,OAAO,WAAW,OAAO,UAAU;AAAA,UACvC;AAAA,QACF,OAAO;AACL,sBAAY;AAAA,QACd;AACA,oBAAY,UAAU,MAAM,GAAG,GAAG;AAAA,MACpC;AAAA,IACF;AAEA,UAAM,qBAAqB,WAAW,qBAAqB,QAAQ,IAAI;AAEvE,SAAK,OAAO;AAAA,MACV,OAAO;AAAA,MACP,OAAO;AAAA,MACP,cAAU,mCAAkB,QAAQ;AAAA,MACpC;AAAA,MACA;AAAA,MACA,SAAS;AAAA,MACT,QAAQ;AAAA,MACR,WAAW;AAAA,MACX;AAAA,MACA,UAAU;AAAA,MACV,aAAa,eAAe;AAAA,IAC9B;AAGA,SAAK,OAAO,IAAI,sCAA0B;AAAA,MACxC,WAAW,CAAC,YAAY,KAAK,UAAU,OAAO;AAAA,MAC9C,SAAS,CAAC,OAAO,KAAK,QAAQ,EAAE;AAAA,MAChC,oBAAoB;AAAA,MACpB,oBAAoB;AAAA,MACpB,gBAAgB;AAAA;AAAA,IAClB,CAAC;AAAA,EACH;AAAA,EAEA,IAAI,QAAQ;AACV,WAAO;AAAA,EACT;AAAA,EAEA,OAAO,gBAAgB,aAAqC;AAC1D,UAAM,CAAC,OAAO,KAAK,IAAI,oBAAoB,WAAW;AACtD,WAAO,IAAI,IAAI,EAAE,OAAO,OAAO,SAAS,OAAU,CAAC;AAAA,EACrD;AAAA,EAEA,cAAc,MAAkF;AAC9F,SAAK,OAAO;AAAA,MACV,GAAG,KAAK;AAAA,MACR,GAAG;AAAA,MACH,UAAU,KAAK,aAAa,aAAY,mCAAkB,KAAK,QAAQ,IAAI,KAAK,KAAK;AAAA,IACvF;AACA,eAAW,UAAU,KAAK,SAAS;AACjC,aAAO,cAAc,KAAK,IAAI;AAAA,IAChC;AAAA,EACF;AAAA,EAEA,WAAW,GAA0B;AACnC,UAAM,IAAI,MAAM,kCAAkC;AAAA,EACpD;AAAA,EAEA,OAAO,SAAyE;AAC9E,UAAM,EAAE,cAAc,KAAK,KAAK,eAAe,yCAA4B,IAAI,WAAW,CAAC;AAC3F,UAAM,SAAS,IAAI,iBAAiB,MAAM,EAAE,GAAG,KAAK,KAAK,GAAG,WAAW;AACvE,SAAK,QAAQ,IAAI,MAAM;AACvB,WAAO;AAAA,EACT;AAAA,EAEA,MAAM,UAAU,SAAqC;AAnSvD;AAoSI,QAAI,UAAU,KAAK,KAAK;AACxB,QAAI,QAAQ,WAAW,SAAS,KAAK,QAAQ,WAAW,UAAU,GAAG;AACnE,gBAAU,QAAQ,QAAQ,QAAQ,IAAI;AAAA,IACxC;AAEA,UAAM,QAAQ,UAAM,iCAAkB,KAAK,KAAK,QAAQ,KAAK,KAAK,SAAS;AAC3E,UAAM,MAAM,GAAG,OAAO;AACtB,UAAM,UAAU,EAAE,eAAe,UAAU,KAAK,GAAG;AAEnD,UAAM,SAAS;AAAA,MACb,MAAM;AAAA,MACN,aAAa,OAAO,KAAK,KAAK,UAAU;AAAA,MACxC,UAAU,KAAK,KAAK;AAAA,MACpB,OAAO,KAAK,KAAK;AAAA,IACnB;AAEA,QAAI,KAAK,KAAK,MAAO,CAAC,OAAmC,QAAQ,KAAK,KAAK;AAC3E,QAAI,KAAK,KAAK,MAAO,CAAC,OAAmC,QAAQ,KAAK,KAAK;AAC3E,QAAI,KAAK,KAAK,SAAU,CAAC,OAAmC,WAAW,KAAK,KAAK;AAEjF,SAAI,UAAK,KAAK,aAAV,mBAAoB,QAAQ;AAC9B,aAAO,WAAW;AAAA,QAChB,QAAQ,KAAK,KAAK,SAAS,IAAI,CAAC,OAAO;AAAA,UACrC,OAAO,EAAE;AAAA,UACT,OAAO,EAAE;AAAA,UACT,OAAO,EAAE,eAAe,CAAC;AAAA,QAC3B,EAAE;AAAA,MACJ;AAAA,IACF;AAEA,QAAI,KAAK,KAAK,aAAa;AACzB,aAAO,aAAa;AAAA,QAClB,SAAS,KAAK,KAAK,YAAY,YAAY;AAAA,QAC3C,SAAS,KAAK,KAAK,YAAY;AAAA,MACjC;AAAA,IACF;AAEA,SAAK,QAAQ,MAAM,EAAE,IAAI,GAAG,6DAA6D;AACzF,UAAM,SAAS,UAAM,yBAAU,KAAK,SAAS,OAAO;AACpD,WAAO,KAAK,KAAK,UAAU,MAAM,CAAC;AAClC,WAAO;AAAA,EACT;AAAA,EAEA,MAAM,QAAQ,IAAe;AAC3B,UAAM,GAAG,MAAM;AAAA,EACjB;AAAA,EAEA,UAAgB;AACd,SAAK,KAAK,QAAQ;AAAA,EACpB;AAAA,EAEA,MAAM,QAAQ;AACZ,eAAW,UAAU,KAAK,SAAS;AACjC,YAAM,OAAO,MAAM;AAAA,IACrB;AACA,SAAK,QAAQ,MAAM;AACnB,UAAM,KAAK,KAAK,MAAM;AAAA,EACxB;AACF;AAEO,MAAM,yBAAmD,WAAAC,iBAAqB;AAAA,EAC3E;AAAA,EACA;AAAA,EAER,cAAU,gBAAI;AAAA,EAEd,YAAY,KAAkB,MAAmC,aAAgC;AAC/F,UAAM,KAAK,WAAW;AACtB,SAAK,OAAO;AACZ,SAAK,MAAM;AAAA,EACb;AAAA,EAEA,IAAI,QAAQ;AACV,WAAO;AAAA,EACT;AAAA,EAEA,cAAc,MAAkF;AAC9F,SAAK,OAAO;AAAA,MACV,GAAG,KAAK;AAAA,MACR,GAAG;AAAA,MACH,UAAU,KAAK,aAAa,aAAY,mCAAkB,KAAK,QAAQ,IAAI,KAAK,KAAK;AAAA,IACvF;AAAA,EACF;AAAA,EAEA,MAAgB,MAAqB;AACnC,QAAI,UAAU;AACd,QAAI;AAEJ,UAAM,sBAAsB,IAAI,gBAAAC,MAAc,kBAAkB,EAAE,OAAO;AACzE,UAAM,mBAAe,2CAAoC;AACzD,UAAM,gBAAY,wBAAU,cAAc;AAC1C,UAAM,iBAAiB,IAAI,mBAAM;AAGjC,UAAM,mBAAmB,IAAI,oBAAa;AAE1C,UAAM,kBAAkB,YAAY;AAClC,UAAI,QAAS;AACb,gBAAU;AACV,0BAAoB,MAAM;AAE1B,YAAM,aAAa,MAAM;AAAA,IAC3B;AAEA,UAAM,kBAAkB,OAAO,OAAuB,IAAe,WAAwB;AAE3F,UAAI,OAAO,WAAW,QAAS;AAE/B,YAAM,iBAAiB,MAAM,uCAAqB,WAAW,KAAK;AAClE,UAAI,GAAG,eAAe,oBAAU,MAAM;AACpC,aAAK,QAAQ,KAAK,uDAAuD;AACzE;AAAA,MACF;AACA,SAAG,KAAK,KAAK,UAAU,cAAc,CAAC;AAAA,IACxC;AAEA,UAAM,gBAAgB,CAAC,WAAmB,UAAmB;AAC3D,UAAI,WAAW;AACb,aAAK,MAAM,IAAI,EAAE,WAAW,WAAW,OAAO,WAAW,MAAM,CAAC;AAChE,oBAAY;AAAA,MACd;AAAA,IACF;AAEA,UAAM,kBAAkB,OAAO,WAAwB;AACrD,uBAAiB,QAAQ,KAAK,OAAO;AACnC,YAAI,OAAO,WAAW,QAAS;AAC/B,YAAI,SAAS,iBAAiB,gBAAgB;AAC5C,8BAAoB,MAAM;AAC1B;AAAA,QACF;AACA,4BAAoB,SAAS,IAAI;AAAA,MACnC;AAEA,UAAI,CAAC,SAAS;AACZ,4BAAoB,SAAS;AAAA,MAC/B;AAAA,IACF;AAEA,UAAM,2BAA2B,OAAO,IAAe,WAAwB;AAC7E,uBAAiB,MAAM,qBAAqB;AAC1C,YAAI,OAAO,WAAW,QAAS;AAE/B,cAAM;AAAA,UACJ;AAAA,YACE,MAAM;AAAA,YACN,YAAY,GAAG,QAAQ;AAAA,UACzB;AAAA,UACA;AAAA,UACA;AAAA,QACF;AACA,uBAAe,IAAI;AAAA,MACrB;AAEA,YAAM,gBAAgB,EAAE,MAAM,gBAAgB,GAAG,IAAI,MAAM;AAE3D,qBAAe,IAAI;AAAA,IACrB;AAIA,UAAM,uBAAuB,OAAO,IAAe,WAAwB;AACzE,YAAM,YAAY,CAAC,SAAiB;AAClC,YAAI;AACF,gBAAM,YAAY,KAAK,MAAM,KAAK,SAAS,CAAC;AAC5C,gBAAM,iBAAiB,uCAAqB,MAAM,SAAS;AAE3D,eAAK,aAAa,MAAM,cAAc,EAAE,MAAM,CAAC,UAAU;AACvD,iBAAK,QAAQ;AAAA,cACX,EAAE,MAAM;AAAA,cACR;AAAA,YACF;AAAA,UACF,CAAC;AAAA,QACH,SAAS,GAAG;AACV,eAAK,QAAQ,MAAM,EAAE,OAAO,EAAE,GAAG,iCAAiC;AAAA,QACpE;AAAA,MACF;AAEA,YAAM,UAAU,CAAC,MAAa;AArdpC;AAsdQ,aAAK,QAAQ,MAAM,EAAE,OAAO,EAAE,GAAG,iBAAiB;AAClD,aAAK,gBAAgB;AACrB,YAAI;AAEF,mBAAG,cAAH;AAAA,QACF,QAAQ;AAAA,QAER;AAEA,aAAK,IAAI,KAAK,OAAO,EAAE;AACvB,yBAAiB,OAAO,CAAC;AAAA,MAC3B;AAEA,YAAM,UAAU,MAAM;AAEpB,YAAI,CAAC,SAAS;AACZ,eAAK,QAAQ,MAAM,+BAA+B;AAClD,eAAK,gBAAgB;AAErB,eAAK,IAAI,KAAK,OAAO,EAAE;AACvB,2BAAiB;AAAA,YACf,IAAI,iCAAe;AAAA,cACjB,SAAS;AAAA,cACT,SAAS,EAAE,UAAU;AAAA,YACvB,CAAC;AAAA,UACH;AAAA,QACF;AAAA,MACF;AAEA,YAAM,UAAU,MAAM;AAnf5B;AAofQ,aAAK,gBAAgB;AACrB,YAAI;AAGF,mBAAG,cAAH;AAAA,QACF,QAAQ;AAAA,QAER;AACA,aAAK,IAAI,KAAK,OAAO,EAAE;AACvB,uBAAe,IAAI;AACnB,yBAAiB,QAAQ;AAAA,MAC3B;AAGA,SAAG,GAAG,WAAW,SAAS;AAC1B,SAAG,GAAG,SAAS,OAAO;AACtB,SAAG,GAAG,SAAS,OAAO;AACtB,aAAO,iBAAiB,SAAS,OAAO;AAExC,UAAI;AAEF,cAAM,iBAAiB;AAAA,MACzB,UAAE;AAEA,WAAG,IAAI,WAAW,SAAS;AAC3B,WAAG,IAAI,SAAS,OAAO;AACvB,WAAG,IAAI,SAAS,OAAO;AACvB,eAAO,oBAAoB,SAAS,OAAO;AAAA,MAC7C;AAAA,IACF;AAEA,UAAM,iBAAiB,OAAO,WAAwB;AACpD,UAAI,mBAAkC;AAEtC,YAAM,UAAU,IAAI,6BAAgB,KAAK,KAAK,YAAY,YAAY;AACtE,YAAM,oBAAoB,aAAa,OAAO;AAC9C,YAAM,SAAS,kBAAkB,UAAU;AAE3C,UAAI;AACF,cAAM,eAAe,KAAK;AAE1B,eAAO,CAAC,KAAK,UAAU,CAAC,OAAO,SAAS;AACtC,gBAAM,SAAS,MAAM,OAAO,KAAK;AACjC,cAAI,OAAO,QAAS;AACpB,cAAI,OAAO,KAAM;AAEjB,gBAAM,cAAc,OAAO;AAC3B,kBAAQ,YAAY,MAAM;AAAA,YACxB,KAAK;AACH,iCAAmB,YAAY;AAC/B;AAAA,YACF,KAAK;AACH,oBAAM,aAAa,IAAI,UAAU,OAAO,KAAK,YAAY,OAAO,QAAQ,CAAC;AACzE,yBAAW,SAAS,QAAQ,MAAM,WAAW,MAAM,GAAG;AACpD,8BAAc,kBAAmB,KAAK;AACtC,4BAAY;AAAA,cACd;AACA;AAAA,YACF,KAAK;AACH,yBAAW,SAAS,QAAQ,MAAM,GAAG;AACnC,8BAAc,kBAAmB,KAAK;AACtC,4BAAY;AAAA,cACd;AACA,4BAAc,kBAAmB,IAAI;AACrC,mBAAK,MAAM,IAAI,iBAAiB,aAAa;AAC7C,oBAAM,gBAAgB;AACtB,+BAAiB,QAAQ;AACzB;AAAA,YACF,KAAK;AACH,oBAAM,gBAAgB;AACtB,+BAAiB,QAAQ;AACzB;AAAA,YACF,KAAK;AACH,mBAAK,QAAQ;AAAA,gBACX,EAAE,YAAY;AAAA,gBACd;AAAA,cACF;AACA,oBAAM,gBAAgB;AACtB,+BAAiB;AAAA,gBACf,IAAI,2BAAS,+BAA+B,YAAY,OAAO,EAAE;AAAA,cACnE;AACA;AAAA,YACF;AACE,mBAAK,QAAQ,KAAK,yBAAyB,WAAW;AACtD;AAAA,UACJ;AAAA,QACF;AAAA,MACF,UAAE;AACA,eAAO,YAAY;AACnB,YAAI;AACF,gBAAM,kBAAkB,OAAO;AAAA,QACjC,SAAS,GAAG;AACV,eAAK,QAAQ,MAAM,kEAAkE,CAAC;AAAA,QACxF;AAAA,MACF;AAAA,IACF;AAEA,QAAI;AACF,YAAM,KAAK,IAAI,KAAK;AAAA,QAClB,OAAO,OAAkB;AACvB,cAAI;AAGF,kBAAM,gBAAgB,IAAI,gBAAgB;AAC1C,kBAAM,gBAAgB,MAAM,cAAc,MAAM,KAAK,gBAAgB,OAAO,MAAM;AAClF,iBAAK,gBAAgB,OAAO,iBAAiB,SAAS,eAAe,EAAE,MAAM,KAAK,CAAC;AAEnF,kBAAM,QAAQ;AAAA,cACZ,kBAAK;AAAA,gBACH,OAAO,eAAe;AACpB,wBAAM,eAAW,6BAAe,cAAc,QAAQ,WAAW,MAAM;AACvE,wBAAM,gBAAgB,QAAQ;AAAA,gBAChC;AAAA,gBACA;AAAA,gBACA;AAAA,cACF;AAAA,cACA,kBAAK;AAAA,gBACH,OAAO,eAAe;AACpB,wBAAM,eAAW,6BAAe,cAAc,QAAQ,WAAW,MAAM;AACvE,wBAAM,yBAAyB,IAAI,QAAQ;AAAA,gBAC7C;AAAA,gBACA;AAAA,gBACA;AAAA,cACF;AAAA,cACA,kBAAK;AAAA,gBACH,OAAO,eAAe;AACpB,wBAAM,eAAW,6BAAe,cAAc,QAAQ,WAAW,MAAM;AACvE,wBAAM,qBAAqB,IAAI,QAAQ;AAAA,gBACzC;AAAA,gBACA;AAAA,gBACA;AAAA,cACF;AAAA,cACA,kBAAK;AAAA,gBACH,OAAO,eAAe;AACpB,wBAAM,eAAW,6BAAe,cAAc,QAAQ,WAAW,MAAM;AACvE,wBAAM,eAAe,QAAQ;AAAA,gBAC/B;AAAA,gBACA;AAAA,gBACA;AAAA,cACF;AAAA,YACF;AAEA,gBAAI;AACF,oBAAM,QAAQ,IAAI,MAAM,IAAI,CAAC,MAAM,EAAE,MAAM,CAAC;AAAA,YAC9C,UAAE;AAEA,6BAAe,IAAI;AACnB,oBAAM,gBAAgB;AACtB,wBAAM,4BAAc,OAAO,GAAI;AAC/B,mBAAK,gBAAgB,OAAO,oBAAoB,SAAS,aAAa;AAAA,YACxE;AAAA,UACF,SAAS,GAAG;AAEV,gBAAI,aAAa,SAAS,EAAE,SAAS,cAAc;AACjD;AAAA,YACF;AACA,kBAAM;AAAA,UACR;AAAA,QACF;AAAA,QACA;AAAA,UACE,SAAS,KAAK,YAAY;AAAA,QAC5B;AAAA,MACF;AAAA,IACF,SAAS,GAAG;AAEV,UAAI,aAAa,SAAS,EAAE,SAAS,cAAc;AAEjD;AAAA,MACF;AACA,YAAM;AAAA,IACR,UAAE;AAEA,YAAM,gBAAgB;AAAA,IACxB;AAAA,EACF;AACF;","names":["import_utils","BaseTTS","BaseSynthesizeStream","tokenizeBasic"]}
|
package/dist/inference/tts.d.cts
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import { WebSocket } from 'ws';
|
|
2
2
|
import { ConnectionPool } from '../connection_pool.js';
|
|
3
|
+
import { type LanguageCode } from '../language.js';
|
|
3
4
|
import type { ChunkedStream } from '../tts/index.js';
|
|
4
5
|
import { SynthesizeStream as BaseSynthesizeStream, TTS as BaseTTS } from '../tts/index.js';
|
|
5
6
|
import { type APIConnectOptions } from '../types.js';
|
|
@@ -55,7 +56,7 @@ type TTSEncoding = 'pcm_s16le';
|
|
|
55
56
|
export interface InferenceTTSOptions<TModel extends TTSModels> {
|
|
56
57
|
model?: TModel;
|
|
57
58
|
voice?: string;
|
|
58
|
-
language?:
|
|
59
|
+
language?: LanguageCode;
|
|
59
60
|
encoding: TTSEncoding;
|
|
60
61
|
sampleRate: number;
|
|
61
62
|
baseURL: string;
|