@livekit/agents-plugin-sarvam 1.0.50 → 1.0.51

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -34,7 +34,7 @@ class SarvamPlugin extends import_agents.Plugin {
34
34
  constructor() {
35
35
  super({
36
36
  title: "sarvam",
37
- version: "1.0.50",
37
+ version: "1.0.51",
38
38
  package: "@livekit/agents-plugin-sarvam"
39
39
  });
40
40
  }
package/dist/index.js CHANGED
@@ -13,7 +13,7 @@ class SarvamPlugin extends Plugin {
13
13
  constructor() {
14
14
  super({
15
15
  title: "sarvam",
16
- version: "1.0.50",
16
+ version: "1.0.51",
17
17
  package: "@livekit/agents-plugin-sarvam"
18
18
  });
19
19
  }
package/dist/stt.cjs CHANGED
@@ -73,14 +73,16 @@ function resolveOptions(opts) {
73
73
  base.mode = translateOpts.mode ?? SAARAS_TRANSLATE_DEFAULTS.mode;
74
74
  } else if (model === "saaras:v3") {
75
75
  const v3Opts = opts;
76
- base.languageCode = v3Opts.languageCode ?? SAARAS_V3_DEFAULTS.languageCode;
76
+ base.languageCode = (0, import_agents.normalizeLanguage)(v3Opts.languageCode ?? SAARAS_V3_DEFAULTS.languageCode);
77
77
  base.mode = v3Opts.mode ?? SAARAS_V3_DEFAULTS.mode;
78
78
  base.prompt = v3Opts.prompt;
79
79
  base.withTimestamps = v3Opts.withTimestamps;
80
80
  } else {
81
- let languageCode = opts.languageCode ?? SAARIKA_DEFAULTS.languageCode;
81
+ let languageCode = (0, import_agents.normalizeLanguage)(
82
+ opts.languageCode ?? SAARIKA_DEFAULTS.languageCode
83
+ );
82
84
  if (!STTV2_LANGUAGE_SET.has(languageCode)) {
83
- languageCode = SAARIKA_DEFAULTS.languageCode;
85
+ languageCode = (0, import_agents.normalizeLanguage)(SAARIKA_DEFAULTS.languageCode);
84
86
  }
85
87
  base.languageCode = languageCode;
86
88
  base.withTimestamps = opts.withTimestamps;
@@ -223,7 +225,7 @@ class STT extends import_agents.stt.STT {
223
225
  alternatives: [
224
226
  {
225
227
  text: data.transcript || "",
226
- language: data.language_code ?? this.opts.languageCode ?? "unknown",
228
+ language: (0, import_agents.normalizeLanguage)(data.language_code ?? this.opts.languageCode ?? "unknown"),
227
229
  startTime,
228
230
  endTime,
229
231
  confidence: data.language_probability ?? 0
@@ -429,7 +431,9 @@ class SpeechStream extends import_agents.stt.SpeechStream {
429
431
  } else if (msgType === "data") {
430
432
  const td = json["data"] ?? {};
431
433
  const transcript = td.transcript ?? "";
432
- const language = td.language_code ?? this.#opts.languageCode ?? "unknown";
434
+ const language = (0, import_agents.normalizeLanguage)(
435
+ td.language_code ?? this.#opts.languageCode ?? "unknown"
436
+ );
433
437
  const requestId = td.request_id ?? "";
434
438
  const confidence = td.language_probability ?? 0;
435
439
  this.#requestId = requestId;
package/dist/stt.cjs.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"sources":["../src/stt.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2025 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport {\n type APIConnectOptions,\n type AudioBuffer,\n AudioByteStream,\n AudioEnergyFilter,\n Future,\n Task,\n log,\n mergeFrames,\n stt,\n waitForAbort,\n} from '@livekit/agents';\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { type RawData, WebSocket } from 'ws';\nimport type {\n STTLanguages,\n STTModels,\n STTModes,\n STTV2Languages,\n STTV3Languages,\n} from './models.js';\n\n// ---------------------------------------------------------------------------\n// Endpoint URLs\n// ---------------------------------------------------------------------------\n\nconst SARVAM_STT_REST_URL = 'https://api.sarvam.ai/speech-to-text';\nconst SARVAM_STT_TRANSLATE_REST_URL = 'https://api.sarvam.ai/speech-to-text-translate';\nconst SARVAM_STT_WS_URL = 'wss://api.sarvam.ai/speech-to-text/ws';\nconst SARVAM_STT_TRANSLATE_WS_URL = 'wss://api.sarvam.ai/speech-to-text-translate/ws';\n\nconst SAMPLE_RATE = 16000;\nconst NUM_CHANNELS = 1;\n\n// ---------------------------------------------------------------------------\n// Model-specific option types\n// ---------------------------------------------------------------------------\n\ninterface STTBaseOptions {\n /** Sarvam API key. Defaults to $SARVAM_API_KEY */\n apiKey?: string;\n /**\n * Whether to use native WebSocket streaming for `stream()`.\n * Set to `false` to prefer non-streaming REST recognition (used by Agent via StreamAdapter + VAD).\n * Default: `true`.\n */\n streaming?: boolean;\n /** Increase VAD sensitivity (WS only). Maps to `high_vad_sensitivity` query param. */\n highVadSensitivity?: boolean;\n /** Enable flush signal events from server (WS only). Maps to `flush_signal` query param. */\n flushSignal?: boolean;\n}\n\n/**\n * Options specific to saarika:v2.5.\n * saarika:v2.5 will be deprecated soon — prefer {@link STTV3Options} with `saaras:v3` for new integrations.\n * All v2.5 language codes are also supported by v3.\n * @see {@link https://docs.sarvam.ai/api-reference-docs/speech-to-text/transcribe | Sarvam STT API docs}\n */\nexport interface STTV2Options extends STTBaseOptions {\n model: 'saarika:v2.5';\n /** Language code (BCP-47). Default: 'en-IN'. Set to 'unknown' for auto-detection. */\n languageCode?: STTV2Languages | string;\n /** Return chunk-level timestamps in REST response */\n withTimestamps?: boolean;\n}\n\n/**\n * Options specific to saaras:v2.5 (dedicated translate endpoint).\n * Uses the `/speech-to-text-translate` endpoint for Indic-to-English translation.\n * Auto-detects the source language; does not accept language codes or timestamps.\n * @see {@link https://docs.sarvam.ai/api-reference-docs/speech-to-text-translate/translate | Sarvam STT Translate docs}\n */\nexport interface STTTranslateOptions extends STTBaseOptions {\n model: 'saaras:v2.5';\n /** Conversation context to boost model accuracy */\n prompt?: string;\n /** Mode for translate WS. Default: 'translate'. */\n mode?: STTModes | string;\n}\n\n/**\n * Options specific to saaras:v3 (recommended).\n * @see {@link https://docs.sarvam.ai/api-reference-docs/speech-to-text/transcribe | Sarvam STT API docs}\n */\nexport interface STTV3Options extends STTBaseOptions {\n model?: 'saaras:v3';\n /** Language code (BCP-47). Default: 'en-IN'. Set to 'unknown' for auto-detection. */\n languageCode?: STTV3Languages | string;\n /** Transcription mode (v3 only). Default: 'transcribe' */\n mode?: STTModes | string;\n /** Conversation context to boost model accuracy */\n prompt?: string;\n /** Return chunk-level timestamps in REST response */\n withTimestamps?: boolean;\n}\n\n/** Combined options — discriminated by `model` field */\nexport type STTOptions = STTV2Options | STTTranslateOptions | STTV3Options;\n\n// ---------------------------------------------------------------------------\n// Resolved (internal) options — flat union of all fields\n// ---------------------------------------------------------------------------\n\ninterface ResolvedSTTOptions {\n apiKey: string;\n model: STTModels;\n streaming: boolean;\n // saarika:v2.5 and saaras:v3 only — not used by saaras:v2.5 (translate auto-detects)\n languageCode?: STTLanguages | string;\n // saaras:v3 and saaras:v2.5 (translate)\n mode?: STTModes | string;\n // saaras:v2.5 (translate) and saaras:v3\n prompt?: string;\n // saarika:v2.5 and saaras:v3 (/speech-to-text only, not translate)\n withTimestamps?: boolean;\n // WS-only flags\n highVadSensitivity?: boolean;\n flushSignal?: boolean;\n}\n\n// ---------------------------------------------------------------------------\n// Defaults per model\n// ---------------------------------------------------------------------------\n\nconst SAARIKA_DEFAULTS = {\n languageCode: 'en-IN',\n};\n\nconst SAARAS_V3_DEFAULTS = {\n languageCode: 'en-IN',\n mode: 'transcribe',\n};\n\nconst SAARAS_TRANSLATE_DEFAULTS = {\n mode: 'translate',\n};\n\n/** Runtime set of languages supported by saarika:v2.5 (for validation on model switch) */\nconst STTV2_LANGUAGE_SET: ReadonlySet<string> = new Set<STTV2Languages>([\n 'unknown',\n 'hi-IN',\n 'bn-IN',\n 'kn-IN',\n 'ml-IN',\n 'mr-IN',\n 'od-IN',\n 'pa-IN',\n 'ta-IN',\n 'te-IN',\n 'en-IN',\n 'gu-IN',\n]);\n\n// ---------------------------------------------------------------------------\n// Resolve caller options into a fully-populated internal struct\n// ---------------------------------------------------------------------------\n\nfunction resolveOptions(opts: Partial<STTOptions>): ResolvedSTTOptions {\n const apiKey = opts.apiKey ?? process.env.SARVAM_API_KEY;\n if (!apiKey) {\n throw new Error('Sarvam API key is required, whether as an argument or as $SARVAM_API_KEY');\n }\n\n const model: STTModels = opts.model ?? 'saaras:v3';\n\n const base: ResolvedSTTOptions = {\n apiKey,\n model,\n streaming: opts.streaming ?? true,\n highVadSensitivity: opts.highVadSensitivity,\n flushSignal: opts.flushSignal,\n };\n\n if (model === 'saaras:v2.5') {\n const translateOpts = opts as STTTranslateOptions;\n base.prompt = translateOpts.prompt;\n base.mode = translateOpts.mode ?? SAARAS_TRANSLATE_DEFAULTS.mode;\n } else if (model === 'saaras:v3') {\n const v3Opts = opts as STTV3Options;\n base.languageCode = v3Opts.languageCode ?? SAARAS_V3_DEFAULTS.languageCode;\n base.mode = v3Opts.mode ?? SAARAS_V3_DEFAULTS.mode;\n base.prompt = v3Opts.prompt;\n base.withTimestamps = v3Opts.withTimestamps;\n } else {\n // saarika:v2.5\n let languageCode = (opts as STTV2Options).languageCode ?? SAARIKA_DEFAULTS.languageCode;\n if (!STTV2_LANGUAGE_SET.has(languageCode)) {\n languageCode = SAARIKA_DEFAULTS.languageCode;\n }\n base.languageCode = languageCode;\n base.withTimestamps = (opts as STTV2Options).withTimestamps;\n }\n\n return base;\n}\n\n// ---------------------------------------------------------------------------\n// URL helpers\n// ---------------------------------------------------------------------------\n\nfunction getRestUrl(model: STTModels): string {\n return model === 'saaras:v2.5' ? SARVAM_STT_TRANSLATE_REST_URL : SARVAM_STT_REST_URL;\n}\n\nfunction getWsUrl(model: STTModels): string {\n return model === 'saaras:v2.5' ? SARVAM_STT_TRANSLATE_WS_URL : SARVAM_STT_WS_URL;\n}\n\nfunction buildWsUrl(opts: ResolvedSTTOptions): string {\n const base = getWsUrl(opts.model);\n const params = new URLSearchParams();\n params.set('model', opts.model);\n params.set('vad_signals', 'true');\n params.set('sample_rate', String(SAMPLE_RATE));\n params.set('input_audio_codec', 'pcm_s16le');\n\n if (opts.model !== 'saaras:v2.5' && opts.languageCode != null) {\n params.set('language-code', opts.languageCode);\n }\n\n // mode: v3 on STT WS, and translate WS (both endpoints support it)\n if (opts.mode != null) {\n params.set('mode', opts.mode);\n }\n\n // Optional WS params\n if (opts.highVadSensitivity != null) {\n params.set('high_vad_sensitivity', String(opts.highVadSensitivity));\n }\n if (opts.flushSignal != null) {\n params.set('flush_signal', String(opts.flushSignal));\n }\n\n return `${base}?${params.toString()}`;\n}\n\n// ---------------------------------------------------------------------------\n// Build the multipart form data (REST) — only sends model-relevant fields\n// ---------------------------------------------------------------------------\n\nfunction buildFormData(wavBlob: Blob, opts: ResolvedSTTOptions): FormData {\n const formData = new FormData();\n formData.append('file', wavBlob, 'audio.wav');\n formData.append('model', opts.model);\n\n if (opts.model !== 'saaras:v2.5' && opts.languageCode != null) {\n formData.append('language_code', opts.languageCode);\n }\n if (opts.model === 'saaras:v3' && opts.mode != null) {\n formData.append('mode', opts.mode);\n }\n if ((opts.model === 'saaras:v2.5' || opts.model === 'saaras:v3') && opts.prompt != null) {\n formData.append('prompt', opts.prompt);\n }\n if (opts.model !== 'saaras:v2.5' && opts.withTimestamps) {\n formData.append('with_timestamps', 'true');\n }\n\n return formData;\n}\n\n// ---------------------------------------------------------------------------\n// WAV encoding helper\n// ---------------------------------------------------------------------------\n\nfunction createWav(frame: AudioFrame): Buffer {\n const bitsPerSample = 16;\n const byteRate = (frame.sampleRate * frame.channels * bitsPerSample) / 8;\n const blockAlign = (frame.channels * bitsPerSample) / 8;\n\n const header = Buffer.alloc(44);\n header.write('RIFF', 0);\n header.writeUInt32LE(36 + frame.data.byteLength, 4);\n header.write('WAVE', 8);\n header.write('fmt ', 12);\n header.writeUInt32LE(16, 16);\n header.writeUInt16LE(1, 20);\n header.writeUInt16LE(frame.channels, 22);\n header.writeUInt32LE(frame.sampleRate, 24);\n header.writeUInt32LE(byteRate, 28);\n header.writeUInt16LE(blockAlign, 32);\n header.writeUInt16LE(bitsPerSample, 34);\n header.write('data', 36);\n header.writeUInt32LE(frame.data.byteLength, 40);\n\n const pcm = Buffer.from(frame.data.buffer, frame.data.byteOffset, frame.data.byteLength);\n return Buffer.concat([header, pcm]);\n}\n\n// ---------------------------------------------------------------------------\n// REST response type\n// ---------------------------------------------------------------------------\n\ninterface SarvamSTTResponse {\n request_id: string | null;\n transcript: string;\n language_code: string | null;\n language_probability?: number | null;\n timestamps?: {\n words: string[];\n start_time_seconds: number[];\n end_time_seconds: number[];\n } | null;\n}\n\n// ---------------------------------------------------------------------------\n// WS response types (from server Publish messages)\n// ---------------------------------------------------------------------------\n\n/** type: \"data\" */\ninterface SarvamWSTranscriptData {\n request_id?: string;\n transcript?: string;\n language_code?: string | null;\n language_probability?: number | null;\n timestamps?: Record<string, unknown> | null;\n diarized_transcript?: Record<string, unknown> | null;\n metrics?: {\n audio_duration?: number;\n processing_latency?: number;\n };\n}\n\n/** type: \"events\" */\ninterface SarvamWSEventData {\n event_type?: string;\n timestamp?: string;\n signal_type?: 'START_SPEECH' | 'END_SPEECH';\n occured_at?: number;\n}\n\n/** type: \"error\" — server sends data with message and code fields */\ninterface SarvamWSErrorData {\n message?: string;\n error?: string;\n code?: string;\n}\n\n// ---------------------------------------------------------------------------\n// STT class — supports both REST (recognize) and WebSocket (stream)\n// ---------------------------------------------------------------------------\n\nexport class STT extends stt.STT {\n private opts: ResolvedSTTOptions;\n label = 'sarvam.STT';\n\n /**\n * Create a new instance of Sarvam AI STT.\n *\n * @remarks\n * `apiKey` must be set to your Sarvam API key, either using the argument or by setting the\n * `SARVAM_API_KEY` environment variable.\n *\n * Supported models:\n * - `saaras:v3` (default, recommended) — supports all 22 languages, modes, prompt, timestamps, and uses `/speech-to-text`.\n * - `saaras:v2.5` — Indic-to-English translation via `/speech-to-text-translate`. Auto-detects source language. Supports prompt.\n * - `saarika:v2.5` — will be deprecated soon. Supports timestamps. All its languages are available in `saaras:v3`.\n *\n * @see {@link https://docs.sarvam.ai/api-reference-docs/speech-to-text/transcribe | Sarvam STT API docs}\n * @see {@link https://docs.sarvam.ai/api-reference-docs/speech-to-text-translate/translate | Sarvam STT Translate docs}\n */\n constructor(opts: Partial<STTOptions> = {}) {\n const resolved = resolveOptions(opts);\n super({\n streaming: resolved.streaming,\n interimResults: false,\n alignedTranscript: false,\n });\n this.opts = resolved;\n }\n\n updateOptions(opts: Partial<STTOptions>) {\n const modelChanging = opts.model != null && opts.model !== this.opts.model;\n\n const base: Partial<STTOptions> = modelChanging\n ? {\n apiKey: this.opts.apiKey,\n streaming: this.opts.streaming,\n ...(this.opts.highVadSensitivity != null\n ? { highVadSensitivity: this.opts.highVadSensitivity }\n : {}),\n ...(this.opts.flushSignal != null ? { flushSignal: this.opts.flushSignal } : {}),\n ...(this.opts.languageCode != null && opts.model !== 'saaras:v2.5'\n ? { languageCode: this.opts.languageCode as STTV3Languages }\n : {}),\n }\n : ({ ...this.opts } as Partial<STTOptions>);\n\n this.opts = resolveOptions({ ...base, ...opts } as STTOptions);\n }\n\n async _recognize(buffer: AudioBuffer, abortSignal?: AbortSignal): Promise<stt.SpeechEvent> {\n const frame = mergeFrames(buffer);\n const wavBuffer = createWav(frame);\n const wavBlob = new Blob([new Uint8Array(wavBuffer)], { type: 'audio/wav' });\n\n const formData = buildFormData(wavBlob, this.opts);\n\n const response = await fetch(getRestUrl(this.opts.model), {\n method: 'POST',\n headers: {\n 'api-subscription-key': this.opts.apiKey,\n },\n body: formData,\n signal: abortSignal ?? null,\n });\n\n if (!response.ok) {\n const errorBody = await response.text();\n throw new Error(`Sarvam STT API error ${response.status}: ${errorBody}`);\n }\n\n const data = (await response.json()) as SarvamSTTResponse;\n\n let startTime = 0;\n let endTime = 0;\n if (data.timestamps) {\n const starts = data.timestamps.start_time_seconds;\n const ends = data.timestamps.end_time_seconds;\n if (starts.length > 0) startTime = starts[0] ?? 0;\n if (ends.length > 0) endTime = ends[ends.length - 1] ?? 0;\n }\n\n return {\n type: stt.SpeechEventType.FINAL_TRANSCRIPT,\n requestId: data.request_id ?? undefined,\n alternatives: [\n {\n text: data.transcript || '',\n language: data.language_code ?? this.opts.languageCode ?? 'unknown',\n startTime,\n endTime,\n confidence: data.language_probability ?? 0,\n },\n ],\n };\n }\n\n stream(options?: { connOptions?: APIConnectOptions }): SpeechStream {\n if (!this.capabilities.streaming) {\n throw new Error(\n 'Sarvam STT streaming is disabled (`streaming: false`). Use recognize() for REST or wrap with stt.StreamAdapter + VAD for streaming behavior.',\n );\n }\n return new SpeechStream(this, this.opts, options?.connOptions);\n }\n}\n\n// ---------------------------------------------------------------------------\n// WebSocket streaming SpeechStream\n// ---------------------------------------------------------------------------\n\nexport class SpeechStream extends stt.SpeechStream {\n #opts: ResolvedSTTOptions;\n #audioEnergyFilter: AudioEnergyFilter;\n #logger = log();\n #speaking = false;\n #resetWS = new Future();\n #requestId = '';\n label = 'sarvam.SpeechStream';\n\n constructor(sttInstance: STT, opts: ResolvedSTTOptions, connOptions?: APIConnectOptions) {\n super(sttInstance, SAMPLE_RATE, connOptions);\n this.#opts = opts;\n this.closed = false;\n this.#audioEnergyFilter = new AudioEnergyFilter();\n }\n\n updateOptions(opts: Partial<STTOptions>) {\n const modelChanging = opts.model != null && opts.model !== this.#opts.model;\n\n const base: Partial<STTOptions> = modelChanging\n ? {\n apiKey: this.#opts.apiKey,\n ...(this.#opts.highVadSensitivity != null\n ? { highVadSensitivity: this.#opts.highVadSensitivity }\n : {}),\n ...(this.#opts.flushSignal != null ? { flushSignal: this.#opts.flushSignal } : {}),\n ...(this.#opts.languageCode != null && opts.model !== 'saaras:v2.5'\n ? { languageCode: this.#opts.languageCode as STTV3Languages }\n : {}),\n }\n : ({ ...this.#opts } as Partial<STTOptions>);\n\n this.#opts = resolveOptions({ ...base, ...opts } as STTOptions);\n this.#resetWS.resolve();\n }\n\n protected async run() {\n const maxRetry = 32;\n let retries = 0;\n\n while (!this.input.closed && !this.closed) {\n const wsUrl = buildWsUrl(this.#opts);\n this.#logger.info(`Sarvam STT connecting to: ${wsUrl}`);\n const ws = new WebSocket(wsUrl, {\n headers: { 'api-subscription-key': this.#opts.apiKey },\n });\n\n let sessionStart = 0;\n try {\n await new Promise<void>((resolve, reject) => {\n ws.once('open', () => resolve());\n ws.once('error', (err: Error) => reject(err));\n ws.once('close', (code: number) =>\n reject(new Error(`WebSocket closed with code ${code}`)),\n );\n });\n\n sessionStart = Date.now();\n await this.#runWS(ws);\n retries = 0;\n } catch (e) {\n // Clean up the WebSocket on failure to prevent listener leaks\n ws.removeAllListeners();\n ws.close();\n\n if (!this.closed && !this.input.closed) {\n // If the session ran for a meaningful duration (>5s), this was a working\n // session that ended normally (e.g. server idle timeout ~20s). Reset retries\n // so expected idle-timeout reconnections don't accumulate toward the fatal limit.\n if (sessionStart > 0 && Date.now() - sessionStart > 5000) {\n retries = 0;\n }\n if (retries >= maxRetry) {\n throw new Error(`Failed to connect to Sarvam STT after ${retries} attempts: ${e}`);\n }\n const delay = Math.min(retries * 5, 10);\n retries++;\n this.#logger.warn(\n `Failed to connect to Sarvam STT, retrying in ${delay}s: ${e} (${retries}/${maxRetry})`,\n );\n await new Promise((resolve) => setTimeout(resolve, delay * 1000));\n } else {\n this.#logger.warn(\n `Sarvam STT disconnected, connection is closed: ${e} (inputClosed: ${this.input.closed}, isClosed: ${this.closed})`,\n );\n }\n }\n }\n\n this.closed = true;\n }\n\n async #runWS(ws: WebSocket) {\n this.#resetWS = new Future();\n this.#speaking = false;\n let closing = false;\n // Session-scoped controller: aborted in finally to cancel sendTask on WS reset\n const sessionController = new AbortController();\n\n // Config message: only supported on translate WS endpoint (saaras:v2.5)\n // @see https://docs.sarvam.ai/api-reference-docs/speech-to-text-translate/translate/ws\n if (this.#opts.model === 'saaras:v2.5' && this.#opts.prompt != null) {\n ws.send(JSON.stringify({ type: 'config', prompt: this.#opts.prompt }));\n }\n\n // No keepalive — Sarvam rejects messages without 'audio' field, and sending\n // silent audio could confuse server-side VAD. On idle timeout (~20s), the\n // server closes the connection and the outer retry loop in run() reconnects.\n // This matches the Python SDK's approach.\n\n const wsMonitor = Task.from(async (controller) => {\n const closed = new Promise<void>((_, reject) => {\n ws.once('close', (code: number, reason: Buffer) => {\n if (!closing) {\n this.#logger.error(`WebSocket closed with code ${code}: ${reason}`);\n reject(new Error('WebSocket closed'));\n }\n });\n });\n await Promise.race([closed, waitForAbort(controller.signal)]);\n });\n\n const sendTask = async () => {\n const samples50Ms = Math.floor(SAMPLE_RATE / 20); // 50ms chunks\n const stream = new AudioByteStream(SAMPLE_RATE, NUM_CHANNELS, samples50Ms);\n const abortPromise = waitForAbort(this.abortSignal);\n const sessionAbort = waitForAbort(sessionController.signal);\n\n try {\n while (!this.closed) {\n const result = await Promise.race([this.input.next(), abortPromise, sessionAbort]);\n if (result === undefined) return; // aborted\n if (result.done) break;\n\n const data = result.value;\n\n let frames: AudioFrame[];\n if (data === SpeechStream.FLUSH_SENTINEL) {\n frames = stream.flush();\n } else if (data.sampleRate !== SAMPLE_RATE || data.channels !== NUM_CHANNELS) {\n throw new Error(\n `Expected ${SAMPLE_RATE}Hz/${NUM_CHANNELS}ch, got ${data.sampleRate}Hz/${data.channels}ch`,\n );\n } else {\n frames = stream.write(\n data.data.buffer.slice(\n data.data.byteOffset,\n data.data.byteOffset + data.data.byteLength,\n ) as ArrayBuffer,\n );\n }\n\n for (const frame of frames) {\n if (this.#audioEnergyFilter.pushFrame(frame)) {\n // Sarvam expects base64-encoded PCM in a JSON message\n const pcmBuffer = Buffer.from(\n frame.data.buffer,\n frame.data.byteOffset,\n frame.data.byteLength,\n );\n const base64Audio = pcmBuffer.toString('base64');\n ws.send(\n JSON.stringify({\n audio: {\n data: base64Audio,\n encoding: 'audio/wav',\n sample_rate: SAMPLE_RATE,\n },\n }),\n );\n }\n }\n\n // Send flush message on FLUSH_SENTINEL (VAD end of speech)\n if (data === SpeechStream.FLUSH_SENTINEL) {\n ws.send(JSON.stringify({ type: 'flush' }));\n }\n }\n } finally {\n closing = true;\n // Match Python: end_of_stream includes an empty audio field to avoid\n // \"audio must not be None\" rejection from the server\n try {\n ws.send(\n JSON.stringify({\n type: 'end_of_stream',\n audio: { data: '', encoding: 'audio/wav', sample_rate: SAMPLE_RATE },\n }),\n );\n } catch {\n // ws may already be closed\n }\n wsMonitor.cancel();\n }\n };\n\n const listenTask = Task.from(async (controller) => {\n const putMessage = (event: stt.SpeechEvent) => {\n if (!this.queue.closed) {\n try {\n this.queue.put(event);\n } catch {\n // ignore\n }\n }\n };\n\n const listenMessage = new Promise<void>((resolve, reject) => {\n ws.once('close', () => resolve());\n ws.on('message', (msg: RawData) => {\n try {\n const raw = msg.toString();\n this.#logger.debug(`Sarvam STT raw WS message: ${raw.substring(0, 500)}`);\n const json = JSON.parse(raw);\n const msgType: string = json['type'] ?? '';\n\n if (msgType === 'events') {\n const eventData = (json['data'] as SarvamWSEventData | undefined) ?? {};\n const signalType = eventData.signal_type;\n\n if (signalType === 'START_SPEECH') {\n if (!this.#speaking) {\n this.#speaking = true;\n putMessage({ type: stt.SpeechEventType.START_OF_SPEECH });\n }\n } else if (signalType === 'END_SPEECH') {\n if (this.#speaking) {\n this.#speaking = false;\n putMessage({ type: stt.SpeechEventType.END_OF_SPEECH });\n }\n }\n } else if (msgType === 'data') {\n const td = (json['data'] as SarvamWSTranscriptData | undefined) ?? {};\n const transcript = td.transcript ?? '';\n const language = td.language_code ?? this.#opts.languageCode ?? 'unknown';\n const requestId = td.request_id ?? '';\n const confidence = td.language_probability ?? 0;\n this.#requestId = requestId;\n\n // Log metrics when available\n if (td.metrics) {\n this.#logger.debug(\n `Sarvam STT metrics: audio_duration=${td.metrics.audio_duration}s, latency=${td.metrics.processing_latency}s`,\n );\n }\n\n if (transcript) {\n if (!this.#speaking) {\n this.#speaking = true;\n putMessage({ type: stt.SpeechEventType.START_OF_SPEECH });\n }\n\n putMessage({\n type: stt.SpeechEventType.FINAL_TRANSCRIPT,\n requestId,\n alternatives: [\n {\n text: transcript,\n language,\n startTime: 0,\n endTime: td.metrics?.audio_duration ?? 0,\n confidence,\n },\n ],\n });\n }\n } else if (msgType === 'error') {\n // Server format: { type: \"error\", data: { message: \"...\", code: \"...\" } }\n // Also check top-level and 'error' field as fallback\n const nested = json['data'] as SarvamWSErrorData | undefined;\n const errorInfo =\n nested?.message ??\n nested?.error ??\n json['error'] ??\n json['message'] ??\n 'Unknown error';\n const errorCode = nested?.code ?? json['code'] ?? '';\n this.#logger.error(`Sarvam STT WebSocket error [${errorCode}]: ${errorInfo}`);\n reject(new Error(`Sarvam STT API error [${errorCode}]: ${errorInfo}`));\n return;\n }\n\n if (this.closed || closing) {\n resolve();\n }\n } catch (err) {\n this.#logger.error(`Error processing Sarvam STT message: ${msg}`);\n reject(err);\n }\n });\n });\n\n await Promise.race([listenMessage, waitForAbort(controller.signal)]);\n }, this.abortController);\n\n try {\n await Promise.race([\n this.#resetWS.await,\n Promise.all([sendTask(), listenTask.result, wsMonitor.result]),\n ]);\n } finally {\n closing = true;\n sessionController.abort();\n // Do NOT call listenTask.cancel() — it would abort this.abortController\n // (passed to Task.from) and permanently break the stream. Instead, ws.close()\n // triggers the ws.once('close') handler inside listenMessage, letting listenTask\n // exit naturally. On close(), the parent abort signal handles it directly.\n wsMonitor.cancel();\n ws.close();\n // Suppress unhandled rejection from orphaned listenTask on reconnect\n listenTask.result.catch(() => {});\n }\n }\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAGA,oBAWO;AAEP,gBAAwC;AAaxC,MAAM,sBAAsB;AAC5B,MAAM,gCAAgC;AACtC,MAAM,oBAAoB;AAC1B,MAAM,8BAA8B;AAEpC,MAAM,cAAc;AACpB,MAAM,eAAe;AA6FrB,MAAM,mBAAmB;AAAA,EACvB,cAAc;AAChB;AAEA,MAAM,qBAAqB;AAAA,EACzB,cAAc;AAAA,EACd,MAAM;AACR;AAEA,MAAM,4BAA4B;AAAA,EAChC,MAAM;AACR;AAGA,MAAM,qBAA0C,oBAAI,IAAoB;AAAA,EACtE;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF,CAAC;AAMD,SAAS,eAAe,MAA+C;AACrE,QAAM,SAAS,KAAK,UAAU,QAAQ,IAAI;AAC1C,MAAI,CAAC,QAAQ;AACX,UAAM,IAAI,MAAM,0EAA0E;AAAA,EAC5F;AAEA,QAAM,QAAmB,KAAK,SAAS;AAEvC,QAAM,OAA2B;AAAA,IAC/B;AAAA,IACA;AAAA,IACA,WAAW,KAAK,aAAa;AAAA,IAC7B,oBAAoB,KAAK;AAAA,IACzB,aAAa,KAAK;AAAA,EACpB;AAEA,MAAI,UAAU,eAAe;AAC3B,UAAM,gBAAgB;AACtB,SAAK,SAAS,cAAc;AAC5B,SAAK,OAAO,cAAc,QAAQ,0BAA0B;AAAA,EAC9D,WAAW,UAAU,aAAa;AAChC,UAAM,SAAS;AACf,SAAK,eAAe,OAAO,gBAAgB,mBAAmB;AAC9D,SAAK,OAAO,OAAO,QAAQ,mBAAmB;AAC9C,SAAK,SAAS,OAAO;AACrB,SAAK,iBAAiB,OAAO;AAAA,EAC/B,OAAO;AAEL,QAAI,eAAgB,KAAsB,gBAAgB,iBAAiB;AAC3E,QAAI,CAAC,mBAAmB,IAAI,YAAY,GAAG;AACzC,qBAAe,iBAAiB;AAAA,IAClC;AACA,SAAK,eAAe;AACpB,SAAK,iBAAkB,KAAsB;AAAA,EAC/C;AAEA,SAAO;AACT;AAMA,SAAS,WAAW,OAA0B;AAC5C,SAAO,UAAU,gBAAgB,gCAAgC;AACnE;AAEA,SAAS,SAAS,OAA0B;AAC1C,SAAO,UAAU,gBAAgB,8BAA8B;AACjE;AAEA,SAAS,WAAW,MAAkC;AACpD,QAAM,OAAO,SAAS,KAAK,KAAK;AAChC,QAAM,SAAS,IAAI,gBAAgB;AACnC,SAAO,IAAI,SAAS,KAAK,KAAK;AAC9B,SAAO,IAAI,eAAe,MAAM;AAChC,SAAO,IAAI,eAAe,OAAO,WAAW,CAAC;AAC7C,SAAO,IAAI,qBAAqB,WAAW;AAE3C,MAAI,KAAK,UAAU,iBAAiB,KAAK,gBAAgB,MAAM;AAC7D,WAAO,IAAI,iBAAiB,KAAK,YAAY;AAAA,EAC/C;AAGA,MAAI,KAAK,QAAQ,MAAM;AACrB,WAAO,IAAI,QAAQ,KAAK,IAAI;AAAA,EAC9B;AAGA,MAAI,KAAK,sBAAsB,MAAM;AACnC,WAAO,IAAI,wBAAwB,OAAO,KAAK,kBAAkB,CAAC;AAAA,EACpE;AACA,MAAI,KAAK,eAAe,MAAM;AAC5B,WAAO,IAAI,gBAAgB,OAAO,KAAK,WAAW,CAAC;AAAA,EACrD;AAEA,SAAO,GAAG,IAAI,IAAI,OAAO,SAAS,CAAC;AACrC;AAMA,SAAS,cAAc,SAAe,MAAoC;AACxE,QAAM,WAAW,IAAI,SAAS;AAC9B,WAAS,OAAO,QAAQ,SAAS,WAAW;AAC5C,WAAS,OAAO,SAAS,KAAK,KAAK;AAEnC,MAAI,KAAK,UAAU,iBAAiB,KAAK,gBAAgB,MAAM;AAC7D,aAAS,OAAO,iBAAiB,KAAK,YAAY;AAAA,EACpD;AACA,MAAI,KAAK,UAAU,eAAe,KAAK,QAAQ,MAAM;AACnD,aAAS,OAAO,QAAQ,KAAK,IAAI;AAAA,EACnC;AACA,OAAK,KAAK,UAAU,iBAAiB,KAAK,UAAU,gBAAgB,KAAK,UAAU,MAAM;AACvF,aAAS,OAAO,UAAU,KAAK,MAAM;AAAA,EACvC;AACA,MAAI,KAAK,UAAU,iBAAiB,KAAK,gBAAgB;AACvD,aAAS,OAAO,mBAAmB,MAAM;AAAA,EAC3C;AAEA,SAAO;AACT;AAMA,SAAS,UAAU,OAA2B;AAC5C,QAAM,gBAAgB;AACtB,QAAM,WAAY,MAAM,aAAa,MAAM,WAAW,gBAAiB;AACvE,QAAM,aAAc,MAAM,WAAW,gBAAiB;AAEtD,QAAM,SAAS,OAAO,MAAM,EAAE;AAC9B,SAAO,MAAM,QAAQ,CAAC;AACtB,SAAO,cAAc,KAAK,MAAM,KAAK,YAAY,CAAC;AAClD,SAAO,MAAM,QAAQ,CAAC;AACtB,SAAO,MAAM,QAAQ,EAAE;AACvB,SAAO,cAAc,IAAI,EAAE;AAC3B,SAAO,cAAc,GAAG,EAAE;AAC1B,SAAO,cAAc,MAAM,UAAU,EAAE;AACvC,SAAO,cAAc,MAAM,YAAY,EAAE;AACzC,SAAO,cAAc,UAAU,EAAE;AACjC,SAAO,cAAc,YAAY,EAAE;AACnC,SAAO,cAAc,eAAe,EAAE;AACtC,SAAO,MAAM,QAAQ,EAAE;AACvB,SAAO,cAAc,MAAM,KAAK,YAAY,EAAE;AAE9C,QAAM,MAAM,OAAO,KAAK,MAAM,KAAK,QAAQ,MAAM,KAAK,YAAY,MAAM,KAAK,UAAU;AACvF,SAAO,OAAO,OAAO,CAAC,QAAQ,GAAG,CAAC;AACpC;AAuDO,MAAM,YAAY,kBAAI,IAAI;AAAA,EACvB;AAAA,EACR,QAAQ;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAiBR,YAAY,OAA4B,CAAC,GAAG;AAC1C,UAAM,WAAW,eAAe,IAAI;AACpC,UAAM;AAAA,MACJ,WAAW,SAAS;AAAA,MACpB,gBAAgB;AAAA,MAChB,mBAAmB;AAAA,IACrB,CAAC;AACD,SAAK,OAAO;AAAA,EACd;AAAA,EAEA,cAAc,MAA2B;AACvC,UAAM,gBAAgB,KAAK,SAAS,QAAQ,KAAK,UAAU,KAAK,KAAK;AAErE,UAAM,OAA4B,gBAC9B;AAAA,MACE,QAAQ,KAAK,KAAK;AAAA,MAClB,WAAW,KAAK,KAAK;AAAA,MACrB,GAAI,KAAK,KAAK,sBAAsB,OAChC,EAAE,oBAAoB,KAAK,KAAK,mBAAmB,IACnD,CAAC;AAAA,MACL,GAAI,KAAK,KAAK,eAAe,OAAO,EAAE,aAAa,KAAK,KAAK,YAAY,IAAI,CAAC;AAAA,MAC9E,GAAI,KAAK,KAAK,gBAAgB,QAAQ,KAAK,UAAU,gBACjD,EAAE,cAAc,KAAK,KAAK,aAA+B,IACzD,CAAC;AAAA,IACP,IACC,EAAE,GAAG,KAAK,KAAK;AAEpB,SAAK,OAAO,eAAe,EAAE,GAAG,MAAM,GAAG,KAAK,CAAe;AAAA,EAC/D;AAAA,EAEA,MAAM,WAAW,QAAqB,aAAqD;AACzF,UAAM,YAAQ,2BAAY,MAAM;AAChC,UAAM,YAAY,UAAU,KAAK;AACjC,UAAM,UAAU,IAAI,KAAK,CAAC,IAAI,WAAW,SAAS,CAAC,GAAG,EAAE,MAAM,YAAY,CAAC;AAE3E,UAAM,WAAW,cAAc,SAAS,KAAK,IAAI;AAEjD,UAAM,WAAW,MAAM,MAAM,WAAW,KAAK,KAAK,KAAK,GAAG;AAAA,MACxD,QAAQ;AAAA,MACR,SAAS;AAAA,QACP,wBAAwB,KAAK,KAAK;AAAA,MACpC;AAAA,MACA,MAAM;AAAA,MACN,QAAQ,eAAe;AAAA,IACzB,CAAC;AAED,QAAI,CAAC,SAAS,IAAI;AAChB,YAAM,YAAY,MAAM,SAAS,KAAK;AACtC,YAAM,IAAI,MAAM,wBAAwB,SAAS,MAAM,KAAK,SAAS,EAAE;AAAA,IACzE;AAEA,UAAM,OAAQ,MAAM,SAAS,KAAK;AAElC,QAAI,YAAY;AAChB,QAAI,UAAU;AACd,QAAI,KAAK,YAAY;AACnB,YAAM,SAAS,KAAK,WAAW;AAC/B,YAAM,OAAO,KAAK,WAAW;AAC7B,UAAI,OAAO,SAAS,EAAG,aAAY,OAAO,CAAC,KAAK;AAChD,UAAI,KAAK,SAAS,EAAG,WAAU,KAAK,KAAK,SAAS,CAAC,KAAK;AAAA,IAC1D;AAEA,WAAO;AAAA,MACL,MAAM,kBAAI,gBAAgB;AAAA,MAC1B,WAAW,KAAK,cAAc;AAAA,MAC9B,cAAc;AAAA,QACZ;AAAA,UACE,MAAM,KAAK,cAAc;AAAA,UACzB,UAAU,KAAK,iBAAiB,KAAK,KAAK,gBAAgB;AAAA,UAC1D;AAAA,UACA;AAAA,UACA,YAAY,KAAK,wBAAwB;AAAA,QAC3C;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,OAAO,SAA6D;AAClE,QAAI,CAAC,KAAK,aAAa,WAAW;AAChC,YAAM,IAAI;AAAA,QACR;AAAA,MACF;AAAA,IACF;AACA,WAAO,IAAI,aAAa,MAAM,KAAK,MAAM,mCAAS,WAAW;AAAA,EAC/D;AACF;AAMO,MAAM,qBAAqB,kBAAI,aAAa;AAAA,EACjD;AAAA,EACA;AAAA,EACA,cAAU,mBAAI;AAAA,EACd,YAAY;AAAA,EACZ,WAAW,IAAI,qBAAO;AAAA,EACtB,aAAa;AAAA,EACb,QAAQ;AAAA,EAER,YAAY,aAAkB,MAA0B,aAAiC;AACvF,UAAM,aAAa,aAAa,WAAW;AAC3C,SAAK,QAAQ;AACb,SAAK,SAAS;AACd,SAAK,qBAAqB,IAAI,gCAAkB;AAAA,EAClD;AAAA,EAEA,cAAc,MAA2B;AACvC,UAAM,gBAAgB,KAAK,SAAS,QAAQ,KAAK,UAAU,KAAK,MAAM;AAEtE,UAAM,OAA4B,gBAC9B;AAAA,MACE,QAAQ,KAAK,MAAM;AAAA,MACnB,GAAI,KAAK,MAAM,sBAAsB,OACjC,EAAE,oBAAoB,KAAK,MAAM,mBAAmB,IACpD,CAAC;AAAA,MACL,GAAI,KAAK,MAAM,eAAe,OAAO,EAAE,aAAa,KAAK,MAAM,YAAY,IAAI,CAAC;AAAA,MAChF,GAAI,KAAK,MAAM,gBAAgB,QAAQ,KAAK,UAAU,gBAClD,EAAE,cAAc,KAAK,MAAM,aAA+B,IAC1D,CAAC;AAAA,IACP,IACC,EAAE,GAAG,KAAK,MAAM;AAErB,SAAK,QAAQ,eAAe,EAAE,GAAG,MAAM,GAAG,KAAK,CAAe;AAC9D,SAAK,SAAS,QAAQ;AAAA,EACxB;AAAA,EAEA,MAAgB,MAAM;AACpB,UAAM,WAAW;AACjB,QAAI,UAAU;AAEd,WAAO,CAAC,KAAK,MAAM,UAAU,CAAC,KAAK,QAAQ;AACzC,YAAM,QAAQ,WAAW,KAAK,KAAK;AACnC,WAAK,QAAQ,KAAK,6BAA6B,KAAK,EAAE;AACtD,YAAM,KAAK,IAAI,oBAAU,OAAO;AAAA,QAC9B,SAAS,EAAE,wBAAwB,KAAK,MAAM,OAAO;AAAA,MACvD,CAAC;AAED,UAAI,eAAe;AACnB,UAAI;AACF,cAAM,IAAI,QAAc,CAAC,SAAS,WAAW;AAC3C,aAAG,KAAK,QAAQ,MAAM,QAAQ,CAAC;AAC/B,aAAG,KAAK,SAAS,CAAC,QAAe,OAAO,GAAG,CAAC;AAC5C,aAAG;AAAA,YAAK;AAAA,YAAS,CAAC,SAChB,OAAO,IAAI,MAAM,8BAA8B,IAAI,EAAE,CAAC;AAAA,UACxD;AAAA,QACF,CAAC;AAED,uBAAe,KAAK,IAAI;AACxB,cAAM,KAAK,OAAO,EAAE;AACpB,kBAAU;AAAA,MACZ,SAAS,GAAG;AAEV,WAAG,mBAAmB;AACtB,WAAG,MAAM;AAET,YAAI,CAAC,KAAK,UAAU,CAAC,KAAK,MAAM,QAAQ;AAItC,cAAI,eAAe,KAAK,KAAK,IAAI,IAAI,eAAe,KAAM;AACxD,sBAAU;AAAA,UACZ;AACA,cAAI,WAAW,UAAU;AACvB,kBAAM,IAAI,MAAM,yCAAyC,OAAO,cAAc,CAAC,EAAE;AAAA,UACnF;AACA,gBAAM,QAAQ,KAAK,IAAI,UAAU,GAAG,EAAE;AACtC;AACA,eAAK,QAAQ;AAAA,YACX,gDAAgD,KAAK,MAAM,CAAC,KAAK,OAAO,IAAI,QAAQ;AAAA,UACtF;AACA,gBAAM,IAAI,QAAQ,CAAC,YAAY,WAAW,SAAS,QAAQ,GAAI,CAAC;AAAA,QAClE,OAAO;AACL,eAAK,QAAQ;AAAA,YACX,kDAAkD,CAAC,kBAAkB,KAAK,MAAM,MAAM,eAAe,KAAK,MAAM;AAAA,UAClH;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAEA,SAAK,SAAS;AAAA,EAChB;AAAA,EAEA,MAAM,OAAO,IAAe;AAC1B,SAAK,WAAW,IAAI,qBAAO;AAC3B,SAAK,YAAY;AACjB,QAAI,UAAU;AAEd,UAAM,oBAAoB,IAAI,gBAAgB;AAI9C,QAAI,KAAK,MAAM,UAAU,iBAAiB,KAAK,MAAM,UAAU,MAAM;AACnE,SAAG,KAAK,KAAK,UAAU,EAAE,MAAM,UAAU,QAAQ,KAAK,MAAM,OAAO,CAAC,CAAC;AAAA,IACvE;AAOA,UAAM,YAAY,mBAAK,KAAK,OAAO,eAAe;AAChD,YAAM,SAAS,IAAI,QAAc,CAAC,GAAG,WAAW;AAC9C,WAAG,KAAK,SAAS,CAAC,MAAc,WAAmB;AACjD,cAAI,CAAC,SAAS;AACZ,iBAAK,QAAQ,MAAM,8BAA8B,IAAI,KAAK,MAAM,EAAE;AAClE,mBAAO,IAAI,MAAM,kBAAkB,CAAC;AAAA,UACtC;AAAA,QACF,CAAC;AAAA,MACH,CAAC;AACD,YAAM,QAAQ,KAAK,CAAC,YAAQ,4BAAa,WAAW,MAAM,CAAC,CAAC;AAAA,IAC9D,CAAC;AAED,UAAM,WAAW,YAAY;AAC3B,YAAM,cAAc,KAAK,MAAM,cAAc,EAAE;AAC/C,YAAM,SAAS,IAAI,8BAAgB,aAAa,cAAc,WAAW;AACzE,YAAM,mBAAe,4BAAa,KAAK,WAAW;AAClD,YAAM,mBAAe,4BAAa,kBAAkB,MAAM;AAE1D,UAAI;AACF,eAAO,CAAC,KAAK,QAAQ;AACnB,gBAAM,SAAS,MAAM,QAAQ,KAAK,CAAC,KAAK,MAAM,KAAK,GAAG,cAAc,YAAY,CAAC;AACjF,cAAI,WAAW,OAAW;AAC1B,cAAI,OAAO,KAAM;AAEjB,gBAAM,OAAO,OAAO;AAEpB,cAAI;AACJ,cAAI,SAAS,aAAa,gBAAgB;AACxC,qBAAS,OAAO,MAAM;AAAA,UACxB,WAAW,KAAK,eAAe,eAAe,KAAK,aAAa,cAAc;AAC5E,kBAAM,IAAI;AAAA,cACR,YAAY,WAAW,MAAM,YAAY,WAAW,KAAK,UAAU,MAAM,KAAK,QAAQ;AAAA,YACxF;AAAA,UACF,OAAO;AACL,qBAAS,OAAO;AAAA,cACd,KAAK,KAAK,OAAO;AAAA,gBACf,KAAK,KAAK;AAAA,gBACV,KAAK,KAAK,aAAa,KAAK,KAAK;AAAA,cACnC;AAAA,YACF;AAAA,UACF;AAEA,qBAAW,SAAS,QAAQ;AAC1B,gBAAI,KAAK,mBAAmB,UAAU,KAAK,GAAG;AAE5C,oBAAM,YAAY,OAAO;AAAA,gBACvB,MAAM,KAAK;AAAA,gBACX,MAAM,KAAK;AAAA,gBACX,MAAM,KAAK;AAAA,cACb;AACA,oBAAM,cAAc,UAAU,SAAS,QAAQ;AAC/C,iBAAG;AAAA,gBACD,KAAK,UAAU;AAAA,kBACb,OAAO;AAAA,oBACL,MAAM;AAAA,oBACN,UAAU;AAAA,oBACV,aAAa;AAAA,kBACf;AAAA,gBACF,CAAC;AAAA,cACH;AAAA,YACF;AAAA,UACF;AAGA,cAAI,SAAS,aAAa,gBAAgB;AACxC,eAAG,KAAK,KAAK,UAAU,EAAE,MAAM,QAAQ,CAAC,CAAC;AAAA,UAC3C;AAAA,QACF;AAAA,MACF,UAAE;AACA,kBAAU;AAGV,YAAI;AACF,aAAG;AAAA,YACD,KAAK,UAAU;AAAA,cACb,MAAM;AAAA,cACN,OAAO,EAAE,MAAM,IAAI,UAAU,aAAa,aAAa,YAAY;AAAA,YACrE,CAAC;AAAA,UACH;AAAA,QACF,QAAQ;AAAA,QAER;AACA,kBAAU,OAAO;AAAA,MACnB;AAAA,IACF;AAEA,UAAM,aAAa,mBAAK,KAAK,OAAO,eAAe;AACjD,YAAM,aAAa,CAAC,UAA2B;AAC7C,YAAI,CAAC,KAAK,MAAM,QAAQ;AACtB,cAAI;AACF,iBAAK,MAAM,IAAI,KAAK;AAAA,UACtB,QAAQ;AAAA,UAER;AAAA,QACF;AAAA,MACF;AAEA,YAAM,gBAAgB,IAAI,QAAc,CAAC,SAAS,WAAW;AAC3D,WAAG,KAAK,SAAS,MAAM,QAAQ,CAAC;AAChC,WAAG,GAAG,WAAW,CAAC,QAAiB;AAzpB3C;AA0pBU,cAAI;AACF,kBAAM,MAAM,IAAI,SAAS;AACzB,iBAAK,QAAQ,MAAM,8BAA8B,IAAI,UAAU,GAAG,GAAG,CAAC,EAAE;AACxE,kBAAM,OAAO,KAAK,MAAM,GAAG;AAC3B,kBAAM,UAAkB,KAAK,MAAM,KAAK;AAExC,gBAAI,YAAY,UAAU;AACxB,oBAAM,YAAa,KAAK,MAAM,KAAuC,CAAC;AACtE,oBAAM,aAAa,UAAU;AAE7B,kBAAI,eAAe,gBAAgB;AACjC,oBAAI,CAAC,KAAK,WAAW;AACnB,uBAAK,YAAY;AACjB,6BAAW,EAAE,MAAM,kBAAI,gBAAgB,gBAAgB,CAAC;AAAA,gBAC1D;AAAA,cACF,WAAW,eAAe,cAAc;AACtC,oBAAI,KAAK,WAAW;AAClB,uBAAK,YAAY;AACjB,6BAAW,EAAE,MAAM,kBAAI,gBAAgB,cAAc,CAAC;AAAA,gBACxD;AAAA,cACF;AAAA,YACF,WAAW,YAAY,QAAQ;AAC7B,oBAAM,KAAM,KAAK,MAAM,KAA4C,CAAC;AACpE,oBAAM,aAAa,GAAG,cAAc;AACpC,oBAAM,WAAW,GAAG,iBAAiB,KAAK,MAAM,gBAAgB;AAChE,oBAAM,YAAY,GAAG,cAAc;AACnC,oBAAM,aAAa,GAAG,wBAAwB;AAC9C,mBAAK,aAAa;AAGlB,kBAAI,GAAG,SAAS;AACd,qBAAK,QAAQ;AAAA,kBACX,sCAAsC,GAAG,QAAQ,cAAc,cAAc,GAAG,QAAQ,kBAAkB;AAAA,gBAC5G;AAAA,cACF;AAEA,kBAAI,YAAY;AACd,oBAAI,CAAC,KAAK,WAAW;AACnB,uBAAK,YAAY;AACjB,6BAAW,EAAE,MAAM,kBAAI,gBAAgB,gBAAgB,CAAC;AAAA,gBAC1D;AAEA,2BAAW;AAAA,kBACT,MAAM,kBAAI,gBAAgB;AAAA,kBAC1B;AAAA,kBACA,cAAc;AAAA,oBACZ;AAAA,sBACE,MAAM;AAAA,sBACN;AAAA,sBACA,WAAW;AAAA,sBACX,WAAS,QAAG,YAAH,mBAAY,mBAAkB;AAAA,sBACvC;AAAA,oBACF;AAAA,kBACF;AAAA,gBACF,CAAC;AAAA,cACH;AAAA,YACF,WAAW,YAAY,SAAS;AAG9B,oBAAM,SAAS,KAAK,MAAM;AAC1B,oBAAM,aACJ,iCAAQ,aACR,iCAAQ,UACR,KAAK,OAAO,KACZ,KAAK,SAAS,KACd;AACF,oBAAM,aAAY,iCAAQ,SAAQ,KAAK,MAAM,KAAK;AAClD,mBAAK,QAAQ,MAAM,+BAA+B,SAAS,MAAM,SAAS,EAAE;AAC5E,qBAAO,IAAI,MAAM,yBAAyB,SAAS,MAAM,SAAS,EAAE,CAAC;AACrE;AAAA,YACF;AAEA,gBAAI,KAAK,UAAU,SAAS;AAC1B,sBAAQ;AAAA,YACV;AAAA,UACF,SAAS,KAAK;AACZ,iBAAK,QAAQ,MAAM,wCAAwC,GAAG,EAAE;AAChE,mBAAO,GAAG;AAAA,UACZ;AAAA,QACF,CAAC;AAAA,MACH,CAAC;AAED,YAAM,QAAQ,KAAK,CAAC,mBAAe,4BAAa,WAAW,MAAM,CAAC,CAAC;AAAA,IACrE,GAAG,KAAK,eAAe;AAEvB,QAAI;AACF,YAAM,QAAQ,KAAK;AAAA,QACjB,KAAK,SAAS;AAAA,QACd,QAAQ,IAAI,CAAC,SAAS,GAAG,WAAW,QAAQ,UAAU,MAAM,CAAC;AAAA,MAC/D,CAAC;AAAA,IACH,UAAE;AACA,gBAAU;AACV,wBAAkB,MAAM;AAKxB,gBAAU,OAAO;AACjB,SAAG,MAAM;AAET,iBAAW,OAAO,MAAM,MAAM;AAAA,MAAC,CAAC;AAAA,IAClC;AAAA,EACF;AACF;","names":[]}
1
+ {"version":3,"sources":["../src/stt.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2025 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport {\n type APIConnectOptions,\n type AudioBuffer,\n AudioByteStream,\n AudioEnergyFilter,\n Future,\n Task,\n log,\n mergeFrames,\n normalizeLanguage,\n stt,\n waitForAbort,\n} from '@livekit/agents';\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { type RawData, WebSocket } from 'ws';\nimport type {\n STTLanguages,\n STTModels,\n STTModes,\n STTV2Languages,\n STTV3Languages,\n} from './models.js';\n\n// ---------------------------------------------------------------------------\n// Endpoint URLs\n// ---------------------------------------------------------------------------\n\nconst SARVAM_STT_REST_URL = 'https://api.sarvam.ai/speech-to-text';\nconst SARVAM_STT_TRANSLATE_REST_URL = 'https://api.sarvam.ai/speech-to-text-translate';\nconst SARVAM_STT_WS_URL = 'wss://api.sarvam.ai/speech-to-text/ws';\nconst SARVAM_STT_TRANSLATE_WS_URL = 'wss://api.sarvam.ai/speech-to-text-translate/ws';\n\nconst SAMPLE_RATE = 16000;\nconst NUM_CHANNELS = 1;\n\n// ---------------------------------------------------------------------------\n// Model-specific option types\n// ---------------------------------------------------------------------------\n\ninterface STTBaseOptions {\n /** Sarvam API key. Defaults to $SARVAM_API_KEY */\n apiKey?: string;\n /**\n * Whether to use native WebSocket streaming for `stream()`.\n * Set to `false` to prefer non-streaming REST recognition (used by Agent via StreamAdapter + VAD).\n * Default: `true`.\n */\n streaming?: boolean;\n /** Increase VAD sensitivity (WS only). Maps to `high_vad_sensitivity` query param. */\n highVadSensitivity?: boolean;\n /** Enable flush signal events from server (WS only). Maps to `flush_signal` query param. */\n flushSignal?: boolean;\n}\n\n/**\n * Options specific to saarika:v2.5.\n * saarika:v2.5 will be deprecated soon — prefer {@link STTV3Options} with `saaras:v3` for new integrations.\n * All v2.5 language codes are also supported by v3.\n * @see {@link https://docs.sarvam.ai/api-reference-docs/speech-to-text/transcribe | Sarvam STT API docs}\n */\nexport interface STTV2Options extends STTBaseOptions {\n model: 'saarika:v2.5';\n /** Language code (BCP-47). Default: 'en-IN'. Set to 'unknown' for auto-detection. */\n languageCode?: STTV2Languages | string;\n /** Return chunk-level timestamps in REST response */\n withTimestamps?: boolean;\n}\n\n/**\n * Options specific to saaras:v2.5 (dedicated translate endpoint).\n * Uses the `/speech-to-text-translate` endpoint for Indic-to-English translation.\n * Auto-detects the source language; does not accept language codes or timestamps.\n * @see {@link https://docs.sarvam.ai/api-reference-docs/speech-to-text-translate/translate | Sarvam STT Translate docs}\n */\nexport interface STTTranslateOptions extends STTBaseOptions {\n model: 'saaras:v2.5';\n /** Conversation context to boost model accuracy */\n prompt?: string;\n /** Mode for translate WS. Default: 'translate'. */\n mode?: STTModes | string;\n}\n\n/**\n * Options specific to saaras:v3 (recommended).\n * @see {@link https://docs.sarvam.ai/api-reference-docs/speech-to-text/transcribe | Sarvam STT API docs}\n */\nexport interface STTV3Options extends STTBaseOptions {\n model?: 'saaras:v3';\n /** Language code (BCP-47). Default: 'en-IN'. Set to 'unknown' for auto-detection. */\n languageCode?: STTV3Languages | string;\n /** Transcription mode (v3 only). Default: 'transcribe' */\n mode?: STTModes | string;\n /** Conversation context to boost model accuracy */\n prompt?: string;\n /** Return chunk-level timestamps in REST response */\n withTimestamps?: boolean;\n}\n\n/** Combined options — discriminated by `model` field */\nexport type STTOptions = STTV2Options | STTTranslateOptions | STTV3Options;\n\n// ---------------------------------------------------------------------------\n// Resolved (internal) options — flat union of all fields\n// ---------------------------------------------------------------------------\n\ninterface ResolvedSTTOptions {\n apiKey: string;\n model: STTModels;\n streaming: boolean;\n // saarika:v2.5 and saaras:v3 only — not used by saaras:v2.5 (translate auto-detects)\n languageCode?: STTLanguages | string;\n // saaras:v3 and saaras:v2.5 (translate)\n mode?: STTModes | string;\n // saaras:v2.5 (translate) and saaras:v3\n prompt?: string;\n // saarika:v2.5 and saaras:v3 (/speech-to-text only, not translate)\n withTimestamps?: boolean;\n // WS-only flags\n highVadSensitivity?: boolean;\n flushSignal?: boolean;\n}\n\n// ---------------------------------------------------------------------------\n// Defaults per model\n// ---------------------------------------------------------------------------\n\nconst SAARIKA_DEFAULTS = {\n languageCode: 'en-IN',\n};\n\nconst SAARAS_V3_DEFAULTS = {\n languageCode: 'en-IN',\n mode: 'transcribe',\n};\n\nconst SAARAS_TRANSLATE_DEFAULTS = {\n mode: 'translate',\n};\n\n/** Runtime set of languages supported by saarika:v2.5 (for validation on model switch) */\nconst STTV2_LANGUAGE_SET: ReadonlySet<string> = new Set<STTV2Languages>([\n 'unknown',\n 'hi-IN',\n 'bn-IN',\n 'kn-IN',\n 'ml-IN',\n 'mr-IN',\n 'od-IN',\n 'pa-IN',\n 'ta-IN',\n 'te-IN',\n 'en-IN',\n 'gu-IN',\n]);\n\n// ---------------------------------------------------------------------------\n// Resolve caller options into a fully-populated internal struct\n// ---------------------------------------------------------------------------\n\nfunction resolveOptions(opts: Partial<STTOptions>): ResolvedSTTOptions {\n const apiKey = opts.apiKey ?? process.env.SARVAM_API_KEY;\n if (!apiKey) {\n throw new Error('Sarvam API key is required, whether as an argument or as $SARVAM_API_KEY');\n }\n\n const model: STTModels = opts.model ?? 'saaras:v3';\n\n const base: ResolvedSTTOptions = {\n apiKey,\n model,\n streaming: opts.streaming ?? true,\n highVadSensitivity: opts.highVadSensitivity,\n flushSignal: opts.flushSignal,\n };\n\n if (model === 'saaras:v2.5') {\n const translateOpts = opts as STTTranslateOptions;\n base.prompt = translateOpts.prompt;\n base.mode = translateOpts.mode ?? SAARAS_TRANSLATE_DEFAULTS.mode;\n } else if (model === 'saaras:v3') {\n const v3Opts = opts as STTV3Options;\n base.languageCode = normalizeLanguage(v3Opts.languageCode ?? SAARAS_V3_DEFAULTS.languageCode);\n base.mode = v3Opts.mode ?? SAARAS_V3_DEFAULTS.mode;\n base.prompt = v3Opts.prompt;\n base.withTimestamps = v3Opts.withTimestamps;\n } else {\n // saarika:v2.5\n let languageCode = normalizeLanguage(\n (opts as STTV2Options).languageCode ?? SAARIKA_DEFAULTS.languageCode,\n );\n if (!STTV2_LANGUAGE_SET.has(languageCode)) {\n languageCode = normalizeLanguage(SAARIKA_DEFAULTS.languageCode);\n }\n base.languageCode = languageCode;\n base.withTimestamps = (opts as STTV2Options).withTimestamps;\n }\n\n return base;\n}\n\n// ---------------------------------------------------------------------------\n// URL helpers\n// ---------------------------------------------------------------------------\n\nfunction getRestUrl(model: STTModels): string {\n return model === 'saaras:v2.5' ? SARVAM_STT_TRANSLATE_REST_URL : SARVAM_STT_REST_URL;\n}\n\nfunction getWsUrl(model: STTModels): string {\n return model === 'saaras:v2.5' ? SARVAM_STT_TRANSLATE_WS_URL : SARVAM_STT_WS_URL;\n}\n\nfunction buildWsUrl(opts: ResolvedSTTOptions): string {\n const base = getWsUrl(opts.model);\n const params = new URLSearchParams();\n params.set('model', opts.model);\n params.set('vad_signals', 'true');\n params.set('sample_rate', String(SAMPLE_RATE));\n params.set('input_audio_codec', 'pcm_s16le');\n\n if (opts.model !== 'saaras:v2.5' && opts.languageCode != null) {\n params.set('language-code', opts.languageCode);\n }\n\n // mode: v3 on STT WS, and translate WS (both endpoints support it)\n if (opts.mode != null) {\n params.set('mode', opts.mode);\n }\n\n // Optional WS params\n if (opts.highVadSensitivity != null) {\n params.set('high_vad_sensitivity', String(opts.highVadSensitivity));\n }\n if (opts.flushSignal != null) {\n params.set('flush_signal', String(opts.flushSignal));\n }\n\n return `${base}?${params.toString()}`;\n}\n\n// ---------------------------------------------------------------------------\n// Build the multipart form data (REST) — only sends model-relevant fields\n// ---------------------------------------------------------------------------\n\nfunction buildFormData(wavBlob: Blob, opts: ResolvedSTTOptions): FormData {\n const formData = new FormData();\n formData.append('file', wavBlob, 'audio.wav');\n formData.append('model', opts.model);\n\n if (opts.model !== 'saaras:v2.5' && opts.languageCode != null) {\n formData.append('language_code', opts.languageCode);\n }\n if (opts.model === 'saaras:v3' && opts.mode != null) {\n formData.append('mode', opts.mode);\n }\n if ((opts.model === 'saaras:v2.5' || opts.model === 'saaras:v3') && opts.prompt != null) {\n formData.append('prompt', opts.prompt);\n }\n if (opts.model !== 'saaras:v2.5' && opts.withTimestamps) {\n formData.append('with_timestamps', 'true');\n }\n\n return formData;\n}\n\n// ---------------------------------------------------------------------------\n// WAV encoding helper\n// ---------------------------------------------------------------------------\n\nfunction createWav(frame: AudioFrame): Buffer {\n const bitsPerSample = 16;\n const byteRate = (frame.sampleRate * frame.channels * bitsPerSample) / 8;\n const blockAlign = (frame.channels * bitsPerSample) / 8;\n\n const header = Buffer.alloc(44);\n header.write('RIFF', 0);\n header.writeUInt32LE(36 + frame.data.byteLength, 4);\n header.write('WAVE', 8);\n header.write('fmt ', 12);\n header.writeUInt32LE(16, 16);\n header.writeUInt16LE(1, 20);\n header.writeUInt16LE(frame.channels, 22);\n header.writeUInt32LE(frame.sampleRate, 24);\n header.writeUInt32LE(byteRate, 28);\n header.writeUInt16LE(blockAlign, 32);\n header.writeUInt16LE(bitsPerSample, 34);\n header.write('data', 36);\n header.writeUInt32LE(frame.data.byteLength, 40);\n\n const pcm = Buffer.from(frame.data.buffer, frame.data.byteOffset, frame.data.byteLength);\n return Buffer.concat([header, pcm]);\n}\n\n// ---------------------------------------------------------------------------\n// REST response type\n// ---------------------------------------------------------------------------\n\ninterface SarvamSTTResponse {\n request_id: string | null;\n transcript: string;\n language_code: string | null;\n language_probability?: number | null;\n timestamps?: {\n words: string[];\n start_time_seconds: number[];\n end_time_seconds: number[];\n } | null;\n}\n\n// ---------------------------------------------------------------------------\n// WS response types (from server Publish messages)\n// ---------------------------------------------------------------------------\n\n/** type: \"data\" */\ninterface SarvamWSTranscriptData {\n request_id?: string;\n transcript?: string;\n language_code?: string | null;\n language_probability?: number | null;\n timestamps?: Record<string, unknown> | null;\n diarized_transcript?: Record<string, unknown> | null;\n metrics?: {\n audio_duration?: number;\n processing_latency?: number;\n };\n}\n\n/** type: \"events\" */\ninterface SarvamWSEventData {\n event_type?: string;\n timestamp?: string;\n signal_type?: 'START_SPEECH' | 'END_SPEECH';\n occured_at?: number;\n}\n\n/** type: \"error\" — server sends data with message and code fields */\ninterface SarvamWSErrorData {\n message?: string;\n error?: string;\n code?: string;\n}\n\n// ---------------------------------------------------------------------------\n// STT class — supports both REST (recognize) and WebSocket (stream)\n// ---------------------------------------------------------------------------\n\nexport class STT extends stt.STT {\n private opts: ResolvedSTTOptions;\n label = 'sarvam.STT';\n\n /**\n * Create a new instance of Sarvam AI STT.\n *\n * @remarks\n * `apiKey` must be set to your Sarvam API key, either using the argument or by setting the\n * `SARVAM_API_KEY` environment variable.\n *\n * Supported models:\n * - `saaras:v3` (default, recommended) — supports all 22 languages, modes, prompt, timestamps, and uses `/speech-to-text`.\n * - `saaras:v2.5` — Indic-to-English translation via `/speech-to-text-translate`. Auto-detects source language. Supports prompt.\n * - `saarika:v2.5` — will be deprecated soon. Supports timestamps. All its languages are available in `saaras:v3`.\n *\n * @see {@link https://docs.sarvam.ai/api-reference-docs/speech-to-text/transcribe | Sarvam STT API docs}\n * @see {@link https://docs.sarvam.ai/api-reference-docs/speech-to-text-translate/translate | Sarvam STT Translate docs}\n */\n constructor(opts: Partial<STTOptions> = {}) {\n const resolved = resolveOptions(opts);\n super({\n streaming: resolved.streaming,\n interimResults: false,\n alignedTranscript: false,\n });\n this.opts = resolved;\n }\n\n updateOptions(opts: Partial<STTOptions>) {\n const modelChanging = opts.model != null && opts.model !== this.opts.model;\n\n const base: Partial<STTOptions> = modelChanging\n ? {\n apiKey: this.opts.apiKey,\n streaming: this.opts.streaming,\n ...(this.opts.highVadSensitivity != null\n ? { highVadSensitivity: this.opts.highVadSensitivity }\n : {}),\n ...(this.opts.flushSignal != null ? { flushSignal: this.opts.flushSignal } : {}),\n ...(this.opts.languageCode != null && opts.model !== 'saaras:v2.5'\n ? { languageCode: this.opts.languageCode as STTV3Languages }\n : {}),\n }\n : ({ ...this.opts } as Partial<STTOptions>);\n\n this.opts = resolveOptions({ ...base, ...opts } as STTOptions);\n }\n\n async _recognize(buffer: AudioBuffer, abortSignal?: AbortSignal): Promise<stt.SpeechEvent> {\n const frame = mergeFrames(buffer);\n const wavBuffer = createWav(frame);\n const wavBlob = new Blob([new Uint8Array(wavBuffer)], { type: 'audio/wav' });\n\n const formData = buildFormData(wavBlob, this.opts);\n\n const response = await fetch(getRestUrl(this.opts.model), {\n method: 'POST',\n headers: {\n 'api-subscription-key': this.opts.apiKey,\n },\n body: formData,\n signal: abortSignal ?? null,\n });\n\n if (!response.ok) {\n const errorBody = await response.text();\n throw new Error(`Sarvam STT API error ${response.status}: ${errorBody}`);\n }\n\n const data = (await response.json()) as SarvamSTTResponse;\n\n let startTime = 0;\n let endTime = 0;\n if (data.timestamps) {\n const starts = data.timestamps.start_time_seconds;\n const ends = data.timestamps.end_time_seconds;\n if (starts.length > 0) startTime = starts[0] ?? 0;\n if (ends.length > 0) endTime = ends[ends.length - 1] ?? 0;\n }\n\n return {\n type: stt.SpeechEventType.FINAL_TRANSCRIPT,\n requestId: data.request_id ?? undefined,\n alternatives: [\n {\n text: data.transcript || '',\n language: normalizeLanguage(data.language_code ?? this.opts.languageCode ?? 'unknown'),\n startTime,\n endTime,\n confidence: data.language_probability ?? 0,\n },\n ],\n };\n }\n\n stream(options?: { connOptions?: APIConnectOptions }): SpeechStream {\n if (!this.capabilities.streaming) {\n throw new Error(\n 'Sarvam STT streaming is disabled (`streaming: false`). Use recognize() for REST or wrap with stt.StreamAdapter + VAD for streaming behavior.',\n );\n }\n return new SpeechStream(this, this.opts, options?.connOptions);\n }\n}\n\n// ---------------------------------------------------------------------------\n// WebSocket streaming SpeechStream\n// ---------------------------------------------------------------------------\n\nexport class SpeechStream extends stt.SpeechStream {\n #opts: ResolvedSTTOptions;\n #audioEnergyFilter: AudioEnergyFilter;\n #logger = log();\n #speaking = false;\n #resetWS = new Future();\n #requestId = '';\n label = 'sarvam.SpeechStream';\n\n constructor(sttInstance: STT, opts: ResolvedSTTOptions, connOptions?: APIConnectOptions) {\n super(sttInstance, SAMPLE_RATE, connOptions);\n this.#opts = opts;\n this.closed = false;\n this.#audioEnergyFilter = new AudioEnergyFilter();\n }\n\n updateOptions(opts: Partial<STTOptions>) {\n const modelChanging = opts.model != null && opts.model !== this.#opts.model;\n\n const base: Partial<STTOptions> = modelChanging\n ? {\n apiKey: this.#opts.apiKey,\n ...(this.#opts.highVadSensitivity != null\n ? { highVadSensitivity: this.#opts.highVadSensitivity }\n : {}),\n ...(this.#opts.flushSignal != null ? { flushSignal: this.#opts.flushSignal } : {}),\n ...(this.#opts.languageCode != null && opts.model !== 'saaras:v2.5'\n ? { languageCode: this.#opts.languageCode as STTV3Languages }\n : {}),\n }\n : ({ ...this.#opts } as Partial<STTOptions>);\n\n this.#opts = resolveOptions({ ...base, ...opts } as STTOptions);\n this.#resetWS.resolve();\n }\n\n protected async run() {\n const maxRetry = 32;\n let retries = 0;\n\n while (!this.input.closed && !this.closed) {\n const wsUrl = buildWsUrl(this.#opts);\n this.#logger.info(`Sarvam STT connecting to: ${wsUrl}`);\n const ws = new WebSocket(wsUrl, {\n headers: { 'api-subscription-key': this.#opts.apiKey },\n });\n\n let sessionStart = 0;\n try {\n await new Promise<void>((resolve, reject) => {\n ws.once('open', () => resolve());\n ws.once('error', (err: Error) => reject(err));\n ws.once('close', (code: number) =>\n reject(new Error(`WebSocket closed with code ${code}`)),\n );\n });\n\n sessionStart = Date.now();\n await this.#runWS(ws);\n retries = 0;\n } catch (e) {\n // Clean up the WebSocket on failure to prevent listener leaks\n ws.removeAllListeners();\n ws.close();\n\n if (!this.closed && !this.input.closed) {\n // If the session ran for a meaningful duration (>5s), this was a working\n // session that ended normally (e.g. server idle timeout ~20s). Reset retries\n // so expected idle-timeout reconnections don't accumulate toward the fatal limit.\n if (sessionStart > 0 && Date.now() - sessionStart > 5000) {\n retries = 0;\n }\n if (retries >= maxRetry) {\n throw new Error(`Failed to connect to Sarvam STT after ${retries} attempts: ${e}`);\n }\n const delay = Math.min(retries * 5, 10);\n retries++;\n this.#logger.warn(\n `Failed to connect to Sarvam STT, retrying in ${delay}s: ${e} (${retries}/${maxRetry})`,\n );\n await new Promise((resolve) => setTimeout(resolve, delay * 1000));\n } else {\n this.#logger.warn(\n `Sarvam STT disconnected, connection is closed: ${e} (inputClosed: ${this.input.closed}, isClosed: ${this.closed})`,\n );\n }\n }\n }\n\n this.closed = true;\n }\n\n async #runWS(ws: WebSocket) {\n this.#resetWS = new Future();\n this.#speaking = false;\n let closing = false;\n // Session-scoped controller: aborted in finally to cancel sendTask on WS reset\n const sessionController = new AbortController();\n\n // Config message: only supported on translate WS endpoint (saaras:v2.5)\n // @see https://docs.sarvam.ai/api-reference-docs/speech-to-text-translate/translate/ws\n if (this.#opts.model === 'saaras:v2.5' && this.#opts.prompt != null) {\n ws.send(JSON.stringify({ type: 'config', prompt: this.#opts.prompt }));\n }\n\n // No keepalive — Sarvam rejects messages without 'audio' field, and sending\n // silent audio could confuse server-side VAD. On idle timeout (~20s), the\n // server closes the connection and the outer retry loop in run() reconnects.\n // This matches the Python SDK's approach.\n\n const wsMonitor = Task.from(async (controller) => {\n const closed = new Promise<void>((_, reject) => {\n ws.once('close', (code: number, reason: Buffer) => {\n if (!closing) {\n this.#logger.error(`WebSocket closed with code ${code}: ${reason}`);\n reject(new Error('WebSocket closed'));\n }\n });\n });\n await Promise.race([closed, waitForAbort(controller.signal)]);\n });\n\n const sendTask = async () => {\n const samples50Ms = Math.floor(SAMPLE_RATE / 20); // 50ms chunks\n const stream = new AudioByteStream(SAMPLE_RATE, NUM_CHANNELS, samples50Ms);\n const abortPromise = waitForAbort(this.abortSignal);\n const sessionAbort = waitForAbort(sessionController.signal);\n\n try {\n while (!this.closed) {\n const result = await Promise.race([this.input.next(), abortPromise, sessionAbort]);\n if (result === undefined) return; // aborted\n if (result.done) break;\n\n const data = result.value;\n\n let frames: AudioFrame[];\n if (data === SpeechStream.FLUSH_SENTINEL) {\n frames = stream.flush();\n } else if (data.sampleRate !== SAMPLE_RATE || data.channels !== NUM_CHANNELS) {\n throw new Error(\n `Expected ${SAMPLE_RATE}Hz/${NUM_CHANNELS}ch, got ${data.sampleRate}Hz/${data.channels}ch`,\n );\n } else {\n frames = stream.write(\n data.data.buffer.slice(\n data.data.byteOffset,\n data.data.byteOffset + data.data.byteLength,\n ) as ArrayBuffer,\n );\n }\n\n for (const frame of frames) {\n if (this.#audioEnergyFilter.pushFrame(frame)) {\n // Sarvam expects base64-encoded PCM in a JSON message\n const pcmBuffer = Buffer.from(\n frame.data.buffer,\n frame.data.byteOffset,\n frame.data.byteLength,\n );\n const base64Audio = pcmBuffer.toString('base64');\n ws.send(\n JSON.stringify({\n audio: {\n data: base64Audio,\n encoding: 'audio/wav',\n sample_rate: SAMPLE_RATE,\n },\n }),\n );\n }\n }\n\n // Send flush message on FLUSH_SENTINEL (VAD end of speech)\n if (data === SpeechStream.FLUSH_SENTINEL) {\n ws.send(JSON.stringify({ type: 'flush' }));\n }\n }\n } finally {\n closing = true;\n // Match Python: end_of_stream includes an empty audio field to avoid\n // \"audio must not be None\" rejection from the server\n try {\n ws.send(\n JSON.stringify({\n type: 'end_of_stream',\n audio: { data: '', encoding: 'audio/wav', sample_rate: SAMPLE_RATE },\n }),\n );\n } catch {\n // ws may already be closed\n }\n wsMonitor.cancel();\n }\n };\n\n const listenTask = Task.from(async (controller) => {\n const putMessage = (event: stt.SpeechEvent) => {\n if (!this.queue.closed) {\n try {\n this.queue.put(event);\n } catch {\n // ignore\n }\n }\n };\n\n const listenMessage = new Promise<void>((resolve, reject) => {\n ws.once('close', () => resolve());\n ws.on('message', (msg: RawData) => {\n try {\n const raw = msg.toString();\n this.#logger.debug(`Sarvam STT raw WS message: ${raw.substring(0, 500)}`);\n const json = JSON.parse(raw);\n const msgType: string = json['type'] ?? '';\n\n if (msgType === 'events') {\n const eventData = (json['data'] as SarvamWSEventData | undefined) ?? {};\n const signalType = eventData.signal_type;\n\n if (signalType === 'START_SPEECH') {\n if (!this.#speaking) {\n this.#speaking = true;\n putMessage({ type: stt.SpeechEventType.START_OF_SPEECH });\n }\n } else if (signalType === 'END_SPEECH') {\n if (this.#speaking) {\n this.#speaking = false;\n putMessage({ type: stt.SpeechEventType.END_OF_SPEECH });\n }\n }\n } else if (msgType === 'data') {\n const td = (json['data'] as SarvamWSTranscriptData | undefined) ?? {};\n const transcript = td.transcript ?? '';\n const language = normalizeLanguage(\n td.language_code ?? this.#opts.languageCode ?? 'unknown',\n );\n const requestId = td.request_id ?? '';\n const confidence = td.language_probability ?? 0;\n this.#requestId = requestId;\n\n // Log metrics when available\n if (td.metrics) {\n this.#logger.debug(\n `Sarvam STT metrics: audio_duration=${td.metrics.audio_duration}s, latency=${td.metrics.processing_latency}s`,\n );\n }\n\n if (transcript) {\n if (!this.#speaking) {\n this.#speaking = true;\n putMessage({ type: stt.SpeechEventType.START_OF_SPEECH });\n }\n\n putMessage({\n type: stt.SpeechEventType.FINAL_TRANSCRIPT,\n requestId,\n alternatives: [\n {\n text: transcript,\n language,\n startTime: 0,\n endTime: td.metrics?.audio_duration ?? 0,\n confidence,\n },\n ],\n });\n }\n } else if (msgType === 'error') {\n // Server format: { type: \"error\", data: { message: \"...\", code: \"...\" } }\n // Also check top-level and 'error' field as fallback\n const nested = json['data'] as SarvamWSErrorData | undefined;\n const errorInfo =\n nested?.message ??\n nested?.error ??\n json['error'] ??\n json['message'] ??\n 'Unknown error';\n const errorCode = nested?.code ?? json['code'] ?? '';\n this.#logger.error(`Sarvam STT WebSocket error [${errorCode}]: ${errorInfo}`);\n reject(new Error(`Sarvam STT API error [${errorCode}]: ${errorInfo}`));\n return;\n }\n\n if (this.closed || closing) {\n resolve();\n }\n } catch (err) {\n this.#logger.error(`Error processing Sarvam STT message: ${msg}`);\n reject(err);\n }\n });\n });\n\n await Promise.race([listenMessage, waitForAbort(controller.signal)]);\n }, this.abortController);\n\n try {\n await Promise.race([\n this.#resetWS.await,\n Promise.all([sendTask(), listenTask.result, wsMonitor.result]),\n ]);\n } finally {\n closing = true;\n sessionController.abort();\n // Do NOT call listenTask.cancel() — it would abort this.abortController\n // (passed to Task.from) and permanently break the stream. Instead, ws.close()\n // triggers the ws.once('close') handler inside listenMessage, letting listenTask\n // exit naturally. On close(), the parent abort signal handles it directly.\n wsMonitor.cancel();\n ws.close();\n // Suppress unhandled rejection from orphaned listenTask on reconnect\n listenTask.result.catch(() => {});\n }\n }\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAGA,oBAYO;AAEP,gBAAwC;AAaxC,MAAM,sBAAsB;AAC5B,MAAM,gCAAgC;AACtC,MAAM,oBAAoB;AAC1B,MAAM,8BAA8B;AAEpC,MAAM,cAAc;AACpB,MAAM,eAAe;AA6FrB,MAAM,mBAAmB;AAAA,EACvB,cAAc;AAChB;AAEA,MAAM,qBAAqB;AAAA,EACzB,cAAc;AAAA,EACd,MAAM;AACR;AAEA,MAAM,4BAA4B;AAAA,EAChC,MAAM;AACR;AAGA,MAAM,qBAA0C,oBAAI,IAAoB;AAAA,EACtE;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF,CAAC;AAMD,SAAS,eAAe,MAA+C;AACrE,QAAM,SAAS,KAAK,UAAU,QAAQ,IAAI;AAC1C,MAAI,CAAC,QAAQ;AACX,UAAM,IAAI,MAAM,0EAA0E;AAAA,EAC5F;AAEA,QAAM,QAAmB,KAAK,SAAS;AAEvC,QAAM,OAA2B;AAAA,IAC/B;AAAA,IACA;AAAA,IACA,WAAW,KAAK,aAAa;AAAA,IAC7B,oBAAoB,KAAK;AAAA,IACzB,aAAa,KAAK;AAAA,EACpB;AAEA,MAAI,UAAU,eAAe;AAC3B,UAAM,gBAAgB;AACtB,SAAK,SAAS,cAAc;AAC5B,SAAK,OAAO,cAAc,QAAQ,0BAA0B;AAAA,EAC9D,WAAW,UAAU,aAAa;AAChC,UAAM,SAAS;AACf,SAAK,mBAAe,iCAAkB,OAAO,gBAAgB,mBAAmB,YAAY;AAC5F,SAAK,OAAO,OAAO,QAAQ,mBAAmB;AAC9C,SAAK,SAAS,OAAO;AACrB,SAAK,iBAAiB,OAAO;AAAA,EAC/B,OAAO;AAEL,QAAI,mBAAe;AAAA,MAChB,KAAsB,gBAAgB,iBAAiB;AAAA,IAC1D;AACA,QAAI,CAAC,mBAAmB,IAAI,YAAY,GAAG;AACzC,yBAAe,iCAAkB,iBAAiB,YAAY;AAAA,IAChE;AACA,SAAK,eAAe;AACpB,SAAK,iBAAkB,KAAsB;AAAA,EAC/C;AAEA,SAAO;AACT;AAMA,SAAS,WAAW,OAA0B;AAC5C,SAAO,UAAU,gBAAgB,gCAAgC;AACnE;AAEA,SAAS,SAAS,OAA0B;AAC1C,SAAO,UAAU,gBAAgB,8BAA8B;AACjE;AAEA,SAAS,WAAW,MAAkC;AACpD,QAAM,OAAO,SAAS,KAAK,KAAK;AAChC,QAAM,SAAS,IAAI,gBAAgB;AACnC,SAAO,IAAI,SAAS,KAAK,KAAK;AAC9B,SAAO,IAAI,eAAe,MAAM;AAChC,SAAO,IAAI,eAAe,OAAO,WAAW,CAAC;AAC7C,SAAO,IAAI,qBAAqB,WAAW;AAE3C,MAAI,KAAK,UAAU,iBAAiB,KAAK,gBAAgB,MAAM;AAC7D,WAAO,IAAI,iBAAiB,KAAK,YAAY;AAAA,EAC/C;AAGA,MAAI,KAAK,QAAQ,MAAM;AACrB,WAAO,IAAI,QAAQ,KAAK,IAAI;AAAA,EAC9B;AAGA,MAAI,KAAK,sBAAsB,MAAM;AACnC,WAAO,IAAI,wBAAwB,OAAO,KAAK,kBAAkB,CAAC;AAAA,EACpE;AACA,MAAI,KAAK,eAAe,MAAM;AAC5B,WAAO,IAAI,gBAAgB,OAAO,KAAK,WAAW,CAAC;AAAA,EACrD;AAEA,SAAO,GAAG,IAAI,IAAI,OAAO,SAAS,CAAC;AACrC;AAMA,SAAS,cAAc,SAAe,MAAoC;AACxE,QAAM,WAAW,IAAI,SAAS;AAC9B,WAAS,OAAO,QAAQ,SAAS,WAAW;AAC5C,WAAS,OAAO,SAAS,KAAK,KAAK;AAEnC,MAAI,KAAK,UAAU,iBAAiB,KAAK,gBAAgB,MAAM;AAC7D,aAAS,OAAO,iBAAiB,KAAK,YAAY;AAAA,EACpD;AACA,MAAI,KAAK,UAAU,eAAe,KAAK,QAAQ,MAAM;AACnD,aAAS,OAAO,QAAQ,KAAK,IAAI;AAAA,EACnC;AACA,OAAK,KAAK,UAAU,iBAAiB,KAAK,UAAU,gBAAgB,KAAK,UAAU,MAAM;AACvF,aAAS,OAAO,UAAU,KAAK,MAAM;AAAA,EACvC;AACA,MAAI,KAAK,UAAU,iBAAiB,KAAK,gBAAgB;AACvD,aAAS,OAAO,mBAAmB,MAAM;AAAA,EAC3C;AAEA,SAAO;AACT;AAMA,SAAS,UAAU,OAA2B;AAC5C,QAAM,gBAAgB;AACtB,QAAM,WAAY,MAAM,aAAa,MAAM,WAAW,gBAAiB;AACvE,QAAM,aAAc,MAAM,WAAW,gBAAiB;AAEtD,QAAM,SAAS,OAAO,MAAM,EAAE;AAC9B,SAAO,MAAM,QAAQ,CAAC;AACtB,SAAO,cAAc,KAAK,MAAM,KAAK,YAAY,CAAC;AAClD,SAAO,MAAM,QAAQ,CAAC;AACtB,SAAO,MAAM,QAAQ,EAAE;AACvB,SAAO,cAAc,IAAI,EAAE;AAC3B,SAAO,cAAc,GAAG,EAAE;AAC1B,SAAO,cAAc,MAAM,UAAU,EAAE;AACvC,SAAO,cAAc,MAAM,YAAY,EAAE;AACzC,SAAO,cAAc,UAAU,EAAE;AACjC,SAAO,cAAc,YAAY,EAAE;AACnC,SAAO,cAAc,eAAe,EAAE;AACtC,SAAO,MAAM,QAAQ,EAAE;AACvB,SAAO,cAAc,MAAM,KAAK,YAAY,EAAE;AAE9C,QAAM,MAAM,OAAO,KAAK,MAAM,KAAK,QAAQ,MAAM,KAAK,YAAY,MAAM,KAAK,UAAU;AACvF,SAAO,OAAO,OAAO,CAAC,QAAQ,GAAG,CAAC;AACpC;AAuDO,MAAM,YAAY,kBAAI,IAAI;AAAA,EACvB;AAAA,EACR,QAAQ;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAiBR,YAAY,OAA4B,CAAC,GAAG;AAC1C,UAAM,WAAW,eAAe,IAAI;AACpC,UAAM;AAAA,MACJ,WAAW,SAAS;AAAA,MACpB,gBAAgB;AAAA,MAChB,mBAAmB;AAAA,IACrB,CAAC;AACD,SAAK,OAAO;AAAA,EACd;AAAA,EAEA,cAAc,MAA2B;AACvC,UAAM,gBAAgB,KAAK,SAAS,QAAQ,KAAK,UAAU,KAAK,KAAK;AAErE,UAAM,OAA4B,gBAC9B;AAAA,MACE,QAAQ,KAAK,KAAK;AAAA,MAClB,WAAW,KAAK,KAAK;AAAA,MACrB,GAAI,KAAK,KAAK,sBAAsB,OAChC,EAAE,oBAAoB,KAAK,KAAK,mBAAmB,IACnD,CAAC;AAAA,MACL,GAAI,KAAK,KAAK,eAAe,OAAO,EAAE,aAAa,KAAK,KAAK,YAAY,IAAI,CAAC;AAAA,MAC9E,GAAI,KAAK,KAAK,gBAAgB,QAAQ,KAAK,UAAU,gBACjD,EAAE,cAAc,KAAK,KAAK,aAA+B,IACzD,CAAC;AAAA,IACP,IACC,EAAE,GAAG,KAAK,KAAK;AAEpB,SAAK,OAAO,eAAe,EAAE,GAAG,MAAM,GAAG,KAAK,CAAe;AAAA,EAC/D;AAAA,EAEA,MAAM,WAAW,QAAqB,aAAqD;AACzF,UAAM,YAAQ,2BAAY,MAAM;AAChC,UAAM,YAAY,UAAU,KAAK;AACjC,UAAM,UAAU,IAAI,KAAK,CAAC,IAAI,WAAW,SAAS,CAAC,GAAG,EAAE,MAAM,YAAY,CAAC;AAE3E,UAAM,WAAW,cAAc,SAAS,KAAK,IAAI;AAEjD,UAAM,WAAW,MAAM,MAAM,WAAW,KAAK,KAAK,KAAK,GAAG;AAAA,MACxD,QAAQ;AAAA,MACR,SAAS;AAAA,QACP,wBAAwB,KAAK,KAAK;AAAA,MACpC;AAAA,MACA,MAAM;AAAA,MACN,QAAQ,eAAe;AAAA,IACzB,CAAC;AAED,QAAI,CAAC,SAAS,IAAI;AAChB,YAAM,YAAY,MAAM,SAAS,KAAK;AACtC,YAAM,IAAI,MAAM,wBAAwB,SAAS,MAAM,KAAK,SAAS,EAAE;AAAA,IACzE;AAEA,UAAM,OAAQ,MAAM,SAAS,KAAK;AAElC,QAAI,YAAY;AAChB,QAAI,UAAU;AACd,QAAI,KAAK,YAAY;AACnB,YAAM,SAAS,KAAK,WAAW;AAC/B,YAAM,OAAO,KAAK,WAAW;AAC7B,UAAI,OAAO,SAAS,EAAG,aAAY,OAAO,CAAC,KAAK;AAChD,UAAI,KAAK,SAAS,EAAG,WAAU,KAAK,KAAK,SAAS,CAAC,KAAK;AAAA,IAC1D;AAEA,WAAO;AAAA,MACL,MAAM,kBAAI,gBAAgB;AAAA,MAC1B,WAAW,KAAK,cAAc;AAAA,MAC9B,cAAc;AAAA,QACZ;AAAA,UACE,MAAM,KAAK,cAAc;AAAA,UACzB,cAAU,iCAAkB,KAAK,iBAAiB,KAAK,KAAK,gBAAgB,SAAS;AAAA,UACrF;AAAA,UACA;AAAA,UACA,YAAY,KAAK,wBAAwB;AAAA,QAC3C;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,OAAO,SAA6D;AAClE,QAAI,CAAC,KAAK,aAAa,WAAW;AAChC,YAAM,IAAI;AAAA,QACR;AAAA,MACF;AAAA,IACF;AACA,WAAO,IAAI,aAAa,MAAM,KAAK,MAAM,mCAAS,WAAW;AAAA,EAC/D;AACF;AAMO,MAAM,qBAAqB,kBAAI,aAAa;AAAA,EACjD;AAAA,EACA;AAAA,EACA,cAAU,mBAAI;AAAA,EACd,YAAY;AAAA,EACZ,WAAW,IAAI,qBAAO;AAAA,EACtB,aAAa;AAAA,EACb,QAAQ;AAAA,EAER,YAAY,aAAkB,MAA0B,aAAiC;AACvF,UAAM,aAAa,aAAa,WAAW;AAC3C,SAAK,QAAQ;AACb,SAAK,SAAS;AACd,SAAK,qBAAqB,IAAI,gCAAkB;AAAA,EAClD;AAAA,EAEA,cAAc,MAA2B;AACvC,UAAM,gBAAgB,KAAK,SAAS,QAAQ,KAAK,UAAU,KAAK,MAAM;AAEtE,UAAM,OAA4B,gBAC9B;AAAA,MACE,QAAQ,KAAK,MAAM;AAAA,MACnB,GAAI,KAAK,MAAM,sBAAsB,OACjC,EAAE,oBAAoB,KAAK,MAAM,mBAAmB,IACpD,CAAC;AAAA,MACL,GAAI,KAAK,MAAM,eAAe,OAAO,EAAE,aAAa,KAAK,MAAM,YAAY,IAAI,CAAC;AAAA,MAChF,GAAI,KAAK,MAAM,gBAAgB,QAAQ,KAAK,UAAU,gBAClD,EAAE,cAAc,KAAK,MAAM,aAA+B,IAC1D,CAAC;AAAA,IACP,IACC,EAAE,GAAG,KAAK,MAAM;AAErB,SAAK,QAAQ,eAAe,EAAE,GAAG,MAAM,GAAG,KAAK,CAAe;AAC9D,SAAK,SAAS,QAAQ;AAAA,EACxB;AAAA,EAEA,MAAgB,MAAM;AACpB,UAAM,WAAW;AACjB,QAAI,UAAU;AAEd,WAAO,CAAC,KAAK,MAAM,UAAU,CAAC,KAAK,QAAQ;AACzC,YAAM,QAAQ,WAAW,KAAK,KAAK;AACnC,WAAK,QAAQ,KAAK,6BAA6B,KAAK,EAAE;AACtD,YAAM,KAAK,IAAI,oBAAU,OAAO;AAAA,QAC9B,SAAS,EAAE,wBAAwB,KAAK,MAAM,OAAO;AAAA,MACvD,CAAC;AAED,UAAI,eAAe;AACnB,UAAI;AACF,cAAM,IAAI,QAAc,CAAC,SAAS,WAAW;AAC3C,aAAG,KAAK,QAAQ,MAAM,QAAQ,CAAC;AAC/B,aAAG,KAAK,SAAS,CAAC,QAAe,OAAO,GAAG,CAAC;AAC5C,aAAG;AAAA,YAAK;AAAA,YAAS,CAAC,SAChB,OAAO,IAAI,MAAM,8BAA8B,IAAI,EAAE,CAAC;AAAA,UACxD;AAAA,QACF,CAAC;AAED,uBAAe,KAAK,IAAI;AACxB,cAAM,KAAK,OAAO,EAAE;AACpB,kBAAU;AAAA,MACZ,SAAS,GAAG;AAEV,WAAG,mBAAmB;AACtB,WAAG,MAAM;AAET,YAAI,CAAC,KAAK,UAAU,CAAC,KAAK,MAAM,QAAQ;AAItC,cAAI,eAAe,KAAK,KAAK,IAAI,IAAI,eAAe,KAAM;AACxD,sBAAU;AAAA,UACZ;AACA,cAAI,WAAW,UAAU;AACvB,kBAAM,IAAI,MAAM,yCAAyC,OAAO,cAAc,CAAC,EAAE;AAAA,UACnF;AACA,gBAAM,QAAQ,KAAK,IAAI,UAAU,GAAG,EAAE;AACtC;AACA,eAAK,QAAQ;AAAA,YACX,gDAAgD,KAAK,MAAM,CAAC,KAAK,OAAO,IAAI,QAAQ;AAAA,UACtF;AACA,gBAAM,IAAI,QAAQ,CAAC,YAAY,WAAW,SAAS,QAAQ,GAAI,CAAC;AAAA,QAClE,OAAO;AACL,eAAK,QAAQ;AAAA,YACX,kDAAkD,CAAC,kBAAkB,KAAK,MAAM,MAAM,eAAe,KAAK,MAAM;AAAA,UAClH;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAEA,SAAK,SAAS;AAAA,EAChB;AAAA,EAEA,MAAM,OAAO,IAAe;AAC1B,SAAK,WAAW,IAAI,qBAAO;AAC3B,SAAK,YAAY;AACjB,QAAI,UAAU;AAEd,UAAM,oBAAoB,IAAI,gBAAgB;AAI9C,QAAI,KAAK,MAAM,UAAU,iBAAiB,KAAK,MAAM,UAAU,MAAM;AACnE,SAAG,KAAK,KAAK,UAAU,EAAE,MAAM,UAAU,QAAQ,KAAK,MAAM,OAAO,CAAC,CAAC;AAAA,IACvE;AAOA,UAAM,YAAY,mBAAK,KAAK,OAAO,eAAe;AAChD,YAAM,SAAS,IAAI,QAAc,CAAC,GAAG,WAAW;AAC9C,WAAG,KAAK,SAAS,CAAC,MAAc,WAAmB;AACjD,cAAI,CAAC,SAAS;AACZ,iBAAK,QAAQ,MAAM,8BAA8B,IAAI,KAAK,MAAM,EAAE;AAClE,mBAAO,IAAI,MAAM,kBAAkB,CAAC;AAAA,UACtC;AAAA,QACF,CAAC;AAAA,MACH,CAAC;AACD,YAAM,QAAQ,KAAK,CAAC,YAAQ,4BAAa,WAAW,MAAM,CAAC,CAAC;AAAA,IAC9D,CAAC;AAED,UAAM,WAAW,YAAY;AAC3B,YAAM,cAAc,KAAK,MAAM,cAAc,EAAE;AAC/C,YAAM,SAAS,IAAI,8BAAgB,aAAa,cAAc,WAAW;AACzE,YAAM,mBAAe,4BAAa,KAAK,WAAW;AAClD,YAAM,mBAAe,4BAAa,kBAAkB,MAAM;AAE1D,UAAI;AACF,eAAO,CAAC,KAAK,QAAQ;AACnB,gBAAM,SAAS,MAAM,QAAQ,KAAK,CAAC,KAAK,MAAM,KAAK,GAAG,cAAc,YAAY,CAAC;AACjF,cAAI,WAAW,OAAW;AAC1B,cAAI,OAAO,KAAM;AAEjB,gBAAM,OAAO,OAAO;AAEpB,cAAI;AACJ,cAAI,SAAS,aAAa,gBAAgB;AACxC,qBAAS,OAAO,MAAM;AAAA,UACxB,WAAW,KAAK,eAAe,eAAe,KAAK,aAAa,cAAc;AAC5E,kBAAM,IAAI;AAAA,cACR,YAAY,WAAW,MAAM,YAAY,WAAW,KAAK,UAAU,MAAM,KAAK,QAAQ;AAAA,YACxF;AAAA,UACF,OAAO;AACL,qBAAS,OAAO;AAAA,cACd,KAAK,KAAK,OAAO;AAAA,gBACf,KAAK,KAAK;AAAA,gBACV,KAAK,KAAK,aAAa,KAAK,KAAK;AAAA,cACnC;AAAA,YACF;AAAA,UACF;AAEA,qBAAW,SAAS,QAAQ;AAC1B,gBAAI,KAAK,mBAAmB,UAAU,KAAK,GAAG;AAE5C,oBAAM,YAAY,OAAO;AAAA,gBACvB,MAAM,KAAK;AAAA,gBACX,MAAM,KAAK;AAAA,gBACX,MAAM,KAAK;AAAA,cACb;AACA,oBAAM,cAAc,UAAU,SAAS,QAAQ;AAC/C,iBAAG;AAAA,gBACD,KAAK,UAAU;AAAA,kBACb,OAAO;AAAA,oBACL,MAAM;AAAA,oBACN,UAAU;AAAA,oBACV,aAAa;AAAA,kBACf;AAAA,gBACF,CAAC;AAAA,cACH;AAAA,YACF;AAAA,UACF;AAGA,cAAI,SAAS,aAAa,gBAAgB;AACxC,eAAG,KAAK,KAAK,UAAU,EAAE,MAAM,QAAQ,CAAC,CAAC;AAAA,UAC3C;AAAA,QACF;AAAA,MACF,UAAE;AACA,kBAAU;AAGV,YAAI;AACF,aAAG;AAAA,YACD,KAAK,UAAU;AAAA,cACb,MAAM;AAAA,cACN,OAAO,EAAE,MAAM,IAAI,UAAU,aAAa,aAAa,YAAY;AAAA,YACrE,CAAC;AAAA,UACH;AAAA,QACF,QAAQ;AAAA,QAER;AACA,kBAAU,OAAO;AAAA,MACnB;AAAA,IACF;AAEA,UAAM,aAAa,mBAAK,KAAK,OAAO,eAAe;AACjD,YAAM,aAAa,CAAC,UAA2B;AAC7C,YAAI,CAAC,KAAK,MAAM,QAAQ;AACtB,cAAI;AACF,iBAAK,MAAM,IAAI,KAAK;AAAA,UACtB,QAAQ;AAAA,UAER;AAAA,QACF;AAAA,MACF;AAEA,YAAM,gBAAgB,IAAI,QAAc,CAAC,SAAS,WAAW;AAC3D,WAAG,KAAK,SAAS,MAAM,QAAQ,CAAC;AAChC,WAAG,GAAG,WAAW,CAAC,QAAiB;AA5pB3C;AA6pBU,cAAI;AACF,kBAAM,MAAM,IAAI,SAAS;AACzB,iBAAK,QAAQ,MAAM,8BAA8B,IAAI,UAAU,GAAG,GAAG,CAAC,EAAE;AACxE,kBAAM,OAAO,KAAK,MAAM,GAAG;AAC3B,kBAAM,UAAkB,KAAK,MAAM,KAAK;AAExC,gBAAI,YAAY,UAAU;AACxB,oBAAM,YAAa,KAAK,MAAM,KAAuC,CAAC;AACtE,oBAAM,aAAa,UAAU;AAE7B,kBAAI,eAAe,gBAAgB;AACjC,oBAAI,CAAC,KAAK,WAAW;AACnB,uBAAK,YAAY;AACjB,6BAAW,EAAE,MAAM,kBAAI,gBAAgB,gBAAgB,CAAC;AAAA,gBAC1D;AAAA,cACF,WAAW,eAAe,cAAc;AACtC,oBAAI,KAAK,WAAW;AAClB,uBAAK,YAAY;AACjB,6BAAW,EAAE,MAAM,kBAAI,gBAAgB,cAAc,CAAC;AAAA,gBACxD;AAAA,cACF;AAAA,YACF,WAAW,YAAY,QAAQ;AAC7B,oBAAM,KAAM,KAAK,MAAM,KAA4C,CAAC;AACpE,oBAAM,aAAa,GAAG,cAAc;AACpC,oBAAM,eAAW;AAAA,gBACf,GAAG,iBAAiB,KAAK,MAAM,gBAAgB;AAAA,cACjD;AACA,oBAAM,YAAY,GAAG,cAAc;AACnC,oBAAM,aAAa,GAAG,wBAAwB;AAC9C,mBAAK,aAAa;AAGlB,kBAAI,GAAG,SAAS;AACd,qBAAK,QAAQ;AAAA,kBACX,sCAAsC,GAAG,QAAQ,cAAc,cAAc,GAAG,QAAQ,kBAAkB;AAAA,gBAC5G;AAAA,cACF;AAEA,kBAAI,YAAY;AACd,oBAAI,CAAC,KAAK,WAAW;AACnB,uBAAK,YAAY;AACjB,6BAAW,EAAE,MAAM,kBAAI,gBAAgB,gBAAgB,CAAC;AAAA,gBAC1D;AAEA,2BAAW;AAAA,kBACT,MAAM,kBAAI,gBAAgB;AAAA,kBAC1B;AAAA,kBACA,cAAc;AAAA,oBACZ;AAAA,sBACE,MAAM;AAAA,sBACN;AAAA,sBACA,WAAW;AAAA,sBACX,WAAS,QAAG,YAAH,mBAAY,mBAAkB;AAAA,sBACvC;AAAA,oBACF;AAAA,kBACF;AAAA,gBACF,CAAC;AAAA,cACH;AAAA,YACF,WAAW,YAAY,SAAS;AAG9B,oBAAM,SAAS,KAAK,MAAM;AAC1B,oBAAM,aACJ,iCAAQ,aACR,iCAAQ,UACR,KAAK,OAAO,KACZ,KAAK,SAAS,KACd;AACF,oBAAM,aAAY,iCAAQ,SAAQ,KAAK,MAAM,KAAK;AAClD,mBAAK,QAAQ,MAAM,+BAA+B,SAAS,MAAM,SAAS,EAAE;AAC5E,qBAAO,IAAI,MAAM,yBAAyB,SAAS,MAAM,SAAS,EAAE,CAAC;AACrE;AAAA,YACF;AAEA,gBAAI,KAAK,UAAU,SAAS;AAC1B,sBAAQ;AAAA,YACV;AAAA,UACF,SAAS,KAAK;AACZ,iBAAK,QAAQ,MAAM,wCAAwC,GAAG,EAAE;AAChE,mBAAO,GAAG;AAAA,UACZ;AAAA,QACF,CAAC;AAAA,MACH,CAAC;AAED,YAAM,QAAQ,KAAK,CAAC,mBAAe,4BAAa,WAAW,MAAM,CAAC,CAAC;AAAA,IACrE,GAAG,KAAK,eAAe;AAEvB,QAAI;AACF,YAAM,QAAQ,KAAK;AAAA,QACjB,KAAK,SAAS;AAAA,QACd,QAAQ,IAAI,CAAC,SAAS,GAAG,WAAW,QAAQ,UAAU,MAAM,CAAC;AAAA,MAC/D,CAAC;AAAA,IACH,UAAE;AACA,gBAAU;AACV,wBAAkB,MAAM;AAKxB,gBAAU,OAAO;AACjB,SAAG,MAAM;AAET,iBAAW,OAAO,MAAM,MAAM;AAAA,MAAC,CAAC;AAAA,IAClC;AAAA,EACF;AACF;","names":[]}
package/dist/stt.d.ts.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"file":"stt.d.ts","sourceRoot":"","sources":["../src/stt.ts"],"names":[],"mappings":"AAGA,OAAO,EACL,KAAK,iBAAiB,EACtB,KAAK,WAAW,EAOhB,GAAG,EAEJ,MAAM,iBAAiB,CAAC;AAGzB,OAAO,KAAK,EACV,YAAY,EACZ,SAAS,EACT,QAAQ,EACR,cAAc,EACd,cAAc,EACf,MAAM,aAAa,CAAC;AAkBrB,UAAU,cAAc;IACtB,kDAAkD;IAClD,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB;;;;OAIG;IACH,SAAS,CAAC,EAAE,OAAO,CAAC;IACpB,sFAAsF;IACtF,kBAAkB,CAAC,EAAE,OAAO,CAAC;IAC7B,4FAA4F;IAC5F,WAAW,CAAC,EAAE,OAAO,CAAC;CACvB;AAED;;;;;GAKG;AACH,MAAM,WAAW,YAAa,SAAQ,cAAc;IAClD,KAAK,EAAE,cAAc,CAAC;IACtB,qFAAqF;IACrF,YAAY,CAAC,EAAE,cAAc,GAAG,MAAM,CAAC;IACvC,qDAAqD;IACrD,cAAc,CAAC,EAAE,OAAO,CAAC;CAC1B;AAED;;;;;GAKG;AACH,MAAM,WAAW,mBAAoB,SAAQ,cAAc;IACzD,KAAK,EAAE,aAAa,CAAC;IACrB,mDAAmD;IACnD,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,mDAAmD;IACnD,IAAI,CAAC,EAAE,QAAQ,GAAG,MAAM,CAAC;CAC1B;AAED;;;GAGG;AACH,MAAM,WAAW,YAAa,SAAQ,cAAc;IAClD,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,qFAAqF;IACrF,YAAY,CAAC,EAAE,cAAc,GAAG,MAAM,CAAC;IACvC,0DAA0D;IAC1D,IAAI,CAAC,EAAE,QAAQ,GAAG,MAAM,CAAC;IACzB,mDAAmD;IACnD,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,qDAAqD;IACrD,cAAc,CAAC,EAAE,OAAO,CAAC;CAC1B;AAED,wDAAwD;AACxD,MAAM,MAAM,UAAU,GAAG,YAAY,GAAG,mBAAmB,GAAG,YAAY,CAAC;AAM3E,UAAU,kBAAkB;IAC1B,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,EAAE,SAAS,CAAC;IACjB,SAAS,EAAE,OAAO,CAAC;IAEnB,YAAY,CAAC,EAAE,YAAY,GAAG,MAAM,CAAC;IAErC,IAAI,CAAC,EAAE,QAAQ,GAAG,MAAM,CAAC;IAEzB,MAAM,CAAC,EAAE,MAAM,CAAC;IAEhB,cAAc,CAAC,EAAE,OAAO,CAAC;IAEzB,kBAAkB,CAAC,EAAE,OAAO,CAAC;IAC7B,WAAW,CAAC,EAAE,OAAO,CAAC;CACvB;AAgOD,qBAAa,GAAI,SAAQ,GAAG,CAAC,GAAG;IAC9B,OAAO,CAAC,IAAI,CAAqB;IACjC,KAAK,SAAgB;IAErB;;;;;;;;;;;;;;OAcG;gBACS,IAAI,GAAE,OAAO,CAAC,UAAU,CAAM;IAU1C,aAAa,CAAC,IAAI,EAAE,OAAO,CAAC,UAAU,CAAC;IAoBjC,UAAU,CAAC,MAAM,EAAE,WAAW,EAAE,WAAW,CAAC,EAAE,WAAW,GAAG,OAAO,CAAC,GAAG,CAAC,WAAW,CAAC;IA+C1F,MAAM,CAAC,OAAO,CAAC,EAAE;QAAE,WAAW,CAAC,EAAE,iBAAiB,CAAA;KAAE,GAAG,YAAY;CAQpE;AAMD,qBAAa,YAAa,SAAQ,GAAG,CAAC,YAAY;;IAOhD,KAAK,SAAyB;gBAElB,WAAW,EAAE,GAAG,EAAE,IAAI,EAAE,kBAAkB,EAAE,WAAW,CAAC,EAAE,iBAAiB;IAOvF,aAAa,CAAC,IAAI,EAAE,OAAO,CAAC,UAAU,CAAC;cAoBvB,GAAG;CAqRpB"}
1
+ {"version":3,"file":"stt.d.ts","sourceRoot":"","sources":["../src/stt.ts"],"names":[],"mappings":"AAGA,OAAO,EACL,KAAK,iBAAiB,EACtB,KAAK,WAAW,EAQhB,GAAG,EAEJ,MAAM,iBAAiB,CAAC;AAGzB,OAAO,KAAK,EACV,YAAY,EACZ,SAAS,EACT,QAAQ,EACR,cAAc,EACd,cAAc,EACf,MAAM,aAAa,CAAC;AAkBrB,UAAU,cAAc;IACtB,kDAAkD;IAClD,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB;;;;OAIG;IACH,SAAS,CAAC,EAAE,OAAO,CAAC;IACpB,sFAAsF;IACtF,kBAAkB,CAAC,EAAE,OAAO,CAAC;IAC7B,4FAA4F;IAC5F,WAAW,CAAC,EAAE,OAAO,CAAC;CACvB;AAED;;;;;GAKG;AACH,MAAM,WAAW,YAAa,SAAQ,cAAc;IAClD,KAAK,EAAE,cAAc,CAAC;IACtB,qFAAqF;IACrF,YAAY,CAAC,EAAE,cAAc,GAAG,MAAM,CAAC;IACvC,qDAAqD;IACrD,cAAc,CAAC,EAAE,OAAO,CAAC;CAC1B;AAED;;;;;GAKG;AACH,MAAM,WAAW,mBAAoB,SAAQ,cAAc;IACzD,KAAK,EAAE,aAAa,CAAC;IACrB,mDAAmD;IACnD,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,mDAAmD;IACnD,IAAI,CAAC,EAAE,QAAQ,GAAG,MAAM,CAAC;CAC1B;AAED;;;GAGG;AACH,MAAM,WAAW,YAAa,SAAQ,cAAc;IAClD,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,qFAAqF;IACrF,YAAY,CAAC,EAAE,cAAc,GAAG,MAAM,CAAC;IACvC,0DAA0D;IAC1D,IAAI,CAAC,EAAE,QAAQ,GAAG,MAAM,CAAC;IACzB,mDAAmD;IACnD,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,qDAAqD;IACrD,cAAc,CAAC,EAAE,OAAO,CAAC;CAC1B;AAED,wDAAwD;AACxD,MAAM,MAAM,UAAU,GAAG,YAAY,GAAG,mBAAmB,GAAG,YAAY,CAAC;AAM3E,UAAU,kBAAkB;IAC1B,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,EAAE,SAAS,CAAC;IACjB,SAAS,EAAE,OAAO,CAAC;IAEnB,YAAY,CAAC,EAAE,YAAY,GAAG,MAAM,CAAC;IAErC,IAAI,CAAC,EAAE,QAAQ,GAAG,MAAM,CAAC;IAEzB,MAAM,CAAC,EAAE,MAAM,CAAC;IAEhB,cAAc,CAAC,EAAE,OAAO,CAAC;IAEzB,kBAAkB,CAAC,EAAE,OAAO,CAAC;IAC7B,WAAW,CAAC,EAAE,OAAO,CAAC;CACvB;AAkOD,qBAAa,GAAI,SAAQ,GAAG,CAAC,GAAG;IAC9B,OAAO,CAAC,IAAI,CAAqB;IACjC,KAAK,SAAgB;IAErB;;;;;;;;;;;;;;OAcG;gBACS,IAAI,GAAE,OAAO,CAAC,UAAU,CAAM;IAU1C,aAAa,CAAC,IAAI,EAAE,OAAO,CAAC,UAAU,CAAC;IAoBjC,UAAU,CAAC,MAAM,EAAE,WAAW,EAAE,WAAW,CAAC,EAAE,WAAW,GAAG,OAAO,CAAC,GAAG,CAAC,WAAW,CAAC;IA+C1F,MAAM,CAAC,OAAO,CAAC,EAAE;QAAE,WAAW,CAAC,EAAE,iBAAiB,CAAA;KAAE,GAAG,YAAY;CAQpE;AAMD,qBAAa,YAAa,SAAQ,GAAG,CAAC,YAAY;;IAOhD,KAAK,SAAyB;gBAElB,WAAW,EAAE,GAAG,EAAE,IAAI,EAAE,kBAAkB,EAAE,WAAW,CAAC,EAAE,iBAAiB;IAOvF,aAAa,CAAC,IAAI,EAAE,OAAO,CAAC,UAAU,CAAC;cAoBvB,GAAG;CAuRpB"}
package/dist/stt.js CHANGED
@@ -5,6 +5,7 @@ import {
5
5
  Task,
6
6
  log,
7
7
  mergeFrames,
8
+ normalizeLanguage,
8
9
  stt,
9
10
  waitForAbort
10
11
  } from "@livekit/agents";
@@ -58,14 +59,16 @@ function resolveOptions(opts) {
58
59
  base.mode = translateOpts.mode ?? SAARAS_TRANSLATE_DEFAULTS.mode;
59
60
  } else if (model === "saaras:v3") {
60
61
  const v3Opts = opts;
61
- base.languageCode = v3Opts.languageCode ?? SAARAS_V3_DEFAULTS.languageCode;
62
+ base.languageCode = normalizeLanguage(v3Opts.languageCode ?? SAARAS_V3_DEFAULTS.languageCode);
62
63
  base.mode = v3Opts.mode ?? SAARAS_V3_DEFAULTS.mode;
63
64
  base.prompt = v3Opts.prompt;
64
65
  base.withTimestamps = v3Opts.withTimestamps;
65
66
  } else {
66
- let languageCode = opts.languageCode ?? SAARIKA_DEFAULTS.languageCode;
67
+ let languageCode = normalizeLanguage(
68
+ opts.languageCode ?? SAARIKA_DEFAULTS.languageCode
69
+ );
67
70
  if (!STTV2_LANGUAGE_SET.has(languageCode)) {
68
- languageCode = SAARIKA_DEFAULTS.languageCode;
71
+ languageCode = normalizeLanguage(SAARIKA_DEFAULTS.languageCode);
69
72
  }
70
73
  base.languageCode = languageCode;
71
74
  base.withTimestamps = opts.withTimestamps;
@@ -208,7 +211,7 @@ class STT extends stt.STT {
208
211
  alternatives: [
209
212
  {
210
213
  text: data.transcript || "",
211
- language: data.language_code ?? this.opts.languageCode ?? "unknown",
214
+ language: normalizeLanguage(data.language_code ?? this.opts.languageCode ?? "unknown"),
212
215
  startTime,
213
216
  endTime,
214
217
  confidence: data.language_probability ?? 0
@@ -414,7 +417,9 @@ class SpeechStream extends stt.SpeechStream {
414
417
  } else if (msgType === "data") {
415
418
  const td = json["data"] ?? {};
416
419
  const transcript = td.transcript ?? "";
417
- const language = td.language_code ?? this.#opts.languageCode ?? "unknown";
420
+ const language = normalizeLanguage(
421
+ td.language_code ?? this.#opts.languageCode ?? "unknown"
422
+ );
418
423
  const requestId = td.request_id ?? "";
419
424
  const confidence = td.language_probability ?? 0;
420
425
  this.#requestId = requestId;
package/dist/stt.js.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"sources":["../src/stt.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2025 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport {\n type APIConnectOptions,\n type AudioBuffer,\n AudioByteStream,\n AudioEnergyFilter,\n Future,\n Task,\n log,\n mergeFrames,\n stt,\n waitForAbort,\n} from '@livekit/agents';\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { type RawData, WebSocket } from 'ws';\nimport type {\n STTLanguages,\n STTModels,\n STTModes,\n STTV2Languages,\n STTV3Languages,\n} from './models.js';\n\n// ---------------------------------------------------------------------------\n// Endpoint URLs\n// ---------------------------------------------------------------------------\n\nconst SARVAM_STT_REST_URL = 'https://api.sarvam.ai/speech-to-text';\nconst SARVAM_STT_TRANSLATE_REST_URL = 'https://api.sarvam.ai/speech-to-text-translate';\nconst SARVAM_STT_WS_URL = 'wss://api.sarvam.ai/speech-to-text/ws';\nconst SARVAM_STT_TRANSLATE_WS_URL = 'wss://api.sarvam.ai/speech-to-text-translate/ws';\n\nconst SAMPLE_RATE = 16000;\nconst NUM_CHANNELS = 1;\n\n// ---------------------------------------------------------------------------\n// Model-specific option types\n// ---------------------------------------------------------------------------\n\ninterface STTBaseOptions {\n /** Sarvam API key. Defaults to $SARVAM_API_KEY */\n apiKey?: string;\n /**\n * Whether to use native WebSocket streaming for `stream()`.\n * Set to `false` to prefer non-streaming REST recognition (used by Agent via StreamAdapter + VAD).\n * Default: `true`.\n */\n streaming?: boolean;\n /** Increase VAD sensitivity (WS only). Maps to `high_vad_sensitivity` query param. */\n highVadSensitivity?: boolean;\n /** Enable flush signal events from server (WS only). Maps to `flush_signal` query param. */\n flushSignal?: boolean;\n}\n\n/**\n * Options specific to saarika:v2.5.\n * saarika:v2.5 will be deprecated soon — prefer {@link STTV3Options} with `saaras:v3` for new integrations.\n * All v2.5 language codes are also supported by v3.\n * @see {@link https://docs.sarvam.ai/api-reference-docs/speech-to-text/transcribe | Sarvam STT API docs}\n */\nexport interface STTV2Options extends STTBaseOptions {\n model: 'saarika:v2.5';\n /** Language code (BCP-47). Default: 'en-IN'. Set to 'unknown' for auto-detection. */\n languageCode?: STTV2Languages | string;\n /** Return chunk-level timestamps in REST response */\n withTimestamps?: boolean;\n}\n\n/**\n * Options specific to saaras:v2.5 (dedicated translate endpoint).\n * Uses the `/speech-to-text-translate` endpoint for Indic-to-English translation.\n * Auto-detects the source language; does not accept language codes or timestamps.\n * @see {@link https://docs.sarvam.ai/api-reference-docs/speech-to-text-translate/translate | Sarvam STT Translate docs}\n */\nexport interface STTTranslateOptions extends STTBaseOptions {\n model: 'saaras:v2.5';\n /** Conversation context to boost model accuracy */\n prompt?: string;\n /** Mode for translate WS. Default: 'translate'. */\n mode?: STTModes | string;\n}\n\n/**\n * Options specific to saaras:v3 (recommended).\n * @see {@link https://docs.sarvam.ai/api-reference-docs/speech-to-text/transcribe | Sarvam STT API docs}\n */\nexport interface STTV3Options extends STTBaseOptions {\n model?: 'saaras:v3';\n /** Language code (BCP-47). Default: 'en-IN'. Set to 'unknown' for auto-detection. */\n languageCode?: STTV3Languages | string;\n /** Transcription mode (v3 only). Default: 'transcribe' */\n mode?: STTModes | string;\n /** Conversation context to boost model accuracy */\n prompt?: string;\n /** Return chunk-level timestamps in REST response */\n withTimestamps?: boolean;\n}\n\n/** Combined options — discriminated by `model` field */\nexport type STTOptions = STTV2Options | STTTranslateOptions | STTV3Options;\n\n// ---------------------------------------------------------------------------\n// Resolved (internal) options — flat union of all fields\n// ---------------------------------------------------------------------------\n\ninterface ResolvedSTTOptions {\n apiKey: string;\n model: STTModels;\n streaming: boolean;\n // saarika:v2.5 and saaras:v3 only — not used by saaras:v2.5 (translate auto-detects)\n languageCode?: STTLanguages | string;\n // saaras:v3 and saaras:v2.5 (translate)\n mode?: STTModes | string;\n // saaras:v2.5 (translate) and saaras:v3\n prompt?: string;\n // saarika:v2.5 and saaras:v3 (/speech-to-text only, not translate)\n withTimestamps?: boolean;\n // WS-only flags\n highVadSensitivity?: boolean;\n flushSignal?: boolean;\n}\n\n// ---------------------------------------------------------------------------\n// Defaults per model\n// ---------------------------------------------------------------------------\n\nconst SAARIKA_DEFAULTS = {\n languageCode: 'en-IN',\n};\n\nconst SAARAS_V3_DEFAULTS = {\n languageCode: 'en-IN',\n mode: 'transcribe',\n};\n\nconst SAARAS_TRANSLATE_DEFAULTS = {\n mode: 'translate',\n};\n\n/** Runtime set of languages supported by saarika:v2.5 (for validation on model switch) */\nconst STTV2_LANGUAGE_SET: ReadonlySet<string> = new Set<STTV2Languages>([\n 'unknown',\n 'hi-IN',\n 'bn-IN',\n 'kn-IN',\n 'ml-IN',\n 'mr-IN',\n 'od-IN',\n 'pa-IN',\n 'ta-IN',\n 'te-IN',\n 'en-IN',\n 'gu-IN',\n]);\n\n// ---------------------------------------------------------------------------\n// Resolve caller options into a fully-populated internal struct\n// ---------------------------------------------------------------------------\n\nfunction resolveOptions(opts: Partial<STTOptions>): ResolvedSTTOptions {\n const apiKey = opts.apiKey ?? process.env.SARVAM_API_KEY;\n if (!apiKey) {\n throw new Error('Sarvam API key is required, whether as an argument or as $SARVAM_API_KEY');\n }\n\n const model: STTModels = opts.model ?? 'saaras:v3';\n\n const base: ResolvedSTTOptions = {\n apiKey,\n model,\n streaming: opts.streaming ?? true,\n highVadSensitivity: opts.highVadSensitivity,\n flushSignal: opts.flushSignal,\n };\n\n if (model === 'saaras:v2.5') {\n const translateOpts = opts as STTTranslateOptions;\n base.prompt = translateOpts.prompt;\n base.mode = translateOpts.mode ?? SAARAS_TRANSLATE_DEFAULTS.mode;\n } else if (model === 'saaras:v3') {\n const v3Opts = opts as STTV3Options;\n base.languageCode = v3Opts.languageCode ?? SAARAS_V3_DEFAULTS.languageCode;\n base.mode = v3Opts.mode ?? SAARAS_V3_DEFAULTS.mode;\n base.prompt = v3Opts.prompt;\n base.withTimestamps = v3Opts.withTimestamps;\n } else {\n // saarika:v2.5\n let languageCode = (opts as STTV2Options).languageCode ?? SAARIKA_DEFAULTS.languageCode;\n if (!STTV2_LANGUAGE_SET.has(languageCode)) {\n languageCode = SAARIKA_DEFAULTS.languageCode;\n }\n base.languageCode = languageCode;\n base.withTimestamps = (opts as STTV2Options).withTimestamps;\n }\n\n return base;\n}\n\n// ---------------------------------------------------------------------------\n// URL helpers\n// ---------------------------------------------------------------------------\n\nfunction getRestUrl(model: STTModels): string {\n return model === 'saaras:v2.5' ? SARVAM_STT_TRANSLATE_REST_URL : SARVAM_STT_REST_URL;\n}\n\nfunction getWsUrl(model: STTModels): string {\n return model === 'saaras:v2.5' ? SARVAM_STT_TRANSLATE_WS_URL : SARVAM_STT_WS_URL;\n}\n\nfunction buildWsUrl(opts: ResolvedSTTOptions): string {\n const base = getWsUrl(opts.model);\n const params = new URLSearchParams();\n params.set('model', opts.model);\n params.set('vad_signals', 'true');\n params.set('sample_rate', String(SAMPLE_RATE));\n params.set('input_audio_codec', 'pcm_s16le');\n\n if (opts.model !== 'saaras:v2.5' && opts.languageCode != null) {\n params.set('language-code', opts.languageCode);\n }\n\n // mode: v3 on STT WS, and translate WS (both endpoints support it)\n if (opts.mode != null) {\n params.set('mode', opts.mode);\n }\n\n // Optional WS params\n if (opts.highVadSensitivity != null) {\n params.set('high_vad_sensitivity', String(opts.highVadSensitivity));\n }\n if (opts.flushSignal != null) {\n params.set('flush_signal', String(opts.flushSignal));\n }\n\n return `${base}?${params.toString()}`;\n}\n\n// ---------------------------------------------------------------------------\n// Build the multipart form data (REST) — only sends model-relevant fields\n// ---------------------------------------------------------------------------\n\nfunction buildFormData(wavBlob: Blob, opts: ResolvedSTTOptions): FormData {\n const formData = new FormData();\n formData.append('file', wavBlob, 'audio.wav');\n formData.append('model', opts.model);\n\n if (opts.model !== 'saaras:v2.5' && opts.languageCode != null) {\n formData.append('language_code', opts.languageCode);\n }\n if (opts.model === 'saaras:v3' && opts.mode != null) {\n formData.append('mode', opts.mode);\n }\n if ((opts.model === 'saaras:v2.5' || opts.model === 'saaras:v3') && opts.prompt != null) {\n formData.append('prompt', opts.prompt);\n }\n if (opts.model !== 'saaras:v2.5' && opts.withTimestamps) {\n formData.append('with_timestamps', 'true');\n }\n\n return formData;\n}\n\n// ---------------------------------------------------------------------------\n// WAV encoding helper\n// ---------------------------------------------------------------------------\n\nfunction createWav(frame: AudioFrame): Buffer {\n const bitsPerSample = 16;\n const byteRate = (frame.sampleRate * frame.channels * bitsPerSample) / 8;\n const blockAlign = (frame.channels * bitsPerSample) / 8;\n\n const header = Buffer.alloc(44);\n header.write('RIFF', 0);\n header.writeUInt32LE(36 + frame.data.byteLength, 4);\n header.write('WAVE', 8);\n header.write('fmt ', 12);\n header.writeUInt32LE(16, 16);\n header.writeUInt16LE(1, 20);\n header.writeUInt16LE(frame.channels, 22);\n header.writeUInt32LE(frame.sampleRate, 24);\n header.writeUInt32LE(byteRate, 28);\n header.writeUInt16LE(blockAlign, 32);\n header.writeUInt16LE(bitsPerSample, 34);\n header.write('data', 36);\n header.writeUInt32LE(frame.data.byteLength, 40);\n\n const pcm = Buffer.from(frame.data.buffer, frame.data.byteOffset, frame.data.byteLength);\n return Buffer.concat([header, pcm]);\n}\n\n// ---------------------------------------------------------------------------\n// REST response type\n// ---------------------------------------------------------------------------\n\ninterface SarvamSTTResponse {\n request_id: string | null;\n transcript: string;\n language_code: string | null;\n language_probability?: number | null;\n timestamps?: {\n words: string[];\n start_time_seconds: number[];\n end_time_seconds: number[];\n } | null;\n}\n\n// ---------------------------------------------------------------------------\n// WS response types (from server Publish messages)\n// ---------------------------------------------------------------------------\n\n/** type: \"data\" */\ninterface SarvamWSTranscriptData {\n request_id?: string;\n transcript?: string;\n language_code?: string | null;\n language_probability?: number | null;\n timestamps?: Record<string, unknown> | null;\n diarized_transcript?: Record<string, unknown> | null;\n metrics?: {\n audio_duration?: number;\n processing_latency?: number;\n };\n}\n\n/** type: \"events\" */\ninterface SarvamWSEventData {\n event_type?: string;\n timestamp?: string;\n signal_type?: 'START_SPEECH' | 'END_SPEECH';\n occured_at?: number;\n}\n\n/** type: \"error\" — server sends data with message and code fields */\ninterface SarvamWSErrorData {\n message?: string;\n error?: string;\n code?: string;\n}\n\n// ---------------------------------------------------------------------------\n// STT class — supports both REST (recognize) and WebSocket (stream)\n// ---------------------------------------------------------------------------\n\nexport class STT extends stt.STT {\n private opts: ResolvedSTTOptions;\n label = 'sarvam.STT';\n\n /**\n * Create a new instance of Sarvam AI STT.\n *\n * @remarks\n * `apiKey` must be set to your Sarvam API key, either using the argument or by setting the\n * `SARVAM_API_KEY` environment variable.\n *\n * Supported models:\n * - `saaras:v3` (default, recommended) — supports all 22 languages, modes, prompt, timestamps, and uses `/speech-to-text`.\n * - `saaras:v2.5` — Indic-to-English translation via `/speech-to-text-translate`. Auto-detects source language. Supports prompt.\n * - `saarika:v2.5` — will be deprecated soon. Supports timestamps. All its languages are available in `saaras:v3`.\n *\n * @see {@link https://docs.sarvam.ai/api-reference-docs/speech-to-text/transcribe | Sarvam STT API docs}\n * @see {@link https://docs.sarvam.ai/api-reference-docs/speech-to-text-translate/translate | Sarvam STT Translate docs}\n */\n constructor(opts: Partial<STTOptions> = {}) {\n const resolved = resolveOptions(opts);\n super({\n streaming: resolved.streaming,\n interimResults: false,\n alignedTranscript: false,\n });\n this.opts = resolved;\n }\n\n updateOptions(opts: Partial<STTOptions>) {\n const modelChanging = opts.model != null && opts.model !== this.opts.model;\n\n const base: Partial<STTOptions> = modelChanging\n ? {\n apiKey: this.opts.apiKey,\n streaming: this.opts.streaming,\n ...(this.opts.highVadSensitivity != null\n ? { highVadSensitivity: this.opts.highVadSensitivity }\n : {}),\n ...(this.opts.flushSignal != null ? { flushSignal: this.opts.flushSignal } : {}),\n ...(this.opts.languageCode != null && opts.model !== 'saaras:v2.5'\n ? { languageCode: this.opts.languageCode as STTV3Languages }\n : {}),\n }\n : ({ ...this.opts } as Partial<STTOptions>);\n\n this.opts = resolveOptions({ ...base, ...opts } as STTOptions);\n }\n\n async _recognize(buffer: AudioBuffer, abortSignal?: AbortSignal): Promise<stt.SpeechEvent> {\n const frame = mergeFrames(buffer);\n const wavBuffer = createWav(frame);\n const wavBlob = new Blob([new Uint8Array(wavBuffer)], { type: 'audio/wav' });\n\n const formData = buildFormData(wavBlob, this.opts);\n\n const response = await fetch(getRestUrl(this.opts.model), {\n method: 'POST',\n headers: {\n 'api-subscription-key': this.opts.apiKey,\n },\n body: formData,\n signal: abortSignal ?? null,\n });\n\n if (!response.ok) {\n const errorBody = await response.text();\n throw new Error(`Sarvam STT API error ${response.status}: ${errorBody}`);\n }\n\n const data = (await response.json()) as SarvamSTTResponse;\n\n let startTime = 0;\n let endTime = 0;\n if (data.timestamps) {\n const starts = data.timestamps.start_time_seconds;\n const ends = data.timestamps.end_time_seconds;\n if (starts.length > 0) startTime = starts[0] ?? 0;\n if (ends.length > 0) endTime = ends[ends.length - 1] ?? 0;\n }\n\n return {\n type: stt.SpeechEventType.FINAL_TRANSCRIPT,\n requestId: data.request_id ?? undefined,\n alternatives: [\n {\n text: data.transcript || '',\n language: data.language_code ?? this.opts.languageCode ?? 'unknown',\n startTime,\n endTime,\n confidence: data.language_probability ?? 0,\n },\n ],\n };\n }\n\n stream(options?: { connOptions?: APIConnectOptions }): SpeechStream {\n if (!this.capabilities.streaming) {\n throw new Error(\n 'Sarvam STT streaming is disabled (`streaming: false`). Use recognize() for REST or wrap with stt.StreamAdapter + VAD for streaming behavior.',\n );\n }\n return new SpeechStream(this, this.opts, options?.connOptions);\n }\n}\n\n// ---------------------------------------------------------------------------\n// WebSocket streaming SpeechStream\n// ---------------------------------------------------------------------------\n\nexport class SpeechStream extends stt.SpeechStream {\n #opts: ResolvedSTTOptions;\n #audioEnergyFilter: AudioEnergyFilter;\n #logger = log();\n #speaking = false;\n #resetWS = new Future();\n #requestId = '';\n label = 'sarvam.SpeechStream';\n\n constructor(sttInstance: STT, opts: ResolvedSTTOptions, connOptions?: APIConnectOptions) {\n super(sttInstance, SAMPLE_RATE, connOptions);\n this.#opts = opts;\n this.closed = false;\n this.#audioEnergyFilter = new AudioEnergyFilter();\n }\n\n updateOptions(opts: Partial<STTOptions>) {\n const modelChanging = opts.model != null && opts.model !== this.#opts.model;\n\n const base: Partial<STTOptions> = modelChanging\n ? {\n apiKey: this.#opts.apiKey,\n ...(this.#opts.highVadSensitivity != null\n ? { highVadSensitivity: this.#opts.highVadSensitivity }\n : {}),\n ...(this.#opts.flushSignal != null ? { flushSignal: this.#opts.flushSignal } : {}),\n ...(this.#opts.languageCode != null && opts.model !== 'saaras:v2.5'\n ? { languageCode: this.#opts.languageCode as STTV3Languages }\n : {}),\n }\n : ({ ...this.#opts } as Partial<STTOptions>);\n\n this.#opts = resolveOptions({ ...base, ...opts } as STTOptions);\n this.#resetWS.resolve();\n }\n\n protected async run() {\n const maxRetry = 32;\n let retries = 0;\n\n while (!this.input.closed && !this.closed) {\n const wsUrl = buildWsUrl(this.#opts);\n this.#logger.info(`Sarvam STT connecting to: ${wsUrl}`);\n const ws = new WebSocket(wsUrl, {\n headers: { 'api-subscription-key': this.#opts.apiKey },\n });\n\n let sessionStart = 0;\n try {\n await new Promise<void>((resolve, reject) => {\n ws.once('open', () => resolve());\n ws.once('error', (err: Error) => reject(err));\n ws.once('close', (code: number) =>\n reject(new Error(`WebSocket closed with code ${code}`)),\n );\n });\n\n sessionStart = Date.now();\n await this.#runWS(ws);\n retries = 0;\n } catch (e) {\n // Clean up the WebSocket on failure to prevent listener leaks\n ws.removeAllListeners();\n ws.close();\n\n if (!this.closed && !this.input.closed) {\n // If the session ran for a meaningful duration (>5s), this was a working\n // session that ended normally (e.g. server idle timeout ~20s). Reset retries\n // so expected idle-timeout reconnections don't accumulate toward the fatal limit.\n if (sessionStart > 0 && Date.now() - sessionStart > 5000) {\n retries = 0;\n }\n if (retries >= maxRetry) {\n throw new Error(`Failed to connect to Sarvam STT after ${retries} attempts: ${e}`);\n }\n const delay = Math.min(retries * 5, 10);\n retries++;\n this.#logger.warn(\n `Failed to connect to Sarvam STT, retrying in ${delay}s: ${e} (${retries}/${maxRetry})`,\n );\n await new Promise((resolve) => setTimeout(resolve, delay * 1000));\n } else {\n this.#logger.warn(\n `Sarvam STT disconnected, connection is closed: ${e} (inputClosed: ${this.input.closed}, isClosed: ${this.closed})`,\n );\n }\n }\n }\n\n this.closed = true;\n }\n\n async #runWS(ws: WebSocket) {\n this.#resetWS = new Future();\n this.#speaking = false;\n let closing = false;\n // Session-scoped controller: aborted in finally to cancel sendTask on WS reset\n const sessionController = new AbortController();\n\n // Config message: only supported on translate WS endpoint (saaras:v2.5)\n // @see https://docs.sarvam.ai/api-reference-docs/speech-to-text-translate/translate/ws\n if (this.#opts.model === 'saaras:v2.5' && this.#opts.prompt != null) {\n ws.send(JSON.stringify({ type: 'config', prompt: this.#opts.prompt }));\n }\n\n // No keepalive — Sarvam rejects messages without 'audio' field, and sending\n // silent audio could confuse server-side VAD. On idle timeout (~20s), the\n // server closes the connection and the outer retry loop in run() reconnects.\n // This matches the Python SDK's approach.\n\n const wsMonitor = Task.from(async (controller) => {\n const closed = new Promise<void>((_, reject) => {\n ws.once('close', (code: number, reason: Buffer) => {\n if (!closing) {\n this.#logger.error(`WebSocket closed with code ${code}: ${reason}`);\n reject(new Error('WebSocket closed'));\n }\n });\n });\n await Promise.race([closed, waitForAbort(controller.signal)]);\n });\n\n const sendTask = async () => {\n const samples50Ms = Math.floor(SAMPLE_RATE / 20); // 50ms chunks\n const stream = new AudioByteStream(SAMPLE_RATE, NUM_CHANNELS, samples50Ms);\n const abortPromise = waitForAbort(this.abortSignal);\n const sessionAbort = waitForAbort(sessionController.signal);\n\n try {\n while (!this.closed) {\n const result = await Promise.race([this.input.next(), abortPromise, sessionAbort]);\n if (result === undefined) return; // aborted\n if (result.done) break;\n\n const data = result.value;\n\n let frames: AudioFrame[];\n if (data === SpeechStream.FLUSH_SENTINEL) {\n frames = stream.flush();\n } else if (data.sampleRate !== SAMPLE_RATE || data.channels !== NUM_CHANNELS) {\n throw new Error(\n `Expected ${SAMPLE_RATE}Hz/${NUM_CHANNELS}ch, got ${data.sampleRate}Hz/${data.channels}ch`,\n );\n } else {\n frames = stream.write(\n data.data.buffer.slice(\n data.data.byteOffset,\n data.data.byteOffset + data.data.byteLength,\n ) as ArrayBuffer,\n );\n }\n\n for (const frame of frames) {\n if (this.#audioEnergyFilter.pushFrame(frame)) {\n // Sarvam expects base64-encoded PCM in a JSON message\n const pcmBuffer = Buffer.from(\n frame.data.buffer,\n frame.data.byteOffset,\n frame.data.byteLength,\n );\n const base64Audio = pcmBuffer.toString('base64');\n ws.send(\n JSON.stringify({\n audio: {\n data: base64Audio,\n encoding: 'audio/wav',\n sample_rate: SAMPLE_RATE,\n },\n }),\n );\n }\n }\n\n // Send flush message on FLUSH_SENTINEL (VAD end of speech)\n if (data === SpeechStream.FLUSH_SENTINEL) {\n ws.send(JSON.stringify({ type: 'flush' }));\n }\n }\n } finally {\n closing = true;\n // Match Python: end_of_stream includes an empty audio field to avoid\n // \"audio must not be None\" rejection from the server\n try {\n ws.send(\n JSON.stringify({\n type: 'end_of_stream',\n audio: { data: '', encoding: 'audio/wav', sample_rate: SAMPLE_RATE },\n }),\n );\n } catch {\n // ws may already be closed\n }\n wsMonitor.cancel();\n }\n };\n\n const listenTask = Task.from(async (controller) => {\n const putMessage = (event: stt.SpeechEvent) => {\n if (!this.queue.closed) {\n try {\n this.queue.put(event);\n } catch {\n // ignore\n }\n }\n };\n\n const listenMessage = new Promise<void>((resolve, reject) => {\n ws.once('close', () => resolve());\n ws.on('message', (msg: RawData) => {\n try {\n const raw = msg.toString();\n this.#logger.debug(`Sarvam STT raw WS message: ${raw.substring(0, 500)}`);\n const json = JSON.parse(raw);\n const msgType: string = json['type'] ?? '';\n\n if (msgType === 'events') {\n const eventData = (json['data'] as SarvamWSEventData | undefined) ?? {};\n const signalType = eventData.signal_type;\n\n if (signalType === 'START_SPEECH') {\n if (!this.#speaking) {\n this.#speaking = true;\n putMessage({ type: stt.SpeechEventType.START_OF_SPEECH });\n }\n } else if (signalType === 'END_SPEECH') {\n if (this.#speaking) {\n this.#speaking = false;\n putMessage({ type: stt.SpeechEventType.END_OF_SPEECH });\n }\n }\n } else if (msgType === 'data') {\n const td = (json['data'] as SarvamWSTranscriptData | undefined) ?? {};\n const transcript = td.transcript ?? '';\n const language = td.language_code ?? this.#opts.languageCode ?? 'unknown';\n const requestId = td.request_id ?? '';\n const confidence = td.language_probability ?? 0;\n this.#requestId = requestId;\n\n // Log metrics when available\n if (td.metrics) {\n this.#logger.debug(\n `Sarvam STT metrics: audio_duration=${td.metrics.audio_duration}s, latency=${td.metrics.processing_latency}s`,\n );\n }\n\n if (transcript) {\n if (!this.#speaking) {\n this.#speaking = true;\n putMessage({ type: stt.SpeechEventType.START_OF_SPEECH });\n }\n\n putMessage({\n type: stt.SpeechEventType.FINAL_TRANSCRIPT,\n requestId,\n alternatives: [\n {\n text: transcript,\n language,\n startTime: 0,\n endTime: td.metrics?.audio_duration ?? 0,\n confidence,\n },\n ],\n });\n }\n } else if (msgType === 'error') {\n // Server format: { type: \"error\", data: { message: \"...\", code: \"...\" } }\n // Also check top-level and 'error' field as fallback\n const nested = json['data'] as SarvamWSErrorData | undefined;\n const errorInfo =\n nested?.message ??\n nested?.error ??\n json['error'] ??\n json['message'] ??\n 'Unknown error';\n const errorCode = nested?.code ?? json['code'] ?? '';\n this.#logger.error(`Sarvam STT WebSocket error [${errorCode}]: ${errorInfo}`);\n reject(new Error(`Sarvam STT API error [${errorCode}]: ${errorInfo}`));\n return;\n }\n\n if (this.closed || closing) {\n resolve();\n }\n } catch (err) {\n this.#logger.error(`Error processing Sarvam STT message: ${msg}`);\n reject(err);\n }\n });\n });\n\n await Promise.race([listenMessage, waitForAbort(controller.signal)]);\n }, this.abortController);\n\n try {\n await Promise.race([\n this.#resetWS.await,\n Promise.all([sendTask(), listenTask.result, wsMonitor.result]),\n ]);\n } finally {\n closing = true;\n sessionController.abort();\n // Do NOT call listenTask.cancel() — it would abort this.abortController\n // (passed to Task.from) and permanently break the stream. Instead, ws.close()\n // triggers the ws.once('close') handler inside listenMessage, letting listenTask\n // exit naturally. On close(), the parent abort signal handles it directly.\n wsMonitor.cancel();\n ws.close();\n // Suppress unhandled rejection from orphaned listenTask on reconnect\n listenTask.result.catch(() => {});\n }\n }\n}\n"],"mappings":"AAGA;AAAA,EAGE;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,OACK;AAEP,SAAuB,iBAAiB;AAaxC,MAAM,sBAAsB;AAC5B,MAAM,gCAAgC;AACtC,MAAM,oBAAoB;AAC1B,MAAM,8BAA8B;AAEpC,MAAM,cAAc;AACpB,MAAM,eAAe;AA6FrB,MAAM,mBAAmB;AAAA,EACvB,cAAc;AAChB;AAEA,MAAM,qBAAqB;AAAA,EACzB,cAAc;AAAA,EACd,MAAM;AACR;AAEA,MAAM,4BAA4B;AAAA,EAChC,MAAM;AACR;AAGA,MAAM,qBAA0C,oBAAI,IAAoB;AAAA,EACtE;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF,CAAC;AAMD,SAAS,eAAe,MAA+C;AACrE,QAAM,SAAS,KAAK,UAAU,QAAQ,IAAI;AAC1C,MAAI,CAAC,QAAQ;AACX,UAAM,IAAI,MAAM,0EAA0E;AAAA,EAC5F;AAEA,QAAM,QAAmB,KAAK,SAAS;AAEvC,QAAM,OAA2B;AAAA,IAC/B;AAAA,IACA;AAAA,IACA,WAAW,KAAK,aAAa;AAAA,IAC7B,oBAAoB,KAAK;AAAA,IACzB,aAAa,KAAK;AAAA,EACpB;AAEA,MAAI,UAAU,eAAe;AAC3B,UAAM,gBAAgB;AACtB,SAAK,SAAS,cAAc;AAC5B,SAAK,OAAO,cAAc,QAAQ,0BAA0B;AAAA,EAC9D,WAAW,UAAU,aAAa;AAChC,UAAM,SAAS;AACf,SAAK,eAAe,OAAO,gBAAgB,mBAAmB;AAC9D,SAAK,OAAO,OAAO,QAAQ,mBAAmB;AAC9C,SAAK,SAAS,OAAO;AACrB,SAAK,iBAAiB,OAAO;AAAA,EAC/B,OAAO;AAEL,QAAI,eAAgB,KAAsB,gBAAgB,iBAAiB;AAC3E,QAAI,CAAC,mBAAmB,IAAI,YAAY,GAAG;AACzC,qBAAe,iBAAiB;AAAA,IAClC;AACA,SAAK,eAAe;AACpB,SAAK,iBAAkB,KAAsB;AAAA,EAC/C;AAEA,SAAO;AACT;AAMA,SAAS,WAAW,OAA0B;AAC5C,SAAO,UAAU,gBAAgB,gCAAgC;AACnE;AAEA,SAAS,SAAS,OAA0B;AAC1C,SAAO,UAAU,gBAAgB,8BAA8B;AACjE;AAEA,SAAS,WAAW,MAAkC;AACpD,QAAM,OAAO,SAAS,KAAK,KAAK;AAChC,QAAM,SAAS,IAAI,gBAAgB;AACnC,SAAO,IAAI,SAAS,KAAK,KAAK;AAC9B,SAAO,IAAI,eAAe,MAAM;AAChC,SAAO,IAAI,eAAe,OAAO,WAAW,CAAC;AAC7C,SAAO,IAAI,qBAAqB,WAAW;AAE3C,MAAI,KAAK,UAAU,iBAAiB,KAAK,gBAAgB,MAAM;AAC7D,WAAO,IAAI,iBAAiB,KAAK,YAAY;AAAA,EAC/C;AAGA,MAAI,KAAK,QAAQ,MAAM;AACrB,WAAO,IAAI,QAAQ,KAAK,IAAI;AAAA,EAC9B;AAGA,MAAI,KAAK,sBAAsB,MAAM;AACnC,WAAO,IAAI,wBAAwB,OAAO,KAAK,kBAAkB,CAAC;AAAA,EACpE;AACA,MAAI,KAAK,eAAe,MAAM;AAC5B,WAAO,IAAI,gBAAgB,OAAO,KAAK,WAAW,CAAC;AAAA,EACrD;AAEA,SAAO,GAAG,IAAI,IAAI,OAAO,SAAS,CAAC;AACrC;AAMA,SAAS,cAAc,SAAe,MAAoC;AACxE,QAAM,WAAW,IAAI,SAAS;AAC9B,WAAS,OAAO,QAAQ,SAAS,WAAW;AAC5C,WAAS,OAAO,SAAS,KAAK,KAAK;AAEnC,MAAI,KAAK,UAAU,iBAAiB,KAAK,gBAAgB,MAAM;AAC7D,aAAS,OAAO,iBAAiB,KAAK,YAAY;AAAA,EACpD;AACA,MAAI,KAAK,UAAU,eAAe,KAAK,QAAQ,MAAM;AACnD,aAAS,OAAO,QAAQ,KAAK,IAAI;AAAA,EACnC;AACA,OAAK,KAAK,UAAU,iBAAiB,KAAK,UAAU,gBAAgB,KAAK,UAAU,MAAM;AACvF,aAAS,OAAO,UAAU,KAAK,MAAM;AAAA,EACvC;AACA,MAAI,KAAK,UAAU,iBAAiB,KAAK,gBAAgB;AACvD,aAAS,OAAO,mBAAmB,MAAM;AAAA,EAC3C;AAEA,SAAO;AACT;AAMA,SAAS,UAAU,OAA2B;AAC5C,QAAM,gBAAgB;AACtB,QAAM,WAAY,MAAM,aAAa,MAAM,WAAW,gBAAiB;AACvE,QAAM,aAAc,MAAM,WAAW,gBAAiB;AAEtD,QAAM,SAAS,OAAO,MAAM,EAAE;AAC9B,SAAO,MAAM,QAAQ,CAAC;AACtB,SAAO,cAAc,KAAK,MAAM,KAAK,YAAY,CAAC;AAClD,SAAO,MAAM,QAAQ,CAAC;AACtB,SAAO,MAAM,QAAQ,EAAE;AACvB,SAAO,cAAc,IAAI,EAAE;AAC3B,SAAO,cAAc,GAAG,EAAE;AAC1B,SAAO,cAAc,MAAM,UAAU,EAAE;AACvC,SAAO,cAAc,MAAM,YAAY,EAAE;AACzC,SAAO,cAAc,UAAU,EAAE;AACjC,SAAO,cAAc,YAAY,EAAE;AACnC,SAAO,cAAc,eAAe,EAAE;AACtC,SAAO,MAAM,QAAQ,EAAE;AACvB,SAAO,cAAc,MAAM,KAAK,YAAY,EAAE;AAE9C,QAAM,MAAM,OAAO,KAAK,MAAM,KAAK,QAAQ,MAAM,KAAK,YAAY,MAAM,KAAK,UAAU;AACvF,SAAO,OAAO,OAAO,CAAC,QAAQ,GAAG,CAAC;AACpC;AAuDO,MAAM,YAAY,IAAI,IAAI;AAAA,EACvB;AAAA,EACR,QAAQ;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAiBR,YAAY,OAA4B,CAAC,GAAG;AAC1C,UAAM,WAAW,eAAe,IAAI;AACpC,UAAM;AAAA,MACJ,WAAW,SAAS;AAAA,MACpB,gBAAgB;AAAA,MAChB,mBAAmB;AAAA,IACrB,CAAC;AACD,SAAK,OAAO;AAAA,EACd;AAAA,EAEA,cAAc,MAA2B;AACvC,UAAM,gBAAgB,KAAK,SAAS,QAAQ,KAAK,UAAU,KAAK,KAAK;AAErE,UAAM,OAA4B,gBAC9B;AAAA,MACE,QAAQ,KAAK,KAAK;AAAA,MAClB,WAAW,KAAK,KAAK;AAAA,MACrB,GAAI,KAAK,KAAK,sBAAsB,OAChC,EAAE,oBAAoB,KAAK,KAAK,mBAAmB,IACnD,CAAC;AAAA,MACL,GAAI,KAAK,KAAK,eAAe,OAAO,EAAE,aAAa,KAAK,KAAK,YAAY,IAAI,CAAC;AAAA,MAC9E,GAAI,KAAK,KAAK,gBAAgB,QAAQ,KAAK,UAAU,gBACjD,EAAE,cAAc,KAAK,KAAK,aAA+B,IACzD,CAAC;AAAA,IACP,IACC,EAAE,GAAG,KAAK,KAAK;AAEpB,SAAK,OAAO,eAAe,EAAE,GAAG,MAAM,GAAG,KAAK,CAAe;AAAA,EAC/D;AAAA,EAEA,MAAM,WAAW,QAAqB,aAAqD;AACzF,UAAM,QAAQ,YAAY,MAAM;AAChC,UAAM,YAAY,UAAU,KAAK;AACjC,UAAM,UAAU,IAAI,KAAK,CAAC,IAAI,WAAW,SAAS,CAAC,GAAG,EAAE,MAAM,YAAY,CAAC;AAE3E,UAAM,WAAW,cAAc,SAAS,KAAK,IAAI;AAEjD,UAAM,WAAW,MAAM,MAAM,WAAW,KAAK,KAAK,KAAK,GAAG;AAAA,MACxD,QAAQ;AAAA,MACR,SAAS;AAAA,QACP,wBAAwB,KAAK,KAAK;AAAA,MACpC;AAAA,MACA,MAAM;AAAA,MACN,QAAQ,eAAe;AAAA,IACzB,CAAC;AAED,QAAI,CAAC,SAAS,IAAI;AAChB,YAAM,YAAY,MAAM,SAAS,KAAK;AACtC,YAAM,IAAI,MAAM,wBAAwB,SAAS,MAAM,KAAK,SAAS,EAAE;AAAA,IACzE;AAEA,UAAM,OAAQ,MAAM,SAAS,KAAK;AAElC,QAAI,YAAY;AAChB,QAAI,UAAU;AACd,QAAI,KAAK,YAAY;AACnB,YAAM,SAAS,KAAK,WAAW;AAC/B,YAAM,OAAO,KAAK,WAAW;AAC7B,UAAI,OAAO,SAAS,EAAG,aAAY,OAAO,CAAC,KAAK;AAChD,UAAI,KAAK,SAAS,EAAG,WAAU,KAAK,KAAK,SAAS,CAAC,KAAK;AAAA,IAC1D;AAEA,WAAO;AAAA,MACL,MAAM,IAAI,gBAAgB;AAAA,MAC1B,WAAW,KAAK,cAAc;AAAA,MAC9B,cAAc;AAAA,QACZ;AAAA,UACE,MAAM,KAAK,cAAc;AAAA,UACzB,UAAU,KAAK,iBAAiB,KAAK,KAAK,gBAAgB;AAAA,UAC1D;AAAA,UACA;AAAA,UACA,YAAY,KAAK,wBAAwB;AAAA,QAC3C;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,OAAO,SAA6D;AAClE,QAAI,CAAC,KAAK,aAAa,WAAW;AAChC,YAAM,IAAI;AAAA,QACR;AAAA,MACF;AAAA,IACF;AACA,WAAO,IAAI,aAAa,MAAM,KAAK,MAAM,mCAAS,WAAW;AAAA,EAC/D;AACF;AAMO,MAAM,qBAAqB,IAAI,aAAa;AAAA,EACjD;AAAA,EACA;AAAA,EACA,UAAU,IAAI;AAAA,EACd,YAAY;AAAA,EACZ,WAAW,IAAI,OAAO;AAAA,EACtB,aAAa;AAAA,EACb,QAAQ;AAAA,EAER,YAAY,aAAkB,MAA0B,aAAiC;AACvF,UAAM,aAAa,aAAa,WAAW;AAC3C,SAAK,QAAQ;AACb,SAAK,SAAS;AACd,SAAK,qBAAqB,IAAI,kBAAkB;AAAA,EAClD;AAAA,EAEA,cAAc,MAA2B;AACvC,UAAM,gBAAgB,KAAK,SAAS,QAAQ,KAAK,UAAU,KAAK,MAAM;AAEtE,UAAM,OAA4B,gBAC9B;AAAA,MACE,QAAQ,KAAK,MAAM;AAAA,MACnB,GAAI,KAAK,MAAM,sBAAsB,OACjC,EAAE,oBAAoB,KAAK,MAAM,mBAAmB,IACpD,CAAC;AAAA,MACL,GAAI,KAAK,MAAM,eAAe,OAAO,EAAE,aAAa,KAAK,MAAM,YAAY,IAAI,CAAC;AAAA,MAChF,GAAI,KAAK,MAAM,gBAAgB,QAAQ,KAAK,UAAU,gBAClD,EAAE,cAAc,KAAK,MAAM,aAA+B,IAC1D,CAAC;AAAA,IACP,IACC,EAAE,GAAG,KAAK,MAAM;AAErB,SAAK,QAAQ,eAAe,EAAE,GAAG,MAAM,GAAG,KAAK,CAAe;AAC9D,SAAK,SAAS,QAAQ;AAAA,EACxB;AAAA,EAEA,MAAgB,MAAM;AACpB,UAAM,WAAW;AACjB,QAAI,UAAU;AAEd,WAAO,CAAC,KAAK,MAAM,UAAU,CAAC,KAAK,QAAQ;AACzC,YAAM,QAAQ,WAAW,KAAK,KAAK;AACnC,WAAK,QAAQ,KAAK,6BAA6B,KAAK,EAAE;AACtD,YAAM,KAAK,IAAI,UAAU,OAAO;AAAA,QAC9B,SAAS,EAAE,wBAAwB,KAAK,MAAM,OAAO;AAAA,MACvD,CAAC;AAED,UAAI,eAAe;AACnB,UAAI;AACF,cAAM,IAAI,QAAc,CAAC,SAAS,WAAW;AAC3C,aAAG,KAAK,QAAQ,MAAM,QAAQ,CAAC;AAC/B,aAAG,KAAK,SAAS,CAAC,QAAe,OAAO,GAAG,CAAC;AAC5C,aAAG;AAAA,YAAK;AAAA,YAAS,CAAC,SAChB,OAAO,IAAI,MAAM,8BAA8B,IAAI,EAAE,CAAC;AAAA,UACxD;AAAA,QACF,CAAC;AAED,uBAAe,KAAK,IAAI;AACxB,cAAM,KAAK,OAAO,EAAE;AACpB,kBAAU;AAAA,MACZ,SAAS,GAAG;AAEV,WAAG,mBAAmB;AACtB,WAAG,MAAM;AAET,YAAI,CAAC,KAAK,UAAU,CAAC,KAAK,MAAM,QAAQ;AAItC,cAAI,eAAe,KAAK,KAAK,IAAI,IAAI,eAAe,KAAM;AACxD,sBAAU;AAAA,UACZ;AACA,cAAI,WAAW,UAAU;AACvB,kBAAM,IAAI,MAAM,yCAAyC,OAAO,cAAc,CAAC,EAAE;AAAA,UACnF;AACA,gBAAM,QAAQ,KAAK,IAAI,UAAU,GAAG,EAAE;AACtC;AACA,eAAK,QAAQ;AAAA,YACX,gDAAgD,KAAK,MAAM,CAAC,KAAK,OAAO,IAAI,QAAQ;AAAA,UACtF;AACA,gBAAM,IAAI,QAAQ,CAAC,YAAY,WAAW,SAAS,QAAQ,GAAI,CAAC;AAAA,QAClE,OAAO;AACL,eAAK,QAAQ;AAAA,YACX,kDAAkD,CAAC,kBAAkB,KAAK,MAAM,MAAM,eAAe,KAAK,MAAM;AAAA,UAClH;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAEA,SAAK,SAAS;AAAA,EAChB;AAAA,EAEA,MAAM,OAAO,IAAe;AAC1B,SAAK,WAAW,IAAI,OAAO;AAC3B,SAAK,YAAY;AACjB,QAAI,UAAU;AAEd,UAAM,oBAAoB,IAAI,gBAAgB;AAI9C,QAAI,KAAK,MAAM,UAAU,iBAAiB,KAAK,MAAM,UAAU,MAAM;AACnE,SAAG,KAAK,KAAK,UAAU,EAAE,MAAM,UAAU,QAAQ,KAAK,MAAM,OAAO,CAAC,CAAC;AAAA,IACvE;AAOA,UAAM,YAAY,KAAK,KAAK,OAAO,eAAe;AAChD,YAAM,SAAS,IAAI,QAAc,CAAC,GAAG,WAAW;AAC9C,WAAG,KAAK,SAAS,CAAC,MAAc,WAAmB;AACjD,cAAI,CAAC,SAAS;AACZ,iBAAK,QAAQ,MAAM,8BAA8B,IAAI,KAAK,MAAM,EAAE;AAClE,mBAAO,IAAI,MAAM,kBAAkB,CAAC;AAAA,UACtC;AAAA,QACF,CAAC;AAAA,MACH,CAAC;AACD,YAAM,QAAQ,KAAK,CAAC,QAAQ,aAAa,WAAW,MAAM,CAAC,CAAC;AAAA,IAC9D,CAAC;AAED,UAAM,WAAW,YAAY;AAC3B,YAAM,cAAc,KAAK,MAAM,cAAc,EAAE;AAC/C,YAAM,SAAS,IAAI,gBAAgB,aAAa,cAAc,WAAW;AACzE,YAAM,eAAe,aAAa,KAAK,WAAW;AAClD,YAAM,eAAe,aAAa,kBAAkB,MAAM;AAE1D,UAAI;AACF,eAAO,CAAC,KAAK,QAAQ;AACnB,gBAAM,SAAS,MAAM,QAAQ,KAAK,CAAC,KAAK,MAAM,KAAK,GAAG,cAAc,YAAY,CAAC;AACjF,cAAI,WAAW,OAAW;AAC1B,cAAI,OAAO,KAAM;AAEjB,gBAAM,OAAO,OAAO;AAEpB,cAAI;AACJ,cAAI,SAAS,aAAa,gBAAgB;AACxC,qBAAS,OAAO,MAAM;AAAA,UACxB,WAAW,KAAK,eAAe,eAAe,KAAK,aAAa,cAAc;AAC5E,kBAAM,IAAI;AAAA,cACR,YAAY,WAAW,MAAM,YAAY,WAAW,KAAK,UAAU,MAAM,KAAK,QAAQ;AAAA,YACxF;AAAA,UACF,OAAO;AACL,qBAAS,OAAO;AAAA,cACd,KAAK,KAAK,OAAO;AAAA,gBACf,KAAK,KAAK;AAAA,gBACV,KAAK,KAAK,aAAa,KAAK,KAAK;AAAA,cACnC;AAAA,YACF;AAAA,UACF;AAEA,qBAAW,SAAS,QAAQ;AAC1B,gBAAI,KAAK,mBAAmB,UAAU,KAAK,GAAG;AAE5C,oBAAM,YAAY,OAAO;AAAA,gBACvB,MAAM,KAAK;AAAA,gBACX,MAAM,KAAK;AAAA,gBACX,MAAM,KAAK;AAAA,cACb;AACA,oBAAM,cAAc,UAAU,SAAS,QAAQ;AAC/C,iBAAG;AAAA,gBACD,KAAK,UAAU;AAAA,kBACb,OAAO;AAAA,oBACL,MAAM;AAAA,oBACN,UAAU;AAAA,oBACV,aAAa;AAAA,kBACf;AAAA,gBACF,CAAC;AAAA,cACH;AAAA,YACF;AAAA,UACF;AAGA,cAAI,SAAS,aAAa,gBAAgB;AACxC,eAAG,KAAK,KAAK,UAAU,EAAE,MAAM,QAAQ,CAAC,CAAC;AAAA,UAC3C;AAAA,QACF;AAAA,MACF,UAAE;AACA,kBAAU;AAGV,YAAI;AACF,aAAG;AAAA,YACD,KAAK,UAAU;AAAA,cACb,MAAM;AAAA,cACN,OAAO,EAAE,MAAM,IAAI,UAAU,aAAa,aAAa,YAAY;AAAA,YACrE,CAAC;AAAA,UACH;AAAA,QACF,QAAQ;AAAA,QAER;AACA,kBAAU,OAAO;AAAA,MACnB;AAAA,IACF;AAEA,UAAM,aAAa,KAAK,KAAK,OAAO,eAAe;AACjD,YAAM,aAAa,CAAC,UAA2B;AAC7C,YAAI,CAAC,KAAK,MAAM,QAAQ;AACtB,cAAI;AACF,iBAAK,MAAM,IAAI,KAAK;AAAA,UACtB,QAAQ;AAAA,UAER;AAAA,QACF;AAAA,MACF;AAEA,YAAM,gBAAgB,IAAI,QAAc,CAAC,SAAS,WAAW;AAC3D,WAAG,KAAK,SAAS,MAAM,QAAQ,CAAC;AAChC,WAAG,GAAG,WAAW,CAAC,QAAiB;AAzpB3C;AA0pBU,cAAI;AACF,kBAAM,MAAM,IAAI,SAAS;AACzB,iBAAK,QAAQ,MAAM,8BAA8B,IAAI,UAAU,GAAG,GAAG,CAAC,EAAE;AACxE,kBAAM,OAAO,KAAK,MAAM,GAAG;AAC3B,kBAAM,UAAkB,KAAK,MAAM,KAAK;AAExC,gBAAI,YAAY,UAAU;AACxB,oBAAM,YAAa,KAAK,MAAM,KAAuC,CAAC;AACtE,oBAAM,aAAa,UAAU;AAE7B,kBAAI,eAAe,gBAAgB;AACjC,oBAAI,CAAC,KAAK,WAAW;AACnB,uBAAK,YAAY;AACjB,6BAAW,EAAE,MAAM,IAAI,gBAAgB,gBAAgB,CAAC;AAAA,gBAC1D;AAAA,cACF,WAAW,eAAe,cAAc;AACtC,oBAAI,KAAK,WAAW;AAClB,uBAAK,YAAY;AACjB,6BAAW,EAAE,MAAM,IAAI,gBAAgB,cAAc,CAAC;AAAA,gBACxD;AAAA,cACF;AAAA,YACF,WAAW,YAAY,QAAQ;AAC7B,oBAAM,KAAM,KAAK,MAAM,KAA4C,CAAC;AACpE,oBAAM,aAAa,GAAG,cAAc;AACpC,oBAAM,WAAW,GAAG,iBAAiB,KAAK,MAAM,gBAAgB;AAChE,oBAAM,YAAY,GAAG,cAAc;AACnC,oBAAM,aAAa,GAAG,wBAAwB;AAC9C,mBAAK,aAAa;AAGlB,kBAAI,GAAG,SAAS;AACd,qBAAK,QAAQ;AAAA,kBACX,sCAAsC,GAAG,QAAQ,cAAc,cAAc,GAAG,QAAQ,kBAAkB;AAAA,gBAC5G;AAAA,cACF;AAEA,kBAAI,YAAY;AACd,oBAAI,CAAC,KAAK,WAAW;AACnB,uBAAK,YAAY;AACjB,6BAAW,EAAE,MAAM,IAAI,gBAAgB,gBAAgB,CAAC;AAAA,gBAC1D;AAEA,2BAAW;AAAA,kBACT,MAAM,IAAI,gBAAgB;AAAA,kBAC1B;AAAA,kBACA,cAAc;AAAA,oBACZ;AAAA,sBACE,MAAM;AAAA,sBACN;AAAA,sBACA,WAAW;AAAA,sBACX,WAAS,QAAG,YAAH,mBAAY,mBAAkB;AAAA,sBACvC;AAAA,oBACF;AAAA,kBACF;AAAA,gBACF,CAAC;AAAA,cACH;AAAA,YACF,WAAW,YAAY,SAAS;AAG9B,oBAAM,SAAS,KAAK,MAAM;AAC1B,oBAAM,aACJ,iCAAQ,aACR,iCAAQ,UACR,KAAK,OAAO,KACZ,KAAK,SAAS,KACd;AACF,oBAAM,aAAY,iCAAQ,SAAQ,KAAK,MAAM,KAAK;AAClD,mBAAK,QAAQ,MAAM,+BAA+B,SAAS,MAAM,SAAS,EAAE;AAC5E,qBAAO,IAAI,MAAM,yBAAyB,SAAS,MAAM,SAAS,EAAE,CAAC;AACrE;AAAA,YACF;AAEA,gBAAI,KAAK,UAAU,SAAS;AAC1B,sBAAQ;AAAA,YACV;AAAA,UACF,SAAS,KAAK;AACZ,iBAAK,QAAQ,MAAM,wCAAwC,GAAG,EAAE;AAChE,mBAAO,GAAG;AAAA,UACZ;AAAA,QACF,CAAC;AAAA,MACH,CAAC;AAED,YAAM,QAAQ,KAAK,CAAC,eAAe,aAAa,WAAW,MAAM,CAAC,CAAC;AAAA,IACrE,GAAG,KAAK,eAAe;AAEvB,QAAI;AACF,YAAM,QAAQ,KAAK;AAAA,QACjB,KAAK,SAAS;AAAA,QACd,QAAQ,IAAI,CAAC,SAAS,GAAG,WAAW,QAAQ,UAAU,MAAM,CAAC;AAAA,MAC/D,CAAC;AAAA,IACH,UAAE;AACA,gBAAU;AACV,wBAAkB,MAAM;AAKxB,gBAAU,OAAO;AACjB,SAAG,MAAM;AAET,iBAAW,OAAO,MAAM,MAAM;AAAA,MAAC,CAAC;AAAA,IAClC;AAAA,EACF;AACF;","names":[]}
1
+ {"version":3,"sources":["../src/stt.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2025 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport {\n type APIConnectOptions,\n type AudioBuffer,\n AudioByteStream,\n AudioEnergyFilter,\n Future,\n Task,\n log,\n mergeFrames,\n normalizeLanguage,\n stt,\n waitForAbort,\n} from '@livekit/agents';\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { type RawData, WebSocket } from 'ws';\nimport type {\n STTLanguages,\n STTModels,\n STTModes,\n STTV2Languages,\n STTV3Languages,\n} from './models.js';\n\n// ---------------------------------------------------------------------------\n// Endpoint URLs\n// ---------------------------------------------------------------------------\n\nconst SARVAM_STT_REST_URL = 'https://api.sarvam.ai/speech-to-text';\nconst SARVAM_STT_TRANSLATE_REST_URL = 'https://api.sarvam.ai/speech-to-text-translate';\nconst SARVAM_STT_WS_URL = 'wss://api.sarvam.ai/speech-to-text/ws';\nconst SARVAM_STT_TRANSLATE_WS_URL = 'wss://api.sarvam.ai/speech-to-text-translate/ws';\n\nconst SAMPLE_RATE = 16000;\nconst NUM_CHANNELS = 1;\n\n// ---------------------------------------------------------------------------\n// Model-specific option types\n// ---------------------------------------------------------------------------\n\ninterface STTBaseOptions {\n /** Sarvam API key. Defaults to $SARVAM_API_KEY */\n apiKey?: string;\n /**\n * Whether to use native WebSocket streaming for `stream()`.\n * Set to `false` to prefer non-streaming REST recognition (used by Agent via StreamAdapter + VAD).\n * Default: `true`.\n */\n streaming?: boolean;\n /** Increase VAD sensitivity (WS only). Maps to `high_vad_sensitivity` query param. */\n highVadSensitivity?: boolean;\n /** Enable flush signal events from server (WS only). Maps to `flush_signal` query param. */\n flushSignal?: boolean;\n}\n\n/**\n * Options specific to saarika:v2.5.\n * saarika:v2.5 will be deprecated soon — prefer {@link STTV3Options} with `saaras:v3` for new integrations.\n * All v2.5 language codes are also supported by v3.\n * @see {@link https://docs.sarvam.ai/api-reference-docs/speech-to-text/transcribe | Sarvam STT API docs}\n */\nexport interface STTV2Options extends STTBaseOptions {\n model: 'saarika:v2.5';\n /** Language code (BCP-47). Default: 'en-IN'. Set to 'unknown' for auto-detection. */\n languageCode?: STTV2Languages | string;\n /** Return chunk-level timestamps in REST response */\n withTimestamps?: boolean;\n}\n\n/**\n * Options specific to saaras:v2.5 (dedicated translate endpoint).\n * Uses the `/speech-to-text-translate` endpoint for Indic-to-English translation.\n * Auto-detects the source language; does not accept language codes or timestamps.\n * @see {@link https://docs.sarvam.ai/api-reference-docs/speech-to-text-translate/translate | Sarvam STT Translate docs}\n */\nexport interface STTTranslateOptions extends STTBaseOptions {\n model: 'saaras:v2.5';\n /** Conversation context to boost model accuracy */\n prompt?: string;\n /** Mode for translate WS. Default: 'translate'. */\n mode?: STTModes | string;\n}\n\n/**\n * Options specific to saaras:v3 (recommended).\n * @see {@link https://docs.sarvam.ai/api-reference-docs/speech-to-text/transcribe | Sarvam STT API docs}\n */\nexport interface STTV3Options extends STTBaseOptions {\n model?: 'saaras:v3';\n /** Language code (BCP-47). Default: 'en-IN'. Set to 'unknown' for auto-detection. */\n languageCode?: STTV3Languages | string;\n /** Transcription mode (v3 only). Default: 'transcribe' */\n mode?: STTModes | string;\n /** Conversation context to boost model accuracy */\n prompt?: string;\n /** Return chunk-level timestamps in REST response */\n withTimestamps?: boolean;\n}\n\n/** Combined options — discriminated by `model` field */\nexport type STTOptions = STTV2Options | STTTranslateOptions | STTV3Options;\n\n// ---------------------------------------------------------------------------\n// Resolved (internal) options — flat union of all fields\n// ---------------------------------------------------------------------------\n\ninterface ResolvedSTTOptions {\n apiKey: string;\n model: STTModels;\n streaming: boolean;\n // saarika:v2.5 and saaras:v3 only — not used by saaras:v2.5 (translate auto-detects)\n languageCode?: STTLanguages | string;\n // saaras:v3 and saaras:v2.5 (translate)\n mode?: STTModes | string;\n // saaras:v2.5 (translate) and saaras:v3\n prompt?: string;\n // saarika:v2.5 and saaras:v3 (/speech-to-text only, not translate)\n withTimestamps?: boolean;\n // WS-only flags\n highVadSensitivity?: boolean;\n flushSignal?: boolean;\n}\n\n// ---------------------------------------------------------------------------\n// Defaults per model\n// ---------------------------------------------------------------------------\n\nconst SAARIKA_DEFAULTS = {\n languageCode: 'en-IN',\n};\n\nconst SAARAS_V3_DEFAULTS = {\n languageCode: 'en-IN',\n mode: 'transcribe',\n};\n\nconst SAARAS_TRANSLATE_DEFAULTS = {\n mode: 'translate',\n};\n\n/** Runtime set of languages supported by saarika:v2.5 (for validation on model switch) */\nconst STTV2_LANGUAGE_SET: ReadonlySet<string> = new Set<STTV2Languages>([\n 'unknown',\n 'hi-IN',\n 'bn-IN',\n 'kn-IN',\n 'ml-IN',\n 'mr-IN',\n 'od-IN',\n 'pa-IN',\n 'ta-IN',\n 'te-IN',\n 'en-IN',\n 'gu-IN',\n]);\n\n// ---------------------------------------------------------------------------\n// Resolve caller options into a fully-populated internal struct\n// ---------------------------------------------------------------------------\n\nfunction resolveOptions(opts: Partial<STTOptions>): ResolvedSTTOptions {\n const apiKey = opts.apiKey ?? process.env.SARVAM_API_KEY;\n if (!apiKey) {\n throw new Error('Sarvam API key is required, whether as an argument or as $SARVAM_API_KEY');\n }\n\n const model: STTModels = opts.model ?? 'saaras:v3';\n\n const base: ResolvedSTTOptions = {\n apiKey,\n model,\n streaming: opts.streaming ?? true,\n highVadSensitivity: opts.highVadSensitivity,\n flushSignal: opts.flushSignal,\n };\n\n if (model === 'saaras:v2.5') {\n const translateOpts = opts as STTTranslateOptions;\n base.prompt = translateOpts.prompt;\n base.mode = translateOpts.mode ?? SAARAS_TRANSLATE_DEFAULTS.mode;\n } else if (model === 'saaras:v3') {\n const v3Opts = opts as STTV3Options;\n base.languageCode = normalizeLanguage(v3Opts.languageCode ?? SAARAS_V3_DEFAULTS.languageCode);\n base.mode = v3Opts.mode ?? SAARAS_V3_DEFAULTS.mode;\n base.prompt = v3Opts.prompt;\n base.withTimestamps = v3Opts.withTimestamps;\n } else {\n // saarika:v2.5\n let languageCode = normalizeLanguage(\n (opts as STTV2Options).languageCode ?? SAARIKA_DEFAULTS.languageCode,\n );\n if (!STTV2_LANGUAGE_SET.has(languageCode)) {\n languageCode = normalizeLanguage(SAARIKA_DEFAULTS.languageCode);\n }\n base.languageCode = languageCode;\n base.withTimestamps = (opts as STTV2Options).withTimestamps;\n }\n\n return base;\n}\n\n// ---------------------------------------------------------------------------\n// URL helpers\n// ---------------------------------------------------------------------------\n\nfunction getRestUrl(model: STTModels): string {\n return model === 'saaras:v2.5' ? SARVAM_STT_TRANSLATE_REST_URL : SARVAM_STT_REST_URL;\n}\n\nfunction getWsUrl(model: STTModels): string {\n return model === 'saaras:v2.5' ? SARVAM_STT_TRANSLATE_WS_URL : SARVAM_STT_WS_URL;\n}\n\nfunction buildWsUrl(opts: ResolvedSTTOptions): string {\n const base = getWsUrl(opts.model);\n const params = new URLSearchParams();\n params.set('model', opts.model);\n params.set('vad_signals', 'true');\n params.set('sample_rate', String(SAMPLE_RATE));\n params.set('input_audio_codec', 'pcm_s16le');\n\n if (opts.model !== 'saaras:v2.5' && opts.languageCode != null) {\n params.set('language-code', opts.languageCode);\n }\n\n // mode: v3 on STT WS, and translate WS (both endpoints support it)\n if (opts.mode != null) {\n params.set('mode', opts.mode);\n }\n\n // Optional WS params\n if (opts.highVadSensitivity != null) {\n params.set('high_vad_sensitivity', String(opts.highVadSensitivity));\n }\n if (opts.flushSignal != null) {\n params.set('flush_signal', String(opts.flushSignal));\n }\n\n return `${base}?${params.toString()}`;\n}\n\n// ---------------------------------------------------------------------------\n// Build the multipart form data (REST) — only sends model-relevant fields\n// ---------------------------------------------------------------------------\n\nfunction buildFormData(wavBlob: Blob, opts: ResolvedSTTOptions): FormData {\n const formData = new FormData();\n formData.append('file', wavBlob, 'audio.wav');\n formData.append('model', opts.model);\n\n if (opts.model !== 'saaras:v2.5' && opts.languageCode != null) {\n formData.append('language_code', opts.languageCode);\n }\n if (opts.model === 'saaras:v3' && opts.mode != null) {\n formData.append('mode', opts.mode);\n }\n if ((opts.model === 'saaras:v2.5' || opts.model === 'saaras:v3') && opts.prompt != null) {\n formData.append('prompt', opts.prompt);\n }\n if (opts.model !== 'saaras:v2.5' && opts.withTimestamps) {\n formData.append('with_timestamps', 'true');\n }\n\n return formData;\n}\n\n// ---------------------------------------------------------------------------\n// WAV encoding helper\n// ---------------------------------------------------------------------------\n\nfunction createWav(frame: AudioFrame): Buffer {\n const bitsPerSample = 16;\n const byteRate = (frame.sampleRate * frame.channels * bitsPerSample) / 8;\n const blockAlign = (frame.channels * bitsPerSample) / 8;\n\n const header = Buffer.alloc(44);\n header.write('RIFF', 0);\n header.writeUInt32LE(36 + frame.data.byteLength, 4);\n header.write('WAVE', 8);\n header.write('fmt ', 12);\n header.writeUInt32LE(16, 16);\n header.writeUInt16LE(1, 20);\n header.writeUInt16LE(frame.channels, 22);\n header.writeUInt32LE(frame.sampleRate, 24);\n header.writeUInt32LE(byteRate, 28);\n header.writeUInt16LE(blockAlign, 32);\n header.writeUInt16LE(bitsPerSample, 34);\n header.write('data', 36);\n header.writeUInt32LE(frame.data.byteLength, 40);\n\n const pcm = Buffer.from(frame.data.buffer, frame.data.byteOffset, frame.data.byteLength);\n return Buffer.concat([header, pcm]);\n}\n\n// ---------------------------------------------------------------------------\n// REST response type\n// ---------------------------------------------------------------------------\n\ninterface SarvamSTTResponse {\n request_id: string | null;\n transcript: string;\n language_code: string | null;\n language_probability?: number | null;\n timestamps?: {\n words: string[];\n start_time_seconds: number[];\n end_time_seconds: number[];\n } | null;\n}\n\n// ---------------------------------------------------------------------------\n// WS response types (from server Publish messages)\n// ---------------------------------------------------------------------------\n\n/** type: \"data\" */\ninterface SarvamWSTranscriptData {\n request_id?: string;\n transcript?: string;\n language_code?: string | null;\n language_probability?: number | null;\n timestamps?: Record<string, unknown> | null;\n diarized_transcript?: Record<string, unknown> | null;\n metrics?: {\n audio_duration?: number;\n processing_latency?: number;\n };\n}\n\n/** type: \"events\" */\ninterface SarvamWSEventData {\n event_type?: string;\n timestamp?: string;\n signal_type?: 'START_SPEECH' | 'END_SPEECH';\n occured_at?: number;\n}\n\n/** type: \"error\" — server sends data with message and code fields */\ninterface SarvamWSErrorData {\n message?: string;\n error?: string;\n code?: string;\n}\n\n// ---------------------------------------------------------------------------\n// STT class — supports both REST (recognize) and WebSocket (stream)\n// ---------------------------------------------------------------------------\n\nexport class STT extends stt.STT {\n private opts: ResolvedSTTOptions;\n label = 'sarvam.STT';\n\n /**\n * Create a new instance of Sarvam AI STT.\n *\n * @remarks\n * `apiKey` must be set to your Sarvam API key, either using the argument or by setting the\n * `SARVAM_API_KEY` environment variable.\n *\n * Supported models:\n * - `saaras:v3` (default, recommended) — supports all 22 languages, modes, prompt, timestamps, and uses `/speech-to-text`.\n * - `saaras:v2.5` — Indic-to-English translation via `/speech-to-text-translate`. Auto-detects source language. Supports prompt.\n * - `saarika:v2.5` — will be deprecated soon. Supports timestamps. All its languages are available in `saaras:v3`.\n *\n * @see {@link https://docs.sarvam.ai/api-reference-docs/speech-to-text/transcribe | Sarvam STT API docs}\n * @see {@link https://docs.sarvam.ai/api-reference-docs/speech-to-text-translate/translate | Sarvam STT Translate docs}\n */\n constructor(opts: Partial<STTOptions> = {}) {\n const resolved = resolveOptions(opts);\n super({\n streaming: resolved.streaming,\n interimResults: false,\n alignedTranscript: false,\n });\n this.opts = resolved;\n }\n\n updateOptions(opts: Partial<STTOptions>) {\n const modelChanging = opts.model != null && opts.model !== this.opts.model;\n\n const base: Partial<STTOptions> = modelChanging\n ? {\n apiKey: this.opts.apiKey,\n streaming: this.opts.streaming,\n ...(this.opts.highVadSensitivity != null\n ? { highVadSensitivity: this.opts.highVadSensitivity }\n : {}),\n ...(this.opts.flushSignal != null ? { flushSignal: this.opts.flushSignal } : {}),\n ...(this.opts.languageCode != null && opts.model !== 'saaras:v2.5'\n ? { languageCode: this.opts.languageCode as STTV3Languages }\n : {}),\n }\n : ({ ...this.opts } as Partial<STTOptions>);\n\n this.opts = resolveOptions({ ...base, ...opts } as STTOptions);\n }\n\n async _recognize(buffer: AudioBuffer, abortSignal?: AbortSignal): Promise<stt.SpeechEvent> {\n const frame = mergeFrames(buffer);\n const wavBuffer = createWav(frame);\n const wavBlob = new Blob([new Uint8Array(wavBuffer)], { type: 'audio/wav' });\n\n const formData = buildFormData(wavBlob, this.opts);\n\n const response = await fetch(getRestUrl(this.opts.model), {\n method: 'POST',\n headers: {\n 'api-subscription-key': this.opts.apiKey,\n },\n body: formData,\n signal: abortSignal ?? null,\n });\n\n if (!response.ok) {\n const errorBody = await response.text();\n throw new Error(`Sarvam STT API error ${response.status}: ${errorBody}`);\n }\n\n const data = (await response.json()) as SarvamSTTResponse;\n\n let startTime = 0;\n let endTime = 0;\n if (data.timestamps) {\n const starts = data.timestamps.start_time_seconds;\n const ends = data.timestamps.end_time_seconds;\n if (starts.length > 0) startTime = starts[0] ?? 0;\n if (ends.length > 0) endTime = ends[ends.length - 1] ?? 0;\n }\n\n return {\n type: stt.SpeechEventType.FINAL_TRANSCRIPT,\n requestId: data.request_id ?? undefined,\n alternatives: [\n {\n text: data.transcript || '',\n language: normalizeLanguage(data.language_code ?? this.opts.languageCode ?? 'unknown'),\n startTime,\n endTime,\n confidence: data.language_probability ?? 0,\n },\n ],\n };\n }\n\n stream(options?: { connOptions?: APIConnectOptions }): SpeechStream {\n if (!this.capabilities.streaming) {\n throw new Error(\n 'Sarvam STT streaming is disabled (`streaming: false`). Use recognize() for REST or wrap with stt.StreamAdapter + VAD for streaming behavior.',\n );\n }\n return new SpeechStream(this, this.opts, options?.connOptions);\n }\n}\n\n// ---------------------------------------------------------------------------\n// WebSocket streaming SpeechStream\n// ---------------------------------------------------------------------------\n\nexport class SpeechStream extends stt.SpeechStream {\n #opts: ResolvedSTTOptions;\n #audioEnergyFilter: AudioEnergyFilter;\n #logger = log();\n #speaking = false;\n #resetWS = new Future();\n #requestId = '';\n label = 'sarvam.SpeechStream';\n\n constructor(sttInstance: STT, opts: ResolvedSTTOptions, connOptions?: APIConnectOptions) {\n super(sttInstance, SAMPLE_RATE, connOptions);\n this.#opts = opts;\n this.closed = false;\n this.#audioEnergyFilter = new AudioEnergyFilter();\n }\n\n updateOptions(opts: Partial<STTOptions>) {\n const modelChanging = opts.model != null && opts.model !== this.#opts.model;\n\n const base: Partial<STTOptions> = modelChanging\n ? {\n apiKey: this.#opts.apiKey,\n ...(this.#opts.highVadSensitivity != null\n ? { highVadSensitivity: this.#opts.highVadSensitivity }\n : {}),\n ...(this.#opts.flushSignal != null ? { flushSignal: this.#opts.flushSignal } : {}),\n ...(this.#opts.languageCode != null && opts.model !== 'saaras:v2.5'\n ? { languageCode: this.#opts.languageCode as STTV3Languages }\n : {}),\n }\n : ({ ...this.#opts } as Partial<STTOptions>);\n\n this.#opts = resolveOptions({ ...base, ...opts } as STTOptions);\n this.#resetWS.resolve();\n }\n\n protected async run() {\n const maxRetry = 32;\n let retries = 0;\n\n while (!this.input.closed && !this.closed) {\n const wsUrl = buildWsUrl(this.#opts);\n this.#logger.info(`Sarvam STT connecting to: ${wsUrl}`);\n const ws = new WebSocket(wsUrl, {\n headers: { 'api-subscription-key': this.#opts.apiKey },\n });\n\n let sessionStart = 0;\n try {\n await new Promise<void>((resolve, reject) => {\n ws.once('open', () => resolve());\n ws.once('error', (err: Error) => reject(err));\n ws.once('close', (code: number) =>\n reject(new Error(`WebSocket closed with code ${code}`)),\n );\n });\n\n sessionStart = Date.now();\n await this.#runWS(ws);\n retries = 0;\n } catch (e) {\n // Clean up the WebSocket on failure to prevent listener leaks\n ws.removeAllListeners();\n ws.close();\n\n if (!this.closed && !this.input.closed) {\n // If the session ran for a meaningful duration (>5s), this was a working\n // session that ended normally (e.g. server idle timeout ~20s). Reset retries\n // so expected idle-timeout reconnections don't accumulate toward the fatal limit.\n if (sessionStart > 0 && Date.now() - sessionStart > 5000) {\n retries = 0;\n }\n if (retries >= maxRetry) {\n throw new Error(`Failed to connect to Sarvam STT after ${retries} attempts: ${e}`);\n }\n const delay = Math.min(retries * 5, 10);\n retries++;\n this.#logger.warn(\n `Failed to connect to Sarvam STT, retrying in ${delay}s: ${e} (${retries}/${maxRetry})`,\n );\n await new Promise((resolve) => setTimeout(resolve, delay * 1000));\n } else {\n this.#logger.warn(\n `Sarvam STT disconnected, connection is closed: ${e} (inputClosed: ${this.input.closed}, isClosed: ${this.closed})`,\n );\n }\n }\n }\n\n this.closed = true;\n }\n\n async #runWS(ws: WebSocket) {\n this.#resetWS = new Future();\n this.#speaking = false;\n let closing = false;\n // Session-scoped controller: aborted in finally to cancel sendTask on WS reset\n const sessionController = new AbortController();\n\n // Config message: only supported on translate WS endpoint (saaras:v2.5)\n // @see https://docs.sarvam.ai/api-reference-docs/speech-to-text-translate/translate/ws\n if (this.#opts.model === 'saaras:v2.5' && this.#opts.prompt != null) {\n ws.send(JSON.stringify({ type: 'config', prompt: this.#opts.prompt }));\n }\n\n // No keepalive — Sarvam rejects messages without 'audio' field, and sending\n // silent audio could confuse server-side VAD. On idle timeout (~20s), the\n // server closes the connection and the outer retry loop in run() reconnects.\n // This matches the Python SDK's approach.\n\n const wsMonitor = Task.from(async (controller) => {\n const closed = new Promise<void>((_, reject) => {\n ws.once('close', (code: number, reason: Buffer) => {\n if (!closing) {\n this.#logger.error(`WebSocket closed with code ${code}: ${reason}`);\n reject(new Error('WebSocket closed'));\n }\n });\n });\n await Promise.race([closed, waitForAbort(controller.signal)]);\n });\n\n const sendTask = async () => {\n const samples50Ms = Math.floor(SAMPLE_RATE / 20); // 50ms chunks\n const stream = new AudioByteStream(SAMPLE_RATE, NUM_CHANNELS, samples50Ms);\n const abortPromise = waitForAbort(this.abortSignal);\n const sessionAbort = waitForAbort(sessionController.signal);\n\n try {\n while (!this.closed) {\n const result = await Promise.race([this.input.next(), abortPromise, sessionAbort]);\n if (result === undefined) return; // aborted\n if (result.done) break;\n\n const data = result.value;\n\n let frames: AudioFrame[];\n if (data === SpeechStream.FLUSH_SENTINEL) {\n frames = stream.flush();\n } else if (data.sampleRate !== SAMPLE_RATE || data.channels !== NUM_CHANNELS) {\n throw new Error(\n `Expected ${SAMPLE_RATE}Hz/${NUM_CHANNELS}ch, got ${data.sampleRate}Hz/${data.channels}ch`,\n );\n } else {\n frames = stream.write(\n data.data.buffer.slice(\n data.data.byteOffset,\n data.data.byteOffset + data.data.byteLength,\n ) as ArrayBuffer,\n );\n }\n\n for (const frame of frames) {\n if (this.#audioEnergyFilter.pushFrame(frame)) {\n // Sarvam expects base64-encoded PCM in a JSON message\n const pcmBuffer = Buffer.from(\n frame.data.buffer,\n frame.data.byteOffset,\n frame.data.byteLength,\n );\n const base64Audio = pcmBuffer.toString('base64');\n ws.send(\n JSON.stringify({\n audio: {\n data: base64Audio,\n encoding: 'audio/wav',\n sample_rate: SAMPLE_RATE,\n },\n }),\n );\n }\n }\n\n // Send flush message on FLUSH_SENTINEL (VAD end of speech)\n if (data === SpeechStream.FLUSH_SENTINEL) {\n ws.send(JSON.stringify({ type: 'flush' }));\n }\n }\n } finally {\n closing = true;\n // Match Python: end_of_stream includes an empty audio field to avoid\n // \"audio must not be None\" rejection from the server\n try {\n ws.send(\n JSON.stringify({\n type: 'end_of_stream',\n audio: { data: '', encoding: 'audio/wav', sample_rate: SAMPLE_RATE },\n }),\n );\n } catch {\n // ws may already be closed\n }\n wsMonitor.cancel();\n }\n };\n\n const listenTask = Task.from(async (controller) => {\n const putMessage = (event: stt.SpeechEvent) => {\n if (!this.queue.closed) {\n try {\n this.queue.put(event);\n } catch {\n // ignore\n }\n }\n };\n\n const listenMessage = new Promise<void>((resolve, reject) => {\n ws.once('close', () => resolve());\n ws.on('message', (msg: RawData) => {\n try {\n const raw = msg.toString();\n this.#logger.debug(`Sarvam STT raw WS message: ${raw.substring(0, 500)}`);\n const json = JSON.parse(raw);\n const msgType: string = json['type'] ?? '';\n\n if (msgType === 'events') {\n const eventData = (json['data'] as SarvamWSEventData | undefined) ?? {};\n const signalType = eventData.signal_type;\n\n if (signalType === 'START_SPEECH') {\n if (!this.#speaking) {\n this.#speaking = true;\n putMessage({ type: stt.SpeechEventType.START_OF_SPEECH });\n }\n } else if (signalType === 'END_SPEECH') {\n if (this.#speaking) {\n this.#speaking = false;\n putMessage({ type: stt.SpeechEventType.END_OF_SPEECH });\n }\n }\n } else if (msgType === 'data') {\n const td = (json['data'] as SarvamWSTranscriptData | undefined) ?? {};\n const transcript = td.transcript ?? '';\n const language = normalizeLanguage(\n td.language_code ?? this.#opts.languageCode ?? 'unknown',\n );\n const requestId = td.request_id ?? '';\n const confidence = td.language_probability ?? 0;\n this.#requestId = requestId;\n\n // Log metrics when available\n if (td.metrics) {\n this.#logger.debug(\n `Sarvam STT metrics: audio_duration=${td.metrics.audio_duration}s, latency=${td.metrics.processing_latency}s`,\n );\n }\n\n if (transcript) {\n if (!this.#speaking) {\n this.#speaking = true;\n putMessage({ type: stt.SpeechEventType.START_OF_SPEECH });\n }\n\n putMessage({\n type: stt.SpeechEventType.FINAL_TRANSCRIPT,\n requestId,\n alternatives: [\n {\n text: transcript,\n language,\n startTime: 0,\n endTime: td.metrics?.audio_duration ?? 0,\n confidence,\n },\n ],\n });\n }\n } else if (msgType === 'error') {\n // Server format: { type: \"error\", data: { message: \"...\", code: \"...\" } }\n // Also check top-level and 'error' field as fallback\n const nested = json['data'] as SarvamWSErrorData | undefined;\n const errorInfo =\n nested?.message ??\n nested?.error ??\n json['error'] ??\n json['message'] ??\n 'Unknown error';\n const errorCode = nested?.code ?? json['code'] ?? '';\n this.#logger.error(`Sarvam STT WebSocket error [${errorCode}]: ${errorInfo}`);\n reject(new Error(`Sarvam STT API error [${errorCode}]: ${errorInfo}`));\n return;\n }\n\n if (this.closed || closing) {\n resolve();\n }\n } catch (err) {\n this.#logger.error(`Error processing Sarvam STT message: ${msg}`);\n reject(err);\n }\n });\n });\n\n await Promise.race([listenMessage, waitForAbort(controller.signal)]);\n }, this.abortController);\n\n try {\n await Promise.race([\n this.#resetWS.await,\n Promise.all([sendTask(), listenTask.result, wsMonitor.result]),\n ]);\n } finally {\n closing = true;\n sessionController.abort();\n // Do NOT call listenTask.cancel() — it would abort this.abortController\n // (passed to Task.from) and permanently break the stream. Instead, ws.close()\n // triggers the ws.once('close') handler inside listenMessage, letting listenTask\n // exit naturally. On close(), the parent abort signal handles it directly.\n wsMonitor.cancel();\n ws.close();\n // Suppress unhandled rejection from orphaned listenTask on reconnect\n listenTask.result.catch(() => {});\n }\n }\n}\n"],"mappings":"AAGA;AAAA,EAGE;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,OACK;AAEP,SAAuB,iBAAiB;AAaxC,MAAM,sBAAsB;AAC5B,MAAM,gCAAgC;AACtC,MAAM,oBAAoB;AAC1B,MAAM,8BAA8B;AAEpC,MAAM,cAAc;AACpB,MAAM,eAAe;AA6FrB,MAAM,mBAAmB;AAAA,EACvB,cAAc;AAChB;AAEA,MAAM,qBAAqB;AAAA,EACzB,cAAc;AAAA,EACd,MAAM;AACR;AAEA,MAAM,4BAA4B;AAAA,EAChC,MAAM;AACR;AAGA,MAAM,qBAA0C,oBAAI,IAAoB;AAAA,EACtE;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF,CAAC;AAMD,SAAS,eAAe,MAA+C;AACrE,QAAM,SAAS,KAAK,UAAU,QAAQ,IAAI;AAC1C,MAAI,CAAC,QAAQ;AACX,UAAM,IAAI,MAAM,0EAA0E;AAAA,EAC5F;AAEA,QAAM,QAAmB,KAAK,SAAS;AAEvC,QAAM,OAA2B;AAAA,IAC/B;AAAA,IACA;AAAA,IACA,WAAW,KAAK,aAAa;AAAA,IAC7B,oBAAoB,KAAK;AAAA,IACzB,aAAa,KAAK;AAAA,EACpB;AAEA,MAAI,UAAU,eAAe;AAC3B,UAAM,gBAAgB;AACtB,SAAK,SAAS,cAAc;AAC5B,SAAK,OAAO,cAAc,QAAQ,0BAA0B;AAAA,EAC9D,WAAW,UAAU,aAAa;AAChC,UAAM,SAAS;AACf,SAAK,eAAe,kBAAkB,OAAO,gBAAgB,mBAAmB,YAAY;AAC5F,SAAK,OAAO,OAAO,QAAQ,mBAAmB;AAC9C,SAAK,SAAS,OAAO;AACrB,SAAK,iBAAiB,OAAO;AAAA,EAC/B,OAAO;AAEL,QAAI,eAAe;AAAA,MAChB,KAAsB,gBAAgB,iBAAiB;AAAA,IAC1D;AACA,QAAI,CAAC,mBAAmB,IAAI,YAAY,GAAG;AACzC,qBAAe,kBAAkB,iBAAiB,YAAY;AAAA,IAChE;AACA,SAAK,eAAe;AACpB,SAAK,iBAAkB,KAAsB;AAAA,EAC/C;AAEA,SAAO;AACT;AAMA,SAAS,WAAW,OAA0B;AAC5C,SAAO,UAAU,gBAAgB,gCAAgC;AACnE;AAEA,SAAS,SAAS,OAA0B;AAC1C,SAAO,UAAU,gBAAgB,8BAA8B;AACjE;AAEA,SAAS,WAAW,MAAkC;AACpD,QAAM,OAAO,SAAS,KAAK,KAAK;AAChC,QAAM,SAAS,IAAI,gBAAgB;AACnC,SAAO,IAAI,SAAS,KAAK,KAAK;AAC9B,SAAO,IAAI,eAAe,MAAM;AAChC,SAAO,IAAI,eAAe,OAAO,WAAW,CAAC;AAC7C,SAAO,IAAI,qBAAqB,WAAW;AAE3C,MAAI,KAAK,UAAU,iBAAiB,KAAK,gBAAgB,MAAM;AAC7D,WAAO,IAAI,iBAAiB,KAAK,YAAY;AAAA,EAC/C;AAGA,MAAI,KAAK,QAAQ,MAAM;AACrB,WAAO,IAAI,QAAQ,KAAK,IAAI;AAAA,EAC9B;AAGA,MAAI,KAAK,sBAAsB,MAAM;AACnC,WAAO,IAAI,wBAAwB,OAAO,KAAK,kBAAkB,CAAC;AAAA,EACpE;AACA,MAAI,KAAK,eAAe,MAAM;AAC5B,WAAO,IAAI,gBAAgB,OAAO,KAAK,WAAW,CAAC;AAAA,EACrD;AAEA,SAAO,GAAG,IAAI,IAAI,OAAO,SAAS,CAAC;AACrC;AAMA,SAAS,cAAc,SAAe,MAAoC;AACxE,QAAM,WAAW,IAAI,SAAS;AAC9B,WAAS,OAAO,QAAQ,SAAS,WAAW;AAC5C,WAAS,OAAO,SAAS,KAAK,KAAK;AAEnC,MAAI,KAAK,UAAU,iBAAiB,KAAK,gBAAgB,MAAM;AAC7D,aAAS,OAAO,iBAAiB,KAAK,YAAY;AAAA,EACpD;AACA,MAAI,KAAK,UAAU,eAAe,KAAK,QAAQ,MAAM;AACnD,aAAS,OAAO,QAAQ,KAAK,IAAI;AAAA,EACnC;AACA,OAAK,KAAK,UAAU,iBAAiB,KAAK,UAAU,gBAAgB,KAAK,UAAU,MAAM;AACvF,aAAS,OAAO,UAAU,KAAK,MAAM;AAAA,EACvC;AACA,MAAI,KAAK,UAAU,iBAAiB,KAAK,gBAAgB;AACvD,aAAS,OAAO,mBAAmB,MAAM;AAAA,EAC3C;AAEA,SAAO;AACT;AAMA,SAAS,UAAU,OAA2B;AAC5C,QAAM,gBAAgB;AACtB,QAAM,WAAY,MAAM,aAAa,MAAM,WAAW,gBAAiB;AACvE,QAAM,aAAc,MAAM,WAAW,gBAAiB;AAEtD,QAAM,SAAS,OAAO,MAAM,EAAE;AAC9B,SAAO,MAAM,QAAQ,CAAC;AACtB,SAAO,cAAc,KAAK,MAAM,KAAK,YAAY,CAAC;AAClD,SAAO,MAAM,QAAQ,CAAC;AACtB,SAAO,MAAM,QAAQ,EAAE;AACvB,SAAO,cAAc,IAAI,EAAE;AAC3B,SAAO,cAAc,GAAG,EAAE;AAC1B,SAAO,cAAc,MAAM,UAAU,EAAE;AACvC,SAAO,cAAc,MAAM,YAAY,EAAE;AACzC,SAAO,cAAc,UAAU,EAAE;AACjC,SAAO,cAAc,YAAY,EAAE;AACnC,SAAO,cAAc,eAAe,EAAE;AACtC,SAAO,MAAM,QAAQ,EAAE;AACvB,SAAO,cAAc,MAAM,KAAK,YAAY,EAAE;AAE9C,QAAM,MAAM,OAAO,KAAK,MAAM,KAAK,QAAQ,MAAM,KAAK,YAAY,MAAM,KAAK,UAAU;AACvF,SAAO,OAAO,OAAO,CAAC,QAAQ,GAAG,CAAC;AACpC;AAuDO,MAAM,YAAY,IAAI,IAAI;AAAA,EACvB;AAAA,EACR,QAAQ;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAiBR,YAAY,OAA4B,CAAC,GAAG;AAC1C,UAAM,WAAW,eAAe,IAAI;AACpC,UAAM;AAAA,MACJ,WAAW,SAAS;AAAA,MACpB,gBAAgB;AAAA,MAChB,mBAAmB;AAAA,IACrB,CAAC;AACD,SAAK,OAAO;AAAA,EACd;AAAA,EAEA,cAAc,MAA2B;AACvC,UAAM,gBAAgB,KAAK,SAAS,QAAQ,KAAK,UAAU,KAAK,KAAK;AAErE,UAAM,OAA4B,gBAC9B;AAAA,MACE,QAAQ,KAAK,KAAK;AAAA,MAClB,WAAW,KAAK,KAAK;AAAA,MACrB,GAAI,KAAK,KAAK,sBAAsB,OAChC,EAAE,oBAAoB,KAAK,KAAK,mBAAmB,IACnD,CAAC;AAAA,MACL,GAAI,KAAK,KAAK,eAAe,OAAO,EAAE,aAAa,KAAK,KAAK,YAAY,IAAI,CAAC;AAAA,MAC9E,GAAI,KAAK,KAAK,gBAAgB,QAAQ,KAAK,UAAU,gBACjD,EAAE,cAAc,KAAK,KAAK,aAA+B,IACzD,CAAC;AAAA,IACP,IACC,EAAE,GAAG,KAAK,KAAK;AAEpB,SAAK,OAAO,eAAe,EAAE,GAAG,MAAM,GAAG,KAAK,CAAe;AAAA,EAC/D;AAAA,EAEA,MAAM,WAAW,QAAqB,aAAqD;AACzF,UAAM,QAAQ,YAAY,MAAM;AAChC,UAAM,YAAY,UAAU,KAAK;AACjC,UAAM,UAAU,IAAI,KAAK,CAAC,IAAI,WAAW,SAAS,CAAC,GAAG,EAAE,MAAM,YAAY,CAAC;AAE3E,UAAM,WAAW,cAAc,SAAS,KAAK,IAAI;AAEjD,UAAM,WAAW,MAAM,MAAM,WAAW,KAAK,KAAK,KAAK,GAAG;AAAA,MACxD,QAAQ;AAAA,MACR,SAAS;AAAA,QACP,wBAAwB,KAAK,KAAK;AAAA,MACpC;AAAA,MACA,MAAM;AAAA,MACN,QAAQ,eAAe;AAAA,IACzB,CAAC;AAED,QAAI,CAAC,SAAS,IAAI;AAChB,YAAM,YAAY,MAAM,SAAS,KAAK;AACtC,YAAM,IAAI,MAAM,wBAAwB,SAAS,MAAM,KAAK,SAAS,EAAE;AAAA,IACzE;AAEA,UAAM,OAAQ,MAAM,SAAS,KAAK;AAElC,QAAI,YAAY;AAChB,QAAI,UAAU;AACd,QAAI,KAAK,YAAY;AACnB,YAAM,SAAS,KAAK,WAAW;AAC/B,YAAM,OAAO,KAAK,WAAW;AAC7B,UAAI,OAAO,SAAS,EAAG,aAAY,OAAO,CAAC,KAAK;AAChD,UAAI,KAAK,SAAS,EAAG,WAAU,KAAK,KAAK,SAAS,CAAC,KAAK;AAAA,IAC1D;AAEA,WAAO;AAAA,MACL,MAAM,IAAI,gBAAgB;AAAA,MAC1B,WAAW,KAAK,cAAc;AAAA,MAC9B,cAAc;AAAA,QACZ;AAAA,UACE,MAAM,KAAK,cAAc;AAAA,UACzB,UAAU,kBAAkB,KAAK,iBAAiB,KAAK,KAAK,gBAAgB,SAAS;AAAA,UACrF;AAAA,UACA;AAAA,UACA,YAAY,KAAK,wBAAwB;AAAA,QAC3C;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,OAAO,SAA6D;AAClE,QAAI,CAAC,KAAK,aAAa,WAAW;AAChC,YAAM,IAAI;AAAA,QACR;AAAA,MACF;AAAA,IACF;AACA,WAAO,IAAI,aAAa,MAAM,KAAK,MAAM,mCAAS,WAAW;AAAA,EAC/D;AACF;AAMO,MAAM,qBAAqB,IAAI,aAAa;AAAA,EACjD;AAAA,EACA;AAAA,EACA,UAAU,IAAI;AAAA,EACd,YAAY;AAAA,EACZ,WAAW,IAAI,OAAO;AAAA,EACtB,aAAa;AAAA,EACb,QAAQ;AAAA,EAER,YAAY,aAAkB,MAA0B,aAAiC;AACvF,UAAM,aAAa,aAAa,WAAW;AAC3C,SAAK,QAAQ;AACb,SAAK,SAAS;AACd,SAAK,qBAAqB,IAAI,kBAAkB;AAAA,EAClD;AAAA,EAEA,cAAc,MAA2B;AACvC,UAAM,gBAAgB,KAAK,SAAS,QAAQ,KAAK,UAAU,KAAK,MAAM;AAEtE,UAAM,OAA4B,gBAC9B;AAAA,MACE,QAAQ,KAAK,MAAM;AAAA,MACnB,GAAI,KAAK,MAAM,sBAAsB,OACjC,EAAE,oBAAoB,KAAK,MAAM,mBAAmB,IACpD,CAAC;AAAA,MACL,GAAI,KAAK,MAAM,eAAe,OAAO,EAAE,aAAa,KAAK,MAAM,YAAY,IAAI,CAAC;AAAA,MAChF,GAAI,KAAK,MAAM,gBAAgB,QAAQ,KAAK,UAAU,gBAClD,EAAE,cAAc,KAAK,MAAM,aAA+B,IAC1D,CAAC;AAAA,IACP,IACC,EAAE,GAAG,KAAK,MAAM;AAErB,SAAK,QAAQ,eAAe,EAAE,GAAG,MAAM,GAAG,KAAK,CAAe;AAC9D,SAAK,SAAS,QAAQ;AAAA,EACxB;AAAA,EAEA,MAAgB,MAAM;AACpB,UAAM,WAAW;AACjB,QAAI,UAAU;AAEd,WAAO,CAAC,KAAK,MAAM,UAAU,CAAC,KAAK,QAAQ;AACzC,YAAM,QAAQ,WAAW,KAAK,KAAK;AACnC,WAAK,QAAQ,KAAK,6BAA6B,KAAK,EAAE;AACtD,YAAM,KAAK,IAAI,UAAU,OAAO;AAAA,QAC9B,SAAS,EAAE,wBAAwB,KAAK,MAAM,OAAO;AAAA,MACvD,CAAC;AAED,UAAI,eAAe;AACnB,UAAI;AACF,cAAM,IAAI,QAAc,CAAC,SAAS,WAAW;AAC3C,aAAG,KAAK,QAAQ,MAAM,QAAQ,CAAC;AAC/B,aAAG,KAAK,SAAS,CAAC,QAAe,OAAO,GAAG,CAAC;AAC5C,aAAG;AAAA,YAAK;AAAA,YAAS,CAAC,SAChB,OAAO,IAAI,MAAM,8BAA8B,IAAI,EAAE,CAAC;AAAA,UACxD;AAAA,QACF,CAAC;AAED,uBAAe,KAAK,IAAI;AACxB,cAAM,KAAK,OAAO,EAAE;AACpB,kBAAU;AAAA,MACZ,SAAS,GAAG;AAEV,WAAG,mBAAmB;AACtB,WAAG,MAAM;AAET,YAAI,CAAC,KAAK,UAAU,CAAC,KAAK,MAAM,QAAQ;AAItC,cAAI,eAAe,KAAK,KAAK,IAAI,IAAI,eAAe,KAAM;AACxD,sBAAU;AAAA,UACZ;AACA,cAAI,WAAW,UAAU;AACvB,kBAAM,IAAI,MAAM,yCAAyC,OAAO,cAAc,CAAC,EAAE;AAAA,UACnF;AACA,gBAAM,QAAQ,KAAK,IAAI,UAAU,GAAG,EAAE;AACtC;AACA,eAAK,QAAQ;AAAA,YACX,gDAAgD,KAAK,MAAM,CAAC,KAAK,OAAO,IAAI,QAAQ;AAAA,UACtF;AACA,gBAAM,IAAI,QAAQ,CAAC,YAAY,WAAW,SAAS,QAAQ,GAAI,CAAC;AAAA,QAClE,OAAO;AACL,eAAK,QAAQ;AAAA,YACX,kDAAkD,CAAC,kBAAkB,KAAK,MAAM,MAAM,eAAe,KAAK,MAAM;AAAA,UAClH;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAEA,SAAK,SAAS;AAAA,EAChB;AAAA,EAEA,MAAM,OAAO,IAAe;AAC1B,SAAK,WAAW,IAAI,OAAO;AAC3B,SAAK,YAAY;AACjB,QAAI,UAAU;AAEd,UAAM,oBAAoB,IAAI,gBAAgB;AAI9C,QAAI,KAAK,MAAM,UAAU,iBAAiB,KAAK,MAAM,UAAU,MAAM;AACnE,SAAG,KAAK,KAAK,UAAU,EAAE,MAAM,UAAU,QAAQ,KAAK,MAAM,OAAO,CAAC,CAAC;AAAA,IACvE;AAOA,UAAM,YAAY,KAAK,KAAK,OAAO,eAAe;AAChD,YAAM,SAAS,IAAI,QAAc,CAAC,GAAG,WAAW;AAC9C,WAAG,KAAK,SAAS,CAAC,MAAc,WAAmB;AACjD,cAAI,CAAC,SAAS;AACZ,iBAAK,QAAQ,MAAM,8BAA8B,IAAI,KAAK,MAAM,EAAE;AAClE,mBAAO,IAAI,MAAM,kBAAkB,CAAC;AAAA,UACtC;AAAA,QACF,CAAC;AAAA,MACH,CAAC;AACD,YAAM,QAAQ,KAAK,CAAC,QAAQ,aAAa,WAAW,MAAM,CAAC,CAAC;AAAA,IAC9D,CAAC;AAED,UAAM,WAAW,YAAY;AAC3B,YAAM,cAAc,KAAK,MAAM,cAAc,EAAE;AAC/C,YAAM,SAAS,IAAI,gBAAgB,aAAa,cAAc,WAAW;AACzE,YAAM,eAAe,aAAa,KAAK,WAAW;AAClD,YAAM,eAAe,aAAa,kBAAkB,MAAM;AAE1D,UAAI;AACF,eAAO,CAAC,KAAK,QAAQ;AACnB,gBAAM,SAAS,MAAM,QAAQ,KAAK,CAAC,KAAK,MAAM,KAAK,GAAG,cAAc,YAAY,CAAC;AACjF,cAAI,WAAW,OAAW;AAC1B,cAAI,OAAO,KAAM;AAEjB,gBAAM,OAAO,OAAO;AAEpB,cAAI;AACJ,cAAI,SAAS,aAAa,gBAAgB;AACxC,qBAAS,OAAO,MAAM;AAAA,UACxB,WAAW,KAAK,eAAe,eAAe,KAAK,aAAa,cAAc;AAC5E,kBAAM,IAAI;AAAA,cACR,YAAY,WAAW,MAAM,YAAY,WAAW,KAAK,UAAU,MAAM,KAAK,QAAQ;AAAA,YACxF;AAAA,UACF,OAAO;AACL,qBAAS,OAAO;AAAA,cACd,KAAK,KAAK,OAAO;AAAA,gBACf,KAAK,KAAK;AAAA,gBACV,KAAK,KAAK,aAAa,KAAK,KAAK;AAAA,cACnC;AAAA,YACF;AAAA,UACF;AAEA,qBAAW,SAAS,QAAQ;AAC1B,gBAAI,KAAK,mBAAmB,UAAU,KAAK,GAAG;AAE5C,oBAAM,YAAY,OAAO;AAAA,gBACvB,MAAM,KAAK;AAAA,gBACX,MAAM,KAAK;AAAA,gBACX,MAAM,KAAK;AAAA,cACb;AACA,oBAAM,cAAc,UAAU,SAAS,QAAQ;AAC/C,iBAAG;AAAA,gBACD,KAAK,UAAU;AAAA,kBACb,OAAO;AAAA,oBACL,MAAM;AAAA,oBACN,UAAU;AAAA,oBACV,aAAa;AAAA,kBACf;AAAA,gBACF,CAAC;AAAA,cACH;AAAA,YACF;AAAA,UACF;AAGA,cAAI,SAAS,aAAa,gBAAgB;AACxC,eAAG,KAAK,KAAK,UAAU,EAAE,MAAM,QAAQ,CAAC,CAAC;AAAA,UAC3C;AAAA,QACF;AAAA,MACF,UAAE;AACA,kBAAU;AAGV,YAAI;AACF,aAAG;AAAA,YACD,KAAK,UAAU;AAAA,cACb,MAAM;AAAA,cACN,OAAO,EAAE,MAAM,IAAI,UAAU,aAAa,aAAa,YAAY;AAAA,YACrE,CAAC;AAAA,UACH;AAAA,QACF,QAAQ;AAAA,QAER;AACA,kBAAU,OAAO;AAAA,MACnB;AAAA,IACF;AAEA,UAAM,aAAa,KAAK,KAAK,OAAO,eAAe;AACjD,YAAM,aAAa,CAAC,UAA2B;AAC7C,YAAI,CAAC,KAAK,MAAM,QAAQ;AACtB,cAAI;AACF,iBAAK,MAAM,IAAI,KAAK;AAAA,UACtB,QAAQ;AAAA,UAER;AAAA,QACF;AAAA,MACF;AAEA,YAAM,gBAAgB,IAAI,QAAc,CAAC,SAAS,WAAW;AAC3D,WAAG,KAAK,SAAS,MAAM,QAAQ,CAAC;AAChC,WAAG,GAAG,WAAW,CAAC,QAAiB;AA5pB3C;AA6pBU,cAAI;AACF,kBAAM,MAAM,IAAI,SAAS;AACzB,iBAAK,QAAQ,MAAM,8BAA8B,IAAI,UAAU,GAAG,GAAG,CAAC,EAAE;AACxE,kBAAM,OAAO,KAAK,MAAM,GAAG;AAC3B,kBAAM,UAAkB,KAAK,MAAM,KAAK;AAExC,gBAAI,YAAY,UAAU;AACxB,oBAAM,YAAa,KAAK,MAAM,KAAuC,CAAC;AACtE,oBAAM,aAAa,UAAU;AAE7B,kBAAI,eAAe,gBAAgB;AACjC,oBAAI,CAAC,KAAK,WAAW;AACnB,uBAAK,YAAY;AACjB,6BAAW,EAAE,MAAM,IAAI,gBAAgB,gBAAgB,CAAC;AAAA,gBAC1D;AAAA,cACF,WAAW,eAAe,cAAc;AACtC,oBAAI,KAAK,WAAW;AAClB,uBAAK,YAAY;AACjB,6BAAW,EAAE,MAAM,IAAI,gBAAgB,cAAc,CAAC;AAAA,gBACxD;AAAA,cACF;AAAA,YACF,WAAW,YAAY,QAAQ;AAC7B,oBAAM,KAAM,KAAK,MAAM,KAA4C,CAAC;AACpE,oBAAM,aAAa,GAAG,cAAc;AACpC,oBAAM,WAAW;AAAA,gBACf,GAAG,iBAAiB,KAAK,MAAM,gBAAgB;AAAA,cACjD;AACA,oBAAM,YAAY,GAAG,cAAc;AACnC,oBAAM,aAAa,GAAG,wBAAwB;AAC9C,mBAAK,aAAa;AAGlB,kBAAI,GAAG,SAAS;AACd,qBAAK,QAAQ;AAAA,kBACX,sCAAsC,GAAG,QAAQ,cAAc,cAAc,GAAG,QAAQ,kBAAkB;AAAA,gBAC5G;AAAA,cACF;AAEA,kBAAI,YAAY;AACd,oBAAI,CAAC,KAAK,WAAW;AACnB,uBAAK,YAAY;AACjB,6BAAW,EAAE,MAAM,IAAI,gBAAgB,gBAAgB,CAAC;AAAA,gBAC1D;AAEA,2BAAW;AAAA,kBACT,MAAM,IAAI,gBAAgB;AAAA,kBAC1B;AAAA,kBACA,cAAc;AAAA,oBACZ;AAAA,sBACE,MAAM;AAAA,sBACN;AAAA,sBACA,WAAW;AAAA,sBACX,WAAS,QAAG,YAAH,mBAAY,mBAAkB;AAAA,sBACvC;AAAA,oBACF;AAAA,kBACF;AAAA,gBACF,CAAC;AAAA,cACH;AAAA,YACF,WAAW,YAAY,SAAS;AAG9B,oBAAM,SAAS,KAAK,MAAM;AAC1B,oBAAM,aACJ,iCAAQ,aACR,iCAAQ,UACR,KAAK,OAAO,KACZ,KAAK,SAAS,KACd;AACF,oBAAM,aAAY,iCAAQ,SAAQ,KAAK,MAAM,KAAK;AAClD,mBAAK,QAAQ,MAAM,+BAA+B,SAAS,MAAM,SAAS,EAAE;AAC5E,qBAAO,IAAI,MAAM,yBAAyB,SAAS,MAAM,SAAS,EAAE,CAAC;AACrE;AAAA,YACF;AAEA,gBAAI,KAAK,UAAU,SAAS;AAC1B,sBAAQ;AAAA,YACV;AAAA,UACF,SAAS,KAAK;AACZ,iBAAK,QAAQ,MAAM,wCAAwC,GAAG,EAAE;AAChE,mBAAO,GAAG;AAAA,UACZ;AAAA,QACF,CAAC;AAAA,MACH,CAAC;AAED,YAAM,QAAQ,KAAK,CAAC,eAAe,aAAa,WAAW,MAAM,CAAC,CAAC;AAAA,IACrE,GAAG,KAAK,eAAe;AAEvB,QAAI;AACF,YAAM,QAAQ,KAAK;AAAA,QACjB,KAAK,SAAS;AAAA,QACd,QAAQ,IAAI,CAAC,SAAS,GAAG,WAAW,QAAQ,UAAU,MAAM,CAAC;AAAA,MAC/D,CAAC;AAAA,IACH,UAAE;AACA,gBAAU;AACV,wBAAkB,MAAM;AAKxB,gBAAU,OAAO;AACjB,SAAG,MAAM;AAET,iBAAW,OAAO,MAAM,MAAM;AAAA,MAAC,CAAC;AAAA,IAClC;AAAA,EACF;AACF;","names":[]}
package/dist/tts.cjs CHANGED
@@ -54,7 +54,7 @@ function resolveOptions(opts) {
54
54
  streaming: opts.streaming ?? true,
55
55
  model,
56
56
  speaker: opts.speaker ?? (isV3 ? V3_DEFAULTS.speaker : V2_DEFAULTS.speaker),
57
- targetLanguageCode: opts.targetLanguageCode ?? "en-IN",
57
+ targetLanguageCode: (0, import_agents.normalizeLanguage)(opts.targetLanguageCode ?? "en-IN"),
58
58
  pace: opts.pace ?? (isV3 ? V3_DEFAULTS.pace : V2_DEFAULTS.pace),
59
59
  sampleRate: opts.sampleRate ?? SARVAM_TTS_SAMPLE_RATE,
60
60
  baseURL: opts.baseURL ?? SARVAM_BASE_URL,
package/dist/tts.cjs.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"sources":["../src/tts.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2025 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport {\n type APIConnectOptions,\n AudioByteStream,\n log,\n shortuuid,\n tokenize,\n tts,\n} from '@livekit/agents';\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { type RawData, WebSocket } from 'ws';\nimport type {\n TTSLanguages,\n TTSModels,\n TTSSampleRates,\n TTSSpeakers,\n TTSV2Speakers,\n TTSV3Speakers,\n} from './models.js';\n\nconst SARVAM_TTS_SAMPLE_RATE = 24000;\nconst SARVAM_TTS_CHANNELS = 1;\nconst SARVAM_BASE_URL = 'https://api.sarvam.ai';\nconst SARVAM_WS_URL_PATH = '/text-to-speech/ws';\nconst MIN_SENTENCE_LENGTH = 8;\n\n// ---------------------------------------------------------------------------\n// Model-specific option types\n// V2 supports pitch / loudness / enablePreprocessing\n// V3 supports temperature (pitch, loudness, enablePreprocessing are NOT supported)\n// ---------------------------------------------------------------------------\n\ninterface TTSBaseOptions {\n /** Sarvam API key. Defaults to $SARVAM_API_KEY */\n apiKey?: string;\n /**\n * Whether to use native WebSocket streaming for `stream()`.\n * Set to `false` to prefer non-streaming REST synthesis (used by Agent via TTS StreamAdapter).\n * Default: `true`.\n */\n streaming?: boolean;\n /** Target language code (BCP-47) */\n targetLanguageCode?: TTSLanguages | string;\n /** Speech pace. v2: 0.3–3.0, v3: 0.5–2.0 (default 1.0) */\n pace?: number;\n /** Output sample rate in Hz (default 24000) */\n sampleRate?: TTSSampleRates | number;\n /** Base URL for the Sarvam API */\n baseURL?: string;\n /** Sentence tokenizer for streaming (default: basic sentence tokenizer) */\n sentenceTokenizer?: tokenize.SentenceTokenizer;\n}\n\n/** Options specific to bulbul:v2 */\nexport interface TTSV2Options extends TTSBaseOptions {\n model?: 'bulbul:v2';\n /** Speaker voice (v2 voices). Default: 'anushka' */\n speaker?: TTSV2Speakers | string;\n /** Pitch adjustment, -0.75 to 0.75 (v2 only) */\n pitch?: number;\n /** Loudness, 0.3 to 3.0 (v2 only) */\n loudness?: number;\n /** Enable text preprocessing (v2 only) */\n enablePreprocessing?: boolean;\n}\n\n/** Options specific to bulbul:v3 */\nexport interface TTSV3Options extends TTSBaseOptions {\n model: 'bulbul:v3';\n /** Speaker voice (v3 voices). Default: 'shubh' */\n speaker?: TTSV3Speakers | string;\n /** Temperature for voice variation, 0.01 to 2.0 (v3 only, default 0.6) */\n temperature?: number;\n}\n\n/** Combined options — discriminated by `model` field */\nexport type TTSOptions = TTSV2Options | TTSV3Options;\n\n// ---------------------------------------------------------------------------\n// Resolved (internal) options — flat union of all fields\n// ---------------------------------------------------------------------------\n\ninterface ResolvedTTSOptions {\n apiKey: string;\n streaming: boolean;\n model: TTSModels;\n speaker: TTSSpeakers | string;\n targetLanguageCode: string;\n pace: number;\n sampleRate: number;\n baseURL: string;\n sentenceTokenizer: tokenize.SentenceTokenizer;\n // V2 only\n pitch?: number;\n loudness?: number;\n enablePreprocessing?: boolean;\n // V3 only\n temperature?: number;\n}\n\n// ---------------------------------------------------------------------------\n// Defaults per model\n// ---------------------------------------------------------------------------\n\nconst V2_DEFAULTS = {\n speaker: 'anushka' as const,\n pitch: 0,\n pace: 1.0,\n loudness: 1.0,\n enablePreprocessing: false,\n};\n\nconst V3_DEFAULTS = {\n speaker: 'shubh' as const,\n pace: 1.0,\n temperature: 0.6,\n};\n\n// ---------------------------------------------------------------------------\n// Resolve caller options into a fully-populated internal struct\n// ---------------------------------------------------------------------------\n\nfunction resolveOptions(opts: Partial<TTSOptions>): ResolvedTTSOptions {\n const apiKey = opts.apiKey ?? process.env.SARVAM_API_KEY;\n if (!apiKey) {\n throw new Error('Sarvam API key is required, whether as an argument or as $SARVAM_API_KEY');\n }\n\n const model: TTSModels = opts.model ?? 'bulbul:v2';\n const isV3 = model === 'bulbul:v3';\n\n const base: ResolvedTTSOptions = {\n apiKey,\n streaming: opts.streaming ?? true,\n model,\n speaker: opts.speaker ?? (isV3 ? V3_DEFAULTS.speaker : V2_DEFAULTS.speaker),\n targetLanguageCode: opts.targetLanguageCode ?? 'en-IN',\n pace: opts.pace ?? (isV3 ? V3_DEFAULTS.pace : V2_DEFAULTS.pace),\n sampleRate: opts.sampleRate ?? SARVAM_TTS_SAMPLE_RATE,\n baseURL: opts.baseURL ?? SARVAM_BASE_URL,\n sentenceTokenizer:\n opts.sentenceTokenizer ??\n new tokenize.basic.SentenceTokenizer({ minSentenceLength: MIN_SENTENCE_LENGTH }),\n };\n\n if (isV3) {\n base.temperature = (opts as TTSV3Options).temperature ?? V3_DEFAULTS.temperature;\n } else {\n const v2 = opts as TTSV2Options;\n base.pitch = v2.pitch ?? V2_DEFAULTS.pitch;\n base.loudness = v2.loudness ?? V2_DEFAULTS.loudness;\n base.enablePreprocessing = v2.enablePreprocessing ?? V2_DEFAULTS.enablePreprocessing;\n }\n\n return base;\n}\n\n// ---------------------------------------------------------------------------\n// Build the API request body — only sends model-relevant fields\n// ---------------------------------------------------------------------------\n\nfunction buildRequestBody(text: string, opts: ResolvedTTSOptions): Record<string, unknown> {\n const body: Record<string, unknown> = {\n text,\n target_language_code: opts.targetLanguageCode,\n speaker: opts.speaker,\n model: opts.model,\n pace: opts.pace,\n speech_sample_rate: String(opts.sampleRate),\n // Always request WAV — AudioByteStream requires raw PCM, which we get by\n // stripping the 44-byte WAV header. Other codecs produce compressed audio\n // that cannot be fed into AudioByteStream.\n output_audio_codec: 'wav',\n };\n\n if (opts.model === 'bulbul:v3') {\n if (opts.temperature != null) body.temperature = opts.temperature;\n } else {\n if (opts.pitch != null) body.pitch = opts.pitch;\n if (opts.loudness != null) body.loudness = opts.loudness;\n if (opts.enablePreprocessing != null) body.enable_preprocessing = opts.enablePreprocessing;\n }\n\n return body;\n}\n\n// ---------------------------------------------------------------------------\n// Build WS config message (sent as first message after connection)\n// ---------------------------------------------------------------------------\n\nfunction buildWsConfigMessage(opts: ResolvedTTSOptions): string {\n const data: Record<string, unknown> = {\n target_language_code: opts.targetLanguageCode,\n speaker: opts.speaker,\n model: opts.model,\n pace: opts.pace,\n speech_sample_rate: String(opts.sampleRate),\n output_audio_codec: 'linear16',\n };\n\n if (opts.model === 'bulbul:v3') {\n if (opts.temperature != null) data.temperature = opts.temperature;\n } else {\n if (opts.pitch != null) data.pitch = opts.pitch;\n if (opts.loudness != null) data.loudness = opts.loudness;\n if (opts.enablePreprocessing != null) data.enable_preprocessing = opts.enablePreprocessing;\n }\n\n return JSON.stringify({ type: 'config', data });\n}\n\n// ---------------------------------------------------------------------------\n// TTS class\n// ---------------------------------------------------------------------------\n\nexport class TTS extends tts.TTS {\n #opts: ResolvedTTSOptions;\n label = 'sarvam.TTS';\n\n /**\n * Create a new instance of Sarvam AI TTS.\n *\n * @remarks\n * `apiKey` must be set to your Sarvam API key, either using the argument or by setting the\n * `SARVAM_API_KEY` environment variable.\n */\n constructor(opts: Partial<TTSOptions> = {}) {\n const resolved = resolveOptions(opts);\n super(resolved.sampleRate, SARVAM_TTS_CHANNELS, { streaming: resolved.streaming });\n this.#opts = resolved;\n }\n\n /**\n * Update TTS options after initialization.\n *\n * @remarks\n * When the model changes, only truly shared fields (apiKey,\n * targetLanguageCode, pace, sampleRate, baseURL) carry over.\n * Model-specific fields (speaker, pitch, loudness, temperature,\n * enablePreprocessing) are dropped so resolveOptions re-applies\n * the correct defaults for the new model.\n */\n updateOptions(opts: Partial<TTSOptions>) {\n const modelChanging = opts.model != null && opts.model !== this.#opts.model;\n\n const base: Partial<TTSOptions> = modelChanging\n ? {\n apiKey: this.#opts.apiKey,\n streaming: this.#opts.streaming,\n targetLanguageCode: this.#opts.targetLanguageCode as TTSLanguages,\n pace: this.#opts.pace,\n sampleRate: this.#opts.sampleRate as TTSSampleRates,\n baseURL: this.#opts.baseURL,\n sentenceTokenizer: this.#opts.sentenceTokenizer,\n }\n : ({ ...this.#opts } as Partial<TTSOptions>);\n\n this.#opts = resolveOptions({ ...base, ...opts } as TTSOptions);\n }\n\n /**\n * Synthesize text to audio using Sarvam AI TTS.\n *\n * @param text - Text to synthesize (max 2500 chars for v3, 1500 for v2)\n * @param connOptions - API connection options\n * @param abortSignal - Abort signal for cancellation\n * @returns A chunked stream of synthesized audio\n */\n synthesize(\n text: string,\n connOptions?: APIConnectOptions,\n abortSignal?: AbortSignal,\n ): ChunkedStream {\n return new ChunkedStream(this, text, this.#opts, connOptions, abortSignal);\n }\n\n stream(): tts.SynthesizeStream {\n if (!this.capabilities.streaming) {\n throw new Error(\n 'Sarvam TTS streaming is disabled (`streaming: false`). Use synthesize() for REST or wrap with tts.StreamAdapter for streaming behavior.',\n );\n }\n return new SynthesizeStream(this, this.#opts);\n }\n}\n\n// ---------------------------------------------------------------------------\n// Chunked stream (non-streaming synthesis)\n// ---------------------------------------------------------------------------\n\n/** Chunked stream for Sarvam AI TTS that processes a single synthesis request. */\nexport class ChunkedStream extends tts.ChunkedStream {\n label = 'sarvam.ChunkedStream';\n private opts: ResolvedTTSOptions;\n\n /** @internal */\n constructor(\n tts: TTS,\n text: string,\n opts: ResolvedTTSOptions,\n connOptions?: APIConnectOptions,\n abortSignal?: AbortSignal,\n ) {\n super(text, tts, connOptions, abortSignal);\n this.opts = opts;\n }\n\n protected async run() {\n const requestId = shortuuid();\n\n const response = await fetch(`${this.opts.baseURL}/text-to-speech`, {\n method: 'POST',\n headers: {\n 'Content-Type': 'application/json',\n 'api-subscription-key': this.opts.apiKey,\n },\n body: JSON.stringify(buildRequestBody(this.inputText, this.opts)),\n signal: this.abortSignal,\n });\n\n if (!response.ok) {\n const errorBody = await response.text();\n throw new Error(`Sarvam TTS API error ${response.status}: ${errorBody}`);\n }\n\n const data = (await response.json()) as { audios: string[] };\n const audioBase64 = data.audios[0];\n if (!audioBase64) {\n throw new Error('Sarvam TTS returned empty audio');\n }\n\n // Decode base64 WAV and strip 44-byte header to get raw PCM\n const raw = Buffer.from(audioBase64, 'base64');\n const pcmData = raw.buffer.slice(raw.byteOffset + 44, raw.byteOffset + raw.byteLength);\n\n const audioByteStream = new AudioByteStream(this.opts.sampleRate, SARVAM_TTS_CHANNELS);\n const frames = [...audioByteStream.write(pcmData), ...audioByteStream.flush()];\n\n let lastFrame: AudioFrame | undefined;\n const sendLastFrame = (segmentId: string, final: boolean) => {\n if (lastFrame) {\n this.queue.put({ requestId, segmentId, frame: lastFrame, final });\n lastFrame = undefined;\n }\n };\n\n for (const frame of frames) {\n sendLastFrame(requestId, false);\n lastFrame = frame;\n }\n sendLastFrame(requestId, true);\n\n this.queue.close();\n }\n}\n\n// ---------------------------------------------------------------------------\n// WebSocket streaming synthesis\n// ---------------------------------------------------------------------------\n\nexport class SynthesizeStream extends tts.SynthesizeStream {\n private opts: ResolvedTTSOptions;\n private tokenizer: tokenize.SentenceStream;\n #logger = log();\n label = 'sarvam.SynthesizeStream';\n\n constructor(tts: TTS, opts: ResolvedTTSOptions) {\n super(tts);\n this.opts = opts;\n this.tokenizer = opts.sentenceTokenizer.stream();\n }\n\n private async closeWebSocket(ws: WebSocket): Promise<void> {\n try {\n if (ws.readyState === WebSocket.OPEN) {\n ws.send(JSON.stringify({ type: 'flush' }));\n\n try {\n await new Promise<void>((resolve) => {\n const timeout = setTimeout(() => resolve(), 1000);\n\n ws.once('message', () => {\n clearTimeout(timeout);\n resolve();\n });\n ws.once('close', () => {\n clearTimeout(timeout);\n resolve();\n });\n ws.once('error', () => {\n clearTimeout(timeout);\n resolve();\n });\n });\n } catch {\n // Ignore timeout or other errors during close sequence\n }\n }\n } catch (e) {\n this.#logger.warn(`Error during WebSocket close sequence: ${e}`);\n } finally {\n if (ws.readyState === WebSocket.OPEN || ws.readyState === WebSocket.CONNECTING) {\n ws.close();\n }\n }\n }\n\n protected async run() {\n const requestId = shortuuid();\n const segmentId = shortuuid();\n\n // Build WS URL: wss://api.sarvam.ai/text-to-speech/ws?model=...&send_completion_event=true\n const wsBaseUrl = this.opts.baseURL.replace(/^http/, 'ws');\n const url = new URL(`${wsBaseUrl}${SARVAM_WS_URL_PATH}`);\n url.searchParams.set('model', this.opts.model);\n url.searchParams.set('send_completion_event', 'true');\n\n const ws = new WebSocket(url, {\n headers: {\n 'api-subscription-key': this.opts.apiKey,\n },\n });\n\n await new Promise<void>((resolve, reject) => {\n const onOpen = () => {\n cleanup();\n resolve();\n };\n const onError = (error: Error) => {\n cleanup();\n reject(new Error(`Sarvam TTS WS connection error: ${error.message}`));\n };\n const onClose = (code: number) => {\n cleanup();\n reject(new Error(`Sarvam TTS WS closed during connect: ${code}`));\n };\n const cleanup = () => {\n ws.removeListener('open', onOpen);\n ws.removeListener('error', onError);\n ws.removeListener('close', onClose);\n };\n ws.on('open', onOpen);\n ws.on('error', onError);\n ws.on('close', onClose);\n });\n\n // Send config message immediately after connection\n ws.send(buildWsConfigMessage(this.opts));\n\n const inputTask = async () => {\n for await (const data of this.input) {\n if (data === SynthesizeStream.FLUSH_SENTINEL) {\n this.tokenizer.flush();\n continue;\n }\n this.tokenizer.pushText(data);\n }\n this.tokenizer.endInput();\n this.tokenizer.close();\n };\n\n const sendTask = async () => {\n for await (const event of this.tokenizer) {\n if (this.abortController.signal.aborted) break;\n\n const text = event.token;\n ws.send(JSON.stringify({ type: 'text', data: { text } }));\n }\n\n if (!this.abortController.signal.aborted) {\n ws.send(JSON.stringify({ type: 'flush' }));\n }\n };\n\n const recvTask = async () => {\n const bstream = new AudioByteStream(this.opts.sampleRate, SARVAM_TTS_CHANNELS);\n let finalReceived = false;\n let lastFrame: AudioFrame | undefined;\n\n const sendLastFrame = (final: boolean) => {\n if (lastFrame && !this.queue.closed) {\n this.queue.put({ requestId, segmentId, frame: lastFrame, final });\n lastFrame = undefined;\n }\n };\n\n return new Promise<void>((resolve, reject) => {\n ws.on('message', (data: RawData) => {\n let msg: { type: string; data?: Record<string, unknown> };\n try {\n msg = JSON.parse(data.toString());\n } catch {\n this.#logger.warn('Sarvam WS: received non-JSON message');\n return;\n }\n\n switch (msg.type) {\n case 'audio': {\n const audioB64 = (msg.data?.audio as string) ?? '';\n if (!audioB64) break;\n\n const raw = Buffer.from(audioB64, 'base64');\n const pcm = raw.buffer.slice(raw.byteOffset, raw.byteOffset + raw.byteLength);\n\n for (const frame of bstream.write(pcm as ArrayBuffer)) {\n sendLastFrame(false);\n lastFrame = frame;\n }\n break;\n }\n\n case 'event': {\n const eventType = msg.data?.event_type as string | undefined;\n if (eventType === 'final') {\n finalReceived = true;\n for (const frame of bstream.flush()) {\n sendLastFrame(false);\n lastFrame = frame;\n }\n sendLastFrame(true);\n\n if (!this.queue.closed) {\n this.queue.put(SynthesizeStream.END_OF_STREAM);\n }\n resolve();\n }\n break;\n }\n\n case 'error': {\n const errMsg = (msg.data?.message as string) ?? 'Unknown Sarvam WS error';\n const errCode = msg.data?.code as number | undefined;\n reject(new Error(`Sarvam WS error ${errCode ?? ''}: ${errMsg}`));\n break;\n }\n }\n });\n\n ws.on('close', () => {\n if (!finalReceived) {\n for (const frame of bstream.flush()) {\n sendLastFrame(false);\n lastFrame = frame;\n }\n sendLastFrame(true);\n\n if (!this.queue.closed) {\n this.queue.put(SynthesizeStream.END_OF_STREAM);\n }\n }\n resolve();\n });\n\n ws.on('error', (error) => {\n reject(error);\n });\n });\n };\n\n try {\n await Promise.all([inputTask(), sendTask(), recvTask()]);\n } catch (e) {\n const msg = e instanceof Error ? e.message : String(e);\n throw new Error(`Sarvam TTS streaming failed: ${msg}`);\n } finally {\n await this.closeWebSocket(ws);\n }\n }\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAGA,oBAOO;AAEP,gBAAwC;AAUxC,MAAM,yBAAyB;AAC/B,MAAM,sBAAsB;AAC5B,MAAM,kBAAkB;AACxB,MAAM,qBAAqB;AAC3B,MAAM,sBAAsB;AAgF5B,MAAM,cAAc;AAAA,EAClB,SAAS;AAAA,EACT,OAAO;AAAA,EACP,MAAM;AAAA,EACN,UAAU;AAAA,EACV,qBAAqB;AACvB;AAEA,MAAM,cAAc;AAAA,EAClB,SAAS;AAAA,EACT,MAAM;AAAA,EACN,aAAa;AACf;AAMA,SAAS,eAAe,MAA+C;AACrE,QAAM,SAAS,KAAK,UAAU,QAAQ,IAAI;AAC1C,MAAI,CAAC,QAAQ;AACX,UAAM,IAAI,MAAM,0EAA0E;AAAA,EAC5F;AAEA,QAAM,QAAmB,KAAK,SAAS;AACvC,QAAM,OAAO,UAAU;AAEvB,QAAM,OAA2B;AAAA,IAC/B;AAAA,IACA,WAAW,KAAK,aAAa;AAAA,IAC7B;AAAA,IACA,SAAS,KAAK,YAAY,OAAO,YAAY,UAAU,YAAY;AAAA,IACnE,oBAAoB,KAAK,sBAAsB;AAAA,IAC/C,MAAM,KAAK,SAAS,OAAO,YAAY,OAAO,YAAY;AAAA,IAC1D,YAAY,KAAK,cAAc;AAAA,IAC/B,SAAS,KAAK,WAAW;AAAA,IACzB,mBACE,KAAK,qBACL,IAAI,uBAAS,MAAM,kBAAkB,EAAE,mBAAmB,oBAAoB,CAAC;AAAA,EACnF;AAEA,MAAI,MAAM;AACR,SAAK,cAAe,KAAsB,eAAe,YAAY;AAAA,EACvE,OAAO;AACL,UAAM,KAAK;AACX,SAAK,QAAQ,GAAG,SAAS,YAAY;AACrC,SAAK,WAAW,GAAG,YAAY,YAAY;AAC3C,SAAK,sBAAsB,GAAG,uBAAuB,YAAY;AAAA,EACnE;AAEA,SAAO;AACT;AAMA,SAAS,iBAAiB,MAAc,MAAmD;AACzF,QAAM,OAAgC;AAAA,IACpC;AAAA,IACA,sBAAsB,KAAK;AAAA,IAC3B,SAAS,KAAK;AAAA,IACd,OAAO,KAAK;AAAA,IACZ,MAAM,KAAK;AAAA,IACX,oBAAoB,OAAO,KAAK,UAAU;AAAA;AAAA;AAAA;AAAA,IAI1C,oBAAoB;AAAA,EACtB;AAEA,MAAI,KAAK,UAAU,aAAa;AAC9B,QAAI,KAAK,eAAe,KAAM,MAAK,cAAc,KAAK;AAAA,EACxD,OAAO;AACL,QAAI,KAAK,SAAS,KAAM,MAAK,QAAQ,KAAK;AAC1C,QAAI,KAAK,YAAY,KAAM,MAAK,WAAW,KAAK;AAChD,QAAI,KAAK,uBAAuB,KAAM,MAAK,uBAAuB,KAAK;AAAA,EACzE;AAEA,SAAO;AACT;AAMA,SAAS,qBAAqB,MAAkC;AAC9D,QAAM,OAAgC;AAAA,IACpC,sBAAsB,KAAK;AAAA,IAC3B,SAAS,KAAK;AAAA,IACd,OAAO,KAAK;AAAA,IACZ,MAAM,KAAK;AAAA,IACX,oBAAoB,OAAO,KAAK,UAAU;AAAA,IAC1C,oBAAoB;AAAA,EACtB;AAEA,MAAI,KAAK,UAAU,aAAa;AAC9B,QAAI,KAAK,eAAe,KAAM,MAAK,cAAc,KAAK;AAAA,EACxD,OAAO;AACL,QAAI,KAAK,SAAS,KAAM,MAAK,QAAQ,KAAK;AAC1C,QAAI,KAAK,YAAY,KAAM,MAAK,WAAW,KAAK;AAChD,QAAI,KAAK,uBAAuB,KAAM,MAAK,uBAAuB,KAAK;AAAA,EACzE;AAEA,SAAO,KAAK,UAAU,EAAE,MAAM,UAAU,KAAK,CAAC;AAChD;AAMO,MAAM,YAAY,kBAAI,IAAI;AAAA,EAC/B;AAAA,EACA,QAAQ;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EASR,YAAY,OAA4B,CAAC,GAAG;AAC1C,UAAM,WAAW,eAAe,IAAI;AACpC,UAAM,SAAS,YAAY,qBAAqB,EAAE,WAAW,SAAS,UAAU,CAAC;AACjF,SAAK,QAAQ;AAAA,EACf;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAYA,cAAc,MAA2B;AACvC,UAAM,gBAAgB,KAAK,SAAS,QAAQ,KAAK,UAAU,KAAK,MAAM;AAEtE,UAAM,OAA4B,gBAC9B;AAAA,MACE,QAAQ,KAAK,MAAM;AAAA,MACnB,WAAW,KAAK,MAAM;AAAA,MACtB,oBAAoB,KAAK,MAAM;AAAA,MAC/B,MAAM,KAAK,MAAM;AAAA,MACjB,YAAY,KAAK,MAAM;AAAA,MACvB,SAAS,KAAK,MAAM;AAAA,MACpB,mBAAmB,KAAK,MAAM;AAAA,IAChC,IACC,EAAE,GAAG,KAAK,MAAM;AAErB,SAAK,QAAQ,eAAe,EAAE,GAAG,MAAM,GAAG,KAAK,CAAe;AAAA,EAChE;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAUA,WACE,MACA,aACA,aACe;AACf,WAAO,IAAI,cAAc,MAAM,MAAM,KAAK,OAAO,aAAa,WAAW;AAAA,EAC3E;AAAA,EAEA,SAA+B;AAC7B,QAAI,CAAC,KAAK,aAAa,WAAW;AAChC,YAAM,IAAI;AAAA,QACR;AAAA,MACF;AAAA,IACF;AACA,WAAO,IAAI,iBAAiB,MAAM,KAAK,KAAK;AAAA,EAC9C;AACF;AAOO,MAAM,sBAAsB,kBAAI,cAAc;AAAA,EACnD,QAAQ;AAAA,EACA;AAAA;AAAA,EAGR,YACEA,MACA,MACA,MACA,aACA,aACA;AACA,UAAM,MAAMA,MAAK,aAAa,WAAW;AACzC,SAAK,OAAO;AAAA,EACd;AAAA,EAEA,MAAgB,MAAM;AACpB,UAAM,gBAAY,yBAAU;AAE5B,UAAM,WAAW,MAAM,MAAM,GAAG,KAAK,KAAK,OAAO,mBAAmB;AAAA,MAClE,QAAQ;AAAA,MACR,SAAS;AAAA,QACP,gBAAgB;AAAA,QAChB,wBAAwB,KAAK,KAAK;AAAA,MACpC;AAAA,MACA,MAAM,KAAK,UAAU,iBAAiB,KAAK,WAAW,KAAK,IAAI,CAAC;AAAA,MAChE,QAAQ,KAAK;AAAA,IACf,CAAC;AAED,QAAI,CAAC,SAAS,IAAI;AAChB,YAAM,YAAY,MAAM,SAAS,KAAK;AACtC,YAAM,IAAI,MAAM,wBAAwB,SAAS,MAAM,KAAK,SAAS,EAAE;AAAA,IACzE;AAEA,UAAM,OAAQ,MAAM,SAAS,KAAK;AAClC,UAAM,cAAc,KAAK,OAAO,CAAC;AACjC,QAAI,CAAC,aAAa;AAChB,YAAM,IAAI,MAAM,iCAAiC;AAAA,IACnD;AAGA,UAAM,MAAM,OAAO,KAAK,aAAa,QAAQ;AAC7C,UAAM,UAAU,IAAI,OAAO,MAAM,IAAI,aAAa,IAAI,IAAI,aAAa,IAAI,UAAU;AAErF,UAAM,kBAAkB,IAAI,8BAAgB,KAAK,KAAK,YAAY,mBAAmB;AACrF,UAAM,SAAS,CAAC,GAAG,gBAAgB,MAAM,OAAO,GAAG,GAAG,gBAAgB,MAAM,CAAC;AAE7E,QAAI;AACJ,UAAM,gBAAgB,CAAC,WAAmB,UAAmB;AAC3D,UAAI,WAAW;AACb,aAAK,MAAM,IAAI,EAAE,WAAW,WAAW,OAAO,WAAW,MAAM,CAAC;AAChE,oBAAY;AAAA,MACd;AAAA,IACF;AAEA,eAAW,SAAS,QAAQ;AAC1B,oBAAc,WAAW,KAAK;AAC9B,kBAAY;AAAA,IACd;AACA,kBAAc,WAAW,IAAI;AAE7B,SAAK,MAAM,MAAM;AAAA,EACnB;AACF;AAMO,MAAM,yBAAyB,kBAAI,iBAAiB;AAAA,EACjD;AAAA,EACA;AAAA,EACR,cAAU,mBAAI;AAAA,EACd,QAAQ;AAAA,EAER,YAAYA,MAAU,MAA0B;AAC9C,UAAMA,IAAG;AACT,SAAK,OAAO;AACZ,SAAK,YAAY,KAAK,kBAAkB,OAAO;AAAA,EACjD;AAAA,EAEA,MAAc,eAAe,IAA8B;AACzD,QAAI;AACF,UAAI,GAAG,eAAe,oBAAU,MAAM;AACpC,WAAG,KAAK,KAAK,UAAU,EAAE,MAAM,QAAQ,CAAC,CAAC;AAEzC,YAAI;AACF,gBAAM,IAAI,QAAc,CAAC,YAAY;AACnC,kBAAM,UAAU,WAAW,MAAM,QAAQ,GAAG,GAAI;AAEhD,eAAG,KAAK,WAAW,MAAM;AACvB,2BAAa,OAAO;AACpB,sBAAQ;AAAA,YACV,CAAC;AACD,eAAG,KAAK,SAAS,MAAM;AACrB,2BAAa,OAAO;AACpB,sBAAQ;AAAA,YACV,CAAC;AACD,eAAG,KAAK,SAAS,MAAM;AACrB,2BAAa,OAAO;AACpB,sBAAQ;AAAA,YACV,CAAC;AAAA,UACH,CAAC;AAAA,QACH,QAAQ;AAAA,QAER;AAAA,MACF;AAAA,IACF,SAAS,GAAG;AACV,WAAK,QAAQ,KAAK,0CAA0C,CAAC,EAAE;AAAA,IACjE,UAAE;AACA,UAAI,GAAG,eAAe,oBAAU,QAAQ,GAAG,eAAe,oBAAU,YAAY;AAC9E,WAAG,MAAM;AAAA,MACX;AAAA,IACF;AAAA,EACF;AAAA,EAEA,MAAgB,MAAM;AACpB,UAAM,gBAAY,yBAAU;AAC5B,UAAM,gBAAY,yBAAU;AAG5B,UAAM,YAAY,KAAK,KAAK,QAAQ,QAAQ,SAAS,IAAI;AACzD,UAAM,MAAM,IAAI,IAAI,GAAG,SAAS,GAAG,kBAAkB,EAAE;AACvD,QAAI,aAAa,IAAI,SAAS,KAAK,KAAK,KAAK;AAC7C,QAAI,aAAa,IAAI,yBAAyB,MAAM;AAEpD,UAAM,KAAK,IAAI,oBAAU,KAAK;AAAA,MAC5B,SAAS;AAAA,QACP,wBAAwB,KAAK,KAAK;AAAA,MACpC;AAAA,IACF,CAAC;AAED,UAAM,IAAI,QAAc,CAAC,SAAS,WAAW;AAC3C,YAAM,SAAS,MAAM;AACnB,gBAAQ;AACR,gBAAQ;AAAA,MACV;AACA,YAAM,UAAU,CAAC,UAAiB;AAChC,gBAAQ;AACR,eAAO,IAAI,MAAM,mCAAmC,MAAM,OAAO,EAAE,CAAC;AAAA,MACtE;AACA,YAAM,UAAU,CAAC,SAAiB;AAChC,gBAAQ;AACR,eAAO,IAAI,MAAM,wCAAwC,IAAI,EAAE,CAAC;AAAA,MAClE;AACA,YAAM,UAAU,MAAM;AACpB,WAAG,eAAe,QAAQ,MAAM;AAChC,WAAG,eAAe,SAAS,OAAO;AAClC,WAAG,eAAe,SAAS,OAAO;AAAA,MACpC;AACA,SAAG,GAAG,QAAQ,MAAM;AACpB,SAAG,GAAG,SAAS,OAAO;AACtB,SAAG,GAAG,SAAS,OAAO;AAAA,IACxB,CAAC;AAGD,OAAG,KAAK,qBAAqB,KAAK,IAAI,CAAC;AAEvC,UAAM,YAAY,YAAY;AAC5B,uBAAiB,QAAQ,KAAK,OAAO;AACnC,YAAI,SAAS,iBAAiB,gBAAgB;AAC5C,eAAK,UAAU,MAAM;AACrB;AAAA,QACF;AACA,aAAK,UAAU,SAAS,IAAI;AAAA,MAC9B;AACA,WAAK,UAAU,SAAS;AACxB,WAAK,UAAU,MAAM;AAAA,IACvB;AAEA,UAAM,WAAW,YAAY;AAC3B,uBAAiB,SAAS,KAAK,WAAW;AACxC,YAAI,KAAK,gBAAgB,OAAO,QAAS;AAEzC,cAAM,OAAO,MAAM;AACnB,WAAG,KAAK,KAAK,UAAU,EAAE,MAAM,QAAQ,MAAM,EAAE,KAAK,EAAE,CAAC,CAAC;AAAA,MAC1D;AAEA,UAAI,CAAC,KAAK,gBAAgB,OAAO,SAAS;AACxC,WAAG,KAAK,KAAK,UAAU,EAAE,MAAM,QAAQ,CAAC,CAAC;AAAA,MAC3C;AAAA,IACF;AAEA,UAAM,WAAW,YAAY;AAC3B,YAAM,UAAU,IAAI,8BAAgB,KAAK,KAAK,YAAY,mBAAmB;AAC7E,UAAI,gBAAgB;AACpB,UAAI;AAEJ,YAAM,gBAAgB,CAAC,UAAmB;AACxC,YAAI,aAAa,CAAC,KAAK,MAAM,QAAQ;AACnC,eAAK,MAAM,IAAI,EAAE,WAAW,WAAW,OAAO,WAAW,MAAM,CAAC;AAChE,sBAAY;AAAA,QACd;AAAA,MACF;AAEA,aAAO,IAAI,QAAc,CAAC,SAAS,WAAW;AAC5C,WAAG,GAAG,WAAW,CAAC,SAAkB;AAze5C;AA0eU,cAAI;AACJ,cAAI;AACF,kBAAM,KAAK,MAAM,KAAK,SAAS,CAAC;AAAA,UAClC,QAAQ;AACN,iBAAK,QAAQ,KAAK,sCAAsC;AACxD;AAAA,UACF;AAEA,kBAAQ,IAAI,MAAM;AAAA,YAChB,KAAK,SAAS;AACZ,oBAAM,aAAY,SAAI,SAAJ,mBAAU,UAAoB;AAChD,kBAAI,CAAC,SAAU;AAEf,oBAAM,MAAM,OAAO,KAAK,UAAU,QAAQ;AAC1C,oBAAM,MAAM,IAAI,OAAO,MAAM,IAAI,YAAY,IAAI,aAAa,IAAI,UAAU;AAE5E,yBAAW,SAAS,QAAQ,MAAM,GAAkB,GAAG;AACrD,8BAAc,KAAK;AACnB,4BAAY;AAAA,cACd;AACA;AAAA,YACF;AAAA,YAEA,KAAK,SAAS;AACZ,oBAAM,aAAY,SAAI,SAAJ,mBAAU;AAC5B,kBAAI,cAAc,SAAS;AACzB,gCAAgB;AAChB,2BAAW,SAAS,QAAQ,MAAM,GAAG;AACnC,gCAAc,KAAK;AACnB,8BAAY;AAAA,gBACd;AACA,8BAAc,IAAI;AAElB,oBAAI,CAAC,KAAK,MAAM,QAAQ;AACtB,uBAAK,MAAM,IAAI,iBAAiB,aAAa;AAAA,gBAC/C;AACA,wBAAQ;AAAA,cACV;AACA;AAAA,YACF;AAAA,YAEA,KAAK,SAAS;AACZ,oBAAM,WAAU,SAAI,SAAJ,mBAAU,YAAsB;AAChD,oBAAM,WAAU,SAAI,SAAJ,mBAAU;AAC1B,qBAAO,IAAI,MAAM,mBAAmB,WAAW,EAAE,KAAK,MAAM,EAAE,CAAC;AAC/D;AAAA,YACF;AAAA,UACF;AAAA,QACF,CAAC;AAED,WAAG,GAAG,SAAS,MAAM;AACnB,cAAI,CAAC,eAAe;AAClB,uBAAW,SAAS,QAAQ,MAAM,GAAG;AACnC,4BAAc,KAAK;AACnB,0BAAY;AAAA,YACd;AACA,0BAAc,IAAI;AAElB,gBAAI,CAAC,KAAK,MAAM,QAAQ;AACtB,mBAAK,MAAM,IAAI,iBAAiB,aAAa;AAAA,YAC/C;AAAA,UACF;AACA,kBAAQ;AAAA,QACV,CAAC;AAED,WAAG,GAAG,SAAS,CAAC,UAAU;AACxB,iBAAO,KAAK;AAAA,QACd,CAAC;AAAA,MACH,CAAC;AAAA,IACH;AAEA,QAAI;AACF,YAAM,QAAQ,IAAI,CAAC,UAAU,GAAG,SAAS,GAAG,SAAS,CAAC,CAAC;AAAA,IACzD,SAAS,GAAG;AACV,YAAM,MAAM,aAAa,QAAQ,EAAE,UAAU,OAAO,CAAC;AACrD,YAAM,IAAI,MAAM,gCAAgC,GAAG,EAAE;AAAA,IACvD,UAAE;AACA,YAAM,KAAK,eAAe,EAAE;AAAA,IAC9B;AAAA,EACF;AACF;","names":["tts"]}
1
+ {"version":3,"sources":["../src/tts.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2025 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport {\n type APIConnectOptions,\n AudioByteStream,\n log,\n normalizeLanguage,\n shortuuid,\n tokenize,\n tts,\n} from '@livekit/agents';\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { type RawData, WebSocket } from 'ws';\nimport type {\n TTSLanguages,\n TTSModels,\n TTSSampleRates,\n TTSSpeakers,\n TTSV2Speakers,\n TTSV3Speakers,\n} from './models.js';\n\nconst SARVAM_TTS_SAMPLE_RATE = 24000;\nconst SARVAM_TTS_CHANNELS = 1;\nconst SARVAM_BASE_URL = 'https://api.sarvam.ai';\nconst SARVAM_WS_URL_PATH = '/text-to-speech/ws';\nconst MIN_SENTENCE_LENGTH = 8;\n\n// ---------------------------------------------------------------------------\n// Model-specific option types\n// V2 supports pitch / loudness / enablePreprocessing\n// V3 supports temperature (pitch, loudness, enablePreprocessing are NOT supported)\n// ---------------------------------------------------------------------------\n\ninterface TTSBaseOptions {\n /** Sarvam API key. Defaults to $SARVAM_API_KEY */\n apiKey?: string;\n /**\n * Whether to use native WebSocket streaming for `stream()`.\n * Set to `false` to prefer non-streaming REST synthesis (used by Agent via TTS StreamAdapter).\n * Default: `true`.\n */\n streaming?: boolean;\n /** Target language code (BCP-47) */\n targetLanguageCode?: TTSLanguages | string;\n /** Speech pace. v2: 0.3–3.0, v3: 0.5–2.0 (default 1.0) */\n pace?: number;\n /** Output sample rate in Hz (default 24000) */\n sampleRate?: TTSSampleRates | number;\n /** Base URL for the Sarvam API */\n baseURL?: string;\n /** Sentence tokenizer for streaming (default: basic sentence tokenizer) */\n sentenceTokenizer?: tokenize.SentenceTokenizer;\n}\n\n/** Options specific to bulbul:v2 */\nexport interface TTSV2Options extends TTSBaseOptions {\n model?: 'bulbul:v2';\n /** Speaker voice (v2 voices). Default: 'anushka' */\n speaker?: TTSV2Speakers | string;\n /** Pitch adjustment, -0.75 to 0.75 (v2 only) */\n pitch?: number;\n /** Loudness, 0.3 to 3.0 (v2 only) */\n loudness?: number;\n /** Enable text preprocessing (v2 only) */\n enablePreprocessing?: boolean;\n}\n\n/** Options specific to bulbul:v3 */\nexport interface TTSV3Options extends TTSBaseOptions {\n model: 'bulbul:v3';\n /** Speaker voice (v3 voices). Default: 'shubh' */\n speaker?: TTSV3Speakers | string;\n /** Temperature for voice variation, 0.01 to 2.0 (v3 only, default 0.6) */\n temperature?: number;\n}\n\n/** Combined options — discriminated by `model` field */\nexport type TTSOptions = TTSV2Options | TTSV3Options;\n\n// ---------------------------------------------------------------------------\n// Resolved (internal) options — flat union of all fields\n// ---------------------------------------------------------------------------\n\ninterface ResolvedTTSOptions {\n apiKey: string;\n streaming: boolean;\n model: TTSModels;\n speaker: TTSSpeakers | string;\n targetLanguageCode: string;\n pace: number;\n sampleRate: number;\n baseURL: string;\n sentenceTokenizer: tokenize.SentenceTokenizer;\n // V2 only\n pitch?: number;\n loudness?: number;\n enablePreprocessing?: boolean;\n // V3 only\n temperature?: number;\n}\n\n// ---------------------------------------------------------------------------\n// Defaults per model\n// ---------------------------------------------------------------------------\n\nconst V2_DEFAULTS = {\n speaker: 'anushka' as const,\n pitch: 0,\n pace: 1.0,\n loudness: 1.0,\n enablePreprocessing: false,\n};\n\nconst V3_DEFAULTS = {\n speaker: 'shubh' as const,\n pace: 1.0,\n temperature: 0.6,\n};\n\n// ---------------------------------------------------------------------------\n// Resolve caller options into a fully-populated internal struct\n// ---------------------------------------------------------------------------\n\nfunction resolveOptions(opts: Partial<TTSOptions>): ResolvedTTSOptions {\n const apiKey = opts.apiKey ?? process.env.SARVAM_API_KEY;\n if (!apiKey) {\n throw new Error('Sarvam API key is required, whether as an argument or as $SARVAM_API_KEY');\n }\n\n const model: TTSModels = opts.model ?? 'bulbul:v2';\n const isV3 = model === 'bulbul:v3';\n\n const base: ResolvedTTSOptions = {\n apiKey,\n streaming: opts.streaming ?? true,\n model,\n speaker: opts.speaker ?? (isV3 ? V3_DEFAULTS.speaker : V2_DEFAULTS.speaker),\n targetLanguageCode: normalizeLanguage(opts.targetLanguageCode ?? 'en-IN'),\n pace: opts.pace ?? (isV3 ? V3_DEFAULTS.pace : V2_DEFAULTS.pace),\n sampleRate: opts.sampleRate ?? SARVAM_TTS_SAMPLE_RATE,\n baseURL: opts.baseURL ?? SARVAM_BASE_URL,\n sentenceTokenizer:\n opts.sentenceTokenizer ??\n new tokenize.basic.SentenceTokenizer({ minSentenceLength: MIN_SENTENCE_LENGTH }),\n };\n\n if (isV3) {\n base.temperature = (opts as TTSV3Options).temperature ?? V3_DEFAULTS.temperature;\n } else {\n const v2 = opts as TTSV2Options;\n base.pitch = v2.pitch ?? V2_DEFAULTS.pitch;\n base.loudness = v2.loudness ?? V2_DEFAULTS.loudness;\n base.enablePreprocessing = v2.enablePreprocessing ?? V2_DEFAULTS.enablePreprocessing;\n }\n\n return base;\n}\n\n// ---------------------------------------------------------------------------\n// Build the API request body — only sends model-relevant fields\n// ---------------------------------------------------------------------------\n\nfunction buildRequestBody(text: string, opts: ResolvedTTSOptions): Record<string, unknown> {\n const body: Record<string, unknown> = {\n text,\n target_language_code: opts.targetLanguageCode,\n speaker: opts.speaker,\n model: opts.model,\n pace: opts.pace,\n speech_sample_rate: String(opts.sampleRate),\n // Always request WAV — AudioByteStream requires raw PCM, which we get by\n // stripping the 44-byte WAV header. Other codecs produce compressed audio\n // that cannot be fed into AudioByteStream.\n output_audio_codec: 'wav',\n };\n\n if (opts.model === 'bulbul:v3') {\n if (opts.temperature != null) body.temperature = opts.temperature;\n } else {\n if (opts.pitch != null) body.pitch = opts.pitch;\n if (opts.loudness != null) body.loudness = opts.loudness;\n if (opts.enablePreprocessing != null) body.enable_preprocessing = opts.enablePreprocessing;\n }\n\n return body;\n}\n\n// ---------------------------------------------------------------------------\n// Build WS config message (sent as first message after connection)\n// ---------------------------------------------------------------------------\n\nfunction buildWsConfigMessage(opts: ResolvedTTSOptions): string {\n const data: Record<string, unknown> = {\n target_language_code: opts.targetLanguageCode,\n speaker: opts.speaker,\n model: opts.model,\n pace: opts.pace,\n speech_sample_rate: String(opts.sampleRate),\n output_audio_codec: 'linear16',\n };\n\n if (opts.model === 'bulbul:v3') {\n if (opts.temperature != null) data.temperature = opts.temperature;\n } else {\n if (opts.pitch != null) data.pitch = opts.pitch;\n if (opts.loudness != null) data.loudness = opts.loudness;\n if (opts.enablePreprocessing != null) data.enable_preprocessing = opts.enablePreprocessing;\n }\n\n return JSON.stringify({ type: 'config', data });\n}\n\n// ---------------------------------------------------------------------------\n// TTS class\n// ---------------------------------------------------------------------------\n\nexport class TTS extends tts.TTS {\n #opts: ResolvedTTSOptions;\n label = 'sarvam.TTS';\n\n /**\n * Create a new instance of Sarvam AI TTS.\n *\n * @remarks\n * `apiKey` must be set to your Sarvam API key, either using the argument or by setting the\n * `SARVAM_API_KEY` environment variable.\n */\n constructor(opts: Partial<TTSOptions> = {}) {\n const resolved = resolveOptions(opts);\n super(resolved.sampleRate, SARVAM_TTS_CHANNELS, { streaming: resolved.streaming });\n this.#opts = resolved;\n }\n\n /**\n * Update TTS options after initialization.\n *\n * @remarks\n * When the model changes, only truly shared fields (apiKey,\n * targetLanguageCode, pace, sampleRate, baseURL) carry over.\n * Model-specific fields (speaker, pitch, loudness, temperature,\n * enablePreprocessing) are dropped so resolveOptions re-applies\n * the correct defaults for the new model.\n */\n updateOptions(opts: Partial<TTSOptions>) {\n const modelChanging = opts.model != null && opts.model !== this.#opts.model;\n\n const base: Partial<TTSOptions> = modelChanging\n ? {\n apiKey: this.#opts.apiKey,\n streaming: this.#opts.streaming,\n targetLanguageCode: this.#opts.targetLanguageCode as TTSLanguages,\n pace: this.#opts.pace,\n sampleRate: this.#opts.sampleRate as TTSSampleRates,\n baseURL: this.#opts.baseURL,\n sentenceTokenizer: this.#opts.sentenceTokenizer,\n }\n : ({ ...this.#opts } as Partial<TTSOptions>);\n\n this.#opts = resolveOptions({ ...base, ...opts } as TTSOptions);\n }\n\n /**\n * Synthesize text to audio using Sarvam AI TTS.\n *\n * @param text - Text to synthesize (max 2500 chars for v3, 1500 for v2)\n * @param connOptions - API connection options\n * @param abortSignal - Abort signal for cancellation\n * @returns A chunked stream of synthesized audio\n */\n synthesize(\n text: string,\n connOptions?: APIConnectOptions,\n abortSignal?: AbortSignal,\n ): ChunkedStream {\n return new ChunkedStream(this, text, this.#opts, connOptions, abortSignal);\n }\n\n stream(): tts.SynthesizeStream {\n if (!this.capabilities.streaming) {\n throw new Error(\n 'Sarvam TTS streaming is disabled (`streaming: false`). Use synthesize() for REST or wrap with tts.StreamAdapter for streaming behavior.',\n );\n }\n return new SynthesizeStream(this, this.#opts);\n }\n}\n\n// ---------------------------------------------------------------------------\n// Chunked stream (non-streaming synthesis)\n// ---------------------------------------------------------------------------\n\n/** Chunked stream for Sarvam AI TTS that processes a single synthesis request. */\nexport class ChunkedStream extends tts.ChunkedStream {\n label = 'sarvam.ChunkedStream';\n private opts: ResolvedTTSOptions;\n\n /** @internal */\n constructor(\n tts: TTS,\n text: string,\n opts: ResolvedTTSOptions,\n connOptions?: APIConnectOptions,\n abortSignal?: AbortSignal,\n ) {\n super(text, tts, connOptions, abortSignal);\n this.opts = opts;\n }\n\n protected async run() {\n const requestId = shortuuid();\n\n const response = await fetch(`${this.opts.baseURL}/text-to-speech`, {\n method: 'POST',\n headers: {\n 'Content-Type': 'application/json',\n 'api-subscription-key': this.opts.apiKey,\n },\n body: JSON.stringify(buildRequestBody(this.inputText, this.opts)),\n signal: this.abortSignal,\n });\n\n if (!response.ok) {\n const errorBody = await response.text();\n throw new Error(`Sarvam TTS API error ${response.status}: ${errorBody}`);\n }\n\n const data = (await response.json()) as { audios: string[] };\n const audioBase64 = data.audios[0];\n if (!audioBase64) {\n throw new Error('Sarvam TTS returned empty audio');\n }\n\n // Decode base64 WAV and strip 44-byte header to get raw PCM\n const raw = Buffer.from(audioBase64, 'base64');\n const pcmData = raw.buffer.slice(raw.byteOffset + 44, raw.byteOffset + raw.byteLength);\n\n const audioByteStream = new AudioByteStream(this.opts.sampleRate, SARVAM_TTS_CHANNELS);\n const frames = [...audioByteStream.write(pcmData), ...audioByteStream.flush()];\n\n let lastFrame: AudioFrame | undefined;\n const sendLastFrame = (segmentId: string, final: boolean) => {\n if (lastFrame) {\n this.queue.put({ requestId, segmentId, frame: lastFrame, final });\n lastFrame = undefined;\n }\n };\n\n for (const frame of frames) {\n sendLastFrame(requestId, false);\n lastFrame = frame;\n }\n sendLastFrame(requestId, true);\n\n this.queue.close();\n }\n}\n\n// ---------------------------------------------------------------------------\n// WebSocket streaming synthesis\n// ---------------------------------------------------------------------------\n\nexport class SynthesizeStream extends tts.SynthesizeStream {\n private opts: ResolvedTTSOptions;\n private tokenizer: tokenize.SentenceStream;\n #logger = log();\n label = 'sarvam.SynthesizeStream';\n\n constructor(tts: TTS, opts: ResolvedTTSOptions) {\n super(tts);\n this.opts = opts;\n this.tokenizer = opts.sentenceTokenizer.stream();\n }\n\n private async closeWebSocket(ws: WebSocket): Promise<void> {\n try {\n if (ws.readyState === WebSocket.OPEN) {\n ws.send(JSON.stringify({ type: 'flush' }));\n\n try {\n await new Promise<void>((resolve) => {\n const timeout = setTimeout(() => resolve(), 1000);\n\n ws.once('message', () => {\n clearTimeout(timeout);\n resolve();\n });\n ws.once('close', () => {\n clearTimeout(timeout);\n resolve();\n });\n ws.once('error', () => {\n clearTimeout(timeout);\n resolve();\n });\n });\n } catch {\n // Ignore timeout or other errors during close sequence\n }\n }\n } catch (e) {\n this.#logger.warn(`Error during WebSocket close sequence: ${e}`);\n } finally {\n if (ws.readyState === WebSocket.OPEN || ws.readyState === WebSocket.CONNECTING) {\n ws.close();\n }\n }\n }\n\n protected async run() {\n const requestId = shortuuid();\n const segmentId = shortuuid();\n\n // Build WS URL: wss://api.sarvam.ai/text-to-speech/ws?model=...&send_completion_event=true\n const wsBaseUrl = this.opts.baseURL.replace(/^http/, 'ws');\n const url = new URL(`${wsBaseUrl}${SARVAM_WS_URL_PATH}`);\n url.searchParams.set('model', this.opts.model);\n url.searchParams.set('send_completion_event', 'true');\n\n const ws = new WebSocket(url, {\n headers: {\n 'api-subscription-key': this.opts.apiKey,\n },\n });\n\n await new Promise<void>((resolve, reject) => {\n const onOpen = () => {\n cleanup();\n resolve();\n };\n const onError = (error: Error) => {\n cleanup();\n reject(new Error(`Sarvam TTS WS connection error: ${error.message}`));\n };\n const onClose = (code: number) => {\n cleanup();\n reject(new Error(`Sarvam TTS WS closed during connect: ${code}`));\n };\n const cleanup = () => {\n ws.removeListener('open', onOpen);\n ws.removeListener('error', onError);\n ws.removeListener('close', onClose);\n };\n ws.on('open', onOpen);\n ws.on('error', onError);\n ws.on('close', onClose);\n });\n\n // Send config message immediately after connection\n ws.send(buildWsConfigMessage(this.opts));\n\n const inputTask = async () => {\n for await (const data of this.input) {\n if (data === SynthesizeStream.FLUSH_SENTINEL) {\n this.tokenizer.flush();\n continue;\n }\n this.tokenizer.pushText(data);\n }\n this.tokenizer.endInput();\n this.tokenizer.close();\n };\n\n const sendTask = async () => {\n for await (const event of this.tokenizer) {\n if (this.abortController.signal.aborted) break;\n\n const text = event.token;\n ws.send(JSON.stringify({ type: 'text', data: { text } }));\n }\n\n if (!this.abortController.signal.aborted) {\n ws.send(JSON.stringify({ type: 'flush' }));\n }\n };\n\n const recvTask = async () => {\n const bstream = new AudioByteStream(this.opts.sampleRate, SARVAM_TTS_CHANNELS);\n let finalReceived = false;\n let lastFrame: AudioFrame | undefined;\n\n const sendLastFrame = (final: boolean) => {\n if (lastFrame && !this.queue.closed) {\n this.queue.put({ requestId, segmentId, frame: lastFrame, final });\n lastFrame = undefined;\n }\n };\n\n return new Promise<void>((resolve, reject) => {\n ws.on('message', (data: RawData) => {\n let msg: { type: string; data?: Record<string, unknown> };\n try {\n msg = JSON.parse(data.toString());\n } catch {\n this.#logger.warn('Sarvam WS: received non-JSON message');\n return;\n }\n\n switch (msg.type) {\n case 'audio': {\n const audioB64 = (msg.data?.audio as string) ?? '';\n if (!audioB64) break;\n\n const raw = Buffer.from(audioB64, 'base64');\n const pcm = raw.buffer.slice(raw.byteOffset, raw.byteOffset + raw.byteLength);\n\n for (const frame of bstream.write(pcm as ArrayBuffer)) {\n sendLastFrame(false);\n lastFrame = frame;\n }\n break;\n }\n\n case 'event': {\n const eventType = msg.data?.event_type as string | undefined;\n if (eventType === 'final') {\n finalReceived = true;\n for (const frame of bstream.flush()) {\n sendLastFrame(false);\n lastFrame = frame;\n }\n sendLastFrame(true);\n\n if (!this.queue.closed) {\n this.queue.put(SynthesizeStream.END_OF_STREAM);\n }\n resolve();\n }\n break;\n }\n\n case 'error': {\n const errMsg = (msg.data?.message as string) ?? 'Unknown Sarvam WS error';\n const errCode = msg.data?.code as number | undefined;\n reject(new Error(`Sarvam WS error ${errCode ?? ''}: ${errMsg}`));\n break;\n }\n }\n });\n\n ws.on('close', () => {\n if (!finalReceived) {\n for (const frame of bstream.flush()) {\n sendLastFrame(false);\n lastFrame = frame;\n }\n sendLastFrame(true);\n\n if (!this.queue.closed) {\n this.queue.put(SynthesizeStream.END_OF_STREAM);\n }\n }\n resolve();\n });\n\n ws.on('error', (error) => {\n reject(error);\n });\n });\n };\n\n try {\n await Promise.all([inputTask(), sendTask(), recvTask()]);\n } catch (e) {\n const msg = e instanceof Error ? e.message : String(e);\n throw new Error(`Sarvam TTS streaming failed: ${msg}`);\n } finally {\n await this.closeWebSocket(ws);\n }\n }\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAGA,oBAQO;AAEP,gBAAwC;AAUxC,MAAM,yBAAyB;AAC/B,MAAM,sBAAsB;AAC5B,MAAM,kBAAkB;AACxB,MAAM,qBAAqB;AAC3B,MAAM,sBAAsB;AAgF5B,MAAM,cAAc;AAAA,EAClB,SAAS;AAAA,EACT,OAAO;AAAA,EACP,MAAM;AAAA,EACN,UAAU;AAAA,EACV,qBAAqB;AACvB;AAEA,MAAM,cAAc;AAAA,EAClB,SAAS;AAAA,EACT,MAAM;AAAA,EACN,aAAa;AACf;AAMA,SAAS,eAAe,MAA+C;AACrE,QAAM,SAAS,KAAK,UAAU,QAAQ,IAAI;AAC1C,MAAI,CAAC,QAAQ;AACX,UAAM,IAAI,MAAM,0EAA0E;AAAA,EAC5F;AAEA,QAAM,QAAmB,KAAK,SAAS;AACvC,QAAM,OAAO,UAAU;AAEvB,QAAM,OAA2B;AAAA,IAC/B;AAAA,IACA,WAAW,KAAK,aAAa;AAAA,IAC7B;AAAA,IACA,SAAS,KAAK,YAAY,OAAO,YAAY,UAAU,YAAY;AAAA,IACnE,wBAAoB,iCAAkB,KAAK,sBAAsB,OAAO;AAAA,IACxE,MAAM,KAAK,SAAS,OAAO,YAAY,OAAO,YAAY;AAAA,IAC1D,YAAY,KAAK,cAAc;AAAA,IAC/B,SAAS,KAAK,WAAW;AAAA,IACzB,mBACE,KAAK,qBACL,IAAI,uBAAS,MAAM,kBAAkB,EAAE,mBAAmB,oBAAoB,CAAC;AAAA,EACnF;AAEA,MAAI,MAAM;AACR,SAAK,cAAe,KAAsB,eAAe,YAAY;AAAA,EACvE,OAAO;AACL,UAAM,KAAK;AACX,SAAK,QAAQ,GAAG,SAAS,YAAY;AACrC,SAAK,WAAW,GAAG,YAAY,YAAY;AAC3C,SAAK,sBAAsB,GAAG,uBAAuB,YAAY;AAAA,EACnE;AAEA,SAAO;AACT;AAMA,SAAS,iBAAiB,MAAc,MAAmD;AACzF,QAAM,OAAgC;AAAA,IACpC;AAAA,IACA,sBAAsB,KAAK;AAAA,IAC3B,SAAS,KAAK;AAAA,IACd,OAAO,KAAK;AAAA,IACZ,MAAM,KAAK;AAAA,IACX,oBAAoB,OAAO,KAAK,UAAU;AAAA;AAAA;AAAA;AAAA,IAI1C,oBAAoB;AAAA,EACtB;AAEA,MAAI,KAAK,UAAU,aAAa;AAC9B,QAAI,KAAK,eAAe,KAAM,MAAK,cAAc,KAAK;AAAA,EACxD,OAAO;AACL,QAAI,KAAK,SAAS,KAAM,MAAK,QAAQ,KAAK;AAC1C,QAAI,KAAK,YAAY,KAAM,MAAK,WAAW,KAAK;AAChD,QAAI,KAAK,uBAAuB,KAAM,MAAK,uBAAuB,KAAK;AAAA,EACzE;AAEA,SAAO;AACT;AAMA,SAAS,qBAAqB,MAAkC;AAC9D,QAAM,OAAgC;AAAA,IACpC,sBAAsB,KAAK;AAAA,IAC3B,SAAS,KAAK;AAAA,IACd,OAAO,KAAK;AAAA,IACZ,MAAM,KAAK;AAAA,IACX,oBAAoB,OAAO,KAAK,UAAU;AAAA,IAC1C,oBAAoB;AAAA,EACtB;AAEA,MAAI,KAAK,UAAU,aAAa;AAC9B,QAAI,KAAK,eAAe,KAAM,MAAK,cAAc,KAAK;AAAA,EACxD,OAAO;AACL,QAAI,KAAK,SAAS,KAAM,MAAK,QAAQ,KAAK;AAC1C,QAAI,KAAK,YAAY,KAAM,MAAK,WAAW,KAAK;AAChD,QAAI,KAAK,uBAAuB,KAAM,MAAK,uBAAuB,KAAK;AAAA,EACzE;AAEA,SAAO,KAAK,UAAU,EAAE,MAAM,UAAU,KAAK,CAAC;AAChD;AAMO,MAAM,YAAY,kBAAI,IAAI;AAAA,EAC/B;AAAA,EACA,QAAQ;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EASR,YAAY,OAA4B,CAAC,GAAG;AAC1C,UAAM,WAAW,eAAe,IAAI;AACpC,UAAM,SAAS,YAAY,qBAAqB,EAAE,WAAW,SAAS,UAAU,CAAC;AACjF,SAAK,QAAQ;AAAA,EACf;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAYA,cAAc,MAA2B;AACvC,UAAM,gBAAgB,KAAK,SAAS,QAAQ,KAAK,UAAU,KAAK,MAAM;AAEtE,UAAM,OAA4B,gBAC9B;AAAA,MACE,QAAQ,KAAK,MAAM;AAAA,MACnB,WAAW,KAAK,MAAM;AAAA,MACtB,oBAAoB,KAAK,MAAM;AAAA,MAC/B,MAAM,KAAK,MAAM;AAAA,MACjB,YAAY,KAAK,MAAM;AAAA,MACvB,SAAS,KAAK,MAAM;AAAA,MACpB,mBAAmB,KAAK,MAAM;AAAA,IAChC,IACC,EAAE,GAAG,KAAK,MAAM;AAErB,SAAK,QAAQ,eAAe,EAAE,GAAG,MAAM,GAAG,KAAK,CAAe;AAAA,EAChE;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAUA,WACE,MACA,aACA,aACe;AACf,WAAO,IAAI,cAAc,MAAM,MAAM,KAAK,OAAO,aAAa,WAAW;AAAA,EAC3E;AAAA,EAEA,SAA+B;AAC7B,QAAI,CAAC,KAAK,aAAa,WAAW;AAChC,YAAM,IAAI;AAAA,QACR;AAAA,MACF;AAAA,IACF;AACA,WAAO,IAAI,iBAAiB,MAAM,KAAK,KAAK;AAAA,EAC9C;AACF;AAOO,MAAM,sBAAsB,kBAAI,cAAc;AAAA,EACnD,QAAQ;AAAA,EACA;AAAA;AAAA,EAGR,YACEA,MACA,MACA,MACA,aACA,aACA;AACA,UAAM,MAAMA,MAAK,aAAa,WAAW;AACzC,SAAK,OAAO;AAAA,EACd;AAAA,EAEA,MAAgB,MAAM;AACpB,UAAM,gBAAY,yBAAU;AAE5B,UAAM,WAAW,MAAM,MAAM,GAAG,KAAK,KAAK,OAAO,mBAAmB;AAAA,MAClE,QAAQ;AAAA,MACR,SAAS;AAAA,QACP,gBAAgB;AAAA,QAChB,wBAAwB,KAAK,KAAK;AAAA,MACpC;AAAA,MACA,MAAM,KAAK,UAAU,iBAAiB,KAAK,WAAW,KAAK,IAAI,CAAC;AAAA,MAChE,QAAQ,KAAK;AAAA,IACf,CAAC;AAED,QAAI,CAAC,SAAS,IAAI;AAChB,YAAM,YAAY,MAAM,SAAS,KAAK;AACtC,YAAM,IAAI,MAAM,wBAAwB,SAAS,MAAM,KAAK,SAAS,EAAE;AAAA,IACzE;AAEA,UAAM,OAAQ,MAAM,SAAS,KAAK;AAClC,UAAM,cAAc,KAAK,OAAO,CAAC;AACjC,QAAI,CAAC,aAAa;AAChB,YAAM,IAAI,MAAM,iCAAiC;AAAA,IACnD;AAGA,UAAM,MAAM,OAAO,KAAK,aAAa,QAAQ;AAC7C,UAAM,UAAU,IAAI,OAAO,MAAM,IAAI,aAAa,IAAI,IAAI,aAAa,IAAI,UAAU;AAErF,UAAM,kBAAkB,IAAI,8BAAgB,KAAK,KAAK,YAAY,mBAAmB;AACrF,UAAM,SAAS,CAAC,GAAG,gBAAgB,MAAM,OAAO,GAAG,GAAG,gBAAgB,MAAM,CAAC;AAE7E,QAAI;AACJ,UAAM,gBAAgB,CAAC,WAAmB,UAAmB;AAC3D,UAAI,WAAW;AACb,aAAK,MAAM,IAAI,EAAE,WAAW,WAAW,OAAO,WAAW,MAAM,CAAC;AAChE,oBAAY;AAAA,MACd;AAAA,IACF;AAEA,eAAW,SAAS,QAAQ;AAC1B,oBAAc,WAAW,KAAK;AAC9B,kBAAY;AAAA,IACd;AACA,kBAAc,WAAW,IAAI;AAE7B,SAAK,MAAM,MAAM;AAAA,EACnB;AACF;AAMO,MAAM,yBAAyB,kBAAI,iBAAiB;AAAA,EACjD;AAAA,EACA;AAAA,EACR,cAAU,mBAAI;AAAA,EACd,QAAQ;AAAA,EAER,YAAYA,MAAU,MAA0B;AAC9C,UAAMA,IAAG;AACT,SAAK,OAAO;AACZ,SAAK,YAAY,KAAK,kBAAkB,OAAO;AAAA,EACjD;AAAA,EAEA,MAAc,eAAe,IAA8B;AACzD,QAAI;AACF,UAAI,GAAG,eAAe,oBAAU,MAAM;AACpC,WAAG,KAAK,KAAK,UAAU,EAAE,MAAM,QAAQ,CAAC,CAAC;AAEzC,YAAI;AACF,gBAAM,IAAI,QAAc,CAAC,YAAY;AACnC,kBAAM,UAAU,WAAW,MAAM,QAAQ,GAAG,GAAI;AAEhD,eAAG,KAAK,WAAW,MAAM;AACvB,2BAAa,OAAO;AACpB,sBAAQ;AAAA,YACV,CAAC;AACD,eAAG,KAAK,SAAS,MAAM;AACrB,2BAAa,OAAO;AACpB,sBAAQ;AAAA,YACV,CAAC;AACD,eAAG,KAAK,SAAS,MAAM;AACrB,2BAAa,OAAO;AACpB,sBAAQ;AAAA,YACV,CAAC;AAAA,UACH,CAAC;AAAA,QACH,QAAQ;AAAA,QAER;AAAA,MACF;AAAA,IACF,SAAS,GAAG;AACV,WAAK,QAAQ,KAAK,0CAA0C,CAAC,EAAE;AAAA,IACjE,UAAE;AACA,UAAI,GAAG,eAAe,oBAAU,QAAQ,GAAG,eAAe,oBAAU,YAAY;AAC9E,WAAG,MAAM;AAAA,MACX;AAAA,IACF;AAAA,EACF;AAAA,EAEA,MAAgB,MAAM;AACpB,UAAM,gBAAY,yBAAU;AAC5B,UAAM,gBAAY,yBAAU;AAG5B,UAAM,YAAY,KAAK,KAAK,QAAQ,QAAQ,SAAS,IAAI;AACzD,UAAM,MAAM,IAAI,IAAI,GAAG,SAAS,GAAG,kBAAkB,EAAE;AACvD,QAAI,aAAa,IAAI,SAAS,KAAK,KAAK,KAAK;AAC7C,QAAI,aAAa,IAAI,yBAAyB,MAAM;AAEpD,UAAM,KAAK,IAAI,oBAAU,KAAK;AAAA,MAC5B,SAAS;AAAA,QACP,wBAAwB,KAAK,KAAK;AAAA,MACpC;AAAA,IACF,CAAC;AAED,UAAM,IAAI,QAAc,CAAC,SAAS,WAAW;AAC3C,YAAM,SAAS,MAAM;AACnB,gBAAQ;AACR,gBAAQ;AAAA,MACV;AACA,YAAM,UAAU,CAAC,UAAiB;AAChC,gBAAQ;AACR,eAAO,IAAI,MAAM,mCAAmC,MAAM,OAAO,EAAE,CAAC;AAAA,MACtE;AACA,YAAM,UAAU,CAAC,SAAiB;AAChC,gBAAQ;AACR,eAAO,IAAI,MAAM,wCAAwC,IAAI,EAAE,CAAC;AAAA,MAClE;AACA,YAAM,UAAU,MAAM;AACpB,WAAG,eAAe,QAAQ,MAAM;AAChC,WAAG,eAAe,SAAS,OAAO;AAClC,WAAG,eAAe,SAAS,OAAO;AAAA,MACpC;AACA,SAAG,GAAG,QAAQ,MAAM;AACpB,SAAG,GAAG,SAAS,OAAO;AACtB,SAAG,GAAG,SAAS,OAAO;AAAA,IACxB,CAAC;AAGD,OAAG,KAAK,qBAAqB,KAAK,IAAI,CAAC;AAEvC,UAAM,YAAY,YAAY;AAC5B,uBAAiB,QAAQ,KAAK,OAAO;AACnC,YAAI,SAAS,iBAAiB,gBAAgB;AAC5C,eAAK,UAAU,MAAM;AACrB;AAAA,QACF;AACA,aAAK,UAAU,SAAS,IAAI;AAAA,MAC9B;AACA,WAAK,UAAU,SAAS;AACxB,WAAK,UAAU,MAAM;AAAA,IACvB;AAEA,UAAM,WAAW,YAAY;AAC3B,uBAAiB,SAAS,KAAK,WAAW;AACxC,YAAI,KAAK,gBAAgB,OAAO,QAAS;AAEzC,cAAM,OAAO,MAAM;AACnB,WAAG,KAAK,KAAK,UAAU,EAAE,MAAM,QAAQ,MAAM,EAAE,KAAK,EAAE,CAAC,CAAC;AAAA,MAC1D;AAEA,UAAI,CAAC,KAAK,gBAAgB,OAAO,SAAS;AACxC,WAAG,KAAK,KAAK,UAAU,EAAE,MAAM,QAAQ,CAAC,CAAC;AAAA,MAC3C;AAAA,IACF;AAEA,UAAM,WAAW,YAAY;AAC3B,YAAM,UAAU,IAAI,8BAAgB,KAAK,KAAK,YAAY,mBAAmB;AAC7E,UAAI,gBAAgB;AACpB,UAAI;AAEJ,YAAM,gBAAgB,CAAC,UAAmB;AACxC,YAAI,aAAa,CAAC,KAAK,MAAM,QAAQ;AACnC,eAAK,MAAM,IAAI,EAAE,WAAW,WAAW,OAAO,WAAW,MAAM,CAAC;AAChE,sBAAY;AAAA,QACd;AAAA,MACF;AAEA,aAAO,IAAI,QAAc,CAAC,SAAS,WAAW;AAC5C,WAAG,GAAG,WAAW,CAAC,SAAkB;AA1e5C;AA2eU,cAAI;AACJ,cAAI;AACF,kBAAM,KAAK,MAAM,KAAK,SAAS,CAAC;AAAA,UAClC,QAAQ;AACN,iBAAK,QAAQ,KAAK,sCAAsC;AACxD;AAAA,UACF;AAEA,kBAAQ,IAAI,MAAM;AAAA,YAChB,KAAK,SAAS;AACZ,oBAAM,aAAY,SAAI,SAAJ,mBAAU,UAAoB;AAChD,kBAAI,CAAC,SAAU;AAEf,oBAAM,MAAM,OAAO,KAAK,UAAU,QAAQ;AAC1C,oBAAM,MAAM,IAAI,OAAO,MAAM,IAAI,YAAY,IAAI,aAAa,IAAI,UAAU;AAE5E,yBAAW,SAAS,QAAQ,MAAM,GAAkB,GAAG;AACrD,8BAAc,KAAK;AACnB,4BAAY;AAAA,cACd;AACA;AAAA,YACF;AAAA,YAEA,KAAK,SAAS;AACZ,oBAAM,aAAY,SAAI,SAAJ,mBAAU;AAC5B,kBAAI,cAAc,SAAS;AACzB,gCAAgB;AAChB,2BAAW,SAAS,QAAQ,MAAM,GAAG;AACnC,gCAAc,KAAK;AACnB,8BAAY;AAAA,gBACd;AACA,8BAAc,IAAI;AAElB,oBAAI,CAAC,KAAK,MAAM,QAAQ;AACtB,uBAAK,MAAM,IAAI,iBAAiB,aAAa;AAAA,gBAC/C;AACA,wBAAQ;AAAA,cACV;AACA;AAAA,YACF;AAAA,YAEA,KAAK,SAAS;AACZ,oBAAM,WAAU,SAAI,SAAJ,mBAAU,YAAsB;AAChD,oBAAM,WAAU,SAAI,SAAJ,mBAAU;AAC1B,qBAAO,IAAI,MAAM,mBAAmB,WAAW,EAAE,KAAK,MAAM,EAAE,CAAC;AAC/D;AAAA,YACF;AAAA,UACF;AAAA,QACF,CAAC;AAED,WAAG,GAAG,SAAS,MAAM;AACnB,cAAI,CAAC,eAAe;AAClB,uBAAW,SAAS,QAAQ,MAAM,GAAG;AACnC,4BAAc,KAAK;AACnB,0BAAY;AAAA,YACd;AACA,0BAAc,IAAI;AAElB,gBAAI,CAAC,KAAK,MAAM,QAAQ;AACtB,mBAAK,MAAM,IAAI,iBAAiB,aAAa;AAAA,YAC/C;AAAA,UACF;AACA,kBAAQ;AAAA,QACV,CAAC;AAED,WAAG,GAAG,SAAS,CAAC,UAAU;AACxB,iBAAO,KAAK;AAAA,QACd,CAAC;AAAA,MACH,CAAC;AAAA,IACH;AAEA,QAAI;AACF,YAAM,QAAQ,IAAI,CAAC,UAAU,GAAG,SAAS,GAAG,SAAS,CAAC,CAAC;AAAA,IACzD,SAAS,GAAG;AACV,YAAM,MAAM,aAAa,QAAQ,EAAE,UAAU,OAAO,CAAC;AACrD,YAAM,IAAI,MAAM,gCAAgC,GAAG,EAAE;AAAA,IACvD,UAAE;AACA,YAAM,KAAK,eAAe,EAAE;AAAA,IAC9B;AAAA,EACF;AACF;","names":["tts"]}
package/dist/tts.d.ts.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"file":"tts.d.ts","sourceRoot":"","sources":["../src/tts.ts"],"names":[],"mappings":"AAGA,OAAO,EACL,KAAK,iBAAiB,EAItB,QAAQ,EACR,GAAG,EACJ,MAAM,iBAAiB,CAAC;AAGzB,OAAO,KAAK,EACV,YAAY,EACZ,SAAS,EACT,cAAc,EACd,WAAW,EACX,aAAa,EACb,aAAa,EACd,MAAM,aAAa,CAAC;AAcrB,UAAU,cAAc;IACtB,kDAAkD;IAClD,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB;;;;OAIG;IACH,SAAS,CAAC,EAAE,OAAO,CAAC;IACpB,oCAAoC;IACpC,kBAAkB,CAAC,EAAE,YAAY,GAAG,MAAM,CAAC;IAC3C,0DAA0D;IAC1D,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,+CAA+C;IAC/C,UAAU,CAAC,EAAE,cAAc,GAAG,MAAM,CAAC;IACrC,kCAAkC;IAClC,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,2EAA2E;IAC3E,iBAAiB,CAAC,EAAE,QAAQ,CAAC,iBAAiB,CAAC;CAChD;AAED,oCAAoC;AACpC,MAAM,WAAW,YAAa,SAAQ,cAAc;IAClD,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,oDAAoD;IACpD,OAAO,CAAC,EAAE,aAAa,GAAG,MAAM,CAAC;IACjC,gDAAgD;IAChD,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,qCAAqC;IACrC,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,0CAA0C;IAC1C,mBAAmB,CAAC,EAAE,OAAO,CAAC;CAC/B;AAED,oCAAoC;AACpC,MAAM,WAAW,YAAa,SAAQ,cAAc;IAClD,KAAK,EAAE,WAAW,CAAC;IACnB,kDAAkD;IAClD,OAAO,CAAC,EAAE,aAAa,GAAG,MAAM,CAAC;IACjC,0EAA0E;IAC1E,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAED,wDAAwD;AACxD,MAAM,MAAM,UAAU,GAAG,YAAY,GAAG,YAAY,CAAC;AAMrD,UAAU,kBAAkB;IAC1B,MAAM,EAAE,MAAM,CAAC;IACf,SAAS,EAAE,OAAO,CAAC;IACnB,KAAK,EAAE,SAAS,CAAC;IACjB,OAAO,EAAE,WAAW,GAAG,MAAM,CAAC;IAC9B,kBAAkB,EAAE,MAAM,CAAC;IAC3B,IAAI,EAAE,MAAM,CAAC;IACb,UAAU,EAAE,MAAM,CAAC;IACnB,OAAO,EAAE,MAAM,CAAC;IAChB,iBAAiB,EAAE,QAAQ,CAAC,iBAAiB,CAAC;IAE9C,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,mBAAmB,CAAC,EAAE,OAAO,CAAC;IAE9B,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAqHD,qBAAa,GAAI,SAAQ,GAAG,CAAC,GAAG;;IAE9B,KAAK,SAAgB;IAErB;;;;;;OAMG;gBACS,IAAI,GAAE,OAAO,CAAC,UAAU,CAAM;IAM1C;;;;;;;;;OASG;IACH,aAAa,CAAC,IAAI,EAAE,OAAO,CAAC,UAAU,CAAC;IAkBvC;;;;;;;OAOG;IACH,UAAU,CACR,IAAI,EAAE,MAAM,EACZ,WAAW,CAAC,EAAE,iBAAiB,EAC/B,WAAW,CAAC,EAAE,WAAW,GACxB,aAAa;IAIhB,MAAM,IAAI,GAAG,CAAC,gBAAgB;CAQ/B;AAMD,kFAAkF;AAClF,qBAAa,aAAc,SAAQ,GAAG,CAAC,aAAa;IAClD,KAAK,SAA0B;IAC/B,OAAO,CAAC,IAAI,CAAqB;IAEjC,gBAAgB;gBAEd,GAAG,EAAE,GAAG,EACR,IAAI,EAAE,MAAM,EACZ,IAAI,EAAE,kBAAkB,EACxB,WAAW,CAAC,EAAE,iBAAiB,EAC/B,WAAW,CAAC,EAAE,WAAW;cAMX,GAAG;CA+CpB;AAMD,qBAAa,gBAAiB,SAAQ,GAAG,CAAC,gBAAgB;;IACxD,OAAO,CAAC,IAAI,CAAqB;IACjC,OAAO,CAAC,SAAS,CAA0B;IAE3C,KAAK,SAA6B;gBAEtB,GAAG,EAAE,GAAG,EAAE,IAAI,EAAE,kBAAkB;YAMhC,cAAc;cAmCZ,GAAG;CAiKpB"}
1
+ {"version":3,"file":"tts.d.ts","sourceRoot":"","sources":["../src/tts.ts"],"names":[],"mappings":"AAGA,OAAO,EACL,KAAK,iBAAiB,EAKtB,QAAQ,EACR,GAAG,EACJ,MAAM,iBAAiB,CAAC;AAGzB,OAAO,KAAK,EACV,YAAY,EACZ,SAAS,EACT,cAAc,EACd,WAAW,EACX,aAAa,EACb,aAAa,EACd,MAAM,aAAa,CAAC;AAcrB,UAAU,cAAc;IACtB,kDAAkD;IAClD,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB;;;;OAIG;IACH,SAAS,CAAC,EAAE,OAAO,CAAC;IACpB,oCAAoC;IACpC,kBAAkB,CAAC,EAAE,YAAY,GAAG,MAAM,CAAC;IAC3C,0DAA0D;IAC1D,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,+CAA+C;IAC/C,UAAU,CAAC,EAAE,cAAc,GAAG,MAAM,CAAC;IACrC,kCAAkC;IAClC,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,2EAA2E;IAC3E,iBAAiB,CAAC,EAAE,QAAQ,CAAC,iBAAiB,CAAC;CAChD;AAED,oCAAoC;AACpC,MAAM,WAAW,YAAa,SAAQ,cAAc;IAClD,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,oDAAoD;IACpD,OAAO,CAAC,EAAE,aAAa,GAAG,MAAM,CAAC;IACjC,gDAAgD;IAChD,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,qCAAqC;IACrC,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,0CAA0C;IAC1C,mBAAmB,CAAC,EAAE,OAAO,CAAC;CAC/B;AAED,oCAAoC;AACpC,MAAM,WAAW,YAAa,SAAQ,cAAc;IAClD,KAAK,EAAE,WAAW,CAAC;IACnB,kDAAkD;IAClD,OAAO,CAAC,EAAE,aAAa,GAAG,MAAM,CAAC;IACjC,0EAA0E;IAC1E,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAED,wDAAwD;AACxD,MAAM,MAAM,UAAU,GAAG,YAAY,GAAG,YAAY,CAAC;AAMrD,UAAU,kBAAkB;IAC1B,MAAM,EAAE,MAAM,CAAC;IACf,SAAS,EAAE,OAAO,CAAC;IACnB,KAAK,EAAE,SAAS,CAAC;IACjB,OAAO,EAAE,WAAW,GAAG,MAAM,CAAC;IAC9B,kBAAkB,EAAE,MAAM,CAAC;IAC3B,IAAI,EAAE,MAAM,CAAC;IACb,UAAU,EAAE,MAAM,CAAC;IACnB,OAAO,EAAE,MAAM,CAAC;IAChB,iBAAiB,EAAE,QAAQ,CAAC,iBAAiB,CAAC;IAE9C,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,mBAAmB,CAAC,EAAE,OAAO,CAAC;IAE9B,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAqHD,qBAAa,GAAI,SAAQ,GAAG,CAAC,GAAG;;IAE9B,KAAK,SAAgB;IAErB;;;;;;OAMG;gBACS,IAAI,GAAE,OAAO,CAAC,UAAU,CAAM;IAM1C;;;;;;;;;OASG;IACH,aAAa,CAAC,IAAI,EAAE,OAAO,CAAC,UAAU,CAAC;IAkBvC;;;;;;;OAOG;IACH,UAAU,CACR,IAAI,EAAE,MAAM,EACZ,WAAW,CAAC,EAAE,iBAAiB,EAC/B,WAAW,CAAC,EAAE,WAAW,GACxB,aAAa;IAIhB,MAAM,IAAI,GAAG,CAAC,gBAAgB;CAQ/B;AAMD,kFAAkF;AAClF,qBAAa,aAAc,SAAQ,GAAG,CAAC,aAAa;IAClD,KAAK,SAA0B;IAC/B,OAAO,CAAC,IAAI,CAAqB;IAEjC,gBAAgB;gBAEd,GAAG,EAAE,GAAG,EACR,IAAI,EAAE,MAAM,EACZ,IAAI,EAAE,kBAAkB,EACxB,WAAW,CAAC,EAAE,iBAAiB,EAC/B,WAAW,CAAC,EAAE,WAAW;cAMX,GAAG;CA+CpB;AAMD,qBAAa,gBAAiB,SAAQ,GAAG,CAAC,gBAAgB;;IACxD,OAAO,CAAC,IAAI,CAAqB;IACjC,OAAO,CAAC,SAAS,CAA0B;IAE3C,KAAK,SAA6B;gBAEtB,GAAG,EAAE,GAAG,EAAE,IAAI,EAAE,kBAAkB;YAMhC,cAAc;cAmCZ,GAAG;CAiKpB"}
package/dist/tts.js CHANGED
@@ -1,6 +1,7 @@
1
1
  import {
2
2
  AudioByteStream,
3
3
  log,
4
+ normalizeLanguage,
4
5
  shortuuid,
5
6
  tokenize,
6
7
  tts
@@ -35,7 +36,7 @@ function resolveOptions(opts) {
35
36
  streaming: opts.streaming ?? true,
36
37
  model,
37
38
  speaker: opts.speaker ?? (isV3 ? V3_DEFAULTS.speaker : V2_DEFAULTS.speaker),
38
- targetLanguageCode: opts.targetLanguageCode ?? "en-IN",
39
+ targetLanguageCode: normalizeLanguage(opts.targetLanguageCode ?? "en-IN"),
39
40
  pace: opts.pace ?? (isV3 ? V3_DEFAULTS.pace : V2_DEFAULTS.pace),
40
41
  sampleRate: opts.sampleRate ?? SARVAM_TTS_SAMPLE_RATE,
41
42
  baseURL: opts.baseURL ?? SARVAM_BASE_URL,
package/dist/tts.js.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"sources":["../src/tts.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2025 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport {\n type APIConnectOptions,\n AudioByteStream,\n log,\n shortuuid,\n tokenize,\n tts,\n} from '@livekit/agents';\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { type RawData, WebSocket } from 'ws';\nimport type {\n TTSLanguages,\n TTSModels,\n TTSSampleRates,\n TTSSpeakers,\n TTSV2Speakers,\n TTSV3Speakers,\n} from './models.js';\n\nconst SARVAM_TTS_SAMPLE_RATE = 24000;\nconst SARVAM_TTS_CHANNELS = 1;\nconst SARVAM_BASE_URL = 'https://api.sarvam.ai';\nconst SARVAM_WS_URL_PATH = '/text-to-speech/ws';\nconst MIN_SENTENCE_LENGTH = 8;\n\n// ---------------------------------------------------------------------------\n// Model-specific option types\n// V2 supports pitch / loudness / enablePreprocessing\n// V3 supports temperature (pitch, loudness, enablePreprocessing are NOT supported)\n// ---------------------------------------------------------------------------\n\ninterface TTSBaseOptions {\n /** Sarvam API key. Defaults to $SARVAM_API_KEY */\n apiKey?: string;\n /**\n * Whether to use native WebSocket streaming for `stream()`.\n * Set to `false` to prefer non-streaming REST synthesis (used by Agent via TTS StreamAdapter).\n * Default: `true`.\n */\n streaming?: boolean;\n /** Target language code (BCP-47) */\n targetLanguageCode?: TTSLanguages | string;\n /** Speech pace. v2: 0.3–3.0, v3: 0.5–2.0 (default 1.0) */\n pace?: number;\n /** Output sample rate in Hz (default 24000) */\n sampleRate?: TTSSampleRates | number;\n /** Base URL for the Sarvam API */\n baseURL?: string;\n /** Sentence tokenizer for streaming (default: basic sentence tokenizer) */\n sentenceTokenizer?: tokenize.SentenceTokenizer;\n}\n\n/** Options specific to bulbul:v2 */\nexport interface TTSV2Options extends TTSBaseOptions {\n model?: 'bulbul:v2';\n /** Speaker voice (v2 voices). Default: 'anushka' */\n speaker?: TTSV2Speakers | string;\n /** Pitch adjustment, -0.75 to 0.75 (v2 only) */\n pitch?: number;\n /** Loudness, 0.3 to 3.0 (v2 only) */\n loudness?: number;\n /** Enable text preprocessing (v2 only) */\n enablePreprocessing?: boolean;\n}\n\n/** Options specific to bulbul:v3 */\nexport interface TTSV3Options extends TTSBaseOptions {\n model: 'bulbul:v3';\n /** Speaker voice (v3 voices). Default: 'shubh' */\n speaker?: TTSV3Speakers | string;\n /** Temperature for voice variation, 0.01 to 2.0 (v3 only, default 0.6) */\n temperature?: number;\n}\n\n/** Combined options — discriminated by `model` field */\nexport type TTSOptions = TTSV2Options | TTSV3Options;\n\n// ---------------------------------------------------------------------------\n// Resolved (internal) options — flat union of all fields\n// ---------------------------------------------------------------------------\n\ninterface ResolvedTTSOptions {\n apiKey: string;\n streaming: boolean;\n model: TTSModels;\n speaker: TTSSpeakers | string;\n targetLanguageCode: string;\n pace: number;\n sampleRate: number;\n baseURL: string;\n sentenceTokenizer: tokenize.SentenceTokenizer;\n // V2 only\n pitch?: number;\n loudness?: number;\n enablePreprocessing?: boolean;\n // V3 only\n temperature?: number;\n}\n\n// ---------------------------------------------------------------------------\n// Defaults per model\n// ---------------------------------------------------------------------------\n\nconst V2_DEFAULTS = {\n speaker: 'anushka' as const,\n pitch: 0,\n pace: 1.0,\n loudness: 1.0,\n enablePreprocessing: false,\n};\n\nconst V3_DEFAULTS = {\n speaker: 'shubh' as const,\n pace: 1.0,\n temperature: 0.6,\n};\n\n// ---------------------------------------------------------------------------\n// Resolve caller options into a fully-populated internal struct\n// ---------------------------------------------------------------------------\n\nfunction resolveOptions(opts: Partial<TTSOptions>): ResolvedTTSOptions {\n const apiKey = opts.apiKey ?? process.env.SARVAM_API_KEY;\n if (!apiKey) {\n throw new Error('Sarvam API key is required, whether as an argument or as $SARVAM_API_KEY');\n }\n\n const model: TTSModels = opts.model ?? 'bulbul:v2';\n const isV3 = model === 'bulbul:v3';\n\n const base: ResolvedTTSOptions = {\n apiKey,\n streaming: opts.streaming ?? true,\n model,\n speaker: opts.speaker ?? (isV3 ? V3_DEFAULTS.speaker : V2_DEFAULTS.speaker),\n targetLanguageCode: opts.targetLanguageCode ?? 'en-IN',\n pace: opts.pace ?? (isV3 ? V3_DEFAULTS.pace : V2_DEFAULTS.pace),\n sampleRate: opts.sampleRate ?? SARVAM_TTS_SAMPLE_RATE,\n baseURL: opts.baseURL ?? SARVAM_BASE_URL,\n sentenceTokenizer:\n opts.sentenceTokenizer ??\n new tokenize.basic.SentenceTokenizer({ minSentenceLength: MIN_SENTENCE_LENGTH }),\n };\n\n if (isV3) {\n base.temperature = (opts as TTSV3Options).temperature ?? V3_DEFAULTS.temperature;\n } else {\n const v2 = opts as TTSV2Options;\n base.pitch = v2.pitch ?? V2_DEFAULTS.pitch;\n base.loudness = v2.loudness ?? V2_DEFAULTS.loudness;\n base.enablePreprocessing = v2.enablePreprocessing ?? V2_DEFAULTS.enablePreprocessing;\n }\n\n return base;\n}\n\n// ---------------------------------------------------------------------------\n// Build the API request body — only sends model-relevant fields\n// ---------------------------------------------------------------------------\n\nfunction buildRequestBody(text: string, opts: ResolvedTTSOptions): Record<string, unknown> {\n const body: Record<string, unknown> = {\n text,\n target_language_code: opts.targetLanguageCode,\n speaker: opts.speaker,\n model: opts.model,\n pace: opts.pace,\n speech_sample_rate: String(opts.sampleRate),\n // Always request WAV — AudioByteStream requires raw PCM, which we get by\n // stripping the 44-byte WAV header. Other codecs produce compressed audio\n // that cannot be fed into AudioByteStream.\n output_audio_codec: 'wav',\n };\n\n if (opts.model === 'bulbul:v3') {\n if (opts.temperature != null) body.temperature = opts.temperature;\n } else {\n if (opts.pitch != null) body.pitch = opts.pitch;\n if (opts.loudness != null) body.loudness = opts.loudness;\n if (opts.enablePreprocessing != null) body.enable_preprocessing = opts.enablePreprocessing;\n }\n\n return body;\n}\n\n// ---------------------------------------------------------------------------\n// Build WS config message (sent as first message after connection)\n// ---------------------------------------------------------------------------\n\nfunction buildWsConfigMessage(opts: ResolvedTTSOptions): string {\n const data: Record<string, unknown> = {\n target_language_code: opts.targetLanguageCode,\n speaker: opts.speaker,\n model: opts.model,\n pace: opts.pace,\n speech_sample_rate: String(opts.sampleRate),\n output_audio_codec: 'linear16',\n };\n\n if (opts.model === 'bulbul:v3') {\n if (opts.temperature != null) data.temperature = opts.temperature;\n } else {\n if (opts.pitch != null) data.pitch = opts.pitch;\n if (opts.loudness != null) data.loudness = opts.loudness;\n if (opts.enablePreprocessing != null) data.enable_preprocessing = opts.enablePreprocessing;\n }\n\n return JSON.stringify({ type: 'config', data });\n}\n\n// ---------------------------------------------------------------------------\n// TTS class\n// ---------------------------------------------------------------------------\n\nexport class TTS extends tts.TTS {\n #opts: ResolvedTTSOptions;\n label = 'sarvam.TTS';\n\n /**\n * Create a new instance of Sarvam AI TTS.\n *\n * @remarks\n * `apiKey` must be set to your Sarvam API key, either using the argument or by setting the\n * `SARVAM_API_KEY` environment variable.\n */\n constructor(opts: Partial<TTSOptions> = {}) {\n const resolved = resolveOptions(opts);\n super(resolved.sampleRate, SARVAM_TTS_CHANNELS, { streaming: resolved.streaming });\n this.#opts = resolved;\n }\n\n /**\n * Update TTS options after initialization.\n *\n * @remarks\n * When the model changes, only truly shared fields (apiKey,\n * targetLanguageCode, pace, sampleRate, baseURL) carry over.\n * Model-specific fields (speaker, pitch, loudness, temperature,\n * enablePreprocessing) are dropped so resolveOptions re-applies\n * the correct defaults for the new model.\n */\n updateOptions(opts: Partial<TTSOptions>) {\n const modelChanging = opts.model != null && opts.model !== this.#opts.model;\n\n const base: Partial<TTSOptions> = modelChanging\n ? {\n apiKey: this.#opts.apiKey,\n streaming: this.#opts.streaming,\n targetLanguageCode: this.#opts.targetLanguageCode as TTSLanguages,\n pace: this.#opts.pace,\n sampleRate: this.#opts.sampleRate as TTSSampleRates,\n baseURL: this.#opts.baseURL,\n sentenceTokenizer: this.#opts.sentenceTokenizer,\n }\n : ({ ...this.#opts } as Partial<TTSOptions>);\n\n this.#opts = resolveOptions({ ...base, ...opts } as TTSOptions);\n }\n\n /**\n * Synthesize text to audio using Sarvam AI TTS.\n *\n * @param text - Text to synthesize (max 2500 chars for v3, 1500 for v2)\n * @param connOptions - API connection options\n * @param abortSignal - Abort signal for cancellation\n * @returns A chunked stream of synthesized audio\n */\n synthesize(\n text: string,\n connOptions?: APIConnectOptions,\n abortSignal?: AbortSignal,\n ): ChunkedStream {\n return new ChunkedStream(this, text, this.#opts, connOptions, abortSignal);\n }\n\n stream(): tts.SynthesizeStream {\n if (!this.capabilities.streaming) {\n throw new Error(\n 'Sarvam TTS streaming is disabled (`streaming: false`). Use synthesize() for REST or wrap with tts.StreamAdapter for streaming behavior.',\n );\n }\n return new SynthesizeStream(this, this.#opts);\n }\n}\n\n// ---------------------------------------------------------------------------\n// Chunked stream (non-streaming synthesis)\n// ---------------------------------------------------------------------------\n\n/** Chunked stream for Sarvam AI TTS that processes a single synthesis request. */\nexport class ChunkedStream extends tts.ChunkedStream {\n label = 'sarvam.ChunkedStream';\n private opts: ResolvedTTSOptions;\n\n /** @internal */\n constructor(\n tts: TTS,\n text: string,\n opts: ResolvedTTSOptions,\n connOptions?: APIConnectOptions,\n abortSignal?: AbortSignal,\n ) {\n super(text, tts, connOptions, abortSignal);\n this.opts = opts;\n }\n\n protected async run() {\n const requestId = shortuuid();\n\n const response = await fetch(`${this.opts.baseURL}/text-to-speech`, {\n method: 'POST',\n headers: {\n 'Content-Type': 'application/json',\n 'api-subscription-key': this.opts.apiKey,\n },\n body: JSON.stringify(buildRequestBody(this.inputText, this.opts)),\n signal: this.abortSignal,\n });\n\n if (!response.ok) {\n const errorBody = await response.text();\n throw new Error(`Sarvam TTS API error ${response.status}: ${errorBody}`);\n }\n\n const data = (await response.json()) as { audios: string[] };\n const audioBase64 = data.audios[0];\n if (!audioBase64) {\n throw new Error('Sarvam TTS returned empty audio');\n }\n\n // Decode base64 WAV and strip 44-byte header to get raw PCM\n const raw = Buffer.from(audioBase64, 'base64');\n const pcmData = raw.buffer.slice(raw.byteOffset + 44, raw.byteOffset + raw.byteLength);\n\n const audioByteStream = new AudioByteStream(this.opts.sampleRate, SARVAM_TTS_CHANNELS);\n const frames = [...audioByteStream.write(pcmData), ...audioByteStream.flush()];\n\n let lastFrame: AudioFrame | undefined;\n const sendLastFrame = (segmentId: string, final: boolean) => {\n if (lastFrame) {\n this.queue.put({ requestId, segmentId, frame: lastFrame, final });\n lastFrame = undefined;\n }\n };\n\n for (const frame of frames) {\n sendLastFrame(requestId, false);\n lastFrame = frame;\n }\n sendLastFrame(requestId, true);\n\n this.queue.close();\n }\n}\n\n// ---------------------------------------------------------------------------\n// WebSocket streaming synthesis\n// ---------------------------------------------------------------------------\n\nexport class SynthesizeStream extends tts.SynthesizeStream {\n private opts: ResolvedTTSOptions;\n private tokenizer: tokenize.SentenceStream;\n #logger = log();\n label = 'sarvam.SynthesizeStream';\n\n constructor(tts: TTS, opts: ResolvedTTSOptions) {\n super(tts);\n this.opts = opts;\n this.tokenizer = opts.sentenceTokenizer.stream();\n }\n\n private async closeWebSocket(ws: WebSocket): Promise<void> {\n try {\n if (ws.readyState === WebSocket.OPEN) {\n ws.send(JSON.stringify({ type: 'flush' }));\n\n try {\n await new Promise<void>((resolve) => {\n const timeout = setTimeout(() => resolve(), 1000);\n\n ws.once('message', () => {\n clearTimeout(timeout);\n resolve();\n });\n ws.once('close', () => {\n clearTimeout(timeout);\n resolve();\n });\n ws.once('error', () => {\n clearTimeout(timeout);\n resolve();\n });\n });\n } catch {\n // Ignore timeout or other errors during close sequence\n }\n }\n } catch (e) {\n this.#logger.warn(`Error during WebSocket close sequence: ${e}`);\n } finally {\n if (ws.readyState === WebSocket.OPEN || ws.readyState === WebSocket.CONNECTING) {\n ws.close();\n }\n }\n }\n\n protected async run() {\n const requestId = shortuuid();\n const segmentId = shortuuid();\n\n // Build WS URL: wss://api.sarvam.ai/text-to-speech/ws?model=...&send_completion_event=true\n const wsBaseUrl = this.opts.baseURL.replace(/^http/, 'ws');\n const url = new URL(`${wsBaseUrl}${SARVAM_WS_URL_PATH}`);\n url.searchParams.set('model', this.opts.model);\n url.searchParams.set('send_completion_event', 'true');\n\n const ws = new WebSocket(url, {\n headers: {\n 'api-subscription-key': this.opts.apiKey,\n },\n });\n\n await new Promise<void>((resolve, reject) => {\n const onOpen = () => {\n cleanup();\n resolve();\n };\n const onError = (error: Error) => {\n cleanup();\n reject(new Error(`Sarvam TTS WS connection error: ${error.message}`));\n };\n const onClose = (code: number) => {\n cleanup();\n reject(new Error(`Sarvam TTS WS closed during connect: ${code}`));\n };\n const cleanup = () => {\n ws.removeListener('open', onOpen);\n ws.removeListener('error', onError);\n ws.removeListener('close', onClose);\n };\n ws.on('open', onOpen);\n ws.on('error', onError);\n ws.on('close', onClose);\n });\n\n // Send config message immediately after connection\n ws.send(buildWsConfigMessage(this.opts));\n\n const inputTask = async () => {\n for await (const data of this.input) {\n if (data === SynthesizeStream.FLUSH_SENTINEL) {\n this.tokenizer.flush();\n continue;\n }\n this.tokenizer.pushText(data);\n }\n this.tokenizer.endInput();\n this.tokenizer.close();\n };\n\n const sendTask = async () => {\n for await (const event of this.tokenizer) {\n if (this.abortController.signal.aborted) break;\n\n const text = event.token;\n ws.send(JSON.stringify({ type: 'text', data: { text } }));\n }\n\n if (!this.abortController.signal.aborted) {\n ws.send(JSON.stringify({ type: 'flush' }));\n }\n };\n\n const recvTask = async () => {\n const bstream = new AudioByteStream(this.opts.sampleRate, SARVAM_TTS_CHANNELS);\n let finalReceived = false;\n let lastFrame: AudioFrame | undefined;\n\n const sendLastFrame = (final: boolean) => {\n if (lastFrame && !this.queue.closed) {\n this.queue.put({ requestId, segmentId, frame: lastFrame, final });\n lastFrame = undefined;\n }\n };\n\n return new Promise<void>((resolve, reject) => {\n ws.on('message', (data: RawData) => {\n let msg: { type: string; data?: Record<string, unknown> };\n try {\n msg = JSON.parse(data.toString());\n } catch {\n this.#logger.warn('Sarvam WS: received non-JSON message');\n return;\n }\n\n switch (msg.type) {\n case 'audio': {\n const audioB64 = (msg.data?.audio as string) ?? '';\n if (!audioB64) break;\n\n const raw = Buffer.from(audioB64, 'base64');\n const pcm = raw.buffer.slice(raw.byteOffset, raw.byteOffset + raw.byteLength);\n\n for (const frame of bstream.write(pcm as ArrayBuffer)) {\n sendLastFrame(false);\n lastFrame = frame;\n }\n break;\n }\n\n case 'event': {\n const eventType = msg.data?.event_type as string | undefined;\n if (eventType === 'final') {\n finalReceived = true;\n for (const frame of bstream.flush()) {\n sendLastFrame(false);\n lastFrame = frame;\n }\n sendLastFrame(true);\n\n if (!this.queue.closed) {\n this.queue.put(SynthesizeStream.END_OF_STREAM);\n }\n resolve();\n }\n break;\n }\n\n case 'error': {\n const errMsg = (msg.data?.message as string) ?? 'Unknown Sarvam WS error';\n const errCode = msg.data?.code as number | undefined;\n reject(new Error(`Sarvam WS error ${errCode ?? ''}: ${errMsg}`));\n break;\n }\n }\n });\n\n ws.on('close', () => {\n if (!finalReceived) {\n for (const frame of bstream.flush()) {\n sendLastFrame(false);\n lastFrame = frame;\n }\n sendLastFrame(true);\n\n if (!this.queue.closed) {\n this.queue.put(SynthesizeStream.END_OF_STREAM);\n }\n }\n resolve();\n });\n\n ws.on('error', (error) => {\n reject(error);\n });\n });\n };\n\n try {\n await Promise.all([inputTask(), sendTask(), recvTask()]);\n } catch (e) {\n const msg = e instanceof Error ? e.message : String(e);\n throw new Error(`Sarvam TTS streaming failed: ${msg}`);\n } finally {\n await this.closeWebSocket(ws);\n }\n }\n}\n"],"mappings":"AAGA;AAAA,EAEE;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,OACK;AAEP,SAAuB,iBAAiB;AAUxC,MAAM,yBAAyB;AAC/B,MAAM,sBAAsB;AAC5B,MAAM,kBAAkB;AACxB,MAAM,qBAAqB;AAC3B,MAAM,sBAAsB;AAgF5B,MAAM,cAAc;AAAA,EAClB,SAAS;AAAA,EACT,OAAO;AAAA,EACP,MAAM;AAAA,EACN,UAAU;AAAA,EACV,qBAAqB;AACvB;AAEA,MAAM,cAAc;AAAA,EAClB,SAAS;AAAA,EACT,MAAM;AAAA,EACN,aAAa;AACf;AAMA,SAAS,eAAe,MAA+C;AACrE,QAAM,SAAS,KAAK,UAAU,QAAQ,IAAI;AAC1C,MAAI,CAAC,QAAQ;AACX,UAAM,IAAI,MAAM,0EAA0E;AAAA,EAC5F;AAEA,QAAM,QAAmB,KAAK,SAAS;AACvC,QAAM,OAAO,UAAU;AAEvB,QAAM,OAA2B;AAAA,IAC/B;AAAA,IACA,WAAW,KAAK,aAAa;AAAA,IAC7B;AAAA,IACA,SAAS,KAAK,YAAY,OAAO,YAAY,UAAU,YAAY;AAAA,IACnE,oBAAoB,KAAK,sBAAsB;AAAA,IAC/C,MAAM,KAAK,SAAS,OAAO,YAAY,OAAO,YAAY;AAAA,IAC1D,YAAY,KAAK,cAAc;AAAA,IAC/B,SAAS,KAAK,WAAW;AAAA,IACzB,mBACE,KAAK,qBACL,IAAI,SAAS,MAAM,kBAAkB,EAAE,mBAAmB,oBAAoB,CAAC;AAAA,EACnF;AAEA,MAAI,MAAM;AACR,SAAK,cAAe,KAAsB,eAAe,YAAY;AAAA,EACvE,OAAO;AACL,UAAM,KAAK;AACX,SAAK,QAAQ,GAAG,SAAS,YAAY;AACrC,SAAK,WAAW,GAAG,YAAY,YAAY;AAC3C,SAAK,sBAAsB,GAAG,uBAAuB,YAAY;AAAA,EACnE;AAEA,SAAO;AACT;AAMA,SAAS,iBAAiB,MAAc,MAAmD;AACzF,QAAM,OAAgC;AAAA,IACpC;AAAA,IACA,sBAAsB,KAAK;AAAA,IAC3B,SAAS,KAAK;AAAA,IACd,OAAO,KAAK;AAAA,IACZ,MAAM,KAAK;AAAA,IACX,oBAAoB,OAAO,KAAK,UAAU;AAAA;AAAA;AAAA;AAAA,IAI1C,oBAAoB;AAAA,EACtB;AAEA,MAAI,KAAK,UAAU,aAAa;AAC9B,QAAI,KAAK,eAAe,KAAM,MAAK,cAAc,KAAK;AAAA,EACxD,OAAO;AACL,QAAI,KAAK,SAAS,KAAM,MAAK,QAAQ,KAAK;AAC1C,QAAI,KAAK,YAAY,KAAM,MAAK,WAAW,KAAK;AAChD,QAAI,KAAK,uBAAuB,KAAM,MAAK,uBAAuB,KAAK;AAAA,EACzE;AAEA,SAAO;AACT;AAMA,SAAS,qBAAqB,MAAkC;AAC9D,QAAM,OAAgC;AAAA,IACpC,sBAAsB,KAAK;AAAA,IAC3B,SAAS,KAAK;AAAA,IACd,OAAO,KAAK;AAAA,IACZ,MAAM,KAAK;AAAA,IACX,oBAAoB,OAAO,KAAK,UAAU;AAAA,IAC1C,oBAAoB;AAAA,EACtB;AAEA,MAAI,KAAK,UAAU,aAAa;AAC9B,QAAI,KAAK,eAAe,KAAM,MAAK,cAAc,KAAK;AAAA,EACxD,OAAO;AACL,QAAI,KAAK,SAAS,KAAM,MAAK,QAAQ,KAAK;AAC1C,QAAI,KAAK,YAAY,KAAM,MAAK,WAAW,KAAK;AAChD,QAAI,KAAK,uBAAuB,KAAM,MAAK,uBAAuB,KAAK;AAAA,EACzE;AAEA,SAAO,KAAK,UAAU,EAAE,MAAM,UAAU,KAAK,CAAC;AAChD;AAMO,MAAM,YAAY,IAAI,IAAI;AAAA,EAC/B;AAAA,EACA,QAAQ;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EASR,YAAY,OAA4B,CAAC,GAAG;AAC1C,UAAM,WAAW,eAAe,IAAI;AACpC,UAAM,SAAS,YAAY,qBAAqB,EAAE,WAAW,SAAS,UAAU,CAAC;AACjF,SAAK,QAAQ;AAAA,EACf;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAYA,cAAc,MAA2B;AACvC,UAAM,gBAAgB,KAAK,SAAS,QAAQ,KAAK,UAAU,KAAK,MAAM;AAEtE,UAAM,OAA4B,gBAC9B;AAAA,MACE,QAAQ,KAAK,MAAM;AAAA,MACnB,WAAW,KAAK,MAAM;AAAA,MACtB,oBAAoB,KAAK,MAAM;AAAA,MAC/B,MAAM,KAAK,MAAM;AAAA,MACjB,YAAY,KAAK,MAAM;AAAA,MACvB,SAAS,KAAK,MAAM;AAAA,MACpB,mBAAmB,KAAK,MAAM;AAAA,IAChC,IACC,EAAE,GAAG,KAAK,MAAM;AAErB,SAAK,QAAQ,eAAe,EAAE,GAAG,MAAM,GAAG,KAAK,CAAe;AAAA,EAChE;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAUA,WACE,MACA,aACA,aACe;AACf,WAAO,IAAI,cAAc,MAAM,MAAM,KAAK,OAAO,aAAa,WAAW;AAAA,EAC3E;AAAA,EAEA,SAA+B;AAC7B,QAAI,CAAC,KAAK,aAAa,WAAW;AAChC,YAAM,IAAI;AAAA,QACR;AAAA,MACF;AAAA,IACF;AACA,WAAO,IAAI,iBAAiB,MAAM,KAAK,KAAK;AAAA,EAC9C;AACF;AAOO,MAAM,sBAAsB,IAAI,cAAc;AAAA,EACnD,QAAQ;AAAA,EACA;AAAA;AAAA,EAGR,YACEA,MACA,MACA,MACA,aACA,aACA;AACA,UAAM,MAAMA,MAAK,aAAa,WAAW;AACzC,SAAK,OAAO;AAAA,EACd;AAAA,EAEA,MAAgB,MAAM;AACpB,UAAM,YAAY,UAAU;AAE5B,UAAM,WAAW,MAAM,MAAM,GAAG,KAAK,KAAK,OAAO,mBAAmB;AAAA,MAClE,QAAQ;AAAA,MACR,SAAS;AAAA,QACP,gBAAgB;AAAA,QAChB,wBAAwB,KAAK,KAAK;AAAA,MACpC;AAAA,MACA,MAAM,KAAK,UAAU,iBAAiB,KAAK,WAAW,KAAK,IAAI,CAAC;AAAA,MAChE,QAAQ,KAAK;AAAA,IACf,CAAC;AAED,QAAI,CAAC,SAAS,IAAI;AAChB,YAAM,YAAY,MAAM,SAAS,KAAK;AACtC,YAAM,IAAI,MAAM,wBAAwB,SAAS,MAAM,KAAK,SAAS,EAAE;AAAA,IACzE;AAEA,UAAM,OAAQ,MAAM,SAAS,KAAK;AAClC,UAAM,cAAc,KAAK,OAAO,CAAC;AACjC,QAAI,CAAC,aAAa;AAChB,YAAM,IAAI,MAAM,iCAAiC;AAAA,IACnD;AAGA,UAAM,MAAM,OAAO,KAAK,aAAa,QAAQ;AAC7C,UAAM,UAAU,IAAI,OAAO,MAAM,IAAI,aAAa,IAAI,IAAI,aAAa,IAAI,UAAU;AAErF,UAAM,kBAAkB,IAAI,gBAAgB,KAAK,KAAK,YAAY,mBAAmB;AACrF,UAAM,SAAS,CAAC,GAAG,gBAAgB,MAAM,OAAO,GAAG,GAAG,gBAAgB,MAAM,CAAC;AAE7E,QAAI;AACJ,UAAM,gBAAgB,CAAC,WAAmB,UAAmB;AAC3D,UAAI,WAAW;AACb,aAAK,MAAM,IAAI,EAAE,WAAW,WAAW,OAAO,WAAW,MAAM,CAAC;AAChE,oBAAY;AAAA,MACd;AAAA,IACF;AAEA,eAAW,SAAS,QAAQ;AAC1B,oBAAc,WAAW,KAAK;AAC9B,kBAAY;AAAA,IACd;AACA,kBAAc,WAAW,IAAI;AAE7B,SAAK,MAAM,MAAM;AAAA,EACnB;AACF;AAMO,MAAM,yBAAyB,IAAI,iBAAiB;AAAA,EACjD;AAAA,EACA;AAAA,EACR,UAAU,IAAI;AAAA,EACd,QAAQ;AAAA,EAER,YAAYA,MAAU,MAA0B;AAC9C,UAAMA,IAAG;AACT,SAAK,OAAO;AACZ,SAAK,YAAY,KAAK,kBAAkB,OAAO;AAAA,EACjD;AAAA,EAEA,MAAc,eAAe,IAA8B;AACzD,QAAI;AACF,UAAI,GAAG,eAAe,UAAU,MAAM;AACpC,WAAG,KAAK,KAAK,UAAU,EAAE,MAAM,QAAQ,CAAC,CAAC;AAEzC,YAAI;AACF,gBAAM,IAAI,QAAc,CAAC,YAAY;AACnC,kBAAM,UAAU,WAAW,MAAM,QAAQ,GAAG,GAAI;AAEhD,eAAG,KAAK,WAAW,MAAM;AACvB,2BAAa,OAAO;AACpB,sBAAQ;AAAA,YACV,CAAC;AACD,eAAG,KAAK,SAAS,MAAM;AACrB,2BAAa,OAAO;AACpB,sBAAQ;AAAA,YACV,CAAC;AACD,eAAG,KAAK,SAAS,MAAM;AACrB,2BAAa,OAAO;AACpB,sBAAQ;AAAA,YACV,CAAC;AAAA,UACH,CAAC;AAAA,QACH,QAAQ;AAAA,QAER;AAAA,MACF;AAAA,IACF,SAAS,GAAG;AACV,WAAK,QAAQ,KAAK,0CAA0C,CAAC,EAAE;AAAA,IACjE,UAAE;AACA,UAAI,GAAG,eAAe,UAAU,QAAQ,GAAG,eAAe,UAAU,YAAY;AAC9E,WAAG,MAAM;AAAA,MACX;AAAA,IACF;AAAA,EACF;AAAA,EAEA,MAAgB,MAAM;AACpB,UAAM,YAAY,UAAU;AAC5B,UAAM,YAAY,UAAU;AAG5B,UAAM,YAAY,KAAK,KAAK,QAAQ,QAAQ,SAAS,IAAI;AACzD,UAAM,MAAM,IAAI,IAAI,GAAG,SAAS,GAAG,kBAAkB,EAAE;AACvD,QAAI,aAAa,IAAI,SAAS,KAAK,KAAK,KAAK;AAC7C,QAAI,aAAa,IAAI,yBAAyB,MAAM;AAEpD,UAAM,KAAK,IAAI,UAAU,KAAK;AAAA,MAC5B,SAAS;AAAA,QACP,wBAAwB,KAAK,KAAK;AAAA,MACpC;AAAA,IACF,CAAC;AAED,UAAM,IAAI,QAAc,CAAC,SAAS,WAAW;AAC3C,YAAM,SAAS,MAAM;AACnB,gBAAQ;AACR,gBAAQ;AAAA,MACV;AACA,YAAM,UAAU,CAAC,UAAiB;AAChC,gBAAQ;AACR,eAAO,IAAI,MAAM,mCAAmC,MAAM,OAAO,EAAE,CAAC;AAAA,MACtE;AACA,YAAM,UAAU,CAAC,SAAiB;AAChC,gBAAQ;AACR,eAAO,IAAI,MAAM,wCAAwC,IAAI,EAAE,CAAC;AAAA,MAClE;AACA,YAAM,UAAU,MAAM;AACpB,WAAG,eAAe,QAAQ,MAAM;AAChC,WAAG,eAAe,SAAS,OAAO;AAClC,WAAG,eAAe,SAAS,OAAO;AAAA,MACpC;AACA,SAAG,GAAG,QAAQ,MAAM;AACpB,SAAG,GAAG,SAAS,OAAO;AACtB,SAAG,GAAG,SAAS,OAAO;AAAA,IACxB,CAAC;AAGD,OAAG,KAAK,qBAAqB,KAAK,IAAI,CAAC;AAEvC,UAAM,YAAY,YAAY;AAC5B,uBAAiB,QAAQ,KAAK,OAAO;AACnC,YAAI,SAAS,iBAAiB,gBAAgB;AAC5C,eAAK,UAAU,MAAM;AACrB;AAAA,QACF;AACA,aAAK,UAAU,SAAS,IAAI;AAAA,MAC9B;AACA,WAAK,UAAU,SAAS;AACxB,WAAK,UAAU,MAAM;AAAA,IACvB;AAEA,UAAM,WAAW,YAAY;AAC3B,uBAAiB,SAAS,KAAK,WAAW;AACxC,YAAI,KAAK,gBAAgB,OAAO,QAAS;AAEzC,cAAM,OAAO,MAAM;AACnB,WAAG,KAAK,KAAK,UAAU,EAAE,MAAM,QAAQ,MAAM,EAAE,KAAK,EAAE,CAAC,CAAC;AAAA,MAC1D;AAEA,UAAI,CAAC,KAAK,gBAAgB,OAAO,SAAS;AACxC,WAAG,KAAK,KAAK,UAAU,EAAE,MAAM,QAAQ,CAAC,CAAC;AAAA,MAC3C;AAAA,IACF;AAEA,UAAM,WAAW,YAAY;AAC3B,YAAM,UAAU,IAAI,gBAAgB,KAAK,KAAK,YAAY,mBAAmB;AAC7E,UAAI,gBAAgB;AACpB,UAAI;AAEJ,YAAM,gBAAgB,CAAC,UAAmB;AACxC,YAAI,aAAa,CAAC,KAAK,MAAM,QAAQ;AACnC,eAAK,MAAM,IAAI,EAAE,WAAW,WAAW,OAAO,WAAW,MAAM,CAAC;AAChE,sBAAY;AAAA,QACd;AAAA,MACF;AAEA,aAAO,IAAI,QAAc,CAAC,SAAS,WAAW;AAC5C,WAAG,GAAG,WAAW,CAAC,SAAkB;AAze5C;AA0eU,cAAI;AACJ,cAAI;AACF,kBAAM,KAAK,MAAM,KAAK,SAAS,CAAC;AAAA,UAClC,QAAQ;AACN,iBAAK,QAAQ,KAAK,sCAAsC;AACxD;AAAA,UACF;AAEA,kBAAQ,IAAI,MAAM;AAAA,YAChB,KAAK,SAAS;AACZ,oBAAM,aAAY,SAAI,SAAJ,mBAAU,UAAoB;AAChD,kBAAI,CAAC,SAAU;AAEf,oBAAM,MAAM,OAAO,KAAK,UAAU,QAAQ;AAC1C,oBAAM,MAAM,IAAI,OAAO,MAAM,IAAI,YAAY,IAAI,aAAa,IAAI,UAAU;AAE5E,yBAAW,SAAS,QAAQ,MAAM,GAAkB,GAAG;AACrD,8BAAc,KAAK;AACnB,4BAAY;AAAA,cACd;AACA;AAAA,YACF;AAAA,YAEA,KAAK,SAAS;AACZ,oBAAM,aAAY,SAAI,SAAJ,mBAAU;AAC5B,kBAAI,cAAc,SAAS;AACzB,gCAAgB;AAChB,2BAAW,SAAS,QAAQ,MAAM,GAAG;AACnC,gCAAc,KAAK;AACnB,8BAAY;AAAA,gBACd;AACA,8BAAc,IAAI;AAElB,oBAAI,CAAC,KAAK,MAAM,QAAQ;AACtB,uBAAK,MAAM,IAAI,iBAAiB,aAAa;AAAA,gBAC/C;AACA,wBAAQ;AAAA,cACV;AACA;AAAA,YACF;AAAA,YAEA,KAAK,SAAS;AACZ,oBAAM,WAAU,SAAI,SAAJ,mBAAU,YAAsB;AAChD,oBAAM,WAAU,SAAI,SAAJ,mBAAU;AAC1B,qBAAO,IAAI,MAAM,mBAAmB,WAAW,EAAE,KAAK,MAAM,EAAE,CAAC;AAC/D;AAAA,YACF;AAAA,UACF;AAAA,QACF,CAAC;AAED,WAAG,GAAG,SAAS,MAAM;AACnB,cAAI,CAAC,eAAe;AAClB,uBAAW,SAAS,QAAQ,MAAM,GAAG;AACnC,4BAAc,KAAK;AACnB,0BAAY;AAAA,YACd;AACA,0BAAc,IAAI;AAElB,gBAAI,CAAC,KAAK,MAAM,QAAQ;AACtB,mBAAK,MAAM,IAAI,iBAAiB,aAAa;AAAA,YAC/C;AAAA,UACF;AACA,kBAAQ;AAAA,QACV,CAAC;AAED,WAAG,GAAG,SAAS,CAAC,UAAU;AACxB,iBAAO,KAAK;AAAA,QACd,CAAC;AAAA,MACH,CAAC;AAAA,IACH;AAEA,QAAI;AACF,YAAM,QAAQ,IAAI,CAAC,UAAU,GAAG,SAAS,GAAG,SAAS,CAAC,CAAC;AAAA,IACzD,SAAS,GAAG;AACV,YAAM,MAAM,aAAa,QAAQ,EAAE,UAAU,OAAO,CAAC;AACrD,YAAM,IAAI,MAAM,gCAAgC,GAAG,EAAE;AAAA,IACvD,UAAE;AACA,YAAM,KAAK,eAAe,EAAE;AAAA,IAC9B;AAAA,EACF;AACF;","names":["tts"]}
1
+ {"version":3,"sources":["../src/tts.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2025 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport {\n type APIConnectOptions,\n AudioByteStream,\n log,\n normalizeLanguage,\n shortuuid,\n tokenize,\n tts,\n} from '@livekit/agents';\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { type RawData, WebSocket } from 'ws';\nimport type {\n TTSLanguages,\n TTSModels,\n TTSSampleRates,\n TTSSpeakers,\n TTSV2Speakers,\n TTSV3Speakers,\n} from './models.js';\n\nconst SARVAM_TTS_SAMPLE_RATE = 24000;\nconst SARVAM_TTS_CHANNELS = 1;\nconst SARVAM_BASE_URL = 'https://api.sarvam.ai';\nconst SARVAM_WS_URL_PATH = '/text-to-speech/ws';\nconst MIN_SENTENCE_LENGTH = 8;\n\n// ---------------------------------------------------------------------------\n// Model-specific option types\n// V2 supports pitch / loudness / enablePreprocessing\n// V3 supports temperature (pitch, loudness, enablePreprocessing are NOT supported)\n// ---------------------------------------------------------------------------\n\ninterface TTSBaseOptions {\n /** Sarvam API key. Defaults to $SARVAM_API_KEY */\n apiKey?: string;\n /**\n * Whether to use native WebSocket streaming for `stream()`.\n * Set to `false` to prefer non-streaming REST synthesis (used by Agent via TTS StreamAdapter).\n * Default: `true`.\n */\n streaming?: boolean;\n /** Target language code (BCP-47) */\n targetLanguageCode?: TTSLanguages | string;\n /** Speech pace. v2: 0.3–3.0, v3: 0.5–2.0 (default 1.0) */\n pace?: number;\n /** Output sample rate in Hz (default 24000) */\n sampleRate?: TTSSampleRates | number;\n /** Base URL for the Sarvam API */\n baseURL?: string;\n /** Sentence tokenizer for streaming (default: basic sentence tokenizer) */\n sentenceTokenizer?: tokenize.SentenceTokenizer;\n}\n\n/** Options specific to bulbul:v2 */\nexport interface TTSV2Options extends TTSBaseOptions {\n model?: 'bulbul:v2';\n /** Speaker voice (v2 voices). Default: 'anushka' */\n speaker?: TTSV2Speakers | string;\n /** Pitch adjustment, -0.75 to 0.75 (v2 only) */\n pitch?: number;\n /** Loudness, 0.3 to 3.0 (v2 only) */\n loudness?: number;\n /** Enable text preprocessing (v2 only) */\n enablePreprocessing?: boolean;\n}\n\n/** Options specific to bulbul:v3 */\nexport interface TTSV3Options extends TTSBaseOptions {\n model: 'bulbul:v3';\n /** Speaker voice (v3 voices). Default: 'shubh' */\n speaker?: TTSV3Speakers | string;\n /** Temperature for voice variation, 0.01 to 2.0 (v3 only, default 0.6) */\n temperature?: number;\n}\n\n/** Combined options — discriminated by `model` field */\nexport type TTSOptions = TTSV2Options | TTSV3Options;\n\n// ---------------------------------------------------------------------------\n// Resolved (internal) options — flat union of all fields\n// ---------------------------------------------------------------------------\n\ninterface ResolvedTTSOptions {\n apiKey: string;\n streaming: boolean;\n model: TTSModels;\n speaker: TTSSpeakers | string;\n targetLanguageCode: string;\n pace: number;\n sampleRate: number;\n baseURL: string;\n sentenceTokenizer: tokenize.SentenceTokenizer;\n // V2 only\n pitch?: number;\n loudness?: number;\n enablePreprocessing?: boolean;\n // V3 only\n temperature?: number;\n}\n\n// ---------------------------------------------------------------------------\n// Defaults per model\n// ---------------------------------------------------------------------------\n\nconst V2_DEFAULTS = {\n speaker: 'anushka' as const,\n pitch: 0,\n pace: 1.0,\n loudness: 1.0,\n enablePreprocessing: false,\n};\n\nconst V3_DEFAULTS = {\n speaker: 'shubh' as const,\n pace: 1.0,\n temperature: 0.6,\n};\n\n// ---------------------------------------------------------------------------\n// Resolve caller options into a fully-populated internal struct\n// ---------------------------------------------------------------------------\n\nfunction resolveOptions(opts: Partial<TTSOptions>): ResolvedTTSOptions {\n const apiKey = opts.apiKey ?? process.env.SARVAM_API_KEY;\n if (!apiKey) {\n throw new Error('Sarvam API key is required, whether as an argument or as $SARVAM_API_KEY');\n }\n\n const model: TTSModels = opts.model ?? 'bulbul:v2';\n const isV3 = model === 'bulbul:v3';\n\n const base: ResolvedTTSOptions = {\n apiKey,\n streaming: opts.streaming ?? true,\n model,\n speaker: opts.speaker ?? (isV3 ? V3_DEFAULTS.speaker : V2_DEFAULTS.speaker),\n targetLanguageCode: normalizeLanguage(opts.targetLanguageCode ?? 'en-IN'),\n pace: opts.pace ?? (isV3 ? V3_DEFAULTS.pace : V2_DEFAULTS.pace),\n sampleRate: opts.sampleRate ?? SARVAM_TTS_SAMPLE_RATE,\n baseURL: opts.baseURL ?? SARVAM_BASE_URL,\n sentenceTokenizer:\n opts.sentenceTokenizer ??\n new tokenize.basic.SentenceTokenizer({ minSentenceLength: MIN_SENTENCE_LENGTH }),\n };\n\n if (isV3) {\n base.temperature = (opts as TTSV3Options).temperature ?? V3_DEFAULTS.temperature;\n } else {\n const v2 = opts as TTSV2Options;\n base.pitch = v2.pitch ?? V2_DEFAULTS.pitch;\n base.loudness = v2.loudness ?? V2_DEFAULTS.loudness;\n base.enablePreprocessing = v2.enablePreprocessing ?? V2_DEFAULTS.enablePreprocessing;\n }\n\n return base;\n}\n\n// ---------------------------------------------------------------------------\n// Build the API request body — only sends model-relevant fields\n// ---------------------------------------------------------------------------\n\nfunction buildRequestBody(text: string, opts: ResolvedTTSOptions): Record<string, unknown> {\n const body: Record<string, unknown> = {\n text,\n target_language_code: opts.targetLanguageCode,\n speaker: opts.speaker,\n model: opts.model,\n pace: opts.pace,\n speech_sample_rate: String(opts.sampleRate),\n // Always request WAV — AudioByteStream requires raw PCM, which we get by\n // stripping the 44-byte WAV header. Other codecs produce compressed audio\n // that cannot be fed into AudioByteStream.\n output_audio_codec: 'wav',\n };\n\n if (opts.model === 'bulbul:v3') {\n if (opts.temperature != null) body.temperature = opts.temperature;\n } else {\n if (opts.pitch != null) body.pitch = opts.pitch;\n if (opts.loudness != null) body.loudness = opts.loudness;\n if (opts.enablePreprocessing != null) body.enable_preprocessing = opts.enablePreprocessing;\n }\n\n return body;\n}\n\n// ---------------------------------------------------------------------------\n// Build WS config message (sent as first message after connection)\n// ---------------------------------------------------------------------------\n\nfunction buildWsConfigMessage(opts: ResolvedTTSOptions): string {\n const data: Record<string, unknown> = {\n target_language_code: opts.targetLanguageCode,\n speaker: opts.speaker,\n model: opts.model,\n pace: opts.pace,\n speech_sample_rate: String(opts.sampleRate),\n output_audio_codec: 'linear16',\n };\n\n if (opts.model === 'bulbul:v3') {\n if (opts.temperature != null) data.temperature = opts.temperature;\n } else {\n if (opts.pitch != null) data.pitch = opts.pitch;\n if (opts.loudness != null) data.loudness = opts.loudness;\n if (opts.enablePreprocessing != null) data.enable_preprocessing = opts.enablePreprocessing;\n }\n\n return JSON.stringify({ type: 'config', data });\n}\n\n// ---------------------------------------------------------------------------\n// TTS class\n// ---------------------------------------------------------------------------\n\nexport class TTS extends tts.TTS {\n #opts: ResolvedTTSOptions;\n label = 'sarvam.TTS';\n\n /**\n * Create a new instance of Sarvam AI TTS.\n *\n * @remarks\n * `apiKey` must be set to your Sarvam API key, either using the argument or by setting the\n * `SARVAM_API_KEY` environment variable.\n */\n constructor(opts: Partial<TTSOptions> = {}) {\n const resolved = resolveOptions(opts);\n super(resolved.sampleRate, SARVAM_TTS_CHANNELS, { streaming: resolved.streaming });\n this.#opts = resolved;\n }\n\n /**\n * Update TTS options after initialization.\n *\n * @remarks\n * When the model changes, only truly shared fields (apiKey,\n * targetLanguageCode, pace, sampleRate, baseURL) carry over.\n * Model-specific fields (speaker, pitch, loudness, temperature,\n * enablePreprocessing) are dropped so resolveOptions re-applies\n * the correct defaults for the new model.\n */\n updateOptions(opts: Partial<TTSOptions>) {\n const modelChanging = opts.model != null && opts.model !== this.#opts.model;\n\n const base: Partial<TTSOptions> = modelChanging\n ? {\n apiKey: this.#opts.apiKey,\n streaming: this.#opts.streaming,\n targetLanguageCode: this.#opts.targetLanguageCode as TTSLanguages,\n pace: this.#opts.pace,\n sampleRate: this.#opts.sampleRate as TTSSampleRates,\n baseURL: this.#opts.baseURL,\n sentenceTokenizer: this.#opts.sentenceTokenizer,\n }\n : ({ ...this.#opts } as Partial<TTSOptions>);\n\n this.#opts = resolveOptions({ ...base, ...opts } as TTSOptions);\n }\n\n /**\n * Synthesize text to audio using Sarvam AI TTS.\n *\n * @param text - Text to synthesize (max 2500 chars for v3, 1500 for v2)\n * @param connOptions - API connection options\n * @param abortSignal - Abort signal for cancellation\n * @returns A chunked stream of synthesized audio\n */\n synthesize(\n text: string,\n connOptions?: APIConnectOptions,\n abortSignal?: AbortSignal,\n ): ChunkedStream {\n return new ChunkedStream(this, text, this.#opts, connOptions, abortSignal);\n }\n\n stream(): tts.SynthesizeStream {\n if (!this.capabilities.streaming) {\n throw new Error(\n 'Sarvam TTS streaming is disabled (`streaming: false`). Use synthesize() for REST or wrap with tts.StreamAdapter for streaming behavior.',\n );\n }\n return new SynthesizeStream(this, this.#opts);\n }\n}\n\n// ---------------------------------------------------------------------------\n// Chunked stream (non-streaming synthesis)\n// ---------------------------------------------------------------------------\n\n/** Chunked stream for Sarvam AI TTS that processes a single synthesis request. */\nexport class ChunkedStream extends tts.ChunkedStream {\n label = 'sarvam.ChunkedStream';\n private opts: ResolvedTTSOptions;\n\n /** @internal */\n constructor(\n tts: TTS,\n text: string,\n opts: ResolvedTTSOptions,\n connOptions?: APIConnectOptions,\n abortSignal?: AbortSignal,\n ) {\n super(text, tts, connOptions, abortSignal);\n this.opts = opts;\n }\n\n protected async run() {\n const requestId = shortuuid();\n\n const response = await fetch(`${this.opts.baseURL}/text-to-speech`, {\n method: 'POST',\n headers: {\n 'Content-Type': 'application/json',\n 'api-subscription-key': this.opts.apiKey,\n },\n body: JSON.stringify(buildRequestBody(this.inputText, this.opts)),\n signal: this.abortSignal,\n });\n\n if (!response.ok) {\n const errorBody = await response.text();\n throw new Error(`Sarvam TTS API error ${response.status}: ${errorBody}`);\n }\n\n const data = (await response.json()) as { audios: string[] };\n const audioBase64 = data.audios[0];\n if (!audioBase64) {\n throw new Error('Sarvam TTS returned empty audio');\n }\n\n // Decode base64 WAV and strip 44-byte header to get raw PCM\n const raw = Buffer.from(audioBase64, 'base64');\n const pcmData = raw.buffer.slice(raw.byteOffset + 44, raw.byteOffset + raw.byteLength);\n\n const audioByteStream = new AudioByteStream(this.opts.sampleRate, SARVAM_TTS_CHANNELS);\n const frames = [...audioByteStream.write(pcmData), ...audioByteStream.flush()];\n\n let lastFrame: AudioFrame | undefined;\n const sendLastFrame = (segmentId: string, final: boolean) => {\n if (lastFrame) {\n this.queue.put({ requestId, segmentId, frame: lastFrame, final });\n lastFrame = undefined;\n }\n };\n\n for (const frame of frames) {\n sendLastFrame(requestId, false);\n lastFrame = frame;\n }\n sendLastFrame(requestId, true);\n\n this.queue.close();\n }\n}\n\n// ---------------------------------------------------------------------------\n// WebSocket streaming synthesis\n// ---------------------------------------------------------------------------\n\nexport class SynthesizeStream extends tts.SynthesizeStream {\n private opts: ResolvedTTSOptions;\n private tokenizer: tokenize.SentenceStream;\n #logger = log();\n label = 'sarvam.SynthesizeStream';\n\n constructor(tts: TTS, opts: ResolvedTTSOptions) {\n super(tts);\n this.opts = opts;\n this.tokenizer = opts.sentenceTokenizer.stream();\n }\n\n private async closeWebSocket(ws: WebSocket): Promise<void> {\n try {\n if (ws.readyState === WebSocket.OPEN) {\n ws.send(JSON.stringify({ type: 'flush' }));\n\n try {\n await new Promise<void>((resolve) => {\n const timeout = setTimeout(() => resolve(), 1000);\n\n ws.once('message', () => {\n clearTimeout(timeout);\n resolve();\n });\n ws.once('close', () => {\n clearTimeout(timeout);\n resolve();\n });\n ws.once('error', () => {\n clearTimeout(timeout);\n resolve();\n });\n });\n } catch {\n // Ignore timeout or other errors during close sequence\n }\n }\n } catch (e) {\n this.#logger.warn(`Error during WebSocket close sequence: ${e}`);\n } finally {\n if (ws.readyState === WebSocket.OPEN || ws.readyState === WebSocket.CONNECTING) {\n ws.close();\n }\n }\n }\n\n protected async run() {\n const requestId = shortuuid();\n const segmentId = shortuuid();\n\n // Build WS URL: wss://api.sarvam.ai/text-to-speech/ws?model=...&send_completion_event=true\n const wsBaseUrl = this.opts.baseURL.replace(/^http/, 'ws');\n const url = new URL(`${wsBaseUrl}${SARVAM_WS_URL_PATH}`);\n url.searchParams.set('model', this.opts.model);\n url.searchParams.set('send_completion_event', 'true');\n\n const ws = new WebSocket(url, {\n headers: {\n 'api-subscription-key': this.opts.apiKey,\n },\n });\n\n await new Promise<void>((resolve, reject) => {\n const onOpen = () => {\n cleanup();\n resolve();\n };\n const onError = (error: Error) => {\n cleanup();\n reject(new Error(`Sarvam TTS WS connection error: ${error.message}`));\n };\n const onClose = (code: number) => {\n cleanup();\n reject(new Error(`Sarvam TTS WS closed during connect: ${code}`));\n };\n const cleanup = () => {\n ws.removeListener('open', onOpen);\n ws.removeListener('error', onError);\n ws.removeListener('close', onClose);\n };\n ws.on('open', onOpen);\n ws.on('error', onError);\n ws.on('close', onClose);\n });\n\n // Send config message immediately after connection\n ws.send(buildWsConfigMessage(this.opts));\n\n const inputTask = async () => {\n for await (const data of this.input) {\n if (data === SynthesizeStream.FLUSH_SENTINEL) {\n this.tokenizer.flush();\n continue;\n }\n this.tokenizer.pushText(data);\n }\n this.tokenizer.endInput();\n this.tokenizer.close();\n };\n\n const sendTask = async () => {\n for await (const event of this.tokenizer) {\n if (this.abortController.signal.aborted) break;\n\n const text = event.token;\n ws.send(JSON.stringify({ type: 'text', data: { text } }));\n }\n\n if (!this.abortController.signal.aborted) {\n ws.send(JSON.stringify({ type: 'flush' }));\n }\n };\n\n const recvTask = async () => {\n const bstream = new AudioByteStream(this.opts.sampleRate, SARVAM_TTS_CHANNELS);\n let finalReceived = false;\n let lastFrame: AudioFrame | undefined;\n\n const sendLastFrame = (final: boolean) => {\n if (lastFrame && !this.queue.closed) {\n this.queue.put({ requestId, segmentId, frame: lastFrame, final });\n lastFrame = undefined;\n }\n };\n\n return new Promise<void>((resolve, reject) => {\n ws.on('message', (data: RawData) => {\n let msg: { type: string; data?: Record<string, unknown> };\n try {\n msg = JSON.parse(data.toString());\n } catch {\n this.#logger.warn('Sarvam WS: received non-JSON message');\n return;\n }\n\n switch (msg.type) {\n case 'audio': {\n const audioB64 = (msg.data?.audio as string) ?? '';\n if (!audioB64) break;\n\n const raw = Buffer.from(audioB64, 'base64');\n const pcm = raw.buffer.slice(raw.byteOffset, raw.byteOffset + raw.byteLength);\n\n for (const frame of bstream.write(pcm as ArrayBuffer)) {\n sendLastFrame(false);\n lastFrame = frame;\n }\n break;\n }\n\n case 'event': {\n const eventType = msg.data?.event_type as string | undefined;\n if (eventType === 'final') {\n finalReceived = true;\n for (const frame of bstream.flush()) {\n sendLastFrame(false);\n lastFrame = frame;\n }\n sendLastFrame(true);\n\n if (!this.queue.closed) {\n this.queue.put(SynthesizeStream.END_OF_STREAM);\n }\n resolve();\n }\n break;\n }\n\n case 'error': {\n const errMsg = (msg.data?.message as string) ?? 'Unknown Sarvam WS error';\n const errCode = msg.data?.code as number | undefined;\n reject(new Error(`Sarvam WS error ${errCode ?? ''}: ${errMsg}`));\n break;\n }\n }\n });\n\n ws.on('close', () => {\n if (!finalReceived) {\n for (const frame of bstream.flush()) {\n sendLastFrame(false);\n lastFrame = frame;\n }\n sendLastFrame(true);\n\n if (!this.queue.closed) {\n this.queue.put(SynthesizeStream.END_OF_STREAM);\n }\n }\n resolve();\n });\n\n ws.on('error', (error) => {\n reject(error);\n });\n });\n };\n\n try {\n await Promise.all([inputTask(), sendTask(), recvTask()]);\n } catch (e) {\n const msg = e instanceof Error ? e.message : String(e);\n throw new Error(`Sarvam TTS streaming failed: ${msg}`);\n } finally {\n await this.closeWebSocket(ws);\n }\n }\n}\n"],"mappings":"AAGA;AAAA,EAEE;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,OACK;AAEP,SAAuB,iBAAiB;AAUxC,MAAM,yBAAyB;AAC/B,MAAM,sBAAsB;AAC5B,MAAM,kBAAkB;AACxB,MAAM,qBAAqB;AAC3B,MAAM,sBAAsB;AAgF5B,MAAM,cAAc;AAAA,EAClB,SAAS;AAAA,EACT,OAAO;AAAA,EACP,MAAM;AAAA,EACN,UAAU;AAAA,EACV,qBAAqB;AACvB;AAEA,MAAM,cAAc;AAAA,EAClB,SAAS;AAAA,EACT,MAAM;AAAA,EACN,aAAa;AACf;AAMA,SAAS,eAAe,MAA+C;AACrE,QAAM,SAAS,KAAK,UAAU,QAAQ,IAAI;AAC1C,MAAI,CAAC,QAAQ;AACX,UAAM,IAAI,MAAM,0EAA0E;AAAA,EAC5F;AAEA,QAAM,QAAmB,KAAK,SAAS;AACvC,QAAM,OAAO,UAAU;AAEvB,QAAM,OAA2B;AAAA,IAC/B;AAAA,IACA,WAAW,KAAK,aAAa;AAAA,IAC7B;AAAA,IACA,SAAS,KAAK,YAAY,OAAO,YAAY,UAAU,YAAY;AAAA,IACnE,oBAAoB,kBAAkB,KAAK,sBAAsB,OAAO;AAAA,IACxE,MAAM,KAAK,SAAS,OAAO,YAAY,OAAO,YAAY;AAAA,IAC1D,YAAY,KAAK,cAAc;AAAA,IAC/B,SAAS,KAAK,WAAW;AAAA,IACzB,mBACE,KAAK,qBACL,IAAI,SAAS,MAAM,kBAAkB,EAAE,mBAAmB,oBAAoB,CAAC;AAAA,EACnF;AAEA,MAAI,MAAM;AACR,SAAK,cAAe,KAAsB,eAAe,YAAY;AAAA,EACvE,OAAO;AACL,UAAM,KAAK;AACX,SAAK,QAAQ,GAAG,SAAS,YAAY;AACrC,SAAK,WAAW,GAAG,YAAY,YAAY;AAC3C,SAAK,sBAAsB,GAAG,uBAAuB,YAAY;AAAA,EACnE;AAEA,SAAO;AACT;AAMA,SAAS,iBAAiB,MAAc,MAAmD;AACzF,QAAM,OAAgC;AAAA,IACpC;AAAA,IACA,sBAAsB,KAAK;AAAA,IAC3B,SAAS,KAAK;AAAA,IACd,OAAO,KAAK;AAAA,IACZ,MAAM,KAAK;AAAA,IACX,oBAAoB,OAAO,KAAK,UAAU;AAAA;AAAA;AAAA;AAAA,IAI1C,oBAAoB;AAAA,EACtB;AAEA,MAAI,KAAK,UAAU,aAAa;AAC9B,QAAI,KAAK,eAAe,KAAM,MAAK,cAAc,KAAK;AAAA,EACxD,OAAO;AACL,QAAI,KAAK,SAAS,KAAM,MAAK,QAAQ,KAAK;AAC1C,QAAI,KAAK,YAAY,KAAM,MAAK,WAAW,KAAK;AAChD,QAAI,KAAK,uBAAuB,KAAM,MAAK,uBAAuB,KAAK;AAAA,EACzE;AAEA,SAAO;AACT;AAMA,SAAS,qBAAqB,MAAkC;AAC9D,QAAM,OAAgC;AAAA,IACpC,sBAAsB,KAAK;AAAA,IAC3B,SAAS,KAAK;AAAA,IACd,OAAO,KAAK;AAAA,IACZ,MAAM,KAAK;AAAA,IACX,oBAAoB,OAAO,KAAK,UAAU;AAAA,IAC1C,oBAAoB;AAAA,EACtB;AAEA,MAAI,KAAK,UAAU,aAAa;AAC9B,QAAI,KAAK,eAAe,KAAM,MAAK,cAAc,KAAK;AAAA,EACxD,OAAO;AACL,QAAI,KAAK,SAAS,KAAM,MAAK,QAAQ,KAAK;AAC1C,QAAI,KAAK,YAAY,KAAM,MAAK,WAAW,KAAK;AAChD,QAAI,KAAK,uBAAuB,KAAM,MAAK,uBAAuB,KAAK;AAAA,EACzE;AAEA,SAAO,KAAK,UAAU,EAAE,MAAM,UAAU,KAAK,CAAC;AAChD;AAMO,MAAM,YAAY,IAAI,IAAI;AAAA,EAC/B;AAAA,EACA,QAAQ;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EASR,YAAY,OAA4B,CAAC,GAAG;AAC1C,UAAM,WAAW,eAAe,IAAI;AACpC,UAAM,SAAS,YAAY,qBAAqB,EAAE,WAAW,SAAS,UAAU,CAAC;AACjF,SAAK,QAAQ;AAAA,EACf;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAYA,cAAc,MAA2B;AACvC,UAAM,gBAAgB,KAAK,SAAS,QAAQ,KAAK,UAAU,KAAK,MAAM;AAEtE,UAAM,OAA4B,gBAC9B;AAAA,MACE,QAAQ,KAAK,MAAM;AAAA,MACnB,WAAW,KAAK,MAAM;AAAA,MACtB,oBAAoB,KAAK,MAAM;AAAA,MAC/B,MAAM,KAAK,MAAM;AAAA,MACjB,YAAY,KAAK,MAAM;AAAA,MACvB,SAAS,KAAK,MAAM;AAAA,MACpB,mBAAmB,KAAK,MAAM;AAAA,IAChC,IACC,EAAE,GAAG,KAAK,MAAM;AAErB,SAAK,QAAQ,eAAe,EAAE,GAAG,MAAM,GAAG,KAAK,CAAe;AAAA,EAChE;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAUA,WACE,MACA,aACA,aACe;AACf,WAAO,IAAI,cAAc,MAAM,MAAM,KAAK,OAAO,aAAa,WAAW;AAAA,EAC3E;AAAA,EAEA,SAA+B;AAC7B,QAAI,CAAC,KAAK,aAAa,WAAW;AAChC,YAAM,IAAI;AAAA,QACR;AAAA,MACF;AAAA,IACF;AACA,WAAO,IAAI,iBAAiB,MAAM,KAAK,KAAK;AAAA,EAC9C;AACF;AAOO,MAAM,sBAAsB,IAAI,cAAc;AAAA,EACnD,QAAQ;AAAA,EACA;AAAA;AAAA,EAGR,YACEA,MACA,MACA,MACA,aACA,aACA;AACA,UAAM,MAAMA,MAAK,aAAa,WAAW;AACzC,SAAK,OAAO;AAAA,EACd;AAAA,EAEA,MAAgB,MAAM;AACpB,UAAM,YAAY,UAAU;AAE5B,UAAM,WAAW,MAAM,MAAM,GAAG,KAAK,KAAK,OAAO,mBAAmB;AAAA,MAClE,QAAQ;AAAA,MACR,SAAS;AAAA,QACP,gBAAgB;AAAA,QAChB,wBAAwB,KAAK,KAAK;AAAA,MACpC;AAAA,MACA,MAAM,KAAK,UAAU,iBAAiB,KAAK,WAAW,KAAK,IAAI,CAAC;AAAA,MAChE,QAAQ,KAAK;AAAA,IACf,CAAC;AAED,QAAI,CAAC,SAAS,IAAI;AAChB,YAAM,YAAY,MAAM,SAAS,KAAK;AACtC,YAAM,IAAI,MAAM,wBAAwB,SAAS,MAAM,KAAK,SAAS,EAAE;AAAA,IACzE;AAEA,UAAM,OAAQ,MAAM,SAAS,KAAK;AAClC,UAAM,cAAc,KAAK,OAAO,CAAC;AACjC,QAAI,CAAC,aAAa;AAChB,YAAM,IAAI,MAAM,iCAAiC;AAAA,IACnD;AAGA,UAAM,MAAM,OAAO,KAAK,aAAa,QAAQ;AAC7C,UAAM,UAAU,IAAI,OAAO,MAAM,IAAI,aAAa,IAAI,IAAI,aAAa,IAAI,UAAU;AAErF,UAAM,kBAAkB,IAAI,gBAAgB,KAAK,KAAK,YAAY,mBAAmB;AACrF,UAAM,SAAS,CAAC,GAAG,gBAAgB,MAAM,OAAO,GAAG,GAAG,gBAAgB,MAAM,CAAC;AAE7E,QAAI;AACJ,UAAM,gBAAgB,CAAC,WAAmB,UAAmB;AAC3D,UAAI,WAAW;AACb,aAAK,MAAM,IAAI,EAAE,WAAW,WAAW,OAAO,WAAW,MAAM,CAAC;AAChE,oBAAY;AAAA,MACd;AAAA,IACF;AAEA,eAAW,SAAS,QAAQ;AAC1B,oBAAc,WAAW,KAAK;AAC9B,kBAAY;AAAA,IACd;AACA,kBAAc,WAAW,IAAI;AAE7B,SAAK,MAAM,MAAM;AAAA,EACnB;AACF;AAMO,MAAM,yBAAyB,IAAI,iBAAiB;AAAA,EACjD;AAAA,EACA;AAAA,EACR,UAAU,IAAI;AAAA,EACd,QAAQ;AAAA,EAER,YAAYA,MAAU,MAA0B;AAC9C,UAAMA,IAAG;AACT,SAAK,OAAO;AACZ,SAAK,YAAY,KAAK,kBAAkB,OAAO;AAAA,EACjD;AAAA,EAEA,MAAc,eAAe,IAA8B;AACzD,QAAI;AACF,UAAI,GAAG,eAAe,UAAU,MAAM;AACpC,WAAG,KAAK,KAAK,UAAU,EAAE,MAAM,QAAQ,CAAC,CAAC;AAEzC,YAAI;AACF,gBAAM,IAAI,QAAc,CAAC,YAAY;AACnC,kBAAM,UAAU,WAAW,MAAM,QAAQ,GAAG,GAAI;AAEhD,eAAG,KAAK,WAAW,MAAM;AACvB,2BAAa,OAAO;AACpB,sBAAQ;AAAA,YACV,CAAC;AACD,eAAG,KAAK,SAAS,MAAM;AACrB,2BAAa,OAAO;AACpB,sBAAQ;AAAA,YACV,CAAC;AACD,eAAG,KAAK,SAAS,MAAM;AACrB,2BAAa,OAAO;AACpB,sBAAQ;AAAA,YACV,CAAC;AAAA,UACH,CAAC;AAAA,QACH,QAAQ;AAAA,QAER;AAAA,MACF;AAAA,IACF,SAAS,GAAG;AACV,WAAK,QAAQ,KAAK,0CAA0C,CAAC,EAAE;AAAA,IACjE,UAAE;AACA,UAAI,GAAG,eAAe,UAAU,QAAQ,GAAG,eAAe,UAAU,YAAY;AAC9E,WAAG,MAAM;AAAA,MACX;AAAA,IACF;AAAA,EACF;AAAA,EAEA,MAAgB,MAAM;AACpB,UAAM,YAAY,UAAU;AAC5B,UAAM,YAAY,UAAU;AAG5B,UAAM,YAAY,KAAK,KAAK,QAAQ,QAAQ,SAAS,IAAI;AACzD,UAAM,MAAM,IAAI,IAAI,GAAG,SAAS,GAAG,kBAAkB,EAAE;AACvD,QAAI,aAAa,IAAI,SAAS,KAAK,KAAK,KAAK;AAC7C,QAAI,aAAa,IAAI,yBAAyB,MAAM;AAEpD,UAAM,KAAK,IAAI,UAAU,KAAK;AAAA,MAC5B,SAAS;AAAA,QACP,wBAAwB,KAAK,KAAK;AAAA,MACpC;AAAA,IACF,CAAC;AAED,UAAM,IAAI,QAAc,CAAC,SAAS,WAAW;AAC3C,YAAM,SAAS,MAAM;AACnB,gBAAQ;AACR,gBAAQ;AAAA,MACV;AACA,YAAM,UAAU,CAAC,UAAiB;AAChC,gBAAQ;AACR,eAAO,IAAI,MAAM,mCAAmC,MAAM,OAAO,EAAE,CAAC;AAAA,MACtE;AACA,YAAM,UAAU,CAAC,SAAiB;AAChC,gBAAQ;AACR,eAAO,IAAI,MAAM,wCAAwC,IAAI,EAAE,CAAC;AAAA,MAClE;AACA,YAAM,UAAU,MAAM;AACpB,WAAG,eAAe,QAAQ,MAAM;AAChC,WAAG,eAAe,SAAS,OAAO;AAClC,WAAG,eAAe,SAAS,OAAO;AAAA,MACpC;AACA,SAAG,GAAG,QAAQ,MAAM;AACpB,SAAG,GAAG,SAAS,OAAO;AACtB,SAAG,GAAG,SAAS,OAAO;AAAA,IACxB,CAAC;AAGD,OAAG,KAAK,qBAAqB,KAAK,IAAI,CAAC;AAEvC,UAAM,YAAY,YAAY;AAC5B,uBAAiB,QAAQ,KAAK,OAAO;AACnC,YAAI,SAAS,iBAAiB,gBAAgB;AAC5C,eAAK,UAAU,MAAM;AACrB;AAAA,QACF;AACA,aAAK,UAAU,SAAS,IAAI;AAAA,MAC9B;AACA,WAAK,UAAU,SAAS;AACxB,WAAK,UAAU,MAAM;AAAA,IACvB;AAEA,UAAM,WAAW,YAAY;AAC3B,uBAAiB,SAAS,KAAK,WAAW;AACxC,YAAI,KAAK,gBAAgB,OAAO,QAAS;AAEzC,cAAM,OAAO,MAAM;AACnB,WAAG,KAAK,KAAK,UAAU,EAAE,MAAM,QAAQ,MAAM,EAAE,KAAK,EAAE,CAAC,CAAC;AAAA,MAC1D;AAEA,UAAI,CAAC,KAAK,gBAAgB,OAAO,SAAS;AACxC,WAAG,KAAK,KAAK,UAAU,EAAE,MAAM,QAAQ,CAAC,CAAC;AAAA,MAC3C;AAAA,IACF;AAEA,UAAM,WAAW,YAAY;AAC3B,YAAM,UAAU,IAAI,gBAAgB,KAAK,KAAK,YAAY,mBAAmB;AAC7E,UAAI,gBAAgB;AACpB,UAAI;AAEJ,YAAM,gBAAgB,CAAC,UAAmB;AACxC,YAAI,aAAa,CAAC,KAAK,MAAM,QAAQ;AACnC,eAAK,MAAM,IAAI,EAAE,WAAW,WAAW,OAAO,WAAW,MAAM,CAAC;AAChE,sBAAY;AAAA,QACd;AAAA,MACF;AAEA,aAAO,IAAI,QAAc,CAAC,SAAS,WAAW;AAC5C,WAAG,GAAG,WAAW,CAAC,SAAkB;AA1e5C;AA2eU,cAAI;AACJ,cAAI;AACF,kBAAM,KAAK,MAAM,KAAK,SAAS,CAAC;AAAA,UAClC,QAAQ;AACN,iBAAK,QAAQ,KAAK,sCAAsC;AACxD;AAAA,UACF;AAEA,kBAAQ,IAAI,MAAM;AAAA,YAChB,KAAK,SAAS;AACZ,oBAAM,aAAY,SAAI,SAAJ,mBAAU,UAAoB;AAChD,kBAAI,CAAC,SAAU;AAEf,oBAAM,MAAM,OAAO,KAAK,UAAU,QAAQ;AAC1C,oBAAM,MAAM,IAAI,OAAO,MAAM,IAAI,YAAY,IAAI,aAAa,IAAI,UAAU;AAE5E,yBAAW,SAAS,QAAQ,MAAM,GAAkB,GAAG;AACrD,8BAAc,KAAK;AACnB,4BAAY;AAAA,cACd;AACA;AAAA,YACF;AAAA,YAEA,KAAK,SAAS;AACZ,oBAAM,aAAY,SAAI,SAAJ,mBAAU;AAC5B,kBAAI,cAAc,SAAS;AACzB,gCAAgB;AAChB,2BAAW,SAAS,QAAQ,MAAM,GAAG;AACnC,gCAAc,KAAK;AACnB,8BAAY;AAAA,gBACd;AACA,8BAAc,IAAI;AAElB,oBAAI,CAAC,KAAK,MAAM,QAAQ;AACtB,uBAAK,MAAM,IAAI,iBAAiB,aAAa;AAAA,gBAC/C;AACA,wBAAQ;AAAA,cACV;AACA;AAAA,YACF;AAAA,YAEA,KAAK,SAAS;AACZ,oBAAM,WAAU,SAAI,SAAJ,mBAAU,YAAsB;AAChD,oBAAM,WAAU,SAAI,SAAJ,mBAAU;AAC1B,qBAAO,IAAI,MAAM,mBAAmB,WAAW,EAAE,KAAK,MAAM,EAAE,CAAC;AAC/D;AAAA,YACF;AAAA,UACF;AAAA,QACF,CAAC;AAED,WAAG,GAAG,SAAS,MAAM;AACnB,cAAI,CAAC,eAAe;AAClB,uBAAW,SAAS,QAAQ,MAAM,GAAG;AACnC,4BAAc,KAAK;AACnB,0BAAY;AAAA,YACd;AACA,0BAAc,IAAI;AAElB,gBAAI,CAAC,KAAK,MAAM,QAAQ;AACtB,mBAAK,MAAM,IAAI,iBAAiB,aAAa;AAAA,YAC/C;AAAA,UACF;AACA,kBAAQ;AAAA,QACV,CAAC;AAED,WAAG,GAAG,SAAS,CAAC,UAAU;AACxB,iBAAO,KAAK;AAAA,QACd,CAAC;AAAA,MACH,CAAC;AAAA,IACH;AAEA,QAAI;AACF,YAAM,QAAQ,IAAI,CAAC,UAAU,GAAG,SAAS,GAAG,SAAS,CAAC,CAAC;AAAA,IACzD,SAAS,GAAG;AACV,YAAM,MAAM,aAAa,QAAQ,EAAE,UAAU,OAAO,CAAC;AACrD,YAAM,IAAI,MAAM,gCAAgC,GAAG,EAAE;AAAA,IACvD,UAAE;AACA,YAAM,KAAK,eAAe,EAAE;AAAA,IAC9B;AAAA,EACF;AACF;","names":["tts"]}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@livekit/agents-plugin-sarvam",
3
- "version": "1.0.50",
3
+ "version": "1.0.51",
4
4
  "description": "Sarvam AI plugin for LiveKit Node Agents",
5
5
  "main": "dist/index.js",
6
6
  "require": "dist/index.cjs",
@@ -30,17 +30,17 @@
30
30
  "@microsoft/api-extractor": "^7.35.0",
31
31
  "tsup": "^8.3.5",
32
32
  "typescript": "^5.0.0",
33
- "@livekit/agents-plugin-openai": "1.0.50",
34
- "@livekit/agents": "1.0.50",
35
- "@livekit/agents-plugin-silero": "1.0.50",
36
- "@livekit/agents-plugins-test": "1.0.50"
33
+ "@livekit/agents": "1.0.51",
34
+ "@livekit/agents-plugin-openai": "1.0.51",
35
+ "@livekit/agents-plugin-silero": "1.0.51",
36
+ "@livekit/agents-plugins-test": "1.0.51"
37
37
  },
38
38
  "dependencies": {
39
39
  "ws": "^8.16.0"
40
40
  },
41
41
  "peerDependencies": {
42
42
  "@livekit/rtc-node": "^0.13.24",
43
- "@livekit/agents": "1.0.50"
43
+ "@livekit/agents": "1.0.51"
44
44
  },
45
45
  "scripts": {
46
46
  "build": "tsup --onSuccess \"pnpm build:types\"",
package/src/stt.ts CHANGED
@@ -10,6 +10,7 @@ import {
10
10
  Task,
11
11
  log,
12
12
  mergeFrames,
13
+ normalizeLanguage,
13
14
  stt,
14
15
  waitForAbort,
15
16
  } from '@livekit/agents';
@@ -181,15 +182,17 @@ function resolveOptions(opts: Partial<STTOptions>): ResolvedSTTOptions {
181
182
  base.mode = translateOpts.mode ?? SAARAS_TRANSLATE_DEFAULTS.mode;
182
183
  } else if (model === 'saaras:v3') {
183
184
  const v3Opts = opts as STTV3Options;
184
- base.languageCode = v3Opts.languageCode ?? SAARAS_V3_DEFAULTS.languageCode;
185
+ base.languageCode = normalizeLanguage(v3Opts.languageCode ?? SAARAS_V3_DEFAULTS.languageCode);
185
186
  base.mode = v3Opts.mode ?? SAARAS_V3_DEFAULTS.mode;
186
187
  base.prompt = v3Opts.prompt;
187
188
  base.withTimestamps = v3Opts.withTimestamps;
188
189
  } else {
189
190
  // saarika:v2.5
190
- let languageCode = (opts as STTV2Options).languageCode ?? SAARIKA_DEFAULTS.languageCode;
191
+ let languageCode = normalizeLanguage(
192
+ (opts as STTV2Options).languageCode ?? SAARIKA_DEFAULTS.languageCode,
193
+ );
191
194
  if (!STTV2_LANGUAGE_SET.has(languageCode)) {
192
- languageCode = SAARIKA_DEFAULTS.languageCode;
195
+ languageCode = normalizeLanguage(SAARIKA_DEFAULTS.languageCode);
193
196
  }
194
197
  base.languageCode = languageCode;
195
198
  base.withTimestamps = (opts as STTV2Options).withTimestamps;
@@ -431,7 +434,7 @@ export class STT extends stt.STT {
431
434
  alternatives: [
432
435
  {
433
436
  text: data.transcript || '',
434
- language: data.language_code ?? this.opts.languageCode ?? 'unknown',
437
+ language: normalizeLanguage(data.language_code ?? this.opts.languageCode ?? 'unknown'),
435
438
  startTime,
436
439
  endTime,
437
440
  confidence: data.language_probability ?? 0,
@@ -688,7 +691,9 @@ export class SpeechStream extends stt.SpeechStream {
688
691
  } else if (msgType === 'data') {
689
692
  const td = (json['data'] as SarvamWSTranscriptData | undefined) ?? {};
690
693
  const transcript = td.transcript ?? '';
691
- const language = td.language_code ?? this.#opts.languageCode ?? 'unknown';
694
+ const language = normalizeLanguage(
695
+ td.language_code ?? this.#opts.languageCode ?? 'unknown',
696
+ );
692
697
  const requestId = td.request_id ?? '';
693
698
  const confidence = td.language_probability ?? 0;
694
699
  this.#requestId = requestId;
package/src/tts.ts CHANGED
@@ -5,6 +5,7 @@ import {
5
5
  type APIConnectOptions,
6
6
  AudioByteStream,
7
7
  log,
8
+ normalizeLanguage,
8
9
  shortuuid,
9
10
  tokenize,
10
11
  tts,
@@ -136,7 +137,7 @@ function resolveOptions(opts: Partial<TTSOptions>): ResolvedTTSOptions {
136
137
  streaming: opts.streaming ?? true,
137
138
  model,
138
139
  speaker: opts.speaker ?? (isV3 ? V3_DEFAULTS.speaker : V2_DEFAULTS.speaker),
139
- targetLanguageCode: opts.targetLanguageCode ?? 'en-IN',
140
+ targetLanguageCode: normalizeLanguage(opts.targetLanguageCode ?? 'en-IN'),
140
141
  pace: opts.pace ?? (isV3 ? V3_DEFAULTS.pace : V2_DEFAULTS.pace),
141
142
  sampleRate: opts.sampleRate ?? SARVAM_TTS_SAMPLE_RATE,
142
143
  baseURL: opts.baseURL ?? SARVAM_BASE_URL,