@livekit/agents-plugin-sarvam 1.0.45

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. package/LICENSE +201 -0
  2. package/README.md +110 -0
  3. package/dist/index.cjs +52 -0
  4. package/dist/index.cjs.map +1 -0
  5. package/dist/index.d.cts +4 -0
  6. package/dist/index.d.ts +4 -0
  7. package/dist/index.d.ts.map +1 -0
  8. package/dist/index.js +29 -0
  9. package/dist/index.js.map +1 -0
  10. package/dist/models.cjs +17 -0
  11. package/dist/models.cjs.map +1 -0
  12. package/dist/models.d.cts +36 -0
  13. package/dist/models.d.ts +36 -0
  14. package/dist/models.d.ts.map +1 -0
  15. package/dist/models.js +1 -0
  16. package/dist/models.js.map +1 -0
  17. package/dist/stt.cjs +499 -0
  18. package/dist/stt.cjs.map +1 -0
  19. package/dist/stt.d.cts +104 -0
  20. package/dist/stt.d.ts +104 -0
  21. package/dist/stt.d.ts.map +1 -0
  22. package/dist/stt.js +483 -0
  23. package/dist/stt.js.map +1 -0
  24. package/dist/stt.test.cjs +18 -0
  25. package/dist/stt.test.cjs.map +1 -0
  26. package/dist/stt.test.d.cts +2 -0
  27. package/dist/stt.test.d.ts +2 -0
  28. package/dist/stt.test.d.ts.map +1 -0
  29. package/dist/stt.test.js +17 -0
  30. package/dist/stt.test.js.map +1 -0
  31. package/dist/tts.cjs +405 -0
  32. package/dist/tts.cjs.map +1 -0
  33. package/dist/tts.d.cts +111 -0
  34. package/dist/tts.d.ts +111 -0
  35. package/dist/tts.d.ts.map +1 -0
  36. package/dist/tts.js +385 -0
  37. package/dist/tts.js.map +1 -0
  38. package/dist/tts.test.cjs +17 -0
  39. package/dist/tts.test.cjs.map +1 -0
  40. package/dist/tts.test.d.cts +2 -0
  41. package/dist/tts.test.d.ts +2 -0
  42. package/dist/tts.test.d.ts.map +1 -0
  43. package/dist/tts.test.js +16 -0
  44. package/dist/tts.test.js.map +1 -0
  45. package/package.json +54 -0
  46. package/src/index.ts +34 -0
  47. package/src/models.ts +135 -0
  48. package/src/stt.test.ts +23 -0
  49. package/src/stt.ts +770 -0
  50. package/src/tts.test.ts +22 -0
  51. package/src/tts.ts +571 -0
package/dist/tts.d.cts ADDED
@@ -0,0 +1,111 @@
1
+ import { type APIConnectOptions, tokenize, tts } from '@livekit/agents';
2
+ import type { TTSLanguages, TTSModels, TTSSampleRates, TTSSpeakers, TTSV2Speakers, TTSV3Speakers } from './models.js';
3
+ interface TTSBaseOptions { // Options shared by both bulbul models; extended by the v2/v3 option types.
4
+ /** Sarvam API key. Defaults to $SARVAM_API_KEY; option resolution throws if neither is set. */
5
+ apiKey?: string;
6
+ /**
7
+ * Whether to use native WebSocket streaming for `stream()`.
8
+ * Set to `false` to prefer non-streaming REST synthesis (used by Agent via TTS StreamAdapter).
9
+ * Default: `true`.
10
+ */
11
+ streaming?: boolean;
12
+ /** Target language code (BCP-47). Default: 'en-IN' */
13
+ targetLanguageCode?: TTSLanguages | string;
14
+ /** Speech pace. v2: 0.3–3.0, v3: 0.5–2.0 (default 1.0) */
15
+ pace?: number;
16
+ /** Output sample rate in Hz (default 24000) */
17
+ sampleRate?: TTSSampleRates | number;
18
+ /** Base URL for the Sarvam API. Default: 'https://api.sarvam.ai' */
19
+ baseURL?: string;
20
+ /** Sentence tokenizer for streaming (default: basic sentence tokenizer with a minimum sentence length of 8) */
21
+ sentenceTokenizer?: tokenize.SentenceTokenizer;
22
+ }
23
+ /** Options specific to bulbul:v2 (the model used when `model` is omitted) */
24
+ export interface TTSV2Options extends TTSBaseOptions {
25
+ model?: 'bulbul:v2';
26
+ /** Speaker voice (v2 voices). Default: 'anushka' */
27
+ speaker?: TTSV2Speakers | string;
28
+ /** Pitch adjustment, -0.75 to 0.75 (v2 only, default 0) */
29
+ pitch?: number;
30
+ /** Loudness, 0.3 to 3.0 (v2 only, default 1.0) */
31
+ loudness?: number;
32
+ /** Enable text preprocessing (v2 only, default false) */
33
+ enablePreprocessing?: boolean;
34
+ }
35
+ /** Options specific to bulbul:v3; `model` must be given explicitly to select it */
36
+ export interface TTSV3Options extends TTSBaseOptions {
37
+ model: 'bulbul:v3';
38
+ /** Speaker voice (v3 voices). Default: 'shubh' */
39
+ speaker?: TTSV3Speakers | string;
40
+ /** Temperature for voice variation, 0.01 to 2.0 (v3 only, default 0.6) */
41
+ temperature?: number;
42
+ }
43
+ /** Combined options — discriminated by `model` field (an omitted `model` means bulbul:v2) */
44
+ export type TTSOptions = TTSV2Options | TTSV3Options;
45
+ interface ResolvedTTSOptions { // Internal option set after defaults have been applied.
46
+ apiKey: string;
47
+ streaming: boolean;
48
+ model: TTSModels;
49
+ speaker: TTSSpeakers | string;
50
+ targetLanguageCode: string;
51
+ pace: number;
52
+ sampleRate: number;
53
+ baseURL: string;
54
+ sentenceTokenizer: tokenize.SentenceTokenizer;
55
+ pitch?: number; // v2 only
56
+ loudness?: number; // v2 only
57
+ enablePreprocessing?: boolean; // v2 only
58
+ temperature?: number; // v3 only
59
+ }
60
+ export declare class TTS extends tts.TTS { // Sarvam AI TTS: REST synthesis via synthesize(), WebSocket synthesis via stream().
61
+ #private;
62
+ label: string;
63
+ /**
64
+ * Create a new instance of Sarvam AI TTS.
65
+ *
66
+ * @remarks
67
+ * `apiKey` must be set to your Sarvam API key, either using the argument or by setting the
68
+ * `SARVAM_API_KEY` environment variable.
69
+ */
70
+ constructor(opts?: Partial<TTSOptions>);
71
+ /**
72
+ * Update TTS options after initialization.
73
+ *
74
+ * @remarks
75
+ * When the model changes, only truly shared fields (apiKey, streaming,
76
+ * targetLanguageCode, pace, sampleRate, baseURL, sentenceTokenizer) carry over.
77
+ * Model-specific fields (speaker, pitch, loudness, temperature,
78
+ * enablePreprocessing) are dropped so resolveOptions re-applies
79
+ * the correct defaults for the new model.
80
+ */
81
+ updateOptions(opts: Partial<TTSOptions>): void;
82
+ /**
83
+ * Synthesize text to audio using Sarvam AI TTS.
84
+ *
85
+ * @param text - Text to synthesize (max 2500 chars for v3, 1500 for v2)
86
+ * @param connOptions - API connection options
87
+ * @param abortSignal - Abort signal for cancellation
88
+ * @returns A chunked stream of synthesized audio
89
+ */
90
+ synthesize(text: string, connOptions?: APIConnectOptions, abortSignal?: AbortSignal): ChunkedStream;
91
+ stream(): tts.SynthesizeStream; // throws an Error when the streaming capability is disabled (`streaming: false`)
92
+ }
93
+ /** Chunked stream for Sarvam AI TTS that processes a single synthesis request (one REST call per instance). */
94
+ export declare class ChunkedStream extends tts.ChunkedStream {
95
+ label: string;
96
+ private opts;
97
+ /** @internal */
98
+ constructor(tts: TTS, text: string, opts: ResolvedTTSOptions, connOptions?: APIConnectOptions, abortSignal?: AbortSignal);
99
+ protected run(): Promise<void>; // POSTs the text to `{baseURL}/text-to-speech` and queues the decoded audio frames
100
+ }
101
+ export declare class SynthesizeStream extends tts.SynthesizeStream { // Sends sentence-tokenized text to the Sarvam TTS WebSocket and queues the returned audio.
102
+ #private;
103
+ private opts;
104
+ private tokenizer;
105
+ label: string;
106
+ constructor(tts: TTS, opts: ResolvedTTSOptions);
107
+ private closeWebSocket; // sends a last "flush", waits up to 1 s for a reply, then closes the socket
108
+ protected run(): Promise<void>;
109
+ }
110
+ export {};
111
+ //# sourceMappingURL=tts.d.ts.map
package/dist/tts.d.ts ADDED
@@ -0,0 +1,111 @@
1
+ import { type APIConnectOptions, tokenize, tts } from '@livekit/agents';
2
+ import type { TTSLanguages, TTSModels, TTSSampleRates, TTSSpeakers, TTSV2Speakers, TTSV3Speakers } from './models.js';
3
+ interface TTSBaseOptions { // Options shared by both bulbul models; extended by the v2/v3 option types.
4
+ /** Sarvam API key. Defaults to $SARVAM_API_KEY; option resolution throws if neither is set. */
5
+ apiKey?: string;
6
+ /**
7
+ * Whether to use native WebSocket streaming for `stream()`.
8
+ * Set to `false` to prefer non-streaming REST synthesis (used by Agent via TTS StreamAdapter).
9
+ * Default: `true`.
10
+ */
11
+ streaming?: boolean;
12
+ /** Target language code (BCP-47). Default: 'en-IN' */
13
+ targetLanguageCode?: TTSLanguages | string;
14
+ /** Speech pace. v2: 0.3–3.0, v3: 0.5–2.0 (default 1.0) */
15
+ pace?: number;
16
+ /** Output sample rate in Hz (default 24000) */
17
+ sampleRate?: TTSSampleRates | number;
18
+ /** Base URL for the Sarvam API. Default: 'https://api.sarvam.ai' */
19
+ baseURL?: string;
20
+ /** Sentence tokenizer for streaming (default: basic sentence tokenizer with a minimum sentence length of 8) */
21
+ sentenceTokenizer?: tokenize.SentenceTokenizer;
22
+ }
23
+ /** Options specific to bulbul:v2 (the model used when `model` is omitted) */
24
+ export interface TTSV2Options extends TTSBaseOptions {
25
+ model?: 'bulbul:v2';
26
+ /** Speaker voice (v2 voices). Default: 'anushka' */
27
+ speaker?: TTSV2Speakers | string;
28
+ /** Pitch adjustment, -0.75 to 0.75 (v2 only, default 0) */
29
+ pitch?: number;
30
+ /** Loudness, 0.3 to 3.0 (v2 only, default 1.0) */
31
+ loudness?: number;
32
+ /** Enable text preprocessing (v2 only, default false) */
33
+ enablePreprocessing?: boolean;
34
+ }
35
+ /** Options specific to bulbul:v3; `model` must be given explicitly to select it */
36
+ export interface TTSV3Options extends TTSBaseOptions {
37
+ model: 'bulbul:v3';
38
+ /** Speaker voice (v3 voices). Default: 'shubh' */
39
+ speaker?: TTSV3Speakers | string;
40
+ /** Temperature for voice variation, 0.01 to 2.0 (v3 only, default 0.6) */
41
+ temperature?: number;
42
+ }
43
+ /** Combined options — discriminated by `model` field (an omitted `model` means bulbul:v2) */
44
+ export type TTSOptions = TTSV2Options | TTSV3Options;
45
+ interface ResolvedTTSOptions { // Internal option set after defaults have been applied.
46
+ apiKey: string;
47
+ streaming: boolean;
48
+ model: TTSModels;
49
+ speaker: TTSSpeakers | string;
50
+ targetLanguageCode: string;
51
+ pace: number;
52
+ sampleRate: number;
53
+ baseURL: string;
54
+ sentenceTokenizer: tokenize.SentenceTokenizer;
55
+ pitch?: number; // v2 only
56
+ loudness?: number; // v2 only
57
+ enablePreprocessing?: boolean; // v2 only
58
+ temperature?: number; // v3 only
59
+ }
60
+ export declare class TTS extends tts.TTS { // Sarvam AI TTS: REST synthesis via synthesize(), WebSocket synthesis via stream().
61
+ #private;
62
+ label: string;
63
+ /**
64
+ * Create a new instance of Sarvam AI TTS.
65
+ *
66
+ * @remarks
67
+ * `apiKey` must be set to your Sarvam API key, either using the argument or by setting the
68
+ * `SARVAM_API_KEY` environment variable.
69
+ */
70
+ constructor(opts?: Partial<TTSOptions>);
71
+ /**
72
+ * Update TTS options after initialization.
73
+ *
74
+ * @remarks
75
+ * When the model changes, only truly shared fields (apiKey, streaming,
76
+ * targetLanguageCode, pace, sampleRate, baseURL, sentenceTokenizer) carry over.
77
+ * Model-specific fields (speaker, pitch, loudness, temperature,
78
+ * enablePreprocessing) are dropped so resolveOptions re-applies
79
+ * the correct defaults for the new model.
80
+ */
81
+ updateOptions(opts: Partial<TTSOptions>): void;
82
+ /**
83
+ * Synthesize text to audio using Sarvam AI TTS.
84
+ *
85
+ * @param text - Text to synthesize (max 2500 chars for v3, 1500 for v2)
86
+ * @param connOptions - API connection options
87
+ * @param abortSignal - Abort signal for cancellation
88
+ * @returns A chunked stream of synthesized audio
89
+ */
90
+ synthesize(text: string, connOptions?: APIConnectOptions, abortSignal?: AbortSignal): ChunkedStream;
91
+ stream(): tts.SynthesizeStream; // throws an Error when the streaming capability is disabled (`streaming: false`)
92
+ }
93
+ /** Chunked stream for Sarvam AI TTS that processes a single synthesis request (one REST call per instance). */
94
+ export declare class ChunkedStream extends tts.ChunkedStream {
95
+ label: string;
96
+ private opts;
97
+ /** @internal */
98
+ constructor(tts: TTS, text: string, opts: ResolvedTTSOptions, connOptions?: APIConnectOptions, abortSignal?: AbortSignal);
99
+ protected run(): Promise<void>; // POSTs the text to `{baseURL}/text-to-speech` and queues the decoded audio frames
100
+ }
101
+ export declare class SynthesizeStream extends tts.SynthesizeStream { // Sends sentence-tokenized text to the Sarvam TTS WebSocket and queues the returned audio.
102
+ #private;
103
+ private opts;
104
+ private tokenizer;
105
+ label: string;
106
+ constructor(tts: TTS, opts: ResolvedTTSOptions);
107
+ private closeWebSocket; // sends a last "flush", waits up to 1 s for a reply, then closes the socket
108
+ protected run(): Promise<void>;
109
+ }
110
+ export {};
111
+ //# sourceMappingURL=tts.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"tts.d.ts","sourceRoot":"","sources":["../src/tts.ts"],"names":[],"mappings":"AAGA,OAAO,EACL,KAAK,iBAAiB,EAItB,QAAQ,EACR,GAAG,EACJ,MAAM,iBAAiB,CAAC;AAGzB,OAAO,KAAK,EACV,YAAY,EACZ,SAAS,EACT,cAAc,EACd,WAAW,EACX,aAAa,EACb,aAAa,EACd,MAAM,aAAa,CAAC;AAcrB,UAAU,cAAc;IACtB,kDAAkD;IAClD,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB;;;;OAIG;IACH,SAAS,CAAC,EAAE,OAAO,CAAC;IACpB,oCAAoC;IACpC,kBAAkB,CAAC,EAAE,YAAY,GAAG,MAAM,CAAC;IAC3C,0DAA0D;IAC1D,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,+CAA+C;IAC/C,UAAU,CAAC,EAAE,cAAc,GAAG,MAAM,CAAC;IACrC,kCAAkC;IAClC,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,2EAA2E;IAC3E,iBAAiB,CAAC,EAAE,QAAQ,CAAC,iBAAiB,CAAC;CAChD;AAED,oCAAoC;AACpC,MAAM,WAAW,YAAa,SAAQ,cAAc;IAClD,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,oDAAoD;IACpD,OAAO,CAAC,EAAE,aAAa,GAAG,MAAM,CAAC;IACjC,gDAAgD;IAChD,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,qCAAqC;IACrC,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,0CAA0C;IAC1C,mBAAmB,CAAC,EAAE,OAAO,CAAC;CAC/B;AAED,oCAAoC;AACpC,MAAM,WAAW,YAAa,SAAQ,cAAc;IAClD,KAAK,EAAE,WAAW,CAAC;IACnB,kDAAkD;IAClD,OAAO,CAAC,EAAE,aAAa,GAAG,MAAM,CAAC;IACjC,0EAA0E;IAC1E,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAED,wDAAwD;AACxD,MAAM,MAAM,UAAU,GAAG,YAAY,GAAG,YAAY,CAAC;AAMrD,UAAU,kBAAkB;IAC1B,MAAM,EAAE,MAAM,CAAC;IACf,SAAS,EAAE,OAAO,CAAC;IACnB,KAAK,EAAE,SAAS,CAAC;IACjB,OAAO,EAAE,WAAW,GAAG,MAAM,CAAC;IAC9B,kBAAkB,EAAE,MAAM,CAAC;IAC3B,IAAI,EAAE,MAAM,CAAC;IACb,UAAU,EAAE,MAAM,CAAC;IACnB,OAAO,EAAE,MAAM,CAAC;IAChB,iBAAiB,EAAE,QAAQ,CAAC,iBAAiB,CAAC;IAE9C,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,mBAAmB,CAAC,EAAE,OAAO,CAAC;IAE9B,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAqHD,qBAAa,GAAI,SAAQ,GAAG,CAAC,GAAG;;IAE9B,KAAK,SAAgB;IAErB;;;;;;OAMG;gBACS,IAAI,GAAE,OAAO,CAAC,UAAU,CAAM;IAM1C;;;;;;;;;OASG;IACH,aAAa,CAAC,IAAI,EAAE,OAAO,CAAC,UAAU,CAAC;IAkBvC;;;;;;;OAOG;IACH,UAAU,CACR,IAAI,EAAE,MAAM,EACZ,WAAW,CAAC,EAAE,iBAAiB,EAC/B,WAAW,CAAC,EAAE,WAAW,GACxB,aAAa;IAIhB,MAAM,IAAI,GAAG,CAAC,gBAAgB;CAQ/B;AAMD,kFAAkF;AAClF,qBAAa,aAAc,SAAQ,GAAG,CAAC,aAAa;IAClD,KAAK,SAA0B;IAC/B,OAAO,CAAC,IAAI,CAAqB;IAEjC,gBAAgB;gBAEd,GAAG,EAAE,GAAG,EACR,IAAI,EAAE,
MAAM,EACZ,IAAI,EAAE,kBAAkB,EACxB,WAAW,CAAC,EAAE,iBAAiB,EAC/B,WAAW,CAAC,EAAE,WAAW;cAMX,GAAG;CA+CpB;AAMD,qBAAa,gBAAiB,SAAQ,GAAG,CAAC,gBAAgB;;IACxD,OAAO,CAAC,IAAI,CAAqB;IACjC,OAAO,CAAC,SAAS,CAA0B;IAE3C,KAAK,SAA6B;gBAEtB,GAAG,EAAE,GAAG,EAAE,IAAI,EAAE,kBAAkB;YAMhC,cAAc;cAmCZ,GAAG;CAiKpB"}
package/dist/tts.js ADDED
@@ -0,0 +1,385 @@
1
+ import {
2
+ AudioByteStream,
3
+ log,
4
+ shortuuid,
5
+ tokenize,
6
+ tts
7
+ } from "@livekit/agents";
8
+ import { WebSocket } from "ws";
9
// Output sample rate in Hz used when the caller does not choose one.
const SARVAM_TTS_SAMPLE_RATE = 24000;
// Channel count passed to the base TTS class and to every AudioByteStream.
const SARVAM_TTS_CHANNELS = 1;
// Default REST root; the WebSocket URL is derived from it at connect time.
const SARVAM_BASE_URL = "https://api.sarvam.ai";
// Path appended to the (ws-scheme) base URL for streaming synthesis.
const SARVAM_WS_URL_PATH = "/text-to-speech/ws";
// `minSentenceLength` handed to the default sentence tokenizer.
const MIN_SENTENCE_LENGTH = 8;

// Fallback values applied when the model is bulbul:v2.
const V2_DEFAULTS = {
  speaker: "anushka",
  pitch: 0,
  pace: 1.0,
  loudness: 1.0,
  enablePreprocessing: false,
};

// Fallback values applied when the model is bulbul:v3.
const V3_DEFAULTS = {
  speaker: "shubh",
  pace: 1.0,
  temperature: 0.6,
};
26
/**
 * Expand caller-supplied options into the fully-populated internal option set.
 *
 * The API key falls back to `process.env.SARVAM_API_KEY`, the model falls back
 * to "bulbul:v2", and every remaining field falls back to the defaults of the
 * selected model. Only the fields that belong to the selected model are added
 * (temperature for v3; pitch, loudness and enablePreprocessing otherwise).
 *
 * @param opts - Partial TTS options.
 * @returns The resolved options object.
 * @throws Error when no API key is available from either source.
 */
function resolveOptions(opts) {
  const apiKey = opts.apiKey ?? process.env.SARVAM_API_KEY;
  if (!apiKey) {
    throw new Error("Sarvam API key is required, whether as an argument or as $SARVAM_API_KEY");
  }

  const model = opts.model ?? "bulbul:v2";
  const useV3 = model === "bulbul:v3";
  const modelDefaults = useV3 ? V3_DEFAULTS : V2_DEFAULTS;

  // Fields every model understands.
  const shared = {
    apiKey,
    streaming: opts.streaming ?? true,
    model,
    speaker: opts.speaker ?? modelDefaults.speaker,
    targetLanguageCode: opts.targetLanguageCode ?? "en-IN",
    pace: opts.pace ?? modelDefaults.pace,
    sampleRate: opts.sampleRate ?? SARVAM_TTS_SAMPLE_RATE,
    baseURL: opts.baseURL ?? SARVAM_BASE_URL,
    sentenceTokenizer:
      opts.sentenceTokenizer ??
      new tokenize.basic.SentenceTokenizer({ minSentenceLength: MIN_SENTENCE_LENGTH }),
  };

  if (useV3) {
    return {
      ...shared,
      temperature: opts.temperature ?? V3_DEFAULTS.temperature,
    };
  }

  return {
    ...shared,
    pitch: opts.pitch ?? V2_DEFAULTS.pitch,
    loudness: opts.loudness ?? V2_DEFAULTS.loudness,
    enablePreprocessing: opts.enablePreprocessing ?? V2_DEFAULTS.enablePreprocessing,
  };
}
54
/**
 * Build the JSON body for the REST text-to-speech request.
 *
 * Only the tuning fields of the selected model are included, and only when
 * they are neither `null` nor `undefined`.
 *
 * @param text - Text to synthesize.
 * @param opts - Resolved TTS options.
 * @returns A plain object ready to be passed to `JSON.stringify`.
 */
function buildRequestBody(text, opts) {
  const body = {
    text,
    target_language_code: opts.targetLanguageCode,
    speaker: opts.speaker,
    model: opts.model,
    pace: opts.pace,
    speech_sample_rate: String(opts.sampleRate),
    // WAV is always requested: the caller strips the WAV header to obtain the
    // raw PCM that AudioByteStream needs; compressed codecs would not work.
    output_audio_codec: "wav",
  };

  // [wire name, value] pairs for the model-specific fields, in wire order.
  const tuning =
    opts.model === "bulbul:v3"
      ? [["temperature", opts.temperature]]
      : [
          ["pitch", opts.pitch],
          ["loudness", opts.loudness],
          ["enable_preprocessing", opts.enablePreprocessing],
        ];

  for (const [wireName, value] of tuning) {
    if (value != null) {
      body[wireName] = value;
    }
  }
  return body;
}
76
/**
 * Serialize the "config" message sent over the TTS WebSocket.
 *
 * Requests "linear16" audio and includes only the tuning fields of the
 * selected model, skipping any that are `null` or `undefined`.
 *
 * @param opts - Resolved TTS options.
 * @returns JSON text of the form `{"type":"config","data":{...}}`.
 */
function buildWsConfigMessage(opts) {
  // Yields `{ [key]: value }` for a set value and `{}` otherwise.
  const optional = (key, value) => (value != null ? { [key]: value } : {});

  const tuning =
    opts.model === "bulbul:v3"
      ? optional("temperature", opts.temperature)
      : {
          ...optional("pitch", opts.pitch),
          ...optional("loudness", opts.loudness),
          ...optional("enable_preprocessing", opts.enablePreprocessing),
        };

  return JSON.stringify({
    type: "config",
    data: {
      target_language_code: opts.targetLanguageCode,
      speaker: opts.speaker,
      model: opts.model,
      pace: opts.pace,
      speech_sample_rate: String(opts.sampleRate),
      output_audio_codec: "linear16",
      ...tuning,
    },
  });
}
94
/**
 * Sarvam AI text-to-speech.
 *
 * `synthesize()` performs one REST request per call; `stream()` opens a
 * WebSocket-backed stream and is only available when the instance was
 * constructed with streaming enabled.
 */
class TTS extends tts.TTS {
  #opts;
  label = "sarvam.TTS";

  /**
   * Create a new instance of Sarvam AI TTS.
   *
   * @remarks
   * `apiKey` must be set to your Sarvam API key, either using the argument or by setting the
   * `SARVAM_API_KEY` environment variable.
   */
  constructor(opts = {}) {
    // Resolve first: the base constructor needs the final sample rate and
    // streaming flag.
    const resolved = resolveOptions(opts);
    super(resolved.sampleRate, SARVAM_TTS_CHANNELS, { streaming: resolved.streaming });
    this.#opts = resolved;
  }

  /**
   * Update TTS options after initialization.
   *
   * @remarks
   * When `opts.model` differs from the current model, only the shared fields
   * (apiKey, streaming, targetLanguageCode, pace, sampleRate, baseURL,
   * sentenceTokenizer) are kept; speaker, pitch, loudness, temperature and
   * enablePreprocessing are dropped so resolveOptions re-applies the defaults
   * of the new model. Otherwise every current field is kept. In both cases
   * the fields present in `opts` win.
   *
   * Only the private option set is replaced here; the sample rate and
   * streaming capability handed to the base class in the constructor are not
   * touched by this method.
   */
  updateOptions(opts) {
    const current = this.#opts;
    let carried;
    if (opts.model != null && opts.model !== current.model) {
      const {
        apiKey,
        streaming,
        targetLanguageCode,
        pace,
        sampleRate,
        baseURL,
        sentenceTokenizer,
      } = current;
      carried = {
        apiKey,
        streaming,
        targetLanguageCode,
        pace,
        sampleRate,
        baseURL,
        sentenceTokenizer,
      };
    } else {
      carried = { ...current };
    }
    this.#opts = resolveOptions({ ...carried, ...opts });
  }

  /**
   * Synthesize text to audio using Sarvam AI TTS.
   *
   * @param text - Text to synthesize (max 2500 chars for v3, 1500 for v2)
   * @param connOptions - API connection options
   * @param abortSignal - Abort signal for cancellation
   * @returns A chunked stream of synthesized audio
   */
  synthesize(text, connOptions, abortSignal) {
    return new ChunkedStream(this, text, this.#opts, connOptions, abortSignal);
  }

  /**
   * Open a WebSocket-backed synthesis stream using the current options.
   *
   * @returns A new SynthesizeStream.
   * @throws Error when the streaming capability of this instance is disabled.
   */
  stream() {
    if (this.capabilities.streaming) {
      return new SynthesizeStream(this, this.#opts);
    }
    throw new Error(
      "Sarvam TTS streaming is disabled (`streaming: false`). Use synthesize() for REST or wrap with tts.StreamAdapter for streaming behavior."
    );
  }
}
152
// Size of a canonical PCM WAV header ("RIFF" + "fmt " + "data" with no extra chunks).
const WAV_CANONICAL_HEADER_BYTES = 44;

/**
 * Locate the first byte of audio samples inside a WAV payload.
 *
 * Walks the RIFF chunk list until the "data" chunk is found, so files that
 * carry extra chunks (for example "LIST") before the samples are handled.
 * When the buffer is not a RIFF/WAVE container, or no "data" chunk is found,
 * the canonical 44-byte header size is assumed (clamped to the buffer length).
 *
 * @param wav - Buffer holding the decoded response audio.
 * @returns Offset, relative to the start of `wav`, of the sample data.
 */
function findWavDataOffset(wav) {
  const fallback = Math.min(WAV_CANONICAL_HEADER_BYTES, wav.byteLength);
  const isRiffWave =
    wav.byteLength >= 12 &&
    wav.toString("latin1", 0, 4) === "RIFF" &&
    wav.toString("latin1", 8, 12) === "WAVE";
  if (!isRiffWave) {
    return fallback;
  }

  // Chunks start right after the 12-byte RIFF/WAVE preamble; each one is a
  // 4-byte id, a 4-byte little-endian size, then the payload padded to even.
  let offset = 12;
  while (offset + 8 <= wav.byteLength) {
    if (wav.toString("latin1", offset, offset + 4) === "data") {
      return offset + 8;
    }
    const chunkSize = wav.readUInt32LE(offset + 4);
    offset += 8 + chunkSize + (chunkSize & 1);
  }
  return fallback;
}

/** Chunked stream for Sarvam AI TTS that processes a single synthesis request. */
class ChunkedStream extends tts.ChunkedStream {
  label = "sarvam.ChunkedStream";
  opts;

  /** @internal */
  constructor(tts2, text, opts, connOptions, abortSignal) {
    super(text, tts2, connOptions, abortSignal);
    this.opts = opts;
  }

  /**
   * POST the input text to `{baseURL}/text-to-speech`, decode the first
   * returned audio entry, and queue it as audio frames. The last frame is
   * flagged `final`, then the queue is closed.
   *
   * @throws Error when the HTTP status is not OK, or when the response holds
   * no audio (including a response without an `audios` array).
   */
  async run() {
    const requestId = shortuuid();
    const response = await fetch(`${this.opts.baseURL}/text-to-speech`, {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        "api-subscription-key": this.opts.apiKey,
      },
      body: JSON.stringify(buildRequestBody(this.inputText, this.opts)),
      signal: this.abortSignal,
    });
    if (!response.ok) {
      const errorBody = await response.text();
      throw new Error(`Sarvam TTS API error ${response.status}: ${errorBody}`);
    }

    const data = await response.json();
    // Guard the whole path: a body without `audios` must surface as the
    // descriptive error below instead of a TypeError.
    const audioBase64 = data?.audios?.[0];
    if (!audioBase64) {
      throw new Error("Sarvam TTS returned empty audio");
    }

    // Buffer.from may hand back a view into a shared pool, so every offset
    // into `wav.buffer` has to be shifted by `wav.byteOffset`.
    const wav = Buffer.from(audioBase64, "base64");
    const pcmData = wav.buffer.slice(
      wav.byteOffset + findWavDataOffset(wav),
      wav.byteOffset + wav.byteLength
    );

    const audioByteStream = new AudioByteStream(this.opts.sampleRate, SARVAM_TTS_CHANNELS);
    const frames = [...audioByteStream.write(pcmData), ...audioByteStream.flush()];
    const lastIndex = frames.length - 1;
    frames.forEach((frame, index) => {
      // The request id doubles as the segment id for this single-shot stream.
      this.queue.put({ requestId, segmentId: requestId, frame, final: index === lastIndex });
    });
    this.queue.close();
  }
}
199
/**
 * Streaming synthesis over the Sarvam text-to-speech WebSocket.
 *
 * Pushed text is split into sentences by the configured tokenizer, each
 * sentence is sent as a "text" message, and the base64 audio that comes back
 * is decoded and queued as audio frames.
 */
class SynthesizeStream extends tts.SynthesizeStream {
  opts;
  tokenizer;
  #logger = log();
  label = "sarvam.SynthesizeStream";

  constructor(tts2, opts) {
    super(tts2);
    this.opts = opts;
    this.tokenizer = opts.sentenceTokenizer.stream();
  }

  /**
   * Best-effort shutdown of the socket: when it is still open, send a last
   * "flush" and wait up to one second for any message, close or error event;
   * then close the socket if it is open or still connecting. Failures are
   * logged as warnings and never thrown.
   *
   * @param ws - The WebSocket to shut down.
   */
  async closeWebSocket(ws) {
    try {
      if (ws.readyState === WebSocket.OPEN) {
        ws.send(JSON.stringify({ type: "flush" }));
        // This promise only ever resolves, so it needs no error handling.
        await new Promise((resolve) => {
          const timeout = setTimeout(() => resolve(), 1000);
          const settle = () => {
            clearTimeout(timeout);
            resolve();
          };
          ws.once("message", settle);
          ws.once("close", settle);
          ws.once("error", settle);
        });
      }
    } catch (e) {
      this.#logger.warn(`Error during WebSocket close sequence: ${e}`);
    } finally {
      if (ws.readyState === WebSocket.OPEN || ws.readyState === WebSocket.CONNECTING) {
        ws.close();
      }
    }
  }

  /**
   * Connect, send the config message, then run three cooperating tasks until
   * the segment is complete: feed pushed text into the tokenizer, send each
   * sentence to the server, and turn received audio into queued frames.
   *
   * @throws Error when the connection cannot be established, or (prefixed
   * with "Sarvam TTS streaming failed") when any task fails.
   */
  async run() {
    const requestId = shortuuid();
    const segmentId = shortuuid();

    // http -> ws and https -> wss.
    const wsBaseUrl = this.opts.baseURL.replace(/^http/, "ws");
    const url = new URL(`${wsBaseUrl}${SARVAM_WS_URL_PATH}`);
    url.searchParams.set("model", this.opts.model);
    url.searchParams.set("send_completion_event", "true");
    const ws = new WebSocket(url, {
      headers: {
        "api-subscription-key": this.opts.apiKey,
      },
    });

    // Wait for the socket to open; an error or close before that is fatal.
    await new Promise((resolve, reject) => {
      const onOpen = () => {
        cleanup();
        resolve();
      };
      const onError = (error) => {
        cleanup();
        reject(new Error(`Sarvam TTS WS connection error: ${error.message}`));
      };
      const onClose = (code) => {
        cleanup();
        reject(new Error(`Sarvam TTS WS closed during connect: ${code}`));
      };
      const cleanup = () => {
        ws.removeListener("open", onOpen);
        ws.removeListener("error", onError);
        ws.removeListener("close", onClose);
      };
      ws.on("open", onOpen);
      ws.on("error", onError);
      ws.on("close", onClose);
    });

    ws.send(buildWsConfigMessage(this.opts));
    const abortSignal = this.abortController.signal;

    // Pushed text -> sentence tokenizer.
    const inputTask = async () => {
      for await (const data of this.input) {
        if (data === SynthesizeStream.FLUSH_SENTINEL) {
          this.tokenizer.flush();
          continue;
        }
        this.tokenizer.pushText(data);
      }
      this.tokenizer.endInput();
      this.tokenizer.close();
    };

    // Sentences -> "text" messages, followed by one "flush" unless aborted.
    const sendTask = async () => {
      for await (const event of this.tokenizer) {
        if (abortSignal.aborted) break;
        const text = event.token;
        ws.send(JSON.stringify({ type: "text", data: { text } }));
      }
      if (!abortSignal.aborted) {
        ws.send(JSON.stringify({ type: "flush" }));
      }
    };

    // Server messages -> queued audio frames.
    const recvTask = async () => {
      const bstream = new AudioByteStream(this.opts.sampleRate, SARVAM_TTS_CHANNELS);
      let finalReceived = false;
      // One frame is always held back so the last one can be flagged `final`.
      let lastFrame;
      const sendLastFrame = (final) => {
        if (lastFrame && !this.queue.closed) {
          this.queue.put({ requestId, segmentId, frame: lastFrame, final });
          lastFrame = void 0;
        }
      };
      // Drain buffered audio, emit the final frame and mark end of stream.
      const finishSegment = () => {
        for (const frame of bstream.flush()) {
          sendLastFrame(false);
          lastFrame = frame;
        }
        sendLastFrame(true);
        if (!this.queue.closed) {
          this.queue.put(SynthesizeStream.END_OF_STREAM);
        }
      };

      return new Promise((resolve, reject) => {
        // When the stream is aborted, sendTask skips the "flush", so no
        // "final" event is expected; settle on abort instead of waiting for
        // the server to drop the connection.
        const onAbort = () => resolve();
        const succeed = () => {
          abortSignal.removeEventListener("abort", onAbort);
          resolve();
        };
        const fail = (err) => {
          abortSignal.removeEventListener("abort", onAbort);
          reject(err);
        };

        ws.on("message", (data) => {
          let msg;
          try {
            msg = JSON.parse(data.toString());
          } catch {
            this.#logger.warn("Sarvam WS: received non-JSON message");
            return;
          }
          switch (msg.type) {
            case "audio": {
              const audioB64 = msg.data?.audio ?? "";
              if (!audioB64) break;
              const raw = Buffer.from(audioB64, "base64");
              // Copy exactly this Buffer's bytes; it may be a view into a pool.
              const pcm = raw.buffer.slice(raw.byteOffset, raw.byteOffset + raw.byteLength);
              for (const frame of bstream.write(pcm)) {
                sendLastFrame(false);
                lastFrame = frame;
              }
              break;
            }
            case "event": {
              if (msg.data?.event_type === "final") {
                finalReceived = true;
                finishSegment();
                succeed();
              }
              break;
            }
            case "error": {
              const errMsg = msg.data?.message ?? "Unknown Sarvam WS error";
              const errCode = msg.data?.code;
              fail(new Error(`Sarvam WS error ${errCode ?? ""}: ${errMsg}`));
              break;
            }
          }
        });

        ws.on("close", () => {
          // The socket went away before "final": emit what has been buffered.
          if (!finalReceived) {
            finishSegment();
          }
          succeed();
        });

        ws.on("error", (error) => {
          fail(error);
        });

        if (abortSignal.aborted) {
          resolve();
        } else {
          abortSignal.addEventListener("abort", onAbort, { once: true });
        }
      });
    };

    try {
      await Promise.all([inputTask(), sendTask(), recvTask()]);
    } catch (e) {
      const msg = e instanceof Error ? e.message : String(e);
      throw new Error(`Sarvam TTS streaming failed: ${msg}`);
    } finally {
      await this.closeWebSocket(ws);
    }
  }
}
380
+ export {
381
+ ChunkedStream,
382
+ SynthesizeStream,
383
+ TTS
384
+ };
385
+ //# sourceMappingURL=tts.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../src/tts.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2025 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport {\n type APIConnectOptions,\n AudioByteStream,\n log,\n shortuuid,\n tokenize,\n tts,\n} from '@livekit/agents';\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { type RawData, WebSocket } from 'ws';\nimport type {\n TTSLanguages,\n TTSModels,\n TTSSampleRates,\n TTSSpeakers,\n TTSV2Speakers,\n TTSV3Speakers,\n} from './models.js';\n\nconst SARVAM_TTS_SAMPLE_RATE = 24000;\nconst SARVAM_TTS_CHANNELS = 1;\nconst SARVAM_BASE_URL = 'https://api.sarvam.ai';\nconst SARVAM_WS_URL_PATH = '/text-to-speech/ws';\nconst MIN_SENTENCE_LENGTH = 8;\n\n// ---------------------------------------------------------------------------\n// Model-specific option types\n// V2 supports pitch / loudness / enablePreprocessing\n// V3 supports temperature (pitch, loudness, enablePreprocessing are NOT supported)\n// ---------------------------------------------------------------------------\n\ninterface TTSBaseOptions {\n /** Sarvam API key. Defaults to $SARVAM_API_KEY */\n apiKey?: string;\n /**\n * Whether to use native WebSocket streaming for `stream()`.\n * Set to `false` to prefer non-streaming REST synthesis (used by Agent via TTS StreamAdapter).\n * Default: `true`.\n */\n streaming?: boolean;\n /** Target language code (BCP-47) */\n targetLanguageCode?: TTSLanguages | string;\n /** Speech pace. v2: 0.3–3.0, v3: 0.5–2.0 (default 1.0) */\n pace?: number;\n /** Output sample rate in Hz (default 24000) */\n sampleRate?: TTSSampleRates | number;\n /** Base URL for the Sarvam API */\n baseURL?: string;\n /** Sentence tokenizer for streaming (default: basic sentence tokenizer) */\n sentenceTokenizer?: tokenize.SentenceTokenizer;\n}\n\n/** Options specific to bulbul:v2 */\nexport interface TTSV2Options extends TTSBaseOptions {\n model?: 'bulbul:v2';\n /** Speaker voice (v2 voices). 
Default: 'anushka' */\n speaker?: TTSV2Speakers | string;\n /** Pitch adjustment, -0.75 to 0.75 (v2 only) */\n pitch?: number;\n /** Loudness, 0.3 to 3.0 (v2 only) */\n loudness?: number;\n /** Enable text preprocessing (v2 only) */\n enablePreprocessing?: boolean;\n}\n\n/** Options specific to bulbul:v3 */\nexport interface TTSV3Options extends TTSBaseOptions {\n model: 'bulbul:v3';\n /** Speaker voice (v3 voices). Default: 'shubh' */\n speaker?: TTSV3Speakers | string;\n /** Temperature for voice variation, 0.01 to 2.0 (v3 only, default 0.6) */\n temperature?: number;\n}\n\n/** Combined options — discriminated by `model` field */\nexport type TTSOptions = TTSV2Options | TTSV3Options;\n\n// ---------------------------------------------------------------------------\n// Resolved (internal) options — flat union of all fields\n// ---------------------------------------------------------------------------\n\ninterface ResolvedTTSOptions {\n apiKey: string;\n streaming: boolean;\n model: TTSModels;\n speaker: TTSSpeakers | string;\n targetLanguageCode: string;\n pace: number;\n sampleRate: number;\n baseURL: string;\n sentenceTokenizer: tokenize.SentenceTokenizer;\n // V2 only\n pitch?: number;\n loudness?: number;\n enablePreprocessing?: boolean;\n // V3 only\n temperature?: number;\n}\n\n// ---------------------------------------------------------------------------\n// Defaults per model\n// ---------------------------------------------------------------------------\n\nconst V2_DEFAULTS = {\n speaker: 'anushka' as const,\n pitch: 0,\n pace: 1.0,\n loudness: 1.0,\n enablePreprocessing: false,\n};\n\nconst V3_DEFAULTS = {\n speaker: 'shubh' as const,\n pace: 1.0,\n temperature: 0.6,\n};\n\n// ---------------------------------------------------------------------------\n// Resolve caller options into a fully-populated internal struct\n// ---------------------------------------------------------------------------\n\nfunction resolveOptions(opts: Partial<TTSOptions>): 
ResolvedTTSOptions {\n const apiKey = opts.apiKey ?? process.env.SARVAM_API_KEY;\n if (!apiKey) {\n throw new Error('Sarvam API key is required, whether as an argument or as $SARVAM_API_KEY');\n }\n\n const model: TTSModels = opts.model ?? 'bulbul:v2';\n const isV3 = model === 'bulbul:v3';\n\n const base: ResolvedTTSOptions = {\n apiKey,\n streaming: opts.streaming ?? true,\n model,\n speaker: opts.speaker ?? (isV3 ? V3_DEFAULTS.speaker : V2_DEFAULTS.speaker),\n targetLanguageCode: opts.targetLanguageCode ?? 'en-IN',\n pace: opts.pace ?? (isV3 ? V3_DEFAULTS.pace : V2_DEFAULTS.pace),\n sampleRate: opts.sampleRate ?? SARVAM_TTS_SAMPLE_RATE,\n baseURL: opts.baseURL ?? SARVAM_BASE_URL,\n sentenceTokenizer:\n opts.sentenceTokenizer ??\n new tokenize.basic.SentenceTokenizer({ minSentenceLength: MIN_SENTENCE_LENGTH }),\n };\n\n if (isV3) {\n base.temperature = (opts as TTSV3Options).temperature ?? V3_DEFAULTS.temperature;\n } else {\n const v2 = opts as TTSV2Options;\n base.pitch = v2.pitch ?? V2_DEFAULTS.pitch;\n base.loudness = v2.loudness ?? V2_DEFAULTS.loudness;\n base.enablePreprocessing = v2.enablePreprocessing ?? V2_DEFAULTS.enablePreprocessing;\n }\n\n return base;\n}\n\n// ---------------------------------------------------------------------------\n// Build the API request body — only sends model-relevant fields\n// ---------------------------------------------------------------------------\n\nfunction buildRequestBody(text: string, opts: ResolvedTTSOptions): Record<string, unknown> {\n const body: Record<string, unknown> = {\n text,\n target_language_code: opts.targetLanguageCode,\n speaker: opts.speaker,\n model: opts.model,\n pace: opts.pace,\n speech_sample_rate: String(opts.sampleRate),\n // Always request WAV — AudioByteStream requires raw PCM, which we get by\n // stripping the 44-byte WAV header. 
Other codecs produce compressed audio\n // that cannot be fed into AudioByteStream.\n output_audio_codec: 'wav',\n };\n\n if (opts.model === 'bulbul:v3') {\n if (opts.temperature != null) body.temperature = opts.temperature;\n } else {\n if (opts.pitch != null) body.pitch = opts.pitch;\n if (opts.loudness != null) body.loudness = opts.loudness;\n if (opts.enablePreprocessing != null) body.enable_preprocessing = opts.enablePreprocessing;\n }\n\n return body;\n}\n\n// ---------------------------------------------------------------------------\n// Build WS config message (sent as first message after connection)\n// ---------------------------------------------------------------------------\n\nfunction buildWsConfigMessage(opts: ResolvedTTSOptions): string {\n const data: Record<string, unknown> = {\n target_language_code: opts.targetLanguageCode,\n speaker: opts.speaker,\n model: opts.model,\n pace: opts.pace,\n speech_sample_rate: String(opts.sampleRate),\n output_audio_codec: 'linear16',\n };\n\n if (opts.model === 'bulbul:v3') {\n if (opts.temperature != null) data.temperature = opts.temperature;\n } else {\n if (opts.pitch != null) data.pitch = opts.pitch;\n if (opts.loudness != null) data.loudness = opts.loudness;\n if (opts.enablePreprocessing != null) data.enable_preprocessing = opts.enablePreprocessing;\n }\n\n return JSON.stringify({ type: 'config', data });\n}\n\n// ---------------------------------------------------------------------------\n// TTS class\n// ---------------------------------------------------------------------------\n\nexport class TTS extends tts.TTS {\n #opts: ResolvedTTSOptions;\n label = 'sarvam.TTS';\n\n /**\n * Create a new instance of Sarvam AI TTS.\n *\n * @remarks\n * `apiKey` must be set to your Sarvam API key, either using the argument or by setting the\n * `SARVAM_API_KEY` environment variable.\n */\n constructor(opts: Partial<TTSOptions> = {}) {\n const resolved = resolveOptions(opts);\n super(resolved.sampleRate, 
SARVAM_TTS_CHANNELS, { streaming: resolved.streaming });\n this.#opts = resolved;\n }\n\n /**\n * Update TTS options after initialization.\n *\n * @remarks\n * When the model changes, only truly shared fields (apiKey,\n * targetLanguageCode, pace, sampleRate, baseURL) carry over.\n * Model-specific fields (speaker, pitch, loudness, temperature,\n * enablePreprocessing) are dropped so resolveOptions re-applies\n * the correct defaults for the new model.\n */\n updateOptions(opts: Partial<TTSOptions>) {\n const modelChanging = opts.model != null && opts.model !== this.#opts.model;\n\n const base: Partial<TTSOptions> = modelChanging\n ? {\n apiKey: this.#opts.apiKey,\n streaming: this.#opts.streaming,\n targetLanguageCode: this.#opts.targetLanguageCode as TTSLanguages,\n pace: this.#opts.pace,\n sampleRate: this.#opts.sampleRate as TTSSampleRates,\n baseURL: this.#opts.baseURL,\n sentenceTokenizer: this.#opts.sentenceTokenizer,\n }\n : ({ ...this.#opts } as Partial<TTSOptions>);\n\n this.#opts = resolveOptions({ ...base, ...opts } as TTSOptions);\n }\n\n /**\n * Synthesize text to audio using Sarvam AI TTS.\n *\n * @param text - Text to synthesize (max 2500 chars for v3, 1500 for v2)\n * @param connOptions - API connection options\n * @param abortSignal - Abort signal for cancellation\n * @returns A chunked stream of synthesized audio\n */\n synthesize(\n text: string,\n connOptions?: APIConnectOptions,\n abortSignal?: AbortSignal,\n ): ChunkedStream {\n return new ChunkedStream(this, text, this.#opts, connOptions, abortSignal);\n }\n\n stream(): tts.SynthesizeStream {\n if (!this.capabilities.streaming) {\n throw new Error(\n 'Sarvam TTS streaming is disabled (`streaming: false`). 
Use synthesize() for REST or wrap with tts.StreamAdapter for streaming behavior.',\n );\n }\n return new SynthesizeStream(this, this.#opts);\n }\n}\n\n// ---------------------------------------------------------------------------\n// Chunked stream (non-streaming synthesis)\n// ---------------------------------------------------------------------------\n\n/** Chunked stream for Sarvam AI TTS that processes a single synthesis request. */\nexport class ChunkedStream extends tts.ChunkedStream {\n label = 'sarvam.ChunkedStream';\n private opts: ResolvedTTSOptions;\n\n /** @internal */\n constructor(\n tts: TTS,\n text: string,\n opts: ResolvedTTSOptions,\n connOptions?: APIConnectOptions,\n abortSignal?: AbortSignal,\n ) {\n super(text, tts, connOptions, abortSignal);\n this.opts = opts;\n }\n\n protected async run() {\n const requestId = shortuuid();\n\n const response = await fetch(`${this.opts.baseURL}/text-to-speech`, {\n method: 'POST',\n headers: {\n 'Content-Type': 'application/json',\n 'api-subscription-key': this.opts.apiKey,\n },\n body: JSON.stringify(buildRequestBody(this.inputText, this.opts)),\n signal: this.abortSignal,\n });\n\n if (!response.ok) {\n const errorBody = await response.text();\n throw new Error(`Sarvam TTS API error ${response.status}: ${errorBody}`);\n }\n\n const data = (await response.json()) as { audios: string[] };\n const audioBase64 = data.audios[0];\n if (!audioBase64) {\n throw new Error('Sarvam TTS returned empty audio');\n }\n\n // Decode base64 WAV and strip 44-byte header to get raw PCM\n const raw = Buffer.from(audioBase64, 'base64');\n const pcmData = raw.buffer.slice(raw.byteOffset + 44, raw.byteOffset + raw.byteLength);\n\n const audioByteStream = new AudioByteStream(this.opts.sampleRate, SARVAM_TTS_CHANNELS);\n const frames = [...audioByteStream.write(pcmData), ...audioByteStream.flush()];\n\n let lastFrame: AudioFrame | undefined;\n const sendLastFrame = (segmentId: string, final: boolean) => {\n if (lastFrame) {\n 
this.queue.put({ requestId, segmentId, frame: lastFrame, final });\n lastFrame = undefined;\n }\n };\n\n for (const frame of frames) {\n sendLastFrame(requestId, false);\n lastFrame = frame;\n }\n sendLastFrame(requestId, true);\n\n this.queue.close();\n }\n}\n\n// ---------------------------------------------------------------------------\n// WebSocket streaming synthesis\n// ---------------------------------------------------------------------------\n\nexport class SynthesizeStream extends tts.SynthesizeStream {\n private opts: ResolvedTTSOptions;\n private tokenizer: tokenize.SentenceStream;\n #logger = log();\n label = 'sarvam.SynthesizeStream';\n\n constructor(tts: TTS, opts: ResolvedTTSOptions) {\n super(tts);\n this.opts = opts;\n this.tokenizer = opts.sentenceTokenizer.stream();\n }\n\n private async closeWebSocket(ws: WebSocket): Promise<void> {\n try {\n if (ws.readyState === WebSocket.OPEN) {\n ws.send(JSON.stringify({ type: 'flush' }));\n\n try {\n await new Promise<void>((resolve) => {\n const timeout = setTimeout(() => resolve(), 1000);\n\n ws.once('message', () => {\n clearTimeout(timeout);\n resolve();\n });\n ws.once('close', () => {\n clearTimeout(timeout);\n resolve();\n });\n ws.once('error', () => {\n clearTimeout(timeout);\n resolve();\n });\n });\n } catch {\n // Ignore timeout or other errors during close sequence\n }\n }\n } catch (e) {\n this.#logger.warn(`Error during WebSocket close sequence: ${e}`);\n } finally {\n if (ws.readyState === WebSocket.OPEN || ws.readyState === WebSocket.CONNECTING) {\n ws.close();\n }\n }\n }\n\n protected async run() {\n const requestId = shortuuid();\n const segmentId = shortuuid();\n\n // Build WS URL: wss://api.sarvam.ai/text-to-speech/ws?model=...&send_completion_event=true\n const wsBaseUrl = this.opts.baseURL.replace(/^http/, 'ws');\n const url = new URL(`${wsBaseUrl}${SARVAM_WS_URL_PATH}`);\n url.searchParams.set('model', this.opts.model);\n url.searchParams.set('send_completion_event', 'true');\n\n 
const ws = new WebSocket(url, {\n headers: {\n 'api-subscription-key': this.opts.apiKey,\n },\n });\n\n await new Promise<void>((resolve, reject) => {\n const onOpen = () => {\n cleanup();\n resolve();\n };\n const onError = (error: Error) => {\n cleanup();\n reject(new Error(`Sarvam TTS WS connection error: ${error.message}`));\n };\n const onClose = (code: number) => {\n cleanup();\n reject(new Error(`Sarvam TTS WS closed during connect: ${code}`));\n };\n const cleanup = () => {\n ws.removeListener('open', onOpen);\n ws.removeListener('error', onError);\n ws.removeListener('close', onClose);\n };\n ws.on('open', onOpen);\n ws.on('error', onError);\n ws.on('close', onClose);\n });\n\n // Send config message immediately after connection\n ws.send(buildWsConfigMessage(this.opts));\n\n const inputTask = async () => {\n for await (const data of this.input) {\n if (data === SynthesizeStream.FLUSH_SENTINEL) {\n this.tokenizer.flush();\n continue;\n }\n this.tokenizer.pushText(data);\n }\n this.tokenizer.endInput();\n this.tokenizer.close();\n };\n\n const sendTask = async () => {\n for await (const event of this.tokenizer) {\n if (this.abortController.signal.aborted) break;\n\n const text = event.token;\n ws.send(JSON.stringify({ type: 'text', data: { text } }));\n }\n\n if (!this.abortController.signal.aborted) {\n ws.send(JSON.stringify({ type: 'flush' }));\n }\n };\n\n const recvTask = async () => {\n const bstream = new AudioByteStream(this.opts.sampleRate, SARVAM_TTS_CHANNELS);\n let finalReceived = false;\n let lastFrame: AudioFrame | undefined;\n\n const sendLastFrame = (final: boolean) => {\n if (lastFrame && !this.queue.closed) {\n this.queue.put({ requestId, segmentId, frame: lastFrame, final });\n lastFrame = undefined;\n }\n };\n\n return new Promise<void>((resolve, reject) => {\n ws.on('message', (data: RawData) => {\n let msg: { type: string; data?: Record<string, unknown> };\n try {\n msg = JSON.parse(data.toString());\n } catch {\n 
this.#logger.warn('Sarvam WS: received non-JSON message');\n return;\n }\n\n switch (msg.type) {\n case 'audio': {\n const audioB64 = (msg.data?.audio as string) ?? '';\n if (!audioB64) break;\n\n const raw = Buffer.from(audioB64, 'base64');\n const pcm = raw.buffer.slice(raw.byteOffset, raw.byteOffset + raw.byteLength);\n\n for (const frame of bstream.write(pcm as ArrayBuffer)) {\n sendLastFrame(false);\n lastFrame = frame;\n }\n break;\n }\n\n case 'event': {\n const eventType = msg.data?.event_type as string | undefined;\n if (eventType === 'final') {\n finalReceived = true;\n for (const frame of bstream.flush()) {\n sendLastFrame(false);\n lastFrame = frame;\n }\n sendLastFrame(true);\n\n if (!this.queue.closed) {\n this.queue.put(SynthesizeStream.END_OF_STREAM);\n }\n resolve();\n }\n break;\n }\n\n case 'error': {\n const errMsg = (msg.data?.message as string) ?? 'Unknown Sarvam WS error';\n const errCode = msg.data?.code as number | undefined;\n reject(new Error(`Sarvam WS error ${errCode ?? ''}: ${errMsg}`));\n break;\n }\n }\n });\n\n ws.on('close', () => {\n if (!finalReceived) {\n for (const frame of bstream.flush()) {\n sendLastFrame(false);\n lastFrame = frame;\n }\n sendLastFrame(true);\n\n if (!this.queue.closed) {\n this.queue.put(SynthesizeStream.END_OF_STREAM);\n }\n }\n resolve();\n });\n\n ws.on('error', (error) => {\n reject(error);\n });\n });\n };\n\n try {\n await Promise.all([inputTask(), sendTask(), recvTask()]);\n } catch (e) {\n const msg = e instanceof Error ? 
e.message : String(e);\n throw new Error(`Sarvam TTS streaming failed: ${msg}`);\n } finally {\n await this.closeWebSocket(ws);\n }\n }\n}\n"],"mappings":"AAGA;AAAA,EAEE;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,OACK;AAEP,SAAuB,iBAAiB;AAUxC,MAAM,yBAAyB;AAC/B,MAAM,sBAAsB;AAC5B,MAAM,kBAAkB;AACxB,MAAM,qBAAqB;AAC3B,MAAM,sBAAsB;AAgF5B,MAAM,cAAc;AAAA,EAClB,SAAS;AAAA,EACT,OAAO;AAAA,EACP,MAAM;AAAA,EACN,UAAU;AAAA,EACV,qBAAqB;AACvB;AAEA,MAAM,cAAc;AAAA,EAClB,SAAS;AAAA,EACT,MAAM;AAAA,EACN,aAAa;AACf;AAMA,SAAS,eAAe,MAA+C;AACrE,QAAM,SAAS,KAAK,UAAU,QAAQ,IAAI;AAC1C,MAAI,CAAC,QAAQ;AACX,UAAM,IAAI,MAAM,0EAA0E;AAAA,EAC5F;AAEA,QAAM,QAAmB,KAAK,SAAS;AACvC,QAAM,OAAO,UAAU;AAEvB,QAAM,OAA2B;AAAA,IAC/B;AAAA,IACA,WAAW,KAAK,aAAa;AAAA,IAC7B;AAAA,IACA,SAAS,KAAK,YAAY,OAAO,YAAY,UAAU,YAAY;AAAA,IACnE,oBAAoB,KAAK,sBAAsB;AAAA,IAC/C,MAAM,KAAK,SAAS,OAAO,YAAY,OAAO,YAAY;AAAA,IAC1D,YAAY,KAAK,cAAc;AAAA,IAC/B,SAAS,KAAK,WAAW;AAAA,IACzB,mBACE,KAAK,qBACL,IAAI,SAAS,MAAM,kBAAkB,EAAE,mBAAmB,oBAAoB,CAAC;AAAA,EACnF;AAEA,MAAI,MAAM;AACR,SAAK,cAAe,KAAsB,eAAe,YAAY;AAAA,EACvE,OAAO;AACL,UAAM,KAAK;AACX,SAAK,QAAQ,GAAG,SAAS,YAAY;AACrC,SAAK,WAAW,GAAG,YAAY,YAAY;AAC3C,SAAK,sBAAsB,GAAG,uBAAuB,YAAY;AAAA,EACnE;AAEA,SAAO;AACT;AAMA,SAAS,iBAAiB,MAAc,MAAmD;AACzF,QAAM,OAAgC;AAAA,IACpC;AAAA,IACA,sBAAsB,KAAK;AAAA,IAC3B,SAAS,KAAK;AAAA,IACd,OAAO,KAAK;AAAA,IACZ,MAAM,KAAK;AAAA,IACX,oBAAoB,OAAO,KAAK,UAAU;AAAA;AAAA;AAAA;AAAA,IAI1C,oBAAoB;AAAA,EACtB;AAEA,MAAI,KAAK,UAAU,aAAa;AAC9B,QAAI,KAAK,eAAe,KAAM,MAAK,cAAc,KAAK;AAAA,EACxD,OAAO;AACL,QAAI,KAAK,SAAS,KAAM,MAAK,QAAQ,KAAK;AAC1C,QAAI,KAAK,YAAY,KAAM,MAAK,WAAW,KAAK;AAChD,QAAI,KAAK,uBAAuB,KAAM,MAAK,uBAAuB,KAAK;AAAA,EACzE;AAEA,SAAO;AACT;AAMA,SAAS,qBAAqB,MAAkC;AAC9D,QAAM,OAAgC;AAAA,IACpC,sBAAsB,KAAK;AAAA,IAC3B,SAAS,KAAK;AAAA,IACd,OAAO,KAAK;AAAA,IACZ,MAAM,KAAK;AAAA,IACX,oBAAoB,OAAO,KAAK,UAAU;AAAA,IAC1C,oBAAoB;AAAA,EACtB;AAEA,MAAI,KAAK,UAAU,aAAa;AAC9B,QAAI,KAAK,eAAe,KAAM,MAAK,cAAc,KAAK;AAAA,EACxD,OAAO;AACL,QAAI,KAAK,SAAS,KAAM,MAAK,QAAQ,KAAK;AAC1C,QAAI,KAAK,YAAY,KAAM,MAAK,WAAW,KAAK;AAChD,QAAI,KAAK,uBAAuB,K
AAM,MAAK,uBAAuB,KAAK;AAAA,EACzE;AAEA,SAAO,KAAK,UAAU,EAAE,MAAM,UAAU,KAAK,CAAC;AAChD;AAMO,MAAM,YAAY,IAAI,IAAI;AAAA,EAC/B;AAAA,EACA,QAAQ;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EASR,YAAY,OAA4B,CAAC,GAAG;AAC1C,UAAM,WAAW,eAAe,IAAI;AACpC,UAAM,SAAS,YAAY,qBAAqB,EAAE,WAAW,SAAS,UAAU,CAAC;AACjF,SAAK,QAAQ;AAAA,EACf;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAYA,cAAc,MAA2B;AACvC,UAAM,gBAAgB,KAAK,SAAS,QAAQ,KAAK,UAAU,KAAK,MAAM;AAEtE,UAAM,OAA4B,gBAC9B;AAAA,MACE,QAAQ,KAAK,MAAM;AAAA,MACnB,WAAW,KAAK,MAAM;AAAA,MACtB,oBAAoB,KAAK,MAAM;AAAA,MAC/B,MAAM,KAAK,MAAM;AAAA,MACjB,YAAY,KAAK,MAAM;AAAA,MACvB,SAAS,KAAK,MAAM;AAAA,MACpB,mBAAmB,KAAK,MAAM;AAAA,IAChC,IACC,EAAE,GAAG,KAAK,MAAM;AAErB,SAAK,QAAQ,eAAe,EAAE,GAAG,MAAM,GAAG,KAAK,CAAe;AAAA,EAChE;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAUA,WACE,MACA,aACA,aACe;AACf,WAAO,IAAI,cAAc,MAAM,MAAM,KAAK,OAAO,aAAa,WAAW;AAAA,EAC3E;AAAA,EAEA,SAA+B;AAC7B,QAAI,CAAC,KAAK,aAAa,WAAW;AAChC,YAAM,IAAI;AAAA,QACR;AAAA,MACF;AAAA,IACF;AACA,WAAO,IAAI,iBAAiB,MAAM,KAAK,KAAK;AAAA,EAC9C;AACF;AAOO,MAAM,sBAAsB,IAAI,cAAc;AAAA,EACnD,QAAQ;AAAA,EACA;AAAA;AAAA,EAGR,YACEA,MACA,MACA,MACA,aACA,aACA;AACA,UAAM,MAAMA,MAAK,aAAa,WAAW;AACzC,SAAK,OAAO;AAAA,EACd;AAAA,EAEA,MAAgB,MAAM;AACpB,UAAM,YAAY,UAAU;AAE5B,UAAM,WAAW,MAAM,MAAM,GAAG,KAAK,KAAK,OAAO,mBAAmB;AAAA,MAClE,QAAQ;AAAA,MACR,SAAS;AAAA,QACP,gBAAgB;AAAA,QAChB,wBAAwB,KAAK,KAAK;AAAA,MACpC;AAAA,MACA,MAAM,KAAK,UAAU,iBAAiB,KAAK,WAAW,KAAK,IAAI,CAAC;AAAA,MAChE,QAAQ,KAAK;AAAA,IACf,CAAC;AAED,QAAI,CAAC,SAAS,IAAI;AAChB,YAAM,YAAY,MAAM,SAAS,KAAK;AACtC,YAAM,IAAI,MAAM,wBAAwB,SAAS,MAAM,KAAK,SAAS,EAAE;AAAA,IACzE;AAEA,UAAM,OAAQ,MAAM,SAAS,KAAK;AAClC,UAAM,cAAc,KAAK,OAAO,CAAC;AACjC,QAAI,CAAC,aAAa;AAChB,YAAM,IAAI,MAAM,iCAAiC;AAAA,IACnD;AAGA,UAAM,MAAM,OAAO,KAAK,aAAa,QAAQ;AAC7C,UAAM,UAAU,IAAI,OAAO,MAAM,IAAI,aAAa,IAAI,IAAI,aAAa,IAAI,UAAU;AAErF,UAAM,kBAAkB,IAAI,gBAAgB,KAAK,KAAK,YAAY,mBAAmB;AACrF,UAAM,SAAS,CAAC,GAAG,gBAAgB,MAAM,OAAO,GAAG,GAAG,gBAAgB,MAAM,CAAC;AAE7E,QAAI;AACJ,UAAM,gBAAgB,CAAC,WAAmB,UAAmB;AAC3D,UAAI,WAAW;AACb,aAAK,MAAM,IAAI,EAAE,WAAW,W
AAW,OAAO,WAAW,MAAM,CAAC;AAChE,oBAAY;AAAA,MACd;AAAA,IACF;AAEA,eAAW,SAAS,QAAQ;AAC1B,oBAAc,WAAW,KAAK;AAC9B,kBAAY;AAAA,IACd;AACA,kBAAc,WAAW,IAAI;AAE7B,SAAK,MAAM,MAAM;AAAA,EACnB;AACF;AAMO,MAAM,yBAAyB,IAAI,iBAAiB;AAAA,EACjD;AAAA,EACA;AAAA,EACR,UAAU,IAAI;AAAA,EACd,QAAQ;AAAA,EAER,YAAYA,MAAU,MAA0B;AAC9C,UAAMA,IAAG;AACT,SAAK,OAAO;AACZ,SAAK,YAAY,KAAK,kBAAkB,OAAO;AAAA,EACjD;AAAA,EAEA,MAAc,eAAe,IAA8B;AACzD,QAAI;AACF,UAAI,GAAG,eAAe,UAAU,MAAM;AACpC,WAAG,KAAK,KAAK,UAAU,EAAE,MAAM,QAAQ,CAAC,CAAC;AAEzC,YAAI;AACF,gBAAM,IAAI,QAAc,CAAC,YAAY;AACnC,kBAAM,UAAU,WAAW,MAAM,QAAQ,GAAG,GAAI;AAEhD,eAAG,KAAK,WAAW,MAAM;AACvB,2BAAa,OAAO;AACpB,sBAAQ;AAAA,YACV,CAAC;AACD,eAAG,KAAK,SAAS,MAAM;AACrB,2BAAa,OAAO;AACpB,sBAAQ;AAAA,YACV,CAAC;AACD,eAAG,KAAK,SAAS,MAAM;AACrB,2BAAa,OAAO;AACpB,sBAAQ;AAAA,YACV,CAAC;AAAA,UACH,CAAC;AAAA,QACH,QAAQ;AAAA,QAER;AAAA,MACF;AAAA,IACF,SAAS,GAAG;AACV,WAAK,QAAQ,KAAK,0CAA0C,CAAC,EAAE;AAAA,IACjE,UAAE;AACA,UAAI,GAAG,eAAe,UAAU,QAAQ,GAAG,eAAe,UAAU,YAAY;AAC9E,WAAG,MAAM;AAAA,MACX;AAAA,IACF;AAAA,EACF;AAAA,EAEA,MAAgB,MAAM;AACpB,UAAM,YAAY,UAAU;AAC5B,UAAM,YAAY,UAAU;AAG5B,UAAM,YAAY,KAAK,KAAK,QAAQ,QAAQ,SAAS,IAAI;AACzD,UAAM,MAAM,IAAI,IAAI,GAAG,SAAS,GAAG,kBAAkB,EAAE;AACvD,QAAI,aAAa,IAAI,SAAS,KAAK,KAAK,KAAK;AAC7C,QAAI,aAAa,IAAI,yBAAyB,MAAM;AAEpD,UAAM,KAAK,IAAI,UAAU,KAAK;AAAA,MAC5B,SAAS;AAAA,QACP,wBAAwB,KAAK,KAAK;AAAA,MACpC;AAAA,IACF,CAAC;AAED,UAAM,IAAI,QAAc,CAAC,SAAS,WAAW;AAC3C,YAAM,SAAS,MAAM;AACnB,gBAAQ;AACR,gBAAQ;AAAA,MACV;AACA,YAAM,UAAU,CAAC,UAAiB;AAChC,gBAAQ;AACR,eAAO,IAAI,MAAM,mCAAmC,MAAM,OAAO,EAAE,CAAC;AAAA,MACtE;AACA,YAAM,UAAU,CAAC,SAAiB;AAChC,gBAAQ;AACR,eAAO,IAAI,MAAM,wCAAwC,IAAI,EAAE,CAAC;AAAA,MAClE;AACA,YAAM,UAAU,MAAM;AACpB,WAAG,eAAe,QAAQ,MAAM;AAChC,WAAG,eAAe,SAAS,OAAO;AAClC,WAAG,eAAe,SAAS,OAAO;AAAA,MACpC;AACA,SAAG,GAAG,QAAQ,MAAM;AACpB,SAAG,GAAG,SAAS,OAAO;AACtB,SAAG,GAAG,SAAS,OAAO;AAAA,IACxB,CAAC;AAGD,OAAG,KAAK,qBAAqB,KAAK,IAAI,CAAC;AAEvC,UAAM,YAAY,YAAY;AAC5B,uBAAiB,QAAQ,KAAK,OAAO;AACnC,YAAI,SAAS,iBAAiB,gBAAgB;AAC5C,eAAK,UAAU,MAAM;AACrB;AAAA,QACF;AACA,aAAK,UAAU,SAAS,IAAI;AAAA,MAC9B;AAC
A,WAAK,UAAU,SAAS;AACxB,WAAK,UAAU,MAAM;AAAA,IACvB;AAEA,UAAM,WAAW,YAAY;AAC3B,uBAAiB,SAAS,KAAK,WAAW;AACxC,YAAI,KAAK,gBAAgB,OAAO,QAAS;AAEzC,cAAM,OAAO,MAAM;AACnB,WAAG,KAAK,KAAK,UAAU,EAAE,MAAM,QAAQ,MAAM,EAAE,KAAK,EAAE,CAAC,CAAC;AAAA,MAC1D;AAEA,UAAI,CAAC,KAAK,gBAAgB,OAAO,SAAS;AACxC,WAAG,KAAK,KAAK,UAAU,EAAE,MAAM,QAAQ,CAAC,CAAC;AAAA,MAC3C;AAAA,IACF;AAEA,UAAM,WAAW,YAAY;AAC3B,YAAM,UAAU,IAAI,gBAAgB,KAAK,KAAK,YAAY,mBAAmB;AAC7E,UAAI,gBAAgB;AACpB,UAAI;AAEJ,YAAM,gBAAgB,CAAC,UAAmB;AACxC,YAAI,aAAa,CAAC,KAAK,MAAM,QAAQ;AACnC,eAAK,MAAM,IAAI,EAAE,WAAW,WAAW,OAAO,WAAW,MAAM,CAAC;AAChE,sBAAY;AAAA,QACd;AAAA,MACF;AAEA,aAAO,IAAI,QAAc,CAAC,SAAS,WAAW;AAC5C,WAAG,GAAG,WAAW,CAAC,SAAkB;AAze5C;AA0eU,cAAI;AACJ,cAAI;AACF,kBAAM,KAAK,MAAM,KAAK,SAAS,CAAC;AAAA,UAClC,QAAQ;AACN,iBAAK,QAAQ,KAAK,sCAAsC;AACxD;AAAA,UACF;AAEA,kBAAQ,IAAI,MAAM;AAAA,YAChB,KAAK,SAAS;AACZ,oBAAM,aAAY,SAAI,SAAJ,mBAAU,UAAoB;AAChD,kBAAI,CAAC,SAAU;AAEf,oBAAM,MAAM,OAAO,KAAK,UAAU,QAAQ;AAC1C,oBAAM,MAAM,IAAI,OAAO,MAAM,IAAI,YAAY,IAAI,aAAa,IAAI,UAAU;AAE5E,yBAAW,SAAS,QAAQ,MAAM,GAAkB,GAAG;AACrD,8BAAc,KAAK;AACnB,4BAAY;AAAA,cACd;AACA;AAAA,YACF;AAAA,YAEA,KAAK,SAAS;AACZ,oBAAM,aAAY,SAAI,SAAJ,mBAAU;AAC5B,kBAAI,cAAc,SAAS;AACzB,gCAAgB;AAChB,2BAAW,SAAS,QAAQ,MAAM,GAAG;AACnC,gCAAc,KAAK;AACnB,8BAAY;AAAA,gBACd;AACA,8BAAc,IAAI;AAElB,oBAAI,CAAC,KAAK,MAAM,QAAQ;AACtB,uBAAK,MAAM,IAAI,iBAAiB,aAAa;AAAA,gBAC/C;AACA,wBAAQ;AAAA,cACV;AACA;AAAA,YACF;AAAA,YAEA,KAAK,SAAS;AACZ,oBAAM,WAAU,SAAI,SAAJ,mBAAU,YAAsB;AAChD,oBAAM,WAAU,SAAI,SAAJ,mBAAU;AAC1B,qBAAO,IAAI,MAAM,mBAAmB,WAAW,EAAE,KAAK,MAAM,EAAE,CAAC;AAC/D;AAAA,YACF;AAAA,UACF;AAAA,QACF,CAAC;AAED,WAAG,GAAG,SAAS,MAAM;AACnB,cAAI,CAAC,eAAe;AAClB,uBAAW,SAAS,QAAQ,MAAM,GAAG;AACnC,4BAAc,KAAK;AACnB,0BAAY;AAAA,YACd;AACA,0BAAc,IAAI;AAElB,gBAAI,CAAC,KAAK,MAAM,QAAQ;AACtB,mBAAK,MAAM,IAAI,iBAAiB,aAAa;AAAA,YAC/C;AAAA,UACF;AACA,kBAAQ;AAAA,QACV,CAAC;AAED,WAAG,GAAG,SAAS,CAAC,UAAU;AACxB,iBAAO,KAAK;AAAA,QACd,CAAC;AAAA,MACH,CAAC;AAAA,IACH;AAEA,QAAI;AACF,YAAM,QAAQ,IAAI,CAAC,UAAU,GAAG,SAAS,GAAG,SAAS,CAAC,CAAC;AAAA,IACzD,SAAS,GAAG;AACV,YAAM,MAAM,aAAa,
QAAQ,EAAE,UAAU,OAAO,CAAC;AACrD,YAAM,IAAI,MAAM,gCAAgC,GAAG,EAAE;AAAA,IACvD,UAAE;AACA,YAAM,KAAK,eAAe,EAAE;AAAA,IAC9B;AAAA,EACF;AACF;","names":["tts"]}