@mastra/voice-sarvam 0.11.12 → 0.12.0-beta.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,95 @@
1
+ # Voice API Reference
2
+
3
+ > API reference for voice - 1 entries
4
+
5
+
6
+ ---
7
+
8
+ ## Reference: Sarvam
9
+
10
+ > Documentation for the Sarvam class, providing text-to-speech and speech-to-text capabilities.
11
+
12
+ The SarvamVoice class in Mastra provides text-to-speech and speech-to-text capabilities using Sarvam AI models.
13
+
14
+ ## Usage Example
15
+
16
+ ```typescript
17
+ import { SarvamVoice } from "@mastra/voice-sarvam";
18
+
19
+ // Initialize with default configuration using environment variables
20
+ const voice = new SarvamVoice();
21
+
22
+ // Or initialize with specific configuration
23
+ const voiceWithConfig = new SarvamVoice({
24
+ speechModel: {
25
+ model: "bulbul:v1",
26
+ apiKey: process.env.SARVAM_API_KEY!,
27
+ language: "en-IN",
28
+ properties: {
29
+ pitch: 0,
30
+ pace: 1.65,
31
+ loudness: 1.5,
32
+ speech_sample_rate: 8000,
33
+ enable_preprocessing: false,
34
+ eng_interpolation_wt: 123,
35
+ },
36
+ },
37
+ listeningModel: {
38
+ model: "saarika:v2",
39
+ apiKey: process.env.SARVAM_API_KEY!,
40
+ languageCode: "en-IN",
41
+ filetype?: 'wav';
42
+ },
43
+ speaker: "meera", // Default voice
44
+ });
45
+
46
+
47
+ // Convert text to speech
48
+ const audioStream = await voice.speak("Hello, how can I help you?");
49
+
50
+
51
+ // Convert speech to text
52
+ const text = await voice.listen(audioStream, {
53
+ filetype: "wav",
54
+ });
55
+ ```
56
+
57
+ ### Sarvam API Docs -
58
+
59
+ https://docs.sarvam.ai/api-reference-docs/endpoints/text-to-speech
60
+
61
+ ## Configuration
62
+
63
+ ### Constructor Options
64
+
65
+ ### SarvamVoiceConfig
66
+
67
+ ### SarvamListenOptions
68
+
69
+ ## Methods
70
+
71
+ ### speak()
72
+
73
+ Converts text to speech using Sarvam's text-to-speech models.
74
+
75
+ Returns: `Promise<NodeJS.ReadableStream>`
76
+
77
+ ### listen()
78
+
79
+ Transcribes audio using Sarvam's speech recognition models.
80
+
81
+ Returns: `Promise<string>`
82
+
83
+ ### getSpeakers()
84
+
85
+ Returns an array of available voice options.
86
+
87
+ Returns: `Promise<Array<{voiceId: SarvamVoiceId}>>`
88
+
89
+ ## Notes
90
+
91
+ - API key can be provided via constructor options or the `SARVAM_API_KEY` environment variable
92
+ - If no API key is provided, the constructor will throw an error
93
+ - The service communicates with the Sarvam AI API at `https://api.sarvam.ai`
94
+ - Audio is returned as a stream containing binary audio data
95
+ - Speech recognition supports mp3 and wav audio formats
package/dist/index.cjs CHANGED
@@ -97,32 +97,28 @@ var SarvamVoice = class extends voice.MastraVoice {
97
97
  }
98
98
  async speak(input, options) {
99
99
  const text = typeof input === "string" ? input : await this.streamToString(input);
100
- return this.traced(async () => {
101
- const payload = {
102
- inputs: [text],
103
- target_language_code: this.language,
104
- speaker: options?.speaker || this.speaker,
105
- model: this.model,
106
- ...this.properties
107
- };
108
- const response = await this.makeRequest("/text-to-speech", payload);
109
- const { audios } = await response.json();
110
- if (!audios || !audios.length) {
111
- throw new Error("No audio received from Sarvam AI");
112
- }
113
- const audioBuffer = Buffer.from(audios[0], "base64");
114
- const stream$1 = new stream.PassThrough();
115
- stream$1.write(audioBuffer);
116
- stream$1.end();
117
- return stream$1;
118
- }, "voice.sarvam.speak")();
100
+ const payload = {
101
+ inputs: [text],
102
+ target_language_code: this.language,
103
+ speaker: options?.speaker || this.speaker,
104
+ model: this.model,
105
+ ...this.properties
106
+ };
107
+ const response = await this.makeRequest("/text-to-speech", payload);
108
+ const { audios } = await response.json();
109
+ if (!audios || !audios.length) {
110
+ throw new Error("No audio received from Sarvam AI");
111
+ }
112
+ const audioBuffer = Buffer.from(audios[0], "base64");
113
+ const stream$1 = new stream.PassThrough();
114
+ stream$1.write(audioBuffer);
115
+ stream$1.end();
116
+ return stream$1;
119
117
  }
120
118
  async getSpeakers() {
121
- return this.traced(async () => {
122
- return SARVAM_VOICES.map((voice) => ({
123
- voiceId: voice
124
- }));
125
- }, "voice.deepgram.getSpeakers")();
119
+ return SARVAM_VOICES.map((voice) => ({
120
+ voiceId: voice
121
+ }));
126
122
  }
127
123
  /**
128
124
  * Checks if listening capabilities are enabled.
@@ -133,38 +129,36 @@ var SarvamVoice = class extends voice.MastraVoice {
133
129
  return { enabled: true };
134
130
  }
135
131
  async listen(input, options) {
136
- return this.traced(async () => {
137
- const chunks = [];
138
- for await (const chunk of input) {
139
- if (typeof chunk === "string") {
140
- chunks.push(Buffer.from(chunk));
141
- } else {
142
- chunks.push(chunk);
143
- }
144
- }
145
- const audioBuffer = Buffer.concat(chunks);
146
- const form = new FormData();
147
- const mimeType = options?.filetype === "mp3" ? "audio/mpeg" : "audio/wav";
148
- const blob = new Blob([audioBuffer], { type: mimeType });
149
- form.append("file", blob);
150
- form.append("model", options?.model || "saarika:v2");
151
- form.append("language_code", options?.languageCode || "unknown");
152
- const requestOptions = {
153
- method: "POST",
154
- headers: {
155
- "api-subscription-key": this.apiKey
156
- },
157
- body: form
158
- };
159
- try {
160
- const response = await fetch(`${this.baseUrl}/speech-to-text`, requestOptions);
161
- const result = await response.json();
162
- return result.transcript;
163
- } catch (error) {
164
- console.error("Error during speech-to-text request:", error);
165
- throw error;
132
+ const chunks = [];
133
+ for await (const chunk of input) {
134
+ if (typeof chunk === "string") {
135
+ chunks.push(Buffer.from(chunk));
136
+ } else {
137
+ chunks.push(chunk);
166
138
  }
167
- }, "voice.sarvam.listen")();
139
+ }
140
+ const audioBuffer = Buffer.concat(chunks);
141
+ const form = new FormData();
142
+ const mimeType = options?.filetype === "mp3" ? "audio/mpeg" : "audio/wav";
143
+ const blob = new Blob([audioBuffer], { type: mimeType });
144
+ form.append("file", blob);
145
+ form.append("model", options?.model || "saarika:v2");
146
+ form.append("language_code", options?.languageCode || "unknown");
147
+ const requestOptions = {
148
+ method: "POST",
149
+ headers: {
150
+ "api-subscription-key": this.apiKey
151
+ },
152
+ body: form
153
+ };
154
+ try {
155
+ const response = await fetch(`${this.baseUrl}/speech-to-text`, requestOptions);
156
+ const result = await response.json();
157
+ return result.transcript;
158
+ } catch (error) {
159
+ console.error("Error during speech-to-text request:", error);
160
+ throw error;
161
+ }
168
162
  }
169
163
  };
170
164
 
@@ -1 +1 @@
1
- {"version":3,"sources":["../src/voices.ts","../src/index.ts"],"names":["MastraVoice","stream","PassThrough"],"mappings":";;;;;;;;AAAO,IAAM,aAAA,GAAgB;AAAA,EAC3B,OAAA;AAAA,EACA,UAAA;AAAA,EACA,UAAA;AAAA,EACA,QAAA;AAAA,EACA,MAAA;AAAA,EACA,SAAA;AAAA,EACA,MAAA;AAAA,EACA,MAAA;AAAA,EACA,OAAA;AAAA,EACA,MAAA;AAAA,EACA,OAAA;AAAA,EACA;AACF,CAAA;;;ACcA,IAAM,kBAAA,GAAqB;AAAA,EACzB,KAAA,EAAO,WAAA;AAAA,EACP,MAAA,EAAQ,QAAQ,GAAA,CAAI,cAAA;AAAA,EACpB,QAAA,EAAU;AACZ,CAAA;AAEA,IAAM,qBAAA,GAAwB;AAAA,EAC5B,KAAA,EAAO,YAAA;AAAA,EACP,MAAA,EAAQ,QAAQ,GAAA,CAAI,cAEtB,CAAA;AAEO,IAAM,WAAA,GAAN,cAA0BA,iBAAA,CAAY;AAAA,EACnC,MAAA;AAAA,EACA,KAAA,GAAwB,WAAA;AAAA,EACxB,QAAA,GAA8B,OAAA;AAAA,EAC9B,aAAkC,EAAC;AAAA,EAC3C,OAAA,GAAyB,OAAA;AAAA,EACjB,OAAA,GAAU,uBAAA;AAAA,EAElB,WAAA,CAAY;AAAA,IACV,WAAA;AAAA,IACA,OAAA;AAAA,IACA;AAAA,GACF,GAII,EAAC,EAAG;AACN,IAAA,KAAA,CAAM;AAAA,MACJ,WAAA,EAAa;AAAA,QACX,IAAA,EAAM,WAAA,EAAa,KAAA,IAAS,kBAAA,CAAmB,KAAA;AAAA,QAC/C,MAAA,EAAQ,WAAA,EAAa,MAAA,IAAU,kBAAA,CAAmB;AAAA,OACpD;AAAA,MACA,cAAA,EAAgB;AAAA,QACd,IAAA,EAAM,cAAA,EAAgB,KAAA,IAAS,qBAAA,CAAsB,KAAA;AAAA,QACrD,MAAA,EAAQ,cAAA,EAAgB,KAAA,IAAS,qBAAA,CAAsB;AAAA,OACzD;AAAA,MACA;AAAA,KACD,CAAA;AAED,IAAA,IAAA,CAAK,MAAA,GAAS,WAAA,EAAa,MAAA,IAAU,kBAAA,CAAmB,MAAA;AACxD,IAAA,IAAI,CAAC,KAAK,MAAA,EAAQ;AAChB,MAAA,MAAM,IAAI,MAAM,4BAA4B,CAAA;AAAA,IAC9C;AACA,IAAA,IAAA,CAAK,KAAA,GAAQ,WAAA,EAAa,KAAA,IAAS,kBAAA,CAAmB,KAAA;AACtD,IAAA,IAAA,CAAK,QAAA,GAAW,WAAA,EAAa,QAAA,IAAY,kBAAA,CAAmB,QAAA;AAC5D,IAAA,IAAA,CAAK,UAAA,GAAa,WAAA,EAAa,UAAA,IAAc,EAAC;AAC9C,IAAA,IAAA,CAAK,UAAU,OAAA,IAAW,OAAA;AAAA,EAC5B;AAAA,EAEA,MAAc,WAAA,CAAY,QAAA,EAAkB,OAAA,EAAc;AACxD,IAAA,MAAM,OAAA,GAAU,IAAI,OAAA,CAAQ;AAAA,MAC1B,wBAAwB,IAAA,CAAK,MAAA;AAAA,MAC7B,cAAA,EAAgB;AAAA,KACjB,CAAA;AACD,IAAA,MAAM,QAAA,GAAW,MAAM,KAAA,CAAM,CAAA,EAAG,KAAK,OAAO,CAAA,EAAG,QAAQ,CAAA,CAAA,EAAI;AAAA,MACzD,MAAA,EAAQ,MAAA;AAAA,MACR,OAAA;AAAA,MACA,IAAA,EAAM,IAAA,CAAK,SAAA,CAAU,OAAO;AAAA,KAC7B,CAAA;AACD,IAAA,IAAI,CAAC,SAAS,EAAA,EAAI;AAChB,MAAA,IAAI,YAAA;AACJ,MAAA,IAAI;AACF,QAAA,MAAM,KAAA,GAAS,MAAM,QAAA,CAAS,IAAA,EAAK;AACnC,QAAA,YAAA,GAAe,KAAA,CAAM,WAAW,QAAA,CAAS,UAAA;AAAA,MAC3C,CAAA,CAAA,MAAQ;AACN,QAAA,YAAA,GAAe,QAAA,CAAS,UAAA;AAAA,MAC1B;AACA,MAAA,MAAM,IAAI,KAAA,CAAM,CAAA,qBAAA,EAAwB,YAAY,CAAA,CAAE,CAAA;AAAA,IACxD;AAEA,IAAA,OAAO,QAAA;AAAA,EACT;AAAA,EACA,MAAc,eAAe,MAAA,EAAgD;AAC3E,IAAA,MAAM,SAAmB,EAAC;AAC1B,IAAA,WAAA,MAAiB,SAAS,MAAA,EAAQ;AAChC,MAAA,IAAI,OAAO,UAAU,QAAA,EAAU;AAC7B,QAAA,MAAA,CAAO,IAAA,CAAK,MAAA,CAAO,IAAA,CAAK,KAAK,CAAC,CAAA;AAAA,MAChC,CAAA,MAAO;AACL,QAAA,MAAA,CAAO,KAAK,KAAK,CAAA;AAAA,MACnB;AAAA,IACF;AACA,IAAA,OAAO,MAAA,CAAO,MAAA,CAAO,MAAM,CAAA,CAAE,SAAS,OAAO,CAAA;AAAA,EAC/C;AAAA,EACA,MAAM,KAAA,CACJ,KAAA,EACA,OAAA,EACgC;AAChC,IAAA,MAAM,IAAA,GAAO,OAAO,KAAA,KAAU,QAAA,GAAW,QAAQ,MAAM,IAAA,CAAK,eAAe,KAAK,CAAA;AAEhF,IAAA,OAAO,IAAA,CAAK,OAAO,YAAY;AAC7B,MAAA,MAAM,OAAA,GAAU;AAAA,QACd,MAAA,EAAQ,CAAC,IAAI,CAAA;AAAA,QACb,sBAAsB,IAAA,CAAK,QAAA;AAAA,QAC3B,OAAA,EAAS,OAAA,EAAS,OAAA,IAAW,IAAA,CAAK,OAAA;AAAA,QAClC,OAAO,IAAA,CAAK,KAAA;AAAA,QACZ,GAAG,IAAA,CAAK;AAAA,OACV;AAEA,MAAA,MAAM,QAAA,GAAW,MAAM,IAAA,CAAK,WAAA,CAAY,mBAAmB,OAAO,CAAA;AAElE,MAAA,MAAM,EAAE,MAAA,EAAO,GAAK,MAAM,SAAS,IAAA,EAAK;AAExC,MAAA,IAAI,CAAC,MAAA,IAAU,CAAC,MAAA,CAAO,MAAA,EAAQ;AAC7B,QAAA,MAAM,IAAI,MAAM,kCAAkC,CAAA;AAAA,MACpD;AAGA,MAAA,MAAM,cAAc,MAAA,CAAO,IAAA,CAAK,MAAA,CAAO,CAAC,GAAG,QAAQ,CAAA;AAGnD,MAAA,MAAMC,QAAA,GAAS,IAAIC,kBAAA,EAAY;AAC/B,MAAAD,QAAA,CAAO,MAAM,WAAW,CAAA;AACxB,MAAAA,QAAA,CAAO,GAAA,EAAI;AAEX,MAAA,OAAOA,QAAA;AAAA,IACT,CAAA,EAAG,oBAAoB,CAAA,EAAE;AAAA,EAC3B;AAAA,EAEA,MAAM,WAAA,GAAc;AAClB,IAAA,OAAO,IAAA,CAAK,OAAO,YAAY;AAC7B,MAAA,OAAO,aAAA,CAAc,IAAI,CAAA,KAAA,MAAU;AAAA,QACjC,OAAA,EAAS;AAAA,OACX,CAAE,CAAA;AAAA,IACJ,CAAA,EAAG,4BAA4B,CAAA,EAAE;AAAA,EACnC;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAOA,MAAM,WAAA,GAAc;AAClB,IAAA,OAAO,EAAE,SAAS,IAAA,EAAK;AAAA,EACzB;AAAA,EAEA,MAAM,MAAA,CAAO,KAAA,EAA8B,OAAA,EAAgD;AACzF,IAAA,OAAO,IAAA,CAAK,OAAO,YAAY;AAE7B,MAAA,MAAM,SAAmB,EAAC;AAC1B,MAAA,WAAA,MAAiB,SAAS,KAAA,EAAO;AAC/B,QAAA,IAAI,OAAO,UAAU,QAAA,EAAU;AAC7B,UAAA,MAAA,CAAO,IAAA,CAAK,MAAA,CAAO,IAAA,CAAK,KAAK,CAAC,CAAA;AAAA,QAChC,CAAA,MAAO;AACL,UAAA,MAAA,CAAO,KAAK,KAAK,CAAA;AAAA,QACnB;AAAA,MACF;AACA,MAAA,MAAM,WAAA,GAAc,MAAA,CAAO,MAAA,CAAO,MAAM,CAAA;AAExC,MAAA,MAAM,IAAA,GAAO,IAAI,QAAA,EAAS;AAC1B,MAAA,MAAM,QAAA,GAAW,OAAA,EAAS,QAAA,KAAa,KAAA,GAAQ,YAAA,GAAe,WAAA;AAC9D,MAAA,MAAM,IAAA,GAAO,IAAI,IAAA,CAAK,CAAC,WAAW,CAAA,EAAG,EAAE,IAAA,EAAM,QAAA,EAAU,CAAA;AAEvD,MAAA,IAAA,CAAK,MAAA,CAAO,QAAQ,IAAI,CAAA;AACxB,MAAA,IAAA,CAAK,MAAA,CAAO,OAAA,EAAS,OAAA,EAAS,KAAA,IAAS,YAAY,CAAA;AACnD,MAAA,IAAA,CAAK,MAAA,CAAO,eAAA,EAAiB,OAAA,EAAS,YAAA,IAAgB,SAAS,CAAA;AAC/D,MAAA,MAAM,cAAA,GAAiB;AAAA,QACrB,MAAA,EAAQ,MAAA;AAAA,QACR,OAAA,EAAS;AAAA,UACP,wBAAwB,IAAA,CAAK;AAAA,SAC/B;AAAA,QACA,IAAA,EAAM;AAAA,OACR;AAEA,MAAA,IAAI;AACF,QAAA,MAAM,WAAW,MAAM,KAAA,CAAM,GAAG,IAAA,CAAK,OAAO,mBAAmB,cAAc,CAAA;AAC7E,QAAA,MAAM,MAAA,GAAU,MAAM,QAAA,CAAS,IAAA,EAAK;AAEpC,QAAA,OAAO,MAAA,CAAO,UAAA;AAAA,MAChB,SAAS,KAAA,EAAO;AACd,QAAA,OAAA,CAAQ,KAAA,CAAM,wCAAwC,KAAK,CAAA;AAC3D,QAAA,MAAM,KAAA;AAAA,MACR;AAAA,IACF,CAAA,EAAG,qBAAqB,CAAA,EAAE;AAAA,EAC5B;AACF","file":"index.cjs","sourcesContent":["export const SARVAM_VOICES = [\n 'meera',\n 'pavithra',\n 'maitreyi',\n 'arvind',\n 'amol',\n 'amartya',\n 'diya',\n 'neel',\n 'misha',\n 'vian',\n 'arjun',\n 'maya',\n] as const;\n\nexport const SARVAM_TTS_LANGUAGES = [\n 'hi-IN',\n 'bn-IN',\n 'kn-IN',\n 'ml-IN',\n 'mr-IN',\n 'od-IN',\n 'pa-IN',\n 'ta-IN',\n 'te-IN',\n 'en-IN',\n 'gu-IN',\n] as const;\n\nexport const SARVAM_STT_LANGUAGES = [...SARVAM_TTS_LANGUAGES, 'unknown'] as const;\n\nexport const SARVAM_TTS_MODELS = ['bulbul:v1'] as const;\nexport const SARVAM_STT_MODELS = ['saarika:v1', 'saarika:v2', 'saarika:flash'] as const;\n\nexport type SarvamVoiceId = (typeof SARVAM_VOICES)[number];\n\nexport type SarvamTTSLanguage = (typeof SARVAM_TTS_LANGUAGES)[number];\nexport type SarvamSTTLanguage = (typeof SARVAM_STT_LANGUAGES)[number];\n\nexport type SarvamTTSModel = (typeof SARVAM_TTS_MODELS)[number];\nexport type SarvamSTTModel = (typeof SARVAM_STT_MODELS)[number];\n","import { PassThrough } from 'stream';\n\nimport { MastraVoice } from '@mastra/core/voice';\nimport { SARVAM_VOICES } from './voices';\nimport type { SarvamTTSLanguage, SarvamSTTLanguage, SarvamSTTModel, SarvamTTSModel, SarvamVoiceId } from './voices';\n\ninterface SarvamVoiceConfig {\n apiKey?: string;\n model?: SarvamTTSModel;\n language?: SarvamTTSLanguage;\n properties?: {\n pitch?: number;\n pace?: number;\n loudness?: number;\n speech_sample_rate?: 8000 | 16000 | 22050;\n enable_preprocessing?: boolean;\n eng_interpolation_wt?: number;\n };\n}\n\ninterface SarvamListenOptions {\n apiKey?: string;\n model?: SarvamSTTModel;\n languageCode?: SarvamSTTLanguage;\n filetype?: 'mp3' | 'wav';\n}\n\nconst defaultSpeechModel = {\n model: 'bulbul:v1' as const,\n apiKey: process.env.SARVAM_API_KEY,\n language: 'en-IN' as const,\n};\n\nconst defaultListeningModel = {\n model: 'saarika:v2' as const,\n apiKey: process.env.SARVAM_API_KEY,\n language_code: 'unknown' as const,\n};\n\nexport class SarvamVoice extends MastraVoice {\n private apiKey?: string;\n private model: SarvamTTSModel = 'bulbul:v1';\n private language: SarvamTTSLanguage = 'en-IN';\n private properties: Record<string, any> = {};\n speaker: SarvamVoiceId = 'meera';\n private baseUrl = 'https://api.sarvam.ai';\n\n constructor({\n speechModel,\n speaker,\n listeningModel,\n }: {\n speechModel?: SarvamVoiceConfig;\n speaker?: SarvamVoiceId;\n listeningModel?: SarvamListenOptions;\n } = {}) {\n super({\n speechModel: {\n name: speechModel?.model ?? defaultSpeechModel.model,\n apiKey: speechModel?.apiKey ?? defaultSpeechModel.apiKey,\n },\n listeningModel: {\n name: listeningModel?.model ?? defaultListeningModel.model,\n apiKey: listeningModel?.model ?? defaultListeningModel.apiKey,\n },\n speaker,\n });\n\n this.apiKey = speechModel?.apiKey || defaultSpeechModel.apiKey;\n if (!this.apiKey) {\n throw new Error('SARVAM_API_KEY must be set');\n }\n this.model = speechModel?.model || defaultSpeechModel.model;\n this.language = speechModel?.language || defaultSpeechModel.language;\n this.properties = speechModel?.properties || {};\n this.speaker = speaker || 'meera';\n }\n\n private async makeRequest(endpoint: string, payload: any) {\n const headers = new Headers({\n 'api-subscription-key': this.apiKey!,\n 'Content-Type': 'application/json',\n });\n const response = await fetch(`${this.baseUrl}${endpoint}`, {\n method: 'POST',\n headers,\n body: JSON.stringify(payload),\n });\n if (!response.ok) {\n let errorMessage;\n try {\n const error = (await response.json()) as { message?: string };\n errorMessage = error.message || response.statusText;\n } catch {\n errorMessage = response.statusText;\n }\n throw new Error(`Sarvam AI API Error: ${errorMessage}`);\n }\n\n return response;\n }\n private async streamToString(stream: NodeJS.ReadableStream): Promise<string> {\n const chunks: Buffer[] = [];\n for await (const chunk of stream) {\n if (typeof chunk === 'string') {\n chunks.push(Buffer.from(chunk));\n } else {\n chunks.push(chunk);\n }\n }\n return Buffer.concat(chunks).toString('utf-8');\n }\n async speak(\n input: string | NodeJS.ReadableStream,\n options?: { speaker?: SarvamVoiceId },\n ): Promise<NodeJS.ReadableStream> {\n const text = typeof input === 'string' ? input : await this.streamToString(input);\n\n return this.traced(async () => {\n const payload = {\n inputs: [text],\n target_language_code: this.language,\n speaker: options?.speaker || this.speaker,\n model: this.model,\n ...this.properties,\n };\n\n const response = await this.makeRequest('/text-to-speech', payload);\n\n const { audios } = (await response.json()) as { audios: any };\n\n if (!audios || !audios.length) {\n throw new Error('No audio received from Sarvam AI');\n }\n\n // Convert base64 to buffer\n const audioBuffer = Buffer.from(audios[0], 'base64');\n\n // Create a PassThrough stream for the audio\n const stream = new PassThrough();\n stream.write(audioBuffer);\n stream.end();\n\n return stream;\n }, 'voice.sarvam.speak')();\n }\n\n async getSpeakers() {\n return this.traced(async () => {\n return SARVAM_VOICES.map(voice => ({\n voiceId: voice,\n }));\n }, 'voice.deepgram.getSpeakers')();\n }\n\n /**\n * Checks if listening capabilities are enabled.\n *\n * @returns {Promise<{ enabled: boolean }>}\n */\n async getListener() {\n return { enabled: true };\n }\n\n async listen(input: NodeJS.ReadableStream, options?: SarvamListenOptions): Promise<string> {\n return this.traced(async () => {\n // Collect audio data into buffer\n const chunks: Buffer[] = [];\n for await (const chunk of input) {\n if (typeof chunk === 'string') {\n chunks.push(Buffer.from(chunk));\n } else {\n chunks.push(chunk);\n }\n }\n const audioBuffer = Buffer.concat(chunks);\n\n const form = new FormData();\n const mimeType = options?.filetype === 'mp3' ? 'audio/mpeg' : 'audio/wav';\n const blob = new Blob([audioBuffer], { type: mimeType });\n\n form.append('file', blob);\n form.append('model', options?.model || 'saarika:v2');\n form.append('language_code', options?.languageCode || 'unknown');\n const requestOptions = {\n method: 'POST',\n headers: {\n 'api-subscription-key': this.apiKey!,\n },\n body: form,\n };\n\n try {\n const response = await fetch(`${this.baseUrl}/speech-to-text`, requestOptions);\n const result = (await response.json()) as any;\n //console.log(result);\n return result.transcript;\n } catch (error) {\n console.error('Error during speech-to-text request:', error);\n throw error;\n }\n }, 'voice.sarvam.listen')();\n }\n}\n"]}
1
+ {"version":3,"sources":["../src/voices.ts","../src/index.ts"],"names":["MastraVoice","stream","PassThrough"],"mappings":";;;;;;;;AAAO,IAAM,aAAA,GAAgB;AAAA,EAC3B,OAAA;AAAA,EACA,UAAA;AAAA,EACA,UAAA;AAAA,EACA,QAAA;AAAA,EACA,MAAA;AAAA,EACA,SAAA;AAAA,EACA,MAAA;AAAA,EACA,MAAA;AAAA,EACA,OAAA;AAAA,EACA,MAAA;AAAA,EACA,OAAA;AAAA,EACA;AACF,CAAA;;;ACcA,IAAM,kBAAA,GAAqB;AAAA,EACzB,KAAA,EAAO,WAAA;AAAA,EACP,MAAA,EAAQ,QAAQ,GAAA,CAAI,cAAA;AAAA,EACpB,QAAA,EAAU;AACZ,CAAA;AAEA,IAAM,qBAAA,GAAwB;AAAA,EAC5B,KAAA,EAAO,YAAA;AAAA,EACP,MAAA,EAAQ,QAAQ,GAAA,CAAI,cAEtB,CAAA;AAEO,IAAM,WAAA,GAAN,cAA0BA,iBAAA,CAAY;AAAA,EACnC,MAAA;AAAA,EACA,KAAA,GAAwB,WAAA;AAAA,EACxB,QAAA,GAA8B,OAAA;AAAA,EAC9B,aAAkC,EAAC;AAAA,EAC3C,OAAA,GAAyB,OAAA;AAAA,EACjB,OAAA,GAAU,uBAAA;AAAA,EAElB,WAAA,CAAY;AAAA,IACV,WAAA;AAAA,IACA,OAAA;AAAA,IACA;AAAA,GACF,GAII,EAAC,EAAG;AACN,IAAA,KAAA,CAAM;AAAA,MACJ,WAAA,EAAa;AAAA,QACX,IAAA,EAAM,WAAA,EAAa,KAAA,IAAS,kBAAA,CAAmB,KAAA;AAAA,QAC/C,MAAA,EAAQ,WAAA,EAAa,MAAA,IAAU,kBAAA,CAAmB;AAAA,OACpD;AAAA,MACA,cAAA,EAAgB;AAAA,QACd,IAAA,EAAM,cAAA,EAAgB,KAAA,IAAS,qBAAA,CAAsB,KAAA;AAAA,QACrD,MAAA,EAAQ,cAAA,EAAgB,KAAA,IAAS,qBAAA,CAAsB;AAAA,OACzD;AAAA,MACA;AAAA,KACD,CAAA;AAED,IAAA,IAAA,CAAK,MAAA,GAAS,WAAA,EAAa,MAAA,IAAU,kBAAA,CAAmB,MAAA;AACxD,IAAA,IAAI,CAAC,KAAK,MAAA,EAAQ;AAChB,MAAA,MAAM,IAAI,MAAM,4BAA4B,CAAA;AAAA,IAC9C;AACA,IAAA,IAAA,CAAK,KAAA,GAAQ,WAAA,EAAa,KAAA,IAAS,kBAAA,CAAmB,KAAA;AACtD,IAAA,IAAA,CAAK,QAAA,GAAW,WAAA,EAAa,QAAA,IAAY,kBAAA,CAAmB,QAAA;AAC5D,IAAA,IAAA,CAAK,UAAA,GAAa,WAAA,EAAa,UAAA,IAAc,EAAC;AAC9C,IAAA,IAAA,CAAK,UAAU,OAAA,IAAW,OAAA;AAAA,EAC5B;AAAA,EAEA,MAAc,WAAA,CAAY,QAAA,EAAkB,OAAA,EAAc;AACxD,IAAA,MAAM,OAAA,GAAU,IAAI,OAAA,CAAQ;AAAA,MAC1B,wBAAwB,IAAA,CAAK,MAAA;AAAA,MAC7B,cAAA,EAAgB;AAAA,KACjB,CAAA;AACD,IAAA,MAAM,QAAA,GAAW,MAAM,KAAA,CAAM,CAAA,EAAG,KAAK,OAAO,CAAA,EAAG,QAAQ,CAAA,CAAA,EAAI;AAAA,MACzD,MAAA,EAAQ,MAAA;AAAA,MACR,OAAA;AAAA,MACA,IAAA,EAAM,IAAA,CAAK,SAAA,CAAU,OAAO;AAAA,KAC7B,CAAA;AACD,IAAA,IAAI,CAAC,SAAS,EAAA,EAAI;AAChB,MAAA,IAAI,YAAA;AACJ,MAAA,IAAI;AACF,QAAA,MAAM,KAAA,GAAS,MAAM,QAAA,CAAS,IAAA,EAAK;AACnC,QAAA,YAAA,GAAe,KAAA,CAAM,WAAW,QAAA,CAAS,UAAA;AAAA,MAC3C,CAAA,CAAA,MAAQ;AACN,QAAA,YAAA,GAAe,QAAA,CAAS,UAAA;AAAA,MAC1B;AACA,MAAA,MAAM,IAAI,KAAA,CAAM,CAAA,qBAAA,EAAwB,YAAY,CAAA,CAAE,CAAA;AAAA,IACxD;AAEA,IAAA,OAAO,QAAA;AAAA,EACT;AAAA,EACA,MAAc,eAAe,MAAA,EAAgD;AAC3E,IAAA,MAAM,SAAmB,EAAC;AAC1B,IAAA,WAAA,MAAiB,SAAS,MAAA,EAAQ;AAChC,MAAA,IAAI,OAAO,UAAU,QAAA,EAAU;AAC7B,QAAA,MAAA,CAAO,IAAA,CAAK,MAAA,CAAO,IAAA,CAAK,KAAK,CAAC,CAAA;AAAA,MAChC,CAAA,MAAO;AACL,QAAA,MAAA,CAAO,KAAK,KAAK,CAAA;AAAA,MACnB;AAAA,IACF;AACA,IAAA,OAAO,MAAA,CAAO,MAAA,CAAO,MAAM,CAAA,CAAE,SAAS,OAAO,CAAA;AAAA,EAC/C;AAAA,EACA,MAAM,KAAA,CACJ,KAAA,EACA,OAAA,EACgC;AAChC,IAAA,MAAM,IAAA,GAAO,OAAO,KAAA,KAAU,QAAA,GAAW,QAAQ,MAAM,IAAA,CAAK,eAAe,KAAK,CAAA;AAEhF,IAAA,MAAM,OAAA,GAAU;AAAA,MACd,MAAA,EAAQ,CAAC,IAAI,CAAA;AAAA,MACb,sBAAsB,IAAA,CAAK,QAAA;AAAA,MAC3B,OAAA,EAAS,OAAA,EAAS,OAAA,IAAW,IAAA,CAAK,OAAA;AAAA,MAClC,OAAO,IAAA,CAAK,KAAA;AAAA,MACZ,GAAG,IAAA,CAAK;AAAA,KACV;AAEA,IAAA,MAAM,QAAA,GAAW,MAAM,IAAA,CAAK,WAAA,CAAY,mBAAmB,OAAO,CAAA;AAElE,IAAA,MAAM,EAAE,MAAA,EAAO,GAAK,MAAM,SAAS,IAAA,EAAK;AAExC,IAAA,IAAI,CAAC,MAAA,IAAU,CAAC,MAAA,CAAO,MAAA,EAAQ;AAC7B,MAAA,MAAM,IAAI,MAAM,kCAAkC,CAAA;AAAA,IACpD;AAGA,IAAA,MAAM,cAAc,MAAA,CAAO,IAAA,CAAK,MAAA,CAAO,CAAC,GAAG,QAAQ,CAAA;AAGnD,IAAA,MAAMC,QAAA,GAAS,IAAIC,kBAAA,EAAY;AAC/B,IAAAD,QAAA,CAAO,MAAM,WAAW,CAAA;AACxB,IAAAA,QAAA,CAAO,GAAA,EAAI;AAEX,IAAA,OAAOA,QAAA;AAAA,EACT;AAAA,EAEA,MAAM,WAAA,GAAc;AAClB,IAAA,OAAO,aAAA,CAAc,IAAI,CAAA,KAAA,MAAU;AAAA,MACjC,OAAA,EAAS;AAAA,KACX,CAAE,CAAA;AAAA,EACJ;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAOA,MAAM,WAAA,GAAc;AAClB,IAAA,OAAO,EAAE,SAAS,IAAA,EAAK;AAAA,EACzB;AAAA,EAEA,MAAM,MAAA,CAAO,KAAA,EAA8B,OAAA,EAAgD;AAEzF,IAAA,MAAM,SAAmB,EAAC;AAC1B,IAAA,WAAA,MAAiB,SAAS,KAAA,EAAO;AAC/B,MAAA,IAAI,OAAO,UAAU,QAAA,EAAU;AAC7B,QAAA,MAAA,CAAO,IAAA,CAAK,MAAA,CAAO,IAAA,CAAK,KAAK,CAAC,CAAA;AAAA,MAChC,CAAA,MAAO;AACL,QAAA,MAAA,CAAO,KAAK,KAAK,CAAA;AAAA,MACnB;AAAA,IACF;AACA,IAAA,MAAM,WAAA,GAAc,MAAA,CAAO,MAAA,CAAO,MAAM,CAAA;AAExC,IAAA,MAAM,IAAA,GAAO,IAAI,QAAA,EAAS;AAC1B,IAAA,MAAM,QAAA,GAAW,OAAA,EAAS,QAAA,KAAa,KAAA,GAAQ,YAAA,GAAe,WAAA;AAC9D,IAAA,MAAM,IAAA,GAAO,IAAI,IAAA,CAAK,CAAC,WAAW,CAAA,EAAG,EAAE,IAAA,EAAM,QAAA,EAAU,CAAA;AAEvD,IAAA,IAAA,CAAK,MAAA,CAAO,QAAQ,IAAI,CAAA;AACxB,IAAA,IAAA,CAAK,MAAA,CAAO,OAAA,EAAS,OAAA,EAAS,KAAA,IAAS,YAAY,CAAA;AACnD,IAAA,IAAA,CAAK,MAAA,CAAO,eAAA,EAAiB,OAAA,EAAS,YAAA,IAAgB,SAAS,CAAA;AAC/D,IAAA,MAAM,cAAA,GAAiB;AAAA,MACrB,MAAA,EAAQ,MAAA;AAAA,MACR,OAAA,EAAS;AAAA,QACP,wBAAwB,IAAA,CAAK;AAAA,OAC/B;AAAA,MACA,IAAA,EAAM;AAAA,KACR;AAEA,IAAA,IAAI;AACF,MAAA,MAAM,WAAW,MAAM,KAAA,CAAM,GAAG,IAAA,CAAK,OAAO,mBAAmB,cAAc,CAAA;AAC7E,MAAA,MAAM,MAAA,GAAU,MAAM,QAAA,CAAS,IAAA,EAAK;AAEpC,MAAA,OAAO,MAAA,CAAO,UAAA;AAAA,IAChB,SAAS,KAAA,EAAO;AACd,MAAA,OAAA,CAAQ,KAAA,CAAM,wCAAwC,KAAK,CAAA;AAC3D,MAAA,MAAM,KAAA;AAAA,IACR;AAAA,EACF;AACF","file":"index.cjs","sourcesContent":["export const SARVAM_VOICES = [\n 'meera',\n 'pavithra',\n 'maitreyi',\n 'arvind',\n 'amol',\n 'amartya',\n 'diya',\n 'neel',\n 'misha',\n 'vian',\n 'arjun',\n 'maya',\n] as const;\n\nexport const SARVAM_TTS_LANGUAGES = [\n 'hi-IN',\n 'bn-IN',\n 'kn-IN',\n 'ml-IN',\n 'mr-IN',\n 'od-IN',\n 'pa-IN',\n 'ta-IN',\n 'te-IN',\n 'en-IN',\n 'gu-IN',\n] as const;\n\nexport const SARVAM_STT_LANGUAGES = [...SARVAM_TTS_LANGUAGES, 'unknown'] as const;\n\nexport const SARVAM_TTS_MODELS = ['bulbul:v1'] as const;\nexport const SARVAM_STT_MODELS = ['saarika:v1', 'saarika:v2', 'saarika:flash'] as const;\n\nexport type SarvamVoiceId = (typeof SARVAM_VOICES)[number];\n\nexport type SarvamTTSLanguage = (typeof SARVAM_TTS_LANGUAGES)[number];\nexport type SarvamSTTLanguage = (typeof SARVAM_STT_LANGUAGES)[number];\n\nexport type SarvamTTSModel = (typeof SARVAM_TTS_MODELS)[number];\nexport type SarvamSTTModel = (typeof SARVAM_STT_MODELS)[number];\n","import { PassThrough } from 'node:stream';\n\nimport { MastraVoice } from '@mastra/core/voice';\nimport { SARVAM_VOICES } from './voices';\nimport type { SarvamTTSLanguage, SarvamSTTLanguage, SarvamSTTModel, SarvamTTSModel, SarvamVoiceId } from './voices';\n\ninterface SarvamVoiceConfig {\n apiKey?: string;\n model?: SarvamTTSModel;\n language?: SarvamTTSLanguage;\n properties?: {\n pitch?: number;\n pace?: number;\n loudness?: number;\n speech_sample_rate?: 8000 | 16000 | 22050;\n enable_preprocessing?: boolean;\n eng_interpolation_wt?: number;\n };\n}\n\ninterface SarvamListenOptions {\n apiKey?: string;\n model?: SarvamSTTModel;\n languageCode?: SarvamSTTLanguage;\n filetype?: 'mp3' | 'wav';\n}\n\nconst defaultSpeechModel = {\n model: 'bulbul:v1' as const,\n apiKey: process.env.SARVAM_API_KEY,\n language: 'en-IN' as const,\n};\n\nconst defaultListeningModel = {\n model: 'saarika:v2' as const,\n apiKey: process.env.SARVAM_API_KEY,\n language_code: 'unknown' as const,\n};\n\nexport class SarvamVoice extends MastraVoice {\n private apiKey?: string;\n private model: SarvamTTSModel = 'bulbul:v1';\n private language: SarvamTTSLanguage = 'en-IN';\n private properties: Record<string, any> = {};\n speaker: SarvamVoiceId = 'meera';\n private baseUrl = 'https://api.sarvam.ai';\n\n constructor({\n speechModel,\n speaker,\n listeningModel,\n }: {\n speechModel?: SarvamVoiceConfig;\n speaker?: SarvamVoiceId;\n listeningModel?: SarvamListenOptions;\n } = {}) {\n super({\n speechModel: {\n name: speechModel?.model ?? defaultSpeechModel.model,\n apiKey: speechModel?.apiKey ?? defaultSpeechModel.apiKey,\n },\n listeningModel: {\n name: listeningModel?.model ?? defaultListeningModel.model,\n apiKey: listeningModel?.model ?? defaultListeningModel.apiKey,\n },\n speaker,\n });\n\n this.apiKey = speechModel?.apiKey || defaultSpeechModel.apiKey;\n if (!this.apiKey) {\n throw new Error('SARVAM_API_KEY must be set');\n }\n this.model = speechModel?.model || defaultSpeechModel.model;\n this.language = speechModel?.language || defaultSpeechModel.language;\n this.properties = speechModel?.properties || {};\n this.speaker = speaker || 'meera';\n }\n\n private async makeRequest(endpoint: string, payload: any) {\n const headers = new Headers({\n 'api-subscription-key': this.apiKey!,\n 'Content-Type': 'application/json',\n });\n const response = await fetch(`${this.baseUrl}${endpoint}`, {\n method: 'POST',\n headers,\n body: JSON.stringify(payload),\n });\n if (!response.ok) {\n let errorMessage;\n try {\n const error = (await response.json()) as { message?: string };\n errorMessage = error.message || response.statusText;\n } catch {\n errorMessage = response.statusText;\n }\n throw new Error(`Sarvam AI API Error: ${errorMessage}`);\n }\n\n return response;\n }\n private async streamToString(stream: NodeJS.ReadableStream): Promise<string> {\n const chunks: Buffer[] = [];\n for await (const chunk of stream) {\n if (typeof chunk === 'string') {\n chunks.push(Buffer.from(chunk));\n } else {\n chunks.push(chunk);\n }\n }\n return Buffer.concat(chunks).toString('utf-8');\n }\n async speak(\n input: string | NodeJS.ReadableStream,\n options?: { speaker?: SarvamVoiceId },\n ): Promise<NodeJS.ReadableStream> {\n const text = typeof input === 'string' ? input : await this.streamToString(input);\n\n const payload = {\n inputs: [text],\n target_language_code: this.language,\n speaker: options?.speaker || this.speaker,\n model: this.model,\n ...this.properties,\n };\n\n const response = await this.makeRequest('/text-to-speech', payload);\n\n const { audios } = (await response.json()) as { audios: any };\n\n if (!audios || !audios.length) {\n throw new Error('No audio received from Sarvam AI');\n }\n\n // Convert base64 to buffer\n const audioBuffer = Buffer.from(audios[0], 'base64');\n\n // Create a PassThrough stream for the audio\n const stream = new PassThrough();\n stream.write(audioBuffer);\n stream.end();\n\n return stream;\n }\n\n async getSpeakers() {\n return SARVAM_VOICES.map(voice => ({\n voiceId: voice,\n }));\n }\n\n /**\n * Checks if listening capabilities are enabled.\n *\n * @returns {Promise<{ enabled: boolean }>}\n */\n async getListener() {\n return { enabled: true };\n }\n\n async listen(input: NodeJS.ReadableStream, options?: SarvamListenOptions): Promise<string> {\n // Collect audio data into buffer\n const chunks: Buffer[] = [];\n for await (const chunk of input) {\n if (typeof chunk === 'string') {\n chunks.push(Buffer.from(chunk));\n } else {\n chunks.push(chunk);\n }\n }\n const audioBuffer = Buffer.concat(chunks);\n\n const form = new FormData();\n const mimeType = options?.filetype === 'mp3' ? 'audio/mpeg' : 'audio/wav';\n const blob = new Blob([audioBuffer], { type: mimeType });\n\n form.append('file', blob);\n form.append('model', options?.model || 'saarika:v2');\n form.append('language_code', options?.languageCode || 'unknown');\n const requestOptions = {\n method: 'POST',\n headers: {\n 'api-subscription-key': this.apiKey!,\n },\n body: form,\n };\n\n try {\n const response = await fetch(`${this.baseUrl}/speech-to-text`, requestOptions);\n const result = (await response.json()) as any;\n //console.log(result);\n return result.transcript;\n } catch (error) {\n console.error('Error during speech-to-text request:', error);\n throw error;\n }\n }\n}\n"]}
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,WAAW,EAAE,MAAM,oBAAoB,CAAC;AAEjD,OAAO,KAAK,EAAE,iBAAiB,EAAE,iBAAiB,EAAE,cAAc,EAAE,cAAc,EAAE,aAAa,EAAE,MAAM,UAAU,CAAC;AAEpH,UAAU,iBAAiB;IACzB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,KAAK,CAAC,EAAE,cAAc,CAAC;IACvB,QAAQ,CAAC,EAAE,iBAAiB,CAAC;IAC7B,UAAU,CAAC,EAAE;QACX,KAAK,CAAC,EAAE,MAAM,CAAC;QACf,IAAI,CAAC,EAAE,MAAM,CAAC;QACd,QAAQ,CAAC,EAAE,MAAM,CAAC;QAClB,kBAAkB,CAAC,EAAE,IAAI,GAAG,KAAK,GAAG,KAAK,CAAC;QAC1C,oBAAoB,CAAC,EAAE,OAAO,CAAC;QAC/B,oBAAoB,CAAC,EAAE,MAAM,CAAC;KAC/B,CAAC;CACH;AAED,UAAU,mBAAmB;IAC3B,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,KAAK,CAAC,EAAE,cAAc,CAAC;IACvB,YAAY,CAAC,EAAE,iBAAiB,CAAC;IACjC,QAAQ,CAAC,EAAE,KAAK,GAAG,KAAK,CAAC;CAC1B;AAcD,qBAAa,WAAY,SAAQ,WAAW;IAC1C,OAAO,CAAC,MAAM,CAAC,CAAS;IACxB,OAAO,CAAC,KAAK,CAA+B;IAC5C,OAAO,CAAC,QAAQ,CAA8B;IAC9C,OAAO,CAAC,UAAU,CAA2B;IAC7C,OAAO,EAAE,aAAa,CAAW;IACjC,OAAO,CAAC,OAAO,CAA2B;gBAE9B,EACV,WAAW,EACX,OAAO,EACP,cAAc,GACf,GAAE;QACD,WAAW,CAAC,EAAE,iBAAiB,CAAC;QAChC,OAAO,CAAC,EAAE,aAAa,CAAC;QACxB,cAAc,CAAC,EAAE,mBAAmB,CAAC;KACjC;YAuBQ,WAAW;YAuBX,cAAc;IAWtB,KAAK,CACT,KAAK,EAAE,MAAM,GAAG,MAAM,CAAC,cAAc,EACrC,OAAO,CAAC,EAAE;QAAE,OAAO,CAAC,EAAE,aAAa,CAAA;KAAE,GACpC,OAAO,CAAC,MAAM,CAAC,cAAc,CAAC;IAgC3B,WAAW;;;IAQjB;;;;OAIG;IACG,WAAW;;;IAIX,MAAM,CAAC,KAAK,EAAE,MAAM,CAAC,cAAc,EAAE,OAAO,CAAC,EAAE,mBAAmB,GAAG,OAAO,CAAC,MAAM,CAAC;CAuC3F"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,WAAW,EAAE,MAAM,oBAAoB,CAAC;AAEjD,OAAO,KAAK,EAAE,iBAAiB,EAAE,iBAAiB,EAAE,cAAc,EAAE,cAAc,EAAE,aAAa,EAAE,MAAM,UAAU,CAAC;AAEpH,UAAU,iBAAiB;IACzB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,KAAK,CAAC,EAAE,cAAc,CAAC;IACvB,QAAQ,CAAC,EAAE,iBAAiB,CAAC;IAC7B,UAAU,CAAC,EAAE;QACX,KAAK,CAAC,EAAE,MAAM,CAAC;QACf,IAAI,CAAC,EAAE,MAAM,CAAC;QACd,QAAQ,CAAC,EAAE,MAAM,CAAC;QAClB,kBAAkB,CAAC,EAAE,IAAI,GAAG,KAAK,GAAG,KAAK,CAAC;QAC1C,oBAAoB,CAAC,EAAE,OAAO,CAAC;QAC/B,oBAAoB,CAAC,EAAE,MAAM,CAAC;KAC/B,CAAC;CACH;AAED,UAAU,mBAAmB;IAC3B,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,KAAK,CAAC,EAAE,cAAc,CAAC;IACvB,YAAY,CAAC,EAAE,iBAAiB,CAAC;IACjC,QAAQ,CAAC,EAAE,KAAK,GAAG,KAAK,CAAC;CAC1B;AAcD,qBAAa,WAAY,SAAQ,WAAW;IAC1C,OAAO,CAAC,MAAM,CAAC,CAAS;IACxB,OAAO,CAAC,KAAK,CAA+B;IAC5C,OAAO,CAAC,QAAQ,CAA8B;IAC9C,OAAO,CAAC,UAAU,CAA2B;IAC7C,OAAO,EAAE,aAAa,CAAW;IACjC,OAAO,CAAC,OAAO,CAA2B;gBAE9B,EACV,WAAW,EACX,OAAO,EACP,cAAc,GACf,GAAE;QACD,WAAW,CAAC,EAAE,iBAAiB,CAAC;QAChC,OAAO,CAAC,EAAE,aAAa,CAAC;QACxB,cAAc,CAAC,EAAE,mBAAmB,CAAC;KACjC;YAuBQ,WAAW;YAuBX,cAAc;IAWtB,KAAK,CACT,KAAK,EAAE,MAAM,GAAG,MAAM,CAAC,cAAc,EACrC,OAAO,CAAC,EAAE;QAAE,OAAO,CAAC,EAAE,aAAa,CAAA;KAAE,GACpC,OAAO,CAAC,MAAM,CAAC,cAAc,CAAC;IA8B3B,WAAW;;;IAMjB;;;;OAIG;IACG,WAAW;;;IAIX,MAAM,CAAC,KAAK,EAAE,MAAM,CAAC,cAAc,EAAE,OAAO,CAAC,EAAE,mBAAmB,GAAG,OAAO,CAAC,MAAM,CAAC;CAqC3F"}
package/dist/index.js CHANGED
@@ -95,32 +95,28 @@ var SarvamVoice = class extends MastraVoice {
95
95
  }
96
96
  async speak(input, options) {
97
97
  const text = typeof input === "string" ? input : await this.streamToString(input);
98
- return this.traced(async () => {
99
- const payload = {
100
- inputs: [text],
101
- target_language_code: this.language,
102
- speaker: options?.speaker || this.speaker,
103
- model: this.model,
104
- ...this.properties
105
- };
106
- const response = await this.makeRequest("/text-to-speech", payload);
107
- const { audios } = await response.json();
108
- if (!audios || !audios.length) {
109
- throw new Error("No audio received from Sarvam AI");
110
- }
111
- const audioBuffer = Buffer.from(audios[0], "base64");
112
- const stream = new PassThrough();
113
- stream.write(audioBuffer);
114
- stream.end();
115
- return stream;
116
- }, "voice.sarvam.speak")();
98
+ const payload = {
99
+ inputs: [text],
100
+ target_language_code: this.language,
101
+ speaker: options?.speaker || this.speaker,
102
+ model: this.model,
103
+ ...this.properties
104
+ };
105
+ const response = await this.makeRequest("/text-to-speech", payload);
106
+ const { audios } = await response.json();
107
+ if (!audios || !audios.length) {
108
+ throw new Error("No audio received from Sarvam AI");
109
+ }
110
+ const audioBuffer = Buffer.from(audios[0], "base64");
111
+ const stream = new PassThrough();
112
+ stream.write(audioBuffer);
113
+ stream.end();
114
+ return stream;
117
115
  }
118
116
  async getSpeakers() {
119
- return this.traced(async () => {
120
- return SARVAM_VOICES.map((voice) => ({
121
- voiceId: voice
122
- }));
123
- }, "voice.deepgram.getSpeakers")();
117
+ return SARVAM_VOICES.map((voice) => ({
118
+ voiceId: voice
119
+ }));
124
120
  }
125
121
  /**
126
122
  * Checks if listening capabilities are enabled.
@@ -131,38 +127,36 @@ var SarvamVoice = class extends MastraVoice {
131
127
  return { enabled: true };
132
128
  }
133
129
  async listen(input, options) {
134
- return this.traced(async () => {
135
- const chunks = [];
136
- for await (const chunk of input) {
137
- if (typeof chunk === "string") {
138
- chunks.push(Buffer.from(chunk));
139
- } else {
140
- chunks.push(chunk);
141
- }
142
- }
143
- const audioBuffer = Buffer.concat(chunks);
144
- const form = new FormData();
145
- const mimeType = options?.filetype === "mp3" ? "audio/mpeg" : "audio/wav";
146
- const blob = new Blob([audioBuffer], { type: mimeType });
147
- form.append("file", blob);
148
- form.append("model", options?.model || "saarika:v2");
149
- form.append("language_code", options?.languageCode || "unknown");
150
- const requestOptions = {
151
- method: "POST",
152
- headers: {
153
- "api-subscription-key": this.apiKey
154
- },
155
- body: form
156
- };
157
- try {
158
- const response = await fetch(`${this.baseUrl}/speech-to-text`, requestOptions);
159
- const result = await response.json();
160
- return result.transcript;
161
- } catch (error) {
162
- console.error("Error during speech-to-text request:", error);
163
- throw error;
130
+ const chunks = [];
131
+ for await (const chunk of input) {
132
+ if (typeof chunk === "string") {
133
+ chunks.push(Buffer.from(chunk));
134
+ } else {
135
+ chunks.push(chunk);
164
136
  }
165
- }, "voice.sarvam.listen")();
137
+ }
138
+ const audioBuffer = Buffer.concat(chunks);
139
+ const form = new FormData();
140
+ const mimeType = options?.filetype === "mp3" ? "audio/mpeg" : "audio/wav";
141
+ const blob = new Blob([audioBuffer], { type: mimeType });
142
+ form.append("file", blob);
143
+ form.append("model", options?.model || "saarika:v2");
144
+ form.append("language_code", options?.languageCode || "unknown");
145
+ const requestOptions = {
146
+ method: "POST",
147
+ headers: {
148
+ "api-subscription-key": this.apiKey
149
+ },
150
+ body: form
151
+ };
152
+ try {
153
+ const response = await fetch(`${this.baseUrl}/speech-to-text`, requestOptions);
154
+ const result = await response.json();
155
+ return result.transcript;
156
+ } catch (error) {
157
+ console.error("Error during speech-to-text request:", error);
158
+ throw error;
159
+ }
166
160
  }
167
161
  };
168
162
 
package/dist/index.js.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"sources":["../src/voices.ts","../src/index.ts"],"names":[],"mappings":";;;;;;AAAO,IAAM,aAAA,GAAgB;AAAA,EAC3B,OAAA;AAAA,EACA,UAAA;AAAA,EACA,UAAA;AAAA,EACA,QAAA;AAAA,EACA,MAAA;AAAA,EACA,SAAA;AAAA,EACA,MAAA;AAAA,EACA,MAAA;AAAA,EACA,OAAA;AAAA,EACA,MAAA;AAAA,EACA,OAAA;AAAA,EACA;AACF,CAAA;;;ACcA,IAAM,kBAAA,GAAqB;AAAA,EACzB,KAAA,EAAO,WAAA;AAAA,EACP,MAAA,EAAQ,QAAQ,GAAA,CAAI,cAAA;AAAA,EACpB,QAAA,EAAU;AACZ,CAAA;AAEA,IAAM,qBAAA,GAAwB;AAAA,EAC5B,KAAA,EAAO,YAAA;AAAA,EACP,MAAA,EAAQ,QAAQ,GAAA,CAAI,cAEtB,CAAA;AAEO,IAAM,WAAA,GAAN,cAA0B,WAAA,CAAY;AAAA,EACnC,MAAA;AAAA,EACA,KAAA,GAAwB,WAAA;AAAA,EACxB,QAAA,GAA8B,OAAA;AAAA,EAC9B,aAAkC,EAAC;AAAA,EAC3C,OAAA,GAAyB,OAAA;AAAA,EACjB,OAAA,GAAU,uBAAA;AAAA,EAElB,WAAA,CAAY;AAAA,IACV,WAAA;AAAA,IACA,OAAA;AAAA,IACA;AAAA,GACF,GAII,EAAC,EAAG;AACN,IAAA,KAAA,CAAM;AAAA,MACJ,WAAA,EAAa;AAAA,QACX,IAAA,EAAM,WAAA,EAAa,KAAA,IAAS,kBAAA,CAAmB,KAAA;AAAA,QAC/C,MAAA,EAAQ,WAAA,EAAa,MAAA,IAAU,kBAAA,CAAmB;AAAA,OACpD;AAAA,MACA,cAAA,EAAgB;AAAA,QACd,IAAA,EAAM,cAAA,EAAgB,KAAA,IAAS,qBAAA,CAAsB,KAAA;AAAA,QACrD,MAAA,EAAQ,cAAA,EAAgB,KAAA,IAAS,qBAAA,CAAsB;AAAA,OACzD;AAAA,MACA;AAAA,KACD,CAAA;AAED,IAAA,IAAA,CAAK,MAAA,GAAS,WAAA,EAAa,MAAA,IAAU,kBAAA,CAAmB,MAAA;AACxD,IAAA,IAAI,CAAC,KAAK,MAAA,EAAQ;AAChB,MAAA,MAAM,IAAI,MAAM,4BAA4B,CAAA;AAAA,IAC9C;AACA,IAAA,IAAA,CAAK,KAAA,GAAQ,WAAA,EAAa,KAAA,IAAS,kBAAA,CAAmB,KAAA;AACtD,IAAA,IAAA,CAAK,QAAA,GAAW,WAAA,EAAa,QAAA,IAAY,kBAAA,CAAmB,QAAA;AAC5D,IAAA,IAAA,CAAK,UAAA,GAAa,WAAA,EAAa,UAAA,IAAc,EAAC;AAC9C,IAAA,IAAA,CAAK,UAAU,OAAA,IAAW,OAAA;AAAA,EAC5B;AAAA,EAEA,MAAc,WAAA,CAAY,QAAA,EAAkB,OAAA,EAAc;AACxD,IAAA,MAAM,OAAA,GAAU,IAAI,OAAA,CAAQ;AAAA,MAC1B,wBAAwB,IAAA,CAAK,MAAA;AAAA,MAC7B,cAAA,EAAgB;AAAA,KACjB,CAAA;AACD,IAAA,MAAM,QAAA,GAAW,MAAM,KAAA,CAAM,CAAA,EAAG,KAAK,OAAO,CAAA,EAAG,QAAQ,CAAA,CAAA,EAAI;AAAA,MACzD,MAAA,EAAQ,MAAA;AAAA,MACR,OAAA;AAAA,MACA,IAAA,EAAM,IAAA,CAAK,SAAA,CAAU,OAAO;AAAA,KAC7B,CAAA;AACD,IAAA,IAAI,CAAC,SAAS,EAAA,EAAI;AAChB,MAAA,IAAI,YAAA;AACJ,MAAA,IAAI;AACF,QAAA,MAAM,KAAA,GAAS,MAAM,QAAA,CAAS,IAAA,EAAK;AACnC,QAAA,YAAA,GAAe,KAAA,CAAM,WAAW,QAAA,CAAS,UAAA;AAAA,MAC3C,CAAA,CAAA,MAAQ;AACN,QAAA,YAAA,GAAe,QAAA,CAAS,UAAA;AAAA,MAC1B;AACA,MAAA,MAAM,IAAI,KAAA,CAAM,CAAA,qBAAA,EAAwB,YAAY,CAAA,CAAE,CAAA;AAAA,IACxD;AAEA,IAAA,OAAO,QAAA;AAAA,EACT;AAAA,EACA,MAAc,eAAe,MAAA,EAAgD;AAC3E,IAAA,MAAM,SAAmB,EAAC;AAC1B,IAAA,WAAA,MAAiB,SAAS,MAAA,EAAQ;AAChC,MAAA,IAAI,OAAO,UAAU,QAAA,EAAU;AAC7B,QAAA,MAAA,CAAO,IAAA,CAAK,MAAA,CAAO,IAAA,CAAK,KAAK,CAAC,CAAA;AAAA,MAChC,CAAA,MAAO;AACL,QAAA,MAAA,CAAO,KAAK,KAAK,CAAA;AAAA,MACnB;AAAA,IACF;AACA,IAAA,OAAO,MAAA,CAAO,MAAA,CAAO,MAAM,CAAA,CAAE,SAAS,OAAO,CAAA;AAAA,EAC/C;AAAA,EACA,MAAM,KAAA,CACJ,KAAA,EACA,OAAA,EACgC;AAChC,IAAA,MAAM,IAAA,GAAO,OAAO,KAAA,KAAU,QAAA,GAAW,QAAQ,MAAM,IAAA,CAAK,eAAe,KAAK,CAAA;AAEhF,IAAA,OAAO,IAAA,CAAK,OAAO,YAAY;AAC7B,MAAA,MAAM,OAAA,GAAU;AAAA,QACd,MAAA,EAAQ,CAAC,IAAI,CAAA;AAAA,QACb,sBAAsB,IAAA,CAAK,QAAA;AAAA,QAC3B,OAAA,EAAS,OAAA,EAAS,OAAA,IAAW,IAAA,CAAK,OAAA;AAAA,QAClC,OAAO,IAAA,CAAK,KAAA;AAAA,QACZ,GAAG,IAAA,CAAK;AAAA,OACV;AAEA,MAAA,MAAM,QAAA,GAAW,MAAM,IAAA,CAAK,WAAA,CAAY,mBAAmB,OAAO,CAAA;AAElE,MAAA,MAAM,EAAE,MAAA,EAAO,GAAK,MAAM,SAAS,IAAA,EAAK;AAExC,MAAA,IAAI,CAAC,MAAA,IAAU,CAAC,MAAA,CAAO,MAAA,EAAQ;AAC7B,QAAA,MAAM,IAAI,MAAM,kCAAkC,CAAA;AAAA,MACpD;AAGA,MAAA,MAAM,cAAc,MAAA,CAAO,IAAA,CAAK,MAAA,CAAO,CAAC,GAAG,QAAQ,CAAA;AAGnD,MAAA,MAAM,MAAA,GAAS,IAAI,WAAA,EAAY;AAC/B,MAAA,MAAA,CAAO,MAAM,WAAW,CAAA;AACxB,MAAA,MAAA,CAAO,GAAA,EAAI;AAEX,MAAA,OAAO,MAAA;AAAA,IACT,CAAA,EAAG,oBAAoB,CAAA,EAAE;AAAA,EAC3B;AAAA,EAEA,MAAM,WAAA,GAAc;AAClB,IAAA,OAAO,IAAA,CAAK,OAAO,YAAY;AAC7B,MAAA,OAAO,aAAA,CAAc,IAAI,CAAA,KAAA,MAAU;AAAA,QACjC,OAAA,EAAS;AAAA,OACX,CAAE,CAAA;AAAA,IACJ,CAAA,EAAG,4BAA4B,CAAA,EAAE;AAAA,EACnC;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAOA,MAAM,WAAA,GAAc;AAClB,IAAA,OAAO,EAAE,SAAS,IAAA,EAAK;AAAA,EACzB;AAAA,EAEA,MAAM,MAAA,CAAO,KAAA,EAA8B,OAAA,EAAgD;AACzF,IAAA,OAAO,IAAA,CAAK,OAAO,YAAY;AAE7B,MAAA,MAAM,SAAmB,EAAC;AAC1B,MAAA,WAAA,MAAiB,SAAS,KAAA,EAAO;AAC/B,QAAA,IAAI,OAAO,UAAU,QAAA,EAAU;AAC7B,UAAA,MAAA,CAAO,IAAA,CAAK,MAAA,CAAO,IAAA,CAAK,KAAK,CAAC,CAAA;AAAA,QAChC,CAAA,MAAO;AACL,UAAA,MAAA,CAAO,KAAK,KAAK,CAAA;AAAA,QACnB;AAAA,MACF;AACA,MAAA,MAAM,WAAA,GAAc,MAAA,CAAO,MAAA,CAAO,MAAM,CAAA;AAExC,MAAA,MAAM,IAAA,GAAO,IAAI,QAAA,EAAS;AAC1B,MAAA,MAAM,QAAA,GAAW,OAAA,EAAS,QAAA,KAAa,KAAA,GAAQ,YAAA,GAAe,WAAA;AAC9D,MAAA,MAAM,IAAA,GAAO,IAAI,IAAA,CAAK,CAAC,WAAW,CAAA,EAAG,EAAE,IAAA,EAAM,QAAA,EAAU,CAAA;AAEvD,MAAA,IAAA,CAAK,MAAA,CAAO,QAAQ,IAAI,CAAA;AACxB,MAAA,IAAA,CAAK,MAAA,CAAO,OAAA,EAAS,OAAA,EAAS,KAAA,IAAS,YAAY,CAAA;AACnD,MAAA,IAAA,CAAK,MAAA,CAAO,eAAA,EAAiB,OAAA,EAAS,YAAA,IAAgB,SAAS,CAAA;AAC/D,MAAA,MAAM,cAAA,GAAiB;AAAA,QACrB,MAAA,EAAQ,MAAA;AAAA,QACR,OAAA,EAAS;AAAA,UACP,wBAAwB,IAAA,CAAK;AAAA,SAC/B;AAAA,QACA,IAAA,EAAM;AAAA,OACR;AAEA,MAAA,IAAI;AACF,QAAA,MAAM,WAAW,MAAM,KAAA,CAAM,GAAG,IAAA,CAAK,OAAO,mBAAmB,cAAc,CAAA;AAC7E,QAAA,MAAM,MAAA,GAAU,MAAM,QAAA,CAAS,IAAA,EAAK;AAEpC,QAAA,OAAO,MAAA,CAAO,UAAA;AAAA,MAChB,SAAS,KAAA,EAAO;AACd,QAAA,OAAA,CAAQ,KAAA,CAAM,wCAAwC,KAAK,CAAA;AAC3D,QAAA,MAAM,KAAA;AAAA,MACR;AAAA,IACF,CAAA,EAAG,qBAAqB,CAAA,EAAE;AAAA,EAC5B;AACF","file":"index.js","sourcesContent":["export const SARVAM_VOICES = [\n 'meera',\n 'pavithra',\n 'maitreyi',\n 'arvind',\n 'amol',\n 'amartya',\n 'diya',\n 'neel',\n 'misha',\n 'vian',\n 'arjun',\n 'maya',\n] as const;\n\nexport const SARVAM_TTS_LANGUAGES = [\n 'hi-IN',\n 'bn-IN',\n 'kn-IN',\n 'ml-IN',\n 'mr-IN',\n 'od-IN',\n 'pa-IN',\n 'ta-IN',\n 'te-IN',\n 'en-IN',\n 'gu-IN',\n] as const;\n\nexport const SARVAM_STT_LANGUAGES = [...SARVAM_TTS_LANGUAGES, 'unknown'] as const;\n\nexport const SARVAM_TTS_MODELS = ['bulbul:v1'] as const;\nexport const SARVAM_STT_MODELS = ['saarika:v1', 'saarika:v2', 'saarika:flash'] as const;\n\nexport type SarvamVoiceId = (typeof SARVAM_VOICES)[number];\n\nexport type SarvamTTSLanguage = (typeof SARVAM_TTS_LANGUAGES)[number];\nexport type SarvamSTTLanguage = (typeof SARVAM_STT_LANGUAGES)[number];\n\nexport type SarvamTTSModel = (typeof SARVAM_TTS_MODELS)[number];\nexport type SarvamSTTModel = (typeof SARVAM_STT_MODELS)[number];\n","import { PassThrough } from 'stream';\n\nimport { MastraVoice } from '@mastra/core/voice';\nimport { SARVAM_VOICES } from './voices';\nimport type { SarvamTTSLanguage, SarvamSTTLanguage, SarvamSTTModel, SarvamTTSModel, SarvamVoiceId } from './voices';\n\ninterface SarvamVoiceConfig {\n apiKey?: string;\n model?: SarvamTTSModel;\n language?: SarvamTTSLanguage;\n properties?: {\n pitch?: number;\n pace?: number;\n loudness?: number;\n speech_sample_rate?: 8000 | 16000 | 22050;\n enable_preprocessing?: boolean;\n eng_interpolation_wt?: number;\n };\n}\n\ninterface SarvamListenOptions {\n apiKey?: string;\n model?: SarvamSTTModel;\n languageCode?: SarvamSTTLanguage;\n filetype?: 'mp3' | 'wav';\n}\n\nconst defaultSpeechModel = {\n model: 'bulbul:v1' as const,\n apiKey: process.env.SARVAM_API_KEY,\n language: 'en-IN' as const,\n};\n\nconst defaultListeningModel = {\n model: 'saarika:v2' as const,\n apiKey: process.env.SARVAM_API_KEY,\n language_code: 'unknown' as const,\n};\n\nexport class SarvamVoice extends MastraVoice {\n private apiKey?: string;\n private model: SarvamTTSModel = 'bulbul:v1';\n private language: SarvamTTSLanguage = 'en-IN';\n private properties: Record<string, any> = {};\n speaker: SarvamVoiceId = 'meera';\n private baseUrl = 'https://api.sarvam.ai';\n\n constructor({\n speechModel,\n speaker,\n listeningModel,\n }: {\n speechModel?: SarvamVoiceConfig;\n speaker?: SarvamVoiceId;\n listeningModel?: SarvamListenOptions;\n } = {}) {\n super({\n speechModel: {\n name: speechModel?.model ?? defaultSpeechModel.model,\n apiKey: speechModel?.apiKey ?? defaultSpeechModel.apiKey,\n },\n listeningModel: {\n name: listeningModel?.model ?? defaultListeningModel.model,\n apiKey: listeningModel?.model ?? defaultListeningModel.apiKey,\n },\n speaker,\n });\n\n this.apiKey = speechModel?.apiKey || defaultSpeechModel.apiKey;\n if (!this.apiKey) {\n throw new Error('SARVAM_API_KEY must be set');\n }\n this.model = speechModel?.model || defaultSpeechModel.model;\n this.language = speechModel?.language || defaultSpeechModel.language;\n this.properties = speechModel?.properties || {};\n this.speaker = speaker || 'meera';\n }\n\n private async makeRequest(endpoint: string, payload: any) {\n const headers = new Headers({\n 'api-subscription-key': this.apiKey!,\n 'Content-Type': 'application/json',\n });\n const response = await fetch(`${this.baseUrl}${endpoint}`, {\n method: 'POST',\n headers,\n body: JSON.stringify(payload),\n });\n if (!response.ok) {\n let errorMessage;\n try {\n const error = (await response.json()) as { message?: string };\n errorMessage = error.message || response.statusText;\n } catch {\n errorMessage = response.statusText;\n }\n throw new Error(`Sarvam AI API Error: ${errorMessage}`);\n }\n\n return response;\n }\n private async streamToString(stream: NodeJS.ReadableStream): Promise<string> {\n const chunks: Buffer[] = [];\n for await (const chunk of stream) {\n if (typeof chunk === 'string') {\n chunks.push(Buffer.from(chunk));\n } else {\n chunks.push(chunk);\n }\n }\n return Buffer.concat(chunks).toString('utf-8');\n }\n async speak(\n input: string | NodeJS.ReadableStream,\n options?: { speaker?: SarvamVoiceId },\n ): Promise<NodeJS.ReadableStream> {\n const text = typeof input === 'string' ? input : await this.streamToString(input);\n\n return this.traced(async () => {\n const payload = {\n inputs: [text],\n target_language_code: this.language,\n speaker: options?.speaker || this.speaker,\n model: this.model,\n ...this.properties,\n };\n\n const response = await this.makeRequest('/text-to-speech', payload);\n\n const { audios } = (await response.json()) as { audios: any };\n\n if (!audios || !audios.length) {\n throw new Error('No audio received from Sarvam AI');\n }\n\n // Convert base64 to buffer\n const audioBuffer = Buffer.from(audios[0], 'base64');\n\n // Create a PassThrough stream for the audio\n const stream = new PassThrough();\n stream.write(audioBuffer);\n stream.end();\n\n return stream;\n }, 'voice.sarvam.speak')();\n }\n\n async getSpeakers() {\n return this.traced(async () => {\n return SARVAM_VOICES.map(voice => ({\n voiceId: voice,\n }));\n }, 'voice.deepgram.getSpeakers')();\n }\n\n /**\n * Checks if listening capabilities are enabled.\n *\n * @returns {Promise<{ enabled: boolean }>}\n */\n async getListener() {\n return { enabled: true };\n }\n\n async listen(input: NodeJS.ReadableStream, options?: SarvamListenOptions): Promise<string> {\n return this.traced(async () => {\n // Collect audio data into buffer\n const chunks: Buffer[] = [];\n for await (const chunk of input) {\n if (typeof chunk === 'string') {\n chunks.push(Buffer.from(chunk));\n } else {\n chunks.push(chunk);\n }\n }\n const audioBuffer = Buffer.concat(chunks);\n\n const form = new FormData();\n const mimeType = options?.filetype === 'mp3' ? 'audio/mpeg' : 'audio/wav';\n const blob = new Blob([audioBuffer], { type: mimeType });\n\n form.append('file', blob);\n form.append('model', options?.model || 'saarika:v2');\n form.append('language_code', options?.languageCode || 'unknown');\n const requestOptions = {\n method: 'POST',\n headers: {\n 'api-subscription-key': this.apiKey!,\n },\n body: form,\n };\n\n try {\n const response = await fetch(`${this.baseUrl}/speech-to-text`, requestOptions);\n const result = (await response.json()) as any;\n //console.log(result);\n return result.transcript;\n } catch (error) {\n console.error('Error during speech-to-text request:', error);\n throw error;\n }\n }, 'voice.sarvam.listen')();\n }\n}\n"]}
1
+ {"version":3,"sources":["../src/voices.ts","../src/index.ts"],"names":[],"mappings":";;;;;;AAAO,IAAM,aAAA,GAAgB;AAAA,EAC3B,OAAA;AAAA,EACA,UAAA;AAAA,EACA,UAAA;AAAA,EACA,QAAA;AAAA,EACA,MAAA;AAAA,EACA,SAAA;AAAA,EACA,MAAA;AAAA,EACA,MAAA;AAAA,EACA,OAAA;AAAA,EACA,MAAA;AAAA,EACA,OAAA;AAAA,EACA;AACF,CAAA;;;ACcA,IAAM,kBAAA,GAAqB;AAAA,EACzB,KAAA,EAAO,WAAA;AAAA,EACP,MAAA,EAAQ,QAAQ,GAAA,CAAI,cAAA;AAAA,EACpB,QAAA,EAAU;AACZ,CAAA;AAEA,IAAM,qBAAA,GAAwB;AAAA,EAC5B,KAAA,EAAO,YAAA;AAAA,EACP,MAAA,EAAQ,QAAQ,GAAA,CAAI,cAEtB,CAAA;AAEO,IAAM,WAAA,GAAN,cAA0B,WAAA,CAAY;AAAA,EACnC,MAAA;AAAA,EACA,KAAA,GAAwB,WAAA;AAAA,EACxB,QAAA,GAA8B,OAAA;AAAA,EAC9B,aAAkC,EAAC;AAAA,EAC3C,OAAA,GAAyB,OAAA;AAAA,EACjB,OAAA,GAAU,uBAAA;AAAA,EAElB,WAAA,CAAY;AAAA,IACV,WAAA;AAAA,IACA,OAAA;AAAA,IACA;AAAA,GACF,GAII,EAAC,EAAG;AACN,IAAA,KAAA,CAAM;AAAA,MACJ,WAAA,EAAa;AAAA,QACX,IAAA,EAAM,WAAA,EAAa,KAAA,IAAS,kBAAA,CAAmB,KAAA;AAAA,QAC/C,MAAA,EAAQ,WAAA,EAAa,MAAA,IAAU,kBAAA,CAAmB;AAAA,OACpD;AAAA,MACA,cAAA,EAAgB;AAAA,QACd,IAAA,EAAM,cAAA,EAAgB,KAAA,IAAS,qBAAA,CAAsB,KAAA;AAAA,QACrD,MAAA,EAAQ,cAAA,EAAgB,KAAA,IAAS,qBAAA,CAAsB;AAAA,OACzD;AAAA,MACA;AAAA,KACD,CAAA;AAED,IAAA,IAAA,CAAK,MAAA,GAAS,WAAA,EAAa,MAAA,IAAU,kBAAA,CAAmB,MAAA;AACxD,IAAA,IAAI,CAAC,KAAK,MAAA,EAAQ;AAChB,MAAA,MAAM,IAAI,MAAM,4BAA4B,CAAA;AAAA,IAC9C;AACA,IAAA,IAAA,CAAK,KAAA,GAAQ,WAAA,EAAa,KAAA,IAAS,kBAAA,CAAmB,KAAA;AACtD,IAAA,IAAA,CAAK,QAAA,GAAW,WAAA,EAAa,QAAA,IAAY,kBAAA,CAAmB,QAAA;AAC5D,IAAA,IAAA,CAAK,UAAA,GAAa,WAAA,EAAa,UAAA,IAAc,EAAC;AAC9C,IAAA,IAAA,CAAK,UAAU,OAAA,IAAW,OAAA;AAAA,EAC5B;AAAA,EAEA,MAAc,WAAA,CAAY,QAAA,EAAkB,OAAA,EAAc;AACxD,IAAA,MAAM,OAAA,GAAU,IAAI,OAAA,CAAQ;AAAA,MAC1B,wBAAwB,IAAA,CAAK,MAAA;AAAA,MAC7B,cAAA,EAAgB;AAAA,KACjB,CAAA;AACD,IAAA,MAAM,QAAA,GAAW,MAAM,KAAA,CAAM,CAAA,EAAG,KAAK,OAAO,CAAA,EAAG,QAAQ,CAAA,CAAA,EAAI;AAAA,MACzD,MAAA,EAAQ,MAAA;AAAA,MACR,OAAA;AAAA,MACA,IAAA,EAAM,IAAA,CAAK,SAAA,CAAU,OAAO;AAAA,KAC7B,CAAA;AACD,IAAA,IAAI,CAAC,SAAS,EAAA,EAAI;AAChB,MAAA,IAAI,YAAA;AACJ,MAAA,IAAI;AACF,QAAA,MAAM,KAAA,GAAS,MAAM,QAAA,CAAS,IAAA,EAAK;AACnC,QAAA,YAAA,GAAe,KAAA,CAAM,WAAW,QAAA,CAAS,UAAA;AAAA,MAC3C,CAAA,CAAA,MAAQ;AACN,QAAA,YAAA,GAAe,QAAA,CAAS,UAAA;AAAA,MAC1B;AACA,MAAA,MAAM,IAAI,KAAA,CAAM,CAAA,qBAAA,EAAwB,YAAY,CAAA,CAAE,CAAA;AAAA,IACxD;AAEA,IAAA,OAAO,QAAA;AAAA,EACT;AAAA,EACA,MAAc,eAAe,MAAA,EAAgD;AAC3E,IAAA,MAAM,SAAmB,EAAC;AAC1B,IAAA,WAAA,MAAiB,SAAS,MAAA,EAAQ;AAChC,MAAA,IAAI,OAAO,UAAU,QAAA,EAAU;AAC7B,QAAA,MAAA,CAAO,IAAA,CAAK,MAAA,CAAO,IAAA,CAAK,KAAK,CAAC,CAAA;AAAA,MAChC,CAAA,MAAO;AACL,QAAA,MAAA,CAAO,KAAK,KAAK,CAAA;AAAA,MACnB;AAAA,IACF;AACA,IAAA,OAAO,MAAA,CAAO,MAAA,CAAO,MAAM,CAAA,CAAE,SAAS,OAAO,CAAA;AAAA,EAC/C;AAAA,EACA,MAAM,KAAA,CACJ,KAAA,EACA,OAAA,EACgC;AAChC,IAAA,MAAM,IAAA,GAAO,OAAO,KAAA,KAAU,QAAA,GAAW,QAAQ,MAAM,IAAA,CAAK,eAAe,KAAK,CAAA;AAEhF,IAAA,MAAM,OAAA,GAAU;AAAA,MACd,MAAA,EAAQ,CAAC,IAAI,CAAA;AAAA,MACb,sBAAsB,IAAA,CAAK,QAAA;AAAA,MAC3B,OAAA,EAAS,OAAA,EAAS,OAAA,IAAW,IAAA,CAAK,OAAA;AAAA,MAClC,OAAO,IAAA,CAAK,KAAA;AAAA,MACZ,GAAG,IAAA,CAAK;AAAA,KACV;AAEA,IAAA,MAAM,QAAA,GAAW,MAAM,IAAA,CAAK,WAAA,CAAY,mBAAmB,OAAO,CAAA;AAElE,IAAA,MAAM,EAAE,MAAA,EAAO,GAAK,MAAM,SAAS,IAAA,EAAK;AAExC,IAAA,IAAI,CAAC,MAAA,IAAU,CAAC,MAAA,CAAO,MAAA,EAAQ;AAC7B,MAAA,MAAM,IAAI,MAAM,kCAAkC,CAAA;AAAA,IACpD;AAGA,IAAA,MAAM,cAAc,MAAA,CAAO,IAAA,CAAK,MAAA,CAAO,CAAC,GAAG,QAAQ,CAAA;AAGnD,IAAA,MAAM,MAAA,GAAS,IAAI,WAAA,EAAY;AAC/B,IAAA,MAAA,CAAO,MAAM,WAAW,CAAA;AACxB,IAAA,MAAA,CAAO,GAAA,EAAI;AAEX,IAAA,OAAO,MAAA;AAAA,EACT;AAAA,EAEA,MAAM,WAAA,GAAc;AAClB,IAAA,OAAO,aAAA,CAAc,IAAI,CAAA,KAAA,MAAU;AAAA,MACjC,OAAA,EAAS;AAAA,KACX,CAAE,CAAA;AAAA,EACJ;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAOA,MAAM,WAAA,GAAc;AAClB,IAAA,OAAO,EAAE,SAAS,IAAA,EAAK;AAAA,EACzB;AAAA,EAEA,MAAM,MAAA,CAAO,KAAA,EAA8B,OAAA,EAAgD;AAEzF,IAAA,MAAM,SAAmB,EAAC;AAC1B,IAAA,WAAA,MAAiB,SAAS,KAAA,EAAO;AAC/B,MAAA,IAAI,OAAO,UAAU,QAAA,EAAU;AAC7B,QAAA,MAAA,CAAO,IAAA,CAAK,MAAA,CAAO,IAAA,CAAK,KAAK,CAAC,CAAA;AAAA,MAChC,CAAA,MAAO;AACL,QAAA,MAAA,CAAO,KAAK,KAAK,CAAA;AAAA,MACnB;AAAA,IACF;AACA,IAAA,MAAM,WAAA,GAAc,MAAA,CAAO,MAAA,CAAO,MAAM,CAAA;AAExC,IAAA,MAAM,IAAA,GAAO,IAAI,QAAA,EAAS;AAC1B,IAAA,MAAM,QAAA,GAAW,OAAA,EAAS,QAAA,KAAa,KAAA,GAAQ,YAAA,GAAe,WAAA;AAC9D,IAAA,MAAM,IAAA,GAAO,IAAI,IAAA,CAAK,CAAC,WAAW,CAAA,EAAG,EAAE,IAAA,EAAM,QAAA,EAAU,CAAA;AAEvD,IAAA,IAAA,CAAK,MAAA,CAAO,QAAQ,IAAI,CAAA;AACxB,IAAA,IAAA,CAAK,MAAA,CAAO,OAAA,EAAS,OAAA,EAAS,KAAA,IAAS,YAAY,CAAA;AACnD,IAAA,IAAA,CAAK,MAAA,CAAO,eAAA,EAAiB,OAAA,EAAS,YAAA,IAAgB,SAAS,CAAA;AAC/D,IAAA,MAAM,cAAA,GAAiB;AAAA,MACrB,MAAA,EAAQ,MAAA;AAAA,MACR,OAAA,EAAS;AAAA,QACP,wBAAwB,IAAA,CAAK;AAAA,OAC/B;AAAA,MACA,IAAA,EAAM;AAAA,KACR;AAEA,IAAA,IAAI;AACF,MAAA,MAAM,WAAW,MAAM,KAAA,CAAM,GAAG,IAAA,CAAK,OAAO,mBAAmB,cAAc,CAAA;AAC7E,MAAA,MAAM,MAAA,GAAU,MAAM,QAAA,CAAS,IAAA,EAAK;AAEpC,MAAA,OAAO,MAAA,CAAO,UAAA;AAAA,IAChB,SAAS,KAAA,EAAO;AACd,MAAA,OAAA,CAAQ,KAAA,CAAM,wCAAwC,KAAK,CAAA;AAC3D,MAAA,MAAM,KAAA;AAAA,IACR;AAAA,EACF;AACF","file":"index.js","sourcesContent":["export const SARVAM_VOICES = [\n 'meera',\n 'pavithra',\n 'maitreyi',\n 'arvind',\n 'amol',\n 'amartya',\n 'diya',\n 'neel',\n 'misha',\n 'vian',\n 'arjun',\n 'maya',\n] as const;\n\nexport const SARVAM_TTS_LANGUAGES = [\n 'hi-IN',\n 'bn-IN',\n 'kn-IN',\n 'ml-IN',\n 'mr-IN',\n 'od-IN',\n 'pa-IN',\n 'ta-IN',\n 'te-IN',\n 'en-IN',\n 'gu-IN',\n] as const;\n\nexport const SARVAM_STT_LANGUAGES = [...SARVAM_TTS_LANGUAGES, 'unknown'] as const;\n\nexport const SARVAM_TTS_MODELS = ['bulbul:v1'] as const;\nexport const SARVAM_STT_MODELS = ['saarika:v1', 'saarika:v2', 'saarika:flash'] as const;\n\nexport type SarvamVoiceId = (typeof SARVAM_VOICES)[number];\n\nexport type SarvamTTSLanguage = (typeof SARVAM_TTS_LANGUAGES)[number];\nexport type SarvamSTTLanguage = (typeof SARVAM_STT_LANGUAGES)[number];\n\nexport type SarvamTTSModel = (typeof SARVAM_TTS_MODELS)[number];\nexport type SarvamSTTModel = (typeof SARVAM_STT_MODELS)[number];\n","import { PassThrough } from 'node:stream';\n\nimport { MastraVoice } from '@mastra/core/voice';\nimport { SARVAM_VOICES } from './voices';\nimport type { SarvamTTSLanguage, SarvamSTTLanguage, SarvamSTTModel, SarvamTTSModel, SarvamVoiceId } from './voices';\n\ninterface SarvamVoiceConfig {\n apiKey?: string;\n model?: SarvamTTSModel;\n language?: SarvamTTSLanguage;\n properties?: {\n pitch?: number;\n pace?: number;\n loudness?: number;\n speech_sample_rate?: 8000 | 16000 | 22050;\n enable_preprocessing?: boolean;\n eng_interpolation_wt?: number;\n };\n}\n\ninterface SarvamListenOptions {\n apiKey?: string;\n model?: SarvamSTTModel;\n languageCode?: SarvamSTTLanguage;\n filetype?: 'mp3' | 'wav';\n}\n\nconst defaultSpeechModel = {\n model: 'bulbul:v1' as const,\n apiKey: process.env.SARVAM_API_KEY,\n language: 'en-IN' as const,\n};\n\nconst defaultListeningModel = {\n model: 'saarika:v2' as const,\n apiKey: process.env.SARVAM_API_KEY,\n language_code: 'unknown' as const,\n};\n\nexport class SarvamVoice extends MastraVoice {\n private apiKey?: string;\n private model: SarvamTTSModel = 'bulbul:v1';\n private language: SarvamTTSLanguage = 'en-IN';\n private properties: Record<string, any> = {};\n speaker: SarvamVoiceId = 'meera';\n private baseUrl = 'https://api.sarvam.ai';\n\n constructor({\n speechModel,\n speaker,\n listeningModel,\n }: {\n speechModel?: SarvamVoiceConfig;\n speaker?: SarvamVoiceId;\n listeningModel?: SarvamListenOptions;\n } = {}) {\n super({\n speechModel: {\n name: speechModel?.model ?? defaultSpeechModel.model,\n apiKey: speechModel?.apiKey ?? defaultSpeechModel.apiKey,\n },\n listeningModel: {\n name: listeningModel?.model ?? defaultListeningModel.model,\n apiKey: listeningModel?.model ?? defaultListeningModel.apiKey,\n },\n speaker,\n });\n\n this.apiKey = speechModel?.apiKey || defaultSpeechModel.apiKey;\n if (!this.apiKey) {\n throw new Error('SARVAM_API_KEY must be set');\n }\n this.model = speechModel?.model || defaultSpeechModel.model;\n this.language = speechModel?.language || defaultSpeechModel.language;\n this.properties = speechModel?.properties || {};\n this.speaker = speaker || 'meera';\n }\n\n private async makeRequest(endpoint: string, payload: any) {\n const headers = new Headers({\n 'api-subscription-key': this.apiKey!,\n 'Content-Type': 'application/json',\n });\n const response = await fetch(`${this.baseUrl}${endpoint}`, {\n method: 'POST',\n headers,\n body: JSON.stringify(payload),\n });\n if (!response.ok) {\n let errorMessage;\n try {\n const error = (await response.json()) as { message?: string };\n errorMessage = error.message || response.statusText;\n } catch {\n errorMessage = response.statusText;\n }\n throw new Error(`Sarvam AI API Error: ${errorMessage}`);\n }\n\n return response;\n }\n private async streamToString(stream: NodeJS.ReadableStream): Promise<string> {\n const chunks: Buffer[] = [];\n for await (const chunk of stream) {\n if (typeof chunk === 'string') {\n chunks.push(Buffer.from(chunk));\n } else {\n chunks.push(chunk);\n }\n }\n return Buffer.concat(chunks).toString('utf-8');\n }\n async speak(\n input: string | NodeJS.ReadableStream,\n options?: { speaker?: SarvamVoiceId },\n ): Promise<NodeJS.ReadableStream> {\n const text = typeof input === 'string' ? input : await this.streamToString(input);\n\n const payload = {\n inputs: [text],\n target_language_code: this.language,\n speaker: options?.speaker || this.speaker,\n model: this.model,\n ...this.properties,\n };\n\n const response = await this.makeRequest('/text-to-speech', payload);\n\n const { audios } = (await response.json()) as { audios: any };\n\n if (!audios || !audios.length) {\n throw new Error('No audio received from Sarvam AI');\n }\n\n // Convert base64 to buffer\n const audioBuffer = Buffer.from(audios[0], 'base64');\n\n // Create a PassThrough stream for the audio\n const stream = new PassThrough();\n stream.write(audioBuffer);\n stream.end();\n\n return stream;\n }\n\n async getSpeakers() {\n return SARVAM_VOICES.map(voice => ({\n voiceId: voice,\n }));\n }\n\n /**\n * Checks if listening capabilities are enabled.\n *\n * @returns {Promise<{ enabled: boolean }>}\n */\n async getListener() {\n return { enabled: true };\n }\n\n async listen(input: NodeJS.ReadableStream, options?: SarvamListenOptions): Promise<string> {\n // Collect audio data into buffer\n const chunks: Buffer[] = [];\n for await (const chunk of input) {\n if (typeof chunk === 'string') {\n chunks.push(Buffer.from(chunk));\n } else {\n chunks.push(chunk);\n }\n }\n const audioBuffer = Buffer.concat(chunks);\n\n const form = new FormData();\n const mimeType = options?.filetype === 'mp3' ? 'audio/mpeg' : 'audio/wav';\n const blob = new Blob([audioBuffer], { type: mimeType });\n\n form.append('file', blob);\n form.append('model', options?.model || 'saarika:v2');\n form.append('language_code', options?.languageCode || 'unknown');\n const requestOptions = {\n method: 'POST',\n headers: {\n 'api-subscription-key': this.apiKey!,\n },\n body: form,\n };\n\n try {\n const response = await fetch(`${this.baseUrl}/speech-to-text`, requestOptions);\n const result = (await response.json()) as any;\n //console.log(result);\n return result.transcript;\n } catch (error) {\n console.error('Error during speech-to-text request:', error);\n throw error;\n }\n }\n}\n"]}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@mastra/voice-sarvam",
3
- "version": "0.11.12",
3
+ "version": "0.12.0-beta.1",
4
4
  "description": "Mastra Sarvam AI voice integration",
5
5
  "type": "module",
6
6
  "files": [
@@ -23,18 +23,18 @@
23
23
  "./package.json": "./package.json"
24
24
  },
25
25
  "license": "Apache-2.0",
26
- "dependencies": {},
27
26
  "devDependencies": {
28
- "@microsoft/api-extractor": "^7.52.8",
29
- "@types/node": "^20.19.0",
27
+ "@types/node": "22.13.17",
28
+ "@vitest/coverage-v8": "4.0.12",
29
+ "@vitest/ui": "4.0.12",
30
30
  "eslint": "^9.37.0",
31
31
  "tsup": "^8.5.0",
32
- "typescript": "^5.8.3",
33
- "vitest": "^3.2.4",
32
+ "typescript": "^5.9.3",
33
+ "vitest": "4.0.16",
34
34
  "zod": "^3.25.76",
35
- "@internal/lint": "0.0.58",
36
- "@internal/types-builder": "0.0.33",
37
- "@mastra/core": "0.24.0"
35
+ "@internal/lint": "0.0.53",
36
+ "@internal/types-builder": "0.0.28",
37
+ "@mastra/core": "1.0.0-beta.20"
38
38
  },
39
39
  "keywords": [
40
40
  "mastra",
@@ -47,7 +47,7 @@
47
47
  "speech-recognition"
48
48
  ],
49
49
  "peerDependencies": {
50
- "@mastra/core": ">=0.18.1-0 <0.25.0-0",
50
+ "@mastra/core": ">=1.0.0-0 <2.0.0-0",
51
51
  "zod": "^3.25.0 || ^4.0.0"
52
52
  },
53
53
  "homepage": "https://mastra.ai",
@@ -59,15 +59,12 @@
59
59
  "bugs": {
60
60
  "url": "https://github.com/mastra-ai/mastra/issues"
61
61
  },
62
- "publishConfig": {
63
- "access": "public",
64
- "publish-branch": [
65
- "main",
66
- "0.x"
67
- ]
62
+ "engines": {
63
+ "node": ">=22.13.0"
68
64
  },
69
65
  "scripts": {
70
66
  "build": "tsup --silent --config tsup.config.ts",
67
+ "postbuild": "pnpx tsx ../../scripts/generate-package-docs.ts voice/sarvam",
71
68
  "build:watch": "tsup --watch --silent --config tsup.config.ts",
72
69
  "test": "vitest run",
73
70
  "test:watch": "vitest watch",