@mastra/voice-sarvam 0.1.2 → 0.1.3-alpha.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -21,17 +21,24 @@ SARVAM_API_KEY=your_api_key
  ```typescript
  import { SarvamVoice } from '@mastra/voice-sarvam';

- const voice = new CompositeVoice({
-   speakProvider: new SarvamVoice({
-     speechModel: { apiKey: 'YOUR-API-KEY' },
-     speaker: 'meera',
-   }),
+ const voice = new SarvamVoice({
+   speechModel: {
+     model: 'bulbul:v1',
+     apiKey: process.env.SARVAM_API_KEY!,
+     language: 'en-IN',
+   },
+   listeningModel: {
+     apiKey: process.env.SARVAM_API_KEY!,
+     model: 'saarika:v2',
+     languageCode: 'unknown', // By default only works with saarika:v2
+   },
+   speaker: 'meera',
  });

  // Create an agent with voice capabilities
  export const agent = new Agent({
    name: 'Agent',
-   instructions: `You are a helpful assistant with voice capabilities.`,
+   instructions: `You are a helpful assistant with both TTS and STT capabilities.`,
    model: google('gemini-1.5-pro-latest'),
    voice: voice,
  });
@@ -41,7 +48,7 @@ const speakers = await voice.getSpeakers();

  // Generate speech and save to file
  const audio = await agent.speak("Hello, I'm your AI assistant!");
- const filePath = path.join(process.cwd(), 'agent.mp3');
+ const filePath = path.join(process.cwd(), 'agent.wav');
  const writer = createWriteStream(filePath);

  audio.pipe(writer);
@@ -62,11 +69,14 @@ const streamWriter = createWriteStream(streamFilePath);
  audioStream.pipe(streamWriter);

  console.log(`Speech saved to ${filePath} and ${streamFilePath}`);
+
+ // Generate Text from an audio stream
+ const text = await voice.listen(audioStream);
  ```

  ## Features

- - High-quality Text-to-Speech synthesis
+ - High-quality Text-to-Speech and Speech-to-Text synthesis
  - Support for 10+ Indian languages
  - Choice of 10+ diverse speakers
  - Advanced voice customization options
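
Taken together, the README changes add a speech-to-text path next to the existing text-to-speech flow. A minimal sketch of that new path, assuming the `agent.wav` file written by the `speak()` example above and a Node 18+ ESM context (top-level await):

```typescript
import { createReadStream } from 'fs';
import path from 'path';
import { SarvamVoice } from '@mastra/voice-sarvam';

// Transcribe the WAV file produced by the speak() example above.
// The listeningModel values mirror the ones shown in the README diff.
const voice = new SarvamVoice({
  listeningModel: {
    apiKey: process.env.SARVAM_API_KEY!,
    model: 'saarika:v2',
    languageCode: 'unknown', // language detection; per the README, saarika:v2 only
  },
});

const audioStream = createReadStream(path.join(process.cwd(), 'agent.wav'));
const transcript = await voice.listen(audioStream, { filetype: 'wav' });
console.log(transcript);
```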
@@ -1,14 +1,29 @@
  import { MastraVoice } from '@mastra/core/voice';

- export declare const SARVAM_LANGUAGES: readonly ["hi-IN", "bn-IN", "kn-IN", "ml-IN", "mr-IN", "od-IN", "pa-IN", "ta-IN", "te-IN", "en-IN", "gu-IN"];
+ export declare const SARVAM_STT_LANGUAGES: readonly ["hi-IN", "bn-IN", "kn-IN", "ml-IN", "mr-IN", "od-IN", "pa-IN", "ta-IN", "te-IN", "en-IN", "gu-IN", "unknown"];

- export declare const SARVAM_MODELS: readonly ["bulbul:v1"];
+ export declare const SARVAM_STT_MODELS: readonly ["saarika:v1", "saarika:v2", "saarika:flash"];
+
+ export declare const SARVAM_TTS_LANGUAGES: readonly ["hi-IN", "bn-IN", "kn-IN", "ml-IN", "mr-IN", "od-IN", "pa-IN", "ta-IN", "te-IN", "en-IN", "gu-IN"];
+
+ export declare const SARVAM_TTS_MODELS: readonly ["bulbul:v1"];

  export declare const SARVAM_VOICES: readonly ["meera", "pavithra", "maitreyi", "arvind", "amol", "amartya", "diya", "neel", "misha", "vian", "arjun", "maya"];

- export declare type SarvamLanguage = (typeof SARVAM_LANGUAGES)[number];
+ declare interface SarvamListenOptions {
+     apiKey?: string;
+     model?: SarvamSTTModel;
+     languageCode?: SarvamSTTLanguage;
+     filetype?: 'mp3' | 'wav';
+ }
+
+ export declare type SarvamSTTLanguage = (typeof SARVAM_STT_LANGUAGES)[number];
+
+ export declare type SarvamSTTModel = (typeof SARVAM_STT_MODELS)[number];
+
+ export declare type SarvamTTSLanguage = (typeof SARVAM_TTS_LANGUAGES)[number];

- export declare type SarvamModel = (typeof SARVAM_MODELS)[number];
+ export declare type SarvamTTSModel = (typeof SARVAM_TTS_MODELS)[number];

  export declare class SarvamVoice extends MastraVoice {
      private apiKey?;
@@ -17,9 +32,10 @@ export declare class SarvamVoice extends MastraVoice {
      private properties;
      protected speaker: SarvamVoiceId;
      private baseUrl;
-     constructor({ speechModel, speaker, }?: {
+     constructor({ speechModel, speaker, listeningModel, }?: {
          speechModel?: SarvamVoiceConfig;
          speaker?: SarvamVoiceId;
+         listeningModel?: SarvamListenOptions;
      });
      private makeRequest;
      private streamToString;
@@ -29,13 +45,13 @@ export declare class SarvamVoice extends MastraVoice {
      getSpeakers(): Promise<{
          voiceId: "meera" | "pavithra" | "maitreyi" | "arvind" | "amol" | "amartya" | "diya" | "neel" | "misha" | "vian" | "arjun" | "maya";
      }[]>;
-     listen(_input: NodeJS.ReadableStream, _options?: Record<string, unknown>): Promise<string | NodeJS.ReadableStream>;
+     listen(input: NodeJS.ReadableStream, options?: SarvamListenOptions): Promise<string>;
  }

  declare interface SarvamVoiceConfig {
      apiKey?: string;
-     model?: SarvamModel;
-     language?: SarvamLanguage;
+     model?: SarvamTTSModel;
+     language?: SarvamTTSLanguage;
      properties?: {
          pitch?: number;
          pace?: number;
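
The declaration changes split the former `SarvamLanguage` and `SarvamModel` types into separate STT and TTS variants and introduce `SarvamListenOptions` for the new `listeningModel` argument and `listen()` options. A sketch of how the exported types line up with the constructor, using illustrative members of the declared literal unions:

```typescript
import { SarvamVoice } from '@mastra/voice-sarvam';
import type { SarvamSTTModel, SarvamTTSLanguage } from '@mastra/voice-sarvam';

// Illustrative values drawn from the literal unions declared above.
const sttModel: SarvamSTTModel = 'saarika:flash';
const ttsLanguage: SarvamTTSLanguage = 'hi-IN';

const voice = new SarvamVoice({
  speechModel: { model: 'bulbul:v1', language: ttsLanguage, apiKey: process.env.SARVAM_API_KEY! },
  listeningModel: { model: sttModel, languageCode: 'unknown' },
  speaker: 'meera',
});
```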
@@ -1,14 +1,29 @@
  import { MastraVoice } from '@mastra/core/voice';

- export declare const SARVAM_LANGUAGES: readonly ["hi-IN", "bn-IN", "kn-IN", "ml-IN", "mr-IN", "od-IN", "pa-IN", "ta-IN", "te-IN", "en-IN", "gu-IN"];
+ export declare const SARVAM_STT_LANGUAGES: readonly ["hi-IN", "bn-IN", "kn-IN", "ml-IN", "mr-IN", "od-IN", "pa-IN", "ta-IN", "te-IN", "en-IN", "gu-IN", "unknown"];

- export declare const SARVAM_MODELS: readonly ["bulbul:v1"];
+ export declare const SARVAM_STT_MODELS: readonly ["saarika:v1", "saarika:v2", "saarika:flash"];
+
+ export declare const SARVAM_TTS_LANGUAGES: readonly ["hi-IN", "bn-IN", "kn-IN", "ml-IN", "mr-IN", "od-IN", "pa-IN", "ta-IN", "te-IN", "en-IN", "gu-IN"];
+
+ export declare const SARVAM_TTS_MODELS: readonly ["bulbul:v1"];

  export declare const SARVAM_VOICES: readonly ["meera", "pavithra", "maitreyi", "arvind", "amol", "amartya", "diya", "neel", "misha", "vian", "arjun", "maya"];

- export declare type SarvamLanguage = (typeof SARVAM_LANGUAGES)[number];
+ declare interface SarvamListenOptions {
+     apiKey?: string;
+     model?: SarvamSTTModel;
+     languageCode?: SarvamSTTLanguage;
+     filetype?: 'mp3' | 'wav';
+ }
+
+ export declare type SarvamSTTLanguage = (typeof SARVAM_STT_LANGUAGES)[number];
+
+ export declare type SarvamSTTModel = (typeof SARVAM_STT_MODELS)[number];
+
+ export declare type SarvamTTSLanguage = (typeof SARVAM_TTS_LANGUAGES)[number];

- export declare type SarvamModel = (typeof SARVAM_MODELS)[number];
+ export declare type SarvamTTSModel = (typeof SARVAM_TTS_MODELS)[number];

  export declare class SarvamVoice extends MastraVoice {
      private apiKey?;
@@ -17,9 +32,10 @@ export declare class SarvamVoice extends MastraVoice {
      private properties;
      protected speaker: SarvamVoiceId;
      private baseUrl;
-     constructor({ speechModel, speaker, }?: {
+     constructor({ speechModel, speaker, listeningModel, }?: {
          speechModel?: SarvamVoiceConfig;
          speaker?: SarvamVoiceId;
+         listeningModel?: SarvamListenOptions;
      });
      private makeRequest;
      private streamToString;
@@ -29,13 +45,13 @@ export declare class SarvamVoice extends MastraVoice {
      getSpeakers(): Promise<{
          voiceId: "meera" | "pavithra" | "maitreyi" | "arvind" | "amol" | "amartya" | "diya" | "neel" | "misha" | "vian" | "arjun" | "maya";
      }[]>;
-     listen(_input: NodeJS.ReadableStream, _options?: Record<string, unknown>): Promise<string | NodeJS.ReadableStream>;
+     listen(input: NodeJS.ReadableStream, options?: SarvamListenOptions): Promise<string>;
  }

  declare interface SarvamVoiceConfig {
      apiKey?: string;
-     model?: SarvamModel;
-     language?: SarvamLanguage;
+     model?: SarvamTTSModel;
+     language?: SarvamTTSLanguage;
      properties?: {
          pitch?: number;
          pace?: number;
package/dist/index.cjs CHANGED
@@ -22,6 +22,14 @@ var SARVAM_VOICES = [
  ];

  // src/index.ts
+ var defaultSpeechModel = {
+   model: "bulbul:v1",
+   apiKey: process.env.SARVAM_API_KEY,
+   language: "en-IN"
+ };
+ var defaultListeningModel = {
+   model: "saarika:v2",
+   apiKey: process.env.SARVAM_API_KEY};
  var SarvamVoice = class extends voice.MastraVoice {
    apiKey;
    model = "bulbul:v1";
@@ -31,18 +39,18 @@ var SarvamVoice = class extends voice.MastraVoice {
    baseUrl = "https://api.sarvam.ai";
    constructor({
      speechModel,
-     speaker
+     speaker,
+     listeningModel
    } = {}) {
-     const defaultSpeechModel = {
-       model: "bulbul:v1",
-       apiKey: process.env.SARVAM_API_KEY,
-       language: "en-IN"
-     };
      super({
        speechModel: {
          name: speechModel?.model ?? defaultSpeechModel.model,
          apiKey: speechModel?.apiKey ?? defaultSpeechModel.apiKey
        },
+       listeningModel: {
+         name: listeningModel?.model ?? defaultListeningModel.model,
+         apiKey: listeningModel?.model ?? defaultListeningModel.apiKey
+       },
        speaker
      });
      this.apiKey = speechModel?.apiKey || defaultSpeechModel.apiKey;
@@ -116,9 +124,39 @@ var SarvamVoice = class extends voice.MastraVoice {
        }));
      }, "voice.deepgram.getSpeakers")();
    }
-   //Todo: Implement the listen method
-   async listen(_input, _options) {
-     throw new Error("Listening method coming soon.");
+   async listen(input, options) {
+     return this.traced(async () => {
+       const chunks = [];
+       for await (const chunk of input) {
+         if (typeof chunk === "string") {
+           chunks.push(Buffer.from(chunk));
+         } else {
+           chunks.push(chunk);
+         }
+       }
+       const audioBuffer = Buffer.concat(chunks);
+       const form = new FormData();
+       const mimeType = options?.filetype === "mp3" ? "audio/mpeg" : "audio/wav";
+       const blob = new Blob([audioBuffer], { type: mimeType });
+       form.append("file", blob);
+       form.append("model", options?.model || "saarika:v2");
+       form.append("language_code", options?.languageCode || "unknown");
+       const requestOptions = {
+         method: "POST",
+         headers: {
+           "api-subscription-key": this.apiKey
+         },
+         body: form
+       };
+       try {
+         const response = await fetch(`${this.baseUrl}/speech-to-text`, requestOptions);
+         const result = await response.json();
+         return result.transcript;
+       } catch (error) {
+         console.error("Error during speech-to-text request:", error);
+         throw error;
+       }
+     }, "voice.sarvam.listen")();
    }
  };

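For reference, the compiled `listen()` above buffers the incoming stream and posts it as multipart form data to Sarvam's `/speech-to-text` endpoint, reading a `transcript` field from the JSON response. A standalone sketch of that request shape, assuming Node 18+ globals (`fetch`, `FormData`, `Blob`) and a hypothetical `transcribe` helper name:

```typescript
// Standalone sketch of the HTTP request made by the compiled listen() above.
// Field names mirror the diff; transcribe() is a hypothetical helper.
async function transcribe(audio: Buffer, apiKey: string, filetype: 'mp3' | 'wav' = 'wav'): Promise<string> {
  const form = new FormData();
  form.append('file', new Blob([audio], { type: filetype === 'mp3' ? 'audio/mpeg' : 'audio/wav' }));
  form.append('model', 'saarika:v2');
  form.append('language_code', 'unknown');

  const response = await fetch('https://api.sarvam.ai/speech-to-text', {
    method: 'POST',
    headers: { 'api-subscription-key': apiKey },
    body: form,
  });
  const result = (await response.json()) as { transcript: string };
  return result.transcript;
}
```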
package/dist/index.js CHANGED
@@ -20,6 +20,14 @@ var SARVAM_VOICES = [
  ];

  // src/index.ts
+ var defaultSpeechModel = {
+   model: "bulbul:v1",
+   apiKey: process.env.SARVAM_API_KEY,
+   language: "en-IN"
+ };
+ var defaultListeningModel = {
+   model: "saarika:v2",
+   apiKey: process.env.SARVAM_API_KEY};
  var SarvamVoice = class extends MastraVoice {
    apiKey;
    model = "bulbul:v1";
@@ -29,18 +37,18 @@ var SarvamVoice = class extends MastraVoice {
    baseUrl = "https://api.sarvam.ai";
    constructor({
      speechModel,
-     speaker
+     speaker,
+     listeningModel
    } = {}) {
-     const defaultSpeechModel = {
-       model: "bulbul:v1",
-       apiKey: process.env.SARVAM_API_KEY,
-       language: "en-IN"
-     };
      super({
        speechModel: {
          name: speechModel?.model ?? defaultSpeechModel.model,
          apiKey: speechModel?.apiKey ?? defaultSpeechModel.apiKey
        },
+       listeningModel: {
+         name: listeningModel?.model ?? defaultListeningModel.model,
+         apiKey: listeningModel?.model ?? defaultListeningModel.apiKey
+       },
        speaker
      });
      this.apiKey = speechModel?.apiKey || defaultSpeechModel.apiKey;
@@ -114,9 +122,39 @@ var SarvamVoice = class extends MastraVoice {
        }));
      }, "voice.deepgram.getSpeakers")();
    }
-   //Todo: Implement the listen method
-   async listen(_input, _options) {
-     throw new Error("Listening method coming soon.");
+   async listen(input, options) {
+     return this.traced(async () => {
+       const chunks = [];
+       for await (const chunk of input) {
+         if (typeof chunk === "string") {
+           chunks.push(Buffer.from(chunk));
+         } else {
+           chunks.push(chunk);
+         }
+       }
+       const audioBuffer = Buffer.concat(chunks);
+       const form = new FormData();
+       const mimeType = options?.filetype === "mp3" ? "audio/mpeg" : "audio/wav";
+       const blob = new Blob([audioBuffer], { type: mimeType });
+       form.append("file", blob);
+       form.append("model", options?.model || "saarika:v2");
+       form.append("language_code", options?.languageCode || "unknown");
+       const requestOptions = {
+         method: "POST",
+         headers: {
+           "api-subscription-key": this.apiKey
+         },
+         body: form
+       };
+       try {
+         const response = await fetch(`${this.baseUrl}/speech-to-text`, requestOptions);
+         const result = await response.json();
+         return result.transcript;
+       } catch (error) {
+         console.error("Error during speech-to-text request:", error);
+         throw error;
+       }
+     }, "voice.sarvam.listen")();
    }
  };

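The ESM build mirrors the CJS output above; the only notable difference is extending `MastraVoice` directly instead of `voice.MastraVoice`. The stream-buffering step that precedes the upload can be read in isolation roughly as follows (hypothetical helper name, handling the same string and Buffer chunk cases as the compiled code):

```typescript
// Collect a NodeJS.ReadableStream into one Buffer, mirroring the chunk
// handling inside the compiled listen() above.
async function streamToBuffer(input: NodeJS.ReadableStream): Promise<Buffer> {
  const chunks: Buffer[] = [];
  for await (const chunk of input) {
    chunks.push(typeof chunk === 'string' ? Buffer.from(chunk) : chunk);
  }
  return Buffer.concat(chunks);
}
```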
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
    "name": "@mastra/voice-sarvam",
-   "version": "0.1.2",
+   "version": "0.1.3-alpha.0",
    "description": "Mastra Sarvam AI voice integration",
    "type": "module",
    "files": [
@@ -23,7 +23,7 @@
    },
    "dependencies": {
      "zod": "^3.24.2",
-     "@mastra/core": "^0.6.3"
+     "@mastra/core": "^0.6.4-alpha.0"
    },
    "devDependencies": {
      "@microsoft/api-extractor": "^7.52.1",