@mastra/voice-sarvam 0.1.2 → 0.1.3-alpha.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +19 -9
- package/dist/_tsup-dts-rollup.d.cts +24 -8
- package/dist/_tsup-dts-rollup.d.ts +24 -8
- package/dist/index.cjs +47 -9
- package/dist/index.js +47 -9
- package/package.json +12 -2
package/README.md
CHANGED
@@ -1,6 +1,6 @@
 # @mastra/voice-sarvam

-Sarvam Voice integration for Mastra, providing Text-to-Speech (TTS) capabilities using Sarvam's voice technology.
+Sarvam Voice integration for Mastra, providing Text-to-Speech (TTS) and Speech-to-text (STT) capabilities using Sarvam's voice technology.

 ## Installation

@@ -21,17 +21,24 @@ SARVAM_API_KEY=your_api_key
 ```typescript
 import { SarvamVoice } from '@mastra/voice-sarvam';

-const voice = new
-
-
-
-
+const voice = new SarvamVoice({
+  speechModel: {
+    model: 'bulbul:v1',
+    apiKey: process.env.SARVAM_API_KEY!,
+    language: 'en-IN',
+  },
+  listeningModel: {
+    apiKey: process.env.SARVAM_API_KEY!,
+    model: 'saarika:v2',
+    languageCode: 'unknown', // By default only works with saarika:v2
+  },
+  speaker: 'meera',
 });

 // Create an agent with voice capabilities
 export const agent = new Agent({
   name: 'Agent',
-  instructions: `You are a helpful assistant with
+  instructions: `You are a helpful assistant with both TTS and STT capabilities.`,
   model: google('gemini-1.5-pro-latest'),
   voice: voice,
 });
@@ -41,7 +48,7 @@ const speakers = await voice.getSpeakers();

 // Generate speech and save to file
 const audio = await agent.speak("Hello, I'm your AI assistant!");
-const filePath = path.join(process.cwd(), 'agent.
+const filePath = path.join(process.cwd(), 'agent.wav');
 const writer = createWriteStream(filePath);

 audio.pipe(writer);
@@ -62,11 +69,14 @@ const streamWriter = createWriteStream(streamFilePath);
 audioStream.pipe(streamWriter);

 console.log(`Speech saved to ${filePath} and ${streamFilePath}`);
+
+// Generate Text from an audio stream
+const text = await voice.listen(audioStream);
 ```

 ## Features

-- High-quality Text-to-Speech synthesis
+- High-quality Text-to-Speech and Speech-to-Text synthesis
 - Support for 10+ Indian languages
 - Choice of 10+ diverse speakers
 - Advanced voice customization options
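The README changes above only call `voice.listen()` on the stream returned by `speak()`. For context, here is a minimal sketch of transcribing a pre-recorded file with the new STT support; the `./recording.wav` path and the listen-only configuration are assumptions for illustration, while the `SarvamVoice` options and the `listen(stream, options)` signature come from this release's declarations.

```typescript
import { createReadStream } from 'fs';
import { SarvamVoice } from '@mastra/voice-sarvam';

// Hypothetical STT-only setup; speechModel is omitted so the TTS defaults apply.
const voice = new SarvamVoice({
  listeningModel: {
    apiKey: process.env.SARVAM_API_KEY!,
    model: 'saarika:v2',
    languageCode: 'unknown', // automatic language detection (saarika:v2 only)
  },
});

// Transcribe an existing recording (path is illustrative).
const audio = createReadStream('./recording.wav');
const text = await voice.listen(audio, { filetype: 'wav' });
console.log(text);
```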
package/dist/_tsup-dts-rollup.d.cts
CHANGED
@@ -1,14 +1,29 @@
 import { MastraVoice } from '@mastra/core/voice';

-export declare const
+export declare const SARVAM_STT_LANGUAGES: readonly ["hi-IN", "bn-IN", "kn-IN", "ml-IN", "mr-IN", "od-IN", "pa-IN", "ta-IN", "te-IN", "en-IN", "gu-IN", "unknown"];

-export declare const
+export declare const SARVAM_STT_MODELS: readonly ["saarika:v1", "saarika:v2", "saarika:flash"];
+
+export declare const SARVAM_TTS_LANGUAGES: readonly ["hi-IN", "bn-IN", "kn-IN", "ml-IN", "mr-IN", "od-IN", "pa-IN", "ta-IN", "te-IN", "en-IN", "gu-IN"];
+
+export declare const SARVAM_TTS_MODELS: readonly ["bulbul:v1"];

 export declare const SARVAM_VOICES: readonly ["meera", "pavithra", "maitreyi", "arvind", "amol", "amartya", "diya", "neel", "misha", "vian", "arjun", "maya"];

-
+declare interface SarvamListenOptions {
+    apiKey?: string;
+    model?: SarvamSTTModel;
+    languageCode?: SarvamSTTLanguage;
+    filetype?: 'mp3' | 'wav';
+}
+
+export declare type SarvamSTTLanguage = (typeof SARVAM_STT_LANGUAGES)[number];
+
+export declare type SarvamSTTModel = (typeof SARVAM_STT_MODELS)[number];
+
+export declare type SarvamTTSLanguage = (typeof SARVAM_TTS_LANGUAGES)[number];

-export declare type
+export declare type SarvamTTSModel = (typeof SARVAM_TTS_MODELS)[number];

 export declare class SarvamVoice extends MastraVoice {
     private apiKey?;
@@ -17,9 +32,10 @@ export declare class SarvamVoice extends MastraVoice {
     private properties;
     protected speaker: SarvamVoiceId;
     private baseUrl;
-    constructor({ speechModel, speaker, }?: {
+    constructor({ speechModel, speaker, listeningModel, }?: {
         speechModel?: SarvamVoiceConfig;
         speaker?: SarvamVoiceId;
+        listeningModel?: SarvamListenOptions;
     });
     private makeRequest;
     private streamToString;
@@ -29,13 +45,13 @@ export declare class SarvamVoice extends MastraVoice {
     getSpeakers(): Promise<{
         voiceId: "meera" | "pavithra" | "maitreyi" | "arvind" | "amol" | "amartya" | "diya" | "neel" | "misha" | "vian" | "arjun" | "maya";
     }[]>;
-    listen(
+    listen(input: NodeJS.ReadableStream, options?: SarvamListenOptions): Promise<string>;
 }

 declare interface SarvamVoiceConfig {
     apiKey?: string;
-    model?:
-    language?:
+    model?: SarvamTTSModel;
+    language?: SarvamTTSLanguage;
     properties?: {
         pitch?: number;
         pace?: number;
package/dist/_tsup-dts-rollup.d.ts
CHANGED
@@ -1,14 +1,29 @@
 import { MastraVoice } from '@mastra/core/voice';

-export declare const
+export declare const SARVAM_STT_LANGUAGES: readonly ["hi-IN", "bn-IN", "kn-IN", "ml-IN", "mr-IN", "od-IN", "pa-IN", "ta-IN", "te-IN", "en-IN", "gu-IN", "unknown"];

-export declare const
+export declare const SARVAM_STT_MODELS: readonly ["saarika:v1", "saarika:v2", "saarika:flash"];
+
+export declare const SARVAM_TTS_LANGUAGES: readonly ["hi-IN", "bn-IN", "kn-IN", "ml-IN", "mr-IN", "od-IN", "pa-IN", "ta-IN", "te-IN", "en-IN", "gu-IN"];
+
+export declare const SARVAM_TTS_MODELS: readonly ["bulbul:v1"];

 export declare const SARVAM_VOICES: readonly ["meera", "pavithra", "maitreyi", "arvind", "amol", "amartya", "diya", "neel", "misha", "vian", "arjun", "maya"];

-
+declare interface SarvamListenOptions {
+    apiKey?: string;
+    model?: SarvamSTTModel;
+    languageCode?: SarvamSTTLanguage;
+    filetype?: 'mp3' | 'wav';
+}
+
+export declare type SarvamSTTLanguage = (typeof SARVAM_STT_LANGUAGES)[number];
+
+export declare type SarvamSTTModel = (typeof SARVAM_STT_MODELS)[number];
+
+export declare type SarvamTTSLanguage = (typeof SARVAM_TTS_LANGUAGES)[number];

-export declare type
+export declare type SarvamTTSModel = (typeof SARVAM_TTS_MODELS)[number];

 export declare class SarvamVoice extends MastraVoice {
     private apiKey?;
@@ -17,9 +32,10 @@ export declare class SarvamVoice extends MastraVoice {
     private properties;
     protected speaker: SarvamVoiceId;
     private baseUrl;
-    constructor({ speechModel, speaker, }?: {
+    constructor({ speechModel, speaker, listeningModel, }?: {
         speechModel?: SarvamVoiceConfig;
         speaker?: SarvamVoiceId;
+        listeningModel?: SarvamListenOptions;
     });
     private makeRequest;
     private streamToString;
@@ -29,13 +45,13 @@ export declare class SarvamVoice extends MastraVoice {
     getSpeakers(): Promise<{
         voiceId: "meera" | "pavithra" | "maitreyi" | "arvind" | "amol" | "amartya" | "diya" | "neel" | "misha" | "vian" | "arjun" | "maya";
     }[]>;
-    listen(
+    listen(input: NodeJS.ReadableStream, options?: SarvamListenOptions): Promise<string>;
 }

 declare interface SarvamVoiceConfig {
     apiKey?: string;
-    model?:
-    language?:
+    model?: SarvamTTSModel;
+    language?: SarvamTTSLanguage;
     properties?: {
         pitch?: number;
         pace?: number;
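The rollup now exports the Sarvam model and language lists as readonly tuples and derives the matching literal-union types from them. A small sketch of how those exports compose, assuming the names above are re-exported from the package entry point (the assigned values are just examples drawn from the tuples):

```typescript
import {
  SARVAM_STT_MODELS,
  SARVAM_TTS_LANGUAGES,
  type SarvamSTTModel,
  type SarvamTTSLanguage,
} from '@mastra/voice-sarvam';

// Each type is the element union of its readonly tuple, so invalid values fail to compile.
const sttModel: SarvamSTTModel = 'saarika:flash';
const ttsLanguage: SarvamTTSLanguage = 'hi-IN';

// The tuples themselves remain available for runtime validation or UI pickers.
console.log(SARVAM_STT_MODELS.length, SARVAM_TTS_LANGUAGES.includes(ttsLanguage));
```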
package/dist/index.cjs
CHANGED
@@ -22,6 +22,14 @@ var SARVAM_VOICES = [
 ];

 // src/index.ts
+var defaultSpeechModel = {
+  model: "bulbul:v1",
+  apiKey: process.env.SARVAM_API_KEY,
+  language: "en-IN"
+};
+var defaultListeningModel = {
+  model: "saarika:v2",
+  apiKey: process.env.SARVAM_API_KEY};
 var SarvamVoice = class extends voice.MastraVoice {
   apiKey;
   model = "bulbul:v1";
@@ -31,18 +39,18 @@ var SarvamVoice = class extends voice.MastraVoice {
   baseUrl = "https://api.sarvam.ai";
   constructor({
     speechModel,
-    speaker
+    speaker,
+    listeningModel
   } = {}) {
-    const defaultSpeechModel = {
-      model: "bulbul:v1",
-      apiKey: process.env.SARVAM_API_KEY,
-      language: "en-IN"
-    };
     super({
       speechModel: {
         name: speechModel?.model ?? defaultSpeechModel.model,
         apiKey: speechModel?.apiKey ?? defaultSpeechModel.apiKey
       },
+      listeningModel: {
+        name: listeningModel?.model ?? defaultListeningModel.model,
+        apiKey: listeningModel?.model ?? defaultListeningModel.apiKey
+      },
       speaker
     });
     this.apiKey = speechModel?.apiKey || defaultSpeechModel.apiKey;
@@ -116,9 +124,39 @@ var SarvamVoice = class extends voice.MastraVoice {
       }));
     }, "voice.deepgram.getSpeakers")();
   }
-
-
-
+  async listen(input, options) {
+    return this.traced(async () => {
+      const chunks = [];
+      for await (const chunk of input) {
+        if (typeof chunk === "string") {
+          chunks.push(Buffer.from(chunk));
+        } else {
+          chunks.push(chunk);
+        }
+      }
+      const audioBuffer = Buffer.concat(chunks);
+      const form = new FormData();
+      const mimeType = options?.filetype === "mp3" ? "audio/mpeg" : "audio/wav";
+      const blob = new Blob([audioBuffer], { type: mimeType });
+      form.append("file", blob);
+      form.append("model", options?.model || "saarika:v2");
+      form.append("language_code", options?.languageCode || "unknown");
+      const requestOptions = {
+        method: "POST",
+        headers: {
+          "api-subscription-key": this.apiKey
+        },
+        body: form
+      };
+      try {
+        const response = await fetch(`${this.baseUrl}/speech-to-text`, requestOptions);
+        const result = await response.json();
+        return result.transcript;
+      } catch (error) {
+        console.error("Error during speech-to-text request:", error);
+        throw error;
+      }
+    }, "voice.sarvam.listen")();
   }
 };

package/dist/index.js
CHANGED
@@ -20,6 +20,14 @@ var SARVAM_VOICES = [
 ];

 // src/index.ts
+var defaultSpeechModel = {
+  model: "bulbul:v1",
+  apiKey: process.env.SARVAM_API_KEY,
+  language: "en-IN"
+};
+var defaultListeningModel = {
+  model: "saarika:v2",
+  apiKey: process.env.SARVAM_API_KEY};
 var SarvamVoice = class extends MastraVoice {
   apiKey;
   model = "bulbul:v1";
@@ -29,18 +37,18 @@ var SarvamVoice = class extends MastraVoice {
   baseUrl = "https://api.sarvam.ai";
   constructor({
     speechModel,
-    speaker
+    speaker,
+    listeningModel
   } = {}) {
-    const defaultSpeechModel = {
-      model: "bulbul:v1",
-      apiKey: process.env.SARVAM_API_KEY,
-      language: "en-IN"
-    };
     super({
       speechModel: {
         name: speechModel?.model ?? defaultSpeechModel.model,
         apiKey: speechModel?.apiKey ?? defaultSpeechModel.apiKey
       },
+      listeningModel: {
+        name: listeningModel?.model ?? defaultListeningModel.model,
+        apiKey: listeningModel?.model ?? defaultListeningModel.apiKey
+      },
       speaker
     });
     this.apiKey = speechModel?.apiKey || defaultSpeechModel.apiKey;
@@ -114,9 +122,39 @@ var SarvamVoice = class extends MastraVoice {
       }));
     }, "voice.deepgram.getSpeakers")();
   }
-
-
-
+  async listen(input, options) {
+    return this.traced(async () => {
+      const chunks = [];
+      for await (const chunk of input) {
+        if (typeof chunk === "string") {
+          chunks.push(Buffer.from(chunk));
+        } else {
+          chunks.push(chunk);
+        }
+      }
+      const audioBuffer = Buffer.concat(chunks);
+      const form = new FormData();
+      const mimeType = options?.filetype === "mp3" ? "audio/mpeg" : "audio/wav";
+      const blob = new Blob([audioBuffer], { type: mimeType });
+      form.append("file", blob);
+      form.append("model", options?.model || "saarika:v2");
+      form.append("language_code", options?.languageCode || "unknown");
+      const requestOptions = {
+        method: "POST",
+        headers: {
+          "api-subscription-key": this.apiKey
+        },
+        body: form
+      };
+      try {
+        const response = await fetch(`${this.baseUrl}/speech-to-text`, requestOptions);
+        const result = await response.json();
+        return result.transcript;
+      } catch (error) {
+        console.error("Error during speech-to-text request:", error);
+        throw error;
+      }
+    }, "voice.sarvam.listen")();
   }
 };

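In both builds, `listen()` buffers the input stream and issues one multipart POST to Sarvam's speech-to-text endpoint. The sketch below shows roughly the same request without the Mastra wrapper, using only details visible in the compiled code above; it assumes Node 18+ for the global `fetch`/`FormData`/`Blob`, and the `./audio.wav` path is illustrative.

```typescript
import { readFile } from 'fs/promises';

// Mirrors the request SarvamVoice.listen() builds in this release.
const audioBuffer = await readFile('./audio.wav'); // hypothetical input file
const form = new FormData();
form.append('file', new Blob([audioBuffer], { type: 'audio/wav' }));
form.append('model', 'saarika:v2');       // default model used by listen()
form.append('language_code', 'unknown');  // default language_code used by listen()

const response = await fetch('https://api.sarvam.ai/speech-to-text', {
  method: 'POST',
  headers: { 'api-subscription-key': process.env.SARVAM_API_KEY! },
  body: form,
});

if (!response.ok) {
  throw new Error(`Sarvam speech-to-text request failed: ${response.status}`);
}
const { transcript } = (await response.json()) as { transcript: string };
console.log(transcript);
```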
package/package.json
CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@mastra/voice-sarvam",
-  "version": "0.1.
+  "version": "0.1.3-alpha.1",
   "description": "Mastra Sarvam AI voice integration",
   "type": "module",
   "files": [
@@ -23,7+23,7 @@
   },
   "dependencies": {
     "zod": "^3.24.2",
-    "@mastra/core": "^0.6.
+    "@mastra/core": "^0.6.4-alpha.1"
   },
   "devDependencies": {
     "@microsoft/api-extractor": "^7.52.1",
@@ -34,6 +34,16 @@
     "vitest": "^2.1.9",
     "@internal/lint": "0.0.1"
   },
+  "keywords": [
+    "mastra",
+    "sarvam",
+    "tts",
+    "stt",
+    "indian-languages",
+    "speech-to-text",
+    "text-to-speech",
+    "speech-recognition"
+  ],
   "scripts": {
     "build": "tsup src/index.ts --format esm,cjs --experimental-dts --clean --treeshake=smallest --splitting",
     "build:watch": "pnpm build --watch",