sarvam-ai-sdk 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +26 -10
- package/dist/index.d.mts +59 -11
- package/dist/index.d.ts +59 -11
- package/dist/index.js +53 -23
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +53 -23
- package/dist/index.mjs.map +1 -1
- package/package.json +3 -3
package/README.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# AI SDK - Sarvam Provider
|
|
2
2
|
|
|
3
|
-
The **[Sarvam provider](https://ai-sdk.dev/providers/ai-sdk-providers/sarvam)** for the [AI SDK](https://ai-sdk.dev/docs)
|
|
3
|
+
The **[Sarvam provider](https://v4.ai-sdk.dev/providers/ai-sdk-providers/sarvam)** for the [AI SDK](https://v4.ai-sdk.dev/docs)
|
|
4
4
|
contains language model support for the Sarvam chat completion, Text-to-Speech and Speech-to-Text APIs.
|
|
5
5
|
|
|
6
6
|
## Setup
|
|
@@ -11,6 +11,9 @@ The **[Sarvam](http://sarvam.ai)** provider is available in the `sarvam-ai-sdk`
|
|
|
11
11
|
npm i sarvam-ai-sdk
|
|
12
12
|
```
|
|
13
13
|
|
|
14
|
+
> [!WARNING]
|
|
15
|
+
> This package only works with Vercel AI SDK v4, not the latest v6. Make sure to install `ai@4` in your project.
|
|
16
|
+
|
|
14
17
|
## Provider Instance
|
|
15
18
|
|
|
16
19
|
You can import the default provider instance `sarvam` from `sarvam-ai-sdk`:
|
|
@@ -31,7 +34,7 @@ import { sarvam } from 'sarvam-ai-sdk';
|
|
|
31
34
|
import { generateText } from 'ai';
|
|
32
35
|
|
|
33
36
|
const { text } = await generateText({
|
|
34
|
-
|
|
37
|
+
model: sarvam("sarvam-30b"),
|
|
35
38
|
prompt: "Translate this to malayalam: 'Keep cooking, guys'",
|
|
36
39
|
});
|
|
37
40
|
|
|
@@ -46,7 +49,7 @@ import { experimental_generateSpeech as generateSpeech } from "ai";
|
|
|
46
49
|
import { writeFile } from "fs/promises";
|
|
47
50
|
|
|
48
51
|
const { audio } = await generateSpeech({
|
|
49
|
-
model: sarvam.speech("bulbul:
|
|
52
|
+
model: sarvam.speech("bulbul:v3", "ml-IN"),
|
|
50
53
|
text: "പാചകം തുടരൂ, സുഹൃത്തുക്കളേ",
|
|
51
54
|
});
|
|
52
55
|
|
|
@@ -62,13 +65,26 @@ import { experimental_transcribe as transcribe } from "ai";
|
|
|
62
65
|
import { readFile } from "fs/promises";
|
|
63
66
|
|
|
64
67
|
const { text } = await transcribe({
|
|
65
|
-
model: sarvam.transcription("saarika:v2", "ml-IN")
|
|
68
|
+
model: sarvam.transcription("saarika:v2.5", "ml-IN"),
|
|
66
69
|
audio: await readFile("./src/transcript-test.wav"),
|
|
67
70
|
});
|
|
68
71
|
|
|
69
72
|
console.log(text); // പാചകം തുടരും സുഹൃത്തുക്കളെ
|
|
70
73
|
```
|
|
71
74
|
|
|
75
|
+
```ts
|
|
76
|
+
import { sarvam } from "sarvam-ai-sdk";
|
|
77
|
+
import { experimental_transcribe as transcribe } from "ai";
|
|
78
|
+
import { readFile } from "fs/promises";
|
|
79
|
+
|
|
80
|
+
const { text } = await transcribe({
|
|
81
|
+
model: sarvam.transcription("saaras:v3", "en-IN"),
|
|
82
|
+
audio: await readFile("./src/transcript-test.wav"),
|
|
83
|
+
});
|
|
84
|
+
|
|
85
|
+
console.log(text); // Pachakam thudaroo, suhruthukkale.
|
|
86
|
+
```
|
|
87
|
+
|
|
72
88
|
## Speech-to-Text-Translate
|
|
73
89
|
|
|
74
90
|
```ts
|
|
@@ -77,7 +93,7 @@ import { experimental_transcribe as transcribe } from "ai";
|
|
|
77
93
|
import { readFile } from "fs/promises";
|
|
78
94
|
|
|
79
95
|
const result = await transcribe({
|
|
80
|
-
model: sarvam.speechTranslation("saaras:v2"),
|
|
96
|
+
model: sarvam.speechTranslation("saaras:v2.5"),
|
|
81
97
|
audio: await readFile("./src/transcript-test.wav"),
|
|
82
98
|
});
|
|
83
99
|
|
|
@@ -141,7 +157,7 @@ console.log(result.text); // ml-IN
|
|
|
141
157
|
## Tool Calling
|
|
142
158
|
|
|
143
159
|
> [!WARNING]
|
|
144
|
-
> Latest `sarvam
|
|
160
|
+
> The latest `sarvam` models aren't trained on the native tool calling feature (aka JSON mode), so we simulate it with a prompt engineering technique.
|
|
145
161
|
|
|
146
162
|
```ts
|
|
147
163
|
import { z } from "zod";
|
|
@@ -150,7 +166,7 @@ import { sarvam } from "sarvam-ai-sdk";
|
|
|
150
166
|
|
|
151
167
|
|
|
152
168
|
const result = await generateText({
|
|
153
|
-
model: sarvam("sarvam-
|
|
169
|
+
model: sarvam("sarvam-30b", {
|
|
154
170
|
simulate: "tool-calling" // ⚠️ important
|
|
155
171
|
}),
|
|
156
172
|
tools: {
|
|
@@ -174,7 +190,7 @@ console.log(result.toolResults);
|
|
|
174
190
|
## Generate JSON object
|
|
175
191
|
|
|
176
192
|
> [!WARNING]
|
|
177
|
-
> Latest `sarvam
|
|
193
|
+
> The latest `sarvam` models aren't trained on native JSON object generation, so we simulate it with a prompt engineering technique.
|
|
178
194
|
|
|
179
195
|
```ts
|
|
180
196
|
import { z } from "zod";
|
|
@@ -182,7 +198,7 @@ import { sarvam } from "sarvam-ai-sdk";
|
|
|
182
198
|
import { generateObject } from 'ai';
|
|
183
199
|
|
|
184
200
|
const { object } = await generateObject({
|
|
185
|
-
model: sarvam("sarvam-
|
|
201
|
+
model: sarvam("sarvam-30b", {
|
|
186
202
|
simulate: "json-object" // ⚠️ important
|
|
187
203
|
}),
|
|
188
204
|
schema: z.object({
|
|
@@ -200,4 +216,4 @@ console.log(object);
|
|
|
200
216
|
|
|
201
217
|
## Documentation
|
|
202
218
|
|
|
203
|
-
Please check out the **[Sarvam provider documentation](https://ai-sdk.dev/providers/ai-sdk-providers/sarvam)** and **[Sarvam API documentation](https://docs.sarvam.ai)** for more information.
|
|
219
|
+
Please check out the **[Sarvam provider documentation](https://v4.ai-sdk.dev/providers/ai-sdk-providers/sarvam)** and **[Sarvam API documentation](https://docs.sarvam.ai)** for more information.
|
package/dist/index.d.mts
CHANGED
|
@@ -2,7 +2,15 @@ import { LanguageModelV1, TranscriptionModelV1, SpeechModelV1 } from '@ai-sdk/pr
|
|
|
2
2
|
import { FetchFunction } from '@ai-sdk/provider-utils';
|
|
3
3
|
import { z } from 'zod';
|
|
4
4
|
|
|
5
|
-
|
|
5
|
+
/**
|
|
6
|
+
* @description Product models
|
|
7
|
+
*/
|
|
8
|
+
type SarvamChatModelId = "sarvam-30b" | "sarvam-30b-16k" | "sarvam-105b" | "sarvam-105b-32k" | SarvamChatLegacyModelId | (string & {});
|
|
9
|
+
/**
|
|
10
|
+
* @description Legacy models
|
|
11
|
+
* @deprecated
|
|
12
|
+
*/
|
|
13
|
+
type SarvamChatLegacyModelId = "sarvam-m";
|
|
6
14
|
interface SarvamChatSettings {
|
|
7
15
|
/**
|
|
8
16
|
* Whether to simulate artificial tool calling or JSON object generation, because Sarvam models don't support native Tool Calling or JSON Schema.
|
|
@@ -42,9 +50,9 @@ interface SarvamChatSettings {
|
|
|
42
50
|
type SarvamLanguageCode = z.infer<typeof SarvamLanguageCodeSchema>;
|
|
43
51
|
declare const SarvamLanguageCodeSchema: z.ZodEnum<["hi-IN", "bn-IN", "kn-IN", "ml-IN", "mr-IN", "od-IN", "pa-IN", "ta-IN", "te-IN", "en-IN", "gu-IN"]>;
|
|
44
52
|
|
|
45
|
-
type SarvamSpeechModelId = "bulbul:
|
|
53
|
+
type SarvamSpeechModelId = "bulbul:v2" | "bulbul:v3" | (string & {});
|
|
46
54
|
type SarvamSpeechVoices = z.infer<typeof SpeakerSchema>;
|
|
47
|
-
declare const SpeakerSchema: z.ZodDefault<z.ZodEnum<["
|
|
55
|
+
declare const SpeakerSchema: z.ZodDefault<z.ZodEnum<["abhilash", "karun", "hitesh", "anushka", "manisha", "vidya", "arya", "shubh", "aditya", "rahul", "rohan", "amit", "dev", "ratan", "varun", "manan", "sumit", "kabir", "aayan", "ashutosh", "advait", "anand", "tarun", "sunny", "mani", "gokul", "vijay", "mohit", "rehan", "soham", "ritu", "priya", "neha", "pooja", "simran", "kavya", "ishita", "shreya", "roopa", "amelia", "sophia", "tanya", "shruti", "suhani", "kavitha", "rupali"]>>;
|
|
48
56
|
/**
|
|
49
57
|
* Configuration settings for Sarvam Text-to-Speech API.
|
|
50
58
|
*
|
|
@@ -56,10 +64,10 @@ type SarvamSpeechSettings = {
|
|
|
56
64
|
/**
|
|
57
65
|
* The speaker voice to be used for the output audio.
|
|
58
66
|
*
|
|
59
|
-
* @default
|
|
60
|
-
*
|
|
61
|
-
*
|
|
62
|
-
*
|
|
67
|
+
* @default
|
|
68
|
+
* - "shubh" (Male voice for bulbul:v3)
|
|
69
|
+
* - "manisha" (Female voice for bulbul:v2)
|
|
70
|
+
* - "meera" (Female voice for bulbul:v1)
|
|
63
71
|
*/
|
|
64
72
|
speaker?: SarvamSpeechVoices;
|
|
65
73
|
/**
|
|
@@ -105,9 +113,41 @@ type SarvamSpeechSettings = {
|
|
|
105
113
|
enable_preprocessing?: boolean;
|
|
106
114
|
};
|
|
107
115
|
|
|
108
|
-
type SarvamTranscriptionModelId = "
|
|
109
|
-
type SarvamSpeechTranslationModelId = "saaras:
|
|
116
|
+
type SarvamTranscriptionModelId = "saaras:v3" | "saarika:v2.5" | (string & {});
|
|
117
|
+
type SarvamSpeechTranslationModelId = "saaras:v3" | "saaras:v2.5" | (string & {});
|
|
118
|
+
declare const SarvamProviderOptionsSchema: z.ZodObject<{
|
|
119
|
+
mode: z.ZodDefault<z.ZodEnum<["transcribe", "translate", "verbatim", "translit", "codemix"]>>;
|
|
120
|
+
with_timestamps: z.ZodDefault<z.ZodOptional<z.ZodNullable<z.ZodBoolean>>>;
|
|
121
|
+
with_diarization: z.ZodDefault<z.ZodOptional<z.ZodNullable<z.ZodBoolean>>>;
|
|
122
|
+
num_speakers: z.ZodOptional<z.ZodNullable<z.ZodNumber>>;
|
|
123
|
+
}, "strip", z.ZodTypeAny, {
|
|
124
|
+
mode: "transcribe" | "translate" | "verbatim" | "translit" | "codemix";
|
|
125
|
+
with_timestamps: boolean | null;
|
|
126
|
+
with_diarization: boolean | null;
|
|
127
|
+
num_speakers?: number | null | undefined;
|
|
128
|
+
}, {
|
|
129
|
+
mode?: "transcribe" | "translate" | "verbatim" | "translit" | "codemix" | undefined;
|
|
130
|
+
with_timestamps?: boolean | null | undefined;
|
|
131
|
+
with_diarization?: boolean | null | undefined;
|
|
132
|
+
num_speakers?: number | null | undefined;
|
|
133
|
+
}>;
|
|
110
134
|
type SarvamTranscriptionCallOptions = {
|
|
135
|
+
/**
|
|
136
|
+
* @default "transcribe"
|
|
137
|
+
*
|
|
138
|
+
* @description
|
|
139
|
+
* - `transcribe`: Standard transcription in the original language, `output`: Text in source language
|
|
140
|
+
* - `translate`: Transcribe and translate to English, `output`: English text
|
|
141
|
+
* - `verbatim`: Word-for-word transcription including filler words and repetitions, `output`: Verbatim text in source language
|
|
142
|
+
* - `translit`: Transcribe and transliterate to Roman script, `output`: Romanized text
|
|
143
|
+
* - `codemix`: Transcribe code-mixed speech (e.g., Hindi-English) naturally, `output`: Code-mixed text
|
|
144
|
+
*/
|
|
145
|
+
mode?: z.infer<typeof SarvamProviderOptionsSchema.shape.mode>;
|
|
146
|
+
/**
|
|
147
|
+
* - Chunk-level timestamp support
|
|
148
|
+
* - Useful for subtitle alignment and audio navigation
|
|
149
|
+
* - Provides start and end times for each segment of text
|
|
150
|
+
*/
|
|
111
151
|
with_timestamps?: boolean;
|
|
112
152
|
/**
|
|
113
153
|
* Enables speaker diarization, which identifies and separates different speakers in the audio.
|
|
@@ -240,11 +280,19 @@ interface SarvamProvider {
|
|
|
240
280
|
/**
|
|
241
281
|
* Creates a model for text generation.
|
|
242
282
|
*/
|
|
243
|
-
(
|
|
283
|
+
(
|
|
284
|
+
/**
|
|
285
|
+
* @description Sarvam-M (24B) is now a legacy model; we recommend migrating to Sarvam-30B or Sarvam-105B for improved performance.
|
|
286
|
+
*/
|
|
287
|
+
modelId: SarvamChatModelId, settings?: SarvamChatSettings): LanguageModelV1;
|
|
244
288
|
/**
|
|
245
289
|
* Creates a Sarvam chat model for text generation.
|
|
246
290
|
*/
|
|
247
|
-
languageModel(
|
|
291
|
+
languageModel(
|
|
292
|
+
/**
|
|
293
|
+
* @description Sarvam-M (24B) is now a legacy model; we recommend migrating to Sarvam-30B or Sarvam-105B for improved performance.
|
|
294
|
+
*/
|
|
295
|
+
modelId: SarvamChatModelId, settings?: SarvamChatSettings): LanguageModelV1;
|
|
248
296
|
/**
|
|
249
297
|
* Creates a Sarvam model for transcription.
|
|
250
298
|
*/
|
package/dist/index.d.ts
CHANGED
|
@@ -2,7 +2,15 @@ import { LanguageModelV1, TranscriptionModelV1, SpeechModelV1 } from '@ai-sdk/pr
|
|
|
2
2
|
import { FetchFunction } from '@ai-sdk/provider-utils';
|
|
3
3
|
import { z } from 'zod';
|
|
4
4
|
|
|
5
|
-
|
|
5
|
+
/**
|
|
6
|
+
* @description Product models
|
|
7
|
+
*/
|
|
8
|
+
type SarvamChatModelId = "sarvam-30b" | "sarvam-30b-16k" | "sarvam-105b" | "sarvam-105b-32k" | SarvamChatLegacyModelId | (string & {});
|
|
9
|
+
/**
|
|
10
|
+
* @description Legacy models
|
|
11
|
+
* @deprecated
|
|
12
|
+
*/
|
|
13
|
+
type SarvamChatLegacyModelId = "sarvam-m";
|
|
6
14
|
interface SarvamChatSettings {
|
|
7
15
|
/**
|
|
8
16
|
* Whether to simulate artificial tool calling or JSON object generation, because Sarvam models don't support native Tool Calling or JSON Schema.
|
|
@@ -42,9 +50,9 @@ interface SarvamChatSettings {
|
|
|
42
50
|
type SarvamLanguageCode = z.infer<typeof SarvamLanguageCodeSchema>;
|
|
43
51
|
declare const SarvamLanguageCodeSchema: z.ZodEnum<["hi-IN", "bn-IN", "kn-IN", "ml-IN", "mr-IN", "od-IN", "pa-IN", "ta-IN", "te-IN", "en-IN", "gu-IN"]>;
|
|
44
52
|
|
|
45
|
-
type SarvamSpeechModelId = "bulbul:
|
|
53
|
+
type SarvamSpeechModelId = "bulbul:v2" | "bulbul:v3" | (string & {});
|
|
46
54
|
type SarvamSpeechVoices = z.infer<typeof SpeakerSchema>;
|
|
47
|
-
declare const SpeakerSchema: z.ZodDefault<z.ZodEnum<["
|
|
55
|
+
declare const SpeakerSchema: z.ZodDefault<z.ZodEnum<["abhilash", "karun", "hitesh", "anushka", "manisha", "vidya", "arya", "shubh", "aditya", "rahul", "rohan", "amit", "dev", "ratan", "varun", "manan", "sumit", "kabir", "aayan", "ashutosh", "advait", "anand", "tarun", "sunny", "mani", "gokul", "vijay", "mohit", "rehan", "soham", "ritu", "priya", "neha", "pooja", "simran", "kavya", "ishita", "shreya", "roopa", "amelia", "sophia", "tanya", "shruti", "suhani", "kavitha", "rupali"]>>;
|
|
48
56
|
/**
|
|
49
57
|
* Configuration settings for Sarvam Text-to-Speech API.
|
|
50
58
|
*
|
|
@@ -56,10 +64,10 @@ type SarvamSpeechSettings = {
|
|
|
56
64
|
/**
|
|
57
65
|
* The speaker voice to be used for the output audio.
|
|
58
66
|
*
|
|
59
|
-
* @default
|
|
60
|
-
*
|
|
61
|
-
*
|
|
62
|
-
*
|
|
67
|
+
* @default
|
|
68
|
+
* - "shubh" (Male voice for bulbul:v3)
|
|
69
|
+
* - "manisha" (Female voice for bulbul:v2)
|
|
70
|
+
* - "meera" (Female voice for bulbul:v1)
|
|
63
71
|
*/
|
|
64
72
|
speaker?: SarvamSpeechVoices;
|
|
65
73
|
/**
|
|
@@ -105,9 +113,41 @@ type SarvamSpeechSettings = {
|
|
|
105
113
|
enable_preprocessing?: boolean;
|
|
106
114
|
};
|
|
107
115
|
|
|
108
|
-
type SarvamTranscriptionModelId = "
|
|
109
|
-
type SarvamSpeechTranslationModelId = "saaras:
|
|
116
|
+
type SarvamTranscriptionModelId = "saaras:v3" | "saarika:v2.5" | (string & {});
|
|
117
|
+
type SarvamSpeechTranslationModelId = "saaras:v3" | "saaras:v2.5" | (string & {});
|
|
118
|
+
declare const SarvamProviderOptionsSchema: z.ZodObject<{
|
|
119
|
+
mode: z.ZodDefault<z.ZodEnum<["transcribe", "translate", "verbatim", "translit", "codemix"]>>;
|
|
120
|
+
with_timestamps: z.ZodDefault<z.ZodOptional<z.ZodNullable<z.ZodBoolean>>>;
|
|
121
|
+
with_diarization: z.ZodDefault<z.ZodOptional<z.ZodNullable<z.ZodBoolean>>>;
|
|
122
|
+
num_speakers: z.ZodOptional<z.ZodNullable<z.ZodNumber>>;
|
|
123
|
+
}, "strip", z.ZodTypeAny, {
|
|
124
|
+
mode: "transcribe" | "translate" | "verbatim" | "translit" | "codemix";
|
|
125
|
+
with_timestamps: boolean | null;
|
|
126
|
+
with_diarization: boolean | null;
|
|
127
|
+
num_speakers?: number | null | undefined;
|
|
128
|
+
}, {
|
|
129
|
+
mode?: "transcribe" | "translate" | "verbatim" | "translit" | "codemix" | undefined;
|
|
130
|
+
with_timestamps?: boolean | null | undefined;
|
|
131
|
+
with_diarization?: boolean | null | undefined;
|
|
132
|
+
num_speakers?: number | null | undefined;
|
|
133
|
+
}>;
|
|
110
134
|
type SarvamTranscriptionCallOptions = {
|
|
135
|
+
/**
|
|
136
|
+
* @default "transcribe"
|
|
137
|
+
*
|
|
138
|
+
* @description
|
|
139
|
+
* - `transcribe`: Standard transcription in the original language, `output`: Text in source language
|
|
140
|
+
* - `translate`: Transcribe and translate to English, `output`: English text
|
|
141
|
+
* - `verbatim`: Word-for-word transcription including filler words and repetitions, `output`: Verbatim text in source language
|
|
142
|
+
* - `translit`: Transcribe and transliterate to Roman script, `output`: Romanized text
|
|
143
|
+
* - `codemix`: Transcribe code-mixed speech (e.g., Hindi-English) naturally, `output`: Code-mixed text
|
|
144
|
+
*/
|
|
145
|
+
mode?: z.infer<typeof SarvamProviderOptionsSchema.shape.mode>;
|
|
146
|
+
/**
|
|
147
|
+
* - Chunk-level timestamp support
|
|
148
|
+
* - Useful for subtitle alignment and audio navigation
|
|
149
|
+
* - Provides start and end times for each segment of text
|
|
150
|
+
*/
|
|
111
151
|
with_timestamps?: boolean;
|
|
112
152
|
/**
|
|
113
153
|
* Enables speaker diarization, which identifies and separates different speakers in the audio.
|
|
@@ -240,11 +280,19 @@ interface SarvamProvider {
|
|
|
240
280
|
/**
|
|
241
281
|
* Creates a model for text generation.
|
|
242
282
|
*/
|
|
243
|
-
(
|
|
283
|
+
(
|
|
284
|
+
/**
|
|
285
|
+
* @description Sarvam-M (24B) is now a legacy model; we recommend migrating to Sarvam-30B or Sarvam-105B for improved performance.
|
|
286
|
+
*/
|
|
287
|
+
modelId: SarvamChatModelId, settings?: SarvamChatSettings): LanguageModelV1;
|
|
244
288
|
/**
|
|
245
289
|
* Creates a Sarvam chat model for text generation.
|
|
246
290
|
*/
|
|
247
|
-
languageModel(
|
|
291
|
+
languageModel(
|
|
292
|
+
/**
|
|
293
|
+
* @description Sarvam-M (24B) is now a legacy model; we recommend migrating to Sarvam-30B or Sarvam-105B for improved performance.
|
|
294
|
+
*/
|
|
295
|
+
modelId: SarvamChatModelId, settings?: SarvamChatSettings): LanguageModelV1;
|
|
248
296
|
/**
|
|
249
297
|
* Creates a Sarvam model for transcription.
|
|
250
298
|
*/
|
package/dist/index.js
CHANGED
|
@@ -771,26 +771,57 @@ var import_provider_utils5 = require("@ai-sdk/provider-utils");
|
|
|
771
771
|
// src/sarvam-speech-settings.ts
|
|
772
772
|
var import_zod3 = require("zod");
|
|
773
773
|
var SpeakerSchema = import_zod3.z.enum([
|
|
774
|
-
|
|
775
|
-
"pavithra",
|
|
776
|
-
"maitreyi",
|
|
777
|
-
"arvind",
|
|
778
|
-
"amol",
|
|
779
|
-
"amartya",
|
|
780
|
-
"diya",
|
|
781
|
-
"neel",
|
|
782
|
-
"misha",
|
|
783
|
-
"vian",
|
|
784
|
-
"arjun",
|
|
785
|
-
"maya",
|
|
786
|
-
"anushka",
|
|
774
|
+
// male bulbul:v2
|
|
787
775
|
"abhilash",
|
|
776
|
+
"karun",
|
|
777
|
+
"hitesh",
|
|
778
|
+
// female bulbul:v2
|
|
779
|
+
"anushka",
|
|
788
780
|
"manisha",
|
|
789
781
|
"vidya",
|
|
790
782
|
"arya",
|
|
791
|
-
|
|
792
|
-
"
|
|
793
|
-
|
|
783
|
+
// male bulbul:v3
|
|
784
|
+
"shubh",
|
|
785
|
+
"aditya",
|
|
786
|
+
"rahul",
|
|
787
|
+
"rohan",
|
|
788
|
+
"amit",
|
|
789
|
+
"dev",
|
|
790
|
+
"ratan",
|
|
791
|
+
"varun",
|
|
792
|
+
"manan",
|
|
793
|
+
"sumit",
|
|
794
|
+
"kabir",
|
|
795
|
+
"aayan",
|
|
796
|
+
"ashutosh",
|
|
797
|
+
"advait",
|
|
798
|
+
"anand",
|
|
799
|
+
"tarun",
|
|
800
|
+
"sunny",
|
|
801
|
+
"mani",
|
|
802
|
+
"gokul",
|
|
803
|
+
"vijay",
|
|
804
|
+
"mohit",
|
|
805
|
+
"rehan",
|
|
806
|
+
"soham",
|
|
807
|
+
// female bulbul:v3
|
|
808
|
+
"ritu",
|
|
809
|
+
"priya",
|
|
810
|
+
"neha",
|
|
811
|
+
"pooja",
|
|
812
|
+
"simran",
|
|
813
|
+
"kavya",
|
|
814
|
+
"ishita",
|
|
815
|
+
"shreya",
|
|
816
|
+
"roopa",
|
|
817
|
+
"amelia",
|
|
818
|
+
"sophia",
|
|
819
|
+
"tanya",
|
|
820
|
+
"shruti",
|
|
821
|
+
"suhani",
|
|
822
|
+
"kavitha",
|
|
823
|
+
"rupali"
|
|
824
|
+
]).default("shubh");
|
|
794
825
|
var SarvamProviderOptionsSchema = import_zod3.z.object({
|
|
795
826
|
speaker: SpeakerSchema,
|
|
796
827
|
pitch: import_zod3.z.number().min(-0.75).max(0.75).default(0),
|
|
@@ -842,12 +873,12 @@ var SarvamSpeechModel = class {
|
|
|
842
873
|
return SpeakerSchema.parse(voice);
|
|
843
874
|
}
|
|
844
875
|
switch (this.modelId) {
|
|
845
|
-
case "bulbul:v1":
|
|
846
|
-
return "meera";
|
|
847
876
|
case "bulbul:v2":
|
|
848
877
|
return "manisha";
|
|
878
|
+
case "bulbul:v3":
|
|
879
|
+
return "shubh";
|
|
849
880
|
}
|
|
850
|
-
return "
|
|
881
|
+
return "shubh";
|
|
851
882
|
};
|
|
852
883
|
const requestBody = {
|
|
853
884
|
model: this.modelId,
|
|
@@ -934,6 +965,7 @@ var import_zod6 = require("zod");
|
|
|
934
965
|
// src/sarvam-transcription-settings.ts
|
|
935
966
|
var import_zod5 = require("zod");
|
|
936
967
|
var SarvamProviderOptionsSchema2 = import_zod5.z.object({
|
|
968
|
+
mode: import_zod5.z.enum(["transcribe", "translate", "verbatim", "translit", "codemix"]).default("transcribe"),
|
|
937
969
|
with_timestamps: import_zod5.z.boolean().nullish().default(false),
|
|
938
970
|
with_diarization: import_zod5.z.boolean().nullish().default(false),
|
|
939
971
|
num_speakers: import_zod5.z.number().int().nullish()
|
|
@@ -955,11 +987,8 @@ var SarvamTranscriptionModel = class {
|
|
|
955
987
|
mediaType,
|
|
956
988
|
providerOptions
|
|
957
989
|
}) {
|
|
990
|
+
var _a;
|
|
958
991
|
const warnings = [];
|
|
959
|
-
if (this.modelId === "saarika:v1" && this.languageCode === "unknown")
|
|
960
|
-
throw new Error(
|
|
961
|
-
"Language code unknown is not supported for model saarika:v1"
|
|
962
|
-
);
|
|
963
992
|
const sarvamOptions = (0, import_provider_utils6.parseProviderOptions)({
|
|
964
993
|
provider: "sarvam",
|
|
965
994
|
providerOptions: {
|
|
@@ -975,6 +1004,7 @@ var SarvamTranscriptionModel = class {
|
|
|
975
1004
|
formData.append("file", blob);
|
|
976
1005
|
formData.append("model", this.modelId);
|
|
977
1006
|
if (sarvamOptions) {
|
|
1007
|
+
formData.append("mode", (_a = sarvamOptions.mode) != null ? _a : "transcribe");
|
|
978
1008
|
formData.append("language_code", this.languageCode);
|
|
979
1009
|
formData.append(
|
|
980
1010
|
"with_timestamps",
|