sarvam-ai-sdk 0.1.5-beta → 0.2.0-beta
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +8 -13
- package/dist/index.d.mts +134 -35
- package/dist/index.d.ts +134 -35
- package/dist/index.js +244 -284
- package/dist/index.mjs +244 -284
- package/package.json +7 -6
package/README.md
CHANGED
|
@@ -1,7 +1,6 @@
|
|
|
1
|
-
# Package
|
|
2
1
|
# AI SDK - Sarvam Provider
|
|
3
2
|
|
|
4
|
-
The **[Sarvam provider](https://
|
|
3
|
+
The **[Sarvam provider](https://v5.ai-sdk.dev/providers/ai-sdk-providers/sarvam)** for the [AI SDK](https://v5.ai-sdk.dev/docs)
|
|
5
4
|
contains language model support for the Sarvam chat completion, Text-to-Speech and Speech-to-Text APIs.
|
|
6
5
|
|
|
7
6
|
## Setup
|
|
@@ -9,11 +8,11 @@ contains language model support for the Sarvam chat completion, Text-to-Speech a
|
|
|
9
8
|
The **[Sarvam](http://sarvam.ai)** provider is available in the `sarvam-ai-sdk` module. You can install it with
|
|
10
9
|
|
|
11
10
|
```bash
|
|
12
|
-
npm i sarvam-ai-sdk ai@
|
|
11
|
+
npm i sarvam-ai-sdk ai@5
|
|
13
12
|
```
|
|
14
13
|
|
|
15
14
|
> [!WARNING]
|
|
16
|
-
> This package only works with Vercel AI-SDK
|
|
15
|
+
> This package only works with Vercel AI-SDK v5, not v6 or latest v7. Make sure to install `ai@5` in your project.
|
|
17
16
|
|
|
18
17
|
## Provider Instance
|
|
19
18
|
|
|
@@ -168,8 +167,8 @@ const result = await generateText({
|
|
|
168
167
|
tools: {
|
|
169
168
|
weather: tool({
|
|
170
169
|
description: "Get the weather in a location",
|
|
171
|
-
|
|
172
|
-
|
|
170
|
+
inputSchema: z.object({
|
|
171
|
+
location: z.string(),
|
|
173
172
|
}),
|
|
174
173
|
execute: async ({ location }) => ({
|
|
175
174
|
location,
|
|
@@ -184,9 +183,6 @@ const result = await generateText({
|
|
|
184
183
|
console.log(result.toolResults);
|
|
185
184
|
```
|
|
186
185
|
|
|
187
|
-
> [!WARNING]
|
|
188
|
-
> Old `sarvam-m` models isn't trained on native tool calling feature (aka JSON mode). So we recommend using latest models.
|
|
189
|
-
|
|
190
186
|
## Generate JSON object
|
|
191
187
|
|
|
192
188
|
```ts
|
|
@@ -196,6 +192,8 @@ import { generateObject } from 'ai';
|
|
|
196
192
|
|
|
197
193
|
const { object } = await generateObject({
|
|
198
194
|
model: sarvam("sarvam-30b"),
|
|
195
|
+
schemaName: "Recipe",
|
|
196
|
+
schemaDescription: "A recipe with a name, ingredients and steps",
|
|
199
197
|
schema: z.object({
|
|
200
198
|
recipe: z.object({
|
|
201
199
|
name: z.string(),
|
|
@@ -209,9 +207,6 @@ const { object } = await generateObject({
|
|
|
209
207
|
console.log(object);
|
|
210
208
|
```
|
|
211
209
|
|
|
212
|
-
> [!WARNING]
|
|
213
|
-
> Old `sarvam-m` models isn't trained on native JSON object generation. So we recommend using latest models.
|
|
214
|
-
|
|
215
210
|
## All APIs
|
|
216
211
|
|
|
217
212
|
```ts
|
|
@@ -243,4 +238,4 @@ sarvam.speechTranslation("saaras:v3");
|
|
|
243
238
|
|
|
244
239
|
## Documentation
|
|
245
240
|
|
|
246
|
-
Please check out the **[Sarvam provider documentation](https://
|
|
241
|
+
Please check out the **[Sarvam provider documentation](https://v5.ai-sdk.dev/providers/ai-sdk-providers/sarvam)** and **[Sarvam API documentation](https://docs.sarvam.ai)** for more information.
|
package/dist/index.d.mts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { FetchFunction } from "@ai-sdk/provider-utils";
|
|
2
|
-
import { LanguageModelV1, SpeechModelV1, TranscriptionModelV1 } from "@ai-sdk/provider";
|
|
2
|
+
import { LanguageModelV2, SpeechModelV2, TranscriptionModelV2 } from "@ai-sdk/provider";
|
|
3
3
|
import z$1, { z } from "zod";
|
|
4
4
|
|
|
5
5
|
//#region src/config.d.ts
|
|
@@ -8,9 +8,34 @@ import z$1, { z } from "zod";
|
|
|
8
8
|
* Specifies the language in BCP-47 format.
|
|
9
9
|
*/
|
|
10
10
|
type SarvamLanguageCode = z.infer<typeof SarvamLanguageCodeSchema>;
|
|
11
|
-
declare const SarvamLanguageCodeSchema: z.ZodEnum<
|
|
11
|
+
declare const SarvamLanguageCodeSchema: z.ZodEnum<{
|
|
12
|
+
"hi-IN": "hi-IN";
|
|
13
|
+
"bn-IN": "bn-IN";
|
|
14
|
+
"kn-IN": "kn-IN";
|
|
15
|
+
"ml-IN": "ml-IN";
|
|
16
|
+
"mr-IN": "mr-IN";
|
|
17
|
+
"od-IN": "od-IN";
|
|
18
|
+
"pa-IN": "pa-IN";
|
|
19
|
+
"ta-IN": "ta-IN";
|
|
20
|
+
"te-IN": "te-IN";
|
|
21
|
+
"en-IN": "en-IN";
|
|
22
|
+
"gu-IN": "gu-IN";
|
|
23
|
+
}>;
|
|
12
24
|
type MoreSarvamLanguageCode = z.infer<typeof MoreSarvamLanguageCodeSchema>;
|
|
13
|
-
declare const MoreSarvamLanguageCodeSchema: z.ZodEnum<
|
|
25
|
+
declare const MoreSarvamLanguageCodeSchema: z.ZodEnum<{
|
|
26
|
+
"as-IN": "as-IN";
|
|
27
|
+
"ur-IN": "ur-IN";
|
|
28
|
+
"ne-IN": "ne-IN";
|
|
29
|
+
"kok-IN": "kok-IN";
|
|
30
|
+
"ks-IN": "ks-IN";
|
|
31
|
+
"sd-IN": "sd-IN";
|
|
32
|
+
"sa-IN": "sa-IN";
|
|
33
|
+
"sat-IN": "sat-IN";
|
|
34
|
+
"mni-IN": "mni-IN";
|
|
35
|
+
"brx-IN": "brx-IN";
|
|
36
|
+
"mai-IN": "mai-IN";
|
|
37
|
+
"doi-IN": "doi-IN";
|
|
38
|
+
}>;
|
|
14
39
|
interface SarvamProviderSettings {
|
|
15
40
|
/**
|
|
16
41
|
* URL for the Sarvam API calls.
|
|
@@ -60,6 +85,34 @@ type ChatSettings = {
|
|
|
60
85
|
n?: number;
|
|
61
86
|
};
|
|
62
87
|
//#endregion
|
|
88
|
+
//#region src/stt/utils.d.ts
|
|
89
|
+
declare const input_audio_codec: z$1.ZodEnum<{
|
|
90
|
+
mp3: "mp3";
|
|
91
|
+
opus: "opus";
|
|
92
|
+
flac: "flac";
|
|
93
|
+
aac: "aac";
|
|
94
|
+
wav: "wav";
|
|
95
|
+
"x-wav": "x-wav";
|
|
96
|
+
wave: "wave";
|
|
97
|
+
mpeg: "mpeg";
|
|
98
|
+
mpeg3: "mpeg3";
|
|
99
|
+
"x-mp3": "x-mp3";
|
|
100
|
+
"x-mpeg-3": "x-mpeg-3";
|
|
101
|
+
"x-aac": "x-aac";
|
|
102
|
+
aiff: "aiff";
|
|
103
|
+
"x-aiff": "x-aiff";
|
|
104
|
+
ogg: "ogg";
|
|
105
|
+
"x-flac": "x-flac";
|
|
106
|
+
mp4: "mp4";
|
|
107
|
+
"x-m4a": "x-m4a";
|
|
108
|
+
amr: "amr";
|
|
109
|
+
"x-ms-wma": "x-ms-wma";
|
|
110
|
+
webm: "webm";
|
|
111
|
+
pcm_s16le: "pcm_s16le";
|
|
112
|
+
pcm_l16: "pcm_l16";
|
|
113
|
+
pcm_raw: "pcm_raw";
|
|
114
|
+
}>;
|
|
115
|
+
//#endregion
|
|
63
116
|
//#region src/stt/speech-translation-settings.d.ts
|
|
64
117
|
/**
|
|
65
118
|
* Specifies the speech generation model to use.
|
|
@@ -68,13 +121,6 @@ type ChatSettings = {
|
|
|
68
121
|
* - `saaras:v3`: Translation model that translates audio from any spoken Indic language to English, with improved accuracy and support for more languages.
|
|
69
122
|
*/
|
|
70
123
|
type SpeechTranslationModelId = "saaras:v3" | "saaras:v2.5" | (string & {});
|
|
71
|
-
declare const speechTranslationSettingsSchema: z$1.ZodObject<{
|
|
72
|
-
input_audio_codec: z$1.ZodEnum<["wav", "x-wav", "wave", "mp3", "mpeg", "mpeg3", "x-mp3", "x-mpeg-3", "aac", "x-aac", "aiff", "x-aiff", "ogg", "opus", "flac", "x-flac", "mp4", "x-m4a", "amr", "x-ms-wma", "webm", "pcm_s16le", "pcm_l16", "pcm_raw"]>;
|
|
73
|
-
}, "strip", z$1.ZodTypeAny, {
|
|
74
|
-
input_audio_codec: "wav" | "x-wav" | "wave" | "mp3" | "mpeg" | "mpeg3" | "x-mp3" | "x-mpeg-3" | "aac" | "x-aac" | "aiff" | "x-aiff" | "ogg" | "opus" | "flac" | "x-flac" | "mp4" | "x-m4a" | "amr" | "x-ms-wma" | "webm" | "pcm_s16le" | "pcm_l16" | "pcm_raw";
|
|
75
|
-
}, {
|
|
76
|
-
input_audio_codec: "wav" | "x-wav" | "wave" | "mp3" | "mpeg" | "mpeg3" | "x-mp3" | "x-mpeg-3" | "aac" | "x-aac" | "aiff" | "x-aiff" | "ogg" | "opus" | "flac" | "x-flac" | "mp4" | "x-m4a" | "amr" | "x-ms-wma" | "webm" | "pcm_s16le" | "pcm_l16" | "pcm_raw";
|
|
77
|
-
}>;
|
|
78
124
|
type SpeechTranslationSettings = {
|
|
79
125
|
/**
|
|
80
126
|
* Audio codec/format of the input file.
|
|
@@ -82,7 +128,7 @@ type SpeechTranslationSettings = {
|
|
|
82
128
|
* Our API automatically detects all codec formats, but for PCM files specifically (pcm_s16le, pcm_l16, pcm_raw), you must pass this parameter.
|
|
83
129
|
* PCM files are supported only at 16kHz sample rate.
|
|
84
130
|
*/
|
|
85
|
-
input_audio_codec?: z$1.infer<typeof
|
|
131
|
+
input_audio_codec?: z$1.infer<typeof input_audio_codec>;
|
|
86
132
|
};
|
|
87
133
|
//#endregion
|
|
88
134
|
//#region src/stt/transcription-settings.d.ts
|
|
@@ -92,21 +138,17 @@ type SpeechTranslationSettings = {
|
|
|
92
138
|
*/
|
|
93
139
|
type TranscriptionModelId = "saaras:v3" | "saarika:v2.5" | (string & {});
|
|
94
140
|
declare const transcriptionProviderOptionsSchema: z.ZodObject<{
|
|
95
|
-
mode: z.ZodOptional<z.ZodNullable<z.ZodEnum<
|
|
141
|
+
mode: z.ZodOptional<z.ZodNullable<z.ZodEnum<{
|
|
142
|
+
transcribe: "transcribe";
|
|
143
|
+
translate: "translate";
|
|
144
|
+
verbatim: "verbatim";
|
|
145
|
+
translit: "translit";
|
|
146
|
+
codemix: "codemix";
|
|
147
|
+
}>>>;
|
|
96
148
|
with_timestamps: z.ZodOptional<z.ZodNullable<z.ZodBoolean>>;
|
|
97
149
|
with_diarization: z.ZodOptional<z.ZodNullable<z.ZodBoolean>>;
|
|
98
150
|
num_speakers: z.ZodOptional<z.ZodNullable<z.ZodNumber>>;
|
|
99
|
-
},
|
|
100
|
-
mode?: "transcribe" | "translate" | "verbatim" | "translit" | "codemix" | null | undefined;
|
|
101
|
-
with_timestamps?: boolean | null | undefined;
|
|
102
|
-
with_diarization?: boolean | null | undefined;
|
|
103
|
-
num_speakers?: number | null | undefined;
|
|
104
|
-
}, {
|
|
105
|
-
mode?: "transcribe" | "translate" | "verbatim" | "translit" | "codemix" | null | undefined;
|
|
106
|
-
with_timestamps?: boolean | null | undefined;
|
|
107
|
-
with_diarization?: boolean | null | undefined;
|
|
108
|
-
num_speakers?: number | null | undefined;
|
|
109
|
-
}>;
|
|
151
|
+
}, z.core.$strip>;
|
|
110
152
|
type TranscriptionSettings<T extends TranscriptionModelId = TranscriptionModelId> = {
|
|
111
153
|
/**
|
|
112
154
|
* Mode of operation. Only applicable when using `saaras:v3` model.
|
|
@@ -149,9 +191,66 @@ type TranscriptionSettings<T extends TranscriptionModelId = TranscriptionModelId
|
|
|
149
191
|
* - `bulbul:v2`: Legacy model with pitch and loudness controls
|
|
150
192
|
*/
|
|
151
193
|
type SpeechModelId = "bulbul:v2" | "bulbul:v3" | (string & {});
|
|
152
|
-
declare const bulbul_v2: z.ZodEnum<
|
|
153
|
-
|
|
154
|
-
|
|
194
|
+
declare const bulbul_v2: z.ZodEnum<{
|
|
195
|
+
abhilash: "abhilash";
|
|
196
|
+
karun: "karun";
|
|
197
|
+
hitesh: "hitesh";
|
|
198
|
+
anushka: "anushka";
|
|
199
|
+
manisha: "manisha";
|
|
200
|
+
vidya: "vidya";
|
|
201
|
+
arya: "arya";
|
|
202
|
+
}>;
|
|
203
|
+
declare const bulbul_v3: z.ZodEnum<{
|
|
204
|
+
shubh: "shubh";
|
|
205
|
+
aditya: "aditya";
|
|
206
|
+
rahul: "rahul";
|
|
207
|
+
rohan: "rohan";
|
|
208
|
+
amit: "amit";
|
|
209
|
+
dev: "dev";
|
|
210
|
+
ratan: "ratan";
|
|
211
|
+
varun: "varun";
|
|
212
|
+
manan: "manan";
|
|
213
|
+
sumit: "sumit";
|
|
214
|
+
kabir: "kabir";
|
|
215
|
+
aayan: "aayan";
|
|
216
|
+
ashutosh: "ashutosh";
|
|
217
|
+
advait: "advait";
|
|
218
|
+
anand: "anand";
|
|
219
|
+
tarun: "tarun";
|
|
220
|
+
sunny: "sunny";
|
|
221
|
+
mani: "mani";
|
|
222
|
+
gokul: "gokul";
|
|
223
|
+
vijay: "vijay";
|
|
224
|
+
mohit: "mohit";
|
|
225
|
+
rehan: "rehan";
|
|
226
|
+
soham: "soham";
|
|
227
|
+
ritu: "ritu";
|
|
228
|
+
priya: "priya";
|
|
229
|
+
neha: "neha";
|
|
230
|
+
pooja: "pooja";
|
|
231
|
+
simran: "simran";
|
|
232
|
+
kavya: "kavya";
|
|
233
|
+
ishita: "ishita";
|
|
234
|
+
shreya: "shreya";
|
|
235
|
+
roopa: "roopa";
|
|
236
|
+
amelia: "amelia";
|
|
237
|
+
sophia: "sophia";
|
|
238
|
+
tanya: "tanya";
|
|
239
|
+
shruti: "shruti";
|
|
240
|
+
suhani: "suhani";
|
|
241
|
+
kavitha: "kavitha";
|
|
242
|
+
rupali: "rupali";
|
|
243
|
+
}>;
|
|
244
|
+
declare const outputAudioCodecSchema: z.ZodEnum<{
|
|
245
|
+
mp3: "mp3";
|
|
246
|
+
linear16: "linear16";
|
|
247
|
+
mulaw: "mulaw";
|
|
248
|
+
alaw: "alaw";
|
|
249
|
+
opus: "opus";
|
|
250
|
+
flac: "flac";
|
|
251
|
+
aac: "aac";
|
|
252
|
+
wav: "wav";
|
|
253
|
+
}>;
|
|
155
254
|
/**
|
|
156
255
|
* Configuration settings for Sarvam Text-to-Speech API.
|
|
157
256
|
*
|
|
@@ -372,7 +471,7 @@ type SarvamProvider = {
|
|
|
372
471
|
* prompt: "Translate this to malayalam: 'Keep cooking, guys'",
|
|
373
472
|
* });
|
|
374
473
|
*/
|
|
375
|
-
(modelId: ChatModelId, settings?: ChatSettings): LanguageModelV1;
|
|
474
|
+
(modelId: ChatModelId, settings?: ChatSettings): LanguageModelV2;
|
|
376
475
|
/**
|
|
377
476
|
* Creates an Sarvam chat model for text generation.
|
|
378
477
|
*
|
|
@@ -382,7 +481,7 @@ type SarvamProvider = {
|
|
|
382
481
|
* prompt: "Translate this to malayalam: 'Keep cooking, guys'",
|
|
383
482
|
* });
|
|
384
483
|
*/
|
|
385
|
-
languageModel(modelId: ChatModelId, settings?: ChatSettings): LanguageModelV1;
|
|
484
|
+
languageModel(modelId: ChatModelId, settings?: ChatSettings): LanguageModelV2;
|
|
386
485
|
/**
|
|
387
486
|
* Creates a Sarvam model for chat.
|
|
388
487
|
*
|
|
@@ -392,7 +491,7 @@ type SarvamProvider = {
|
|
|
392
491
|
* prompt: "Translate this to malayalam: 'Keep cooking, guys'",
|
|
393
492
|
* });
|
|
394
493
|
*/
|
|
395
|
-
chat(modelId: ChatModelId, settings?: ChatSettings): LanguageModelV1;
|
|
494
|
+
chat(modelId: ChatModelId, settings?: ChatSettings): LanguageModelV2;
|
|
396
495
|
/**
|
|
397
496
|
* Creates a Sarvam model for transcription.
|
|
398
497
|
*
|
|
@@ -409,7 +508,7 @@ type SarvamProvider = {
|
|
|
409
508
|
*
|
|
410
509
|
* @default unknown
|
|
411
510
|
*/
|
|
412
|
-
languageCode?: (T extends "saaras:v3" ? MoreSarvamLanguageCode : never) | SarvamLanguageCode | "unknown", settings?: TranscriptionSettings<T>): TranscriptionModelV1;
|
|
511
|
+
languageCode?: (T extends "saaras:v3" ? MoreSarvamLanguageCode : never) | SarvamLanguageCode | "unknown", settings?: TranscriptionSettings<T>): TranscriptionModelV2;
|
|
413
512
|
/**
|
|
414
513
|
* Creates a Sarvam model for Speech translation.
|
|
415
514
|
*
|
|
@@ -419,7 +518,7 @@ type SarvamProvider = {
|
|
|
419
518
|
* audio: await readFile("./audio.wav"),
|
|
420
519
|
* });
|
|
421
520
|
*/
|
|
422
|
-
speechTranslation<T extends SpeechTranslationModelId>(modelId: T, settings?: SpeechTranslationSettings): TranscriptionModelV1;
|
|
521
|
+
speechTranslation<T extends SpeechTranslationModelId>(modelId: T, settings?: SpeechTranslationSettings): TranscriptionModelV2;
|
|
423
522
|
/**
|
|
424
523
|
* Creates a Sarvam model for speech.
|
|
425
524
|
* @example
|
|
@@ -430,7 +529,7 @@ type SarvamProvider = {
|
|
|
430
529
|
*
|
|
431
530
|
* await writeFile("./audio.wav", Buffer.from(audio.base64, "base64"););
|
|
432
531
|
*/
|
|
433
|
-
speech<T extends SpeechModelId>(modelId: T, languageCode: SarvamLanguageCode, settings?: SpeechSettings<T>): SpeechModelV1;
|
|
532
|
+
speech<T extends SpeechModelId>(modelId: T, languageCode: SarvamLanguageCode, settings?: SpeechSettings<T>): SpeechModelV2;
|
|
434
533
|
/**
|
|
435
534
|
* Creates an Sarvam model for transliterate.
|
|
436
535
|
*
|
|
@@ -443,7 +542,7 @@ type SarvamProvider = {
|
|
|
443
542
|
* prompt: "eda mone, happy alle?",
|
|
444
543
|
* });
|
|
445
544
|
*/
|
|
446
|
-
transliterate<T extends SarvamLanguageCode>(settings: TransliterateSettings<false, T>): LanguageModelV1;
|
|
545
|
+
transliterate<T extends SarvamLanguageCode>(settings: TransliterateSettings<false, T>): LanguageModelV2;
|
|
447
546
|
/**
|
|
448
547
|
* Creates an Sarvam model for translation.
|
|
449
548
|
*
|
|
@@ -456,7 +555,7 @@ type SarvamProvider = {
|
|
|
456
555
|
* prompt: "ഇതൊക്കെ ശ്രദ്ധിക്കണ്ടേ അംബാനെ?",
|
|
457
556
|
* });
|
|
458
557
|
*/
|
|
459
|
-
translation<T extends TranslationModelId>(model: T, settings: TranslationSettings<T>): LanguageModelV1;
|
|
558
|
+
translation<T extends TranslationModelId>(model: T, settings: TranslationSettings<T>): LanguageModelV2;
|
|
460
559
|
/**
|
|
461
560
|
* Creates an Sarvam model for language identification.
|
|
462
561
|
*
|
|
@@ -466,7 +565,7 @@ type SarvamProvider = {
|
|
|
466
565
|
* prompt: "ബുദ്ധിയാണ് സാറേ ഇവൻ്റെ മെയിൻ",
|
|
467
566
|
* });
|
|
468
567
|
*/
|
|
469
|
-
languageIdentification(): LanguageModelV1;
|
|
568
|
+
languageIdentification(): LanguageModelV2;
|
|
470
569
|
};
|
|
471
570
|
//#endregion
|
|
472
571
|
//#region src/provider.d.ts
|
package/dist/index.d.ts
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { FetchFunction } from "@ai-sdk/provider-utils";
|
|
2
2
|
import z$1, { z } from "zod";
|
|
3
|
-
import { LanguageModelV1, SpeechModelV1, TranscriptionModelV1 } from "@ai-sdk/provider";
|
|
3
|
+
import { LanguageModelV2, SpeechModelV2, TranscriptionModelV2 } from "@ai-sdk/provider";
|
|
4
4
|
|
|
5
5
|
//#region src/config.d.ts
|
|
6
6
|
|
|
@@ -8,9 +8,34 @@ import { LanguageModelV1, SpeechModelV1, TranscriptionModelV1 } from "@ai-sdk/pr
|
|
|
8
8
|
* Specifies the language in BCP-47 format.
|
|
9
9
|
*/
|
|
10
10
|
type SarvamLanguageCode = z.infer<typeof SarvamLanguageCodeSchema>;
|
|
11
|
-
declare const SarvamLanguageCodeSchema: z.ZodEnum<
|
|
11
|
+
declare const SarvamLanguageCodeSchema: z.ZodEnum<{
|
|
12
|
+
"hi-IN": "hi-IN";
|
|
13
|
+
"bn-IN": "bn-IN";
|
|
14
|
+
"kn-IN": "kn-IN";
|
|
15
|
+
"ml-IN": "ml-IN";
|
|
16
|
+
"mr-IN": "mr-IN";
|
|
17
|
+
"od-IN": "od-IN";
|
|
18
|
+
"pa-IN": "pa-IN";
|
|
19
|
+
"ta-IN": "ta-IN";
|
|
20
|
+
"te-IN": "te-IN";
|
|
21
|
+
"en-IN": "en-IN";
|
|
22
|
+
"gu-IN": "gu-IN";
|
|
23
|
+
}>;
|
|
12
24
|
type MoreSarvamLanguageCode = z.infer<typeof MoreSarvamLanguageCodeSchema>;
|
|
13
|
-
declare const MoreSarvamLanguageCodeSchema: z.ZodEnum<
|
|
25
|
+
declare const MoreSarvamLanguageCodeSchema: z.ZodEnum<{
|
|
26
|
+
"as-IN": "as-IN";
|
|
27
|
+
"ur-IN": "ur-IN";
|
|
28
|
+
"ne-IN": "ne-IN";
|
|
29
|
+
"kok-IN": "kok-IN";
|
|
30
|
+
"ks-IN": "ks-IN";
|
|
31
|
+
"sd-IN": "sd-IN";
|
|
32
|
+
"sa-IN": "sa-IN";
|
|
33
|
+
"sat-IN": "sat-IN";
|
|
34
|
+
"mni-IN": "mni-IN";
|
|
35
|
+
"brx-IN": "brx-IN";
|
|
36
|
+
"mai-IN": "mai-IN";
|
|
37
|
+
"doi-IN": "doi-IN";
|
|
38
|
+
}>;
|
|
14
39
|
interface SarvamProviderSettings {
|
|
15
40
|
/**
|
|
16
41
|
* URL for the Sarvam API calls.
|
|
@@ -60,6 +85,34 @@ type ChatSettings = {
|
|
|
60
85
|
n?: number;
|
|
61
86
|
};
|
|
62
87
|
//#endregion
|
|
88
|
+
//#region src/stt/utils.d.ts
|
|
89
|
+
declare const input_audio_codec: z$1.ZodEnum<{
|
|
90
|
+
mp3: "mp3";
|
|
91
|
+
opus: "opus";
|
|
92
|
+
flac: "flac";
|
|
93
|
+
aac: "aac";
|
|
94
|
+
wav: "wav";
|
|
95
|
+
"x-wav": "x-wav";
|
|
96
|
+
wave: "wave";
|
|
97
|
+
mpeg: "mpeg";
|
|
98
|
+
mpeg3: "mpeg3";
|
|
99
|
+
"x-mp3": "x-mp3";
|
|
100
|
+
"x-mpeg-3": "x-mpeg-3";
|
|
101
|
+
"x-aac": "x-aac";
|
|
102
|
+
aiff: "aiff";
|
|
103
|
+
"x-aiff": "x-aiff";
|
|
104
|
+
ogg: "ogg";
|
|
105
|
+
"x-flac": "x-flac";
|
|
106
|
+
mp4: "mp4";
|
|
107
|
+
"x-m4a": "x-m4a";
|
|
108
|
+
amr: "amr";
|
|
109
|
+
"x-ms-wma": "x-ms-wma";
|
|
110
|
+
webm: "webm";
|
|
111
|
+
pcm_s16le: "pcm_s16le";
|
|
112
|
+
pcm_l16: "pcm_l16";
|
|
113
|
+
pcm_raw: "pcm_raw";
|
|
114
|
+
}>;
|
|
115
|
+
//#endregion
|
|
63
116
|
//#region src/stt/speech-translation-settings.d.ts
|
|
64
117
|
/**
|
|
65
118
|
* Specifies the speech generation model to use.
|
|
@@ -68,13 +121,6 @@ type ChatSettings = {
|
|
|
68
121
|
* - `saaras:v3`: Translation model that translates audio from any spoken Indic language to English, with improved accuracy and support for more languages.
|
|
69
122
|
*/
|
|
70
123
|
type SpeechTranslationModelId = "saaras:v3" | "saaras:v2.5" | (string & {});
|
|
71
|
-
declare const speechTranslationSettingsSchema: z$1.ZodObject<{
|
|
72
|
-
input_audio_codec: z$1.ZodEnum<["wav", "x-wav", "wave", "mp3", "mpeg", "mpeg3", "x-mp3", "x-mpeg-3", "aac", "x-aac", "aiff", "x-aiff", "ogg", "opus", "flac", "x-flac", "mp4", "x-m4a", "amr", "x-ms-wma", "webm", "pcm_s16le", "pcm_l16", "pcm_raw"]>;
|
|
73
|
-
}, "strip", z$1.ZodTypeAny, {
|
|
74
|
-
input_audio_codec: "wav" | "x-wav" | "wave" | "mp3" | "mpeg" | "mpeg3" | "x-mp3" | "x-mpeg-3" | "aac" | "x-aac" | "aiff" | "x-aiff" | "ogg" | "opus" | "flac" | "x-flac" | "mp4" | "x-m4a" | "amr" | "x-ms-wma" | "webm" | "pcm_s16le" | "pcm_l16" | "pcm_raw";
|
|
75
|
-
}, {
|
|
76
|
-
input_audio_codec: "wav" | "x-wav" | "wave" | "mp3" | "mpeg" | "mpeg3" | "x-mp3" | "x-mpeg-3" | "aac" | "x-aac" | "aiff" | "x-aiff" | "ogg" | "opus" | "flac" | "x-flac" | "mp4" | "x-m4a" | "amr" | "x-ms-wma" | "webm" | "pcm_s16le" | "pcm_l16" | "pcm_raw";
|
|
77
|
-
}>;
|
|
78
124
|
type SpeechTranslationSettings = {
|
|
79
125
|
/**
|
|
80
126
|
* Audio codec/format of the input file.
|
|
@@ -82,7 +128,7 @@ type SpeechTranslationSettings = {
|
|
|
82
128
|
* Our API automatically detects all codec formats, but for PCM files specifically (pcm_s16le, pcm_l16, pcm_raw), you must pass this parameter.
|
|
83
129
|
* PCM files are supported only at 16kHz sample rate.
|
|
84
130
|
*/
|
|
85
|
-
input_audio_codec?: z$1.infer<typeof
|
|
131
|
+
input_audio_codec?: z$1.infer<typeof input_audio_codec>;
|
|
86
132
|
};
|
|
87
133
|
//#endregion
|
|
88
134
|
//#region src/stt/transcription-settings.d.ts
|
|
@@ -92,21 +138,17 @@ type SpeechTranslationSettings = {
|
|
|
92
138
|
*/
|
|
93
139
|
type TranscriptionModelId = "saaras:v3" | "saarika:v2.5" | (string & {});
|
|
94
140
|
declare const transcriptionProviderOptionsSchema: z.ZodObject<{
|
|
95
|
-
mode: z.ZodOptional<z.ZodNullable<z.ZodEnum<
|
|
141
|
+
mode: z.ZodOptional<z.ZodNullable<z.ZodEnum<{
|
|
142
|
+
transcribe: "transcribe";
|
|
143
|
+
translate: "translate";
|
|
144
|
+
verbatim: "verbatim";
|
|
145
|
+
translit: "translit";
|
|
146
|
+
codemix: "codemix";
|
|
147
|
+
}>>>;
|
|
96
148
|
with_timestamps: z.ZodOptional<z.ZodNullable<z.ZodBoolean>>;
|
|
97
149
|
with_diarization: z.ZodOptional<z.ZodNullable<z.ZodBoolean>>;
|
|
98
150
|
num_speakers: z.ZodOptional<z.ZodNullable<z.ZodNumber>>;
|
|
99
|
-
},
|
|
100
|
-
mode?: "transcribe" | "translate" | "verbatim" | "translit" | "codemix" | null | undefined;
|
|
101
|
-
with_timestamps?: boolean | null | undefined;
|
|
102
|
-
with_diarization?: boolean | null | undefined;
|
|
103
|
-
num_speakers?: number | null | undefined;
|
|
104
|
-
}, {
|
|
105
|
-
mode?: "transcribe" | "translate" | "verbatim" | "translit" | "codemix" | null | undefined;
|
|
106
|
-
with_timestamps?: boolean | null | undefined;
|
|
107
|
-
with_diarization?: boolean | null | undefined;
|
|
108
|
-
num_speakers?: number | null | undefined;
|
|
109
|
-
}>;
|
|
151
|
+
}, z.core.$strip>;
|
|
110
152
|
type TranscriptionSettings<T extends TranscriptionModelId = TranscriptionModelId> = {
|
|
111
153
|
/**
|
|
112
154
|
* Mode of operation. Only applicable when using `saaras:v3` model.
|
|
@@ -149,9 +191,66 @@ type TranscriptionSettings<T extends TranscriptionModelId = TranscriptionModelId
|
|
|
149
191
|
* - `bulbul:v2`: Legacy model with pitch and loudness controls
|
|
150
192
|
*/
|
|
151
193
|
type SpeechModelId = "bulbul:v2" | "bulbul:v3" | (string & {});
|
|
152
|
-
declare const bulbul_v2: z.ZodEnum<
|
|
153
|
-
|
|
154
|
-
|
|
194
|
+
declare const bulbul_v2: z.ZodEnum<{
|
|
195
|
+
abhilash: "abhilash";
|
|
196
|
+
karun: "karun";
|
|
197
|
+
hitesh: "hitesh";
|
|
198
|
+
anushka: "anushka";
|
|
199
|
+
manisha: "manisha";
|
|
200
|
+
vidya: "vidya";
|
|
201
|
+
arya: "arya";
|
|
202
|
+
}>;
|
|
203
|
+
declare const bulbul_v3: z.ZodEnum<{
|
|
204
|
+
shubh: "shubh";
|
|
205
|
+
aditya: "aditya";
|
|
206
|
+
rahul: "rahul";
|
|
207
|
+
rohan: "rohan";
|
|
208
|
+
amit: "amit";
|
|
209
|
+
dev: "dev";
|
|
210
|
+
ratan: "ratan";
|
|
211
|
+
varun: "varun";
|
|
212
|
+
manan: "manan";
|
|
213
|
+
sumit: "sumit";
|
|
214
|
+
kabir: "kabir";
|
|
215
|
+
aayan: "aayan";
|
|
216
|
+
ashutosh: "ashutosh";
|
|
217
|
+
advait: "advait";
|
|
218
|
+
anand: "anand";
|
|
219
|
+
tarun: "tarun";
|
|
220
|
+
sunny: "sunny";
|
|
221
|
+
mani: "mani";
|
|
222
|
+
gokul: "gokul";
|
|
223
|
+
vijay: "vijay";
|
|
224
|
+
mohit: "mohit";
|
|
225
|
+
rehan: "rehan";
|
|
226
|
+
soham: "soham";
|
|
227
|
+
ritu: "ritu";
|
|
228
|
+
priya: "priya";
|
|
229
|
+
neha: "neha";
|
|
230
|
+
pooja: "pooja";
|
|
231
|
+
simran: "simran";
|
|
232
|
+
kavya: "kavya";
|
|
233
|
+
ishita: "ishita";
|
|
234
|
+
shreya: "shreya";
|
|
235
|
+
roopa: "roopa";
|
|
236
|
+
amelia: "amelia";
|
|
237
|
+
sophia: "sophia";
|
|
238
|
+
tanya: "tanya";
|
|
239
|
+
shruti: "shruti";
|
|
240
|
+
suhani: "suhani";
|
|
241
|
+
kavitha: "kavitha";
|
|
242
|
+
rupali: "rupali";
|
|
243
|
+
}>;
|
|
244
|
+
declare const outputAudioCodecSchema: z.ZodEnum<{
|
|
245
|
+
mp3: "mp3";
|
|
246
|
+
linear16: "linear16";
|
|
247
|
+
mulaw: "mulaw";
|
|
248
|
+
alaw: "alaw";
|
|
249
|
+
opus: "opus";
|
|
250
|
+
flac: "flac";
|
|
251
|
+
aac: "aac";
|
|
252
|
+
wav: "wav";
|
|
253
|
+
}>;
|
|
155
254
|
/**
|
|
156
255
|
* Configuration settings for Sarvam Text-to-Speech API.
|
|
157
256
|
*
|
|
@@ -372,7 +471,7 @@ type SarvamProvider = {
|
|
|
372
471
|
* prompt: "Translate this to malayalam: 'Keep cooking, guys'",
|
|
373
472
|
* });
|
|
374
473
|
*/
|
|
375
|
-
(modelId: ChatModelId, settings?: ChatSettings): LanguageModelV1;
|
|
474
|
+
(modelId: ChatModelId, settings?: ChatSettings): LanguageModelV2;
|
|
376
475
|
/**
|
|
377
476
|
* Creates an Sarvam chat model for text generation.
|
|
378
477
|
*
|
|
@@ -382,7 +481,7 @@ type SarvamProvider = {
|
|
|
382
481
|
* prompt: "Translate this to malayalam: 'Keep cooking, guys'",
|
|
383
482
|
* });
|
|
384
483
|
*/
|
|
385
|
-
languageModel(modelId: ChatModelId, settings?: ChatSettings): LanguageModelV1;
|
|
484
|
+
languageModel(modelId: ChatModelId, settings?: ChatSettings): LanguageModelV2;
|
|
386
485
|
/**
|
|
387
486
|
* Creates a Sarvam model for chat.
|
|
388
487
|
*
|
|
@@ -392,7 +491,7 @@ type SarvamProvider = {
|
|
|
392
491
|
* prompt: "Translate this to malayalam: 'Keep cooking, guys'",
|
|
393
492
|
* });
|
|
394
493
|
*/
|
|
395
|
-
chat(modelId: ChatModelId, settings?: ChatSettings): LanguageModelV1;
|
|
494
|
+
chat(modelId: ChatModelId, settings?: ChatSettings): LanguageModelV2;
|
|
396
495
|
/**
|
|
397
496
|
* Creates a Sarvam model for transcription.
|
|
398
497
|
*
|
|
@@ -409,7 +508,7 @@ type SarvamProvider = {
|
|
|
409
508
|
*
|
|
410
509
|
* @default unknown
|
|
411
510
|
*/
|
|
412
|
-
languageCode?: (T extends "saaras:v3" ? MoreSarvamLanguageCode : never) | SarvamLanguageCode | "unknown", settings?: TranscriptionSettings<T>): TranscriptionModelV1;
|
|
511
|
+
languageCode?: (T extends "saaras:v3" ? MoreSarvamLanguageCode : never) | SarvamLanguageCode | "unknown", settings?: TranscriptionSettings<T>): TranscriptionModelV2;
|
|
413
512
|
/**
|
|
414
513
|
* Creates a Sarvam model for Speech translation.
|
|
415
514
|
*
|
|
@@ -419,7 +518,7 @@ type SarvamProvider = {
|
|
|
419
518
|
* audio: await readFile("./audio.wav"),
|
|
420
519
|
* });
|
|
421
520
|
*/
|
|
422
|
-
speechTranslation<T extends SpeechTranslationModelId>(modelId: T, settings?: SpeechTranslationSettings): TranscriptionModelV1;
|
|
521
|
+
speechTranslation<T extends SpeechTranslationModelId>(modelId: T, settings?: SpeechTranslationSettings): TranscriptionModelV2;
|
|
423
522
|
/**
|
|
424
523
|
* Creates a Sarvam model for speech.
|
|
425
524
|
* @example
|
|
@@ -430,7 +529,7 @@ type SarvamProvider = {
|
|
|
430
529
|
*
|
|
431
530
|
* await writeFile("./audio.wav", Buffer.from(audio.base64, "base64"););
|
|
432
531
|
*/
|
|
433
|
-
speech<T extends SpeechModelId>(modelId: T, languageCode: SarvamLanguageCode, settings?: SpeechSettings<T>): SpeechModelV1;
|
|
532
|
+
speech<T extends SpeechModelId>(modelId: T, languageCode: SarvamLanguageCode, settings?: SpeechSettings<T>): SpeechModelV2;
|
|
434
533
|
/**
|
|
435
534
|
* Creates an Sarvam model for transliterate.
|
|
436
535
|
*
|
|
@@ -443,7 +542,7 @@ type SarvamProvider = {
|
|
|
443
542
|
* prompt: "eda mone, happy alle?",
|
|
444
543
|
* });
|
|
445
544
|
*/
|
|
446
|
-
transliterate<T extends SarvamLanguageCode>(settings: TransliterateSettings<false, T>): LanguageModelV1;
|
|
545
|
+
transliterate<T extends SarvamLanguageCode>(settings: TransliterateSettings<false, T>): LanguageModelV2;
|
|
447
546
|
/**
|
|
448
547
|
* Creates an Sarvam model for translation.
|
|
449
548
|
*
|
|
@@ -456,7 +555,7 @@ type SarvamProvider = {
|
|
|
456
555
|
* prompt: "ഇതൊക്കെ ശ്രദ്ധിക്കണ്ടേ അംബാനെ?",
|
|
457
556
|
* });
|
|
458
557
|
*/
|
|
459
|
-
translation<T extends TranslationModelId>(model: T, settings: TranslationSettings<T>): LanguageModelV1;
|
|
558
|
+
translation<T extends TranslationModelId>(model: T, settings: TranslationSettings<T>): LanguageModelV2;
|
|
460
559
|
/**
|
|
461
560
|
* Creates an Sarvam model for language identification.
|
|
462
561
|
*
|
|
@@ -466,7 +565,7 @@ type SarvamProvider = {
|
|
|
466
565
|
* prompt: "ബുദ്ധിയാണ് സാറേ ഇവൻ്റെ മെയിൻ",
|
|
467
566
|
* });
|
|
468
567
|
*/
|
|
469
|
-
languageIdentification(): LanguageModelV1;
|
|
568
|
+
languageIdentification(): LanguageModelV2;
|
|
470
569
|
};
|
|
471
570
|
//#endregion
|
|
472
571
|
//#region src/provider.d.ts
|