sarvam-ai-sdk 0.1.5-beta → 0.3.0-beta
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +12 -13
- package/dist/index.d.mts +134 -35
- package/dist/index.d.ts +134 -35
- package/dist/index.js +319 -306
- package/dist/index.mjs +319 -306
- package/package.json +7 -6
package/README.md
CHANGED
|
@@ -1,7 +1,10 @@
|
|
|
1
|
-
|
|
1
|
+
<a href="https://github.com/rajatsandeepsen/sarvam-ai-sdk">
|
|
2
|
+
<img alt="cover" src="https://github.com/rajatsandeepsen/sarvam-ai-sdk/blob/master/cover.png?raw=true" />
|
|
3
|
+
</a>
|
|
4
|
+
|
|
2
5
|
# AI SDK - Sarvam Provider
|
|
3
6
|
|
|
4
|
-
The **[Sarvam provider](https://
|
|
7
|
+
The **[Sarvam provider](https://v6.ai-sdk.dev/providers/community-providers/sarvam)** for the [AI SDK](https://v6.ai-sdk.dev/docs)
|
|
5
8
|
contains language model support for the Sarvam chat completion, Text-to-Speech and Speech-to-Text APIs.
|
|
6
9
|
|
|
7
10
|
## Setup
|
|
@@ -9,11 +12,11 @@ contains language model support for the Sarvam chat completion, Text-to-Speech a
|
|
|
9
12
|
The **[Sarvam](http://sarvam.ai)** provider is available in the `sarvam-ai-sdk` module. You can install it with
|
|
10
13
|
|
|
11
14
|
```bash
|
|
12
|
-
npm i sarvam-ai-sdk ai@
|
|
15
|
+
npm i sarvam-ai-sdk ai@6
|
|
13
16
|
```
|
|
14
17
|
|
|
15
18
|
> [!WARNING]
|
|
16
|
-
> This package only works with Vercel AI-SDK
|
|
19
|
+
> This package only works with Vercel AI-SDK v6, not v7. Make sure to install `ai@6` in your project.
|
|
17
20
|
|
|
18
21
|
## Provider Instance
|
|
19
22
|
|
|
@@ -168,8 +171,8 @@ const result = await generateText({
|
|
|
168
171
|
tools: {
|
|
169
172
|
weather: tool({
|
|
170
173
|
description: "Get the weather in a location",
|
|
171
|
-
|
|
172
|
-
|
|
174
|
+
inputSchema: z.object({
|
|
175
|
+
location: z.string(),
|
|
173
176
|
}),
|
|
174
177
|
execute: async ({ location }) => ({
|
|
175
178
|
location,
|
|
@@ -184,9 +187,6 @@ const result = await generateText({
|
|
|
184
187
|
console.log(result.toolResults);
|
|
185
188
|
```
|
|
186
189
|
|
|
187
|
-
> [!WARNING]
|
|
188
|
-
> Old `sarvam-m` models isn't trained on native tool calling feature (aka JSON mode). So we recommend using latest models.
|
|
189
|
-
|
|
190
190
|
## Generate JSON object
|
|
191
191
|
|
|
192
192
|
```ts
|
|
@@ -196,6 +196,8 @@ import { generateObject } from 'ai';
|
|
|
196
196
|
|
|
197
197
|
const { object } = await generateObject({
|
|
198
198
|
model: sarvam("sarvam-30b"),
|
|
199
|
+
schemaName: "Recipe",
|
|
200
|
+
schemaDescription: "A recipe with a name, ingredients and steps",
|
|
199
201
|
schema: z.object({
|
|
200
202
|
recipe: z.object({
|
|
201
203
|
name: z.string(),
|
|
@@ -209,9 +211,6 @@ const { object } = await generateObject({
|
|
|
209
211
|
console.log(object);
|
|
210
212
|
```
|
|
211
213
|
|
|
212
|
-
> [!WARNING]
|
|
213
|
-
> Old `sarvam-m` models isn't trained on native JSON object generation. So we recommend using latest models.
|
|
214
|
-
|
|
215
214
|
## All APIs
|
|
216
215
|
|
|
217
216
|
```ts
|
|
@@ -243,4 +242,4 @@ sarvam.speechTranslation("saaras:v3");
|
|
|
243
242
|
|
|
244
243
|
## Documentation
|
|
245
244
|
|
|
246
|
-
Please check out the **[Sarvam provider documentation](https://
|
|
245
|
+
Please check out the **[Sarvam provider documentation](https://v6.ai-sdk.dev/providers/community-providers/sarvam)** and **[Sarvam API documentation](https://docs.sarvam.ai)** for more information.
|
package/dist/index.d.mts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { FetchFunction } from "@ai-sdk/provider-utils";
|
|
2
|
-
import {
|
|
2
|
+
import { LanguageModelV3, SpeechModelV3, TranscriptionModelV3 } from "@ai-sdk/provider";
|
|
3
3
|
import z$1, { z } from "zod";
|
|
4
4
|
|
|
5
5
|
//#region src/config.d.ts
|
|
@@ -8,9 +8,34 @@ import z$1, { z } from "zod";
|
|
|
8
8
|
* Specifies the language in BCP-47 format.
|
|
9
9
|
*/
|
|
10
10
|
type SarvamLanguageCode = z.infer<typeof SarvamLanguageCodeSchema>;
|
|
11
|
-
declare const SarvamLanguageCodeSchema: z.ZodEnum<
|
|
11
|
+
declare const SarvamLanguageCodeSchema: z.ZodEnum<{
|
|
12
|
+
"hi-IN": "hi-IN";
|
|
13
|
+
"bn-IN": "bn-IN";
|
|
14
|
+
"kn-IN": "kn-IN";
|
|
15
|
+
"ml-IN": "ml-IN";
|
|
16
|
+
"mr-IN": "mr-IN";
|
|
17
|
+
"od-IN": "od-IN";
|
|
18
|
+
"pa-IN": "pa-IN";
|
|
19
|
+
"ta-IN": "ta-IN";
|
|
20
|
+
"te-IN": "te-IN";
|
|
21
|
+
"en-IN": "en-IN";
|
|
22
|
+
"gu-IN": "gu-IN";
|
|
23
|
+
}>;
|
|
12
24
|
type MoreSarvamLanguageCode = z.infer<typeof MoreSarvamLanguageCodeSchema>;
|
|
13
|
-
declare const MoreSarvamLanguageCodeSchema: z.ZodEnum<
|
|
25
|
+
declare const MoreSarvamLanguageCodeSchema: z.ZodEnum<{
|
|
26
|
+
"as-IN": "as-IN";
|
|
27
|
+
"ur-IN": "ur-IN";
|
|
28
|
+
"ne-IN": "ne-IN";
|
|
29
|
+
"kok-IN": "kok-IN";
|
|
30
|
+
"ks-IN": "ks-IN";
|
|
31
|
+
"sd-IN": "sd-IN";
|
|
32
|
+
"sa-IN": "sa-IN";
|
|
33
|
+
"sat-IN": "sat-IN";
|
|
34
|
+
"mni-IN": "mni-IN";
|
|
35
|
+
"brx-IN": "brx-IN";
|
|
36
|
+
"mai-IN": "mai-IN";
|
|
37
|
+
"doi-IN": "doi-IN";
|
|
38
|
+
}>;
|
|
14
39
|
interface SarvamProviderSettings {
|
|
15
40
|
/**
|
|
16
41
|
* URL for the Sarvam API calls.
|
|
@@ -60,6 +85,34 @@ type ChatSettings = {
|
|
|
60
85
|
n?: number;
|
|
61
86
|
};
|
|
62
87
|
//#endregion
|
|
88
|
+
//#region src/stt/utils.d.ts
|
|
89
|
+
declare const input_audio_codec: z$1.ZodEnum<{
|
|
90
|
+
mp3: "mp3";
|
|
91
|
+
opus: "opus";
|
|
92
|
+
flac: "flac";
|
|
93
|
+
aac: "aac";
|
|
94
|
+
wav: "wav";
|
|
95
|
+
"x-wav": "x-wav";
|
|
96
|
+
wave: "wave";
|
|
97
|
+
mpeg: "mpeg";
|
|
98
|
+
mpeg3: "mpeg3";
|
|
99
|
+
"x-mp3": "x-mp3";
|
|
100
|
+
"x-mpeg-3": "x-mpeg-3";
|
|
101
|
+
"x-aac": "x-aac";
|
|
102
|
+
aiff: "aiff";
|
|
103
|
+
"x-aiff": "x-aiff";
|
|
104
|
+
ogg: "ogg";
|
|
105
|
+
"x-flac": "x-flac";
|
|
106
|
+
mp4: "mp4";
|
|
107
|
+
"x-m4a": "x-m4a";
|
|
108
|
+
amr: "amr";
|
|
109
|
+
"x-ms-wma": "x-ms-wma";
|
|
110
|
+
webm: "webm";
|
|
111
|
+
pcm_s16le: "pcm_s16le";
|
|
112
|
+
pcm_l16: "pcm_l16";
|
|
113
|
+
pcm_raw: "pcm_raw";
|
|
114
|
+
}>;
|
|
115
|
+
//#endregion
|
|
63
116
|
//#region src/stt/speech-translation-settings.d.ts
|
|
64
117
|
/**
|
|
65
118
|
* Specifies the speech generation model to use.
|
|
@@ -68,13 +121,6 @@ type ChatSettings = {
|
|
|
68
121
|
* - `saaras:v3`: Translation model that translates audio from any spoken Indic language to English, with improved accuracy and support for more languages.
|
|
69
122
|
*/
|
|
70
123
|
type SpeechTranslationModelId = "saaras:v3" | "saaras:v2.5" | (string & {});
|
|
71
|
-
declare const speechTranslationSettingsSchema: z$1.ZodObject<{
|
|
72
|
-
input_audio_codec: z$1.ZodEnum<["wav", "x-wav", "wave", "mp3", "mpeg", "mpeg3", "x-mp3", "x-mpeg-3", "aac", "x-aac", "aiff", "x-aiff", "ogg", "opus", "flac", "x-flac", "mp4", "x-m4a", "amr", "x-ms-wma", "webm", "pcm_s16le", "pcm_l16", "pcm_raw"]>;
|
|
73
|
-
}, "strip", z$1.ZodTypeAny, {
|
|
74
|
-
input_audio_codec: "wav" | "x-wav" | "wave" | "mp3" | "mpeg" | "mpeg3" | "x-mp3" | "x-mpeg-3" | "aac" | "x-aac" | "aiff" | "x-aiff" | "ogg" | "opus" | "flac" | "x-flac" | "mp4" | "x-m4a" | "amr" | "x-ms-wma" | "webm" | "pcm_s16le" | "pcm_l16" | "pcm_raw";
|
|
75
|
-
}, {
|
|
76
|
-
input_audio_codec: "wav" | "x-wav" | "wave" | "mp3" | "mpeg" | "mpeg3" | "x-mp3" | "x-mpeg-3" | "aac" | "x-aac" | "aiff" | "x-aiff" | "ogg" | "opus" | "flac" | "x-flac" | "mp4" | "x-m4a" | "amr" | "x-ms-wma" | "webm" | "pcm_s16le" | "pcm_l16" | "pcm_raw";
|
|
77
|
-
}>;
|
|
78
124
|
type SpeechTranslationSettings = {
|
|
79
125
|
/**
|
|
80
126
|
* Audio codec/format of the input file.
|
|
@@ -82,7 +128,7 @@ type SpeechTranslationSettings = {
|
|
|
82
128
|
* Our API automatically detects all codec formats, but for PCM files specifically (pcm_s16le, pcm_l16, pcm_raw), you must pass this parameter.
|
|
83
129
|
* PCM files are supported only at 16kHz sample rate.
|
|
84
130
|
*/
|
|
85
|
-
input_audio_codec?: z$1.infer<typeof
|
|
131
|
+
input_audio_codec?: z$1.infer<typeof input_audio_codec>;
|
|
86
132
|
};
|
|
87
133
|
//#endregion
|
|
88
134
|
//#region src/stt/transcription-settings.d.ts
|
|
@@ -92,21 +138,17 @@ type SpeechTranslationSettings = {
|
|
|
92
138
|
*/
|
|
93
139
|
type TranscriptionModelId = "saaras:v3" | "saarika:v2.5" | (string & {});
|
|
94
140
|
declare const transcriptionProviderOptionsSchema: z.ZodObject<{
|
|
95
|
-
mode: z.ZodOptional<z.ZodNullable<z.ZodEnum<
|
|
141
|
+
mode: z.ZodOptional<z.ZodNullable<z.ZodEnum<{
|
|
142
|
+
transcribe: "transcribe";
|
|
143
|
+
translate: "translate";
|
|
144
|
+
verbatim: "verbatim";
|
|
145
|
+
translit: "translit";
|
|
146
|
+
codemix: "codemix";
|
|
147
|
+
}>>>;
|
|
96
148
|
with_timestamps: z.ZodOptional<z.ZodNullable<z.ZodBoolean>>;
|
|
97
149
|
with_diarization: z.ZodOptional<z.ZodNullable<z.ZodBoolean>>;
|
|
98
150
|
num_speakers: z.ZodOptional<z.ZodNullable<z.ZodNumber>>;
|
|
99
|
-
},
|
|
100
|
-
mode?: "transcribe" | "translate" | "verbatim" | "translit" | "codemix" | null | undefined;
|
|
101
|
-
with_timestamps?: boolean | null | undefined;
|
|
102
|
-
with_diarization?: boolean | null | undefined;
|
|
103
|
-
num_speakers?: number | null | undefined;
|
|
104
|
-
}, {
|
|
105
|
-
mode?: "transcribe" | "translate" | "verbatim" | "translit" | "codemix" | null | undefined;
|
|
106
|
-
with_timestamps?: boolean | null | undefined;
|
|
107
|
-
with_diarization?: boolean | null | undefined;
|
|
108
|
-
num_speakers?: number | null | undefined;
|
|
109
|
-
}>;
|
|
151
|
+
}, z.core.$strip>;
|
|
110
152
|
type TranscriptionSettings<T extends TranscriptionModelId = TranscriptionModelId> = {
|
|
111
153
|
/**
|
|
112
154
|
* Mode of operation. Only applicable when using `saaras:v3` model.
|
|
@@ -149,9 +191,66 @@ type TranscriptionSettings<T extends TranscriptionModelId = TranscriptionModelId
|
|
|
149
191
|
* - `bulbul:v2`: Legacy model with pitch and loudness controls
|
|
150
192
|
*/
|
|
151
193
|
type SpeechModelId = "bulbul:v2" | "bulbul:v3" | (string & {});
|
|
152
|
-
declare const bulbul_v2: z.ZodEnum<
|
|
153
|
-
|
|
154
|
-
|
|
194
|
+
declare const bulbul_v2: z.ZodEnum<{
|
|
195
|
+
abhilash: "abhilash";
|
|
196
|
+
karun: "karun";
|
|
197
|
+
hitesh: "hitesh";
|
|
198
|
+
anushka: "anushka";
|
|
199
|
+
manisha: "manisha";
|
|
200
|
+
vidya: "vidya";
|
|
201
|
+
arya: "arya";
|
|
202
|
+
}>;
|
|
203
|
+
declare const bulbul_v3: z.ZodEnum<{
|
|
204
|
+
shubh: "shubh";
|
|
205
|
+
aditya: "aditya";
|
|
206
|
+
rahul: "rahul";
|
|
207
|
+
rohan: "rohan";
|
|
208
|
+
amit: "amit";
|
|
209
|
+
dev: "dev";
|
|
210
|
+
ratan: "ratan";
|
|
211
|
+
varun: "varun";
|
|
212
|
+
manan: "manan";
|
|
213
|
+
sumit: "sumit";
|
|
214
|
+
kabir: "kabir";
|
|
215
|
+
aayan: "aayan";
|
|
216
|
+
ashutosh: "ashutosh";
|
|
217
|
+
advait: "advait";
|
|
218
|
+
anand: "anand";
|
|
219
|
+
tarun: "tarun";
|
|
220
|
+
sunny: "sunny";
|
|
221
|
+
mani: "mani";
|
|
222
|
+
gokul: "gokul";
|
|
223
|
+
vijay: "vijay";
|
|
224
|
+
mohit: "mohit";
|
|
225
|
+
rehan: "rehan";
|
|
226
|
+
soham: "soham";
|
|
227
|
+
ritu: "ritu";
|
|
228
|
+
priya: "priya";
|
|
229
|
+
neha: "neha";
|
|
230
|
+
pooja: "pooja";
|
|
231
|
+
simran: "simran";
|
|
232
|
+
kavya: "kavya";
|
|
233
|
+
ishita: "ishita";
|
|
234
|
+
shreya: "shreya";
|
|
235
|
+
roopa: "roopa";
|
|
236
|
+
amelia: "amelia";
|
|
237
|
+
sophia: "sophia";
|
|
238
|
+
tanya: "tanya";
|
|
239
|
+
shruti: "shruti";
|
|
240
|
+
suhani: "suhani";
|
|
241
|
+
kavitha: "kavitha";
|
|
242
|
+
rupali: "rupali";
|
|
243
|
+
}>;
|
|
244
|
+
declare const outputAudioCodecSchema: z.ZodEnum<{
|
|
245
|
+
mp3: "mp3";
|
|
246
|
+
linear16: "linear16";
|
|
247
|
+
mulaw: "mulaw";
|
|
248
|
+
alaw: "alaw";
|
|
249
|
+
opus: "opus";
|
|
250
|
+
flac: "flac";
|
|
251
|
+
aac: "aac";
|
|
252
|
+
wav: "wav";
|
|
253
|
+
}>;
|
|
155
254
|
/**
|
|
156
255
|
* Configuration settings for Sarvam Text-to-Speech API.
|
|
157
256
|
*
|
|
@@ -372,7 +471,7 @@ type SarvamProvider = {
|
|
|
372
471
|
* prompt: "Translate this to malayalam: 'Keep cooking, guys'",
|
|
373
472
|
* });
|
|
374
473
|
*/
|
|
375
|
-
(modelId: ChatModelId, settings?: ChatSettings):
|
|
474
|
+
(modelId: ChatModelId, settings?: ChatSettings): LanguageModelV3;
|
|
376
475
|
/**
|
|
377
476
|
* Creates a Sarvam chat model for text generation.
|
|
378
477
|
*
|
|
@@ -382,7 +481,7 @@ type SarvamProvider = {
|
|
|
382
481
|
* prompt: "Translate this to malayalam: 'Keep cooking, guys'",
|
|
383
482
|
* });
|
|
384
483
|
*/
|
|
385
|
-
languageModel(modelId: ChatModelId, settings?: ChatSettings):
|
|
484
|
+
languageModel(modelId: ChatModelId, settings?: ChatSettings): LanguageModelV3;
|
|
386
485
|
/**
|
|
387
486
|
* Creates a Sarvam model for chat.
|
|
388
487
|
*
|
|
@@ -392,7 +491,7 @@ type SarvamProvider = {
|
|
|
392
491
|
* prompt: "Translate this to malayalam: 'Keep cooking, guys'",
|
|
393
492
|
* });
|
|
394
493
|
*/
|
|
395
|
-
chat(modelId: ChatModelId, settings?: ChatSettings):
|
|
494
|
+
chat(modelId: ChatModelId, settings?: ChatSettings): LanguageModelV3;
|
|
396
495
|
/**
|
|
397
496
|
* Creates a Sarvam model for transcription.
|
|
398
497
|
*
|
|
@@ -409,7 +508,7 @@ type SarvamProvider = {
|
|
|
409
508
|
*
|
|
410
509
|
* @default unknown
|
|
411
510
|
*/
|
|
412
|
-
languageCode?: (T extends "saaras:v3" ? MoreSarvamLanguageCode : never) | SarvamLanguageCode | "unknown", settings?: TranscriptionSettings<T>):
|
|
511
|
+
languageCode?: (T extends "saaras:v3" ? MoreSarvamLanguageCode : never) | SarvamLanguageCode | "unknown", settings?: TranscriptionSettings<T>): TranscriptionModelV3;
|
|
413
512
|
/**
|
|
414
513
|
* Creates a Sarvam model for Speech translation.
|
|
415
514
|
*
|
|
@@ -419,7 +518,7 @@ type SarvamProvider = {
|
|
|
419
518
|
* audio: await readFile("./audio.wav"),
|
|
420
519
|
* });
|
|
421
520
|
*/
|
|
422
|
-
speechTranslation<T extends SpeechTranslationModelId>(modelId: T, settings?: SpeechTranslationSettings):
|
|
521
|
+
speechTranslation<T extends SpeechTranslationModelId>(modelId: T, settings?: SpeechTranslationSettings): TranscriptionModelV3;
|
|
423
522
|
/**
|
|
424
523
|
* Creates a Sarvam model for speech.
|
|
425
524
|
* @example
|
|
@@ -430,7 +529,7 @@ type SarvamProvider = {
|
|
|
430
529
|
*
|
|
431
530
|
* await writeFile("./audio.wav", Buffer.from(audio.base64, "base64"));
|
|
432
531
|
*/
|
|
433
|
-
speech<T extends SpeechModelId>(modelId: T, languageCode: SarvamLanguageCode, settings?: SpeechSettings<T>):
|
|
532
|
+
speech<T extends SpeechModelId>(modelId: T, languageCode: SarvamLanguageCode, settings?: SpeechSettings<T>): SpeechModelV3;
|
|
434
533
|
/**
|
|
435
534
|
* Creates a Sarvam model for transliteration.
|
|
436
535
|
*
|
|
@@ -443,7 +542,7 @@ type SarvamProvider = {
|
|
|
443
542
|
* prompt: "eda mone, happy alle?",
|
|
444
543
|
* });
|
|
445
544
|
*/
|
|
446
|
-
transliterate<T extends SarvamLanguageCode>(settings: TransliterateSettings<false, T>):
|
|
545
|
+
transliterate<T extends SarvamLanguageCode>(settings: TransliterateSettings<false, T>): LanguageModelV3;
|
|
447
546
|
/**
|
|
448
547
|
* Creates a Sarvam model for translation.
|
|
449
548
|
*
|
|
@@ -456,7 +555,7 @@ type SarvamProvider = {
|
|
|
456
555
|
* prompt: "ഇതൊക്കെ ശ്രദ്ധിക്കണ്ടേ അംബാനെ?",
|
|
457
556
|
* });
|
|
458
557
|
*/
|
|
459
|
-
translation<T extends TranslationModelId>(model: T, settings: TranslationSettings<T>):
|
|
558
|
+
translation<T extends TranslationModelId>(model: T, settings: TranslationSettings<T>): LanguageModelV3;
|
|
460
559
|
/**
|
|
461
560
|
* Creates a Sarvam model for language identification.
|
|
462
561
|
*
|
|
@@ -466,7 +565,7 @@ type SarvamProvider = {
|
|
|
466
565
|
* prompt: "ബുദ്ധിയാണ് സാറേ ഇവൻ്റെ മെയിൻ",
|
|
467
566
|
* });
|
|
468
567
|
*/
|
|
469
|
-
languageIdentification():
|
|
568
|
+
languageIdentification(): LanguageModelV3;
|
|
470
569
|
};
|
|
471
570
|
//#endregion
|
|
472
571
|
//#region src/provider.d.ts
|
package/dist/index.d.ts
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { FetchFunction } from "@ai-sdk/provider-utils";
|
|
2
2
|
import z$1, { z } from "zod";
|
|
3
|
-
import {
|
|
3
|
+
import { LanguageModelV3, SpeechModelV3, TranscriptionModelV3 } from "@ai-sdk/provider";
|
|
4
4
|
|
|
5
5
|
//#region src/config.d.ts
|
|
6
6
|
|
|
@@ -8,9 +8,34 @@ import { LanguageModelV1, SpeechModelV1, TranscriptionModelV1 } from "@ai-sdk/pr
|
|
|
8
8
|
* Specifies the language in BCP-47 format.
|
|
9
9
|
*/
|
|
10
10
|
type SarvamLanguageCode = z.infer<typeof SarvamLanguageCodeSchema>;
|
|
11
|
-
declare const SarvamLanguageCodeSchema: z.ZodEnum<
|
|
11
|
+
declare const SarvamLanguageCodeSchema: z.ZodEnum<{
|
|
12
|
+
"hi-IN": "hi-IN";
|
|
13
|
+
"bn-IN": "bn-IN";
|
|
14
|
+
"kn-IN": "kn-IN";
|
|
15
|
+
"ml-IN": "ml-IN";
|
|
16
|
+
"mr-IN": "mr-IN";
|
|
17
|
+
"od-IN": "od-IN";
|
|
18
|
+
"pa-IN": "pa-IN";
|
|
19
|
+
"ta-IN": "ta-IN";
|
|
20
|
+
"te-IN": "te-IN";
|
|
21
|
+
"en-IN": "en-IN";
|
|
22
|
+
"gu-IN": "gu-IN";
|
|
23
|
+
}>;
|
|
12
24
|
type MoreSarvamLanguageCode = z.infer<typeof MoreSarvamLanguageCodeSchema>;
|
|
13
|
-
declare const MoreSarvamLanguageCodeSchema: z.ZodEnum<
|
|
25
|
+
declare const MoreSarvamLanguageCodeSchema: z.ZodEnum<{
|
|
26
|
+
"as-IN": "as-IN";
|
|
27
|
+
"ur-IN": "ur-IN";
|
|
28
|
+
"ne-IN": "ne-IN";
|
|
29
|
+
"kok-IN": "kok-IN";
|
|
30
|
+
"ks-IN": "ks-IN";
|
|
31
|
+
"sd-IN": "sd-IN";
|
|
32
|
+
"sa-IN": "sa-IN";
|
|
33
|
+
"sat-IN": "sat-IN";
|
|
34
|
+
"mni-IN": "mni-IN";
|
|
35
|
+
"brx-IN": "brx-IN";
|
|
36
|
+
"mai-IN": "mai-IN";
|
|
37
|
+
"doi-IN": "doi-IN";
|
|
38
|
+
}>;
|
|
14
39
|
interface SarvamProviderSettings {
|
|
15
40
|
/**
|
|
16
41
|
* URL for the Sarvam API calls.
|
|
@@ -60,6 +85,34 @@ type ChatSettings = {
|
|
|
60
85
|
n?: number;
|
|
61
86
|
};
|
|
62
87
|
//#endregion
|
|
88
|
+
//#region src/stt/utils.d.ts
|
|
89
|
+
declare const input_audio_codec: z$1.ZodEnum<{
|
|
90
|
+
mp3: "mp3";
|
|
91
|
+
opus: "opus";
|
|
92
|
+
flac: "flac";
|
|
93
|
+
aac: "aac";
|
|
94
|
+
wav: "wav";
|
|
95
|
+
"x-wav": "x-wav";
|
|
96
|
+
wave: "wave";
|
|
97
|
+
mpeg: "mpeg";
|
|
98
|
+
mpeg3: "mpeg3";
|
|
99
|
+
"x-mp3": "x-mp3";
|
|
100
|
+
"x-mpeg-3": "x-mpeg-3";
|
|
101
|
+
"x-aac": "x-aac";
|
|
102
|
+
aiff: "aiff";
|
|
103
|
+
"x-aiff": "x-aiff";
|
|
104
|
+
ogg: "ogg";
|
|
105
|
+
"x-flac": "x-flac";
|
|
106
|
+
mp4: "mp4";
|
|
107
|
+
"x-m4a": "x-m4a";
|
|
108
|
+
amr: "amr";
|
|
109
|
+
"x-ms-wma": "x-ms-wma";
|
|
110
|
+
webm: "webm";
|
|
111
|
+
pcm_s16le: "pcm_s16le";
|
|
112
|
+
pcm_l16: "pcm_l16";
|
|
113
|
+
pcm_raw: "pcm_raw";
|
|
114
|
+
}>;
|
|
115
|
+
//#endregion
|
|
63
116
|
//#region src/stt/speech-translation-settings.d.ts
|
|
64
117
|
/**
|
|
65
118
|
* Specifies the speech generation model to use.
|
|
@@ -68,13 +121,6 @@ type ChatSettings = {
|
|
|
68
121
|
* - `saaras:v3`: Translation model that translates audio from any spoken Indic language to English, with improved accuracy and support for more languages.
|
|
69
122
|
*/
|
|
70
123
|
type SpeechTranslationModelId = "saaras:v3" | "saaras:v2.5" | (string & {});
|
|
71
|
-
declare const speechTranslationSettingsSchema: z$1.ZodObject<{
|
|
72
|
-
input_audio_codec: z$1.ZodEnum<["wav", "x-wav", "wave", "mp3", "mpeg", "mpeg3", "x-mp3", "x-mpeg-3", "aac", "x-aac", "aiff", "x-aiff", "ogg", "opus", "flac", "x-flac", "mp4", "x-m4a", "amr", "x-ms-wma", "webm", "pcm_s16le", "pcm_l16", "pcm_raw"]>;
|
|
73
|
-
}, "strip", z$1.ZodTypeAny, {
|
|
74
|
-
input_audio_codec: "wav" | "x-wav" | "wave" | "mp3" | "mpeg" | "mpeg3" | "x-mp3" | "x-mpeg-3" | "aac" | "x-aac" | "aiff" | "x-aiff" | "ogg" | "opus" | "flac" | "x-flac" | "mp4" | "x-m4a" | "amr" | "x-ms-wma" | "webm" | "pcm_s16le" | "pcm_l16" | "pcm_raw";
|
|
75
|
-
}, {
|
|
76
|
-
input_audio_codec: "wav" | "x-wav" | "wave" | "mp3" | "mpeg" | "mpeg3" | "x-mp3" | "x-mpeg-3" | "aac" | "x-aac" | "aiff" | "x-aiff" | "ogg" | "opus" | "flac" | "x-flac" | "mp4" | "x-m4a" | "amr" | "x-ms-wma" | "webm" | "pcm_s16le" | "pcm_l16" | "pcm_raw";
|
|
77
|
-
}>;
|
|
78
124
|
type SpeechTranslationSettings = {
|
|
79
125
|
/**
|
|
80
126
|
* Audio codec/format of the input file.
|
|
@@ -82,7 +128,7 @@ type SpeechTranslationSettings = {
|
|
|
82
128
|
* Our API automatically detects all codec formats, but for PCM files specifically (pcm_s16le, pcm_l16, pcm_raw), you must pass this parameter.
|
|
83
129
|
* PCM files are supported only at 16kHz sample rate.
|
|
84
130
|
*/
|
|
85
|
-
input_audio_codec?: z$1.infer<typeof
|
|
131
|
+
input_audio_codec?: z$1.infer<typeof input_audio_codec>;
|
|
86
132
|
};
|
|
87
133
|
//#endregion
|
|
88
134
|
//#region src/stt/transcription-settings.d.ts
|
|
@@ -92,21 +138,17 @@ type SpeechTranslationSettings = {
|
|
|
92
138
|
*/
|
|
93
139
|
type TranscriptionModelId = "saaras:v3" | "saarika:v2.5" | (string & {});
|
|
94
140
|
declare const transcriptionProviderOptionsSchema: z.ZodObject<{
|
|
95
|
-
mode: z.ZodOptional<z.ZodNullable<z.ZodEnum<
|
|
141
|
+
mode: z.ZodOptional<z.ZodNullable<z.ZodEnum<{
|
|
142
|
+
transcribe: "transcribe";
|
|
143
|
+
translate: "translate";
|
|
144
|
+
verbatim: "verbatim";
|
|
145
|
+
translit: "translit";
|
|
146
|
+
codemix: "codemix";
|
|
147
|
+
}>>>;
|
|
96
148
|
with_timestamps: z.ZodOptional<z.ZodNullable<z.ZodBoolean>>;
|
|
97
149
|
with_diarization: z.ZodOptional<z.ZodNullable<z.ZodBoolean>>;
|
|
98
150
|
num_speakers: z.ZodOptional<z.ZodNullable<z.ZodNumber>>;
|
|
99
|
-
},
|
|
100
|
-
mode?: "transcribe" | "translate" | "verbatim" | "translit" | "codemix" | null | undefined;
|
|
101
|
-
with_timestamps?: boolean | null | undefined;
|
|
102
|
-
with_diarization?: boolean | null | undefined;
|
|
103
|
-
num_speakers?: number | null | undefined;
|
|
104
|
-
}, {
|
|
105
|
-
mode?: "transcribe" | "translate" | "verbatim" | "translit" | "codemix" | null | undefined;
|
|
106
|
-
with_timestamps?: boolean | null | undefined;
|
|
107
|
-
with_diarization?: boolean | null | undefined;
|
|
108
|
-
num_speakers?: number | null | undefined;
|
|
109
|
-
}>;
|
|
151
|
+
}, z.core.$strip>;
|
|
110
152
|
type TranscriptionSettings<T extends TranscriptionModelId = TranscriptionModelId> = {
|
|
111
153
|
/**
|
|
112
154
|
* Mode of operation. Only applicable when using `saaras:v3` model.
|
|
@@ -149,9 +191,66 @@ type TranscriptionSettings<T extends TranscriptionModelId = TranscriptionModelId
|
|
|
149
191
|
* - `bulbul:v2`: Legacy model with pitch and loudness controls
|
|
150
192
|
*/
|
|
151
193
|
type SpeechModelId = "bulbul:v2" | "bulbul:v3" | (string & {});
|
|
152
|
-
declare const bulbul_v2: z.ZodEnum<
|
|
153
|
-
|
|
154
|
-
|
|
194
|
+
declare const bulbul_v2: z.ZodEnum<{
|
|
195
|
+
abhilash: "abhilash";
|
|
196
|
+
karun: "karun";
|
|
197
|
+
hitesh: "hitesh";
|
|
198
|
+
anushka: "anushka";
|
|
199
|
+
manisha: "manisha";
|
|
200
|
+
vidya: "vidya";
|
|
201
|
+
arya: "arya";
|
|
202
|
+
}>;
|
|
203
|
+
declare const bulbul_v3: z.ZodEnum<{
|
|
204
|
+
shubh: "shubh";
|
|
205
|
+
aditya: "aditya";
|
|
206
|
+
rahul: "rahul";
|
|
207
|
+
rohan: "rohan";
|
|
208
|
+
amit: "amit";
|
|
209
|
+
dev: "dev";
|
|
210
|
+
ratan: "ratan";
|
|
211
|
+
varun: "varun";
|
|
212
|
+
manan: "manan";
|
|
213
|
+
sumit: "sumit";
|
|
214
|
+
kabir: "kabir";
|
|
215
|
+
aayan: "aayan";
|
|
216
|
+
ashutosh: "ashutosh";
|
|
217
|
+
advait: "advait";
|
|
218
|
+
anand: "anand";
|
|
219
|
+
tarun: "tarun";
|
|
220
|
+
sunny: "sunny";
|
|
221
|
+
mani: "mani";
|
|
222
|
+
gokul: "gokul";
|
|
223
|
+
vijay: "vijay";
|
|
224
|
+
mohit: "mohit";
|
|
225
|
+
rehan: "rehan";
|
|
226
|
+
soham: "soham";
|
|
227
|
+
ritu: "ritu";
|
|
228
|
+
priya: "priya";
|
|
229
|
+
neha: "neha";
|
|
230
|
+
pooja: "pooja";
|
|
231
|
+
simran: "simran";
|
|
232
|
+
kavya: "kavya";
|
|
233
|
+
ishita: "ishita";
|
|
234
|
+
shreya: "shreya";
|
|
235
|
+
roopa: "roopa";
|
|
236
|
+
amelia: "amelia";
|
|
237
|
+
sophia: "sophia";
|
|
238
|
+
tanya: "tanya";
|
|
239
|
+
shruti: "shruti";
|
|
240
|
+
suhani: "suhani";
|
|
241
|
+
kavitha: "kavitha";
|
|
242
|
+
rupali: "rupali";
|
|
243
|
+
}>;
|
|
244
|
+
declare const outputAudioCodecSchema: z.ZodEnum<{
|
|
245
|
+
mp3: "mp3";
|
|
246
|
+
linear16: "linear16";
|
|
247
|
+
mulaw: "mulaw";
|
|
248
|
+
alaw: "alaw";
|
|
249
|
+
opus: "opus";
|
|
250
|
+
flac: "flac";
|
|
251
|
+
aac: "aac";
|
|
252
|
+
wav: "wav";
|
|
253
|
+
}>;
|
|
155
254
|
/**
|
|
156
255
|
* Configuration settings for Sarvam Text-to-Speech API.
|
|
157
256
|
*
|
|
@@ -372,7 +471,7 @@ type SarvamProvider = {
|
|
|
372
471
|
* prompt: "Translate this to malayalam: 'Keep cooking, guys'",
|
|
373
472
|
* });
|
|
374
473
|
*/
|
|
375
|
-
(modelId: ChatModelId, settings?: ChatSettings):
|
|
474
|
+
(modelId: ChatModelId, settings?: ChatSettings): LanguageModelV3;
|
|
376
475
|
/**
|
|
377
476
|
* Creates a Sarvam chat model for text generation.
|
|
378
477
|
*
|
|
@@ -382,7 +481,7 @@ type SarvamProvider = {
|
|
|
382
481
|
* prompt: "Translate this to malayalam: 'Keep cooking, guys'",
|
|
383
482
|
* });
|
|
384
483
|
*/
|
|
385
|
-
languageModel(modelId: ChatModelId, settings?: ChatSettings):
|
|
484
|
+
languageModel(modelId: ChatModelId, settings?: ChatSettings): LanguageModelV3;
|
|
386
485
|
/**
|
|
387
486
|
* Creates a Sarvam model for chat.
|
|
388
487
|
*
|
|
@@ -392,7 +491,7 @@ type SarvamProvider = {
|
|
|
392
491
|
* prompt: "Translate this to malayalam: 'Keep cooking, guys'",
|
|
393
492
|
* });
|
|
394
493
|
*/
|
|
395
|
-
chat(modelId: ChatModelId, settings?: ChatSettings):
|
|
494
|
+
chat(modelId: ChatModelId, settings?: ChatSettings): LanguageModelV3;
|
|
396
495
|
/**
|
|
397
496
|
* Creates a Sarvam model for transcription.
|
|
398
497
|
*
|
|
@@ -409,7 +508,7 @@ type SarvamProvider = {
|
|
|
409
508
|
*
|
|
410
509
|
* @default unknown
|
|
411
510
|
*/
|
|
412
|
-
languageCode?: (T extends "saaras:v3" ? MoreSarvamLanguageCode : never) | SarvamLanguageCode | "unknown", settings?: TranscriptionSettings<T>):
|
|
511
|
+
languageCode?: (T extends "saaras:v3" ? MoreSarvamLanguageCode : never) | SarvamLanguageCode | "unknown", settings?: TranscriptionSettings<T>): TranscriptionModelV3;
|
|
413
512
|
/**
|
|
414
513
|
* Creates a Sarvam model for Speech translation.
|
|
415
514
|
*
|
|
@@ -419,7 +518,7 @@ type SarvamProvider = {
|
|
|
419
518
|
* audio: await readFile("./audio.wav"),
|
|
420
519
|
* });
|
|
421
520
|
*/
|
|
422
|
-
speechTranslation<T extends SpeechTranslationModelId>(modelId: T, settings?: SpeechTranslationSettings):
|
|
521
|
+
speechTranslation<T extends SpeechTranslationModelId>(modelId: T, settings?: SpeechTranslationSettings): TranscriptionModelV3;
|
|
423
522
|
/**
|
|
424
523
|
* Creates a Sarvam model for speech.
|
|
425
524
|
* @example
|
|
@@ -430,7 +529,7 @@ type SarvamProvider = {
|
|
|
430
529
|
*
|
|
431
530
|
* await writeFile("./audio.wav", Buffer.from(audio.base64, "base64"));
|
|
432
531
|
*/
|
|
433
|
-
speech<T extends SpeechModelId>(modelId: T, languageCode: SarvamLanguageCode, settings?: SpeechSettings<T>):
|
|
532
|
+
speech<T extends SpeechModelId>(modelId: T, languageCode: SarvamLanguageCode, settings?: SpeechSettings<T>): SpeechModelV3;
|
|
434
533
|
/**
|
|
435
534
|
* Creates a Sarvam model for transliteration.
|
|
436
535
|
*
|
|
@@ -443,7 +542,7 @@ type SarvamProvider = {
|
|
|
443
542
|
* prompt: "eda mone, happy alle?",
|
|
444
543
|
* });
|
|
445
544
|
*/
|
|
446
|
-
transliterate<T extends SarvamLanguageCode>(settings: TransliterateSettings<false, T>):
|
|
545
|
+
transliterate<T extends SarvamLanguageCode>(settings: TransliterateSettings<false, T>): LanguageModelV3;
|
|
447
546
|
/**
|
|
448
547
|
* Creates a Sarvam model for translation.
|
|
449
548
|
*
|
|
@@ -456,7 +555,7 @@ type SarvamProvider = {
|
|
|
456
555
|
* prompt: "ഇതൊക്കെ ശ്രദ്ധിക്കണ്ടേ അംബാനെ?",
|
|
457
556
|
* });
|
|
458
557
|
*/
|
|
459
|
-
translation<T extends TranslationModelId>(model: T, settings: TranslationSettings<T>):
|
|
558
|
+
translation<T extends TranslationModelId>(model: T, settings: TranslationSettings<T>): LanguageModelV3;
|
|
460
559
|
/**
|
|
461
560
|
* Creates a Sarvam model for language identification.
|
|
462
561
|
*
|
|
@@ -466,7 +565,7 @@ type SarvamProvider = {
|
|
|
466
565
|
* prompt: "ബുദ്ധിയാണ് സാറേ ഇവൻ്റെ മെയിൻ",
|
|
467
566
|
* });
|
|
468
567
|
*/
|
|
469
|
-
languageIdentification():
|
|
568
|
+
languageIdentification(): LanguageModelV3;
|
|
470
569
|
};
|
|
471
570
|
//#endregion
|
|
472
571
|
//#region src/provider.d.ts
|