sarvam-ai-sdk 0.1.5-beta → 0.2.0-beta

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,7 +1,6 @@
1
- # Package
2
1
  # AI SDK - Sarvam Provider
3
2
 
4
- The **[Sarvam provider](https://v4.ai-sdk.dev/providers/ai-sdk-providers/sarvam)** for the [AI SDK](https://v4.ai-sdk.dev/docs)
3
+ The **[Sarvam provider](https://v5.ai-sdk.dev/providers/ai-sdk-providers/sarvam)** for the [AI SDK](https://v5.ai-sdk.dev/docs)
5
4
  contains language model support for the Sarvam chat completion, Text-to-Speech and Speech-to-Text APIs.
6
5
 
7
6
  ## Setup
@@ -9,11 +8,11 @@ contains language model support for the Sarvam chat completion, Text-to-Speech a
9
8
  The **[Sarvam](http://sarvam.ai)** provider is available in the `sarvam-ai-sdk` module. You can install it with
10
9
 
11
10
  ```bash
12
- npm i sarvam-ai-sdk ai@4
11
+ npm i sarvam-ai-sdk ai@5
13
12
  ```
14
13
 
15
14
  > [!WARNING]
16
- > This package only works with Vercel AI-SDK v4, not latest v6. Make sure to install `ai@4` in your project.
15
+ > This package only works with Vercel AI-SDK v5, not v6 or the latest v7. Make sure to install `ai@5` in your project.
17
16
 
18
17
  ## Provider Instance
19
18
 
@@ -168,8 +167,8 @@ const result = await generateText({
168
167
  tools: {
169
168
  weather: tool({
170
169
  description: "Get the weather in a location",
171
- parameters: z.object({
172
- location: z.string().describe("The location to get the weather for"),
170
+ inputSchema: z.object({
171
+ location: z.string(),
173
172
  }),
174
173
  execute: async ({ location }) => ({
175
174
  location,
@@ -184,9 +183,6 @@ const result = await generateText({
184
183
  console.log(result.toolResults);
185
184
  ```
186
185
 
187
- > [!WARNING]
188
- > Old `sarvam-m` models isn't trained on native tool calling feature (aka JSON mode). So we recommend using latest models.
189
-
190
186
  ## Generate JSON object
191
187
 
192
188
  ```ts
@@ -196,6 +192,8 @@ import { generateObject } from 'ai';
196
192
 
197
193
  const { object } = await generateObject({
198
194
  model: sarvam("sarvam-30b"),
195
+ schemaName: "Recipe",
196
+ schemaDescription: "A recipe with a name, ingredients and steps",
199
197
  schema: z.object({
200
198
  recipe: z.object({
201
199
  name: z.string(),
@@ -209,9 +207,6 @@ const { object } = await generateObject({
209
207
  console.log(object);
210
208
  ```
211
209
 
212
- > [!WARNING]
213
- > Old `sarvam-m` models isn't trained on native JSON object generation. So we recommend using latest models.
214
-
215
210
  ## All APIs
216
211
 
217
212
  ```ts
@@ -243,4 +238,4 @@ sarvam.speechTranslation("saaras:v3");
243
238
 
244
239
  ## Documentation
245
240
 
246
- Please check out the **[Sarvam provider documentation](https://v4.ai-sdk.dev/providers/ai-sdk-providers/sarvam)** and **[Sarvam API documentation](https://docs.sarvam.ai)** for more information.
241
+ Please check out the **[Sarvam provider documentation](https://v5.ai-sdk.dev/providers/ai-sdk-providers/sarvam)** and **[Sarvam API documentation](https://docs.sarvam.ai)** for more information.
package/dist/index.d.mts CHANGED
@@ -1,5 +1,5 @@
1
1
  import { FetchFunction } from "@ai-sdk/provider-utils";
2
- import { LanguageModelV1, SpeechModelV1, TranscriptionModelV1 } from "@ai-sdk/provider";
2
+ import { LanguageModelV2, SpeechModelV2, TranscriptionModelV2 } from "@ai-sdk/provider";
3
3
  import z$1, { z } from "zod";
4
4
 
5
5
  //#region src/config.d.ts
@@ -8,9 +8,34 @@ import z$1, { z } from "zod";
8
8
  * Specifies the language in BCP-47 format.
9
9
  */
10
10
  type SarvamLanguageCode = z.infer<typeof SarvamLanguageCodeSchema>;
11
- declare const SarvamLanguageCodeSchema: z.ZodEnum<["hi-IN", "bn-IN", "kn-IN", "ml-IN", "mr-IN", "od-IN", "pa-IN", "ta-IN", "te-IN", "en-IN", "gu-IN"]>;
11
+ declare const SarvamLanguageCodeSchema: z.ZodEnum<{
12
+ "hi-IN": "hi-IN";
13
+ "bn-IN": "bn-IN";
14
+ "kn-IN": "kn-IN";
15
+ "ml-IN": "ml-IN";
16
+ "mr-IN": "mr-IN";
17
+ "od-IN": "od-IN";
18
+ "pa-IN": "pa-IN";
19
+ "ta-IN": "ta-IN";
20
+ "te-IN": "te-IN";
21
+ "en-IN": "en-IN";
22
+ "gu-IN": "gu-IN";
23
+ }>;
12
24
  type MoreSarvamLanguageCode = z.infer<typeof MoreSarvamLanguageCodeSchema>;
13
- declare const MoreSarvamLanguageCodeSchema: z.ZodEnum<["as-IN", "ur-IN", "ne-IN", "kok-IN", "ks-IN", "sd-IN", "sa-IN", "sat-IN", "mni-IN", "brx-IN", "mai-IN", "doi-IN"]>;
25
+ declare const MoreSarvamLanguageCodeSchema: z.ZodEnum<{
26
+ "as-IN": "as-IN";
27
+ "ur-IN": "ur-IN";
28
+ "ne-IN": "ne-IN";
29
+ "kok-IN": "kok-IN";
30
+ "ks-IN": "ks-IN";
31
+ "sd-IN": "sd-IN";
32
+ "sa-IN": "sa-IN";
33
+ "sat-IN": "sat-IN";
34
+ "mni-IN": "mni-IN";
35
+ "brx-IN": "brx-IN";
36
+ "mai-IN": "mai-IN";
37
+ "doi-IN": "doi-IN";
38
+ }>;
14
39
  interface SarvamProviderSettings {
15
40
  /**
16
41
  * URL for the Sarvam API calls.
@@ -60,6 +85,34 @@ type ChatSettings = {
60
85
  n?: number;
61
86
  };
62
87
  //#endregion
88
+ //#region src/stt/utils.d.ts
89
+ declare const input_audio_codec: z$1.ZodEnum<{
90
+ mp3: "mp3";
91
+ opus: "opus";
92
+ flac: "flac";
93
+ aac: "aac";
94
+ wav: "wav";
95
+ "x-wav": "x-wav";
96
+ wave: "wave";
97
+ mpeg: "mpeg";
98
+ mpeg3: "mpeg3";
99
+ "x-mp3": "x-mp3";
100
+ "x-mpeg-3": "x-mpeg-3";
101
+ "x-aac": "x-aac";
102
+ aiff: "aiff";
103
+ "x-aiff": "x-aiff";
104
+ ogg: "ogg";
105
+ "x-flac": "x-flac";
106
+ mp4: "mp4";
107
+ "x-m4a": "x-m4a";
108
+ amr: "amr";
109
+ "x-ms-wma": "x-ms-wma";
110
+ webm: "webm";
111
+ pcm_s16le: "pcm_s16le";
112
+ pcm_l16: "pcm_l16";
113
+ pcm_raw: "pcm_raw";
114
+ }>;
115
+ //#endregion
63
116
  //#region src/stt/speech-translation-settings.d.ts
64
117
  /**
65
118
  * Specifies the speech translation model to use.
@@ -68,13 +121,6 @@ type ChatSettings = {
68
121
  * - `saaras:v3`: Translation model that translates audio from any spoken Indic language to English, with improved accuracy and support for more languages.
69
122
  */
70
123
  type SpeechTranslationModelId = "saaras:v3" | "saaras:v2.5" | (string & {});
71
- declare const speechTranslationSettingsSchema: z$1.ZodObject<{
72
- input_audio_codec: z$1.ZodEnum<["wav", "x-wav", "wave", "mp3", "mpeg", "mpeg3", "x-mp3", "x-mpeg-3", "aac", "x-aac", "aiff", "x-aiff", "ogg", "opus", "flac", "x-flac", "mp4", "x-m4a", "amr", "x-ms-wma", "webm", "pcm_s16le", "pcm_l16", "pcm_raw"]>;
73
- }, "strip", z$1.ZodTypeAny, {
74
- input_audio_codec: "wav" | "x-wav" | "wave" | "mp3" | "mpeg" | "mpeg3" | "x-mp3" | "x-mpeg-3" | "aac" | "x-aac" | "aiff" | "x-aiff" | "ogg" | "opus" | "flac" | "x-flac" | "mp4" | "x-m4a" | "amr" | "x-ms-wma" | "webm" | "pcm_s16le" | "pcm_l16" | "pcm_raw";
75
- }, {
76
- input_audio_codec: "wav" | "x-wav" | "wave" | "mp3" | "mpeg" | "mpeg3" | "x-mp3" | "x-mpeg-3" | "aac" | "x-aac" | "aiff" | "x-aiff" | "ogg" | "opus" | "flac" | "x-flac" | "mp4" | "x-m4a" | "amr" | "x-ms-wma" | "webm" | "pcm_s16le" | "pcm_l16" | "pcm_raw";
77
- }>;
78
124
  type SpeechTranslationSettings = {
79
125
  /**
80
126
  * Audio codec/format of the input file.
@@ -82,7 +128,7 @@ type SpeechTranslationSettings = {
82
128
  * Our API automatically detects all codec formats, but for PCM files specifically (pcm_s16le, pcm_l16, pcm_raw), you must pass this parameter.
83
129
  * PCM files are supported only at 16kHz sample rate.
84
130
  */
85
- input_audio_codec?: z$1.infer<typeof speechTranslationSettingsSchema.shape.input_audio_codec>;
131
+ input_audio_codec?: z$1.infer<typeof input_audio_codec>;
86
132
  };
87
133
  //#endregion
88
134
  //#region src/stt/transcription-settings.d.ts
@@ -92,21 +138,17 @@ type SpeechTranslationSettings = {
92
138
  */
93
139
  type TranscriptionModelId = "saaras:v3" | "saarika:v2.5" | (string & {});
94
140
  declare const transcriptionProviderOptionsSchema: z.ZodObject<{
95
- mode: z.ZodOptional<z.ZodNullable<z.ZodEnum<["transcribe", "translate", "verbatim", "translit", "codemix"]>>>;
141
+ mode: z.ZodOptional<z.ZodNullable<z.ZodEnum<{
142
+ transcribe: "transcribe";
143
+ translate: "translate";
144
+ verbatim: "verbatim";
145
+ translit: "translit";
146
+ codemix: "codemix";
147
+ }>>>;
96
148
  with_timestamps: z.ZodOptional<z.ZodNullable<z.ZodBoolean>>;
97
149
  with_diarization: z.ZodOptional<z.ZodNullable<z.ZodBoolean>>;
98
150
  num_speakers: z.ZodOptional<z.ZodNullable<z.ZodNumber>>;
99
- }, "strip", z.ZodTypeAny, {
100
- mode?: "transcribe" | "translate" | "verbatim" | "translit" | "codemix" | null | undefined;
101
- with_timestamps?: boolean | null | undefined;
102
- with_diarization?: boolean | null | undefined;
103
- num_speakers?: number | null | undefined;
104
- }, {
105
- mode?: "transcribe" | "translate" | "verbatim" | "translit" | "codemix" | null | undefined;
106
- with_timestamps?: boolean | null | undefined;
107
- with_diarization?: boolean | null | undefined;
108
- num_speakers?: number | null | undefined;
109
- }>;
151
+ }, z.core.$strip>;
110
152
  type TranscriptionSettings<T extends TranscriptionModelId = TranscriptionModelId> = {
111
153
  /**
112
154
  * Mode of operation. Only applicable when using `saaras:v3` model.
@@ -149,9 +191,66 @@ type TranscriptionSettings<T extends TranscriptionModelId = TranscriptionModelId
149
191
  * - `bulbul:v2`: Legacy model with pitch and loudness controls
150
192
  */
151
193
  type SpeechModelId = "bulbul:v2" | "bulbul:v3" | (string & {});
152
- declare const bulbul_v2: z.ZodEnum<["abhilash", "karun", "hitesh", "anushka", "manisha", "vidya", "arya"]>;
153
- declare const bulbul_v3: z.ZodEnum<["shubh", "aditya", "rahul", "rohan", "amit", "dev", "ratan", "varun", "manan", "sumit", "kabir", "aayan", "ashutosh", "advait", "anand", "tarun", "sunny", "mani", "gokul", "vijay", "mohit", "rehan", "soham", "ritu", "priya", "neha", "pooja", "simran", "kavya", "ishita", "shreya", "roopa", "amelia", "sophia", "tanya", "shruti", "suhani", "kavitha", "rupali"]>;
154
- declare const outputAudioCodecSchema: z.ZodEnum<["mp3", "linear16", "mulaw", "alaw", "opus", "flac", "aac", "wav"]>;
194
+ declare const bulbul_v2: z.ZodEnum<{
195
+ abhilash: "abhilash";
196
+ karun: "karun";
197
+ hitesh: "hitesh";
198
+ anushka: "anushka";
199
+ manisha: "manisha";
200
+ vidya: "vidya";
201
+ arya: "arya";
202
+ }>;
203
+ declare const bulbul_v3: z.ZodEnum<{
204
+ shubh: "shubh";
205
+ aditya: "aditya";
206
+ rahul: "rahul";
207
+ rohan: "rohan";
208
+ amit: "amit";
209
+ dev: "dev";
210
+ ratan: "ratan";
211
+ varun: "varun";
212
+ manan: "manan";
213
+ sumit: "sumit";
214
+ kabir: "kabir";
215
+ aayan: "aayan";
216
+ ashutosh: "ashutosh";
217
+ advait: "advait";
218
+ anand: "anand";
219
+ tarun: "tarun";
220
+ sunny: "sunny";
221
+ mani: "mani";
222
+ gokul: "gokul";
223
+ vijay: "vijay";
224
+ mohit: "mohit";
225
+ rehan: "rehan";
226
+ soham: "soham";
227
+ ritu: "ritu";
228
+ priya: "priya";
229
+ neha: "neha";
230
+ pooja: "pooja";
231
+ simran: "simran";
232
+ kavya: "kavya";
233
+ ishita: "ishita";
234
+ shreya: "shreya";
235
+ roopa: "roopa";
236
+ amelia: "amelia";
237
+ sophia: "sophia";
238
+ tanya: "tanya";
239
+ shruti: "shruti";
240
+ suhani: "suhani";
241
+ kavitha: "kavitha";
242
+ rupali: "rupali";
243
+ }>;
244
+ declare const outputAudioCodecSchema: z.ZodEnum<{
245
+ mp3: "mp3";
246
+ linear16: "linear16";
247
+ mulaw: "mulaw";
248
+ alaw: "alaw";
249
+ opus: "opus";
250
+ flac: "flac";
251
+ aac: "aac";
252
+ wav: "wav";
253
+ }>;
155
254
  /**
156
255
  * Configuration settings for Sarvam Text-to-Speech API.
157
256
  *
@@ -372,7 +471,7 @@ type SarvamProvider = {
372
471
  * prompt: "Translate this to malayalam: 'Keep cooking, guys'",
373
472
  * });
374
473
  */
375
- (modelId: ChatModelId, settings?: ChatSettings): LanguageModelV1;
474
+ (modelId: ChatModelId, settings?: ChatSettings): LanguageModelV2;
376
475
  /**
377
476
  * Creates an Sarvam chat model for text generation.
378
477
  *
@@ -382,7 +481,7 @@ type SarvamProvider = {
382
481
  * prompt: "Translate this to malayalam: 'Keep cooking, guys'",
383
482
  * });
384
483
  */
385
- languageModel(modelId: ChatModelId, settings?: ChatSettings): LanguageModelV1;
484
+ languageModel(modelId: ChatModelId, settings?: ChatSettings): LanguageModelV2;
386
485
  /**
387
486
  * Creates a Sarvam model for chat.
388
487
  *
@@ -392,7 +491,7 @@ type SarvamProvider = {
392
491
  * prompt: "Translate this to malayalam: 'Keep cooking, guys'",
393
492
  * });
394
493
  */
395
- chat(modelId: ChatModelId, settings?: ChatSettings): LanguageModelV1;
494
+ chat(modelId: ChatModelId, settings?: ChatSettings): LanguageModelV2;
396
495
  /**
397
496
  * Creates a Sarvam model for transcription.
398
497
  *
@@ -409,7 +508,7 @@ type SarvamProvider = {
409
508
  *
410
509
  * @default unknown
411
510
  */
412
- languageCode?: (T extends "saaras:v3" ? MoreSarvamLanguageCode : never) | SarvamLanguageCode | "unknown", settings?: TranscriptionSettings<T>): TranscriptionModelV1;
511
+ languageCode?: (T extends "saaras:v3" ? MoreSarvamLanguageCode : never) | SarvamLanguageCode | "unknown", settings?: TranscriptionSettings<T>): TranscriptionModelV2;
413
512
  /**
414
513
  * Creates a Sarvam model for Speech translation.
415
514
  *
@@ -419,7 +518,7 @@ type SarvamProvider = {
419
518
  * audio: await readFile("./audio.wav"),
420
519
  * });
421
520
  */
422
- speechTranslation<T extends SpeechTranslationModelId>(modelId: T, settings?: SpeechTranslationSettings): TranscriptionModelV1;
521
+ speechTranslation<T extends SpeechTranslationModelId>(modelId: T, settings?: SpeechTranslationSettings): TranscriptionModelV2;
423
522
  /**
424
523
  * Creates a Sarvam model for speech.
425
524
  * @example
@@ -430,7 +529,7 @@ type SarvamProvider = {
430
529
  *
431
530
  * await writeFile("./audio.wav", Buffer.from(audio.base64, "base64"));
432
531
  */
433
- speech<T extends SpeechModelId>(modelId: T, languageCode: SarvamLanguageCode, settings?: SpeechSettings<T>): SpeechModelV1;
532
+ speech<T extends SpeechModelId>(modelId: T, languageCode: SarvamLanguageCode, settings?: SpeechSettings<T>): SpeechModelV2;
434
533
  /**
435
534
  * Creates a Sarvam model for transliteration.
436
535
  *
@@ -443,7 +542,7 @@ type SarvamProvider = {
443
542
  * prompt: "eda mone, happy alle?",
444
543
  * });
445
544
  */
446
- transliterate<T extends SarvamLanguageCode>(settings: TransliterateSettings<false, T>): LanguageModelV1;
545
+ transliterate<T extends SarvamLanguageCode>(settings: TransliterateSettings<false, T>): LanguageModelV2;
447
546
  /**
448
547
  * Creates a Sarvam model for translation.
449
548
  *
@@ -456,7 +555,7 @@ type SarvamProvider = {
456
555
  * prompt: "ഇതൊക്കെ ശ്രദ്ധിക്കണ്ടേ അംബാനെ?",
457
556
  * });
458
557
  */
459
- translation<T extends TranslationModelId>(model: T, settings: TranslationSettings<T>): LanguageModelV1;
558
+ translation<T extends TranslationModelId>(model: T, settings: TranslationSettings<T>): LanguageModelV2;
460
559
  /**
461
560
  * Creates a Sarvam model for language identification.
462
561
  *
@@ -466,7 +565,7 @@ type SarvamProvider = {
466
565
  * prompt: "ബുദ്ധിയാണ് സാറേ ഇവൻ്റെ മെയിൻ",
467
566
  * });
468
567
  */
469
- languageIdentification(): LanguageModelV1;
568
+ languageIdentification(): LanguageModelV2;
470
569
  };
471
570
  //#endregion
472
571
  //#region src/provider.d.ts
package/dist/index.d.ts CHANGED
@@ -1,6 +1,6 @@
1
1
  import { FetchFunction } from "@ai-sdk/provider-utils";
2
2
  import z$1, { z } from "zod";
3
- import { LanguageModelV1, SpeechModelV1, TranscriptionModelV1 } from "@ai-sdk/provider";
3
+ import { LanguageModelV2, SpeechModelV2, TranscriptionModelV2 } from "@ai-sdk/provider";
4
4
 
5
5
  //#region src/config.d.ts
6
6
 
@@ -8,9 +8,34 @@ import { LanguageModelV1, SpeechModelV1, TranscriptionModelV1 } from "@ai-sdk/pr
8
8
  * Specifies the language in BCP-47 format.
9
9
  */
10
10
  type SarvamLanguageCode = z.infer<typeof SarvamLanguageCodeSchema>;
11
- declare const SarvamLanguageCodeSchema: z.ZodEnum<["hi-IN", "bn-IN", "kn-IN", "ml-IN", "mr-IN", "od-IN", "pa-IN", "ta-IN", "te-IN", "en-IN", "gu-IN"]>;
11
+ declare const SarvamLanguageCodeSchema: z.ZodEnum<{
12
+ "hi-IN": "hi-IN";
13
+ "bn-IN": "bn-IN";
14
+ "kn-IN": "kn-IN";
15
+ "ml-IN": "ml-IN";
16
+ "mr-IN": "mr-IN";
17
+ "od-IN": "od-IN";
18
+ "pa-IN": "pa-IN";
19
+ "ta-IN": "ta-IN";
20
+ "te-IN": "te-IN";
21
+ "en-IN": "en-IN";
22
+ "gu-IN": "gu-IN";
23
+ }>;
12
24
  type MoreSarvamLanguageCode = z.infer<typeof MoreSarvamLanguageCodeSchema>;
13
- declare const MoreSarvamLanguageCodeSchema: z.ZodEnum<["as-IN", "ur-IN", "ne-IN", "kok-IN", "ks-IN", "sd-IN", "sa-IN", "sat-IN", "mni-IN", "brx-IN", "mai-IN", "doi-IN"]>;
25
+ declare const MoreSarvamLanguageCodeSchema: z.ZodEnum<{
26
+ "as-IN": "as-IN";
27
+ "ur-IN": "ur-IN";
28
+ "ne-IN": "ne-IN";
29
+ "kok-IN": "kok-IN";
30
+ "ks-IN": "ks-IN";
31
+ "sd-IN": "sd-IN";
32
+ "sa-IN": "sa-IN";
33
+ "sat-IN": "sat-IN";
34
+ "mni-IN": "mni-IN";
35
+ "brx-IN": "brx-IN";
36
+ "mai-IN": "mai-IN";
37
+ "doi-IN": "doi-IN";
38
+ }>;
14
39
  interface SarvamProviderSettings {
15
40
  /**
16
41
  * URL for the Sarvam API calls.
@@ -60,6 +85,34 @@ type ChatSettings = {
60
85
  n?: number;
61
86
  };
62
87
  //#endregion
88
+ //#region src/stt/utils.d.ts
89
+ declare const input_audio_codec: z$1.ZodEnum<{
90
+ mp3: "mp3";
91
+ opus: "opus";
92
+ flac: "flac";
93
+ aac: "aac";
94
+ wav: "wav";
95
+ "x-wav": "x-wav";
96
+ wave: "wave";
97
+ mpeg: "mpeg";
98
+ mpeg3: "mpeg3";
99
+ "x-mp3": "x-mp3";
100
+ "x-mpeg-3": "x-mpeg-3";
101
+ "x-aac": "x-aac";
102
+ aiff: "aiff";
103
+ "x-aiff": "x-aiff";
104
+ ogg: "ogg";
105
+ "x-flac": "x-flac";
106
+ mp4: "mp4";
107
+ "x-m4a": "x-m4a";
108
+ amr: "amr";
109
+ "x-ms-wma": "x-ms-wma";
110
+ webm: "webm";
111
+ pcm_s16le: "pcm_s16le";
112
+ pcm_l16: "pcm_l16";
113
+ pcm_raw: "pcm_raw";
114
+ }>;
115
+ //#endregion
63
116
  //#region src/stt/speech-translation-settings.d.ts
64
117
  /**
65
118
  * Specifies the speech translation model to use.
@@ -68,13 +121,6 @@ type ChatSettings = {
68
121
  * - `saaras:v3`: Translation model that translates audio from any spoken Indic language to English, with improved accuracy and support for more languages.
69
122
  */
70
123
  type SpeechTranslationModelId = "saaras:v3" | "saaras:v2.5" | (string & {});
71
- declare const speechTranslationSettingsSchema: z$1.ZodObject<{
72
- input_audio_codec: z$1.ZodEnum<["wav", "x-wav", "wave", "mp3", "mpeg", "mpeg3", "x-mp3", "x-mpeg-3", "aac", "x-aac", "aiff", "x-aiff", "ogg", "opus", "flac", "x-flac", "mp4", "x-m4a", "amr", "x-ms-wma", "webm", "pcm_s16le", "pcm_l16", "pcm_raw"]>;
73
- }, "strip", z$1.ZodTypeAny, {
74
- input_audio_codec: "wav" | "x-wav" | "wave" | "mp3" | "mpeg" | "mpeg3" | "x-mp3" | "x-mpeg-3" | "aac" | "x-aac" | "aiff" | "x-aiff" | "ogg" | "opus" | "flac" | "x-flac" | "mp4" | "x-m4a" | "amr" | "x-ms-wma" | "webm" | "pcm_s16le" | "pcm_l16" | "pcm_raw";
75
- }, {
76
- input_audio_codec: "wav" | "x-wav" | "wave" | "mp3" | "mpeg" | "mpeg3" | "x-mp3" | "x-mpeg-3" | "aac" | "x-aac" | "aiff" | "x-aiff" | "ogg" | "opus" | "flac" | "x-flac" | "mp4" | "x-m4a" | "amr" | "x-ms-wma" | "webm" | "pcm_s16le" | "pcm_l16" | "pcm_raw";
77
- }>;
78
124
  type SpeechTranslationSettings = {
79
125
  /**
80
126
  * Audio codec/format of the input file.
@@ -82,7 +128,7 @@ type SpeechTranslationSettings = {
82
128
  * Our API automatically detects all codec formats, but for PCM files specifically (pcm_s16le, pcm_l16, pcm_raw), you must pass this parameter.
83
129
  * PCM files are supported only at 16kHz sample rate.
84
130
  */
85
- input_audio_codec?: z$1.infer<typeof speechTranslationSettingsSchema.shape.input_audio_codec>;
131
+ input_audio_codec?: z$1.infer<typeof input_audio_codec>;
86
132
  };
87
133
  //#endregion
88
134
  //#region src/stt/transcription-settings.d.ts
@@ -92,21 +138,17 @@ type SpeechTranslationSettings = {
92
138
  */
93
139
  type TranscriptionModelId = "saaras:v3" | "saarika:v2.5" | (string & {});
94
140
  declare const transcriptionProviderOptionsSchema: z.ZodObject<{
95
- mode: z.ZodOptional<z.ZodNullable<z.ZodEnum<["transcribe", "translate", "verbatim", "translit", "codemix"]>>>;
141
+ mode: z.ZodOptional<z.ZodNullable<z.ZodEnum<{
142
+ transcribe: "transcribe";
143
+ translate: "translate";
144
+ verbatim: "verbatim";
145
+ translit: "translit";
146
+ codemix: "codemix";
147
+ }>>>;
96
148
  with_timestamps: z.ZodOptional<z.ZodNullable<z.ZodBoolean>>;
97
149
  with_diarization: z.ZodOptional<z.ZodNullable<z.ZodBoolean>>;
98
150
  num_speakers: z.ZodOptional<z.ZodNullable<z.ZodNumber>>;
99
- }, "strip", z.ZodTypeAny, {
100
- mode?: "transcribe" | "translate" | "verbatim" | "translit" | "codemix" | null | undefined;
101
- with_timestamps?: boolean | null | undefined;
102
- with_diarization?: boolean | null | undefined;
103
- num_speakers?: number | null | undefined;
104
- }, {
105
- mode?: "transcribe" | "translate" | "verbatim" | "translit" | "codemix" | null | undefined;
106
- with_timestamps?: boolean | null | undefined;
107
- with_diarization?: boolean | null | undefined;
108
- num_speakers?: number | null | undefined;
109
- }>;
151
+ }, z.core.$strip>;
110
152
  type TranscriptionSettings<T extends TranscriptionModelId = TranscriptionModelId> = {
111
153
  /**
112
154
  * Mode of operation. Only applicable when using `saaras:v3` model.
@@ -149,9 +191,66 @@ type TranscriptionSettings<T extends TranscriptionModelId = TranscriptionModelId
149
191
  * - `bulbul:v2`: Legacy model with pitch and loudness controls
150
192
  */
151
193
  type SpeechModelId = "bulbul:v2" | "bulbul:v3" | (string & {});
152
- declare const bulbul_v2: z.ZodEnum<["abhilash", "karun", "hitesh", "anushka", "manisha", "vidya", "arya"]>;
153
- declare const bulbul_v3: z.ZodEnum<["shubh", "aditya", "rahul", "rohan", "amit", "dev", "ratan", "varun", "manan", "sumit", "kabir", "aayan", "ashutosh", "advait", "anand", "tarun", "sunny", "mani", "gokul", "vijay", "mohit", "rehan", "soham", "ritu", "priya", "neha", "pooja", "simran", "kavya", "ishita", "shreya", "roopa", "amelia", "sophia", "tanya", "shruti", "suhani", "kavitha", "rupali"]>;
154
- declare const outputAudioCodecSchema: z.ZodEnum<["mp3", "linear16", "mulaw", "alaw", "opus", "flac", "aac", "wav"]>;
194
+ declare const bulbul_v2: z.ZodEnum<{
195
+ abhilash: "abhilash";
196
+ karun: "karun";
197
+ hitesh: "hitesh";
198
+ anushka: "anushka";
199
+ manisha: "manisha";
200
+ vidya: "vidya";
201
+ arya: "arya";
202
+ }>;
203
+ declare const bulbul_v3: z.ZodEnum<{
204
+ shubh: "shubh";
205
+ aditya: "aditya";
206
+ rahul: "rahul";
207
+ rohan: "rohan";
208
+ amit: "amit";
209
+ dev: "dev";
210
+ ratan: "ratan";
211
+ varun: "varun";
212
+ manan: "manan";
213
+ sumit: "sumit";
214
+ kabir: "kabir";
215
+ aayan: "aayan";
216
+ ashutosh: "ashutosh";
217
+ advait: "advait";
218
+ anand: "anand";
219
+ tarun: "tarun";
220
+ sunny: "sunny";
221
+ mani: "mani";
222
+ gokul: "gokul";
223
+ vijay: "vijay";
224
+ mohit: "mohit";
225
+ rehan: "rehan";
226
+ soham: "soham";
227
+ ritu: "ritu";
228
+ priya: "priya";
229
+ neha: "neha";
230
+ pooja: "pooja";
231
+ simran: "simran";
232
+ kavya: "kavya";
233
+ ishita: "ishita";
234
+ shreya: "shreya";
235
+ roopa: "roopa";
236
+ amelia: "amelia";
237
+ sophia: "sophia";
238
+ tanya: "tanya";
239
+ shruti: "shruti";
240
+ suhani: "suhani";
241
+ kavitha: "kavitha";
242
+ rupali: "rupali";
243
+ }>;
244
+ declare const outputAudioCodecSchema: z.ZodEnum<{
245
+ mp3: "mp3";
246
+ linear16: "linear16";
247
+ mulaw: "mulaw";
248
+ alaw: "alaw";
249
+ opus: "opus";
250
+ flac: "flac";
251
+ aac: "aac";
252
+ wav: "wav";
253
+ }>;
155
254
  /**
156
255
  * Configuration settings for Sarvam Text-to-Speech API.
157
256
  *
@@ -372,7 +471,7 @@ type SarvamProvider = {
372
471
  * prompt: "Translate this to malayalam: 'Keep cooking, guys'",
373
472
  * });
374
473
  */
375
- (modelId: ChatModelId, settings?: ChatSettings): LanguageModelV1;
474
+ (modelId: ChatModelId, settings?: ChatSettings): LanguageModelV2;
376
475
  /**
377
476
  * Creates a Sarvam chat model for text generation.
378
477
  *
@@ -382,7 +481,7 @@ type SarvamProvider = {
382
481
  * prompt: "Translate this to malayalam: 'Keep cooking, guys'",
383
482
  * });
384
483
  */
385
- languageModel(modelId: ChatModelId, settings?: ChatSettings): LanguageModelV1;
484
+ languageModel(modelId: ChatModelId, settings?: ChatSettings): LanguageModelV2;
386
485
  /**
387
486
  * Creates a Sarvam model for chat.
388
487
  *
@@ -392,7 +491,7 @@ type SarvamProvider = {
392
491
  * prompt: "Translate this to malayalam: 'Keep cooking, guys'",
393
492
  * });
394
493
  */
395
- chat(modelId: ChatModelId, settings?: ChatSettings): LanguageModelV1;
494
+ chat(modelId: ChatModelId, settings?: ChatSettings): LanguageModelV2;
396
495
  /**
397
496
  * Creates a Sarvam model for transcription.
398
497
  *
@@ -409,7 +508,7 @@ type SarvamProvider = {
409
508
  *
410
509
  * @default unknown
411
510
  */
412
- languageCode?: (T extends "saaras:v3" ? MoreSarvamLanguageCode : never) | SarvamLanguageCode | "unknown", settings?: TranscriptionSettings<T>): TranscriptionModelV1;
511
+ languageCode?: (T extends "saaras:v3" ? MoreSarvamLanguageCode : never) | SarvamLanguageCode | "unknown", settings?: TranscriptionSettings<T>): TranscriptionModelV2;
413
512
  /**
414
513
  * Creates a Sarvam model for Speech translation.
415
514
  *
@@ -419,7 +518,7 @@ type SarvamProvider = {
419
518
  * audio: await readFile("./audio.wav"),
420
519
  * });
421
520
  */
422
- speechTranslation<T extends SpeechTranslationModelId>(modelId: T, settings?: SpeechTranslationSettings): TranscriptionModelV1;
521
+ speechTranslation<T extends SpeechTranslationModelId>(modelId: T, settings?: SpeechTranslationSettings): TranscriptionModelV2;
423
522
  /**
424
523
  * Creates a Sarvam model for speech.
425
524
  * @example
@@ -430,7 +529,7 @@ type SarvamProvider = {
430
529
  *
431
530
  * await writeFile("./audio.wav", Buffer.from(audio.base64, "base64"));
432
531
  */
433
- speech<T extends SpeechModelId>(modelId: T, languageCode: SarvamLanguageCode, settings?: SpeechSettings<T>): SpeechModelV1;
532
+ speech<T extends SpeechModelId>(modelId: T, languageCode: SarvamLanguageCode, settings?: SpeechSettings<T>): SpeechModelV2;
434
533
  /**
435
534
  * Creates a Sarvam model for transliteration.
436
535
  *
@@ -443,7 +542,7 @@ type SarvamProvider = {
443
542
  * prompt: "eda mone, happy alle?",
444
543
  * });
445
544
  */
446
- transliterate<T extends SarvamLanguageCode>(settings: TransliterateSettings<false, T>): LanguageModelV1;
545
+ transliterate<T extends SarvamLanguageCode>(settings: TransliterateSettings<false, T>): LanguageModelV2;
447
546
  /**
448
547
  * Creates a Sarvam model for translation.
449
548
  *
@@ -456,7 +555,7 @@ type SarvamProvider = {
456
555
  * prompt: "ഇതൊക്കെ ശ്രദ്ധിക്കണ്ടേ അംബാനെ?",
457
556
  * });
458
557
  */
459
- translation<T extends TranslationModelId>(model: T, settings: TranslationSettings<T>): LanguageModelV1;
558
+ translation<T extends TranslationModelId>(model: T, settings: TranslationSettings<T>): LanguageModelV2;
460
559
  /**
461
560
  * Creates a Sarvam model for language identification.
462
561
  *
@@ -466,7 +565,7 @@ type SarvamProvider = {
466
565
  * prompt: "ബുദ്ധിയാണ് സാറേ ഇവൻ്റെ മെയിൻ",
467
566
  * });
468
567
  */
469
- languageIdentification(): LanguageModelV1;
568
+ languageIdentification(): LanguageModelV2;
470
569
  };
471
570
  //#endregion
472
571
  //#region src/provider.d.ts