sarvam-ai-sdk 0.1.0 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # AI SDK - Sarvam Provider
2
2
 
3
- The **[Sarvam provider](https://ai-sdk.dev/providers/ai-sdk-providers/sarvam)** for the [AI SDK](https://ai-sdk.dev/docs)
3
+ The **[Sarvam provider](https://v4.ai-sdk.dev/providers/ai-sdk-providers/sarvam)** for the [AI SDK](https://v4.ai-sdk.dev/docs)
4
4
  contains language model support for the Sarvam chat completion, Text-to-Speech and Speech-to-Text APIs.
5
5
 
6
6
  ## Setup
@@ -11,6 +11,9 @@ The **[Sarvam](http://sarvam.ai)** provider is available in the `sarvam-ai-sdk`
11
11
  npm i sarvam-ai-sdk
12
12
  ```
13
13
 
14
+ > [!WARNING]
15
+ > This package only works with Vercel AI SDK v4, not the latest v6. Make sure to install `ai@4` in your project.
16
+
14
17
  ## Provider Instance
15
18
 
16
19
  You can import the default provider instance `sarvam` from `sarvam-ai-sdk`:
@@ -31,7 +34,7 @@ import { sarvam } from 'sarvam-ai-sdk';
31
34
  import { generateText } from 'ai';
32
35
 
33
36
  const { text } = await generateText({
34
- model: sarvam("sarvam-m"),
37
+ model: sarvam("sarvam-30b"),
35
38
  prompt: "Translate this to malayalam: 'Keep cooking, guys'",
36
39
  });
37
40
 
@@ -46,7 +49,7 @@ import { experimental_generateSpeech as generateSpeech } from "ai";
46
49
  import { writeFile } from "fs/promises";
47
50
 
48
51
  const { audio } = await generateSpeech({
49
- model: sarvam.speech("bulbul:v2", "ml-IN"),
52
+ model: sarvam.speech("bulbul:v3", "ml-IN"),
50
53
  text: "പാചകം തുടരൂ, സുഹൃത്തുക്കളേ",
51
54
  });
52
55
 
@@ -62,13 +65,26 @@ import { experimental_transcribe as transcribe } from "ai";
62
65
  import { readFile } from "fs/promises";
63
66
 
64
67
  const { text } = await transcribe({
65
- model: sarvam.transcription("saarika:v2", "ml-IN")
68
+ model: sarvam.transcription("saarika:v2.5", "ml-IN"),
66
69
  audio: await readFile("./src/transcript-test.wav"),
67
70
  });
68
71
 
69
72
  console.log(text); // പാചകം തുടരും സുഹൃത്തുക്കളെ
70
73
  ```
71
74
 
75
+ ```ts
76
+ import { sarvam } from "sarvam-ai-sdk";
77
+ import { experimental_transcribe as transcribe } from "ai";
78
+ import { readFile } from "fs/promises";
79
+
80
+ const { text } = await transcribe({
81
+ model: sarvam.transcription("saaras:v3", "en-IN"),
82
+ audio: await readFile("./src/transcript-test.wav"),
83
+ });
84
+
85
+ console.log(text); // Pachakam thudaroo, suhruthukkale.
86
+ ```
87
+
72
88
  ## Speech-to-Text-Translate
73
89
 
74
90
  ```ts
@@ -77,7 +93,7 @@ import { experimental_transcribe as transcribe } from "ai";
77
93
  import { readFile } from "fs/promises";
78
94
 
79
95
  const result = await transcribe({
80
- model: sarvam.speechTranslation("saaras:v2"),
96
+ model: sarvam.speechTranslation("saaras:v2.5"),
81
97
  audio: await readFile("./src/transcript-test.wav"),
82
98
  });
83
99
 
@@ -141,7 +157,7 @@ console.log(result.text); // ml-IN
141
157
  ## Tool Calling
142
158
 
143
159
  > [!WARNING]
144
- > Latest `sarvam-m` model isn't trained on native tool calling feature (aka JSON mode). So we simulate this with prompt engineering technique.
160
+ > The latest `sarvam` models aren't trained on the native tool calling feature (aka JSON mode), so we simulate it with a prompt engineering technique.
145
161
 
146
162
  ```ts
147
163
  import { z } from "zod";
@@ -150,7 +166,7 @@ import { sarvam } from "sarvam-ai-sdk";
150
166
 
151
167
 
152
168
  const result = await generateText({
153
- model: sarvam("sarvam-m", {
169
+ model: sarvam("sarvam-30b", {
154
170
  simulate: "tool-calling" // ⚠️ important
155
171
  }),
156
172
  tools: {
@@ -174,7 +190,7 @@ console.log(result.toolResults);
174
190
  ## Generate JSON object
175
191
 
176
192
  > [!WARNING]
177
- > Latest `sarvam-m` model isn't trained on native JSON object generation. So we simulate this with prompt engineering technique.
193
+ > The latest `sarvam` models aren't trained on native JSON object generation, so we simulate it with a prompt engineering technique.
178
194
 
179
195
  ```ts
180
196
  import { z } from "zod";
@@ -182,7 +198,7 @@ import { sarvam } from "sarvam-ai-sdk";
182
198
  import { generateObject } from 'ai';
183
199
 
184
200
  const { object } = await generateObject({
185
- model: sarvam("sarvam-m", {
201
+ model: sarvam("sarvam-30b", {
186
202
  simulate: "json-object" // ⚠️ important
187
203
  }),
188
204
  schema: z.object({
@@ -200,4 +216,4 @@ console.log(object);
200
216
 
201
217
  ## Documentation
202
218
 
203
- Please check out the **[Sarvam provider documentation](https://ai-sdk.dev/providers/ai-sdk-providers/sarvam)** and **[Sarvam API documentation](https://docs.sarvam.ai)** for more information.
219
+ Please check out the **[Sarvam provider documentation](https://v4.ai-sdk.dev/providers/ai-sdk-providers/sarvam)** and **[Sarvam API documentation](https://docs.sarvam.ai)** for more information.
package/dist/index.d.mts CHANGED
@@ -2,7 +2,15 @@ import { LanguageModelV1, TranscriptionModelV1, SpeechModelV1 } from '@ai-sdk/pr
2
2
  import { FetchFunction } from '@ai-sdk/provider-utils';
3
3
  import { z } from 'zod';
4
4
 
5
- type SarvamChatModelId = "sarvam-m" | (string & {});
5
+ /**
6
+ * @description Product models
7
+ */
8
+ type SarvamChatModelId = "sarvam-30b" | "sarvam-30b-16k" | "sarvam-105b" | "sarvam-105b-32k" | SarvamChatLegacyModelId | (string & {});
9
+ /**
10
+ * @description Legacy models
11
+ * @deprecated
12
+ */
13
+ type SarvamChatLegacyModelId = "sarvam-m";
6
14
  interface SarvamChatSettings {
7
15
  /**
8
16
  * Whether to simulate artificial tool calling or JSON object generation, because Sarvam models don't support native tool calling or JSON Schema.
@@ -42,9 +50,9 @@ interface SarvamChatSettings {
42
50
  type SarvamLanguageCode = z.infer<typeof SarvamLanguageCodeSchema>;
43
51
  declare const SarvamLanguageCodeSchema: z.ZodEnum<["hi-IN", "bn-IN", "kn-IN", "ml-IN", "mr-IN", "od-IN", "pa-IN", "ta-IN", "te-IN", "en-IN", "gu-IN"]>;
44
52
 
45
- type SarvamSpeechModelId = "bulbul:v1" | "bulbul:v2" | (string & {});
53
+ type SarvamSpeechModelId = "bulbul:v2" | "bulbul:v3" | (string & {});
46
54
  type SarvamSpeechVoices = z.infer<typeof SpeakerSchema>;
47
- declare const SpeakerSchema: z.ZodDefault<z.ZodEnum<["meera", "pavithra", "maitreyi", "arvind", "amol", "amartya", "diya", "neel", "misha", "vian", "arjun", "maya", "anushka", "abhilash", "manisha", "vidya", "arya", "karun", "hitesh"]>>;
55
+ declare const SpeakerSchema: z.ZodDefault<z.ZodEnum<["abhilash", "karun", "hitesh", "anushka", "manisha", "vidya", "arya", "shubh", "aditya", "rahul", "rohan", "amit", "dev", "ratan", "varun", "manan", "sumit", "kabir", "aayan", "ashutosh", "advait", "anand", "tarun", "sunny", "mani", "gokul", "vijay", "mohit", "rehan", "soham", "ritu", "priya", "neha", "pooja", "simran", "kavya", "ishita", "shreya", "roopa", "amelia", "sophia", "tanya", "shruti", "suhani", "kavitha", "rupali"]>>;
48
56
  /**
49
57
  * Configuration settings for Sarvam Text-to-Speech API.
50
58
  *
@@ -56,10 +64,10 @@ type SarvamSpeechSettings = {
56
64
  /**
57
65
  * The speaker voice to be used for the output audio.
58
66
  *
59
- * @default "meera"
60
- * @example "meera" (Default female voice for bulbul:v1)
61
- * @example "arvind" (Male voice for bulbul:v1)
62
- * @example "anushka" (Female voice for bulbul:v2)
67
+ * @default
68
+ * - "shubh" (Male voice for bulbul:v3)
69
+ * - "anushka" (Female voice for bulbul:v2)
70
+ * - "meera" (Female voice for bulbul:v1)
63
71
  */
64
72
  speaker?: SarvamSpeechVoices;
65
73
  /**
@@ -105,9 +113,41 @@ type SarvamSpeechSettings = {
105
113
  enable_preprocessing?: boolean;
106
114
  };
107
115
 
108
- type SarvamTranscriptionModelId = "saarika:v2" | "saarika:v1" | "saarika:flash" | (string & {});
109
- type SarvamSpeechTranslationModelId = "saaras:v1" | "saaras:v2" | "saaras:turbo" | "saaras:flash" | (string & {});
116
+ type SarvamTranscriptionModelId = "saaras:v3" | "saarika:v2.5" | (string & {});
117
+ type SarvamSpeechTranslationModelId = "saaras:v3" | "saaras:v2.5" | (string & {});
118
+ declare const SarvamProviderOptionsSchema: z.ZodObject<{
119
+ mode: z.ZodDefault<z.ZodEnum<["transcribe", "translate", "verbatim", "translit", "codemix"]>>;
120
+ with_timestamps: z.ZodDefault<z.ZodOptional<z.ZodNullable<z.ZodBoolean>>>;
121
+ with_diarization: z.ZodDefault<z.ZodOptional<z.ZodNullable<z.ZodBoolean>>>;
122
+ num_speakers: z.ZodOptional<z.ZodNullable<z.ZodNumber>>;
123
+ }, "strip", z.ZodTypeAny, {
124
+ mode: "transcribe" | "translate" | "verbatim" | "translit" | "codemix";
125
+ with_timestamps: boolean | null;
126
+ with_diarization: boolean | null;
127
+ num_speakers?: number | null | undefined;
128
+ }, {
129
+ mode?: "transcribe" | "translate" | "verbatim" | "translit" | "codemix" | undefined;
130
+ with_timestamps?: boolean | null | undefined;
131
+ with_diarization?: boolean | null | undefined;
132
+ num_speakers?: number | null | undefined;
133
+ }>;
110
134
  type SarvamTranscriptionCallOptions = {
135
+ /**
136
+ * @default "transcribe"
137
+ *
138
+ * @description
139
+ * - `transcribe`: Standard transcription in the original language, `output`: Text in source language
140
+ * - `translate`: Transcribe and translate to English, `output`: English text
141
+ * - `verbatim`: Word-for-word transcription including filler words and repetitions, `output`: Verbatim text in source language
142
+ * - `translit`: Transcribe and transliterate to Roman script, `output`: Romanized text
143
+ * - `codemix`: Transcribe code-mixed speech (e.g., Hindi-English) naturally, `output`: Code-mixed text
144
+ */
145
+ mode?: z.infer<typeof SarvamProviderOptionsSchema.shape.mode>;
146
+ /**
147
+ * - Chunk-level timestamp support
148
+ * - Useful for subtitle alignment and audio navigation
149
+ * - Provides start and end times for each segment of text
150
+ */
111
151
  with_timestamps?: boolean;
112
152
  /**
113
153
  * Enables speaker diarization, which identifies and separates different speakers in the audio.
@@ -240,11 +280,19 @@ interface SarvamProvider {
240
280
  /**
241
281
  * Creates a model for text generation.
242
282
  */
243
- (modelId: SarvamChatModelId, settings?: SarvamChatSettings): LanguageModelV1;
283
+ (
284
+ /**
285
+ * @description Sarvam-M (24B) is now a legacy model; we recommend migrating to Sarvam-30B or Sarvam-105B for improved performance.
286
+ */
287
+ modelId: SarvamChatModelId, settings?: SarvamChatSettings): LanguageModelV1;
244
288
  /**
245
289
  * Creates a Sarvam chat model for text generation.
246
290
  */
247
- languageModel(modelId: SarvamChatModelId, settings?: SarvamChatSettings): LanguageModelV1;
291
+ languageModel(
292
+ /**
293
+ * @description Sarvam-M (24B) is now a legacy model; we recommend migrating to Sarvam-30B or Sarvam-105B for improved performance.
294
+ */
295
+ modelId: SarvamChatModelId, settings?: SarvamChatSettings): LanguageModelV1;
248
296
  /**
249
297
  * Creates a Sarvam model for transcription.
250
298
  */
package/dist/index.d.ts CHANGED
@@ -2,7 +2,15 @@ import { LanguageModelV1, TranscriptionModelV1, SpeechModelV1 } from '@ai-sdk/pr
2
2
  import { FetchFunction } from '@ai-sdk/provider-utils';
3
3
  import { z } from 'zod';
4
4
 
5
- type SarvamChatModelId = "sarvam-m" | (string & {});
5
+ /**
6
+ * @description Product models
7
+ */
8
+ type SarvamChatModelId = "sarvam-30b" | "sarvam-30b-16k" | "sarvam-105b" | "sarvam-105b-32k" | SarvamChatLegacyModelId | (string & {});
9
+ /**
10
+ * @description Legacy models
11
+ * @deprecated
12
+ */
13
+ type SarvamChatLegacyModelId = "sarvam-m";
6
14
  interface SarvamChatSettings {
7
15
  /**
8
16
  * Whether to simulate artificial tool calling or JSON object generation, because Sarvam models don't support native tool calling or JSON Schema.
@@ -42,9 +50,9 @@ interface SarvamChatSettings {
42
50
  type SarvamLanguageCode = z.infer<typeof SarvamLanguageCodeSchema>;
43
51
  declare const SarvamLanguageCodeSchema: z.ZodEnum<["hi-IN", "bn-IN", "kn-IN", "ml-IN", "mr-IN", "od-IN", "pa-IN", "ta-IN", "te-IN", "en-IN", "gu-IN"]>;
44
52
 
45
- type SarvamSpeechModelId = "bulbul:v1" | "bulbul:v2" | (string & {});
53
+ type SarvamSpeechModelId = "bulbul:v2" | "bulbul:v3" | (string & {});
46
54
  type SarvamSpeechVoices = z.infer<typeof SpeakerSchema>;
47
- declare const SpeakerSchema: z.ZodDefault<z.ZodEnum<["meera", "pavithra", "maitreyi", "arvind", "amol", "amartya", "diya", "neel", "misha", "vian", "arjun", "maya", "anushka", "abhilash", "manisha", "vidya", "arya", "karun", "hitesh"]>>;
55
+ declare const SpeakerSchema: z.ZodDefault<z.ZodEnum<["abhilash", "karun", "hitesh", "anushka", "manisha", "vidya", "arya", "shubh", "aditya", "rahul", "rohan", "amit", "dev", "ratan", "varun", "manan", "sumit", "kabir", "aayan", "ashutosh", "advait", "anand", "tarun", "sunny", "mani", "gokul", "vijay", "mohit", "rehan", "soham", "ritu", "priya", "neha", "pooja", "simran", "kavya", "ishita", "shreya", "roopa", "amelia", "sophia", "tanya", "shruti", "suhani", "kavitha", "rupali"]>>;
48
56
  /**
49
57
  * Configuration settings for Sarvam Text-to-Speech API.
50
58
  *
@@ -56,10 +64,10 @@ type SarvamSpeechSettings = {
56
64
  /**
57
65
  * The speaker voice to be used for the output audio.
58
66
  *
59
- * @default "meera"
60
- * @example "meera" (Default female voice for bulbul:v1)
61
- * @example "arvind" (Male voice for bulbul:v1)
62
- * @example "anushka" (Female voice for bulbul:v2)
67
+ * @default
68
+ * - "shubh" (Male voice for bulbul:v3)
69
+ * - "anushka" (Female voice for bulbul:v2)
70
+ * - "meera" (Female voice for bulbul:v1)
63
71
  */
64
72
  speaker?: SarvamSpeechVoices;
65
73
  /**
@@ -105,9 +113,41 @@ type SarvamSpeechSettings = {
105
113
  enable_preprocessing?: boolean;
106
114
  };
107
115
 
108
- type SarvamTranscriptionModelId = "saarika:v2" | "saarika:v1" | "saarika:flash" | (string & {});
109
- type SarvamSpeechTranslationModelId = "saaras:v1" | "saaras:v2" | "saaras:turbo" | "saaras:flash" | (string & {});
116
+ type SarvamTranscriptionModelId = "saaras:v3" | "saarika:v2.5" | (string & {});
117
+ type SarvamSpeechTranslationModelId = "saaras:v3" | "saaras:v2.5" | (string & {});
118
+ declare const SarvamProviderOptionsSchema: z.ZodObject<{
119
+ mode: z.ZodDefault<z.ZodEnum<["transcribe", "translate", "verbatim", "translit", "codemix"]>>;
120
+ with_timestamps: z.ZodDefault<z.ZodOptional<z.ZodNullable<z.ZodBoolean>>>;
121
+ with_diarization: z.ZodDefault<z.ZodOptional<z.ZodNullable<z.ZodBoolean>>>;
122
+ num_speakers: z.ZodOptional<z.ZodNullable<z.ZodNumber>>;
123
+ }, "strip", z.ZodTypeAny, {
124
+ mode: "transcribe" | "translate" | "verbatim" | "translit" | "codemix";
125
+ with_timestamps: boolean | null;
126
+ with_diarization: boolean | null;
127
+ num_speakers?: number | null | undefined;
128
+ }, {
129
+ mode?: "transcribe" | "translate" | "verbatim" | "translit" | "codemix" | undefined;
130
+ with_timestamps?: boolean | null | undefined;
131
+ with_diarization?: boolean | null | undefined;
132
+ num_speakers?: number | null | undefined;
133
+ }>;
110
134
  type SarvamTranscriptionCallOptions = {
135
+ /**
136
+ * @default "transcribe"
137
+ *
138
+ * @description
139
+ * - `transcribe`: Standard transcription in the original language, `output`: Text in source language
140
+ * - `translate`: Transcribe and translate to English, `output`: English text
141
+ * - `verbatim`: Word-for-word transcription including filler words and repetitions, `output`: Verbatim text in source language
142
+ * - `translit`: Transcribe and transliterate to Roman script, `output`: Romanized text
143
+ * - `codemix`: Transcribe code-mixed speech (e.g., Hindi-English) naturally, `output`: Code-mixed text
144
+ */
145
+ mode?: z.infer<typeof SarvamProviderOptionsSchema.shape.mode>;
146
+ /**
147
+ * - Chunk-level timestamp support
148
+ * - Useful for subtitle alignment and audio navigation
149
+ * - Provides start and end times for each segment of text
150
+ */
111
151
  with_timestamps?: boolean;
112
152
  /**
113
153
  * Enables speaker diarization, which identifies and separates different speakers in the audio.
@@ -240,11 +280,19 @@ interface SarvamProvider {
240
280
  /**
241
281
  * Creates a model for text generation.
242
282
  */
243
- (modelId: SarvamChatModelId, settings?: SarvamChatSettings): LanguageModelV1;
283
+ (
284
+ /**
285
+ * @description Sarvam-M (24B) is now a legacy model; we recommend migrating to Sarvam-30B or Sarvam-105B for improved performance.
286
+ */
287
+ modelId: SarvamChatModelId, settings?: SarvamChatSettings): LanguageModelV1;
244
288
  /**
245
289
  * Creates a Sarvam chat model for text generation.
246
290
  */
247
- languageModel(modelId: SarvamChatModelId, settings?: SarvamChatSettings): LanguageModelV1;
291
+ languageModel(
292
+ /**
293
+ * @description Sarvam-M (24B) is now a legacy model; we recommend migrating to Sarvam-30B or Sarvam-105B for improved performance.
294
+ */
295
+ modelId: SarvamChatModelId, settings?: SarvamChatSettings): LanguageModelV1;
248
296
  /**
249
297
  * Creates a Sarvam model for transcription.
250
298
  */
package/dist/index.js CHANGED
@@ -771,26 +771,57 @@ var import_provider_utils5 = require("@ai-sdk/provider-utils");
771
771
  // src/sarvam-speech-settings.ts
772
772
  var import_zod3 = require("zod");
773
773
  var SpeakerSchema = import_zod3.z.enum([
774
- "meera",
775
- "pavithra",
776
- "maitreyi",
777
- "arvind",
778
- "amol",
779
- "amartya",
780
- "diya",
781
- "neel",
782
- "misha",
783
- "vian",
784
- "arjun",
785
- "maya",
786
- "anushka",
774
+ // male bulbul:v2
787
775
  "abhilash",
776
+ "karun",
777
+ "hitesh",
778
+ // female bulbul:v2
779
+ "anushka",
788
780
  "manisha",
789
781
  "vidya",
790
782
  "arya",
791
- "karun",
792
- "hitesh"
793
- ]).default("meera");
783
+ // male bulbul:v3
784
+ "shubh",
785
+ "aditya",
786
+ "rahul",
787
+ "rohan",
788
+ "amit",
789
+ "dev",
790
+ "ratan",
791
+ "varun",
792
+ "manan",
793
+ "sumit",
794
+ "kabir",
795
+ "aayan",
796
+ "ashutosh",
797
+ "advait",
798
+ "anand",
799
+ "tarun",
800
+ "sunny",
801
+ "mani",
802
+ "gokul",
803
+ "vijay",
804
+ "mohit",
805
+ "rehan",
806
+ "soham",
807
+ // female bulbul:v3
808
+ "ritu",
809
+ "priya",
810
+ "neha",
811
+ "pooja",
812
+ "simran",
813
+ "kavya",
814
+ "ishita",
815
+ "shreya",
816
+ "roopa",
817
+ "amelia",
818
+ "sophia",
819
+ "tanya",
820
+ "shruti",
821
+ "suhani",
822
+ "kavitha",
823
+ "rupali"
824
+ ]).default("shubh");
794
825
  var SarvamProviderOptionsSchema = import_zod3.z.object({
795
826
  speaker: SpeakerSchema,
796
827
  pitch: import_zod3.z.number().min(-0.75).max(0.75).default(0),
@@ -842,12 +873,12 @@ var SarvamSpeechModel = class {
842
873
  return SpeakerSchema.parse(voice);
843
874
  }
844
875
  switch (this.modelId) {
845
- case "bulbul:v1":
846
- return "meera";
847
876
  case "bulbul:v2":
848
877
  return "manisha";
878
+ case "bulbul:v3":
879
+ return "shubh";
849
880
  }
850
- return "meera";
881
+ return "shubh";
851
882
  };
852
883
  const requestBody = {
853
884
  model: this.modelId,
@@ -934,6 +965,7 @@ var import_zod6 = require("zod");
934
965
  // src/sarvam-transcription-settings.ts
935
966
  var import_zod5 = require("zod");
936
967
  var SarvamProviderOptionsSchema2 = import_zod5.z.object({
968
+ mode: import_zod5.z.enum(["transcribe", "translate", "verbatim", "translit", "codemix"]).default("transcribe"),
937
969
  with_timestamps: import_zod5.z.boolean().nullish().default(false),
938
970
  with_diarization: import_zod5.z.boolean().nullish().default(false),
939
971
  num_speakers: import_zod5.z.number().int().nullish()
@@ -955,11 +987,8 @@ var SarvamTranscriptionModel = class {
955
987
  mediaType,
956
988
  providerOptions
957
989
  }) {
990
+ var _a;
958
991
  const warnings = [];
959
- if (this.modelId === "saarika:v1" && this.languageCode === "unknown")
960
- throw new Error(
961
- "Language code unknown is not supported for model saarika:v1"
962
- );
963
992
  const sarvamOptions = (0, import_provider_utils6.parseProviderOptions)({
964
993
  provider: "sarvam",
965
994
  providerOptions: {
@@ -975,6 +1004,7 @@ var SarvamTranscriptionModel = class {
975
1004
  formData.append("file", blob);
976
1005
  formData.append("model", this.modelId);
977
1006
  if (sarvamOptions) {
1007
+ formData.append("mode", (_a = sarvamOptions.mode) != null ? _a : "transcribe");
978
1008
  formData.append("language_code", this.languageCode);
979
1009
  formData.append(
980
1010
  "with_timestamps",
@@ -1484,14 +1514,14 @@ var sarvamTranscriptionResponseSchema2 = import_zod11.z.object({
1484
1514
  function createSarvam(options = {}) {
1485
1515
  var _a;
1486
1516
  const baseURL = (_a = (0, import_provider_utils11.withoutTrailingSlash)(options.baseURL)) != null ? _a : "https://api.sarvam.ai";
1487
- const ApiKey = (0, import_provider_utils11.loadApiKey)({
1517
+ const getApiKey = () => (0, import_provider_utils11.loadApiKey)({
1488
1518
  apiKey: options.apiKey,
1489
1519
  environmentVariableName: "SARVAM_API_KEY",
1490
1520
  description: "Sarvam"
1491
1521
  });
1492
1522
  const getHeaders = () => ({
1493
- Authorization: `Bearer ${ApiKey}`,
1494
- "api-subscription-key": ApiKey,
1523
+ Authorization: `Bearer ${getApiKey()}`,
1524
+ "api-subscription-key": getApiKey(),
1495
1525
  ...options.headers
1496
1526
  });
1497
1527
  const createChatModel = (modelId, settings = {}) => new SarvamChatLanguageModel(modelId, settings, {