sarvam-ai-sdk 0.0.5 → 0.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -69,9 +69,24 @@ const { text } = await transcribe({
69
69
  console.log(text); // പാചകം തുടരും സുഹൃത്തുക്കളെ
70
70
  ```
71
71
 
72
+ ## Speech-to-Text-Translate
73
+
74
+ ```ts
75
+ import { sarvam } from "sarvam-ai-sdk";
76
+ import { experimental_transcribe as transcribe } from "ai";
77
+ import { readFile } from "fs/promises";
78
+
79
+ const result = await transcribe({
80
+ model: sarvam.speechTranslation("saaras:v2"),
81
+ audio: await readFile("./src/transcript-test.wav"),
82
+ });
83
+
84
+ console.log(result.text); // Cooking continues, my friends
85
+ ```
86
+
72
87
  ## Translation
73
88
 
74
- > Only transliterates `prompt` and `role:user` messages, not `system` not `assistant`.
89
+ > NB: Only translates `prompt` and `role:user` messages, not `system` or `assistant` messages.
75
90
 
76
91
  ```ts
77
92
  import { sarvam } from "sarvam-ai-sdk";
@@ -90,7 +105,7 @@ console.log(result.text); // Shouldn't we be careful about this, Ambane?
90
105
 
91
106
  ## Transliterate
92
107
 
93
- > Only transliterates `prompt` and `role:user` messages, not `system` not `assistant`.
108
+ > NB: Only transliterates `prompt` and `role:user` messages, not `system` or `assistant` messages.
94
109
 
95
110
  ```ts
96
111
  import { sarvam } from "sarvam-ai-sdk";
@@ -107,6 +122,22 @@ const result = await generateText({
107
122
  console.log(result.text); // എടാ മോനെ, ഹാപ്പി അല്ലേ?
108
123
  ```
109
124
 
125
+ ## Language Identification
126
+
127
+ > NB: Only identifies the language of `prompt` and `role:user` messages, not `system` or `assistant` messages.
128
+
129
+ ```ts
130
+ import { sarvam } from "sarvam-ai-sdk";
131
+ import { generateText } from "ai";
132
+
133
+ const result = await generateText({
134
+ model: sarvam.languageIdentification(),
135
+ prompt: "ബുദ്ധിയാണ് സാറേ ഇവൻ്റെ മെയിൻ",
136
+ });
137
+
138
+ console.log(result.text); // ml-IN
139
+ ```
140
+
110
141
  ## Tool Calling
111
142
 
112
143
  > [!WARNING]
@@ -120,7 +151,7 @@ import { sarvam } from "sarvam-ai-sdk";
120
151
 
121
152
  const result = await generateText({
122
153
  model: sarvam("sarvam-m", {
123
- simulateToolCalling: true, // ⚠️ important
154
+ simulate: "tool-calling" // ⚠️ important
124
155
  }),
125
156
  tools: {
126
157
  weather: tool({
@@ -140,6 +171,32 @@ const result = await generateText({
140
171
 
141
172
  console.log(result.toolResults);
142
173
  ```
174
+ ## Generate JSON object
175
+
176
+ > [!WARNING]
177
+ > The latest `sarvam-m` model isn't trained for native JSON object generation, so we simulate it with a prompt-engineering technique.
178
+
179
+ ```ts
180
+ import { z } from "zod";
181
+ import { sarvam } from "sarvam-ai-sdk";
182
+ import { generateObject } from 'ai';
183
+
184
+ const { object } = await generateObject({
185
+ model: sarvam("sarvam-m", {
186
+ simulate: "json-object" // ⚠️ important
187
+ }),
188
+ schema: z.object({
189
+ recipe: z.object({
190
+ name: z.string(),
191
+ ingredients: z.array(z.string()),
192
+ steps: z.array(z.string()),
193
+ }),
194
+ }),
195
+ prompt: 'Generate a South Indian recipe, in Malayalam',
196
+ });
197
+
198
+ console.log(object);
199
+ ```
143
200
 
144
201
  ## Documentation
145
202
 
package/dist/index.cjs CHANGED
@@ -55,7 +55,7 @@ __export(index_exports, {
55
55
  module.exports = __toCommonJS(index_exports);
56
56
 
57
57
  // src/sarvam-provider.ts
58
- var import_provider_utils9 = require("@ai-sdk/provider-utils");
58
+ var import_provider_utils11 = require("@ai-sdk/provider-utils");
59
59
 
60
60
  // src/sarvam-chat-language-model.ts
61
61
  var import_provider3 = require("@ai-sdk/provider");
@@ -300,24 +300,29 @@ const myChoice: YourToolChoices = {
300
300
  }`;
301
301
  return text;
302
302
  };
303
- var extractToolCallData = (text) => {
303
+ var extractToolCallData = (jsonObject) => {
304
+ const toolFunction = jsonObject;
305
+ if (!("toolName" in toolFunction)) return;
306
+ if (!("toolData" in toolFunction)) return;
307
+ return {
308
+ args: JSON.stringify(toolFunction.toolData),
309
+ toolCallId: (0, import_provider_utils3.generateId)(),
310
+ toolCallType: "function",
311
+ toolName: toolFunction.toolName
312
+ };
313
+ };
314
+ var parseJSON = (text) => {
304
315
  const jsonRegex = /\{(?:[^{}]*|\{[^{}]*\})*\}/g;
305
316
  const jsonMatches = text.match(jsonRegex);
306
317
  if (jsonMatches && jsonMatches[0]) {
307
318
  try {
308
- const toolFunction = JSON.parse(jsonMatches[0]);
309
- if (!("toolName" in toolFunction)) return;
310
- if (!("toolData" in toolFunction)) return;
311
- return {
312
- args: JSON.stringify(toolFunction.toolData),
313
- toolCallId: (0, import_provider_utils3.generateId)(),
314
- toolCallType: "function",
315
- toolName: toolFunction.toolName
316
- };
319
+ const jsonObject = JSON.parse(jsonMatches[0]);
320
+ return jsonObject;
317
321
  } catch (error) {
318
322
  }
319
323
  }
320
324
  };
325
+ var simulateJsonSchema = () => "If user doen't specify, make sure to translate json data content into pure English.";
321
326
 
322
327
  // src/sarvam-chat-language-model.ts
323
328
  var SarvamChatLanguageModel = class {
@@ -351,7 +356,18 @@ var SarvamChatLanguageModel = class {
351
356
  providerMetadata
352
357
  }) {
353
358
  const type = mode.type;
359
+ const simulate = this.settings.simulate;
360
+ if (type === "object-json" && simulate === "tool-calling")
361
+ throw new Error('Use { simulate: "json-object" } with generateObject()');
362
+ if (type === "regular" && simulate === "json-object")
363
+ throw new Error('Use { simulate: "tool-calling" } with generateText()');
354
364
  const warnings = [];
365
+ if (stream) {
366
+ warnings.push({
367
+ type: "other",
368
+ message: "Streaming is still experimental for Sarvam"
369
+ });
370
+ }
355
371
  if (topK != null) {
356
372
  warnings.push({
357
373
  type: "unsupported-setting",
@@ -372,7 +388,7 @@ var SarvamChatLanguageModel = class {
372
388
  reasoningFormat: import_zod2.z.enum(["parsed", "raw", "hidden"]).nullish()
373
389
  })
374
390
  });
375
- const baseArgs = (prompt2, fakeToolSystemPrompt) => ({
391
+ const baseArgs = (prompt2, extraSystemPrompt) => ({
376
392
  // model id:
377
393
  model: this.modelId,
378
394
  // model specific settings:
@@ -394,16 +410,16 @@ var SarvamChatLanguageModel = class {
394
410
  // provider options:
395
411
  reasoning_format: sarvamOptions == null ? void 0 : sarvamOptions.reasoningFormat,
396
412
  // messages:
397
- messages: convertToSarvamChatMessages(prompt2, fakeToolSystemPrompt)
413
+ messages: convertToSarvamChatMessages(prompt2, extraSystemPrompt)
398
414
  });
399
415
  switch (type) {
400
416
  case "regular": {
401
417
  const { tools, tool_choice, toolWarnings } = prepareTools({
402
418
  mode
403
419
  });
404
- const fakeSystemPrompt = tools && this.settings.simulateToolCalling ? await simulateToolCalling(tools) : void 0;
420
+ const extraSystemPrompt = tools && simulate === "tool-calling" ? await simulateToolCalling(tools) : void 0;
405
421
  return {
406
- args: __spreadProps(__spreadValues({}, baseArgs(prompt, fakeSystemPrompt)), {
422
+ args: __spreadProps(__spreadValues({}, baseArgs(prompt, extraSystemPrompt)), {
407
423
  tools,
408
424
  tool_choice
409
425
  }),
@@ -411,8 +427,9 @@ var SarvamChatLanguageModel = class {
411
427
  };
412
428
  }
413
429
  case "object-json": {
430
+ const extraSystemPrompt = simulate === "json-object" ? simulateJsonSchema() : void 0;
414
431
  return {
415
- args: __spreadProps(__spreadValues({}, baseArgs(prompt)), {
432
+ args: __spreadProps(__spreadValues({}, baseArgs(prompt, extraSystemPrompt)), {
416
433
  response_format: (
417
434
  // json object response format is not supported for streaming:
418
435
  stream === false ? { type: "json_object" } : void 0
@@ -484,12 +501,24 @@ var SarvamChatLanguageModel = class {
484
501
  args: toolCall.function.arguments
485
502
  };
486
503
  });
487
- if (this.settings.simulateToolCalling) {
488
- if (text && text.length !== 0 && (!toolCalls || (toolCalls == null ? void 0 : toolCalls.length) === 0)) {
489
- const newTools = extractToolCallData(text);
490
- if (newTools) {
491
- toolCalls = [newTools];
492
- text = void 0;
504
+ if (this.settings.simulate === "tool-calling") {
505
+ if (text && text.length !== 0) {
506
+ const jsonObject = parseJSON(text);
507
+ if (jsonObject) {
508
+ const newTools = extractToolCallData(jsonObject);
509
+ if (newTools) {
510
+ toolCalls = [newTools];
511
+ text = void 0;
512
+ }
513
+ }
514
+ }
515
+ }
516
+ if (this.settings.simulate === "json-object") {
517
+ if (text && text.length !== 0) {
518
+ const jsonObject = parseJSON(text);
519
+ if (jsonObject) {
520
+ const newTools = extractToolCallData(jsonObject);
521
+ text = JSON.stringify(jsonObject);
493
522
  }
494
523
  }
495
524
  }
@@ -1064,6 +1093,18 @@ var SarvamLanguageCodeSchema = import_zod7.z.enum([
1064
1093
  "en-IN",
1065
1094
  "gu-IN"
1066
1095
  ]);
1096
+ var SarvamScriptCodeSchema = import_zod7.z.enum([
1097
+ "Latn",
1098
+ "Deva",
1099
+ "Beng",
1100
+ "Gujr",
1101
+ "Knda",
1102
+ "Mlym",
1103
+ "Orya",
1104
+ "Guru",
1105
+ "Taml",
1106
+ "Telu"
1107
+ ]);
1067
1108
 
1068
1109
  // src/sarvam-translation-model.ts
1069
1110
  var SarvamTranslationModel = class {
@@ -1267,11 +1308,185 @@ var sarvamTransliterateResponseSchema = import_zod9.z.object({
1267
1308
  request_id: import_zod9.z.string().nullish()
1268
1309
  });
1269
1310
 
1311
+ // src/sarvam-lid-model.ts
1312
+ var import_provider_utils9 = require("@ai-sdk/provider-utils");
1313
+ var import_zod10 = require("zod");
1314
+ var SarvamLidModel = class {
1315
+ constructor(config) {
1316
+ this.specificationVersion = "v1";
1317
+ this.supportsStructuredOutputs = false;
1318
+ this.defaultObjectGenerationMode = "json";
1319
+ this.modelId = "unknown";
1320
+ this.config = config;
1321
+ }
1322
+ get provider() {
1323
+ return this.config.provider;
1324
+ }
1325
+ get supportsImageUrls() {
1326
+ return false;
1327
+ }
1328
+ getArgs({
1329
+ mode,
1330
+ prompt
1331
+ }) {
1332
+ const type = mode.type;
1333
+ const warnings = [];
1334
+ if (type !== "regular") {
1335
+ const _exhaustiveCheck = type;
1336
+ throw new Error(`Unsupported type: ${_exhaustiveCheck}`);
1337
+ }
1338
+ const messages = convertToSarvamChatMessages(prompt);
1339
+ return {
1340
+ messages,
1341
+ args: {
1342
+ input: messages.filter((m) => m.role === "user").map((m) => m.content).join("\n")
1343
+ },
1344
+ warnings
1345
+ };
1346
+ }
1347
+ async doGenerate(options) {
1348
+ var _b;
1349
+ const { args, warnings, messages } = this.getArgs(__spreadProps(__spreadValues({}, options), {
1350
+ stream: false
1351
+ }));
1352
+ const body = JSON.stringify(args);
1353
+ const {
1354
+ responseHeaders,
1355
+ value: response,
1356
+ rawValue: rawResponse
1357
+ } = await (0, import_provider_utils9.postJsonToApi)({
1358
+ url: this.config.url({
1359
+ path: "/text-lid"
1360
+ }),
1361
+ headers: (0, import_provider_utils9.combineHeaders)(this.config.headers(), options.headers),
1362
+ body: args,
1363
+ failedResponseHandler: sarvamFailedResponseHandler,
1364
+ successfulResponseHandler: (0, import_provider_utils9.createJsonResponseHandler)(
1365
+ sarvamLidResponseSchema
1366
+ ),
1367
+ abortSignal: options.abortSignal,
1368
+ fetch: this.config.fetch
1369
+ });
1370
+ const _a = args, { input: rawPrompt } = _a, rawSettings = __objRest(_a, ["input"]);
1371
+ const text = (_b = response.language_code) != null ? _b : void 0;
1372
+ return {
1373
+ text,
1374
+ toolCalls: void 0,
1375
+ reasoning: void 0,
1376
+ finishReason: "unknown",
1377
+ usage: {
1378
+ promptTokens: NaN,
1379
+ completionTokens: NaN
1380
+ },
1381
+ rawCall: { rawPrompt, rawSettings },
1382
+ rawResponse: { headers: responseHeaders, body: rawResponse },
1383
+ response: void 0,
1384
+ warnings,
1385
+ request: { body }
1386
+ };
1387
+ }
1388
+ async doStream(options) {
1389
+ throw new Error("Language Identification feature doesn't streaming yet");
1390
+ }
1391
+ };
1392
+ var sarvamLidResponseSchema = import_zod10.z.object({
1393
+ script_code: SarvamScriptCodeSchema.nullish(),
1394
+ language_code: SarvamLanguageCodeSchema.nullable(),
1395
+ request_id: import_zod10.z.string().nullish()
1396
+ });
1397
+
1398
+ // src/sarvam-speech-translation-model.ts
1399
+ var import_provider_utils10 = require("@ai-sdk/provider-utils");
1400
+ var import_zod11 = require("zod");
1401
+ var SarvamSpeechTranslationModel = class {
1402
+ constructor(modelId, config) {
1403
+ this.modelId = modelId;
1404
+ this.config = config;
1405
+ this.specificationVersion = "v1";
1406
+ }
1407
+ get provider() {
1408
+ return this.config.provider;
1409
+ }
1410
+ getArgs({
1411
+ audio,
1412
+ mediaType,
1413
+ providerOptions
1414
+ }) {
1415
+ const warnings = [];
1416
+ const formData = new FormData();
1417
+ const blob = audio instanceof Blob ? audio : new Blob([audio], { type: mediaType });
1418
+ formData.append("file", blob);
1419
+ formData.append("model", this.modelId);
1420
+ return {
1421
+ formData,
1422
+ warnings
1423
+ };
1424
+ }
1425
+ async doGenerate(options) {
1426
+ var _a, _b, _c;
1427
+ const currentDate = (_c = (_b = (_a = this.config._internal) == null ? void 0 : _a.currentDate) == null ? void 0 : _b.call(_a)) != null ? _c : /* @__PURE__ */ new Date();
1428
+ const { formData, warnings } = this.getArgs(options);
1429
+ const {
1430
+ value: response,
1431
+ responseHeaders,
1432
+ rawValue: rawResponse
1433
+ } = await (0, import_provider_utils10.postFormDataToApi)({
1434
+ url: this.config.url({
1435
+ path: "/speech-to-text-translate",
1436
+ modelId: this.modelId
1437
+ }),
1438
+ headers: (0, import_provider_utils10.combineHeaders)(this.config.headers(), options.headers),
1439
+ formData,
1440
+ failedResponseHandler: sarvamFailedResponseHandler,
1441
+ successfulResponseHandler: (0, import_provider_utils10.createJsonResponseHandler)(
1442
+ sarvamTranscriptionResponseSchema2
1443
+ ),
1444
+ abortSignal: options.abortSignal,
1445
+ fetch: this.config.fetch
1446
+ });
1447
+ return {
1448
+ text: response.transcript,
1449
+ segments: [],
1450
+ language: response.language_code ? response.language_code : void 0,
1451
+ durationInSeconds: void 0,
1452
+ warnings,
1453
+ response: {
1454
+ timestamp: currentDate,
1455
+ modelId: this.modelId,
1456
+ headers: responseHeaders,
1457
+ body: rawResponse
1458
+ }
1459
+ };
1460
+ }
1461
+ };
1462
+ var sarvamTranscriptionResponseSchema2 = import_zod11.z.object({
1463
+ request_id: import_zod11.z.string().nullable(),
1464
+ transcript: import_zod11.z.string(),
1465
+ language_code: import_zod11.z.string().nullable(),
1466
+ // timestamps: z
1467
+ // .object({
1468
+ // end_time_seconds: z.array(z.number()),
1469
+ // start_time_seconds: z.array(z.number()),
1470
+ // words: z.array(z.string()),
1471
+ // })
1472
+ // .optional(),
1473
+ diarized_transcript: import_zod11.z.object({
1474
+ entries: import_zod11.z.array(
1475
+ import_zod11.z.object({
1476
+ end_time_seconds: import_zod11.z.array(import_zod11.z.number()),
1477
+ start_time_seconds: import_zod11.z.array(import_zod11.z.number()),
1478
+ transcript: import_zod11.z.string(),
1479
+ speaker_id: import_zod11.z.string()
1480
+ })
1481
+ )
1482
+ }).nullable().optional()
1483
+ });
1484
+
1270
1485
  // src/sarvam-provider.ts
1271
1486
  function createSarvam(options = {}) {
1272
1487
  var _a;
1273
- const baseURL = (_a = (0, import_provider_utils9.withoutTrailingSlash)(options.baseURL)) != null ? _a : "https://api.sarvam.ai";
1274
- const ApiKey = (0, import_provider_utils9.loadApiKey)({
1488
+ const baseURL = (_a = (0, import_provider_utils11.withoutTrailingSlash)(options.baseURL)) != null ? _a : "https://api.sarvam.ai";
1489
+ const ApiKey = (0, import_provider_utils11.loadApiKey)({
1275
1490
  apiKey: options.apiKey,
1276
1491
  environmentVariableName: "SARVAM_API_KEY",
1277
1492
  description: "Sarvam"
@@ -1294,15 +1509,19 @@ function createSarvam(options = {}) {
1294
1509
  }
1295
1510
  return createChatModel(modelId, settings);
1296
1511
  };
1297
- const createTranscriptionModel = (modelId, languageCode = "unknown", settings) => {
1298
- return new SarvamTranscriptionModel(modelId, languageCode, {
1299
- provider: "sarvam.transcription",
1300
- url: ({ path }) => `${baseURL}${path}`,
1301
- headers: getHeaders,
1302
- fetch: options.fetch,
1303
- transcription: settings
1304
- });
1305
- };
1512
+ const createTranscriptionModel = (modelId, languageCode = "unknown", settings) => new SarvamTranscriptionModel(modelId, languageCode, {
1513
+ provider: "sarvam.transcription",
1514
+ url: ({ path }) => `${baseURL}${path}`,
1515
+ headers: getHeaders,
1516
+ fetch: options.fetch,
1517
+ transcription: settings
1518
+ });
1519
+ const createSpeechTranslation = (modelId) => new SarvamSpeechTranslationModel(modelId, {
1520
+ provider: "sarvam.transcription",
1521
+ url: ({ path }) => `${baseURL}${path}`,
1522
+ headers: getHeaders,
1523
+ fetch: options.fetch
1524
+ });
1306
1525
  const createSpeechModel = (modelId, languageCode, settings) => new SarvamSpeechModel(modelId, languageCode, {
1307
1526
  provider: "sarvam.speech",
1308
1527
  url: ({ path }) => `${baseURL}${path}`,
@@ -1334,13 +1553,23 @@ function createSarvam(options = {}) {
1334
1553
  fetch: options.fetch
1335
1554
  }
1336
1555
  );
1556
+ const createLidModel = () => new SarvamLidModel(
1557
+ {
1558
+ provider: "sarvam.lid",
1559
+ url: ({ path }) => `${baseURL}${path}`,
1560
+ headers: getHeaders,
1561
+ fetch: options.fetch
1562
+ }
1563
+ );
1337
1564
  const provider = (modelId, settings) => createLanguageModel(modelId, settings);
1338
1565
  provider.languageModel = createLanguageModel;
1339
1566
  provider.chat = createChatModel;
1340
1567
  provider.transcription = createTranscriptionModel;
1568
+ provider.speechTranslation = createSpeechTranslation;
1341
1569
  provider.speech = createSpeechModel;
1342
1570
  provider.transliterate = createTransliterateModel;
1343
1571
  provider.translation = createTranslationModel;
1572
+ provider.languageIdentification = createLidModel;
1344
1573
  return provider;
1345
1574
  }
1346
1575
  var sarvam = createSarvam();