sarvam-ai-sdk 0.0.6 → 0.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -69,6 +69,21 @@ const { text } = await transcribe({
69
69
  console.log(text); // പാചകം തുടരും സുഹൃത്തുക്കളെ
70
70
  ```
71
71
 
72
+ ## Speech-to-Text-Translate
73
+
74
+ ```ts
75
+ import { sarvam } from "sarvam-ai-sdk";
76
+ import { experimental_transcribe as transcribe } from "ai";
77
+ import { readFile } from "fs/promises";
78
+
79
+ const result = await transcribe({
80
+ model: sarvam.speechTranslation("saaras:v2"),
81
+ audio: await readFile("./src/transcript-test.wav"),
82
+ });
83
+
84
+ console.log(result.text); // Cooking continues, my friends
85
+ ```
86
+
72
87
  ## Translation
73
88
 
74
89
  > NB: Only translates `prompt` and `role:user` messages, not `system` or `assistant` messages.
@@ -79,8 +94,8 @@ import { generateText } from "ai";
79
94
 
80
95
  const result = await generateText({
81
96
  model: sarvam.translation({
97
+ "from": "ml-IN",
82
98
  "to": "en-IN",
83
- "from": "ml-IN"
84
99
  }),
85
100
  prompt: "ഇതൊക്കെ ശ്രദ്ധിക്കണ്ടേ അംബാനെ?",
86
101
  });
@@ -98,7 +113,7 @@ import { generateText } from "ai";
98
113
 
99
114
  const result = await generateText({
100
115
  model: sarvam.transliterate({
101
- from: "en-IN"
116
+ from: "en-IN",
102
117
  to: "ml-IN",
103
118
  }),
104
119
  prompt: "eda mone, happy alle?",
package/dist/index.cjs CHANGED
@@ -55,7 +55,7 @@ __export(index_exports, {
55
55
  module.exports = __toCommonJS(index_exports);
56
56
 
57
57
  // src/sarvam-provider.ts
58
- var import_provider_utils10 = require("@ai-sdk/provider-utils");
58
+ var import_provider_utils11 = require("@ai-sdk/provider-utils");
59
59
 
60
60
  // src/sarvam-chat-language-model.ts
61
61
  var import_provider3 = require("@ai-sdk/provider");
@@ -1112,7 +1112,8 @@ var SarvamTranslationModel = class {
1112
1112
  this.specificationVersion = "v1";
1113
1113
  this.supportsStructuredOutputs = false;
1114
1114
  this.defaultObjectGenerationMode = "json";
1115
- this.modelId = "unknown";
1115
+ var _a;
1116
+ this.modelId = (_a = settings.model) != null ? _a : "mayura:v1";
1116
1117
  this.settings = settings;
1117
1118
  this.config = config;
1118
1119
  }
@@ -1126,13 +1127,22 @@ var SarvamTranslationModel = class {
1126
1127
  mode,
1127
1128
  prompt
1128
1129
  }) {
1129
- var _a, _b, _c, _d, _e, _f;
1130
+ var _a, _b, _c, _d, _e, _f, _g, _h;
1130
1131
  const type = mode.type;
1131
1132
  const warnings = [];
1132
- if (this.settings.from !== "auto") {
1133
- if (this.settings.to !== "en-IN" && this.settings.from !== "en-IN")
1133
+ if (this.settings.from === this.settings.to) {
1134
+ throw new Error(
1135
+ "Source and target languages code must be different."
1136
+ );
1137
+ }
1138
+ if (this.modelId === "sarvam-translate:v1") {
1139
+ if (((_a = this.settings.mode) != null ? _a : "formal") !== "formal")
1134
1140
  throw new Error(
1135
- "Sarvam doesn't support Indic-Indic Transliteration yet"
1141
+ "Sarvam 'sarvam-translate:v1' only support mode formal."
1142
+ );
1143
+ if (((_b = this.settings.from) != null ? _b : "auto") === "auto")
1144
+ throw new Error(
1145
+ "Sarvam 'sarvam-translate:v1' requires source language code."
1136
1146
  );
1137
1147
  }
1138
1148
  if (type !== "regular") {
@@ -1144,14 +1154,14 @@ var SarvamTranslationModel = class {
1144
1154
  messages,
1145
1155
  args: {
1146
1156
  input: messages.filter((m) => m.role === "user").map((m) => m.content).join("\n"),
1147
- source_language_code: (_a = this.settings.from) != null ? _a : "auto",
1157
+ source_language_code: (_c = this.settings.from) != null ? _c : "auto",
1148
1158
  target_language_code: this.settings.to,
1149
- numerals_format: (_b = this.settings.numerals_format) != null ? _b : "international",
1150
- enable_preprocessing: (_c = this.settings.enable_preprocessing) != null ? _c : false,
1151
- output_script: (_d = this.settings.output_script) != null ? _d : null,
1152
- speaker_gender: (_e = this.settings.speaker_gender) != null ? _e : "Male",
1153
- mode: (_f = this.settings.mode) != null ? _f : "formal"
1154
- // model: this.settings.model ?? "male",
1159
+ numerals_format: (_d = this.settings.numerals_format) != null ? _d : "international",
1160
+ enable_preprocessing: (_e = this.settings.enable_preprocessing) != null ? _e : false,
1161
+ output_script: (_f = this.settings.output_script) != null ? _f : null,
1162
+ speaker_gender: (_g = this.settings.speaker_gender) != null ? _g : "Male",
1163
+ mode: (_h = this.settings.mode) != null ? _h : "formal",
1164
+ model: this.modelId
1155
1165
  },
1156
1166
  warnings
1157
1167
  };
@@ -1198,7 +1208,7 @@ var SarvamTranslationModel = class {
1198
1208
  };
1199
1209
  }
1200
1210
  async doStream(options) {
1201
- throw new Error("Translation feature doesn't streaming yet");
1211
+ throw new Error("Translation feature doesn't support streaming yet");
1202
1212
  }
1203
1213
  };
1204
1214
  var sarvamTranslationResponseSchema = import_zod8.z.object({
@@ -1395,11 +1405,98 @@ var sarvamLidResponseSchema = import_zod10.z.object({
1395
1405
  request_id: import_zod10.z.string().nullish()
1396
1406
  });
1397
1407
 
1408
+ // src/sarvam-speech-translation-model.ts
1409
+ var import_provider_utils10 = require("@ai-sdk/provider-utils");
1410
+ var import_zod11 = require("zod");
1411
+ var SarvamSpeechTranslationModel = class {
1412
+ constructor(modelId, config) {
1413
+ this.modelId = modelId;
1414
+ this.config = config;
1415
+ this.specificationVersion = "v1";
1416
+ }
1417
+ get provider() {
1418
+ return this.config.provider;
1419
+ }
1420
+ getArgs({
1421
+ audio,
1422
+ mediaType,
1423
+ providerOptions
1424
+ }) {
1425
+ const warnings = [];
1426
+ const formData = new FormData();
1427
+ const blob = audio instanceof Blob ? audio : new Blob([audio], { type: mediaType });
1428
+ formData.append("file", blob);
1429
+ formData.append("model", this.modelId);
1430
+ return {
1431
+ formData,
1432
+ warnings
1433
+ };
1434
+ }
1435
+ async doGenerate(options) {
1436
+ var _a, _b, _c;
1437
+ const currentDate = (_c = (_b = (_a = this.config._internal) == null ? void 0 : _a.currentDate) == null ? void 0 : _b.call(_a)) != null ? _c : /* @__PURE__ */ new Date();
1438
+ const { formData, warnings } = this.getArgs(options);
1439
+ const {
1440
+ value: response,
1441
+ responseHeaders,
1442
+ rawValue: rawResponse
1443
+ } = await (0, import_provider_utils10.postFormDataToApi)({
1444
+ url: this.config.url({
1445
+ path: "/speech-to-text-translate",
1446
+ modelId: this.modelId
1447
+ }),
1448
+ headers: (0, import_provider_utils10.combineHeaders)(this.config.headers(), options.headers),
1449
+ formData,
1450
+ failedResponseHandler: sarvamFailedResponseHandler,
1451
+ successfulResponseHandler: (0, import_provider_utils10.createJsonResponseHandler)(
1452
+ sarvamTranscriptionResponseSchema2
1453
+ ),
1454
+ abortSignal: options.abortSignal,
1455
+ fetch: this.config.fetch
1456
+ });
1457
+ return {
1458
+ text: response.transcript,
1459
+ segments: [],
1460
+ language: response.language_code ? response.language_code : void 0,
1461
+ durationInSeconds: void 0,
1462
+ warnings,
1463
+ response: {
1464
+ timestamp: currentDate,
1465
+ modelId: this.modelId,
1466
+ headers: responseHeaders,
1467
+ body: rawResponse
1468
+ }
1469
+ };
1470
+ }
1471
+ };
1472
+ var sarvamTranscriptionResponseSchema2 = import_zod11.z.object({
1473
+ request_id: import_zod11.z.string().nullable(),
1474
+ transcript: import_zod11.z.string(),
1475
+ language_code: import_zod11.z.string().nullable(),
1476
+ // timestamps: z
1477
+ // .object({
1478
+ // end_time_seconds: z.array(z.number()),
1479
+ // start_time_seconds: z.array(z.number()),
1480
+ // words: z.array(z.string()),
1481
+ // })
1482
+ // .optional(),
1483
+ diarized_transcript: import_zod11.z.object({
1484
+ entries: import_zod11.z.array(
1485
+ import_zod11.z.object({
1486
+ end_time_seconds: import_zod11.z.array(import_zod11.z.number()),
1487
+ start_time_seconds: import_zod11.z.array(import_zod11.z.number()),
1488
+ transcript: import_zod11.z.string(),
1489
+ speaker_id: import_zod11.z.string()
1490
+ })
1491
+ )
1492
+ }).nullable().optional()
1493
+ });
1494
+
1398
1495
  // src/sarvam-provider.ts
1399
1496
  function createSarvam(options = {}) {
1400
1497
  var _a;
1401
- const baseURL = (_a = (0, import_provider_utils10.withoutTrailingSlash)(options.baseURL)) != null ? _a : "https://api.sarvam.ai";
1402
- const ApiKey = (0, import_provider_utils10.loadApiKey)({
1498
+ const baseURL = (_a = (0, import_provider_utils11.withoutTrailingSlash)(options.baseURL)) != null ? _a : "https://api.sarvam.ai";
1499
+ const ApiKey = (0, import_provider_utils11.loadApiKey)({
1403
1500
  apiKey: options.apiKey,
1404
1501
  environmentVariableName: "SARVAM_API_KEY",
1405
1502
  description: "Sarvam"
@@ -1422,15 +1519,19 @@ function createSarvam(options = {}) {
1422
1519
  }
1423
1520
  return createChatModel(modelId, settings);
1424
1521
  };
1425
- const createTranscriptionModel = (modelId, languageCode = "unknown", settings) => {
1426
- return new SarvamTranscriptionModel(modelId, languageCode, {
1427
- provider: "sarvam.transcription",
1428
- url: ({ path }) => `${baseURL}${path}`,
1429
- headers: getHeaders,
1430
- fetch: options.fetch,
1431
- transcription: settings
1432
- });
1433
- };
1522
+ const createTranscriptionModel = (modelId, languageCode = "unknown", settings) => new SarvamTranscriptionModel(modelId, languageCode, {
1523
+ provider: "sarvam.transcription",
1524
+ url: ({ path }) => `${baseURL}${path}`,
1525
+ headers: getHeaders,
1526
+ fetch: options.fetch,
1527
+ transcription: settings
1528
+ });
1529
+ const createSpeechTranslation = (modelId) => new SarvamSpeechTranslationModel(modelId, {
1530
+ provider: "sarvam.transcription",
1531
+ url: ({ path }) => `${baseURL}${path}`,
1532
+ headers: getHeaders,
1533
+ fetch: options.fetch
1534
+ });
1434
1535
  const createSpeechModel = (modelId, languageCode, settings) => new SarvamSpeechModel(modelId, languageCode, {
1435
1536
  provider: "sarvam.speech",
1436
1537
  url: ({ path }) => `${baseURL}${path}`,
@@ -1439,10 +1540,7 @@ function createSarvam(options = {}) {
1439
1540
  speech: settings
1440
1541
  });
1441
1542
  const createTransliterateModel = (settings) => new SarvamTransliterateModel(
1442
- {
1443
- to: settings.to,
1444
- from: settings.from
1445
- },
1543
+ settings,
1446
1544
  {
1447
1545
  provider: "sarvam.transliterate",
1448
1546
  url: ({ path }) => `${baseURL}${path}`,
@@ -1451,10 +1549,7 @@ function createSarvam(options = {}) {
1451
1549
  }
1452
1550
  );
1453
1551
  const createTranslationModel = (settings) => new SarvamTranslationModel(
1454
- {
1455
- to: settings.to,
1456
- from: settings.from
1457
- },
1552
+ settings,
1458
1553
  {
1459
1554
  provider: "sarvam.translation",
1460
1555
  url: ({ path }) => `${baseURL}${path}`,
@@ -1474,6 +1569,7 @@ function createSarvam(options = {}) {
1474
1569
  provider.languageModel = createLanguageModel;
1475
1570
  provider.chat = createChatModel;
1476
1571
  provider.transcription = createTranscriptionModel;
1572
+ provider.speechTranslation = createSpeechTranslation;
1477
1573
  provider.speech = createSpeechModel;
1478
1574
  provider.transliterate = createTransliterateModel;
1479
1575
  provider.translation = createTranslationModel;