sarvam-ai-sdk 0.0.6 → 0.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +17 -2
- package/dist/index.cjs +129 -33
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +18 -4
- package/dist/index.d.ts +18 -4
- package/dist/index.js +130 -30
- package/dist/index.js.map +1 -1
- package/package.json +2 -4
package/README.md
CHANGED
|
@@ -69,6 +69,21 @@ const { text } = await transcribe({
|
|
|
69
69
|
console.log(text); // പാചകം തുടരും സുഹൃത്തുക്കളെ
|
|
70
70
|
```
|
|
71
71
|
|
|
72
|
+
## Speech-to-Text-Translate
|
|
73
|
+
|
|
74
|
+
```ts
|
|
75
|
+
import { sarvam } from "sarvam-ai-sdk";
|
|
76
|
+
import { experimental_transcribe as transcribe } from "ai";
|
|
77
|
+
import { readFile } from "fs/promises";
|
|
78
|
+
|
|
79
|
+
const result = await transcribe({
|
|
80
|
+
model: sarvam.speechTranslation("saaras:v2"),
|
|
81
|
+
audio: await readFile("./src/transcript-test.wav"),
|
|
82
|
+
});
|
|
83
|
+
|
|
84
|
+
console.log(result.text); // Cooking continues, my friends
|
|
85
|
+
```
|
|
86
|
+
|
|
72
87
|
## Translation
|
|
73
88
|
|
|
74
89
|
> NB: Only translates `prompt` and `role:user` messages, not `system` or `assistant` messages.
|
|
@@ -79,8 +94,8 @@ import { generateText } from "ai";
|
|
|
79
94
|
|
|
80
95
|
const result = await generateText({
|
|
81
96
|
model: sarvam.translation({
|
|
97
|
+
"from": "ml-IN",
|
|
82
98
|
"to": "en-IN",
|
|
83
|
-
"from": "ml-IN"
|
|
84
99
|
}),
|
|
85
100
|
prompt: "ഇതൊക്കെ ശ്രദ്ധിക്കണ്ടേ അംബാനെ?",
|
|
86
101
|
});
|
|
@@ -98,7 +113,7 @@ import { generateText } from "ai";
|
|
|
98
113
|
|
|
99
114
|
const result = await generateText({
|
|
100
115
|
model: sarvam.transliterate({
|
|
101
|
-
from: "en-IN"
|
|
116
|
+
from: "en-IN",
|
|
102
117
|
to: "ml-IN",
|
|
103
118
|
}),
|
|
104
119
|
prompt: "eda mone, happy alle?",
|
package/dist/index.cjs
CHANGED
|
@@ -55,7 +55,7 @@ __export(index_exports, {
|
|
|
55
55
|
module.exports = __toCommonJS(index_exports);
|
|
56
56
|
|
|
57
57
|
// src/sarvam-provider.ts
|
|
58
|
-
var
|
|
58
|
+
var import_provider_utils11 = require("@ai-sdk/provider-utils");
|
|
59
59
|
|
|
60
60
|
// src/sarvam-chat-language-model.ts
|
|
61
61
|
var import_provider3 = require("@ai-sdk/provider");
|
|
@@ -1112,7 +1112,8 @@ var SarvamTranslationModel = class {
|
|
|
1112
1112
|
this.specificationVersion = "v1";
|
|
1113
1113
|
this.supportsStructuredOutputs = false;
|
|
1114
1114
|
this.defaultObjectGenerationMode = "json";
|
|
1115
|
-
|
|
1115
|
+
var _a;
|
|
1116
|
+
this.modelId = (_a = settings.model) != null ? _a : "mayura:v1";
|
|
1116
1117
|
this.settings = settings;
|
|
1117
1118
|
this.config = config;
|
|
1118
1119
|
}
|
|
@@ -1126,13 +1127,22 @@ var SarvamTranslationModel = class {
|
|
|
1126
1127
|
mode,
|
|
1127
1128
|
prompt
|
|
1128
1129
|
}) {
|
|
1129
|
-
var _a, _b, _c, _d, _e, _f;
|
|
1130
|
+
var _a, _b, _c, _d, _e, _f, _g, _h;
|
|
1130
1131
|
const type = mode.type;
|
|
1131
1132
|
const warnings = [];
|
|
1132
|
-
if (this.settings.from
|
|
1133
|
-
|
|
1133
|
+
if (this.settings.from === this.settings.to) {
|
|
1134
|
+
throw new Error(
|
|
1135
|
+
"Source and target languages code must be different."
|
|
1136
|
+
);
|
|
1137
|
+
}
|
|
1138
|
+
if (this.modelId === "sarvam-translate:v1") {
|
|
1139
|
+
if (((_a = this.settings.mode) != null ? _a : "formal") !== "formal")
|
|
1134
1140
|
throw new Error(
|
|
1135
|
-
"Sarvam
|
|
1141
|
+
"Sarvam 'sarvam-translate:v1' only support mode formal."
|
|
1142
|
+
);
|
|
1143
|
+
if (((_b = this.settings.from) != null ? _b : "auto") === "auto")
|
|
1144
|
+
throw new Error(
|
|
1145
|
+
"Sarvam 'sarvam-translate:v1' requires source language code."
|
|
1136
1146
|
);
|
|
1137
1147
|
}
|
|
1138
1148
|
if (type !== "regular") {
|
|
@@ -1144,14 +1154,14 @@ var SarvamTranslationModel = class {
|
|
|
1144
1154
|
messages,
|
|
1145
1155
|
args: {
|
|
1146
1156
|
input: messages.filter((m) => m.role === "user").map((m) => m.content).join("\n"),
|
|
1147
|
-
source_language_code: (
|
|
1157
|
+
source_language_code: (_c = this.settings.from) != null ? _c : "auto",
|
|
1148
1158
|
target_language_code: this.settings.to,
|
|
1149
|
-
numerals_format: (
|
|
1150
|
-
enable_preprocessing: (
|
|
1151
|
-
output_script: (
|
|
1152
|
-
speaker_gender: (
|
|
1153
|
-
mode: (
|
|
1154
|
-
|
|
1159
|
+
numerals_format: (_d = this.settings.numerals_format) != null ? _d : "international",
|
|
1160
|
+
enable_preprocessing: (_e = this.settings.enable_preprocessing) != null ? _e : false,
|
|
1161
|
+
output_script: (_f = this.settings.output_script) != null ? _f : null,
|
|
1162
|
+
speaker_gender: (_g = this.settings.speaker_gender) != null ? _g : "Male",
|
|
1163
|
+
mode: (_h = this.settings.mode) != null ? _h : "formal",
|
|
1164
|
+
model: this.modelId
|
|
1155
1165
|
},
|
|
1156
1166
|
warnings
|
|
1157
1167
|
};
|
|
@@ -1198,7 +1208,7 @@ var SarvamTranslationModel = class {
|
|
|
1198
1208
|
};
|
|
1199
1209
|
}
|
|
1200
1210
|
async doStream(options) {
|
|
1201
|
-
throw new Error("Translation feature doesn't streaming yet");
|
|
1211
|
+
throw new Error("Translation feature doesn't support streaming yet");
|
|
1202
1212
|
}
|
|
1203
1213
|
};
|
|
1204
1214
|
var sarvamTranslationResponseSchema = import_zod8.z.object({
|
|
@@ -1395,11 +1405,98 @@ var sarvamLidResponseSchema = import_zod10.z.object({
|
|
|
1395
1405
|
request_id: import_zod10.z.string().nullish()
|
|
1396
1406
|
});
|
|
1397
1407
|
|
|
1408
|
+
// src/sarvam-speech-translation-model.ts
|
|
1409
|
+
var import_provider_utils10 = require("@ai-sdk/provider-utils");
|
|
1410
|
+
var import_zod11 = require("zod");
|
|
1411
|
+
var SarvamSpeechTranslationModel = class {
|
|
1412
|
+
constructor(modelId, config) {
|
|
1413
|
+
this.modelId = modelId;
|
|
1414
|
+
this.config = config;
|
|
1415
|
+
this.specificationVersion = "v1";
|
|
1416
|
+
}
|
|
1417
|
+
get provider() {
|
|
1418
|
+
return this.config.provider;
|
|
1419
|
+
}
|
|
1420
|
+
getArgs({
|
|
1421
|
+
audio,
|
|
1422
|
+
mediaType,
|
|
1423
|
+
providerOptions
|
|
1424
|
+
}) {
|
|
1425
|
+
const warnings = [];
|
|
1426
|
+
const formData = new FormData();
|
|
1427
|
+
const blob = audio instanceof Blob ? audio : new Blob([audio], { type: mediaType });
|
|
1428
|
+
formData.append("file", blob);
|
|
1429
|
+
formData.append("model", this.modelId);
|
|
1430
|
+
return {
|
|
1431
|
+
formData,
|
|
1432
|
+
warnings
|
|
1433
|
+
};
|
|
1434
|
+
}
|
|
1435
|
+
async doGenerate(options) {
|
|
1436
|
+
var _a, _b, _c;
|
|
1437
|
+
const currentDate = (_c = (_b = (_a = this.config._internal) == null ? void 0 : _a.currentDate) == null ? void 0 : _b.call(_a)) != null ? _c : /* @__PURE__ */ new Date();
|
|
1438
|
+
const { formData, warnings } = this.getArgs(options);
|
|
1439
|
+
const {
|
|
1440
|
+
value: response,
|
|
1441
|
+
responseHeaders,
|
|
1442
|
+
rawValue: rawResponse
|
|
1443
|
+
} = await (0, import_provider_utils10.postFormDataToApi)({
|
|
1444
|
+
url: this.config.url({
|
|
1445
|
+
path: "/speech-to-text-translate",
|
|
1446
|
+
modelId: this.modelId
|
|
1447
|
+
}),
|
|
1448
|
+
headers: (0, import_provider_utils10.combineHeaders)(this.config.headers(), options.headers),
|
|
1449
|
+
formData,
|
|
1450
|
+
failedResponseHandler: sarvamFailedResponseHandler,
|
|
1451
|
+
successfulResponseHandler: (0, import_provider_utils10.createJsonResponseHandler)(
|
|
1452
|
+
sarvamTranscriptionResponseSchema2
|
|
1453
|
+
),
|
|
1454
|
+
abortSignal: options.abortSignal,
|
|
1455
|
+
fetch: this.config.fetch
|
|
1456
|
+
});
|
|
1457
|
+
return {
|
|
1458
|
+
text: response.transcript,
|
|
1459
|
+
segments: [],
|
|
1460
|
+
language: response.language_code ? response.language_code : void 0,
|
|
1461
|
+
durationInSeconds: void 0,
|
|
1462
|
+
warnings,
|
|
1463
|
+
response: {
|
|
1464
|
+
timestamp: currentDate,
|
|
1465
|
+
modelId: this.modelId,
|
|
1466
|
+
headers: responseHeaders,
|
|
1467
|
+
body: rawResponse
|
|
1468
|
+
}
|
|
1469
|
+
};
|
|
1470
|
+
}
|
|
1471
|
+
};
|
|
1472
|
+
var sarvamTranscriptionResponseSchema2 = import_zod11.z.object({
|
|
1473
|
+
request_id: import_zod11.z.string().nullable(),
|
|
1474
|
+
transcript: import_zod11.z.string(),
|
|
1475
|
+
language_code: import_zod11.z.string().nullable(),
|
|
1476
|
+
// timestamps: z
|
|
1477
|
+
// .object({
|
|
1478
|
+
// end_time_seconds: z.array(z.number()),
|
|
1479
|
+
// start_time_seconds: z.array(z.number()),
|
|
1480
|
+
// words: z.array(z.string()),
|
|
1481
|
+
// })
|
|
1482
|
+
// .optional(),
|
|
1483
|
+
diarized_transcript: import_zod11.z.object({
|
|
1484
|
+
entries: import_zod11.z.array(
|
|
1485
|
+
import_zod11.z.object({
|
|
1486
|
+
end_time_seconds: import_zod11.z.array(import_zod11.z.number()),
|
|
1487
|
+
start_time_seconds: import_zod11.z.array(import_zod11.z.number()),
|
|
1488
|
+
transcript: import_zod11.z.string(),
|
|
1489
|
+
speaker_id: import_zod11.z.string()
|
|
1490
|
+
})
|
|
1491
|
+
)
|
|
1492
|
+
}).nullable().optional()
|
|
1493
|
+
});
|
|
1494
|
+
|
|
1398
1495
|
// src/sarvam-provider.ts
|
|
1399
1496
|
function createSarvam(options = {}) {
|
|
1400
1497
|
var _a;
|
|
1401
|
-
const baseURL = (_a = (0,
|
|
1402
|
-
const ApiKey = (0,
|
|
1498
|
+
const baseURL = (_a = (0, import_provider_utils11.withoutTrailingSlash)(options.baseURL)) != null ? _a : "https://api.sarvam.ai";
|
|
1499
|
+
const ApiKey = (0, import_provider_utils11.loadApiKey)({
|
|
1403
1500
|
apiKey: options.apiKey,
|
|
1404
1501
|
environmentVariableName: "SARVAM_API_KEY",
|
|
1405
1502
|
description: "Sarvam"
|
|
@@ -1422,15 +1519,19 @@ function createSarvam(options = {}) {
|
|
|
1422
1519
|
}
|
|
1423
1520
|
return createChatModel(modelId, settings);
|
|
1424
1521
|
};
|
|
1425
|
-
const createTranscriptionModel = (modelId, languageCode = "unknown", settings) => {
|
|
1426
|
-
|
|
1427
|
-
|
|
1428
|
-
|
|
1429
|
-
|
|
1430
|
-
|
|
1431
|
-
|
|
1432
|
-
|
|
1433
|
-
|
|
1522
|
+
const createTranscriptionModel = (modelId, languageCode = "unknown", settings) => new SarvamTranscriptionModel(modelId, languageCode, {
|
|
1523
|
+
provider: "sarvam.transcription",
|
|
1524
|
+
url: ({ path }) => `${baseURL}${path}`,
|
|
1525
|
+
headers: getHeaders,
|
|
1526
|
+
fetch: options.fetch,
|
|
1527
|
+
transcription: settings
|
|
1528
|
+
});
|
|
1529
|
+
const createSpeechTranslation = (modelId) => new SarvamSpeechTranslationModel(modelId, {
|
|
1530
|
+
provider: "sarvam.transcription",
|
|
1531
|
+
url: ({ path }) => `${baseURL}${path}`,
|
|
1532
|
+
headers: getHeaders,
|
|
1533
|
+
fetch: options.fetch
|
|
1534
|
+
});
|
|
1434
1535
|
const createSpeechModel = (modelId, languageCode, settings) => new SarvamSpeechModel(modelId, languageCode, {
|
|
1435
1536
|
provider: "sarvam.speech",
|
|
1436
1537
|
url: ({ path }) => `${baseURL}${path}`,
|
|
@@ -1439,10 +1540,7 @@ function createSarvam(options = {}) {
|
|
|
1439
1540
|
speech: settings
|
|
1440
1541
|
});
|
|
1441
1542
|
const createTransliterateModel = (settings) => new SarvamTransliterateModel(
|
|
1442
|
-
|
|
1443
|
-
to: settings.to,
|
|
1444
|
-
from: settings.from
|
|
1445
|
-
},
|
|
1543
|
+
settings,
|
|
1446
1544
|
{
|
|
1447
1545
|
provider: "sarvam.transliterate",
|
|
1448
1546
|
url: ({ path }) => `${baseURL}${path}`,
|
|
@@ -1451,10 +1549,7 @@ function createSarvam(options = {}) {
|
|
|
1451
1549
|
}
|
|
1452
1550
|
);
|
|
1453
1551
|
const createTranslationModel = (settings) => new SarvamTranslationModel(
|
|
1454
|
-
|
|
1455
|
-
to: settings.to,
|
|
1456
|
-
from: settings.from
|
|
1457
|
-
},
|
|
1552
|
+
settings,
|
|
1458
1553
|
{
|
|
1459
1554
|
provider: "sarvam.translation",
|
|
1460
1555
|
url: ({ path }) => `${baseURL}${path}`,
|
|
@@ -1474,6 +1569,7 @@ function createSarvam(options = {}) {
|
|
|
1474
1569
|
provider.languageModel = createLanguageModel;
|
|
1475
1570
|
provider.chat = createChatModel;
|
|
1476
1571
|
provider.transcription = createTranscriptionModel;
|
|
1572
|
+
provider.speechTranslation = createSpeechTranslation;
|
|
1477
1573
|
provider.speech = createSpeechModel;
|
|
1478
1574
|
provider.transliterate = createTransliterateModel;
|
|
1479
1575
|
provider.translation = createTranslationModel;
|