sarvam-ai-sdk 0.0.6 → 0.0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +15 -0
- package/dist/index.cjs +104 -12
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +12 -1
- package/dist/index.d.ts +12 -1
- package/dist/index.js +105 -9
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -69,6 +69,21 @@ const { text } = await transcribe({
|
|
|
69
69
|
console.log(text); // പാചകം തുടരും സുഹൃത്തുക്കളെ
|
|
70
70
|
```
|
|
71
71
|
|
|
72
|
+
## Speech-to-Text-Translate
|
|
73
|
+
|
|
74
|
+
```ts
|
|
75
|
+
import { sarvam } from "sarvam-ai-sdk";
|
|
76
|
+
import { experimental_transcribe as transcribe } from "ai";
|
|
77
|
+
import { readFile } from "fs/promises";
|
|
78
|
+
|
|
79
|
+
const result = await transcribe({
|
|
80
|
+
model: sarvam.speechTranslation("saaras:v2"),
|
|
81
|
+
audio: await readFile("./src/transcript-test.wav"),
|
|
82
|
+
});
|
|
83
|
+
|
|
84
|
+
console.log(result.text); // Cooking continues, my friends
|
|
85
|
+
```
|
|
86
|
+
|
|
72
87
|
## Translation
|
|
73
88
|
|
|
74
89
|
> NB: Only transliterates `prompt` and `role:user` messages, not `system` or `assistant` messages.
|
package/dist/index.cjs
CHANGED
|
@@ -55,7 +55,7 @@ __export(index_exports, {
|
|
|
55
55
|
module.exports = __toCommonJS(index_exports);
|
|
56
56
|
|
|
57
57
|
// src/sarvam-provider.ts
|
|
58
|
-
var
|
|
58
|
+
var import_provider_utils11 = require("@ai-sdk/provider-utils");
|
|
59
59
|
|
|
60
60
|
// src/sarvam-chat-language-model.ts
|
|
61
61
|
var import_provider3 = require("@ai-sdk/provider");
|
|
@@ -1395,11 +1395,98 @@ var sarvamLidResponseSchema = import_zod10.z.object({
|
|
|
1395
1395
|
request_id: import_zod10.z.string().nullish()
|
|
1396
1396
|
});
|
|
1397
1397
|
|
|
1398
|
+
// src/sarvam-speech-translation-model.ts
|
|
1399
|
+
var import_provider_utils10 = require("@ai-sdk/provider-utils");
|
|
1400
|
+
var import_zod11 = require("zod");
|
|
1401
|
+
var SarvamSpeechTranslationModel = class {
|
|
1402
|
+
constructor(modelId, config) {
|
|
1403
|
+
this.modelId = modelId;
|
|
1404
|
+
this.config = config;
|
|
1405
|
+
this.specificationVersion = "v1";
|
|
1406
|
+
}
|
|
1407
|
+
get provider() {
|
|
1408
|
+
return this.config.provider;
|
|
1409
|
+
}
|
|
1410
|
+
getArgs({
|
|
1411
|
+
audio,
|
|
1412
|
+
mediaType,
|
|
1413
|
+
providerOptions
|
|
1414
|
+
}) {
|
|
1415
|
+
const warnings = [];
|
|
1416
|
+
const formData = new FormData();
|
|
1417
|
+
const blob = audio instanceof Blob ? audio : new Blob([audio], { type: mediaType });
|
|
1418
|
+
formData.append("file", blob);
|
|
1419
|
+
formData.append("model", this.modelId);
|
|
1420
|
+
return {
|
|
1421
|
+
formData,
|
|
1422
|
+
warnings
|
|
1423
|
+
};
|
|
1424
|
+
}
|
|
1425
|
+
async doGenerate(options) {
|
|
1426
|
+
var _a, _b, _c;
|
|
1427
|
+
const currentDate = (_c = (_b = (_a = this.config._internal) == null ? void 0 : _a.currentDate) == null ? void 0 : _b.call(_a)) != null ? _c : /* @__PURE__ */ new Date();
|
|
1428
|
+
const { formData, warnings } = this.getArgs(options);
|
|
1429
|
+
const {
|
|
1430
|
+
value: response,
|
|
1431
|
+
responseHeaders,
|
|
1432
|
+
rawValue: rawResponse
|
|
1433
|
+
} = await (0, import_provider_utils10.postFormDataToApi)({
|
|
1434
|
+
url: this.config.url({
|
|
1435
|
+
path: "/speech-to-text-translate",
|
|
1436
|
+
modelId: this.modelId
|
|
1437
|
+
}),
|
|
1438
|
+
headers: (0, import_provider_utils10.combineHeaders)(this.config.headers(), options.headers),
|
|
1439
|
+
formData,
|
|
1440
|
+
failedResponseHandler: sarvamFailedResponseHandler,
|
|
1441
|
+
successfulResponseHandler: (0, import_provider_utils10.createJsonResponseHandler)(
|
|
1442
|
+
sarvamTranscriptionResponseSchema2
|
|
1443
|
+
),
|
|
1444
|
+
abortSignal: options.abortSignal,
|
|
1445
|
+
fetch: this.config.fetch
|
|
1446
|
+
});
|
|
1447
|
+
return {
|
|
1448
|
+
text: response.transcript,
|
|
1449
|
+
segments: [],
|
|
1450
|
+
language: response.language_code ? response.language_code : void 0,
|
|
1451
|
+
durationInSeconds: void 0,
|
|
1452
|
+
warnings,
|
|
1453
|
+
response: {
|
|
1454
|
+
timestamp: currentDate,
|
|
1455
|
+
modelId: this.modelId,
|
|
1456
|
+
headers: responseHeaders,
|
|
1457
|
+
body: rawResponse
|
|
1458
|
+
}
|
|
1459
|
+
};
|
|
1460
|
+
}
|
|
1461
|
+
};
|
|
1462
|
+
var sarvamTranscriptionResponseSchema2 = import_zod11.z.object({
|
|
1463
|
+
request_id: import_zod11.z.string().nullable(),
|
|
1464
|
+
transcript: import_zod11.z.string(),
|
|
1465
|
+
language_code: import_zod11.z.string().nullable(),
|
|
1466
|
+
// timestamps: z
|
|
1467
|
+
// .object({
|
|
1468
|
+
// end_time_seconds: z.array(z.number()),
|
|
1469
|
+
// start_time_seconds: z.array(z.number()),
|
|
1470
|
+
// words: z.array(z.string()),
|
|
1471
|
+
// })
|
|
1472
|
+
// .optional(),
|
|
1473
|
+
diarized_transcript: import_zod11.z.object({
|
|
1474
|
+
entries: import_zod11.z.array(
|
|
1475
|
+
import_zod11.z.object({
|
|
1476
|
+
end_time_seconds: import_zod11.z.array(import_zod11.z.number()),
|
|
1477
|
+
start_time_seconds: import_zod11.z.array(import_zod11.z.number()),
|
|
1478
|
+
transcript: import_zod11.z.string(),
|
|
1479
|
+
speaker_id: import_zod11.z.string()
|
|
1480
|
+
})
|
|
1481
|
+
)
|
|
1482
|
+
}).nullable().optional()
|
|
1483
|
+
});
|
|
1484
|
+
|
|
1398
1485
|
// src/sarvam-provider.ts
|
|
1399
1486
|
function createSarvam(options = {}) {
|
|
1400
1487
|
var _a;
|
|
1401
|
-
const baseURL = (_a = (0,
|
|
1402
|
-
const ApiKey = (0,
|
|
1488
|
+
const baseURL = (_a = (0, import_provider_utils11.withoutTrailingSlash)(options.baseURL)) != null ? _a : "https://api.sarvam.ai";
|
|
1489
|
+
const ApiKey = (0, import_provider_utils11.loadApiKey)({
|
|
1403
1490
|
apiKey: options.apiKey,
|
|
1404
1491
|
environmentVariableName: "SARVAM_API_KEY",
|
|
1405
1492
|
description: "Sarvam"
|
|
@@ -1422,15 +1509,19 @@ function createSarvam(options = {}) {
|
|
|
1422
1509
|
}
|
|
1423
1510
|
return createChatModel(modelId, settings);
|
|
1424
1511
|
};
|
|
1425
|
-
const createTranscriptionModel = (modelId, languageCode = "unknown", settings) => {
|
|
1426
|
-
|
|
1427
|
-
|
|
1428
|
-
|
|
1429
|
-
|
|
1430
|
-
|
|
1431
|
-
|
|
1432
|
-
|
|
1433
|
-
|
|
1512
|
+
const createTranscriptionModel = (modelId, languageCode = "unknown", settings) => new SarvamTranscriptionModel(modelId, languageCode, {
|
|
1513
|
+
provider: "sarvam.transcription",
|
|
1514
|
+
url: ({ path }) => `${baseURL}${path}`,
|
|
1515
|
+
headers: getHeaders,
|
|
1516
|
+
fetch: options.fetch,
|
|
1517
|
+
transcription: settings
|
|
1518
|
+
});
|
|
1519
|
+
const createSpeechTranslation = (modelId) => new SarvamSpeechTranslationModel(modelId, {
|
|
1520
|
+
provider: "sarvam.transcription",
|
|
1521
|
+
url: ({ path }) => `${baseURL}${path}`,
|
|
1522
|
+
headers: getHeaders,
|
|
1523
|
+
fetch: options.fetch
|
|
1524
|
+
});
|
|
1434
1525
|
const createSpeechModel = (modelId, languageCode, settings) => new SarvamSpeechModel(modelId, languageCode, {
|
|
1435
1526
|
provider: "sarvam.speech",
|
|
1436
1527
|
url: ({ path }) => `${baseURL}${path}`,
|
|
@@ -1474,6 +1565,7 @@ function createSarvam(options = {}) {
|
|
|
1474
1565
|
provider.languageModel = createLanguageModel;
|
|
1475
1566
|
provider.chat = createChatModel;
|
|
1476
1567
|
provider.transcription = createTranscriptionModel;
|
|
1568
|
+
provider.speechTranslation = createSpeechTranslation;
|
|
1477
1569
|
provider.speech = createSpeechModel;
|
|
1478
1570
|
provider.transliterate = createTransliterateModel;
|
|
1479
1571
|
provider.translation = createTranslationModel;
|