sarvam-ai-sdk 0.0.6 → 0.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -69,6 +69,21 @@ const { text } = await transcribe({
69
69
  console.log(text); // പാചകം തുടരും സുഹൃത്തുക്കളെ
70
70
  ```
71
71
 
72
+ ## Speech-to-Text-Translate
73
+
74
+ ```ts
75
+ import { sarvam } from "sarvam-ai-sdk";
76
+ import { experimental_transcribe as transcribe } from "ai";
77
+ import { readFile } from "fs/promises";
78
+
79
+ const result = await transcribe({
80
+ model: sarvam.speechTranslation("saaras:v2"),
81
+ audio: await readFile("./src/transcript-test.wav"),
82
+ });
83
+
84
+ console.log(result.text); // Cooking continues, my friends
85
+ ```
86
+
72
87
  ## Translation
73
88
 
74
89
  > NB: Only transliterates `prompt` and `role:user` messages, not `system` or `assistant` messages.
package/dist/index.cjs CHANGED
@@ -55,7 +55,7 @@ __export(index_exports, {
55
55
  module.exports = __toCommonJS(index_exports);
56
56
 
57
57
  // src/sarvam-provider.ts
58
- var import_provider_utils10 = require("@ai-sdk/provider-utils");
58
+ var import_provider_utils11 = require("@ai-sdk/provider-utils");
59
59
 
60
60
  // src/sarvam-chat-language-model.ts
61
61
  var import_provider3 = require("@ai-sdk/provider");
@@ -1395,11 +1395,98 @@ var sarvamLidResponseSchema = import_zod10.z.object({
1395
1395
  request_id: import_zod10.z.string().nullish()
1396
1396
  });
1397
1397
 
1398
+ // src/sarvam-speech-translation-model.ts
1399
+ var import_provider_utils10 = require("@ai-sdk/provider-utils");
1400
+ var import_zod11 = require("zod");
1401
+ var SarvamSpeechTranslationModel = class {
1402
+ constructor(modelId, config) {
1403
+ this.modelId = modelId;
1404
+ this.config = config;
1405
+ this.specificationVersion = "v1";
1406
+ }
1407
+ get provider() {
1408
+ return this.config.provider;
1409
+ }
1410
+ getArgs({
1411
+ audio,
1412
+ mediaType,
1413
+ providerOptions
1414
+ }) {
1415
+ const warnings = [];
1416
+ const formData = new FormData();
1417
+ const blob = audio instanceof Blob ? audio : new Blob([audio], { type: mediaType });
1418
+ formData.append("file", blob);
1419
+ formData.append("model", this.modelId);
1420
+ return {
1421
+ formData,
1422
+ warnings
1423
+ };
1424
+ }
1425
+ async doGenerate(options) {
1426
+ var _a, _b, _c;
1427
+ const currentDate = (_c = (_b = (_a = this.config._internal) == null ? void 0 : _a.currentDate) == null ? void 0 : _b.call(_a)) != null ? _c : /* @__PURE__ */ new Date();
1428
+ const { formData, warnings } = this.getArgs(options);
1429
+ const {
1430
+ value: response,
1431
+ responseHeaders,
1432
+ rawValue: rawResponse
1433
+ } = await (0, import_provider_utils10.postFormDataToApi)({
1434
+ url: this.config.url({
1435
+ path: "/speech-to-text-translate",
1436
+ modelId: this.modelId
1437
+ }),
1438
+ headers: (0, import_provider_utils10.combineHeaders)(this.config.headers(), options.headers),
1439
+ formData,
1440
+ failedResponseHandler: sarvamFailedResponseHandler,
1441
+ successfulResponseHandler: (0, import_provider_utils10.createJsonResponseHandler)(
1442
+ sarvamTranscriptionResponseSchema2
1443
+ ),
1444
+ abortSignal: options.abortSignal,
1445
+ fetch: this.config.fetch
1446
+ });
1447
+ return {
1448
+ text: response.transcript,
1449
+ segments: [],
1450
+ language: response.language_code ? response.language_code : void 0,
1451
+ durationInSeconds: void 0,
1452
+ warnings,
1453
+ response: {
1454
+ timestamp: currentDate,
1455
+ modelId: this.modelId,
1456
+ headers: responseHeaders,
1457
+ body: rawResponse
1458
+ }
1459
+ };
1460
+ }
1461
+ };
1462
+ var sarvamTranscriptionResponseSchema2 = import_zod11.z.object({
1463
+ request_id: import_zod11.z.string().nullable(),
1464
+ transcript: import_zod11.z.string(),
1465
+ language_code: import_zod11.z.string().nullable(),
1466
+ // timestamps: z
1467
+ // .object({
1468
+ // end_time_seconds: z.array(z.number()),
1469
+ // start_time_seconds: z.array(z.number()),
1470
+ // words: z.array(z.string()),
1471
+ // })
1472
+ // .optional(),
1473
+ diarized_transcript: import_zod11.z.object({
1474
+ entries: import_zod11.z.array(
1475
+ import_zod11.z.object({
1476
+ end_time_seconds: import_zod11.z.array(import_zod11.z.number()),
1477
+ start_time_seconds: import_zod11.z.array(import_zod11.z.number()),
1478
+ transcript: import_zod11.z.string(),
1479
+ speaker_id: import_zod11.z.string()
1480
+ })
1481
+ )
1482
+ }).nullable().optional()
1483
+ });
1484
+
1398
1485
  // src/sarvam-provider.ts
1399
1486
  function createSarvam(options = {}) {
1400
1487
  var _a;
1401
- const baseURL = (_a = (0, import_provider_utils10.withoutTrailingSlash)(options.baseURL)) != null ? _a : "https://api.sarvam.ai";
1402
- const ApiKey = (0, import_provider_utils10.loadApiKey)({
1488
+ const baseURL = (_a = (0, import_provider_utils11.withoutTrailingSlash)(options.baseURL)) != null ? _a : "https://api.sarvam.ai";
1489
+ const ApiKey = (0, import_provider_utils11.loadApiKey)({
1403
1490
  apiKey: options.apiKey,
1404
1491
  environmentVariableName: "SARVAM_API_KEY",
1405
1492
  description: "Sarvam"
@@ -1422,15 +1509,19 @@ function createSarvam(options = {}) {
1422
1509
  }
1423
1510
  return createChatModel(modelId, settings);
1424
1511
  };
1425
- const createTranscriptionModel = (modelId, languageCode = "unknown", settings) => {
1426
- return new SarvamTranscriptionModel(modelId, languageCode, {
1427
- provider: "sarvam.transcription",
1428
- url: ({ path }) => `${baseURL}${path}`,
1429
- headers: getHeaders,
1430
- fetch: options.fetch,
1431
- transcription: settings
1432
- });
1433
- };
1512
+ const createTranscriptionModel = (modelId, languageCode = "unknown", settings) => new SarvamTranscriptionModel(modelId, languageCode, {
1513
+ provider: "sarvam.transcription",
1514
+ url: ({ path }) => `${baseURL}${path}`,
1515
+ headers: getHeaders,
1516
+ fetch: options.fetch,
1517
+ transcription: settings
1518
+ });
1519
+ const createSpeechTranslation = (modelId) => new SarvamSpeechTranslationModel(modelId, {
1520
+ provider: "sarvam.transcription",
1521
+ url: ({ path }) => `${baseURL}${path}`,
1522
+ headers: getHeaders,
1523
+ fetch: options.fetch
1524
+ });
1434
1525
  const createSpeechModel = (modelId, languageCode, settings) => new SarvamSpeechModel(modelId, languageCode, {
1435
1526
  provider: "sarvam.speech",
1436
1527
  url: ({ path }) => `${baseURL}${path}`,
@@ -1474,6 +1565,7 @@ function createSarvam(options = {}) {
1474
1565
  provider.languageModel = createLanguageModel;
1475
1566
  provider.chat = createChatModel;
1476
1567
  provider.transcription = createTranscriptionModel;
1568
+ provider.speechTranslation = createSpeechTranslation;
1477
1569
  provider.speech = createSpeechModel;
1478
1570
  provider.transliterate = createTransliterateModel;
1479
1571
  provider.translation = createTranslationModel;