@ai-sdk/google-vertex 4.0.146 → 4.0.147

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs CHANGED
@@ -1,5 +1,5 @@
1
1
  // src/google-vertex-provider-node.ts
2
- import { loadOptionalSetting as loadOptionalSetting2, resolve as resolve5 } from "@ai-sdk/provider-utils";
2
+ import { loadOptionalSetting as loadOptionalSetting2, resolve as resolve6 } from "@ai-sdk/provider-utils";
3
3
 
4
4
  // src/google-vertex-auth-google-auth-library.ts
5
5
  import { GoogleAuth } from "google-auth-library";
@@ -23,13 +23,13 @@ import {
23
23
  loadOptionalSetting,
24
24
  loadSetting,
25
25
  normalizeHeaders,
26
- resolve as resolve4,
26
+ resolve as resolve5,
27
27
  withoutTrailingSlash,
28
28
  withUserAgentSuffix
29
29
  } from "@ai-sdk/provider-utils";
30
30
 
31
31
  // src/version.ts
32
- var VERSION = true ? "4.0.146" : "0.0.0-test";
32
+ var VERSION = true ? "4.0.147" : "0.0.0-test";
33
33
 
34
34
  // src/google-vertex-embedding-model.ts
35
35
  import {
@@ -552,22 +552,201 @@ var googleVertexTools = {
552
552
  vertexRagStore: googleTools.vertexRagStore
553
553
  };
554
554
 
555
+ // src/google-vertex-transcription-model.ts
556
+ import {
557
+ combineHeaders as combineHeaders3,
558
+ convertUint8ArrayToBase64 as convertUint8ArrayToBase642,
559
+ createJsonResponseHandler as createJsonResponseHandler3,
560
+ parseProviderOptions as parseProviderOptions3,
561
+ postJsonToApi as postJsonToApi3,
562
+ resolve as resolve3
563
+ } from "@ai-sdk/provider-utils";
564
+ import { z as z6 } from "zod/v4";
565
+
566
+ // src/google-vertex-transcription-model-options.ts
567
+ import { z as z5 } from "zod/v4";
568
+ var googleVertexTranscriptionProviderOptionsSchema = z5.object({
569
+ /**
570
+ * BCP-47 language codes to recognize (e.g. `['en-US']`), or `['auto']` to let
571
+ * Chirp auto-detect the spoken language. Defaults to `['auto']`. For
572
+ * `telephony`, pass a supported explicit language code.
573
+ */
574
+ languageCodes: z5.array(z5.string()).optional(),
575
+ /**
576
+ * Whether to add punctuation to the transcript. Defaults to `true`.
577
+ */
578
+ enableAutomaticPunctuation: z5.boolean().optional(),
579
+ /**
580
+ * Whether to include word-level timestamps. Defaults to `true` so the
581
+ * transcription result can include segments.
582
+ *
583
+ * Enabling word-level timestamps can reduce transcription quality and speed
584
+ * for Chirp models.
585
+ */
586
+ enableWordTimeOffsets: z5.boolean().optional(),
587
+ /**
588
+ * The Cloud Speech-to-Text region for the request (e.g. `'us'`, `'eu'`,
589
+ * `'us-central1'`). Defaults to the provider `location`.
590
+ *
591
+ * Note: Speech-to-Text regions differ from Vertex AI regions. Chirp is only
592
+ * available in specific Speech-to-Text regions and is not available in the
593
+ * `global` location.
594
+ */
595
+ region: z5.string().optional()
596
+ });
597
+
598
+ // src/google-vertex-transcription-model.ts
599
+ function parseDurationSeconds(value) {
600
+ if (value == null) {
601
+ return void 0;
602
+ }
603
+ const seconds = Number.parseFloat(value);
604
+ return Number.isFinite(seconds) ? seconds : void 0;
605
+ }
606
+ function convertBcp47ToIso6391(value) {
607
+ if (value == null) {
608
+ return void 0;
609
+ }
610
+ try {
611
+ const language = new Intl.Locale(value).language;
612
+ return language.length === 2 ? language : void 0;
613
+ } catch (e) {
614
+ return void 0;
615
+ }
616
+ }
617
+ var GoogleVertexTranscriptionModel = class {
618
+ constructor(modelId, config) {
619
+ this.modelId = modelId;
620
+ this.config = config;
621
+ this.specificationVersion = "v3";
622
+ }
623
+ get provider() {
624
+ return this.config.provider;
625
+ }
626
+ async doGenerate(options) {
627
+ var _a, _b, _c, _d, _e, _f, _g, _h, _i, _j;
628
+ const currentDate = (_c = (_b = (_a = this.config._internal) == null ? void 0 : _a.currentDate) == null ? void 0 : _b.call(_a)) != null ? _c : /* @__PURE__ */ new Date();
629
+ const warnings = [];
630
+ let googleOptions;
631
+ for (const provider of ["googleVertex", "vertex", "google"]) {
632
+ googleOptions = await parseProviderOptions3({
633
+ provider,
634
+ providerOptions: options.providerOptions,
635
+ schema: googleVertexTranscriptionProviderOptionsSchema
636
+ });
637
+ if (googleOptions != null) {
638
+ break;
639
+ }
640
+ }
641
+ const region = (_d = googleOptions == null ? void 0 : googleOptions.region) != null ? _d : this.config.location;
642
+ const languageCodes = (_e = googleOptions == null ? void 0 : googleOptions.languageCodes) != null ? _e : ["auto"];
643
+ const content = typeof options.audio === "string" ? options.audio : convertUint8ArrayToBase642(options.audio);
644
+ const requestBody = {
645
+ config: {
646
+ model: this.modelId,
647
+ languageCodes,
648
+ // Let Speech-to-Text auto-detect the audio encoding (wav/mp3/flac/…).
649
+ autoDecodingConfig: {},
650
+ features: {
651
+ // Word timing populates `segments`.
652
+ enableWordTimeOffsets: (_f = googleOptions == null ? void 0 : googleOptions.enableWordTimeOffsets) != null ? _f : true,
653
+ enableAutomaticPunctuation: (_g = googleOptions == null ? void 0 : googleOptions.enableAutomaticPunctuation) != null ? _g : true
654
+ }
655
+ },
656
+ content
657
+ };
658
+ const host = region === "global" ? "speech.googleapis.com" : `${region}-speech.googleapis.com`;
659
+ const url = `https://${host}/v2/projects/${this.config.project}/locations/${region}/recognizers/_:recognize`;
660
+ const {
661
+ value: response,
662
+ responseHeaders,
663
+ rawValue: rawResponse
664
+ } = await postJsonToApi3({
665
+ url,
666
+ headers: combineHeaders3(
667
+ this.config.headers ? await resolve3(this.config.headers) : void 0,
668
+ options.headers
669
+ ),
670
+ body: requestBody,
671
+ failedResponseHandler: googleVertexFailedResponseHandler,
672
+ successfulResponseHandler: createJsonResponseHandler3(
673
+ googleVertexTranscriptionResponseSchema
674
+ ),
675
+ abortSignal: options.abortSignal,
676
+ fetch: this.config.fetch
677
+ });
678
+ const results = (_h = response.results) != null ? _h : [];
679
+ const text = results.map((result) => {
680
+ var _a2, _b2, _c2;
681
+ return (_c2 = (_b2 = (_a2 = result.alternatives) == null ? void 0 : _a2[0]) == null ? void 0 : _b2.transcript) != null ? _c2 : "";
682
+ }).join(" ").trim();
683
+ const segments = results.flatMap(
684
+ (result) => {
685
+ var _a2, _b2, _c2, _d2;
686
+ return (_d2 = (_c2 = (_b2 = (_a2 = result.alternatives) == null ? void 0 : _a2[0]) == null ? void 0 : _b2.words) == null ? void 0 : _c2.flatMap((word) => {
687
+ const startSecond = parseDurationSeconds(word.startOffset);
688
+ const endSecond = parseDurationSeconds(word.endOffset);
689
+ return word.word == null || startSecond == null || endSecond == null ? [] : [{ text: word.word, startSecond, endSecond }];
690
+ })) != null ? _d2 : [];
691
+ }
692
+ );
693
+ const language = convertBcp47ToIso6391((_i = results[0]) == null ? void 0 : _i.languageCode);
694
+ return {
695
+ text,
696
+ segments,
697
+ language,
698
+ durationInSeconds: parseDurationSeconds(
699
+ (_j = response.metadata) == null ? void 0 : _j.totalBilledDuration
700
+ ),
701
+ warnings,
702
+ response: {
703
+ timestamp: currentDate,
704
+ modelId: this.modelId,
705
+ headers: responseHeaders,
706
+ body: rawResponse
707
+ }
708
+ };
709
+ }
710
+ };
711
+ var googleVertexTranscriptionResponseSchema = z6.object({
712
+ results: z6.array(
713
+ z6.object({
714
+ alternatives: z6.array(
715
+ z6.object({
716
+ transcript: z6.string().nullish(),
717
+ words: z6.array(
718
+ z6.object({
719
+ word: z6.string().nullish(),
720
+ startOffset: z6.string().nullish(),
721
+ endOffset: z6.string().nullish()
722
+ })
723
+ ).nullish()
724
+ })
725
+ ).nullish(),
726
+ languageCode: z6.string().nullish()
727
+ })
728
+ ).nullish(),
729
+ metadata: z6.object({
730
+ totalBilledDuration: z6.string().nullish()
731
+ }).nullish()
732
+ });
733
+
555
734
  // src/google-vertex-video-model.ts
556
735
  import {
557
736
  AISDKError
558
737
  } from "@ai-sdk/provider";
559
738
  import {
560
- combineHeaders as combineHeaders3,
561
- convertUint8ArrayToBase64 as convertUint8ArrayToBase642,
562
- createJsonResponseHandler as createJsonResponseHandler3,
739
+ combineHeaders as combineHeaders4,
740
+ convertUint8ArrayToBase64 as convertUint8ArrayToBase643,
741
+ createJsonResponseHandler as createJsonResponseHandler4,
563
742
  delay,
564
743
  lazySchema,
565
- parseProviderOptions as parseProviderOptions3,
566
- postJsonToApi as postJsonToApi3,
567
- resolve as resolve3,
744
+ parseProviderOptions as parseProviderOptions4,
745
+ postJsonToApi as postJsonToApi4,
746
+ resolve as resolve4,
568
747
  zodSchema
569
748
  } from "@ai-sdk/provider-utils";
570
- import { z as z5 } from "zod/v4";
749
+ import { z as z7 } from "zod/v4";
571
750
  var GoogleVertexVideoModel = class {
572
751
  constructor(modelId, config) {
573
752
  this.modelId = modelId;
@@ -584,7 +763,7 @@ var GoogleVertexVideoModel = class {
584
763
  var _a, _b, _c, _d, _e, _f;
585
764
  const currentDate = (_c = (_b = (_a = this.config._internal) == null ? void 0 : _a.currentDate) == null ? void 0 : _b.call(_a)) != null ? _c : /* @__PURE__ */ new Date();
586
765
  const warnings = [];
587
- const vertexOptions = await parseProviderOptions3({
766
+ const vertexOptions = await parseProviderOptions4({
588
767
  provider: "vertex",
589
768
  providerOptions: options.providerOptions,
590
769
  schema: googleVertexVideoModelOptionsSchema
@@ -602,7 +781,7 @@ var GoogleVertexVideoModel = class {
602
781
  details: "Vertex AI video models require base64-encoded images or GCS URIs. URL will be ignored."
603
782
  });
604
783
  } else {
605
- const base64Data = typeof options.image.data === "string" ? options.image.data : convertUint8ArrayToBase642(options.image.data);
784
+ const base64Data = typeof options.image.data === "string" ? options.image.data : convertUint8ArrayToBase643(options.image.data);
606
785
  instance.image = {
607
786
  bytesBase64Encoded: base64Data,
608
787
  mimeType: options.image.mediaType
@@ -660,17 +839,17 @@ var GoogleVertexVideoModel = class {
660
839
  }
661
840
  }
662
841
  }
663
- const { value: operation } = await postJsonToApi3({
842
+ const { value: operation } = await postJsonToApi4({
664
843
  url: `${this.config.baseURL}/models/${this.modelId}:predictLongRunning`,
665
- headers: combineHeaders3(
666
- await resolve3(this.config.headers),
844
+ headers: combineHeaders4(
845
+ await resolve4(this.config.headers),
667
846
  options.headers
668
847
  ),
669
848
  body: {
670
849
  instances,
671
850
  parameters
672
851
  },
673
- successfulResponseHandler: createJsonResponseHandler3(
852
+ successfulResponseHandler: createJsonResponseHandler4(
674
853
  vertexOperationSchema
675
854
  ),
676
855
  failedResponseHandler: googleVertexFailedResponseHandler,
@@ -703,16 +882,16 @@ var GoogleVertexVideoModel = class {
703
882
  message: "Video generation request was aborted"
704
883
  });
705
884
  }
706
- const { value: statusOperation, responseHeaders: pollHeaders } = await postJsonToApi3({
885
+ const { value: statusOperation, responseHeaders: pollHeaders } = await postJsonToApi4({
707
886
  url: `${this.config.baseURL}/models/${this.modelId}:fetchPredictOperation`,
708
- headers: combineHeaders3(
709
- await resolve3(this.config.headers),
887
+ headers: combineHeaders4(
888
+ await resolve4(this.config.headers),
710
889
  options.headers
711
890
  ),
712
891
  body: {
713
892
  operationName
714
893
  },
715
- successfulResponseHandler: createJsonResponseHandler3(
894
+ successfulResponseHandler: createJsonResponseHandler4(
716
895
  vertexOperationSchema
717
896
  ),
718
897
  failedResponseHandler: googleVertexFailedResponseHandler,
@@ -781,38 +960,38 @@ var GoogleVertexVideoModel = class {
781
960
  };
782
961
  }
783
962
  };
784
- var vertexOperationSchema = z5.object({
785
- name: z5.string().nullish(),
786
- done: z5.boolean().nullish(),
787
- error: z5.object({
788
- code: z5.number().nullish(),
789
- message: z5.string(),
790
- status: z5.string().nullish()
963
+ var vertexOperationSchema = z7.object({
964
+ name: z7.string().nullish(),
965
+ done: z7.boolean().nullish(),
966
+ error: z7.object({
967
+ code: z7.number().nullish(),
968
+ message: z7.string(),
969
+ status: z7.string().nullish()
791
970
  }).nullish(),
792
- response: z5.object({
793
- videos: z5.array(
794
- z5.object({
795
- bytesBase64Encoded: z5.string().nullish(),
796
- gcsUri: z5.string().nullish(),
797
- mimeType: z5.string().nullish()
971
+ response: z7.object({
972
+ videos: z7.array(
973
+ z7.object({
974
+ bytesBase64Encoded: z7.string().nullish(),
975
+ gcsUri: z7.string().nullish(),
976
+ mimeType: z7.string().nullish()
798
977
  })
799
978
  ).nullish(),
800
- raiMediaFilteredCount: z5.number().nullish()
979
+ raiMediaFilteredCount: z7.number().nullish()
801
980
  }).nullish()
802
981
  });
803
982
  var googleVertexVideoModelOptionsSchema = lazySchema(
804
983
  () => zodSchema(
805
- z5.object({
806
- pollIntervalMs: z5.number().positive().nullish(),
807
- pollTimeoutMs: z5.number().positive().nullish(),
808
- personGeneration: z5.enum(["dont_allow", "allow_adult", "allow_all"]).nullish(),
809
- negativePrompt: z5.string().nullish(),
810
- generateAudio: z5.boolean().nullish(),
811
- gcsOutputDirectory: z5.string().nullish(),
812
- referenceImages: z5.array(
813
- z5.object({
814
- bytesBase64Encoded: z5.string().nullish(),
815
- gcsUri: z5.string().nullish()
984
+ z7.object({
985
+ pollIntervalMs: z7.number().positive().nullish(),
986
+ pollTimeoutMs: z7.number().positive().nullish(),
987
+ personGeneration: z7.enum(["dont_allow", "allow_adult", "allow_all"]).nullish(),
988
+ negativePrompt: z7.string().nullish(),
989
+ generateAudio: z7.boolean().nullish(),
990
+ gcsOutputDirectory: z7.string().nullish(),
991
+ referenceImages: z7.array(
992
+ z7.object({
993
+ bytesBase64Encoded: z7.string().nullish(),
994
+ gcsUri: z7.string().nullish()
816
995
  })
817
996
  ).nullish()
818
997
  }).passthrough()
@@ -871,7 +1050,7 @@ function createVertex(options = {}) {
871
1050
  const createConfig = (name) => {
872
1051
  const getHeaders = async () => {
873
1052
  var _a;
874
- const originalHeaders = await resolve4((_a = options.headers) != null ? _a : {});
1053
+ const originalHeaders = await resolve5((_a = options.headers) != null ? _a : {});
875
1054
  return withUserAgentSuffix(
876
1055
  originalHeaders,
877
1056
  `ai-sdk/google-vertex/${VERSION}`
@@ -914,6 +1093,21 @@ function createVertex(options = {}) {
914
1093
  generateId: (_a = options.generateId) != null ? _a : generateId
915
1094
  });
916
1095
  };
1096
+ const createTranscriptionModel = (modelId) => {
1097
+ if (apiKey) {
1098
+ throw new Error(
1099
+ "Google Vertex transcription models do not support Express Mode API keys. Use standard Google Cloud credentials instead."
1100
+ );
1101
+ }
1102
+ const config = createConfig("transcription");
1103
+ return new GoogleVertexTranscriptionModel(modelId, {
1104
+ provider: config.provider,
1105
+ headers: config.headers,
1106
+ fetch: config.fetch,
1107
+ project: loadVertexProject(),
1108
+ location: loadVertexLocation()
1109
+ });
1110
+ };
917
1111
  const provider = function(modelId) {
918
1112
  if (new.target) {
919
1113
  throw new Error(
@@ -930,6 +1124,8 @@ function createVertex(options = {}) {
930
1124
  provider.imageModel = createImageModel;
931
1125
  provider.video = createVideoModel;
932
1126
  provider.videoModel = createVideoModel;
1127
+ provider.transcription = createTranscriptionModel;
1128
+ provider.transcriptionModel = createTranscriptionModel;
933
1129
  provider.tools = googleVertexTools;
934
1130
  return provider;
935
1131
  }
@@ -952,7 +1148,7 @@ function createVertex2(options = {}) {
952
1148
  ...options,
953
1149
  headers: async () => ({
954
1150
  Authorization: `Bearer ${await generateAuthToken()}`,
955
- ...await resolve5(options.headers)
1151
+ ...await resolve6(options.headers)
956
1152
  })
957
1153
  });
958
1154
  }