@ai-sdk/google-vertex 4.0.145 → 4.0.147

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,5 @@
1
1
  // src/edge/google-vertex-provider-edge.ts
2
- import { loadOptionalSetting as loadOptionalSetting3, resolve as resolve5 } from "@ai-sdk/provider-utils";
2
+ import { loadOptionalSetting as loadOptionalSetting3, resolve as resolve6 } from "@ai-sdk/provider-utils";
3
3
 
4
4
  // src/google-vertex-provider.ts
5
5
  import { GoogleGenerativeAILanguageModel as GoogleGenerativeAILanguageModel2 } from "@ai-sdk/google/internal";
@@ -8,13 +8,13 @@ import {
8
8
  loadOptionalSetting,
9
9
  loadSetting,
10
10
  normalizeHeaders,
11
- resolve as resolve4,
11
+ resolve as resolve5,
12
12
  withoutTrailingSlash,
13
13
  withUserAgentSuffix
14
14
  } from "@ai-sdk/provider-utils";
15
15
 
16
16
  // src/version.ts
17
- var VERSION = true ? "4.0.145" : "0.0.0-test";
17
+ var VERSION = true ? "4.0.147" : "0.0.0-test";
18
18
 
19
19
  // src/google-vertex-embedding-model.ts
20
20
  import {
@@ -537,22 +537,201 @@ var googleVertexTools = {
537
537
  vertexRagStore: googleTools.vertexRagStore
538
538
  };
539
539
 
540
+ // src/google-vertex-transcription-model.ts
541
+ import {
542
+ combineHeaders as combineHeaders3,
543
+ convertUint8ArrayToBase64 as convertUint8ArrayToBase642,
544
+ createJsonResponseHandler as createJsonResponseHandler3,
545
+ parseProviderOptions as parseProviderOptions3,
546
+ postJsonToApi as postJsonToApi3,
547
+ resolve as resolve3
548
+ } from "@ai-sdk/provider-utils";
549
+ import { z as z6 } from "zod/v4";
550
+
551
+ // src/google-vertex-transcription-model-options.ts
552
+ import { z as z5 } from "zod/v4";
553
/**
 * Provider options accepted by the Vertex transcription model. Every field is
 * optional; the defaults mentioned below are applied by the model when it
 * builds the request, not by this schema.
 */
var googleVertexTranscriptionProviderOptionsSchema = (() => {
  const optionalFlag = () => z5.boolean().optional();
  return z5.object({
    // BCP-47 language codes to recognize (e.g. `['en-US']`), or `['auto']` so
    // that Chirp detects the spoken language itself. The model falls back to
    // `['auto']` when this is omitted. For `telephony`, pass a supported
    // explicit language code.
    languageCodes: z5.array(z5.string()).optional(),
    // Adds punctuation to the transcript. Treated as `true` when omitted.
    enableAutomaticPunctuation: optionalFlag(),
    // Requests word-level timestamps, which is what lets the transcription
    // result carry segments. Treated as `true` when omitted. Turning it on can
    // reduce transcription quality and speed for Chirp models.
    enableWordTimeOffsets: optionalFlag(),
    // Cloud Speech-to-Text region for the request (e.g. `'us'`, `'eu'`,
    // `'us-central1'`). Falls back to the provider `location` when omitted.
    // Speech-to-Text regions differ from Vertex AI regions: Chirp is only
    // available in specific Speech-to-Text regions and is not available in
    // the `global` location.
    region: z5.string().optional()
  });
})();
582
+
583
+ // src/google-vertex-transcription-model.ts
584
/**
 * Reads the leading numeric part of a duration string such as `"1.500s"`.
 *
 * @param value Duration string, or `null`/`undefined` when the field is absent.
 * @returns The number of seconds, or `undefined` when the value is missing or
 *   does not start with a finite number.
 */
function parseDurationSeconds(value) {
  if (value === null || value === undefined) {
    return undefined;
  }
  // parseFloat stops at the first non-numeric character, so a trailing unit
  // suffix is ignored.
  const parsed = Number.parseFloat(value);
  if (!Number.isFinite(parsed)) {
    return undefined;
  }
  return parsed;
}
591
+ function convertBcp47ToIso6391(value) {
592
+ if (value == null) {
593
+ return void 0;
594
+ }
595
+ try {
596
+ const language = new Intl.Locale(value).language;
597
+ return language.length === 2 ? language : void 0;
598
+ } catch (e) {
599
+ return void 0;
600
+ }
601
+ }
602
/**
 * Transcription model that posts audio to the Cloud Speech-to-Text v2
 * `recognizers/_:recognize` endpoint and maps the response to a transcription
 * result (`text`, `segments`, `language`, `durationInSeconds`).
 */
var GoogleVertexTranscriptionModel = class {
  /**
   * @param modelId Model identifier, sent as `config.model` in the request.
   * @param config Provider configuration. This class reads `provider`,
   *   `headers`, `fetch`, `project`, `location` and the optional
   *   `_internal.currentDate` hook.
   */
  constructor(modelId, config) {
    this.modelId = modelId;
    this.config = config;
    this.specificationVersion = "v3";
  }
  /** Provider name taken from the configuration. */
  get provider() {
    return this.config.provider;
  }
  /**
   * Transcribes `options.audio` with a single synchronous recognize request.
   *
   * @param options Call options. Reads `audio` (base64 string or byte array),
   *   `providerOptions`, `headers` and `abortSignal`.
   * @returns The joined transcript, word-level segments, the detected language
   *   as a two-letter code when available, the duration parsed from
   *   `metadata.totalBilledDuration`, warnings, and raw response details.
   */
  async doGenerate(options) {
    // `_internal.currentDate` lets callers inject a clock; fall back to now.
    const internal = this.config._internal;
    const clock = internal == null ? undefined : internal.currentDate;
    let currentDate = clock == null ? undefined : clock.call(internal);
    if (currentDate == null) {
      currentDate = /* @__PURE__ */ new Date();
    }
    const warnings = [];

    // Options may be supplied under any of these provider keys; the first key
    // that yields a value wins.
    let googleOptions;
    for (const provider of ["googleVertex", "vertex", "google"]) {
      googleOptions = await parseProviderOptions3({
        provider,
        providerOptions: options.providerOptions,
        schema: googleVertexTranscriptionProviderOptionsSchema
      });
      if (googleOptions != null) {
        break;
      }
    }
    const pickOption = (key, fallback) => {
      const value = googleOptions == null ? undefined : googleOptions[key];
      return value != null ? value : fallback;
    };
    const region = pickOption("region", this.config.location);
    const languageCodes = pickOption("languageCodes", ["auto"]);
    const content = typeof options.audio === "string" ? options.audio : convertUint8ArrayToBase642(options.audio);
    const requestBody = {
      config: {
        model: this.modelId,
        languageCodes,
        // Let Speech-to-Text auto-detect the audio encoding (wav/mp3/flac/…).
        autoDecodingConfig: {},
        features: {
          // Word timing populates `segments`.
          enableWordTimeOffsets: pickOption("enableWordTimeOffsets", true),
          enableAutomaticPunctuation: pickOption("enableAutomaticPunctuation", true)
        }
      },
      content
    };
    // The `global` location uses the unprefixed host; every other region has
    // its own `<region>-speech` host.
    const host = region === "global" ? "speech.googleapis.com" : `${region}-speech.googleapis.com`;
    const url = `https://${host}/v2/projects/${this.config.project}/locations/${region}/recognizers/_:recognize`;
    const {
      value: response,
      responseHeaders,
      rawValue: rawResponse
    } = await postJsonToApi3({
      url,
      headers: combineHeaders3(
        this.config.headers ? await resolve3(this.config.headers) : void 0,
        options.headers
      ),
      body: requestBody,
      failedResponseHandler: googleVertexFailedResponseHandler,
      successfulResponseHandler: createJsonResponseHandler3(
        googleVertexTranscriptionResponseSchema
      ),
      abortSignal: options.abortSignal,
      fetch: this.config.fetch
    });
    const results = response.results != null ? response.results : [];
    // Only the first alternative of each result is used.
    const primaryAlternatives = results.map(
      (result) => result.alternatives == null ? undefined : result.alternatives[0]
    );
    // Results without a transcript are dropped before joining so they do not
    // leave doubled spaces in the middle of the text.
    const text = primaryAlternatives.map(
      (alternative) => alternative == null || alternative.transcript == null ? "" : alternative.transcript.trim()
    ).filter((piece) => piece.length > 0).join(" ");
    const segments = [];
    for (const alternative of primaryAlternatives) {
      if (alternative == null || alternative.words == null) {
        continue;
      }
      for (const word of alternative.words) {
        const startSecond = parseDurationSeconds(word.startOffset);
        const endSecond = parseDurationSeconds(word.endOffset);
        // A segment needs its text and both boundaries; incomplete words are skipped.
        if (word.word == null || startSecond == null || endSecond == null) {
          continue;
        }
        segments.push({ text: word.word, startSecond, endSecond });
      }
    }
    // Use the first result that reports a language code, so an empty leading
    // result does not hide the detected language.
    const resultWithLanguage = results.find((result) => result.languageCode != null);
    const language = convertBcp47ToIso6391(
      resultWithLanguage == null ? undefined : resultWithLanguage.languageCode
    );
    return {
      text,
      segments,
      language,
      durationInSeconds: parseDurationSeconds(
        response.metadata == null ? undefined : response.metadata.totalBilledDuration
      ),
      warnings,
      response: {
        timestamp: currentDate,
        modelId: this.modelId,
        headers: responseHeaders,
        body: rawResponse
      }
    };
  }
};
696
/**
 * Shape of the recognize response fields that the transcription model reads.
 * Every field is nullish, so absent or `null` parts of the response still
 * validate.
 */
var googleVertexTranscriptionResponseSchema = (() => {
  const nullishString = () => z6.string().nullish();
  // One recognized word with its start/end offsets as duration strings.
  const wordSchema = z6.object({
    word: nullishString(),
    startOffset: nullishString(),
    endOffset: nullishString()
  });
  // One transcription hypothesis, optionally with word-level timing.
  const alternativeSchema = z6.object({
    transcript: nullishString(),
    words: z6.array(wordSchema).nullish()
  });
  // One result entry with its alternatives and reported language code.
  const resultSchema = z6.object({
    alternatives: z6.array(alternativeSchema).nullish(),
    languageCode: nullishString()
  });
  return z6.object({
    results: z6.array(resultSchema).nullish(),
    metadata: z6.object({
      totalBilledDuration: nullishString()
    }).nullish()
  });
})();
718
+
540
719
  // src/google-vertex-video-model.ts
541
720
  import {
542
721
  AISDKError
543
722
  } from "@ai-sdk/provider";
544
723
  import {
545
- combineHeaders as combineHeaders3,
546
- convertUint8ArrayToBase64 as convertUint8ArrayToBase642,
547
- createJsonResponseHandler as createJsonResponseHandler3,
724
+ combineHeaders as combineHeaders4,
725
+ convertUint8ArrayToBase64 as convertUint8ArrayToBase643,
726
+ createJsonResponseHandler as createJsonResponseHandler4,
548
727
  delay,
549
728
  lazySchema,
550
- parseProviderOptions as parseProviderOptions3,
551
- postJsonToApi as postJsonToApi3,
552
- resolve as resolve3,
729
+ parseProviderOptions as parseProviderOptions4,
730
+ postJsonToApi as postJsonToApi4,
731
+ resolve as resolve4,
553
732
  zodSchema
554
733
  } from "@ai-sdk/provider-utils";
555
- import { z as z5 } from "zod/v4";
734
+ import { z as z7 } from "zod/v4";
556
735
  var GoogleVertexVideoModel = class {
557
736
  constructor(modelId, config) {
558
737
  this.modelId = modelId;
@@ -569,7 +748,7 @@ var GoogleVertexVideoModel = class {
569
748
  var _a, _b, _c, _d, _e, _f;
570
749
  const currentDate = (_c = (_b = (_a = this.config._internal) == null ? void 0 : _a.currentDate) == null ? void 0 : _b.call(_a)) != null ? _c : /* @__PURE__ */ new Date();
571
750
  const warnings = [];
572
- const vertexOptions = await parseProviderOptions3({
751
+ const vertexOptions = await parseProviderOptions4({
573
752
  provider: "vertex",
574
753
  providerOptions: options.providerOptions,
575
754
  schema: googleVertexVideoModelOptionsSchema
@@ -587,7 +766,7 @@ var GoogleVertexVideoModel = class {
587
766
  details: "Vertex AI video models require base64-encoded images or GCS URIs. URL will be ignored."
588
767
  });
589
768
  } else {
590
- const base64Data = typeof options.image.data === "string" ? options.image.data : convertUint8ArrayToBase642(options.image.data);
769
+ const base64Data = typeof options.image.data === "string" ? options.image.data : convertUint8ArrayToBase643(options.image.data);
591
770
  instance.image = {
592
771
  bytesBase64Encoded: base64Data,
593
772
  mimeType: options.image.mediaType
@@ -645,17 +824,17 @@ var GoogleVertexVideoModel = class {
645
824
  }
646
825
  }
647
826
  }
648
- const { value: operation } = await postJsonToApi3({
827
+ const { value: operation } = await postJsonToApi4({
649
828
  url: `${this.config.baseURL}/models/${this.modelId}:predictLongRunning`,
650
- headers: combineHeaders3(
651
- await resolve3(this.config.headers),
829
+ headers: combineHeaders4(
830
+ await resolve4(this.config.headers),
652
831
  options.headers
653
832
  ),
654
833
  body: {
655
834
  instances,
656
835
  parameters
657
836
  },
658
- successfulResponseHandler: createJsonResponseHandler3(
837
+ successfulResponseHandler: createJsonResponseHandler4(
659
838
  vertexOperationSchema
660
839
  ),
661
840
  failedResponseHandler: googleVertexFailedResponseHandler,
@@ -688,16 +867,16 @@ var GoogleVertexVideoModel = class {
688
867
  message: "Video generation request was aborted"
689
868
  });
690
869
  }
691
- const { value: statusOperation, responseHeaders: pollHeaders } = await postJsonToApi3({
870
+ const { value: statusOperation, responseHeaders: pollHeaders } = await postJsonToApi4({
692
871
  url: `${this.config.baseURL}/models/${this.modelId}:fetchPredictOperation`,
693
- headers: combineHeaders3(
694
- await resolve3(this.config.headers),
872
+ headers: combineHeaders4(
873
+ await resolve4(this.config.headers),
695
874
  options.headers
696
875
  ),
697
876
  body: {
698
877
  operationName
699
878
  },
700
- successfulResponseHandler: createJsonResponseHandler3(
879
+ successfulResponseHandler: createJsonResponseHandler4(
701
880
  vertexOperationSchema
702
881
  ),
703
882
  failedResponseHandler: googleVertexFailedResponseHandler,
@@ -766,38 +945,38 @@ var GoogleVertexVideoModel = class {
766
945
  };
767
946
  }
768
947
  };
769
- var vertexOperationSchema = z5.object({
770
- name: z5.string().nullish(),
771
- done: z5.boolean().nullish(),
772
- error: z5.object({
773
- code: z5.number().nullish(),
774
- message: z5.string(),
775
- status: z5.string().nullish()
948
+ var vertexOperationSchema = z7.object({
949
+ name: z7.string().nullish(),
950
+ done: z7.boolean().nullish(),
951
+ error: z7.object({
952
+ code: z7.number().nullish(),
953
+ message: z7.string(),
954
+ status: z7.string().nullish()
776
955
  }).nullish(),
777
- response: z5.object({
778
- videos: z5.array(
779
- z5.object({
780
- bytesBase64Encoded: z5.string().nullish(),
781
- gcsUri: z5.string().nullish(),
782
- mimeType: z5.string().nullish()
956
+ response: z7.object({
957
+ videos: z7.array(
958
+ z7.object({
959
+ bytesBase64Encoded: z7.string().nullish(),
960
+ gcsUri: z7.string().nullish(),
961
+ mimeType: z7.string().nullish()
783
962
  })
784
963
  ).nullish(),
785
- raiMediaFilteredCount: z5.number().nullish()
964
+ raiMediaFilteredCount: z7.number().nullish()
786
965
  }).nullish()
787
966
  });
788
967
  var googleVertexVideoModelOptionsSchema = lazySchema(
789
968
  () => zodSchema(
790
- z5.object({
791
- pollIntervalMs: z5.number().positive().nullish(),
792
- pollTimeoutMs: z5.number().positive().nullish(),
793
- personGeneration: z5.enum(["dont_allow", "allow_adult", "allow_all"]).nullish(),
794
- negativePrompt: z5.string().nullish(),
795
- generateAudio: z5.boolean().nullish(),
796
- gcsOutputDirectory: z5.string().nullish(),
797
- referenceImages: z5.array(
798
- z5.object({
799
- bytesBase64Encoded: z5.string().nullish(),
800
- gcsUri: z5.string().nullish()
969
+ z7.object({
970
+ pollIntervalMs: z7.number().positive().nullish(),
971
+ pollTimeoutMs: z7.number().positive().nullish(),
972
+ personGeneration: z7.enum(["dont_allow", "allow_adult", "allow_all"]).nullish(),
973
+ negativePrompt: z7.string().nullish(),
974
+ generateAudio: z7.boolean().nullish(),
975
+ gcsOutputDirectory: z7.string().nullish(),
976
+ referenceImages: z7.array(
977
+ z7.object({
978
+ bytesBase64Encoded: z7.string().nullish(),
979
+ gcsUri: z7.string().nullish()
801
980
  })
802
981
  ).nullish()
803
982
  }).passthrough()
@@ -856,7 +1035,7 @@ function createVertex(options = {}) {
856
1035
  const createConfig = (name) => {
857
1036
  const getHeaders = async () => {
858
1037
  var _a;
859
- const originalHeaders = await resolve4((_a = options.headers) != null ? _a : {});
1038
+ const originalHeaders = await resolve5((_a = options.headers) != null ? _a : {});
860
1039
  return withUserAgentSuffix(
861
1040
  originalHeaders,
862
1041
  `ai-sdk/google-vertex/${VERSION}`
@@ -899,6 +1078,21 @@ function createVertex(options = {}) {
899
1078
  generateId: (_a = options.generateId) != null ? _a : generateId
900
1079
  });
901
1080
  };
1081
/**
 * Builds a transcription model for `modelId` from the provider configuration.
 * Throws when the provider was created with an Express Mode API key, which
 * transcription models do not support.
 */
const createTranscriptionModel = (modelId) => {
  if (apiKey) {
    throw new Error(
      "Google Vertex transcription models do not support Express Mode API keys. Use standard Google Cloud credentials instead."
    );
  }
  const {
    provider: providerName,
    headers: resolveHeaders,
    fetch: fetchImplementation
  } = createConfig("transcription");
  // Project and location are loaded at model-creation time, after the shared config.
  const project = loadVertexProject();
  const location = loadVertexLocation();
  return new GoogleVertexTranscriptionModel(modelId, {
    provider: providerName,
    headers: resolveHeaders,
    fetch: fetchImplementation,
    project,
    location
  });
};
902
1096
  const provider = function(modelId) {
903
1097
  if (new.target) {
904
1098
  throw new Error(
@@ -915,6 +1109,8 @@ function createVertex(options = {}) {
915
1109
  provider.imageModel = createImageModel;
916
1110
  provider.video = createVideoModel;
917
1111
  provider.videoModel = createVideoModel;
1112
+ provider.transcription = createTranscriptionModel;
1113
+ provider.transcriptionModel = createTranscriptionModel;
918
1114
  provider.tools = googleVertexTools;
919
1115
  return provider;
920
1116
  }
@@ -1045,7 +1241,7 @@ function createVertex2(options = {}) {
1045
1241
  Authorization: `Bearer ${await generateAuthToken(
1046
1242
  options.googleCredentials
1047
1243
  )}`,
1048
- ...await resolve5(options.headers)
1244
+ ...await resolve6(options.headers)
1049
1245
  })
1050
1246
  });
1051
1247
  }