@ai-sdk/google 4.0.0-canary.78 → 4.0.0-canary.80

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,32 @@
1
1
  # @ai-sdk/google
2
2
 
3
+ ## 4.0.0-canary.80
4
+
5
+ ### Patch Changes
6
+
7
+ - Updated dependencies [bae5e2b]
8
+ - @ai-sdk/provider-utils@5.0.0-canary.47
9
+
10
+ ## 4.0.0-canary.79
11
+
12
+ ### Patch Changes
13
+
14
+ - ce769dd: feat(provider): add experimental Realtime API support for voice conversations
15
+
16
+ Adds first-class support for realtime (speech-to-speech) APIs:
17
+
18
+ - `Experimental_RealtimeModelV4` spec in `@ai-sdk/provider` with normalized event types and factory
19
+ - OpenAI, Google, and xAI realtime provider implementations
20
+ - `openai.experimental_realtime()` / `google.experimental_realtime()` / `xai.experimental_realtime()` work in both server and browser
21
+ - `.getToken()` static method on each provider for server-side ephemeral token creation
22
+ - `experimental_getRealtimeToolDefinitions` helper for provider session tool definitions
23
+ - `experimental_useRealtime` hook in `@ai-sdk/react` returning `UIMessage[]` (aligned with `useChat`), with `onToolCall` and `addToolOutput` for client-driven tool execution
24
+ - `inputAudioTranscription` session config for showing transcribed user audio messages when supported by the provider
25
+
26
+ - Updated dependencies [ce769dd]
27
+ - @ai-sdk/provider@4.0.0-canary.18
28
+ - @ai-sdk/provider-utils@5.0.0-canary.46
29
+
3
30
  ## 4.0.0-canary.78
4
31
 
5
32
  ### Patch Changes
package/dist/index.d.ts CHANGED
@@ -1,6 +1,6 @@
1
1
  import * as _ai_sdk_provider_utils from '@ai-sdk/provider-utils';
2
2
  import { InferSchema, FetchFunction } from '@ai-sdk/provider-utils';
3
- import { ProviderV4, LanguageModelV4, ImageModelV4, EmbeddingModelV4, Experimental_VideoModelV4, SpeechModelV4, FilesV4 } from '@ai-sdk/provider';
3
+ import { ProviderV4, LanguageModelV4, ImageModelV4, EmbeddingModelV4, Experimental_VideoModelV4, SpeechModelV4, FilesV4, Experimental_RealtimeFactoryV4, Experimental_RealtimeModelV4, Experimental_RealtimeModelV4ClientSecretOptions, Experimental_RealtimeModelV4ClientSecretResult, Experimental_RealtimeModelV4ServerEvent, Experimental_RealtimeModelV4ClientEvent, Experimental_RealtimeModelV4SessionConfig } from '@ai-sdk/provider';
4
4
 
5
5
  declare const googleErrorDataSchema: _ai_sdk_provider_utils.LazySchema<{
6
6
  error: {
@@ -554,6 +554,7 @@ interface GoogleProvider extends ProviderV4 {
554
554
  } | {
555
555
  managedAgent: string;
556
556
  }): LanguageModelV4;
557
+ experimental_realtime: Experimental_RealtimeFactoryV4;
557
558
  tools: typeof googleTools;
558
559
  }
559
560
  interface GoogleProviderSettings {
@@ -595,6 +596,32 @@ declare function createGoogle(options?: GoogleProviderSettings): GoogleProvider;
595
596
  */
596
597
  declare const google: GoogleProvider;
597
598
 
599
/** Settings handed to the `GoogleRealtimeModel` constructor. */
+ type GoogleRealtimeModelConfig = {
600
/** Provider identifier; the model copies it to its own `provider` property. */
+ provider: string;
601
/** Base URL from which the auth-token and WebSocket URLs are derived. */
+ baseURL: string;
602
/** Returns request headers; token creation reads the `x-goog-api-key` entry from the result. */
+ headers: () => Record<string, string | undefined>;
603
/** Optional fetch implementation; token creation falls back to the global `fetch` when it is not set. */
+ fetch?: FetchFunction;
604
+ };
605
/**
 * Experimental realtime (speech-to-speech) model for Google, implementing the
 * `Experimental_RealtimeModelV4` specification. Exported from the package as
 * `Experimental_GoogleRealtimeModel`.
 */
+ declare class GoogleRealtimeModel implements Experimental_RealtimeModelV4 {
606
+ readonly specificationVersion: "v4";
607
/** Copied from `config.provider` at construction. */
+ readonly provider: string;
608
+ readonly modelId: string;
609
+ private readonly config;
610
/** Event mapper instance created in the constructor; used by `parseServerEvent` and `serializeClientEvent`. */
+ private readonly mapper;
611
+ constructor(modelId: string, config: GoogleRealtimeModelConfig);
612
/**
 * Requests an ephemeral auth token over HTTP. The implementation throws an
 * `Error` when the configured headers contain no `x-goog-api-key` or when the
 * token endpoint answers with a non-OK status.
 */
+ doCreateClientSecret(options: Experimental_RealtimeModelV4ClientSecretOptions): Promise<Experimental_RealtimeModelV4ClientSecretResult>;
613
/**
 * Builds the WebSocket connection settings by adding the token to `url` as an
 * `access_token` query parameter. The implementation does not set `protocols`.
 */
+ getWebSocketConfig(options: {
614
+ token: string;
615
+ url: string;
616
+ }): {
617
+ url: string;
618
+ protocols?: string[];
619
+ };
620
/** Maps one raw server message to one normalized event, or to an array when the message yields several. */
+ parseServerEvent(raw: unknown): Experimental_RealtimeModelV4ServerEvent | Experimental_RealtimeModelV4ServerEvent[];
621
/** Converts a normalized client event into the provider message for this model id. */
+ serializeClientEvent(event: Experimental_RealtimeModelV4ClientEvent): ReturnType<Experimental_RealtimeModelV4['serializeClientEvent']>;
622
/** Builds the provider setup payload for this model id from a normalized session config. */
+ buildSessionConfig(config: Experimental_RealtimeModelV4SessionConfig): Record<string, unknown>;
623
+ }
624
+
598
625
  declare const VERSION: string;
599
626
 
600
- export { type GoogleEmbeddingModelOptions, type GoogleErrorData, type GoogleFilesUploadOptions, type GoogleEmbeddingModelOptions as GoogleGenerativeAIEmbeddingProviderOptions, type GoogleImageModelOptions as GoogleGenerativeAIImageProviderOptions, type GoogleProvider as GoogleGenerativeAIProvider, type GoogleProviderMetadata as GoogleGenerativeAIProviderMetadata, type GoogleLanguageModelOptions as GoogleGenerativeAIProviderOptions, type GoogleProviderSettings as GoogleGenerativeAIProviderSettings, type GoogleVideoModelId as GoogleGenerativeAIVideoModelId, type GoogleVideoModelOptions as GoogleGenerativeAIVideoProviderOptions, type GoogleImageModelOptions, type GoogleInteractionsAgentName, type GoogleInteractionsModelId, type GoogleInteractionsProviderMetadata, type GoogleLanguageModelInteractionsOptions, type GoogleLanguageModelOptions, type GoogleProvider, type GoogleProviderMetadata, type GoogleProviderSettings, type GoogleSpeechModelId, type GoogleSpeechModelOptions, type GoogleVideoModelId, type GoogleVideoModelOptions, VERSION, createGoogle, createGoogle as createGoogleGenerativeAI, google };
627
+ export { GoogleRealtimeModel as Experimental_GoogleRealtimeModel, type GoogleRealtimeModelConfig as Experimental_GoogleRealtimeModelConfig, type GoogleEmbeddingModelOptions, type GoogleErrorData, type GoogleFilesUploadOptions, type GoogleEmbeddingModelOptions as GoogleGenerativeAIEmbeddingProviderOptions, type GoogleImageModelOptions as GoogleGenerativeAIImageProviderOptions, type GoogleProvider as GoogleGenerativeAIProvider, type GoogleProviderMetadata as GoogleGenerativeAIProviderMetadata, type GoogleLanguageModelOptions as GoogleGenerativeAIProviderOptions, type GoogleProviderSettings as GoogleGenerativeAIProviderSettings, type GoogleVideoModelId as GoogleGenerativeAIVideoModelId, type GoogleVideoModelOptions as GoogleGenerativeAIVideoProviderOptions, type GoogleImageModelOptions, type GoogleInteractionsAgentName, type GoogleInteractionsModelId, type GoogleInteractionsProviderMetadata, type GoogleLanguageModelInteractionsOptions, type GoogleLanguageModelOptions, type GoogleProvider, type GoogleProviderMetadata, type GoogleProviderSettings, type GoogleSpeechModelId, type GoogleSpeechModelOptions, type GoogleVideoModelId, type GoogleVideoModelOptions, VERSION, createGoogle, createGoogle as createGoogleGenerativeAI, google };
package/dist/index.js CHANGED
@@ -7,7 +7,7 @@ import {
7
7
  } from "@ai-sdk/provider-utils";
8
8
 
9
9
  // src/version.ts
10
- var VERSION = true ? "4.0.0-canary.78" : "0.0.0-test";
10
+ var VERSION = true ? "4.0.0-canary.80" : "0.0.0-test";
11
11
 
12
12
  // src/google-embedding-model.ts
13
13
  import {
@@ -5646,14 +5646,14 @@ async function cancelGoogleInteraction({
5646
5646
  baseURL,
5647
5647
  interactionId,
5648
5648
  headers,
5649
- fetch = getOriginalFetch()
5649
+ fetch: fetch2 = getOriginalFetch()
5650
5650
  }) {
5651
5651
  if (interactionId == null || interactionId.length === 0) {
5652
5652
  return;
5653
5653
  }
5654
5654
  const url = `${baseURL}/interactions/${encodeURIComponent(interactionId)}/cancel`;
5655
5655
  try {
5656
- const response = await fetch(url, {
5656
+ const response = await fetch2(url, {
5657
5657
  method: "POST",
5658
5658
  headers: withUserAgentSuffix(
5659
5659
  combineHeaders7({ "Content-Type": "application/json" }, headers),
@@ -5681,7 +5681,7 @@ async function pollGoogleInteractionUntilTerminal({
5681
5681
  baseURL,
5682
5682
  interactionId,
5683
5683
  headers,
5684
- fetch,
5684
+ fetch: fetch2,
5685
5685
  abortSignal,
5686
5686
  initialDelayMs = DEFAULT_INITIAL_DELAY_MS,
5687
5687
  maxDelayMs = DEFAULT_MAX_DELAY_MS,
@@ -5695,7 +5695,7 @@ async function pollGoogleInteractionUntilTerminal({
5695
5695
  const startedAt = Date.now();
5696
5696
  let nextDelayMs = initialDelayMs;
5697
5697
  const url = `${baseURL}/interactions/${encodeURIComponent(interactionId)}`;
5698
- const cancelOnServer = () => cancelGoogleInteraction({ baseURL, interactionId, headers, fetch });
5698
+ const cancelOnServer = () => cancelGoogleInteraction({ baseURL, interactionId, headers, fetch: fetch2 });
5699
5699
  try {
5700
5700
  while (true) {
5701
5701
  if (abortSignal == null ? void 0 : abortSignal.aborted) {
@@ -5720,7 +5720,7 @@ async function pollGoogleInteractionUntilTerminal({
5720
5720
  googleInteractionsResponseSchema
5721
5721
  ),
5722
5722
  abortSignal,
5723
- fetch
5723
+ fetch: fetch2
5724
5724
  });
5725
5725
  if (isTerminalStatus(response.status)) {
5726
5726
  return { response, rawResponse, responseHeaders };
@@ -5896,7 +5896,7 @@ function streamGoogleInteractionEvents({
5896
5896
  baseURL,
5897
5897
  interactionId,
5898
5898
  headers,
5899
- fetch,
5899
+ fetch: fetch2,
5900
5900
  abortSignal,
5901
5901
  maxRetries = DEFAULT_MAX_RETRIES,
5902
5902
  retryDelayMs = DEFAULT_RETRY_DELAY_MS
@@ -5944,7 +5944,7 @@ function streamGoogleInteractionEvents({
5944
5944
  googleInteractionsEventSchema
5945
5945
  ),
5946
5946
  abortSignal: effectiveSignal,
5947
- fetch
5947
+ fetch: fetch2
5948
5948
  });
5949
5949
  return stream.getReader();
5950
5950
  }
@@ -6037,7 +6037,7 @@ function streamGoogleInteractionEvents({
6037
6037
  baseURL,
6038
6038
  interactionId,
6039
6039
  headers,
6040
- fetch
6040
+ fetch: fetch2
6041
6041
  });
6042
6042
  }
6043
6043
  }
@@ -6690,6 +6690,388 @@ function pruneUndefined(obj) {
6690
6690
  return result;
6691
6691
  }
6692
6692
 
6693
+ // src/realtime/google-realtime-event-mapper.ts
6694
+ import { safeParseJSON } from "@ai-sdk/provider-utils";
6695
+ var GoogleRealtimeEventMapper = class {
6696
+ constructor() {
6697
+ this.turnCounter = 0;
6698
+ this.hasAudio = false;
6699
+ this.hasText = false;
6700
+ this.hasTranscript = false;
6701
+ this.turnClosed = false;
6702
+ this.inputAudioRate = 16e3;
6703
+ }
6704
+ get responseId() {
6705
+ return `google-resp-${this.turnCounter}`;
6706
+ }
6707
+ get itemId() {
6708
+ return `google-item-${this.turnCounter}`;
6709
+ }
6710
+ /**
6711
+ * Rolls over to the next turn lazily, only once new model content actually
6712
+ * arrives. `turnComplete` merely marks the current turn closed; the counter
6713
+ * is not advanced until the next response begins. This keeps a transcript
6714
+ * that arrives shortly after `turnComplete` attached to the turn it belongs
6715
+ * to, since Google delivers transcription independently with no guaranteed
6716
+ * ordering relative to `turnComplete`.
6717
+ */
6718
+ beginTurnIfClosed() {
6719
+ if (!this.turnClosed) return;
6720
+ this.turnCounter++;
6721
+ this.hasAudio = false;
6722
+ this.hasText = false;
6723
+ this.hasTranscript = false;
6724
+ this.turnClosed = false;
6725
+ }
6726
+ parseServerEvent(raw) {
6727
+ var _a, _b;
6728
+ const data = raw;
6729
+ if (data.setupComplete != null) {
6730
+ return { type: "session-created", raw };
6731
+ }
6732
+ if (data.toolCall != null) {
6733
+ this.beginTurnIfClosed();
6734
+ const functionCalls = (_a = data.toolCall.functionCalls) != null ? _a : [];
6735
+ return functionCalls.flatMap((functionCall) => {
6736
+ var _a2;
6737
+ const args = JSON.stringify((_a2 = functionCall.args) != null ? _a2 : {});
6738
+ return [
6739
+ {
6740
+ type: "function-call-arguments-delta",
6741
+ responseId: this.responseId,
6742
+ itemId: this.itemId,
6743
+ callId: functionCall.id,
6744
+ delta: args,
6745
+ raw
6746
+ },
6747
+ {
6748
+ type: "function-call-arguments-done",
6749
+ responseId: this.responseId,
6750
+ itemId: this.itemId,
6751
+ callId: functionCall.id,
6752
+ name: functionCall.name,
6753
+ arguments: args,
6754
+ raw
6755
+ }
6756
+ ];
6757
+ });
6758
+ }
6759
+ if (data.toolCallCancellation != null) {
6760
+ return {
6761
+ type: "custom",
6762
+ rawType: "toolCallCancellation",
6763
+ raw
6764
+ };
6765
+ }
6766
+ if (data.serverContent != null) {
6767
+ return this.parseServerContent(data.serverContent, raw);
6768
+ }
6769
+ if (((_b = data.inputTranscription) == null ? void 0 : _b.text) != null) {
6770
+ return {
6771
+ type: "input-transcription-completed",
6772
+ itemId: `google-input-${this.turnCounter}`,
6773
+ transcript: data.inputTranscription.text,
6774
+ raw
6775
+ };
6776
+ }
6777
+ return { type: "custom", rawType: String(Object.keys(data)[0]), raw };
6778
+ }
6779
+ parseServerContent(serverContent, raw) {
6780
+ var _a, _b, _c, _d;
6781
+ const events = [];
6782
+ if (serverContent.interrupted) {
6783
+ events.push({
6784
+ type: "speech-started",
6785
+ raw
6786
+ });
6787
+ }
6788
+ if ((_a = serverContent.modelTurn) == null ? void 0 : _a.parts) {
6789
+ this.beginTurnIfClosed();
6790
+ for (const part of serverContent.modelTurn.parts) {
6791
+ if ((_b = part.inlineData) == null ? void 0 : _b.data) {
6792
+ this.hasAudio = true;
6793
+ events.push({
6794
+ type: "audio-delta",
6795
+ responseId: this.responseId,
6796
+ itemId: this.itemId,
6797
+ delta: part.inlineData.data,
6798
+ raw
6799
+ });
6800
+ }
6801
+ if (part.text) {
6802
+ this.hasText = true;
6803
+ events.push({
6804
+ type: "text-delta",
6805
+ responseId: this.responseId,
6806
+ itemId: this.itemId,
6807
+ delta: part.text,
6808
+ raw
6809
+ });
6810
+ }
6811
+ }
6812
+ }
6813
+ if ((_c = serverContent.outputTranscription) == null ? void 0 : _c.text) {
6814
+ this.hasTranscript = true;
6815
+ events.push({
6816
+ type: "audio-transcript-delta",
6817
+ responseId: this.responseId,
6818
+ itemId: this.itemId,
6819
+ delta: serverContent.outputTranscription.text,
6820
+ raw
6821
+ });
6822
+ }
6823
+ if ((_d = serverContent.inputTranscription) == null ? void 0 : _d.text) {
6824
+ events.push({
6825
+ type: "input-transcription-completed",
6826
+ itemId: `google-input-${this.turnCounter}`,
6827
+ transcript: serverContent.inputTranscription.text,
6828
+ raw
6829
+ });
6830
+ }
6831
+ if (serverContent.turnComplete) {
6832
+ if (this.hasAudio) {
6833
+ events.push({
6834
+ type: "audio-done",
6835
+ responseId: this.responseId,
6836
+ itemId: this.itemId,
6837
+ raw
6838
+ });
6839
+ }
6840
+ if (this.hasText) {
6841
+ events.push({
6842
+ type: "text-done",
6843
+ responseId: this.responseId,
6844
+ itemId: this.itemId,
6845
+ raw
6846
+ });
6847
+ }
6848
+ if (this.hasTranscript) {
6849
+ events.push({
6850
+ type: "audio-transcript-done",
6851
+ responseId: this.responseId,
6852
+ itemId: this.itemId,
6853
+ raw
6854
+ });
6855
+ }
6856
+ events.push({
6857
+ type: "response-done",
6858
+ responseId: this.responseId,
6859
+ status: "completed",
6860
+ raw
6861
+ });
6862
+ this.turnClosed = true;
6863
+ }
6864
+ if (events.length === 0) {
6865
+ return { type: "custom", rawType: "serverContent", raw };
6866
+ }
6867
+ return events.length === 1 ? events[0] : events;
6868
+ }
6869
+ serializeClientEvent(event, modelId) {
6870
+ var _a;
6871
+ switch (event.type) {
6872
+ case "session-update":
6873
+ if (((_a = event.config.inputAudioFormat) == null ? void 0 : _a.rate) != null) {
6874
+ this.inputAudioRate = event.config.inputAudioFormat.rate;
6875
+ }
6876
+ return {
6877
+ setup: buildGoogleSessionConfig(event.config, modelId)
6878
+ };
6879
+ case "input-audio-append":
6880
+ return {
6881
+ realtimeInput: {
6882
+ audio: {
6883
+ data: event.audio,
6884
+ mimeType: `audio/pcm;rate=${this.inputAudioRate}`
6885
+ }
6886
+ }
6887
+ };
6888
+ case "input-audio-commit":
6889
+ case "input-audio-clear":
6890
+ case "response-create":
6891
+ case "response-cancel":
6892
+ case "conversation-item-truncate":
6893
+ return null;
6894
+ case "conversation-item-create": {
6895
+ const item = event.item;
6896
+ switch (item.type) {
6897
+ case "text-message":
6898
+ return {
6899
+ realtimeInput: {
6900
+ text: item.text
6901
+ }
6902
+ };
6903
+ case "function-call-output":
6904
+ return serializeFunctionCallOutput(item);
6905
+ case "audio-message":
6906
+ return null;
6907
+ }
6908
+ break;
6909
+ }
6910
+ }
6911
+ return null;
6912
+ }
6913
+ };
6914
+ async function serializeFunctionCallOutput(item) {
6915
+ const parseResult = await safeParseJSON({ text: item.output });
6916
+ const response = parseResult.success ? parseResult.value : {};
6917
+ return {
6918
+ toolResponse: {
6919
+ functionResponses: [
6920
+ {
6921
+ id: item.callId,
6922
+ name: item.name,
6923
+ response
6924
+ }
6925
+ ]
6926
+ }
6927
+ };
6928
+ }
6929
/**
 * Builds the Google `setup` payload from a normalized session config.
 *
 * @param config Normalized session config; may be null or undefined, in which
 *   case only the model path and the default response modality are emitted.
 * @param modelId Model id, converted to a model path with `getModelPath`.
 * @returns The setup object. Keys from `config.providerOptions` are merged in
 *   last and therefore override the generated ones.
 */
function buildGoogleSessionConfig(config, modelId) {
  const modelPath = getModelPath(modelId);
  const session = config != null ? config : {};

  // Audio is the default response modality when none is requested.
  const responseModalities = session.outputModalities != null
    ? session.outputModalities.map((modality) => modality.toUpperCase())
    : ["AUDIO"];
  const generationConfig = { responseModalities };
  if (session.voice != null) {
    const prebuiltVoiceConfig = { voiceName: session.voice };
    generationConfig.speechConfig = { voiceConfig: { prebuiltVoiceConfig } };
  }

  const setup = { model: modelPath, generationConfig };

  if (session.instructions != null) {
    setup.systemInstruction = { parts: [{ text: session.instructions }] };
  }

  if (session.tools != null && session.tools.length > 0) {
    const functionDeclarations = session.tools.map((tool) => ({
      name: tool.name,
      description: tool.description,
      parameters: convertJSONSchemaToOpenAPISchema(tool.parameters)
    }));
    setup.tools = [{ functionDeclarations }];
  }

  // Transcription is switched on by the presence of an (empty) settings object.
  if (session.inputAudioTranscription != null) {
    setup.inputAudioTranscription = {};
  }
  if (session.outputAudioTranscription != null) {
    setup.outputAudioTranscription = {};
  }

  if (session.providerOptions != null) {
    Object.assign(setup, session.providerOptions);
  }
  return setup;
}
6978
+
6979
+ // src/realtime/google-realtime-model.ts
6980
// Path segment that getWebSocketURL appends after `/ws/` when building the realtime WebSocket URL.
+ var realtimeWebSocketPath = "google.ai.generativelanguage.v1alpha.GenerativeService.BidiGenerateContentConstrained";
6981
/**
 * Parses `baseURL` and removes a trailing `v1beta` / `v1alpha` path segment so
 * callers can append their own versioned path.
 *
 * Trailing slashes are ignored when looking for the version segment, so
 * `https://host/v1beta/` is handled the same way as `https://host/v1beta`.
 * URLs whose last segment is not a known version are returned with their path
 * untouched.
 *
 * @param baseURL Absolute base URL string.
 * @returns A new `URL` object (throws `TypeError` if `baseURL` is not a valid URL).
 */
function getRealtimeBaseURL(baseURL) {
  const url = new URL(baseURL);
  // Strip trailing slashes first; otherwise the last segment of "/v1beta/"
  // is the empty string and the version would never be detected.
  const pathSegments = url.pathname.replace(/\/+$/, "").split("/");
  const version = pathSegments[pathSegments.length - 1];
  if (version === "v1beta" || version === "v1alpha") {
    pathSegments.pop();
    url.pathname = pathSegments.join("/") || "/";
  }
  return url;
}
6991
/**
 * Derives the `v1alpha/auth_tokens` endpoint URL from the provider base URL.
 *
 * @param baseURL Provider base URL.
 * @returns The endpoint URL as a string.
 */
function getAuthTokensURL(baseURL) {
  const endpoint = getRealtimeBaseURL(baseURL);
  // Drop a single trailing slash so the joined path has no double slash.
  const basePath = endpoint.pathname.replace(/\/$/, "");
  endpoint.pathname = `${basePath}/v1alpha/auth_tokens`;
  return endpoint.toString();
}
6996
/**
 * Derives the realtime WebSocket URL from the provider base URL.
 *
 * @param baseURL Provider base URL.
 * @returns The WebSocket URL as a string: `wss:` when the base URL uses
 *   `https:`, `ws:` otherwise.
 */
function getWebSocketURL(baseURL) {
  const socketURL = getRealtimeBaseURL(baseURL);
  if (socketURL.protocol === "https:") {
    socketURL.protocol = "wss:";
  } else {
    socketURL.protocol = "ws:";
  }
  // Drop a single trailing slash so the joined path has no double slash.
  const basePath = socketURL.pathname.replace(/\/$/, "");
  socketURL.pathname = `${basePath}/ws/${realtimeWebSocketPath}`;
  return socketURL.toString();
}
7002
+ var GoogleRealtimeModel = class {
7003
+ constructor(modelId, config) {
7004
+ this.specificationVersion = "v4";
7005
+ this.mapper = new GoogleRealtimeEventMapper();
7006
+ this.modelId = modelId;
7007
+ this.provider = config.provider;
7008
+ this.config = config;
7009
+ }
7010
+ async doCreateClientSecret(options) {
7011
+ var _a, _b;
7012
+ const fetchFn = (_a = this.config.fetch) != null ? _a : fetch;
7013
+ const headers = this.config.headers();
7014
+ const apiKey = headers["x-goog-api-key"];
7015
+ if (!apiKey) {
7016
+ throw new Error(
7017
+ "Google Generative AI API key is required for realtime token creation."
7018
+ );
7019
+ }
7020
+ const now = Date.now();
7021
+ const openWindowMs = ((_b = options.expiresAfterSeconds) != null ? _b : 60) * 1e3;
7022
+ const newSessionExpireTime = new Date(now + openWindowMs).toISOString();
7023
+ const expireTime = new Date(
7024
+ now + openWindowMs + 30 * 60 * 1e3
7025
+ ).toISOString();
7026
+ const setupPayload = buildGoogleSessionConfig(
7027
+ options.sessionConfig,
7028
+ this.modelId
7029
+ );
7030
+ const response = await fetchFn(
7031
+ `${getAuthTokensURL(this.config.baseURL)}?key=${encodeURIComponent(apiKey)}`,
7032
+ {
7033
+ method: "POST",
7034
+ headers: { "Content-Type": "application/json" },
7035
+ body: JSON.stringify({
7036
+ // `uses: 0` means no limit is applied to how many times the token can
7037
+ // start a session (per the AuthToken spec). An unset value would
7038
+ // default to 1, which breaks WebSocket reconnects within the session.
7039
+ uses: 0,
7040
+ expireTime,
7041
+ newSessionExpireTime,
7042
+ bidiGenerateContentSetup: setupPayload
7043
+ })
7044
+ }
7045
+ );
7046
+ if (!response.ok) {
7047
+ const text = await response.text();
7048
+ throw new Error(
7049
+ `Google realtime auth token request failed: ${response.status} ${text}`
7050
+ );
7051
+ }
7052
+ const data = await response.json();
7053
+ return {
7054
+ token: data.name,
7055
+ url: getWebSocketURL(this.config.baseURL),
7056
+ expiresAt: data.expireTime ? Math.floor(new Date(data.expireTime).getTime() / 1e3) : void 0
7057
+ };
7058
+ }
7059
+ getWebSocketConfig(options) {
7060
+ return {
7061
+ url: `${options.url}?access_token=${encodeURIComponent(options.token)}`
7062
+ };
7063
+ }
7064
+ parseServerEvent(raw) {
7065
+ return this.mapper.parseServerEvent(raw);
7066
+ }
7067
+ serializeClientEvent(event) {
7068
+ return this.mapper.serializeClientEvent(event, this.modelId);
7069
+ }
7070
+ buildSessionConfig(config) {
7071
+ return buildGoogleSessionConfig(config, this.modelId);
7072
+ }
7073
+ };
7074
+
6693
7075
  // src/google-provider.ts
6694
7076
  function createGoogle(options = {}) {
6695
7077
  var _a, _b;
@@ -6756,12 +7138,35 @@ function createGoogle(options = {}) {
6756
7138
  generateId: (_a2 = options.generateId) != null ? _a2 : generateId2
6757
7139
  });
6758
7140
  };
7141
+ const createRealtimeModel = (modelId) => new GoogleRealtimeModel(modelId, {
7142
+ provider: `${providerName}.realtime`,
7143
+ baseURL,
7144
+ headers: getHeaders,
7145
+ fetch: options.fetch
7146
+ });
6759
7147
  const createSpeechModel = (modelId) => new GoogleSpeechModel(modelId, {
6760
7148
  provider: `${providerName}.speech`,
6761
7149
  baseURL,
6762
7150
  headers: getHeaders,
6763
7151
  fetch: options.fetch
6764
7152
  });
7153
+ const experimentalRealtimeFactory = Object.assign(
7154
+ (modelId) => createRealtimeModel(modelId),
7155
+ {
7156
+ getToken: async (tokenOptions) => {
7157
+ const model = createRealtimeModel(tokenOptions.model);
7158
+ const secret = await model.doCreateClientSecret({
7159
+ sessionConfig: tokenOptions.sessionConfig,
7160
+ expiresAfterSeconds: tokenOptions.expiresAfterSeconds
7161
+ });
7162
+ return {
7163
+ token: secret.token,
7164
+ url: secret.url,
7165
+ expiresAt: secret.expiresAt
7166
+ };
7167
+ }
7168
+ }
7169
+ );
6765
7170
  const createInteractionsModel = (modelIdOrAgent) => {
6766
7171
  var _a2;
6767
7172
  return new GoogleInteractionsLanguageModel(
@@ -6795,6 +7200,7 @@ function createGoogle(options = {}) {
6795
7200
  provider.imageModel = createImageModel;
6796
7201
  provider.video = createVideoModel;
6797
7202
  provider.videoModel = createVideoModel;
7203
+ provider.experimental_realtime = experimentalRealtimeFactory;
6798
7204
  provider.files = createFiles;
6799
7205
  provider.speech = createSpeechModel;
6800
7206
  provider.speechModel = createSpeechModel;
@@ -6804,6 +7210,7 @@ function createGoogle(options = {}) {
6804
7210
  }
6805
7211
  var google = createGoogle();
6806
7212
  export {
7213
+ GoogleRealtimeModel as Experimental_GoogleRealtimeModel,
6807
7214
  VERSION,
6808
7215
  createGoogle,
6809
7216
  createGoogle as createGoogleGenerativeAI,