@ai-sdk/google 4.0.0-canary.78 → 4.0.0-canary.80
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +27 -0
- package/dist/index.d.ts +29 -2
- package/dist/index.js +416 -9
- package/dist/index.js.map +1 -1
- package/docs/15-google.mdx +26 -0
- package/package.json +3 -3
- package/src/google-provider.ts +33 -0
- package/src/index.ts +2 -0
- package/src/realtime/google-realtime-event-mapper.ts +383 -0
- package/src/realtime/google-realtime-model-options.ts +3 -0
- package/src/realtime/google-realtime-model.ts +160 -0
- package/src/realtime/index.ts +2 -0
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,32 @@
|
|
|
1
1
|
# @ai-sdk/google
|
|
2
2
|
|
|
3
|
+
## 4.0.0-canary.80
|
|
4
|
+
|
|
5
|
+
### Patch Changes
|
|
6
|
+
|
|
7
|
+
- Updated dependencies [bae5e2b]
|
|
8
|
+
- @ai-sdk/provider-utils@5.0.0-canary.47
|
|
9
|
+
|
|
10
|
+
## 4.0.0-canary.79
|
|
11
|
+
|
|
12
|
+
### Patch Changes
|
|
13
|
+
|
|
14
|
+
- ce769dd: feat(provider): add experimental Realtime API support for voice conversations
|
|
15
|
+
|
|
16
|
+
Adds first-class support for realtime (speech-to-speech) APIs:
|
|
17
|
+
|
|
18
|
+
- `Experimental_RealtimeModelV4` spec in `@ai-sdk/provider` with normalized event types and factory
|
|
19
|
+
- OpenAI, Google, and xAI realtime provider implementations
|
|
20
|
+
- `openai.experimental_realtime()` / `google.experimental_realtime()` / `xai.experimental_realtime()` work in both server and browser
|
|
21
|
+
- `.getToken()` static method on each provider for server-side ephemeral token creation
|
|
22
|
+
- `experimental_getRealtimeToolDefinitions` helper for provider session tool definitions
|
|
23
|
+
- `experimental_useRealtime` hook in `@ai-sdk/react` returning `UIMessage[]` (aligned with `useChat`), with `onToolCall` and `addToolOutput` for client-driven tool execution
|
|
24
|
+
- `inputAudioTranscription` session config for showing transcribed user audio messages when supported by the provider
|
|
25
|
+
|
|
26
|
+
- Updated dependencies [ce769dd]
|
|
27
|
+
- @ai-sdk/provider@4.0.0-canary.18
|
|
28
|
+
- @ai-sdk/provider-utils@5.0.0-canary.46
|
|
29
|
+
|
|
3
30
|
## 4.0.0-canary.78
|
|
4
31
|
|
|
5
32
|
### Patch Changes
|
package/dist/index.d.ts
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import * as _ai_sdk_provider_utils from '@ai-sdk/provider-utils';
|
|
2
2
|
import { InferSchema, FetchFunction } from '@ai-sdk/provider-utils';
|
|
3
|
-
import { ProviderV4, LanguageModelV4, ImageModelV4, EmbeddingModelV4, Experimental_VideoModelV4, SpeechModelV4, FilesV4 } from '@ai-sdk/provider';
|
|
3
|
+
import { ProviderV4, LanguageModelV4, ImageModelV4, EmbeddingModelV4, Experimental_VideoModelV4, SpeechModelV4, FilesV4, Experimental_RealtimeFactoryV4, Experimental_RealtimeModelV4, Experimental_RealtimeModelV4ClientSecretOptions, Experimental_RealtimeModelV4ClientSecretResult, Experimental_RealtimeModelV4ServerEvent, Experimental_RealtimeModelV4ClientEvent, Experimental_RealtimeModelV4SessionConfig } from '@ai-sdk/provider';
|
|
4
4
|
|
|
5
5
|
declare const googleErrorDataSchema: _ai_sdk_provider_utils.LazySchema<{
|
|
6
6
|
error: {
|
|
@@ -554,6 +554,7 @@ interface GoogleProvider extends ProviderV4 {
|
|
|
554
554
|
} | {
|
|
555
555
|
managedAgent: string;
|
|
556
556
|
}): LanguageModelV4;
|
|
557
|
+
experimental_realtime: Experimental_RealtimeFactoryV4;
|
|
557
558
|
tools: typeof googleTools;
|
|
558
559
|
}
|
|
559
560
|
interface GoogleProviderSettings {
|
|
@@ -595,6 +596,32 @@ declare function createGoogle(options?: GoogleProviderSettings): GoogleProvider;
|
|
|
595
596
|
*/
|
|
596
597
|
declare const google: GoogleProvider;
|
|
597
598
|
|
|
599
|
+
/**
 * Settings passed to the `GoogleRealtimeModel` constructor.
 */
type GoogleRealtimeModelConfig = {
    /** Identifier exposed as the model's `provider` property. */
    provider: string;
    /** Base URL from which the auth-token and WebSocket URLs are derived. */
    baseURL: string;
    /**
     * Returns the request headers. Token creation reads the API key from the
     * `x-goog-api-key` entry.
     */
    headers: () => Record<string, string | undefined>;
    /** Optional fetch implementation; the global `fetch` is used when omitted. */
    fetch?: FetchFunction;
};
|
|
605
|
+
/**
 * Google implementation of the experimental realtime model specification.
 */
declare class GoogleRealtimeModel implements Experimental_RealtimeModelV4 {
    readonly specificationVersion: "v4";
    readonly provider: string;
    readonly modelId: string;
    private readonly config;
    private readonly mapper;
    constructor(modelId: string, config: GoogleRealtimeModelConfig);
    /**
     * Requests an auth token from the Google API and returns it together with
     * the WebSocket URL to connect to.
     */
    doCreateClientSecret(options: Experimental_RealtimeModelV4ClientSecretOptions): Promise<Experimental_RealtimeModelV4ClientSecretResult>;
    /**
     * Builds the WebSocket connection settings; the token is passed to the
     * server as the `access_token` query parameter of the returned URL.
     */
    getWebSocketConfig(options: {
        token: string;
        url: string;
    }): {
        url: string;
        protocols?: string[];
    };
    /** Converts one raw server message into one or more normalized events. */
    parseServerEvent(raw: unknown): Experimental_RealtimeModelV4ServerEvent | Experimental_RealtimeModelV4ServerEvent[];
    /** Converts a normalized client event into the provider message to send. */
    serializeClientEvent(event: Experimental_RealtimeModelV4ClientEvent): ReturnType<Experimental_RealtimeModelV4['serializeClientEvent']>;
    /** Builds the provider setup payload for the given session configuration. */
    buildSessionConfig(config: Experimental_RealtimeModelV4SessionConfig): Record<string, unknown>;
}
|
|
624
|
+
|
|
598
625
|
declare const VERSION: string;
|
|
599
626
|
|
|
600
|
-
export { type GoogleEmbeddingModelOptions, type GoogleErrorData, type GoogleFilesUploadOptions, type GoogleEmbeddingModelOptions as GoogleGenerativeAIEmbeddingProviderOptions, type GoogleImageModelOptions as GoogleGenerativeAIImageProviderOptions, type GoogleProvider as GoogleGenerativeAIProvider, type GoogleProviderMetadata as GoogleGenerativeAIProviderMetadata, type GoogleLanguageModelOptions as GoogleGenerativeAIProviderOptions, type GoogleProviderSettings as GoogleGenerativeAIProviderSettings, type GoogleVideoModelId as GoogleGenerativeAIVideoModelId, type GoogleVideoModelOptions as GoogleGenerativeAIVideoProviderOptions, type GoogleImageModelOptions, type GoogleInteractionsAgentName, type GoogleInteractionsModelId, type GoogleInteractionsProviderMetadata, type GoogleLanguageModelInteractionsOptions, type GoogleLanguageModelOptions, type GoogleProvider, type GoogleProviderMetadata, type GoogleProviderSettings, type GoogleSpeechModelId, type GoogleSpeechModelOptions, type GoogleVideoModelId, type GoogleVideoModelOptions, VERSION, createGoogle, createGoogle as createGoogleGenerativeAI, google };
|
|
627
|
+
export { GoogleRealtimeModel as Experimental_GoogleRealtimeModel, type GoogleRealtimeModelConfig as Experimental_GoogleRealtimeModelConfig, type GoogleEmbeddingModelOptions, type GoogleErrorData, type GoogleFilesUploadOptions, type GoogleEmbeddingModelOptions as GoogleGenerativeAIEmbeddingProviderOptions, type GoogleImageModelOptions as GoogleGenerativeAIImageProviderOptions, type GoogleProvider as GoogleGenerativeAIProvider, type GoogleProviderMetadata as GoogleGenerativeAIProviderMetadata, type GoogleLanguageModelOptions as GoogleGenerativeAIProviderOptions, type GoogleProviderSettings as GoogleGenerativeAIProviderSettings, type GoogleVideoModelId as GoogleGenerativeAIVideoModelId, type GoogleVideoModelOptions as GoogleGenerativeAIVideoProviderOptions, type GoogleImageModelOptions, type GoogleInteractionsAgentName, type GoogleInteractionsModelId, type GoogleInteractionsProviderMetadata, type GoogleLanguageModelInteractionsOptions, type GoogleLanguageModelOptions, type GoogleProvider, type GoogleProviderMetadata, type GoogleProviderSettings, type GoogleSpeechModelId, type GoogleSpeechModelOptions, type GoogleVideoModelId, type GoogleVideoModelOptions, VERSION, createGoogle, createGoogle as createGoogleGenerativeAI, google };
|
package/dist/index.js
CHANGED
|
@@ -7,7 +7,7 @@ import {
|
|
|
7
7
|
} from "@ai-sdk/provider-utils";
|
|
8
8
|
|
|
9
9
|
// src/version.ts
|
|
10
|
-
var VERSION = true ? "4.0.0-canary.78" : "0.0.0-test";
|
|
10
|
+
var VERSION = true ? "4.0.0-canary.80" : "0.0.0-test";
|
|
11
11
|
|
|
12
12
|
// src/google-embedding-model.ts
|
|
13
13
|
import {
|
|
@@ -5646,14 +5646,14 @@ async function cancelGoogleInteraction({
|
|
|
5646
5646
|
baseURL,
|
|
5647
5647
|
interactionId,
|
|
5648
5648
|
headers,
|
|
5649
|
-
fetch = getOriginalFetch()
|
|
5649
|
+
fetch: fetch2 = getOriginalFetch()
|
|
5650
5650
|
}) {
|
|
5651
5651
|
if (interactionId == null || interactionId.length === 0) {
|
|
5652
5652
|
return;
|
|
5653
5653
|
}
|
|
5654
5654
|
const url = `${baseURL}/interactions/${encodeURIComponent(interactionId)}/cancel`;
|
|
5655
5655
|
try {
|
|
5656
|
-
const response = await fetch(url, {
|
|
5656
|
+
const response = await fetch2(url, {
|
|
5657
5657
|
method: "POST",
|
|
5658
5658
|
headers: withUserAgentSuffix(
|
|
5659
5659
|
combineHeaders7({ "Content-Type": "application/json" }, headers),
|
|
@@ -5681,7 +5681,7 @@ async function pollGoogleInteractionUntilTerminal({
|
|
|
5681
5681
|
baseURL,
|
|
5682
5682
|
interactionId,
|
|
5683
5683
|
headers,
|
|
5684
|
-
fetch,
|
|
5684
|
+
fetch: fetch2,
|
|
5685
5685
|
abortSignal,
|
|
5686
5686
|
initialDelayMs = DEFAULT_INITIAL_DELAY_MS,
|
|
5687
5687
|
maxDelayMs = DEFAULT_MAX_DELAY_MS,
|
|
@@ -5695,7 +5695,7 @@ async function pollGoogleInteractionUntilTerminal({
|
|
|
5695
5695
|
const startedAt = Date.now();
|
|
5696
5696
|
let nextDelayMs = initialDelayMs;
|
|
5697
5697
|
const url = `${baseURL}/interactions/${encodeURIComponent(interactionId)}`;
|
|
5698
|
-
const cancelOnServer = () => cancelGoogleInteraction({ baseURL, interactionId, headers, fetch });
|
|
5698
|
+
const cancelOnServer = () => cancelGoogleInteraction({ baseURL, interactionId, headers, fetch: fetch2 });
|
|
5699
5699
|
try {
|
|
5700
5700
|
while (true) {
|
|
5701
5701
|
if (abortSignal == null ? void 0 : abortSignal.aborted) {
|
|
@@ -5720,7 +5720,7 @@ async function pollGoogleInteractionUntilTerminal({
|
|
|
5720
5720
|
googleInteractionsResponseSchema
|
|
5721
5721
|
),
|
|
5722
5722
|
abortSignal,
|
|
5723
|
-
fetch
|
|
5723
|
+
fetch: fetch2
|
|
5724
5724
|
});
|
|
5725
5725
|
if (isTerminalStatus(response.status)) {
|
|
5726
5726
|
return { response, rawResponse, responseHeaders };
|
|
@@ -5896,7 +5896,7 @@ function streamGoogleInteractionEvents({
|
|
|
5896
5896
|
baseURL,
|
|
5897
5897
|
interactionId,
|
|
5898
5898
|
headers,
|
|
5899
|
-
fetch,
|
|
5899
|
+
fetch: fetch2,
|
|
5900
5900
|
abortSignal,
|
|
5901
5901
|
maxRetries = DEFAULT_MAX_RETRIES,
|
|
5902
5902
|
retryDelayMs = DEFAULT_RETRY_DELAY_MS
|
|
@@ -5944,7 +5944,7 @@ function streamGoogleInteractionEvents({
|
|
|
5944
5944
|
googleInteractionsEventSchema
|
|
5945
5945
|
),
|
|
5946
5946
|
abortSignal: effectiveSignal,
|
|
5947
|
-
fetch
|
|
5947
|
+
fetch: fetch2
|
|
5948
5948
|
});
|
|
5949
5949
|
return stream.getReader();
|
|
5950
5950
|
}
|
|
@@ -6037,7 +6037,7 @@ function streamGoogleInteractionEvents({
|
|
|
6037
6037
|
baseURL,
|
|
6038
6038
|
interactionId,
|
|
6039
6039
|
headers,
|
|
6040
|
-
fetch
|
|
6040
|
+
fetch: fetch2
|
|
6041
6041
|
});
|
|
6042
6042
|
}
|
|
6043
6043
|
}
|
|
@@ -6690,6 +6690,388 @@ function pruneUndefined(obj) {
|
|
|
6690
6690
|
return result;
|
|
6691
6691
|
}
|
|
6692
6692
|
|
|
6693
|
+
// src/realtime/google-realtime-event-mapper.ts
|
|
6694
|
+
import { safeParseJSON } from "@ai-sdk/provider-utils";
|
|
6695
|
+
/**
 * Maps raw Google realtime server messages to normalized realtime events and
 * normalized client events to Google messages.
 *
 * The mapper is stateful: it tracks the current model turn (used to synthesize
 * `google-resp-N` / `google-item-N` ids), which kinds of output the turn has
 * produced, and the input audio rate announced by the last session update.
 */
var GoogleRealtimeEventMapper = class {
  constructor() {
    // Index of the current model turn; embedded in every synthesized id.
    this.turnCounter = 0;
    // Output kinds seen in the current turn. They decide which `*-done`
    // events are emitted when `turnComplete` arrives.
    this.hasAudio = false;
    this.hasText = false;
    this.hasTranscript = false;
    // Set on `turnComplete`; the counter only advances once new content starts.
    this.turnClosed = false;
    // Rate written into the `audio/pcm;rate=` mime type of appended audio.
    this.inputAudioRate = 16e3;
  }
  /** Synthesized response id for the current turn. */
  get responseId() {
    return `google-resp-${this.turnCounter}`;
  }
  /** Synthesized output item id for the current turn. */
  get itemId() {
    return `google-item-${this.turnCounter}`;
  }
  /**
   * Rolls over to the next turn lazily, only once new model content actually
   * arrives. `turnComplete` merely marks the current turn closed; the counter
   * is not advanced until the next response begins. This keeps a transcript
   * that arrives shortly after `turnComplete` attached to the turn it belongs
   * to, since Google delivers transcription independently with no guaranteed
   * ordering relative to `turnComplete`.
   */
  beginTurnIfClosed() {
    if (!this.turnClosed) return;
    this.turnCounter++;
    this.hasAudio = false;
    this.hasText = false;
    this.hasTranscript = false;
    this.turnClosed = false;
  }
  /**
   * Converts one raw server message into normalized event(s).
   *
   * @param raw The decoded server message.
   * @returns A single event, or an array of events when the message maps to
   *   several. Messages that are not recognized become a `custom` event whose
   *   `rawType` is the message's first key, or `"unknown"` when there is none.
   */
  parseServerEvent(raw) {
    // The parameter is untyped input: null or a primitive cannot be a known
    // message, so report it as custom instead of throwing on property access.
    if (raw == null || typeof raw !== "object") {
      return { type: "custom", rawType: "unknown", raw };
    }
    const data = raw;
    if (data.setupComplete != null) {
      return { type: "session-created", raw };
    }
    if (data.toolCall != null) {
      // A tool call is new model output, so it may open the next turn.
      this.beginTurnIfClosed();
      const functionCalls = data.toolCall.functionCalls != null ? data.toolCall.functionCalls : [];
      return functionCalls.flatMap((functionCall) => {
        // Arguments arrive complete, so the delta and done events carry the
        // same serialized string.
        const args = JSON.stringify(functionCall.args != null ? functionCall.args : {});
        return [
          {
            type: "function-call-arguments-delta",
            responseId: this.responseId,
            itemId: this.itemId,
            callId: functionCall.id,
            delta: args,
            raw
          },
          {
            type: "function-call-arguments-done",
            responseId: this.responseId,
            itemId: this.itemId,
            callId: functionCall.id,
            name: functionCall.name,
            arguments: args,
            raw
          }
        ];
      });
    }
    if (data.toolCallCancellation != null) {
      return {
        type: "custom",
        rawType: "toolCallCancellation",
        raw
      };
    }
    if (data.serverContent != null) {
      return this.parseServerContent(data.serverContent, raw);
    }
    if (data.inputTranscription != null && data.inputTranscription.text != null) {
      return {
        type: "input-transcription-completed",
        itemId: `google-input-${this.turnCounter}`,
        transcript: data.inputTranscription.text,
        raw
      };
    }
    const firstKey = Object.keys(data)[0];
    return {
      type: "custom",
      rawType: firstKey != null ? firstKey : "unknown",
      raw
    };
  }
  /**
   * Converts the `serverContent` part of a server message into normalized
   * event(s), updating the turn state along the way.
   *
   * @param serverContent The `serverContent` object of the message.
   * @param raw The complete raw message, attached to every emitted event.
   * @returns One event, an array of events, or a `custom` event when the
   *   content produced nothing.
   */
  parseServerContent(serverContent, raw) {
    const events = [];
    if (serverContent.interrupted) {
      events.push({
        type: "speech-started",
        raw
      });
    }
    const parts = serverContent.modelTurn != null ? serverContent.modelTurn.parts : void 0;
    if (parts) {
      this.beginTurnIfClosed();
      for (const part of parts) {
        if (part.inlineData != null && part.inlineData.data) {
          this.hasAudio = true;
          events.push({
            type: "audio-delta",
            responseId: this.responseId,
            itemId: this.itemId,
            delta: part.inlineData.data,
            raw
          });
        }
        if (part.text) {
          this.hasText = true;
          events.push({
            type: "text-delta",
            responseId: this.responseId,
            itemId: this.itemId,
            delta: part.text,
            raw
          });
        }
      }
    }
    // Transcripts deliberately do not open a new turn: a late transcript must
    // stay attached to the turn that was just closed.
    if (serverContent.outputTranscription != null && serverContent.outputTranscription.text) {
      this.hasTranscript = true;
      events.push({
        type: "audio-transcript-delta",
        responseId: this.responseId,
        itemId: this.itemId,
        delta: serverContent.outputTranscription.text,
        raw
      });
    }
    if (serverContent.inputTranscription != null && serverContent.inputTranscription.text) {
      events.push({
        type: "input-transcription-completed",
        itemId: `google-input-${this.turnCounter}`,
        transcript: serverContent.inputTranscription.text,
        raw
      });
    }
    if (serverContent.turnComplete) {
      // Emit a `*-done` event only for the output kinds this turn produced.
      if (this.hasAudio) {
        events.push({
          type: "audio-done",
          responseId: this.responseId,
          itemId: this.itemId,
          raw
        });
      }
      if (this.hasText) {
        events.push({
          type: "text-done",
          responseId: this.responseId,
          itemId: this.itemId,
          raw
        });
      }
      if (this.hasTranscript) {
        events.push({
          type: "audio-transcript-done",
          responseId: this.responseId,
          itemId: this.itemId,
          raw
        });
      }
      events.push({
        type: "response-done",
        responseId: this.responseId,
        status: "completed",
        raw
      });
      this.turnClosed = true;
    }
    if (events.length === 0) {
      return { type: "custom", rawType: "serverContent", raw };
    }
    return events.length === 1 ? events[0] : events;
  }
  /**
   * Converts a normalized client event into the Google message to send.
   *
   * @param event The normalized client event.
   * @param modelId Model id used when building the setup payload.
   * @returns The message object, a promise of it for function call outputs,
   *   or `null` for events that have no mapping here.
   */
  serializeClientEvent(event, modelId) {
    switch (event.type) {
      case "session-update": {
        // Remember the announced rate so later audio chunks are labelled
        // with it.
        const format = event.config.inputAudioFormat;
        if (format != null && format.rate != null) {
          this.inputAudioRate = format.rate;
        }
        return {
          setup: buildGoogleSessionConfig(event.config, modelId)
        };
      }
      case "input-audio-append":
        return {
          realtimeInput: {
            audio: {
              data: event.audio,
              mimeType: `audio/pcm;rate=${this.inputAudioRate}`
            }
          }
        };
      case "input-audio-commit":
      case "input-audio-clear":
      case "response-create":
      case "response-cancel":
      case "conversation-item-truncate":
        return null;
      case "conversation-item-create": {
        const item = event.item;
        switch (item.type) {
          case "text-message":
            return {
              realtimeInput: {
                text: item.text
              }
            };
          case "function-call-output":
            return serializeFunctionCallOutput(item);
          case "audio-message":
            return null;
        }
        break;
      }
    }
    return null;
  }
};
|
|
6914
|
+
/**
 * Builds the `toolResponse` message for a function call output item.
 *
 * `item.output` is parsed as JSON. A parsed plain object is sent as the
 * function response as-is. Anything else (text that is not JSON, or JSON that
 * parses to a primitive, array, or null) is wrapped as `{ output: ... }` so
 * the result is still delivered as an object instead of being dropped.
 *
 * @param item Function call output item; `callId`, `name` and `output` are read.
 * @returns The message containing a single function response.
 */
async function serializeFunctionCallOutput(item) {
  const parseResult = await safeParseJSON({ text: item.output });
  let response;
  if (!parseResult.success) {
    // Not JSON: forward the raw text rather than an empty object.
    response = { output: item.output };
  } else {
    const value = parseResult.value;
    const isPlainObject = value !== null && typeof value === "object" && !Array.isArray(value);
    response = isPlainObject ? value : { output: value };
  }
  return {
    toolResponse: {
      functionResponses: [
        {
          id: item.callId,
          name: item.name,
          response
        }
      ]
    }
  };
}
|
|
6929
|
+
/**
 * Builds the Google setup payload for a realtime session.
 *
 * @param config Normalized session configuration; may be null or undefined,
 *   in which case only the model and the default `AUDIO` modality are set.
 * @param modelId Model id, converted with `getModelPath`.
 * @returns The setup object. Entries of `config.providerOptions` are copied
 *   onto it last and therefore override the generated keys.
 */
function buildGoogleSessionConfig(config, modelId) {
  const settings = config == null ? {} : config;
  const setup = { model: getModelPath(modelId) };

  // Response modalities default to audio when the caller does not choose.
  const generationConfig = {};
  generationConfig.responseModalities = settings.outputModalities != null ? settings.outputModalities.map((modality) => modality.toUpperCase()) : ["AUDIO"];
  if (settings.voice != null) {
    const prebuiltVoiceConfig = { voiceName: settings.voice };
    generationConfig.speechConfig = { voiceConfig: { prebuiltVoiceConfig } };
  }
  setup.generationConfig = generationConfig;

  if (settings.instructions != null) {
    setup.systemInstruction = { parts: [{ text: settings.instructions }] };
  }

  const tools = settings.tools;
  if (tools != null && tools.length > 0) {
    const functionDeclarations = tools.map((tool) => {
      return {
        name: tool.name,
        description: tool.description,
        parameters: convertJSONSchemaToOpenAPISchema(tool.parameters)
      };
    });
    setup.tools = [{ functionDeclarations }];
  }

  // Transcription is switched on with an empty object; the option's own
  // contents are not forwarded.
  if (settings.inputAudioTranscription != null) {
    setup.inputAudioTranscription = {};
  }
  if (settings.outputAudioTranscription != null) {
    setup.outputAudioTranscription = {};
  }

  if (settings.providerOptions != null) {
    Object.assign(setup, settings.providerOptions);
  }
  return setup;
}
|
|
6978
|
+
|
|
6979
|
+
// src/realtime/google-realtime-model.ts
|
|
6980
|
+
// Service path appended after `/ws/` when the realtime WebSocket URL is built.
var realtimeWebSocketPath = "google.ai.generativelanguage.v1alpha.GenerativeService.BidiGenerateContentConstrained";
|
|
6981
|
+
/**
 * Parses `baseURL` and removes a trailing `v1beta` / `v1alpha` path segment,
 * so callers can append their own versioned or WebSocket path.
 *
 * A trailing slash after the version segment (`.../v1beta/`) is tolerated.
 * URLs that do not end in a version segment are returned with their path
 * untouched.
 *
 * @param baseURL Absolute base URL string.
 * @returns A new `URL` object without the trailing version segment.
 * @throws TypeError when `baseURL` is not a valid absolute URL.
 */
function getRealtimeBaseURL(baseURL) {
  const url = new URL(baseURL);
  const pathSegments = url.pathname.split("/");
  // Skip empty segments produced by trailing slashes when looking for the
  // version; index 0 is the empty segment before the leading slash.
  let end = pathSegments.length;
  while (end > 1 && pathSegments[end - 1] === "") {
    end--;
  }
  const version = pathSegments[end - 1];
  if (version === "v1beta" || version === "v1alpha") {
    url.pathname = pathSegments.slice(0, end - 1).join("/") || "/";
  }
  return url;
}
|
|
6991
|
+
/**
 * Returns the URL of the `v1alpha/auth_tokens` endpoint for `baseURL`.
 *
 * @param baseURL Absolute base URL string.
 * @returns The endpoint URL as a string.
 */
function getAuthTokensURL(baseURL) {
  const tokensURL = getRealtimeBaseURL(baseURL);
  // Strip one trailing slash so the join never produces "//".
  const basePath = tokensURL.pathname.replace(/\/$/, "");
  tokensURL.pathname = basePath + "/v1alpha/auth_tokens";
  return tokensURL.toString();
}
|
|
6996
|
+
/**
 * Returns the realtime WebSocket URL for `baseURL`: `https:` becomes `wss:`,
 * every other protocol becomes `ws:`, and `/ws/<realtimeWebSocketPath>` is
 * appended to the path.
 *
 * @param baseURL Absolute base URL string.
 * @returns The WebSocket URL as a string.
 */
function getWebSocketURL(baseURL) {
  const socketURL = getRealtimeBaseURL(baseURL);
  const isSecure = socketURL.protocol === "https:";
  socketURL.protocol = isSecure ? "wss:" : "ws:";
  // Strip one trailing slash so the join never produces "//".
  const basePath = socketURL.pathname.replace(/\/$/, "");
  socketURL.pathname = `${basePath}/ws/${realtimeWebSocketPath}`;
  return socketURL.toString();
}
|
|
7002
|
+
/**
 * Appends `name=value` to `url`, using `&` when the URL already has a query
 * string and `?` otherwise. The value is URI-encoded.
 */
function appendRealtimeQueryParameter(url, name, value) {
  const separator = url.includes("?") ? "&" : "?";
  return `${url}${separator}${name}=${encodeURIComponent(value)}`;
}

/**
 * Google realtime model: creates auth tokens for a session and delegates
 * event translation to a `GoogleRealtimeEventMapper`.
 */
var GoogleRealtimeModel = class {
  /**
   * @param modelId Model id used in setup payloads.
   * @param config Provider name, base URL, header factory and optional fetch.
   */
  constructor(modelId, config) {
    this.specificationVersion = "v4";
    this.mapper = new GoogleRealtimeEventMapper();
    this.modelId = modelId;
    this.provider = config.provider;
    this.config = config;
  }
  /**
   * Requests an auth token from the `auth_tokens` endpoint.
   *
   * @param options `sessionConfig` is embedded in the token request as the
   *   setup payload; `expiresAfterSeconds` (default 60) is the window in
   *   which the token may be used to open a session.
   * @returns The token, the WebSocket URL to connect to, and the expiry as
   *   epoch seconds when the response carries an `expireTime`.
   * @throws Error when no API key is configured, when the request fails, or
   *   when the response does not contain a token name.
   */
  async doCreateClientSecret(options) {
    const fetchFn = this.config.fetch != null ? this.config.fetch : fetch;
    const headers = this.config.headers();
    const apiKey = headers["x-goog-api-key"];
    if (!apiKey) {
      throw new Error(
        "Google Generative AI API key is required for realtime token creation."
      );
    }
    const now = Date.now();
    const openWindowMs = (options.expiresAfterSeconds != null ? options.expiresAfterSeconds : 60) * 1e3;
    // `newSessionExpireTime` marks the end of the open window; `expireTime`
    // is set 30 minutes after that.
    const newSessionExpireTime = new Date(now + openWindowMs).toISOString();
    const expireTime = new Date(
      now + openWindowMs + 30 * 60 * 1e3
    ).toISOString();
    const setupPayload = buildGoogleSessionConfig(
      options.sessionConfig,
      this.modelId
    );
    const response = await fetchFn(
      appendRealtimeQueryParameter(
        getAuthTokensURL(this.config.baseURL),
        "key",
        apiKey
      ),
      {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify({
          // `uses: 0` means no limit is applied to how many times the token can
          // start a session (per the AuthToken spec). An unset value would
          // default to 1, which breaks WebSocket reconnects within the session.
          uses: 0,
          expireTime,
          newSessionExpireTime,
          bidiGenerateContentSetup: setupPayload
        })
      }
    );
    if (!response.ok) {
      const text = await response.text();
      throw new Error(
        `Google realtime auth token request failed: ${response.status} ${text}`
      );
    }
    const data = await response.json();
    // Fail here rather than hand back an undefined token that would only
    // surface later as an opaque connection error.
    if (data == null || typeof data.name !== "string" || data.name.length === 0) {
      throw new Error(
        "Google realtime auth token response did not include a token name."
      );
    }
    return {
      token: data.name,
      url: getWebSocketURL(this.config.baseURL),
      expiresAt: data.expireTime ? Math.floor(new Date(data.expireTime).getTime() / 1e3) : void 0
    };
  }
  /**
   * Builds the WebSocket connection settings.
   *
   * @param options `url` is the WebSocket URL and `token` the auth token.
   * @returns The URL with the token added as the `access_token` query
   *   parameter; an existing query string on `url` is preserved.
   */
  getWebSocketConfig(options) {
    return {
      url: appendRealtimeQueryParameter(options.url, "access_token", options.token)
    };
  }
  /** Converts one raw server message into normalized event(s). */
  parseServerEvent(raw) {
    return this.mapper.parseServerEvent(raw);
  }
  /** Converts a normalized client event into the message to send. */
  serializeClientEvent(event) {
    return this.mapper.serializeClientEvent(event, this.modelId);
  }
  /** Builds the setup payload for `config` using this model's id. */
  buildSessionConfig(config) {
    return buildGoogleSessionConfig(config, this.modelId);
  }
};
|
|
7074
|
+
|
|
6693
7075
|
// src/google-provider.ts
|
|
6694
7076
|
function createGoogle(options = {}) {
|
|
6695
7077
|
var _a, _b;
|
|
@@ -6756,12 +7138,35 @@ function createGoogle(options = {}) {
|
|
|
6756
7138
|
generateId: (_a2 = options.generateId) != null ? _a2 : generateId2
|
|
6757
7139
|
});
|
|
6758
7140
|
};
|
|
7141
|
+
const createRealtimeModel = (modelId) => new GoogleRealtimeModel(modelId, {
|
|
7142
|
+
provider: `${providerName}.realtime`,
|
|
7143
|
+
baseURL,
|
|
7144
|
+
headers: getHeaders,
|
|
7145
|
+
fetch: options.fetch
|
|
7146
|
+
});
|
|
6759
7147
|
const createSpeechModel = (modelId) => new GoogleSpeechModel(modelId, {
|
|
6760
7148
|
provider: `${providerName}.speech`,
|
|
6761
7149
|
baseURL,
|
|
6762
7150
|
headers: getHeaders,
|
|
6763
7151
|
fetch: options.fetch
|
|
6764
7152
|
});
|
|
7153
|
+
const experimentalRealtimeFactory = Object.assign(
|
|
7154
|
+
(modelId) => createRealtimeModel(modelId),
|
|
7155
|
+
{
|
|
7156
|
+
getToken: async (tokenOptions) => {
|
|
7157
|
+
const model = createRealtimeModel(tokenOptions.model);
|
|
7158
|
+
const secret = await model.doCreateClientSecret({
|
|
7159
|
+
sessionConfig: tokenOptions.sessionConfig,
|
|
7160
|
+
expiresAfterSeconds: tokenOptions.expiresAfterSeconds
|
|
7161
|
+
});
|
|
7162
|
+
return {
|
|
7163
|
+
token: secret.token,
|
|
7164
|
+
url: secret.url,
|
|
7165
|
+
expiresAt: secret.expiresAt
|
|
7166
|
+
};
|
|
7167
|
+
}
|
|
7168
|
+
}
|
|
7169
|
+
);
|
|
6765
7170
|
const createInteractionsModel = (modelIdOrAgent) => {
|
|
6766
7171
|
var _a2;
|
|
6767
7172
|
return new GoogleInteractionsLanguageModel(
|
|
@@ -6795,6 +7200,7 @@ function createGoogle(options = {}) {
|
|
|
6795
7200
|
provider.imageModel = createImageModel;
|
|
6796
7201
|
provider.video = createVideoModel;
|
|
6797
7202
|
provider.videoModel = createVideoModel;
|
|
7203
|
+
provider.experimental_realtime = experimentalRealtimeFactory;
|
|
6798
7204
|
provider.files = createFiles;
|
|
6799
7205
|
provider.speech = createSpeechModel;
|
|
6800
7206
|
provider.speechModel = createSpeechModel;
|
|
@@ -6804,6 +7210,7 @@ function createGoogle(options = {}) {
|
|
|
6804
7210
|
}
|
|
6805
7211
|
var google = createGoogle();
|
|
6806
7212
|
export {
|
|
7213
|
+
GoogleRealtimeModel as Experimental_GoogleRealtimeModel,
|
|
6807
7214
|
VERSION,
|
|
6808
7215
|
createGoogle,
|
|
6809
7216
|
createGoogle as createGoogleGenerativeAI,
|