@ai-sdk/google 4.0.0-canary.77 → 4.0.0-canary.79
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +26 -0
- package/dist/index.d.ts +29 -2
- package/dist/index.js +436 -15
- package/dist/index.js.map +1 -1
- package/dist/internal/index.d.ts +32 -2
- package/dist/internal/index.js +322 -48
- package/dist/internal/index.js.map +1 -1
- package/docs/15-google.mdx +26 -0
- package/package.json +3 -3
- package/src/google-provider.ts +33 -0
- package/src/google-speech-model.ts +31 -6
- package/src/index.ts +2 -0
- package/src/internal/index.ts +1 -0
- package/src/realtime/google-realtime-event-mapper.ts +383 -0
- package/src/realtime/google-realtime-model-options.ts +3 -0
- package/src/realtime/google-realtime-model.ts +160 -0
- package/src/realtime/index.ts +2 -0
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,31 @@
|
|
|
1
1
|
# @ai-sdk/google
|
|
2
2
|
|
|
3
|
+
## 4.0.0-canary.79
|
|
4
|
+
|
|
5
|
+
### Patch Changes
|
|
6
|
+
|
|
7
|
+
- ce769dd: feat(provider): add experimental Realtime API support for voice conversations
|
|
8
|
+
|
|
9
|
+
Adds first-class support for realtime (speech-to-speech) APIs:
|
|
10
|
+
|
|
11
|
+
- `Experimental_RealtimeModelV4` spec in `@ai-sdk/provider` with normalized event types and factory
|
|
12
|
+
- OpenAI, Google, and xAI realtime provider implementations
|
|
13
|
+
- `openai.experimental_realtime()` / `google.experimental_realtime()` / `xai.experimental_realtime()` work in both server and browser
|
|
14
|
+
- `.getToken()` static method on each provider for server-side ephemeral token creation
|
|
15
|
+
- `experimental_getRealtimeToolDefinitions` helper for provider session tool definitions
|
|
16
|
+
- `experimental_useRealtime` hook in `@ai-sdk/react` returning `UIMessage[]` (aligned with `useChat`), with `onToolCall` and `addToolOutput` for client-driven tool execution
|
|
17
|
+
- `inputAudioTranscription` session config for showing transcribed user audio messages when supported by the provider
|
|
18
|
+
|
|
19
|
+
- Updated dependencies [ce769dd]
|
|
20
|
+
- @ai-sdk/provider@4.0.0-canary.18
|
|
21
|
+
- @ai-sdk/provider-utils@5.0.0-canary.46
|
|
22
|
+
|
|
23
|
+
## 4.0.0-canary.78
|
|
24
|
+
|
|
25
|
+
### Patch Changes
|
|
26
|
+
|
|
27
|
+
- 2ce3c65: feat(provider/google-vertex): add Gemini text-to-speech (speech) model support
|
|
28
|
+
|
|
3
29
|
## 4.0.0-canary.77
|
|
4
30
|
|
|
5
31
|
### Patch Changes
|
package/dist/index.d.ts
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import * as _ai_sdk_provider_utils from '@ai-sdk/provider-utils';
|
|
2
2
|
import { InferSchema, FetchFunction } from '@ai-sdk/provider-utils';
|
|
3
|
-
import { ProviderV4, LanguageModelV4, ImageModelV4, EmbeddingModelV4, Experimental_VideoModelV4, SpeechModelV4, FilesV4 } from '@ai-sdk/provider';
|
|
3
|
+
import { ProviderV4, LanguageModelV4, ImageModelV4, EmbeddingModelV4, Experimental_VideoModelV4, SpeechModelV4, FilesV4, Experimental_RealtimeFactoryV4, Experimental_RealtimeModelV4, Experimental_RealtimeModelV4ClientSecretOptions, Experimental_RealtimeModelV4ClientSecretResult, Experimental_RealtimeModelV4ServerEvent, Experimental_RealtimeModelV4ClientEvent, Experimental_RealtimeModelV4SessionConfig } from '@ai-sdk/provider';
|
|
4
4
|
|
|
5
5
|
declare const googleErrorDataSchema: _ai_sdk_provider_utils.LazySchema<{
|
|
6
6
|
error: {
|
|
@@ -554,6 +554,7 @@ interface GoogleProvider extends ProviderV4 {
|
|
|
554
554
|
} | {
|
|
555
555
|
managedAgent: string;
|
|
556
556
|
}): LanguageModelV4;
|
|
557
|
+
experimental_realtime: Experimental_RealtimeFactoryV4;
|
|
557
558
|
tools: typeof googleTools;
|
|
558
559
|
}
|
|
559
560
|
interface GoogleProviderSettings {
|
|
@@ -595,6 +596,32 @@ declare function createGoogle(options?: GoogleProviderSettings): GoogleProvider;
|
|
|
595
596
|
*/
|
|
596
597
|
declare const google: GoogleProvider;
|
|
597
598
|
|
|
599
|
+
type GoogleRealtimeModelConfig = {
|
|
600
|
+
provider: string;
|
|
601
|
+
baseURL: string;
|
|
602
|
+
headers: () => Record<string, string | undefined>;
|
|
603
|
+
fetch?: FetchFunction;
|
|
604
|
+
};
|
|
605
|
+
declare class GoogleRealtimeModel implements Experimental_RealtimeModelV4 {
|
|
606
|
+
readonly specificationVersion: "v4";
|
|
607
|
+
readonly provider: string;
|
|
608
|
+
readonly modelId: string;
|
|
609
|
+
private readonly config;
|
|
610
|
+
private readonly mapper;
|
|
611
|
+
constructor(modelId: string, config: GoogleRealtimeModelConfig);
|
|
612
|
+
doCreateClientSecret(options: Experimental_RealtimeModelV4ClientSecretOptions): Promise<Experimental_RealtimeModelV4ClientSecretResult>;
|
|
613
|
+
getWebSocketConfig(options: {
|
|
614
|
+
token: string;
|
|
615
|
+
url: string;
|
|
616
|
+
}): {
|
|
617
|
+
url: string;
|
|
618
|
+
protocols?: string[];
|
|
619
|
+
};
|
|
620
|
+
parseServerEvent(raw: unknown): Experimental_RealtimeModelV4ServerEvent | Experimental_RealtimeModelV4ServerEvent[];
|
|
621
|
+
serializeClientEvent(event: Experimental_RealtimeModelV4ClientEvent): ReturnType<Experimental_RealtimeModelV4['serializeClientEvent']>;
|
|
622
|
+
buildSessionConfig(config: Experimental_RealtimeModelV4SessionConfig): Record<string, unknown>;
|
|
623
|
+
}
|
|
624
|
+
|
|
598
625
|
declare const VERSION: string;
|
|
599
626
|
|
|
600
|
-
export { type GoogleEmbeddingModelOptions, type GoogleErrorData, type GoogleFilesUploadOptions, type GoogleEmbeddingModelOptions as GoogleGenerativeAIEmbeddingProviderOptions, type GoogleImageModelOptions as GoogleGenerativeAIImageProviderOptions, type GoogleProvider as GoogleGenerativeAIProvider, type GoogleProviderMetadata as GoogleGenerativeAIProviderMetadata, type GoogleLanguageModelOptions as GoogleGenerativeAIProviderOptions, type GoogleProviderSettings as GoogleGenerativeAIProviderSettings, type GoogleVideoModelId as GoogleGenerativeAIVideoModelId, type GoogleVideoModelOptions as GoogleGenerativeAIVideoProviderOptions, type GoogleImageModelOptions, type GoogleInteractionsAgentName, type GoogleInteractionsModelId, type GoogleInteractionsProviderMetadata, type GoogleLanguageModelInteractionsOptions, type GoogleLanguageModelOptions, type GoogleProvider, type GoogleProviderMetadata, type GoogleProviderSettings, type GoogleSpeechModelId, type GoogleSpeechModelOptions, type GoogleVideoModelId, type GoogleVideoModelOptions, VERSION, createGoogle, createGoogle as createGoogleGenerativeAI, google };
|
|
627
|
+
export { GoogleRealtimeModel as Experimental_GoogleRealtimeModel, type GoogleRealtimeModelConfig as Experimental_GoogleRealtimeModelConfig, type GoogleEmbeddingModelOptions, type GoogleErrorData, type GoogleFilesUploadOptions, type GoogleEmbeddingModelOptions as GoogleGenerativeAIEmbeddingProviderOptions, type GoogleImageModelOptions as GoogleGenerativeAIImageProviderOptions, type GoogleProvider as GoogleGenerativeAIProvider, type GoogleProviderMetadata as GoogleGenerativeAIProviderMetadata, type GoogleLanguageModelOptions as GoogleGenerativeAIProviderOptions, type GoogleProviderSettings as GoogleGenerativeAIProviderSettings, type GoogleVideoModelId as GoogleGenerativeAIVideoModelId, type GoogleVideoModelOptions as GoogleGenerativeAIVideoProviderOptions, type GoogleImageModelOptions, type GoogleInteractionsAgentName, type GoogleInteractionsModelId, type GoogleInteractionsProviderMetadata, type GoogleLanguageModelInteractionsOptions, type GoogleLanguageModelOptions, type GoogleProvider, type GoogleProviderMetadata, type GoogleProviderSettings, type GoogleSpeechModelId, type GoogleSpeechModelOptions, type GoogleVideoModelId, type GoogleVideoModelOptions, VERSION, createGoogle, createGoogle as createGoogleGenerativeAI, google };
|
package/dist/index.js
CHANGED
|
@@ -7,7 +7,7 @@ import {
|
|
|
7
7
|
} from "@ai-sdk/provider-utils";
|
|
8
8
|
|
|
9
9
|
// src/version.ts
|
|
10
|
-
var VERSION = true ? "4.0.0-canary.77" : "0.0.0-test";
|
|
10
|
+
var VERSION = true ? "4.0.0-canary.79" : "0.0.0-test";
|
|
11
11
|
|
|
12
12
|
// src/google-embedding-model.ts
|
|
13
13
|
import {
|
|
@@ -3675,11 +3675,25 @@ var GoogleSpeechModel = class _GoogleSpeechModel {
|
|
|
3675
3675
|
providerOptions
|
|
3676
3676
|
}) {
|
|
3677
3677
|
const warnings = [];
|
|
3678
|
-
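const googleOptions = await parseProviderOptions6({
|
|
3679
|
-
provider: "google",
|
|
3680
|
-
providerOptions,
|
|
3681
|
-
schema: googleSpeechProviderOptionsSchema
|
|
3682
|
-
});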
|
|
3678
|
+
const providerOptionsNames = this.config.provider.includes("vertex") ? ["googleVertex", "vertex"] : ["google"];
|
|
3679
|
+
let googleOptions;
|
|
3680
|
+
for (const name of providerOptionsNames) {
|
|
3681
|
+
googleOptions = await parseProviderOptions6({
|
|
3682
|
+
provider: name,
|
|
3683
|
+
providerOptions,
|
|
3684
|
+
schema: googleSpeechProviderOptionsSchema
|
|
3685
|
+
});
|
|
3686
|
+
if (googleOptions != null) {
|
|
3687
|
+
break;
|
|
3688
|
+
}
|
|
3689
|
+
}
|
|
3690
|
+
if (googleOptions == null && !providerOptionsNames.includes("google")) {
|
|
3691
|
+
googleOptions = await parseProviderOptions6({
|
|
3692
|
+
provider: "google",
|
|
3693
|
+
providerOptions,
|
|
3694
|
+
schema: googleSpeechProviderOptionsSchema
|
|
3695
|
+
});
|
|
3696
|
+
}
|
|
3683
3697
|
const multiSpeakerVoiceConfig = googleOptions == null ? void 0 : googleOptions.multiSpeakerVoiceConfig;
|
|
3684
3698
|
const speechConfig = multiSpeakerVoiceConfig ? { multiSpeakerVoiceConfig } : { voiceConfig: { prebuiltVoiceConfig: { voiceName: voice } } };
|
|
3685
3699
|
let promptText = text;
|
|
@@ -3719,7 +3733,7 @@ var GoogleSpeechModel = class _GoogleSpeechModel {
|
|
|
3719
3733
|
});
|
|
3720
3734
|
}
|
|
3721
3735
|
const requestBody = {
|
|
3722
|
-
contents: [{ parts: [{ text: promptText }] }],
|
|
3736
|
+
contents: [{ role: "user", parts: [{ text: promptText }] }],
|
|
3723
3737
|
generationConfig: {
|
|
3724
3738
|
responseModalities: ["AUDIO"],
|
|
3725
3739
|
speechConfig
|
|
@@ -5632,14 +5646,14 @@ async function cancelGoogleInteraction({
|
|
|
5632
5646
|
baseURL,
|
|
5633
5647
|
interactionId,
|
|
5634
5648
|
headers,
|
|
5635
|
-
fetch = getOriginalFetch()
|
|
5649
|
+
fetch: fetch2 = getOriginalFetch()
|
|
5636
5650
|
}) {
|
|
5637
5651
|
if (interactionId == null || interactionId.length === 0) {
|
|
5638
5652
|
return;
|
|
5639
5653
|
}
|
|
5640
5654
|
const url = `${baseURL}/interactions/${encodeURIComponent(interactionId)}/cancel`;
|
|
5641
5655
|
try {
|
|
5642
|
-
const response = await fetch(url, {
|
|
5656
|
+
const response = await fetch2(url, {
|
|
5643
5657
|
method: "POST",
|
|
5644
5658
|
headers: withUserAgentSuffix(
|
|
5645
5659
|
combineHeaders7({ "Content-Type": "application/json" }, headers),
|
|
@@ -5667,7 +5681,7 @@ async function pollGoogleInteractionUntilTerminal({
|
|
|
5667
5681
|
baseURL,
|
|
5668
5682
|
interactionId,
|
|
5669
5683
|
headers,
|
|
5670
|
-
fetch,
|
|
5684
|
+
fetch: fetch2,
|
|
5671
5685
|
abortSignal,
|
|
5672
5686
|
initialDelayMs = DEFAULT_INITIAL_DELAY_MS,
|
|
5673
5687
|
maxDelayMs = DEFAULT_MAX_DELAY_MS,
|
|
@@ -5681,7 +5695,7 @@ async function pollGoogleInteractionUntilTerminal({
|
|
|
5681
5695
|
const startedAt = Date.now();
|
|
5682
5696
|
let nextDelayMs = initialDelayMs;
|
|
5683
5697
|
const url = `${baseURL}/interactions/${encodeURIComponent(interactionId)}`;
|
|
5684
|
-
const cancelOnServer = () => cancelGoogleInteraction({ baseURL, interactionId, headers, fetch });
|
|
5698
|
+
const cancelOnServer = () => cancelGoogleInteraction({ baseURL, interactionId, headers, fetch: fetch2 });
|
|
5685
5699
|
try {
|
|
5686
5700
|
while (true) {
|
|
5687
5701
|
if (abortSignal == null ? void 0 : abortSignal.aborted) {
|
|
@@ -5706,7 +5720,7 @@ async function pollGoogleInteractionUntilTerminal({
|
|
|
5706
5720
|
googleInteractionsResponseSchema
|
|
5707
5721
|
),
|
|
5708
5722
|
abortSignal,
|
|
5709
|
-
fetch
|
|
5723
|
+
fetch: fetch2
|
|
5710
5724
|
});
|
|
5711
5725
|
if (isTerminalStatus(response.status)) {
|
|
5712
5726
|
return { response, rawResponse, responseHeaders };
|
|
@@ -5882,7 +5896,7 @@ function streamGoogleInteractionEvents({
|
|
|
5882
5896
|
baseURL,
|
|
5883
5897
|
interactionId,
|
|
5884
5898
|
headers,
|
|
5885
|
-
fetch,
|
|
5899
|
+
fetch: fetch2,
|
|
5886
5900
|
abortSignal,
|
|
5887
5901
|
maxRetries = DEFAULT_MAX_RETRIES,
|
|
5888
5902
|
retryDelayMs = DEFAULT_RETRY_DELAY_MS
|
|
@@ -5930,7 +5944,7 @@ function streamGoogleInteractionEvents({
|
|
|
5930
5944
|
googleInteractionsEventSchema
|
|
5931
5945
|
),
|
|
5932
5946
|
abortSignal: effectiveSignal,
|
|
5933
|
-
fetch
|
|
5947
|
+
fetch: fetch2
|
|
5934
5948
|
});
|
|
5935
5949
|
return stream.getReader();
|
|
5936
5950
|
}
|
|
@@ -6023,7 +6037,7 @@ function streamGoogleInteractionEvents({
|
|
|
6023
6037
|
baseURL,
|
|
6024
6038
|
interactionId,
|
|
6025
6039
|
headers,
|
|
6026
|
-
fetch
|
|
6040
|
+
fetch: fetch2
|
|
6027
6041
|
});
|
|
6028
6042
|
}
|
|
6029
6043
|
}
|
|
@@ -6676,6 +6690,388 @@ function pruneUndefined(obj) {
|
|
|
6676
6690
|
return result;
|
|
6677
6691
|
}
|
|
6678
6692
|
|
|
6693
|
+
// src/realtime/google-realtime-event-mapper.ts
|
|
6694
|
+
import { safeParseJSON } from "@ai-sdk/provider-utils";
|
|
6695
|
+
var GoogleRealtimeEventMapper = class {
|
|
6696
|
+
constructor() {
|
|
6697
|
+
this.turnCounter = 0;
|
|
6698
|
+
this.hasAudio = false;
|
|
6699
|
+
this.hasText = false;
|
|
6700
|
+
this.hasTranscript = false;
|
|
6701
|
+
this.turnClosed = false;
|
|
6702
|
+
this.inputAudioRate = 16e3;
|
|
6703
|
+
}
|
|
6704
|
+
get responseId() {
|
|
6705
|
+
return `google-resp-${this.turnCounter}`;
|
|
6706
|
+
}
|
|
6707
|
+
get itemId() {
|
|
6708
|
+
return `google-item-${this.turnCounter}`;
|
|
6709
|
+
}
|
|
6710
|
+
/**
|
|
6711
|
+
* Rolls over to the next turn lazily, only once new model content actually
|
|
6712
|
+
* arrives. `turnComplete` merely marks the current turn closed; the counter
|
|
6713
|
+
* is not advanced until the next response begins. This keeps a transcript
|
|
6714
|
+
* that arrives shortly after `turnComplete` attached to the turn it belongs
|
|
6715
|
+
* to, since Google delivers transcription independently with no guaranteed
|
|
6716
|
+
* ordering relative to `turnComplete`.
|
|
6717
|
+
*/
|
|
6718
|
+
beginTurnIfClosed() {
|
|
6719
|
+
if (!this.turnClosed) return;
|
|
6720
|
+
this.turnCounter++;
|
|
6721
|
+
this.hasAudio = false;
|
|
6722
|
+
this.hasText = false;
|
|
6723
|
+
this.hasTranscript = false;
|
|
6724
|
+
this.turnClosed = false;
|
|
6725
|
+
}
|
|
6726
|
+
parseServerEvent(raw) {
|
|
6727
|
+
var _a, _b;
|
|
6728
|
+
const data = raw;
|
|
6729
|
+
if (data.setupComplete != null) {
|
|
6730
|
+
return { type: "session-created", raw };
|
|
6731
|
+
}
|
|
6732
|
+
if (data.toolCall != null) {
|
|
6733
|
+
this.beginTurnIfClosed();
|
|
6734
|
+
const functionCalls = (_a = data.toolCall.functionCalls) != null ? _a : [];
|
|
6735
|
+
return functionCalls.flatMap((functionCall) => {
|
|
6736
|
+
var _a2;
|
|
6737
|
+
const args = JSON.stringify((_a2 = functionCall.args) != null ? _a2 : {});
|
|
6738
|
+
return [
|
|
6739
|
+
{
|
|
6740
|
+
type: "function-call-arguments-delta",
|
|
6741
|
+
responseId: this.responseId,
|
|
6742
|
+
itemId: this.itemId,
|
|
6743
|
+
callId: functionCall.id,
|
|
6744
|
+
delta: args,
|
|
6745
|
+
raw
|
|
6746
|
+
},
|
|
6747
|
+
{
|
|
6748
|
+
type: "function-call-arguments-done",
|
|
6749
|
+
responseId: this.responseId,
|
|
6750
|
+
itemId: this.itemId,
|
|
6751
|
+
callId: functionCall.id,
|
|
6752
|
+
name: functionCall.name,
|
|
6753
|
+
arguments: args,
|
|
6754
|
+
raw
|
|
6755
|
+
}
|
|
6756
|
+
];
|
|
6757
|
+
});
|
|
6758
|
+
}
|
|
6759
|
+
if (data.toolCallCancellation != null) {
|
|
6760
|
+
return {
|
|
6761
|
+
type: "custom",
|
|
6762
|
+
rawType: "toolCallCancellation",
|
|
6763
|
+
raw
|
|
6764
|
+
};
|
|
6765
|
+
}
|
|
6766
|
+
if (data.serverContent != null) {
|
|
6767
|
+
return this.parseServerContent(data.serverContent, raw);
|
|
6768
|
+
}
|
|
6769
|
+
if (((_b = data.inputTranscription) == null ? void 0 : _b.text) != null) {
|
|
6770
|
+
return {
|
|
6771
|
+
type: "input-transcription-completed",
|
|
6772
|
+
itemId: `google-input-${this.turnCounter}`,
|
|
6773
|
+
transcript: data.inputTranscription.text,
|
|
6774
|
+
raw
|
|
6775
|
+
};
|
|
6776
|
+
}
|
|
6777
|
+
return { type: "custom", rawType: String(Object.keys(data)[0]), raw };
|
|
6778
|
+
}
|
|
6779
|
+
parseServerContent(serverContent, raw) {
|
|
6780
|
+
var _a, _b, _c, _d;
|
|
6781
|
+
const events = [];
|
|
6782
|
+
if (serverContent.interrupted) {
|
|
6783
|
+
events.push({
|
|
6784
|
+
type: "speech-started",
|
|
6785
|
+
raw
|
|
6786
|
+
});
|
|
6787
|
+
}
|
|
6788
|
+
if ((_a = serverContent.modelTurn) == null ? void 0 : _a.parts) {
|
|
6789
|
+
this.beginTurnIfClosed();
|
|
6790
|
+
for (const part of serverContent.modelTurn.parts) {
|
|
6791
|
+
if ((_b = part.inlineData) == null ? void 0 : _b.data) {
|
|
6792
|
+
this.hasAudio = true;
|
|
6793
|
+
events.push({
|
|
6794
|
+
type: "audio-delta",
|
|
6795
|
+
responseId: this.responseId,
|
|
6796
|
+
itemId: this.itemId,
|
|
6797
|
+
delta: part.inlineData.data,
|
|
6798
|
+
raw
|
|
6799
|
+
});
|
|
6800
|
+
}
|
|
6801
|
+
if (part.text) {
|
|
6802
|
+
this.hasText = true;
|
|
6803
|
+
events.push({
|
|
6804
|
+
type: "text-delta",
|
|
6805
|
+
responseId: this.responseId,
|
|
6806
|
+
itemId: this.itemId,
|
|
6807
|
+
delta: part.text,
|
|
6808
|
+
raw
|
|
6809
|
+
});
|
|
6810
|
+
}
|
|
6811
|
+
}
|
|
6812
|
+
}
|
|
6813
|
+
if ((_c = serverContent.outputTranscription) == null ? void 0 : _c.text) {
|
|
6814
|
+
this.hasTranscript = true;
|
|
6815
|
+
events.push({
|
|
6816
|
+
type: "audio-transcript-delta",
|
|
6817
|
+
responseId: this.responseId,
|
|
6818
|
+
itemId: this.itemId,
|
|
6819
|
+
delta: serverContent.outputTranscription.text,
|
|
6820
|
+
raw
|
|
6821
|
+
});
|
|
6822
|
+
}
|
|
6823
|
+
if ((_d = serverContent.inputTranscription) == null ? void 0 : _d.text) {
|
|
6824
|
+
events.push({
|
|
6825
|
+
type: "input-transcription-completed",
|
|
6826
|
+
itemId: `google-input-${this.turnCounter}`,
|
|
6827
|
+
transcript: serverContent.inputTranscription.text,
|
|
6828
|
+
raw
|
|
6829
|
+
});
|
|
6830
|
+
}
|
|
6831
|
+
if (serverContent.turnComplete) {
|
|
6832
|
+
if (this.hasAudio) {
|
|
6833
|
+
events.push({
|
|
6834
|
+
type: "audio-done",
|
|
6835
|
+
responseId: this.responseId,
|
|
6836
|
+
itemId: this.itemId,
|
|
6837
|
+
raw
|
|
6838
|
+
});
|
|
6839
|
+
}
|
|
6840
|
+
if (this.hasText) {
|
|
6841
|
+
events.push({
|
|
6842
|
+
type: "text-done",
|
|
6843
|
+
responseId: this.responseId,
|
|
6844
|
+
itemId: this.itemId,
|
|
6845
|
+
raw
|
|
6846
|
+
});
|
|
6847
|
+
}
|
|
6848
|
+
if (this.hasTranscript) {
|
|
6849
|
+
events.push({
|
|
6850
|
+
type: "audio-transcript-done",
|
|
6851
|
+
responseId: this.responseId,
|
|
6852
|
+
itemId: this.itemId,
|
|
6853
|
+
raw
|
|
6854
|
+
});
|
|
6855
|
+
}
|
|
6856
|
+
events.push({
|
|
6857
|
+
type: "response-done",
|
|
6858
|
+
responseId: this.responseId,
|
|
6859
|
+
status: "completed",
|
|
6860
|
+
raw
|
|
6861
|
+
});
|
|
6862
|
+
this.turnClosed = true;
|
|
6863
|
+
}
|
|
6864
|
+
if (events.length === 0) {
|
|
6865
|
+
return { type: "custom", rawType: "serverContent", raw };
|
|
6866
|
+
}
|
|
6867
|
+
return events.length === 1 ? events[0] : events;
|
|
6868
|
+
}
|
|
6869
|
+
serializeClientEvent(event, modelId) {
|
|
6870
|
+
var _a;
|
|
6871
|
+
switch (event.type) {
|
|
6872
|
+
case "session-update":
|
|
6873
|
+
if (((_a = event.config.inputAudioFormat) == null ? void 0 : _a.rate) != null) {
|
|
6874
|
+
this.inputAudioRate = event.config.inputAudioFormat.rate;
|
|
6875
|
+
}
|
|
6876
|
+
return {
|
|
6877
|
+
setup: buildGoogleSessionConfig(event.config, modelId)
|
|
6878
|
+
};
|
|
6879
|
+
case "input-audio-append":
|
|
6880
|
+
return {
|
|
6881
|
+
realtimeInput: {
|
|
6882
|
+
audio: {
|
|
6883
|
+
data: event.audio,
|
|
6884
|
+
mimeType: `audio/pcm;rate=${this.inputAudioRate}`
|
|
6885
|
+
}
|
|
6886
|
+
}
|
|
6887
|
+
};
|
|
6888
|
+
case "input-audio-commit":
|
|
6889
|
+
case "input-audio-clear":
|
|
6890
|
+
case "response-create":
|
|
6891
|
+
case "response-cancel":
|
|
6892
|
+
case "conversation-item-truncate":
|
|
6893
|
+
return null;
|
|
6894
|
+
case "conversation-item-create": {
|
|
6895
|
+
const item = event.item;
|
|
6896
|
+
switch (item.type) {
|
|
6897
|
+
case "text-message":
|
|
6898
|
+
return {
|
|
6899
|
+
realtimeInput: {
|
|
6900
|
+
text: item.text
|
|
6901
|
+
}
|
|
6902
|
+
};
|
|
6903
|
+
case "function-call-output":
|
|
6904
|
+
return serializeFunctionCallOutput(item);
|
|
6905
|
+
case "audio-message":
|
|
6906
|
+
return null;
|
|
6907
|
+
}
|
|
6908
|
+
break;
|
|
6909
|
+
}
|
|
6910
|
+
}
|
|
6911
|
+
return null;
|
|
6912
|
+
}
|
|
6913
|
+
};
|
|
6914
|
+
async function serializeFunctionCallOutput(item) {
|
|
6915
|
+
const parseResult = await safeParseJSON({ text: item.output });
|
|
6916
|
+
const response = parseResult.success ? parseResult.value : {};
|
|
6917
|
+
return {
|
|
6918
|
+
toolResponse: {
|
|
6919
|
+
functionResponses: [
|
|
6920
|
+
{
|
|
6921
|
+
id: item.callId,
|
|
6922
|
+
name: item.name,
|
|
6923
|
+
response
|
|
6924
|
+
}
|
|
6925
|
+
]
|
|
6926
|
+
}
|
|
6927
|
+
};
|
|
6928
|
+
}
|
|
6929
|
+
function buildGoogleSessionConfig(config, modelId) {
|
|
6930
|
+
const setup = {
|
|
6931
|
+
model: getModelPath(modelId)
|
|
6932
|
+
};
|
|
6933
|
+
const generationConfig = {};
|
|
6934
|
+
if ((config == null ? void 0 : config.outputModalities) != null) {
|
|
6935
|
+
generationConfig.responseModalities = config.outputModalities.map(
|
|
6936
|
+
(m) => m.toUpperCase()
|
|
6937
|
+
);
|
|
6938
|
+
} else {
|
|
6939
|
+
generationConfig.responseModalities = ["AUDIO"];
|
|
6940
|
+
}
|
|
6941
|
+
if ((config == null ? void 0 : config.voice) != null) {
|
|
6942
|
+
generationConfig.speechConfig = {
|
|
6943
|
+
voiceConfig: {
|
|
6944
|
+
prebuiltVoiceConfig: {
|
|
6945
|
+
voiceName: config.voice
|
|
6946
|
+
}
|
|
6947
|
+
}
|
|
6948
|
+
};
|
|
6949
|
+
}
|
|
6950
|
+
setup.generationConfig = generationConfig;
|
|
6951
|
+
if ((config == null ? void 0 : config.instructions) != null) {
|
|
6952
|
+
setup.systemInstruction = {
|
|
6953
|
+
parts: [{ text: config.instructions }]
|
|
6954
|
+
};
|
|
6955
|
+
}
|
|
6956
|
+
if ((config == null ? void 0 : config.tools) != null && config.tools.length > 0) {
|
|
6957
|
+
setup.tools = [
|
|
6958
|
+
{
|
|
6959
|
+
functionDeclarations: config.tools.map((tool) => ({
|
|
6960
|
+
name: tool.name,
|
|
6961
|
+
description: tool.description,
|
|
6962
|
+
parameters: convertJSONSchemaToOpenAPISchema(tool.parameters)
|
|
6963
|
+
}))
|
|
6964
|
+
}
|
|
6965
|
+
];
|
|
6966
|
+
}
|
|
6967
|
+
if ((config == null ? void 0 : config.inputAudioTranscription) != null) {
|
|
6968
|
+
setup.inputAudioTranscription = {};
|
|
6969
|
+
}
|
|
6970
|
+
if ((config == null ? void 0 : config.outputAudioTranscription) != null) {
|
|
6971
|
+
setup.outputAudioTranscription = {};
|
|
6972
|
+
}
|
|
6973
|
+
if ((config == null ? void 0 : config.providerOptions) != null) {
|
|
6974
|
+
Object.assign(setup, config.providerOptions);
|
|
6975
|
+
}
|
|
6976
|
+
return setup;
|
|
6977
|
+
}
|
|
6978
|
+
|
|
6979
|
+
// src/realtime/google-realtime-model.ts
|
|
6980
|
+
var realtimeWebSocketPath = "google.ai.generativelanguage.v1alpha.GenerativeService.BidiGenerateContentConstrained";
|
|
6981
|
+
function getRealtimeBaseURL(baseURL) {
|
|
6982
|
+
const url = new URL(baseURL);
|
|
6983
|
+
const pathSegments = url.pathname.split("/");
|
|
6984
|
+
const version = pathSegments.at(-1);
|
|
6985
|
+
if (version === "v1beta" || version === "v1alpha") {
|
|
6986
|
+
pathSegments.pop();
|
|
6987
|
+
url.pathname = pathSegments.join("/") || "/";
|
|
6988
|
+
}
|
|
6989
|
+
return url;
|
|
6990
|
+
}
|
|
6991
|
+
function getAuthTokensURL(baseURL) {
|
|
6992
|
+
const url = getRealtimeBaseURL(baseURL);
|
|
6993
|
+
url.pathname = `${url.pathname.replace(/\/$/, "")}/v1alpha/auth_tokens`;
|
|
6994
|
+
return url.toString();
|
|
6995
|
+
}
|
|
6996
|
+
function getWebSocketURL(baseURL) {
|
|
6997
|
+
const url = getRealtimeBaseURL(baseURL);
|
|
6998
|
+
url.protocol = url.protocol === "https:" ? "wss:" : "ws:";
|
|
6999
|
+
url.pathname = `${url.pathname.replace(/\/$/, "")}/ws/${realtimeWebSocketPath}`;
|
|
7000
|
+
return url.toString();
|
|
7001
|
+
}
|
|
7002
|
+
var GoogleRealtimeModel = class {
|
|
7003
|
+
constructor(modelId, config) {
|
|
7004
|
+
this.specificationVersion = "v4";
|
|
7005
|
+
this.mapper = new GoogleRealtimeEventMapper();
|
|
7006
|
+
this.modelId = modelId;
|
|
7007
|
+
this.provider = config.provider;
|
|
7008
|
+
this.config = config;
|
|
7009
|
+
}
|
|
7010
|
+
async doCreateClientSecret(options) {
|
|
7011
|
+
var _a, _b;
|
|
7012
|
+
const fetchFn = (_a = this.config.fetch) != null ? _a : fetch;
|
|
7013
|
+
const headers = this.config.headers();
|
|
7014
|
+
const apiKey = headers["x-goog-api-key"];
|
|
7015
|
+
if (!apiKey) {
|
|
7016
|
+
throw new Error(
|
|
7017
|
+
"Google Generative AI API key is required for realtime token creation."
|
|
7018
|
+
);
|
|
7019
|
+
}
|
|
7020
|
+
const now = Date.now();
|
|
7021
|
+
const openWindowMs = ((_b = options.expiresAfterSeconds) != null ? _b : 60) * 1e3;
|
|
7022
|
+
const newSessionExpireTime = new Date(now + openWindowMs).toISOString();
|
|
7023
|
+
const expireTime = new Date(
|
|
7024
|
+
now + openWindowMs + 30 * 60 * 1e3
|
|
7025
|
+
).toISOString();
|
|
7026
|
+
const setupPayload = buildGoogleSessionConfig(
|
|
7027
|
+
options.sessionConfig,
|
|
7028
|
+
this.modelId
|
|
7029
|
+
);
|
|
7030
|
+
const response = await fetchFn(
|
|
7031
|
+
`${getAuthTokensURL(this.config.baseURL)}?key=${encodeURIComponent(apiKey)}`,
|
|
7032
|
+
{
|
|
7033
|
+
method: "POST",
|
|
7034
|
+
headers: { "Content-Type": "application/json" },
|
|
7035
|
+
body: JSON.stringify({
|
|
7036
|
+
// `uses: 0` means no limit is applied to how many times the token can
|
|
7037
|
+
// start a session (per the AuthToken spec). An unset value would
|
|
7038
|
+
// default to 1, which breaks WebSocket reconnects within the session.
|
|
7039
|
+
uses: 0,
|
|
7040
|
+
expireTime,
|
|
7041
|
+
newSessionExpireTime,
|
|
7042
|
+
bidiGenerateContentSetup: setupPayload
|
|
7043
|
+
})
|
|
7044
|
+
}
|
|
7045
|
+
);
|
|
7046
|
+
if (!response.ok) {
|
|
7047
|
+
const text = await response.text();
|
|
7048
|
+
throw new Error(
|
|
7049
|
+
`Google realtime auth token request failed: ${response.status} ${text}`
|
|
7050
|
+
);
|
|
7051
|
+
}
|
|
7052
|
+
const data = await response.json();
|
|
7053
|
+
return {
|
|
7054
|
+
token: data.name,
|
|
7055
|
+
url: getWebSocketURL(this.config.baseURL),
|
|
7056
|
+
expiresAt: data.expireTime ? Math.floor(new Date(data.expireTime).getTime() / 1e3) : void 0
|
|
7057
|
+
};
|
|
7058
|
+
}
|
|
7059
|
+
getWebSocketConfig(options) {
|
|
7060
|
+
return {
|
|
7061
|
+
url: `${options.url}?access_token=${encodeURIComponent(options.token)}`
|
|
7062
|
+
};
|
|
7063
|
+
}
|
|
7064
|
+
parseServerEvent(raw) {
|
|
7065
|
+
return this.mapper.parseServerEvent(raw);
|
|
7066
|
+
}
|
|
7067
|
+
serializeClientEvent(event) {
|
|
7068
|
+
return this.mapper.serializeClientEvent(event, this.modelId);
|
|
7069
|
+
}
|
|
7070
|
+
buildSessionConfig(config) {
|
|
7071
|
+
return buildGoogleSessionConfig(config, this.modelId);
|
|
7072
|
+
}
|
|
7073
|
+
};
|
|
7074
|
+
|
|
6679
7075
|
// src/google-provider.ts
|
|
6680
7076
|
function createGoogle(options = {}) {
|
|
6681
7077
|
var _a, _b;
|
|
@@ -6742,12 +7138,35 @@ function createGoogle(options = {}) {
|
|
|
6742
7138
|
generateId: (_a2 = options.generateId) != null ? _a2 : generateId2
|
|
6743
7139
|
});
|
|
6744
7140
|
};
|
|
7141
|
+
const createRealtimeModel = (modelId) => new GoogleRealtimeModel(modelId, {
|
|
7142
|
+
provider: `${providerName}.realtime`,
|
|
7143
|
+
baseURL,
|
|
7144
|
+
headers: getHeaders,
|
|
7145
|
+
fetch: options.fetch
|
|
7146
|
+
});
|
|
6745
7147
|
const createSpeechModel = (modelId) => new GoogleSpeechModel(modelId, {
|
|
6746
7148
|
provider: `${providerName}.speech`,
|
|
6747
7149
|
baseURL,
|
|
6748
7150
|
headers: getHeaders,
|
|
6749
7151
|
fetch: options.fetch
|
|
6750
7152
|
});
|
|
7153
|
+
const experimentalRealtimeFactory = Object.assign(
|
|
7154
|
+
(modelId) => createRealtimeModel(modelId),
|
|
7155
|
+
{
|
|
7156
|
+
getToken: async (tokenOptions) => {
|
|
7157
|
+
const model = createRealtimeModel(tokenOptions.model);
|
|
7158
|
+
const secret = await model.doCreateClientSecret({
|
|
7159
|
+
sessionConfig: tokenOptions.sessionConfig,
|
|
7160
|
+
expiresAfterSeconds: tokenOptions.expiresAfterSeconds
|
|
7161
|
+
});
|
|
7162
|
+
return {
|
|
7163
|
+
token: secret.token,
|
|
7164
|
+
url: secret.url,
|
|
7165
|
+
expiresAt: secret.expiresAt
|
|
7166
|
+
};
|
|
7167
|
+
}
|
|
7168
|
+
}
|
|
7169
|
+
);
|
|
6751
7170
|
const createInteractionsModel = (modelIdOrAgent) => {
|
|
6752
7171
|
var _a2;
|
|
6753
7172
|
return new GoogleInteractionsLanguageModel(
|
|
@@ -6781,6 +7200,7 @@ function createGoogle(options = {}) {
|
|
|
6781
7200
|
provider.imageModel = createImageModel;
|
|
6782
7201
|
provider.video = createVideoModel;
|
|
6783
7202
|
provider.videoModel = createVideoModel;
|
|
7203
|
+
provider.experimental_realtime = experimentalRealtimeFactory;
|
|
6784
7204
|
provider.files = createFiles;
|
|
6785
7205
|
provider.speech = createSpeechModel;
|
|
6786
7206
|
provider.speechModel = createSpeechModel;
|
|
@@ -6790,6 +7210,7 @@ function createGoogle(options = {}) {
|
|
|
6790
7210
|
}
|
|
6791
7211
|
var google = createGoogle();
|
|
6792
7212
|
export {
|
|
7213
|
+
GoogleRealtimeModel as Experimental_GoogleRealtimeModel,
|
|
6793
7214
|
VERSION,
|
|
6794
7215
|
createGoogle,
|
|
6795
7216
|
createGoogle as createGoogleGenerativeAI,
|