@ai-sdk/google 4.0.0-beta.13 → 4.0.0-beta.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +24 -4
- package/dist/index.js +197 -37
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +200 -37
- package/dist/index.mjs.map +1 -1
- package/dist/internal/index.js +196 -36
- package/dist/internal/index.js.map +1 -1
- package/dist/internal/index.mjs +199 -36
- package/dist/internal/index.mjs.map +1 -1
- package/package.json +3 -5
- package/src/convert-to-google-generative-ai-messages.ts +163 -32
- package/src/google-generative-ai-language-model.ts +124 -2
- package/src/google-generative-ai-prompt.ts +12 -2
|
@@ -2,22 +2,179 @@ import {
|
|
|
2
2
|
LanguageModelV4Prompt,
|
|
3
3
|
UnsupportedFunctionalityError,
|
|
4
4
|
} from '@ai-sdk/provider';
|
|
5
|
+
import { convertToBase64 } from '@ai-sdk/provider-utils';
|
|
5
6
|
import {
|
|
6
7
|
GoogleGenerativeAIContent,
|
|
7
8
|
GoogleGenerativeAIContentPart,
|
|
9
|
+
GoogleGenerativeAIFunctionResponsePart,
|
|
8
10
|
GoogleGenerativeAIPrompt,
|
|
9
11
|
} from './google-generative-ai-prompt';
|
|
10
|
-
|
|
12
|
+
|
|
13
|
+
/*
 * Matches base64 data URLs as described by RFC 2397:
 *   data:<media type>[;<parameter>]*;base64,<payload>
 *
 * Group 1 is the bare media type (without parameters), group 2 the payload.
 * Optional media-type parameters such as `;charset=utf-8` are skipped so that
 * URLs like `data:text/plain;charset=utf-8;base64,...` are still recognized.
 * The `s` flag lets `.` match line breaks inside the payload.
 */
const dataUrlRegex = /^data:([^;,]+)(?:;[^;,]+)*;base64,(.+)$/s;

/*
 * Splits a base64 data URL into its media type and base64 payload.
 *
 * @param value - the string to inspect.
 * @returns the media type (parameters stripped) and the base64 payload, or
 *   `undefined` when `value` is not a base64 data URL with an explicit media
 *   type and a non-empty payload.
 */
function parseBase64DataUrl(
  value: string,
): { mediaType: string; data: string } | undefined {
  const match = dataUrlRegex.exec(value);
  if (match == null) {
    return undefined;
  }

  // Both capture groups are mandatory in the pattern, so they are always set
  // once the expression has matched.
  const [, mediaType, data] = match;
  return { mediaType, data };
}
|
|
28
|
+
|
|
29
|
+
/*
 * Converts a URL from a tool result into a function-response part.
 *
 * Only base64 data URLs can be converted: per
 * https://ai.google.dev/api/caching#FunctionResponsePart a function-response
 * part carries inline data only. The Vertex documentation
 * (https://docs.cloud.google.com/vertex-ai/generative-ai/docs/model-reference/function-calling#functionresponsepart)
 * suggests this may differ there, which still has to be confirmed and tested
 * for both APIs.
 *
 * Returns `undefined` when the URL is not a base64 data URL.
 */
function convertUrlToolResultPart(
  url: string,
): GoogleGenerativeAIFunctionResponsePart | undefined {
  const inline = parseBase64DataUrl(url);

  return inline == null
    ? undefined
    : { inlineData: { mimeType: inline.mediaType, data: inline.data } };
}
|
|
47
|
+
|
|
48
|
+
/*
 * Appends one `functionResponse` part for a tool result whose output is a list
 * of content parts. Binary content (inline images/files) travels in the
 * multimodal `parts` field of the function response, alongside the text.
 * This format is supported by Gemini 3+ models.
 *
 * - `text` entries are collected and joined with newlines into
 *   `response.content`.
 * - `image-data` / `file-data` entries become inline-data parts.
 * - `image-url` / `file-url` entries become inline-data parts when the URL can
 *   be converted; otherwise they are serialized as JSON into the text.
 * - Any other entry is serialized as JSON into the text.
 *
 * When no text was collected, a generic success message is used as content.
 */
function appendToolResultParts(
  parts: GoogleGenerativeAIContentPart[],
  toolName: string,
  outputValue: Array<{
    type: string;
    [key: string]: unknown;
  }>,
): void {
  const textSegments: string[] = [];
  const inlineParts: GoogleGenerativeAIFunctionResponsePart[] = [];

  for (const item of outputValue) {
    const kind = item.type;

    if (kind === 'text') {
      textSegments.push(item.text as string);
      continue;
    }

    if (kind === 'image-data' || kind === 'file-data') {
      inlineParts.push({
        inlineData: {
          mimeType: item.mediaType as string,
          data: item.data as string,
        },
      });
      continue;
    }

    // URL entries are only usable when they can be turned into inline data;
    // everything else ends up in the textual response as JSON.
    const converted =
      kind === 'image-url' || kind === 'file-url'
        ? convertUrlToolResultPart(item.url as string)
        : undefined;

    if (converted != null) {
      inlineParts.push(converted);
    } else {
      textSegments.push(JSON.stringify(item));
    }
  }

  const content =
    textSegments.length > 0
      ? textSegments.join('\n')
      : 'Tool executed successfully.';

  const functionResponse = {
    name: toolName,
    response: { name: toolName, content },
  };

  parts.push({
    functionResponse:
      inlineParts.length > 0
        ? { ...functionResponse, parts: inlineParts }
        : functionResponse,
  });
}
|
|
116
|
+
|
|
117
|
+
/*
 * Appends tool result content in the legacy layout used for pre-Gemini 3
 * models, which do not accept multimodal parts inside `functionResponse`.
 *
 * - each `text` entry becomes its own `functionResponse` part,
 * - each `image-data` entry becomes a top-level `inlineData` part followed by
 *   a short explanatory text part,
 * - any other entry is appended as a text part holding its JSON form.
 */
function appendLegacyToolResultParts(
  parts: GoogleGenerativeAIContentPart[],
  toolName: string,
  outputValue: Array<{
    type: string;
    [key: string]: unknown;
  }>,
): void {
  for (const item of outputValue) {
    if (item.type === 'text') {
      parts.push({
        functionResponse: {
          name: toolName,
          response: { name: toolName, content: item.text },
        },
      });
      continue;
    }

    if (item.type === 'image-data') {
      const image = {
        inlineData: {
          mimeType: String(item.mediaType),
          data: String(item.data),
        },
      };
      const note = {
        text: 'Tool executed successfully and returned this image as a response',
      };
      parts.push(image, note);
      continue;
    }

    parts.push({ text: JSON.stringify(item) });
  }
}
|
|
11
162
|
|
|
12
163
|
export function convertToGoogleGenerativeAIMessages(
|
|
13
164
|
prompt: LanguageModelV4Prompt,
|
|
14
|
-
options?: {
|
|
165
|
+
options?: {
|
|
166
|
+
isGemmaModel?: boolean;
|
|
167
|
+
providerOptionsName?: string;
|
|
168
|
+
supportsFunctionResponseParts?: boolean;
|
|
169
|
+
},
|
|
15
170
|
): GoogleGenerativeAIPrompt {
|
|
16
171
|
const systemInstructionParts: Array<{ text: string }> = [];
|
|
17
172
|
const contents: Array<GoogleGenerativeAIContent> = [];
|
|
18
173
|
let systemMessagesAllowed = true;
|
|
19
174
|
const isGemmaModel = options?.isGemmaModel ?? false;
|
|
20
175
|
const providerOptionsName = options?.providerOptionsName ?? 'google';
|
|
176
|
+
const supportsFunctionResponseParts =
|
|
177
|
+
options?.supportsFunctionResponseParts ?? true;
|
|
21
178
|
|
|
22
179
|
for (const { role, content } of prompt) {
|
|
23
180
|
switch (role) {
|
|
@@ -178,36 +335,10 @@ export function convertToGoogleGenerativeAIMessages(
|
|
|
178
335
|
const output = part.output;
|
|
179
336
|
|
|
180
337
|
if (output.type === 'content') {
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
functionResponse: {
|
|
186
|
-
name: part.toolName,
|
|
187
|
-
response: {
|
|
188
|
-
name: part.toolName,
|
|
189
|
-
content: contentPart.text,
|
|
190
|
-
},
|
|
191
|
-
},
|
|
192
|
-
});
|
|
193
|
-
break;
|
|
194
|
-
case 'image-data':
|
|
195
|
-
parts.push(
|
|
196
|
-
{
|
|
197
|
-
inlineData: {
|
|
198
|
-
mimeType: contentPart.mediaType,
|
|
199
|
-
data: contentPart.data,
|
|
200
|
-
},
|
|
201
|
-
},
|
|
202
|
-
{
|
|
203
|
-
text: 'Tool executed successfully and returned this image as a response',
|
|
204
|
-
},
|
|
205
|
-
);
|
|
206
|
-
break;
|
|
207
|
-
default:
|
|
208
|
-
parts.push({ text: JSON.stringify(contentPart) });
|
|
209
|
-
break;
|
|
210
|
-
}
|
|
338
|
+
if (supportsFunctionResponseParts) {
|
|
339
|
+
appendToolResultParts(parts, part.toolName, output.value);
|
|
340
|
+
} else {
|
|
341
|
+
appendLegacyToolResultParts(parts, part.toolName, output.value);
|
|
211
342
|
}
|
|
212
343
|
} else {
|
|
213
344
|
parts.push({
|
|
@@ -17,7 +17,10 @@ import {
|
|
|
17
17
|
FetchFunction,
|
|
18
18
|
generateId,
|
|
19
19
|
InferSchema,
|
|
20
|
+
isCustomReasoning,
|
|
20
21
|
lazySchema,
|
|
22
|
+
mapReasoningToProviderBudget,
|
|
23
|
+
mapReasoningToProviderEffort,
|
|
21
24
|
parseProviderOptions,
|
|
22
25
|
ParseResult,
|
|
23
26
|
postJsonToApi,
|
|
@@ -96,6 +99,7 @@ export class GoogleGenerativeAILanguageModel implements LanguageModelV4 {
|
|
|
96
99
|
seed,
|
|
97
100
|
tools,
|
|
98
101
|
toolChoice,
|
|
102
|
+
reasoning,
|
|
99
103
|
providerOptions,
|
|
100
104
|
}: LanguageModelV4CallOptions) {
|
|
101
105
|
const warnings: SharedV4Warning[] = [];
|
|
@@ -135,10 +139,15 @@ export class GoogleGenerativeAILanguageModel implements LanguageModelV4 {
|
|
|
135
139
|
}
|
|
136
140
|
|
|
137
141
|
const isGemmaModel = this.modelId.toLowerCase().startsWith('gemma-');
|
|
142
|
+
const supportsFunctionResponseParts = this.modelId.startsWith('gemini-3');
|
|
138
143
|
|
|
139
144
|
const { contents, systemInstruction } = convertToGoogleGenerativeAIMessages(
|
|
140
145
|
prompt,
|
|
141
|
-
{
|
|
146
|
+
{
|
|
147
|
+
isGemmaModel,
|
|
148
|
+
providerOptionsName,
|
|
149
|
+
supportsFunctionResponseParts,
|
|
150
|
+
},
|
|
142
151
|
);
|
|
143
152
|
|
|
144
153
|
const {
|
|
@@ -151,6 +160,16 @@ export class GoogleGenerativeAILanguageModel implements LanguageModelV4 {
|
|
|
151
160
|
modelId: this.modelId,
|
|
152
161
|
});
|
|
153
162
|
|
|
163
|
+
const resolvedThinking = resolveThinkingConfig({
|
|
164
|
+
reasoning,
|
|
165
|
+
modelId: this.modelId,
|
|
166
|
+
warnings,
|
|
167
|
+
});
|
|
168
|
+
const thinkingConfig =
|
|
169
|
+
googleOptions?.thinkingConfig || resolvedThinking
|
|
170
|
+
? { ...resolvedThinking, ...googleOptions?.thinkingConfig }
|
|
171
|
+
: undefined;
|
|
172
|
+
|
|
154
173
|
return {
|
|
155
174
|
args: {
|
|
156
175
|
generationConfig: {
|
|
@@ -182,7 +201,7 @@ export class GoogleGenerativeAILanguageModel implements LanguageModelV4 {
|
|
|
182
201
|
|
|
183
202
|
// provider options:
|
|
184
203
|
responseModalities: googleOptions?.responseModalities,
|
|
185
|
-
thinkingConfig
|
|
204
|
+
thinkingConfig,
|
|
186
205
|
...(googleOptions?.mediaResolution && {
|
|
187
206
|
mediaResolution: googleOptions.mediaResolution,
|
|
188
207
|
}),
|
|
@@ -706,6 +725,109 @@ export class GoogleGenerativeAILanguageModel implements LanguageModelV4 {
|
|
|
706
725
|
}
|
|
707
726
|
}
|
|
708
727
|
|
|
728
|
+
/*
 * Reports whether a model id names a Gemini 3 model: the id must contain
 * `gemini-3` (case-insensitive) followed by `.`, `-`, or the end of the id.
 * Ids such as `gemini-30` therefore do not match.
 */
function isGemini3Model(modelId: string): boolean {
  const gemini3Pattern = /gemini-3(?:[.\-]|$)/i;
  return gemini3Pattern.test(modelId);
}
|
|
731
|
+
|
|
732
|
+
/*
 * Returns the output-token ceiling passed as `maxOutputTokens` when a thinking
 * budget is derived for the budget-based (Gemini 2.5 style) path: 65536.
 */
function getMaxOutputTokensForGemini25Model(): number {
  const maxOutputTokens = 64 * 1024;
  return maxOutputTokens;
}
|
|
735
|
+
|
|
736
|
+
/*
 * Returns the largest thinking budget for a budget-based model. Ids that
 * contain `2.5-pro` or `gemini-3-pro-image` (compared case-insensitively) get
 * 32768 tokens; every other id gets 24576.
 */
function getMaxThinkingTokensForGemini25Model(modelId: string): number {
  const normalizedId = modelId.toLowerCase();
  const largeBudgetMarkers = ['2.5-pro', 'gemini-3-pro-image'];

  return largeBudgetMarkers.some(marker => normalizedId.includes(marker))
    ? 32768
    : 24576;
}
|
|
743
|
+
|
|
744
|
+
/*
 * The `thinkingConfig` shape declared by the Google language model options
 * schema, with `null` and `undefined` removed.
 */
type GoogleThinkingConfig = Exclude<
  InferSchema<typeof googleLanguageModelOptions>['thinkingConfig'],
  null | undefined
>;
|
|
747
|
+
|
|
748
|
+
/*
 * Translates the call-level `reasoning` setting into a Google thinking
 * configuration for the given model.
 *
 * Returns `undefined` when `isCustomReasoning(reasoning)` is false
 * (presumably for `undefined` / 'provider-default' — confirm against the
 * helper). Otherwise Gemini 3 models, except the `gemini-3-pro-image` family,
 * are configured through a thinking level; all other models are configured
 * through a thinking budget.
 *
 * @param reasoning - the reasoning setting from the call options.
 * @param modelId - the model id used to pick the configuration style.
 * @param warnings - warning list handed on to the model-specific resolvers.
 */
function resolveThinkingConfig({
  reasoning,
  modelId,
  warnings,
}: {
  reasoning: LanguageModelV4CallOptions['reasoning'];
  modelId: string;
  warnings: SharedV4Warning[];
}): Omit<GoogleThinkingConfig, 'includeThoughts'> | undefined {
  if (!isCustomReasoning(reasoning)) {
    return undefined;
  }

  // Compare case-insensitively: isGemini3Model ignores case and the budget
  // helper lowercases the id, so a case-sensitive check here would send a
  // mixed-case image model id down the thinking-level path.
  const isGemini3ProImageModel = modelId
    .toLowerCase()
    .includes('gemini-3-pro-image');

  if (isGemini3Model(modelId) && !isGemini3ProImageModel) {
    return resolveGemini3ThinkingConfig({ reasoning, warnings });
  }

  return resolveGemini25ThinkingConfig({ reasoning, modelId, warnings });
}
|
|
767
|
+
|
|
768
|
+
/*
 * Maps a custom reasoning setting onto a `thinkingLevel` configuration.
 *
 * 'none' is sent as the 'minimal' level because thinking cannot be fully
 * disabled with Gemini 3. Every other setting goes through
 * `mapReasoningToProviderEffort`, where 'xhigh' maps to 'high'. Returns
 * `undefined` when that mapping produces no level.
 */
function resolveGemini3ThinkingConfig({
  reasoning,
  warnings,
}: {
  reasoning: Exclude<
    LanguageModelV4CallOptions['reasoning'],
    'provider-default' | undefined
  >;
  warnings: SharedV4Warning[];
}): Pick<GoogleThinkingConfig, 'thinkingLevel'> | undefined {
  // It's not possible to fully disable thinking with Gemini 3.
  if (reasoning === 'none') {
    return { thinkingLevel: 'minimal' };
  }

  const level = mapReasoningToProviderEffort({
    reasoning,
    effortMap: {
      minimal: 'minimal',
      low: 'low',
      medium: 'medium',
      high: 'high',
      xhigh: 'high',
    },
    warnings,
  });

  return level == null ? undefined : { thinkingLevel: level };
}
|
|
801
|
+
|
|
802
|
+
/*
 * Maps a custom reasoning setting onto a `thinkingBudget` configuration.
 *
 * 'none' yields a budget of 0. Every other setting goes through
 * `mapReasoningToProviderBudget`, bounded by the model's output-token and
 * thinking-token limits with a minimum budget of 0. Returns `undefined` when
 * that mapping produces no budget.
 */
function resolveGemini25ThinkingConfig({
  reasoning,
  modelId,
  warnings,
}: {
  reasoning: Exclude<
    LanguageModelV4CallOptions['reasoning'],
    'provider-default' | undefined
  >;
  modelId: string;
  warnings: SharedV4Warning[];
}): Pick<GoogleThinkingConfig, 'thinkingBudget'> | undefined {
  if (reasoning === 'none') {
    return { thinkingBudget: 0 };
  }

  const maxOutputTokens = getMaxOutputTokensForGemini25Model();
  const maxReasoningBudget = getMaxThinkingTokensForGemini25Model(modelId);

  const budget = mapReasoningToProviderBudget({
    reasoning,
    maxOutputTokens,
    maxReasoningBudget,
    minReasoningBudget: 0,
    warnings,
  });

  return budget == null ? undefined : { thinkingBudget: budget };
}
|
|
830
|
+
|
|
709
831
|
function getToolCallsFromParts({
|
|
710
832
|
parts,
|
|
711
833
|
generateId,
|
|
@@ -2,9 +2,9 @@ import {
|
|
|
2
2
|
GroundingMetadataSchema,
|
|
3
3
|
PromptFeedbackSchema,
|
|
4
4
|
UrlContextMetadataSchema,
|
|
5
|
+
type SafetyRatingSchema,
|
|
5
6
|
UsageMetadataSchema,
|
|
6
7
|
} from './google-generative-ai-language-model';
|
|
7
|
-
import { type SafetyRatingSchema } from './google-generative-ai-language-model';
|
|
8
8
|
|
|
9
9
|
export type GoogleGenerativeAIPrompt = {
|
|
10
10
|
systemInstruction?: GoogleGenerativeAISystemInstruction;
|
|
@@ -24,9 +24,19 @@ export type GoogleGenerativeAIContentPart =
|
|
|
24
24
|
| { text: string; thought?: boolean; thoughtSignature?: string }
|
|
25
25
|
| { inlineData: { mimeType: string; data: string } }
|
|
26
26
|
| { functionCall: { name: string; args: unknown }; thoughtSignature?: string }
|
|
27
|
-
| {
|
|
27
|
+
| {
|
|
28
|
+
functionResponse: {
|
|
29
|
+
name: string;
|
|
30
|
+
response: unknown;
|
|
31
|
+
parts?: Array<GoogleGenerativeAIFunctionResponsePart>;
|
|
32
|
+
};
|
|
33
|
+
}
|
|
28
34
|
| { fileData: { mimeType: string; fileUri: string } };
|
|
29
35
|
|
|
36
|
+
/*
 * A part nested inside a `functionResponse` (its optional `parts` list).
 * Only inline data is modeled: a MIME type plus the data as a string.
 */
export type GoogleGenerativeAIFunctionResponsePart = {
  inlineData: {
    mimeType: string;
    data: string;
  };
};
|
|
39
|
+
|
|
30
40
|
export type GoogleGenerativeAIGroundingMetadata = GroundingMetadataSchema;
|
|
31
41
|
|
|
32
42
|
export type GoogleGenerativeAIUrlContextMetadata = UrlContextMetadataSchema;
|