@ai-sdk/google 4.0.0-beta.14 → 4.0.0-beta.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,22 +2,179 @@ import {
2
2
  LanguageModelV4Prompt,
3
3
  UnsupportedFunctionalityError,
4
4
  } from '@ai-sdk/provider';
5
+ import { convertToBase64 } from '@ai-sdk/provider-utils';
5
6
  import {
6
7
  GoogleGenerativeAIContent,
7
8
  GoogleGenerativeAIContentPart,
9
+ GoogleGenerativeAIFunctionResponsePart,
8
10
  GoogleGenerativeAIPrompt,
9
11
  } from './google-generative-ai-prompt';
10
- import { convertToBase64 } from '@ai-sdk/provider-utils';
12
+
13
/*
 * Matches base64 data URLs as described by RFC 2397:
 *   data:<mediatype>[;<parameter>]*;base64,<data>
 *
 * Group 1 captures the bare media type (without parameters), group 2 the
 * base64 payload. Optional media-type parameters such as `;charset=utf-8`
 * are accepted and skipped so that they do not prevent a match. The `s` flag
 * lets the payload span line breaks.
 */
const dataUrlRegex = /^data:([^;,]+)(?:;[^;,]*)*?;base64,(.+)$/s;

/*
 * Splits a base64 data URL into its media type and base64 payload.
 *
 * Returns `undefined` when the value is not a base64 data URL (for example a
 * regular http(s) URL, a non-base64 data URL, or a data URL without a media
 * type or without any payload).
 */
function parseBase64DataUrl(
  value: string,
): { mediaType: string; data: string } | undefined {
  const match = dataUrlRegex.exec(value);

  // Read the capture groups defensively instead of indexing blindly, so the
  // function stays type-safe when indexed access may yield `undefined`.
  const mediaType = match?.[1];
  const data = match?.[2];

  if (mediaType == null || data == null) {
    return undefined;
  }

  return { mediaType, data };
}
28
+
29
/*
 * Turns a tool-result URL into a functionResponse part carrying inline data.
 *
 * Only base64 data URLs can be converted; for any other URL the function
 * returns `undefined` and the caller decides how to represent the part.
 */
function convertUrlToolResultPart(
  url: string,
): GoogleGenerativeAIFunctionResponsePart | undefined {
  // https://ai.google.dev/api/caching#FunctionResponsePart documents inline data
  // as the only supported payload. The Vertex reference at
  // https://docs.cloud.google.com/vertex-ai/generative-ai/docs/model-reference/function-calling#functionresponsepart
  // hints at a different shape, which still needs confirmation and testing
  // against both APIs.
  const inline = parseBase64DataUrl(url);

  return inline == null
    ? undefined
    : {
        inlineData: {
          mimeType: inline.mediaType,
          data: inline.data,
        },
      };
}
47
+
48
/*
 * Pushes a single functionResponse part for a tool result onto `parts`,
 * using the multimodal format (supported by Gemini 3+ models) in which
 * inline images/files travel inside the functionResponse next to the text.
 *
 * - text parts are collected and joined with newlines into `response.content`
 * - image-data / file-data parts become inline data entries
 * - image-url / file-url parts become inline data entries when the URL can be
 *   converted; otherwise the part is JSON-stringified into the text
 * - any other part type is JSON-stringified into the text
 */
function appendToolResultParts(
  parts: GoogleGenerativeAIContentPart[],
  toolName: string,
  outputValue: Array<{
    type: string;
    [key: string]: unknown;
  }>,
): void {
  const textSegments: string[] = [];
  const inlineParts: GoogleGenerativeAIFunctionResponsePart[] = [];

  for (const item of outputValue) {
    const kind = item.type;

    if (kind === 'text') {
      textSegments.push(item.text as string);
      continue;
    }

    if (kind === 'image-data' || kind === 'file-data') {
      inlineParts.push({
        inlineData: {
          mimeType: item.mediaType as string,
          data: item.data as string,
        },
      });
      continue;
    }

    if (kind === 'image-url' || kind === 'file-url') {
      const converted = convertUrlToolResultPart(item.url as string);
      if (converted != null) {
        inlineParts.push(converted);
        continue;
      }
      // Not convertible: fall through to the textual representation below.
    }

    textSegments.push(JSON.stringify(item));
  }

  // Placeholder text is used when the tool produced no textual output.
  const content =
    textSegments.length > 0
      ? textSegments.join('\n')
      : 'Tool executed successfully.';

  const functionResponse: {
    name: string;
    response: unknown;
    parts?: Array<GoogleGenerativeAIFunctionResponsePart>;
  } = {
    name: toolName,
    response: { name: toolName, content },
  };

  // `parts` is only attached when there is at least one inline data entry.
  if (inlineParts.length > 0) {
    functionResponse.parts = inlineParts;
  }

  parts.push({ functionResponse });
}
116
+
117
/*
 * Pushes tool result content onto `parts` in the legacy layout meant for
 * pre-Gemini 3 models, which cannot carry multimodal parts inside a
 * functionResponse.
 *
 * - each text part becomes its own functionResponse part
 * - each image-data part becomes a top-level inlineData part followed by a
 *   short explanatory text part
 * - every other part type is emitted as a text part holding its JSON form
 */
function appendLegacyToolResultParts(
  parts: GoogleGenerativeAIContentPart[],
  toolName: string,
  outputValue: Array<{
    type: string;
    [key: string]: unknown;
  }>,
): void {
  for (const item of outputValue) {
    if (item.type === 'text') {
      parts.push({
        functionResponse: {
          name: toolName,
          response: { name: toolName, content: item.text },
        },
      });
    } else if (item.type === 'image-data') {
      const image = {
        inlineData: {
          mimeType: String(item.mediaType),
          data: String(item.data),
        },
      };
      const note = {
        text: 'Tool executed successfully and returned this image as a response',
      };
      parts.push(image, note);
    } else {
      parts.push({ text: JSON.stringify(item) });
    }
  }
}
11
162
 
12
163
  export function convertToGoogleGenerativeAIMessages(
13
164
  prompt: LanguageModelV4Prompt,
14
- options?: { isGemmaModel?: boolean; providerOptionsName?: string },
165
+ options?: {
166
+ isGemmaModel?: boolean;
167
+ providerOptionsName?: string;
168
+ supportsFunctionResponseParts?: boolean;
169
+ },
15
170
  ): GoogleGenerativeAIPrompt {
16
171
  const systemInstructionParts: Array<{ text: string }> = [];
17
172
  const contents: Array<GoogleGenerativeAIContent> = [];
18
173
  let systemMessagesAllowed = true;
19
174
  const isGemmaModel = options?.isGemmaModel ?? false;
20
175
  const providerOptionsName = options?.providerOptionsName ?? 'google';
176
+ const supportsFunctionResponseParts =
177
+ options?.supportsFunctionResponseParts ?? true;
21
178
 
22
179
  for (const { role, content } of prompt) {
23
180
  switch (role) {
@@ -178,36 +335,10 @@ export function convertToGoogleGenerativeAIMessages(
178
335
  const output = part.output;
179
336
 
180
337
  if (output.type === 'content') {
181
- for (const contentPart of output.value) {
182
- switch (contentPart.type) {
183
- case 'text':
184
- parts.push({
185
- functionResponse: {
186
- name: part.toolName,
187
- response: {
188
- name: part.toolName,
189
- content: contentPart.text,
190
- },
191
- },
192
- });
193
- break;
194
- case 'image-data':
195
- parts.push(
196
- {
197
- inlineData: {
198
- mimeType: contentPart.mediaType,
199
- data: contentPart.data,
200
- },
201
- },
202
- {
203
- text: 'Tool executed successfully and returned this image as a response',
204
- },
205
- );
206
- break;
207
- default:
208
- parts.push({ text: JSON.stringify(contentPart) });
209
- break;
210
- }
338
+ if (supportsFunctionResponseParts) {
339
+ appendToolResultParts(parts, part.toolName, output.value);
340
+ } else {
341
+ appendLegacyToolResultParts(parts, part.toolName, output.value);
211
342
  }
212
343
  } else {
213
344
  parts.push({
@@ -139,10 +139,15 @@ export class GoogleGenerativeAILanguageModel implements LanguageModelV4 {
139
139
  }
140
140
 
141
141
  const isGemmaModel = this.modelId.toLowerCase().startsWith('gemma-');
142
+ const supportsFunctionResponseParts = this.modelId.startsWith('gemini-3');
142
143
 
143
144
  const { contents, systemInstruction } = convertToGoogleGenerativeAIMessages(
144
145
  prompt,
145
- { isGemmaModel, providerOptionsName },
146
+ {
147
+ isGemmaModel,
148
+ providerOptionsName,
149
+ supportsFunctionResponseParts,
150
+ },
146
151
  );
147
152
 
148
153
  const {
@@ -2,9 +2,9 @@ import {
2
2
  GroundingMetadataSchema,
3
3
  PromptFeedbackSchema,
4
4
  UrlContextMetadataSchema,
5
+ type SafetyRatingSchema,
5
6
  UsageMetadataSchema,
6
7
  } from './google-generative-ai-language-model';
7
- import { type SafetyRatingSchema } from './google-generative-ai-language-model';
8
8
 
9
9
  export type GoogleGenerativeAIPrompt = {
10
10
  systemInstruction?: GoogleGenerativeAISystemInstruction;
@@ -24,9 +24,19 @@ export type GoogleGenerativeAIContentPart =
24
24
  | { text: string; thought?: boolean; thoughtSignature?: string }
25
25
  | { inlineData: { mimeType: string; data: string } }
26
26
  | { functionCall: { name: string; args: unknown }; thoughtSignature?: string }
27
- | { functionResponse: { name: string; response: unknown } }
27
+ | {
28
+ functionResponse: {
29
+ name: string;
30
+ response: unknown;
31
+ parts?: Array<GoogleGenerativeAIFunctionResponsePart>;
32
+ };
33
+ }
28
34
  | { fileData: { mimeType: string; fileUri: string } };
29
35
 
36
/**
 * A part nested inside a `functionResponse` (its optional `parts` array).
 * Currently only inline data is modelled: a MIME type plus the payload as a
 * string (presumably base64-encoded — confirm against the API reference).
 */
export type GoogleGenerativeAIFunctionResponsePart = {
  inlineData: {
    mimeType: string;
    data: string;
  };
};
39
+
30
40
  export type GoogleGenerativeAIGroundingMetadata = GroundingMetadataSchema;
31
41
 
32
42
  export type GoogleGenerativeAIUrlContextMetadata = UrlContextMetadataSchema;