@ai-sdk/google 3.0.47 → 3.0.49

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@ai-sdk/google",
3
- "version": "3.0.47",
3
+ "version": "3.0.49",
4
4
  "license": "Apache-2.0",
5
5
  "sideEffects": false,
6
6
  "main": "./dist/index.js",
@@ -36,16 +36,16 @@
36
36
  }
37
37
  },
38
38
  "dependencies": {
39
- "@ai-sdk/provider": "3.0.8",
40
- "@ai-sdk/provider-utils": "4.0.20"
39
+ "@ai-sdk/provider-utils": "4.0.20",
40
+ "@ai-sdk/provider": "3.0.8"
41
41
  },
42
42
  "devDependencies": {
43
43
  "@types/node": "20.17.24",
44
44
  "tsup": "^8",
45
45
  "typescript": "5.8.3",
46
46
  "zod": "3.25.76",
47
- "@ai-sdk/test-server": "1.0.3",
48
- "@vercel/ai-tsconfig": "0.0.0"
47
+ "@vercel/ai-tsconfig": "0.0.0",
48
+ "@ai-sdk/test-server": "1.0.3"
49
49
  },
50
50
  "peerDependencies": {
51
51
  "zod": "^3.25.76 || ^4.1.8"
@@ -74,8 +74,16 @@ export class GoogleGenerativeAIEmbeddingModel implements EmbeddingModelV3 {
74
74
  headers,
75
75
  );
76
76
 
77
- // For single embeddings, use the single endpoint (ratelimits, etc.)
77
+ const multimodalContent = googleOptions?.content ?? [];
78
+
79
+ // For single embeddings, use the single endpoint
78
80
  if (values.length === 1) {
81
+ const textPart = values[0] ? [{ text: values[0] }] : [];
82
+ const parts =
83
+ multimodalContent.length > 0
84
+ ? [...textPart, ...multimodalContent]
85
+ : [{ text: values[0] }];
86
+
79
87
  const {
80
88
  responseHeaders,
81
89
  value: response,
@@ -86,7 +94,7 @@ export class GoogleGenerativeAIEmbeddingModel implements EmbeddingModelV3 {
86
94
  body: {
87
95
  model: `models/${this.modelId}`,
88
96
  content: {
89
- parts: [{ text: values[0] }],
97
+ parts,
90
98
  },
91
99
  outputDimensionality: googleOptions?.outputDimensionality,
92
100
  taskType: googleOptions?.taskType,
@@ -107,6 +115,8 @@ export class GoogleGenerativeAIEmbeddingModel implements EmbeddingModelV3 {
107
115
  };
108
116
  }
109
117
 
118
+ // For multiple values, use the batch endpoint
119
+ // If multimodal content is provided, merge it into each request's parts
110
120
  const {
111
121
  responseHeaders,
112
122
  value: response,
@@ -115,12 +125,21 @@ export class GoogleGenerativeAIEmbeddingModel implements EmbeddingModelV3 {
115
125
  url: `${this.config.baseURL}/models/${this.modelId}:batchEmbedContents`,
116
126
  headers: mergedHeaders,
117
127
  body: {
118
- requests: values.map(value => ({
119
- model: `models/${this.modelId}`,
120
- content: { role: 'user', parts: [{ text: value }] },
121
- outputDimensionality: googleOptions?.outputDimensionality,
122
- taskType: googleOptions?.taskType,
123
- })),
128
+ requests: values.map(value => {
129
+ const textPart = value ? [{ text: value }] : [];
130
+ return {
131
+ model: `models/${this.modelId}`,
132
+ content: {
133
+ role: 'user',
134
+ parts:
135
+ multimodalContent.length > 0
136
+ ? [...textPart, ...multimodalContent]
137
+ : [{ text: value }],
138
+ },
139
+ outputDimensionality: googleOptions?.outputDimensionality,
140
+ taskType: googleOptions?.taskType,
141
+ };
142
+ }),
124
143
  },
125
144
  failedResponseHandler: googleFailedResponseHandler,
126
145
  successfulResponseHandler: createJsonResponseHandler(
@@ -9,6 +9,16 @@ export type GoogleGenerativeAIEmbeddingModelId =
9
9
  | 'gemini-embedding-001'
10
10
  | (string & {});
11
11
 
12
+ const googleEmbeddingContentPartSchema = z.union([
13
+ z.object({ text: z.string() }),
14
+ z.object({
15
+ inlineData: z.object({
16
+ mimeType: z.string(),
17
+ data: z.string(),
18
+ }),
19
+ }),
20
+ ]);
21
+
12
22
  export const googleEmbeddingModelOptions = lazySchema(() =>
13
23
  zodSchema(
14
24
  z.object({
@@ -42,6 +52,13 @@ export const googleEmbeddingModelOptions = lazySchema(() =>
42
52
  'CODE_RETRIEVAL_QUERY',
43
53
  ])
44
54
  .optional(),
55
+
56
+ /**
57
+ * Optional. Multimodal content parts for embedding non-text content
58
+ * (images, video, PDF, audio). When provided, these parts are merged
59
+ * with the text values in the embedding request.
60
+ */
61
+ content: z.array(googleEmbeddingContentPartSchema).min(1).optional(),
45
62
  }),
46
63
  ),
47
64
  );