@ai-sdk/google 4.0.0-beta.8 → 4.0.0-beta.82

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71):
  1. package/CHANGELOG.md +608 -5
  2. package/README.md +6 -4
  3. package/dist/index.d.ts +297 -54
  4. package/dist/index.js +5409 -640
  5. package/dist/index.js.map +1 -1
  6. package/dist/internal/index.d.ts +97 -26
  7. package/dist/internal/index.js +1653 -453
  8. package/dist/internal/index.js.map +1 -1
  9. package/docs/{15-google-generative-ai.mdx → 15-google.mdx} +784 -69
  10. package/package.json +16 -17
  11. package/src/{convert-google-generative-ai-usage.ts → convert-google-usage.ts} +13 -5
  12. package/src/convert-json-schema-to-openapi-schema.ts +1 -1
  13. package/src/convert-to-google-messages.ts +647 -0
  14. package/src/{google-generative-ai-embedding-options.ts → google-embedding-model-options.ts} +9 -2
  15. package/src/{google-generative-ai-embedding-model.ts → google-embedding-model.ts} +31 -18
  16. package/src/google-error.ts +1 -1
  17. package/src/google-files.ts +225 -0
  18. package/src/google-image-model-options.ts +35 -0
  19. package/src/{google-generative-ai-image-model.ts → google-image-model.ts} +116 -65
  20. package/src/{google-generative-ai-image-settings.ts → google-image-settings.ts} +2 -2
  21. package/src/google-json-accumulator.ts +371 -0
  22. package/src/{google-generative-ai-options.ts → google-language-model-options.ts} +50 -5
  23. package/src/{google-generative-ai-language-model.ts → google-language-model.ts} +691 -217
  24. package/src/google-prepare-tools.ts +72 -12
  25. package/src/google-prompt.ts +86 -0
  26. package/src/google-provider.ts +157 -53
  27. package/src/google-speech-api.ts +36 -0
  28. package/src/google-speech-model-options.ts +48 -0
  29. package/src/google-speech-model.ts +311 -0
  30. package/src/google-video-model-options.ts +43 -0
  31. package/src/{google-generative-ai-video-model.ts → google-video-model.ts} +25 -60
  32. package/src/{google-generative-ai-video-settings.ts → google-video-settings.ts} +2 -1
  33. package/src/index.ts +40 -9
  34. package/src/interactions/build-google-interactions-stream-transform.ts +818 -0
  35. package/src/interactions/cancel-google-interaction.ts +60 -0
  36. package/src/interactions/convert-google-interactions-usage.ts +47 -0
  37. package/src/interactions/convert-to-google-interactions-input.ts +557 -0
  38. package/src/interactions/extract-google-interactions-sources.ts +252 -0
  39. package/src/interactions/google-interactions-agent.ts +15 -0
  40. package/src/interactions/google-interactions-api.ts +530 -0
  41. package/src/interactions/google-interactions-language-model-options.ts +262 -0
  42. package/src/interactions/google-interactions-language-model.ts +776 -0
  43. package/src/interactions/google-interactions-prompt.ts +582 -0
  44. package/src/interactions/google-interactions-provider-metadata.ts +23 -0
  45. package/src/interactions/map-google-interactions-finish-reason.ts +31 -0
  46. package/src/interactions/parse-google-interactions-outputs.ts +252 -0
  47. package/src/interactions/poll-google-interactions.ts +129 -0
  48. package/src/interactions/prepare-google-interactions-tools.ts +245 -0
  49. package/src/interactions/stream-google-interactions.ts +242 -0
  50. package/src/interactions/synthesize-google-interactions-agent-stream.ts +185 -0
  51. package/src/internal/index.ts +3 -2
  52. package/src/{map-google-generative-ai-finish-reason.ts → map-google-finish-reason.ts} +3 -3
  53. package/src/realtime/google-realtime-event-mapper.ts +383 -0
  54. package/src/realtime/google-realtime-model-options.ts +3 -0
  55. package/src/realtime/google-realtime-model.ts +160 -0
  56. package/src/realtime/index.ts +2 -0
  57. package/src/tool/code-execution.ts +2 -2
  58. package/src/tool/enterprise-web-search.ts +9 -3
  59. package/src/tool/file-search.ts +5 -7
  60. package/src/tool/google-maps.ts +3 -2
  61. package/src/tool/google-search.ts +11 -12
  62. package/src/tool/url-context.ts +4 -2
  63. package/src/tool/vertex-rag-store.ts +9 -6
  64. package/dist/index.d.mts +0 -384
  65. package/dist/index.mjs +0 -2519
  66. package/dist/index.mjs.map +0 -1
  67. package/dist/internal/index.d.mts +0 -287
  68. package/dist/internal/index.mjs +0 -1708
  69. package/dist/internal/index.mjs.map +0 -1
  70. package/src/convert-to-google-generative-ai-messages.ts +0 -239
  71. package/src/google-generative-ai-prompt.ts +0 -47
@@ -0,0 +1,530 @@
1
+ import {
2
+ lazySchema,
3
+ zodSchema,
4
+ type InferSchema,
5
+ } from '@ai-sdk/provider-utils';
6
+ import { z } from 'zod/v4';
7
+
8
+ /*
9
+ * Zod schemas for the Gemini Interactions API wire format.
10
+ *
11
+ * Helpers are defined as factories (invoked only inside the exported
12
+ * `lazySchema(() => ...)` callbacks) so no `z.object(...)` / `z.union(...)`
13
+ * runs at module import. Schemas are intentionally narrow on the fields the
14
+ * SDK consumes and lenient (`loose()` / `unknown`) on the rest, so subsequent
15
+ * additions can widen without breaking the basic path.
16
+ */
17
+
/*
 * Factory for one per-modality token entry (`modality` / `tokens`). Both
 * fields are optional and the object is loose, so extra keys are tolerated.
 */
const tokenByModalitySchema = () => {
  const shape = {
    modality: z.string().nullish(),
    tokens: z.number().nullish(),
  };

  return z.looseObject(shape);
};
25
+
/*
 * Factory for the `usage` payload: aggregate token totals, per-modality
 * breakdowns, and `grounding_tool_count` entries. Every field is optional
 * and the object is loose.
 */
const usageSchema = () => {
  // Field schemas shared by several keys below.
  const tokenTotal = z.number().nullish();
  const modalityBreakdown = z.array(tokenByModalitySchema()).nullish();

  const groundingToolCountEntry = z.looseObject({
    type: z.string().nullish(),
    count: z.number().nullish(),
  });

  return z.looseObject({
    total_input_tokens: tokenTotal,
    total_output_tokens: tokenTotal,
    total_thought_tokens: tokenTotal,
    total_cached_tokens: tokenTotal,
    total_tool_use_tokens: tokenTotal,
    total_tokens: tokenTotal,
    input_tokens_by_modality: modalityBreakdown,
    output_tokens_by_modality: modalityBreakdown,
    cached_tokens_by_modality: modalityBreakdown,
    tool_use_tokens_by_modality: modalityBreakdown,
    grounding_tool_count: z.array(groundingToolCountEntry).nullish(),
  });
};

/* Parsed shape of the `usage` payload, inferred from `usageSchema`. */
export type GoogleInteractionsUsage = z.infer<
  ReturnType<typeof usageSchema>
>;
53
+
/*
 * Factory for the interaction `status` enum. Only the values listed here are
 * accepted.
 */
const interactionStatusSchema = () => {
  const statuses = [
    'in_progress',
    'requires_action',
    'completed',
    'failed',
    'cancelled',
    'incomplete',
  ] as const;

  return z.enum(statuses);
};
63
+
/*
 * Factory for a single text annotation. The three known citation kinds
 * (`url_citation`, `file_citation`, `place_citation`) are listed first in the
 * union; any other object with a string `type` is accepted by the last
 * member.
 */
const annotationSchema = () => {
  // Optional field schemas reused across the citation variants.
  const optionalText = z.string().nullish();
  const optionalNumber = z.number().nullish();

  const urlCitationAnnotation = z.looseObject({
    type: z.literal('url_citation'),
    url: optionalText,
    title: optionalText,
    start_index: optionalNumber,
    end_index: optionalNumber,
  });

  const fileCitationAnnotation = z.looseObject({
    type: z.literal('file_citation'),
    file_name: optionalText,
    document_uri: optionalText,
    url: optionalText,
    page_number: optionalNumber,
    media_id: optionalText,
    start_index: optionalNumber,
    end_index: optionalNumber,
    custom_metadata: z.record(z.string(), z.unknown()).nullish(),
  });

  const placeCitationAnnotation = z.looseObject({
    type: z.literal('place_citation'),
    name: optionalText,
    url: optionalText,
    place_id: optionalText,
    start_index: optionalNumber,
    end_index: optionalNumber,
  });

  // Catch-all for annotation kinds not modelled above.
  const otherAnnotation = z.looseObject({ type: z.string() });

  return z.union([
    urlCitationAnnotation,
    fileCitationAnnotation,
    placeCitationAnnotation,
    otherAnnotation,
  ]);
};
107
+
/*
 * Factory for one item of a thought summary. `type` is required; `text`,
 * `data`, and `mime_type` are optional, and extra keys are tolerated.
 */
const thoughtSummaryItemSchema = () => {
  const optionalText = z.string().nullish();

  return z.looseObject({
    type: z.string(),
    text: optionalText,
    data: optionalText,
    mime_type: optionalText,
  });
};
117
+
/*
 * Content blocks are the entries of a `model_output` step's `content` array.
 * Only text and image blocks are modelled here: function calls, thoughts,
 * and built-in tool call/result blocks are top-level step types (see
 * `stepSchema`), not content blocks.
 */
const contentBlockSchema = () => {
  const optionalText = z.string().nullish();

  const textBlock = z.looseObject({
    type: z.literal('text'),
    text: z.string(),
    annotations: z.array(annotationSchema()).nullish(),
  });

  const imageBlock = z.looseObject({
    type: z.literal('image'),
    data: optionalText,
    mime_type: optionalText,
    resolution: z.enum(['low', 'medium', 'high', 'ultra_high']).nullish(),
    uri: optionalText,
  });

  // Catch-all for block kinds not modelled above.
  const otherBlock = z.looseObject({ type: z.string() });

  return z.union([textBlock, imageBlock, otherBlock]);
};

/* Parsed shape of one content block, inferred from `contentBlockSchema`. */
export type GoogleInteractionsContentBlock = z.infer<
  ReturnType<typeof contentBlockSchema>
>;
153
+
/*
 * `type` discriminator values accepted for built-in tool *call* steps and
 * their matching step deltas.
 */
const BUILTIN_TOOL_CALL_STEP_TYPES =
  [
    'google_search_call',
    'code_execution_call',
    'url_context_call',
    'file_search_call',
    'google_maps_call',
    'mcp_server_tool_call',
  ] as const;

/*
 * `type` discriminator values accepted for built-in tool *result* steps and
 * their matching step deltas. Entries line up one-to-one with the call types
 * above.
 */
const BUILTIN_TOOL_RESULT_STEP_TYPES =
  [
    'google_search_result',
    'code_execution_result',
    'url_context_result',
    'file_search_result',
    'google_maps_result',
    'mcp_server_tool_result',
  ] as const;
171
+
/*
 * Union of step shapes. Steps are the elements of `response.steps[]` and the
 * value of `step` on `step.start` SSE events.
 *
 * - `user_input` echoes a turn the client sent; it only appears on
 *   `GET /interactions/{id}` (the full timeline) and the SDK skips it.
 * - `model_output` holds the model's text/image content in `step.content[]`.
 * - `function_call`, `thought`, and the built-in `*_call` / `*_result` steps
 *   carry their payload directly on the step, with no `content` wrapper.
 */
const stepSchema = () => {
  // Field schemas shared by several step variants.
  const optionalText = z.string().nullish();
  const toolArguments = z.record(z.string(), z.unknown()).nullish();
  const stepContent = z.array(contentBlockSchema()).nullish();

  const userInput = z.looseObject({
    type: z.literal('user_input'),
    content: stepContent,
  });

  const modelOutput = z.looseObject({
    type: z.literal('model_output'),
    content: stepContent,
  });

  const functionCall = z.looseObject({
    type: z.literal('function_call'),
    id: z.string(),
    name: z.string(),
    arguments: toolArguments,
    signature: optionalText,
  });

  const thought = z.looseObject({
    type: z.literal('thought'),
    signature: optionalText,
    summary: z.array(thoughtSummaryItemSchema()).nullish(),
  });

  const builtinToolCall = z.looseObject({
    type: z.enum(BUILTIN_TOOL_CALL_STEP_TYPES),
    id: z.string(),
    arguments: toolArguments,
    name: optionalText,
    server_name: optionalText,
    search_type: optionalText,
    signature: optionalText,
  });

  const builtinToolResult = z.looseObject({
    type: z.enum(BUILTIN_TOOL_RESULT_STEP_TYPES),
    call_id: z.string(),
    result: z.unknown().nullish(),
    is_error: z.boolean().nullish(),
    name: optionalText,
    server_name: optionalText,
    signature: optionalText,
  });

  // Catch-all for step kinds not modelled above.
  const otherStep = z.looseObject({ type: z.string() });

  return z.union([
    userInput,
    modelOutput,
    functionCall,
    thought,
    builtinToolCall,
    builtinToolResult,
    otherStep,
  ]);
};

/* Parsed shape of one step, inferred from `stepSchema`. */
export type GoogleInteractionsStep = z.infer<
  ReturnType<typeof stepSchema>
>;
251
+
/*
 * Lazily-built schema for an Interactions response body. Only `status` is
 * required; every other listed field is optional and unknown keys are
 * tolerated.
 */
export const googleInteractionsResponseSchema = lazySchema(() => {
  const optionalText = z.string().nullish();

  const responseBody = z.looseObject({
    /*
     * With `store: false` (fully stateless mode) there is no server-side
     * interaction record for the client to reference, so the response body
     * has no `id`. Declaring it `nullish` lets that shape validate.
     */
    id: optionalText,
    created: optionalText,
    updated: optionalText,
    status: interactionStatusSchema(),
    model: optionalText,
    agent: optionalText,
    steps: z.array(stepSchema()).nullish(),
    usage: usageSchema().nullish(),
    service_tier: optionalText,
    previous_interaction_id: optionalText,
    response_modalities: z.array(z.string()).nullish(),
  });

  return zodSchema(responseBody);
});

/* Parsed response body, inferred from `googleInteractionsResponseSchema`. */
export type GoogleInteractionsResponse = InferSchema<
  typeof googleInteractionsResponseSchema
>;
280
+
/*
 * Lazily-built schema for a single event on the Interactions SSE stream.
 * Modelled event shapes are listed first in the union; any other object with
 * a string `event_type` is accepted by the last member.
 */
export const googleInteractionsEventSchema = lazySchema(() => {
  // Field schemas shared by several event and delta shapes below.
  const optionalText = z.string().nullish();
  const optionalStatus = interactionStatusSchema().nullish();
  const annotationItem = annotationSchema();
  const thoughtSummaryEntry = thoughtSummaryItemSchema();
  const toolArguments = z.record(z.string(), z.unknown()).nullish();

  const createdEvent = z.looseObject({
    event_type: z.literal('interaction.created'),
    event_id: optionalText,
    interaction: z.looseObject({
      /*
       * No `id` is sent when `store: false` (fully stateless mode); the same
       * applies to `id` on `googleInteractionsResponseSchema`.
       */
      id: optionalText,
      created: optionalText,
      model: optionalText,
      agent: optionalText,
      status: optionalStatus,
    }),
  });

  /*
   * `step.start` delivers the discriminated step shape under `step`. A
   * `function_call` step includes its `name` here; a `thought` step brings
   * its initial `signature` and `summary` here when they are set.
   */
  const stepStartedEvent = z.looseObject({
    event_type: z.literal('step.start'),
    event_id: optionalText,
    index: z.number(),
    step: stepSchema(),
  });

  const textDelta = z.looseObject({
    type: z.literal('text'),
    text: z.string(),
  });

  const thoughtSummaryDelta = z.looseObject({
    type: z.literal('thought_summary'),
    content: thoughtSummaryEntry.nullish(),
  });

  const thoughtSignatureDelta = z.looseObject({
    type: z.literal('thought_signature'),
    signature: optionalText,
  });

  /*
   * Deltas for `function_call` steps stream the JSON arguments as a partial
   * string. Wire shape:
   *   { type: 'arguments_delta', arguments: '<partial-json-string>' }
   * The partial JSON is in `arguments` (a string); `arguments_delta` is only
   * the discriminator value, never a field name. Consumers concatenate the
   * fragments and parse them on `step.stop`.
   */
  const argumentsDelta = z.looseObject({
    type: z.literal('arguments_delta'),
    arguments: optionalText,
    id: optionalText,
    signature: optionalText,
  });

  /*
   * Deltas for URL/file/place citations. The discriminator is
   * `text_annotation_delta` (same `_delta` suffix as `arguments_delta`);
   * `text_annotation` is accepted as an alias.
   */
  const textAnnotationDelta = z.looseObject({
    type: z.enum(['text_annotation_delta', 'text_annotation']),
    annotations: z.array(annotationItem).nullish(),
  });

  /*
   * Each `image` delta carries the whole payload (`data` base64 plus
   * `mime_type`, or `uri`); there is no per-byte streaming.
   */
  const imageDelta = z.looseObject({
    type: z.literal('image'),
    data: optionalText,
    mime_type: optionalText,
    resolution: z.enum(['low', 'medium', 'high', 'ultra_high']).nullish(),
    uri: optionalText,
  });

  /*
   * Deltas for built-in tool call/result steps mirror the corresponding step
   * shapes and carry the full payload per delta (arguments are not streamed
   * per token). Result deltas carry the populated `result` payload.
   */
  const builtinToolCallDelta = z.looseObject({
    type: z.enum(BUILTIN_TOOL_CALL_STEP_TYPES),
    id: optionalText,
    arguments: toolArguments,
    name: optionalText,
    server_name: optionalText,
    search_type: optionalText,
    signature: optionalText,
  });

  const builtinToolResultDelta = z.looseObject({
    type: z.enum(BUILTIN_TOOL_RESULT_STEP_TYPES),
    call_id: optionalText,
    result: z.unknown().nullish(),
    is_error: z.boolean().nullish(),
    name: optionalText,
    server_name: optionalText,
    signature: optionalText,
  });

  // Catch-all for delta kinds not modelled above.
  const otherDelta = z.looseObject({ type: z.string() });

  const anyDelta = z.union([
    textDelta,
    imageDelta,
    thoughtSummaryDelta,
    thoughtSignatureDelta,
    argumentsDelta,
    textAnnotationDelta,
    builtinToolCallDelta,
    builtinToolResultDelta,
    otherDelta,
  ]);

  const stepDeltaEvent = z.looseObject({
    event_type: z.literal('step.delta'),
    event_id: optionalText,
    index: z.number(),
    delta: anyDelta,
  });

  const stepStoppedEvent = z.looseObject({
    event_type: z.literal('step.stop'),
    event_id: optionalText,
    index: z.number(),
  });

  /*
   * Status-transition events. The API emits `interaction.status_update` for
   * in-progress and requires-action transitions; the more specific
   * `interaction.in_progress` and `interaction.requires_action` shapes are
   * also accepted so that all three can go through the same handler.
   */
  const statusUpdateEvent = z.looseObject({
    event_type: z.literal('interaction.status_update'),
    event_id: optionalText,
    interaction_id: optionalText,
    status: optionalStatus,
  });

  const inProgressEvent = z.looseObject({
    event_type: z.literal('interaction.in_progress'),
    event_id: optionalText,
    interaction_id: optionalText,
    status: optionalStatus,
  });

  const requiresActionEvent = z.looseObject({
    event_type: z.literal('interaction.requires_action'),
    event_id: optionalText,
    interaction_id: optionalText,
    status: optionalStatus,
  });

  const completedEvent = z.looseObject({
    event_type: z.literal('interaction.completed'),
    event_id: optionalText,
    interaction: z.looseObject({
      id: optionalText,
      status: optionalStatus,
      usage: usageSchema().nullish(),
      service_tier: optionalText,
    }),
  });

  const streamErrorEvent = z.looseObject({
    event_type: z.literal('error'),
    event_id: optionalText,
    error: z
      .looseObject({
        code: optionalText,
        message: optionalText,
      })
      .nullish(),
  });

  // Catch-all for event kinds not modelled above.
  const otherEvent = z.looseObject({ event_type: z.string() });

  return zodSchema(
    z.union([
      createdEvent,
      stepStartedEvent,
      stepDeltaEvent,
      stepStoppedEvent,
      statusUpdateEvent,
      inProgressEvent,
      requiresActionEvent,
      completedEvent,
      streamErrorEvent,
      otherEvent,
    ]),
  );
});

/* Parsed stream event, inferred from `googleInteractionsEventSchema`. */
export type GoogleInteractionsEvent = InferSchema<
  typeof googleInteractionsEventSchema
>;