@ai-sdk/google 4.0.0-beta.7 → 4.0.0-beta.82

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71) hide show
  1. package/CHANGELOG.md +614 -5
  2. package/README.md +6 -4
  3. package/dist/index.d.ts +301 -50
  4. package/dist/index.js +5410 -639
  5. package/dist/index.js.map +1 -1
  6. package/dist/internal/index.d.ts +100 -26
  7. package/dist/internal/index.js +1653 -451
  8. package/dist/internal/index.js.map +1 -1
  9. package/docs/{15-google-generative-ai.mdx → 15-google.mdx} +784 -69
  10. package/package.json +16 -17
  11. package/src/{convert-google-generative-ai-usage.ts → convert-google-usage.ts} +13 -5
  12. package/src/convert-json-schema-to-openapi-schema.ts +1 -1
  13. package/src/convert-to-google-messages.ts +647 -0
  14. package/src/{google-generative-ai-embedding-options.ts → google-embedding-model-options.ts} +9 -2
  15. package/src/{google-generative-ai-embedding-model.ts → google-embedding-model.ts} +31 -18
  16. package/src/google-error.ts +1 -1
  17. package/src/google-files.ts +225 -0
  18. package/src/google-image-model-options.ts +35 -0
  19. package/src/{google-generative-ai-image-model.ts → google-image-model.ts} +116 -65
  20. package/src/{google-generative-ai-image-settings.ts → google-image-settings.ts} +2 -2
  21. package/src/google-json-accumulator.ts +371 -0
  22. package/src/{google-generative-ai-options.ts → google-language-model-options.ts} +50 -5
  23. package/src/{google-generative-ai-language-model.ts → google-language-model.ts} +701 -219
  24. package/src/google-prepare-tools.ts +72 -12
  25. package/src/google-prompt.ts +86 -0
  26. package/src/google-provider.ts +157 -53
  27. package/src/google-speech-api.ts +36 -0
  28. package/src/google-speech-model-options.ts +48 -0
  29. package/src/google-speech-model.ts +311 -0
  30. package/src/google-video-model-options.ts +43 -0
  31. package/src/{google-generative-ai-video-model.ts → google-video-model.ts} +25 -60
  32. package/src/{google-generative-ai-video-settings.ts → google-video-settings.ts} +2 -1
  33. package/src/index.ts +40 -9
  34. package/src/interactions/build-google-interactions-stream-transform.ts +818 -0
  35. package/src/interactions/cancel-google-interaction.ts +60 -0
  36. package/src/interactions/convert-google-interactions-usage.ts +47 -0
  37. package/src/interactions/convert-to-google-interactions-input.ts +557 -0
  38. package/src/interactions/extract-google-interactions-sources.ts +252 -0
  39. package/src/interactions/google-interactions-agent.ts +15 -0
  40. package/src/interactions/google-interactions-api.ts +530 -0
  41. package/src/interactions/google-interactions-language-model-options.ts +262 -0
  42. package/src/interactions/google-interactions-language-model.ts +776 -0
  43. package/src/interactions/google-interactions-prompt.ts +582 -0
  44. package/src/interactions/google-interactions-provider-metadata.ts +23 -0
  45. package/src/interactions/map-google-interactions-finish-reason.ts +31 -0
  46. package/src/interactions/parse-google-interactions-outputs.ts +252 -0
  47. package/src/interactions/poll-google-interactions.ts +129 -0
  48. package/src/interactions/prepare-google-interactions-tools.ts +245 -0
  49. package/src/interactions/stream-google-interactions.ts +242 -0
  50. package/src/interactions/synthesize-google-interactions-agent-stream.ts +185 -0
  51. package/src/internal/index.ts +3 -2
  52. package/src/{map-google-generative-ai-finish-reason.ts → map-google-finish-reason.ts} +3 -3
  53. package/src/realtime/google-realtime-event-mapper.ts +383 -0
  54. package/src/realtime/google-realtime-model-options.ts +3 -0
  55. package/src/realtime/google-realtime-model.ts +160 -0
  56. package/src/realtime/index.ts +2 -0
  57. package/src/tool/code-execution.ts +2 -2
  58. package/src/tool/enterprise-web-search.ts +9 -3
  59. package/src/tool/file-search.ts +5 -7
  60. package/src/tool/google-maps.ts +3 -2
  61. package/src/tool/google-search.ts +11 -12
  62. package/src/tool/url-context.ts +4 -2
  63. package/src/tool/vertex-rag-store.ts +9 -6
  64. package/dist/index.d.mts +0 -376
  65. package/dist/index.mjs +0 -2517
  66. package/dist/index.mjs.map +0 -1
  67. package/dist/internal/index.d.mts +0 -284
  68. package/dist/internal/index.mjs +0 -1706
  69. package/dist/internal/index.mjs.map +0 -1
  70. package/src/convert-to-google-generative-ai-messages.ts +0 -239
  71. package/src/google-generative-ai-prompt.ts +0 -38
@@ -0,0 +1,242 @@
1
+ import {
2
+ createEventSourceResponseHandler,
3
+ delay,
4
+ getFromApi,
5
+ isAbortError,
6
+ type FetchFunction,
7
+ type ParseResult,
8
+ } from '@ai-sdk/provider-utils';
9
+ import { googleFailedResponseHandler } from '../google-error';
10
+ import { cancelGoogleInteraction } from './cancel-google-interaction';
11
+ import {
12
+ googleInteractionsEventSchema,
13
+ type GoogleInteractionsEvent,
14
+ } from './google-interactions-api';
15
+
16
const DEFAULT_MAX_RETRIES = 3;
const DEFAULT_RETRY_DELAY_MS = 500;

/**
 * Connects to `GET {baseURL}/interactions/{id}?stream=true` and surfaces the
 * server-sent events as a `ReadableStream<ParseResult<GoogleInteractionsEvent>>`
 * so the existing `buildGoogleInteractionsStreamTransform` can consume them
 * unchanged.
 *
 * The connection can drop mid-run: long-running agents idle for long
 * stretches between SSE events and undici's default body timeout terminates
 * the request with `UND_ERR_BODY_TIMEOUT`. We track the last seen `event_id`
 * and reconnect with `?last_event_id=<id>` on any unexpected end. After
 * `maxRetries` consecutive failures the stream errors out so the caller can
 * decide whether to fall back to polling. "Consecutive" means without any
 * item having been received in between: every received item resets the
 * failure counter, regardless of whether the connection later ends cleanly
 * or with a read error.
 *
 * The stream completes cleanly once an event whose `event_type` is
 * `interaction.completed` or `error` has been forwarded.
 *
 * If the stream ends because of an abort (upstream `abortSignal` or consumer
 * `cancel()`) before such a terminal event was seen, `cancelGoogleInteraction`
 * is invoked for the interaction. When `abortSignal` is already aborted at
 * call time, no request is made and the stream closes without emitting
 * anything (the cancel call still happens).
 *
 * @param baseURL - API base URL; `/interactions/{id}` is appended to it.
 * @param interactionId - Id of the interaction to stream. Must be non-empty.
 * @param headers - Request headers; `accept: text/event-stream` is added.
 * @param fetch - Optional custom fetch implementation.
 * @param abortSignal - Optional signal that aborts the in-flight request and
 *   any pending backoff delay.
 * @param maxRetries - Number of consecutive failed attempts at which the
 *   stream errors (the failure that brings the counter to this value is
 *   fatal).
 * @param retryDelayMs - Base backoff delay; the wait before the next attempt
 *   is `retryDelayMs * <consecutive failure count>`.
 * @returns A stream of parse results for the interaction's SSE events.
 * @throws Error synchronously when `interactionId` is empty.
 */
export function streamGoogleInteractionEvents({
  baseURL,
  interactionId,
  headers,
  fetch,
  abortSignal,
  maxRetries = DEFAULT_MAX_RETRIES,
  retryDelayMs = DEFAULT_RETRY_DELAY_MS,
}: {
  baseURL: string;
  interactionId: string;
  headers: Record<string, string | undefined>;
  fetch?: FetchFunction;
  abortSignal?: AbortSignal;
  maxRetries?: number;
  retryDelayMs?: number;
}): ReadableStream<ParseResult<GoogleInteractionsEvent>> {
  if (interactionId.length === 0) {
    throw new Error(
      'google.interactions: cannot stream a background interaction without an id.',
    );
  }

  const eventSourceHeaders = {
    ...headers,
    accept: 'text/event-stream',
  };

  let lastEventId: string | undefined;
  let complete = false;
  // Number of failures since the last received item.
  let attempt = 0;
  let receivedAnyEventThisAttempt = false;
  let currentReader:
    | ReadableStreamDefaultReader<ParseResult<GoogleInteractionsEvent>>
    | undefined;

  /*
   * Forwards `cancel()` from the consumer (and the upstream `abortSignal`) to
   * any in-flight `getFromApi` or `delay` so the loop unblocks immediately
   * instead of waiting for the next iteration to notice a flag.
   */
  const internalAbort = new AbortController();
  const upstreamAbortHandler = () => internalAbort.abort();
  if (abortSignal != null) {
    if (abortSignal.aborted) {
      internalAbort.abort();
    } else {
      abortSignal.addEventListener('abort', upstreamAbortHandler, {
        once: true,
      });
    }
  }
  const effectiveSignal = internalAbort.signal;

  /** Builds the SSE URL, adding `last_event_id` once an event id is known. */
  function buildUrl(): string {
    const base = `${baseURL}/interactions/${encodeURIComponent(interactionId)}`;
    const params = new URLSearchParams({ stream: 'true' });
    if (lastEventId != null) {
      params.set('last_event_id', lastEventId);
    }
    return `${base}?${params.toString()}`;
  }

  /** Opens a new SSE connection and returns a reader over its parsed events. */
  async function openReader() {
    const { value: stream } = await getFromApi({
      url: buildUrl(),
      headers: eventSourceHeaders,
      failedResponseHandler: googleFailedResponseHandler,
      successfulResponseHandler: createEventSourceResponseHandler(
        googleInteractionsEventSchema,
      ),
      abortSignal: effectiveSignal,
      fetch,
    });
    return stream.getReader();
  }

  /**
   * Records one failed attempt. Errors the stream with `createFailure()` and
   * resolves to `false` when the retry budget is exhausted; otherwise waits
   * for the linear backoff delay and resolves to `true`. Rejects if the delay
   * is aborted.
   */
  async function backOffOrFail(
    controller: ReadableStreamDefaultController<
      ParseResult<GoogleInteractionsEvent>
    >,
    createFailure: () => unknown,
  ): Promise<boolean> {
    attempt++;
    if (attempt >= maxRetries) {
      controller.error(createFailure());
      return false;
    }
    await delay(retryDelayMs * attempt, {
      abortSignal: effectiveSignal,
    });
    return true;
  }

  return new ReadableStream<ParseResult<GoogleInteractionsEvent>>({
    async start(controller) {
      try {
        while (!complete && !effectiveSignal.aborted) {
          if (currentReader == null) {
            try {
              currentReader = await openReader();
              receivedAnyEventThisAttempt = false;
            } catch (error) {
              if (isAbortError(error) || effectiveSignal.aborted) {
                controller.error(error);
                return;
              }
              if (!(await backOffOrFail(controller, () => error))) {
                return;
              }
              continue;
            }
          }

          try {
            const { done, value } = await currentReader.read();
            if (done) {
              /*
               * Underlying stream ended without a terminal event (a terminal
               * event would already have ended the loop), so this is an
               * unexpected disconnect and we reconnect with `last_event_id`.
               *
               * If the connection closed without producing any events at all
               * this attempt, count it as a failed attempt -- otherwise an
               * empty/misbehaving server response would loop forever.
               */
              currentReader = undefined;
              if (!receivedAnyEventThisAttempt) {
                const mayRetry = await backOffOrFail(
                  controller,
                  () =>
                    new Error(
                      'google.interactions: SSE stream closed without producing any events.',
                    ),
                );
                if (!mayRetry) {
                  return;
                }
              }
              continue;
            }

            /*
             * The connection made progress: earlier failures are no longer
             * "consecutive". Resetting here (rather than only on a clean
             * `done`) keeps repeated body timeouts on an otherwise productive
             * long-running stream from exhausting the retry budget.
             */
            receivedAnyEventThisAttempt = true;
            attempt = 0;

            if (value.success) {
              const streamEvent = value.value as {
                event_id?: string;
                event_type?: string;
              };
              if (
                typeof streamEvent.event_id === 'string' &&
                streamEvent.event_id.length > 0
              ) {
                lastEventId = streamEvent.event_id;
              }
              if (
                streamEvent.event_type === 'interaction.completed' ||
                streamEvent.event_type === 'error'
              ) {
                complete = true;
              }
            }

            controller.enqueue(value);
          } catch (error) {
            if (isAbortError(error) || effectiveSignal.aborted) {
              controller.error(error);
              return;
            }
            currentReader = undefined;
            if (!(await backOffOrFail(controller, () => error))) {
              return;
            }
          }
        }
        controller.close();
      } catch (error) {
        controller.error(error);
      } finally {
        if (abortSignal != null) {
          abortSignal.removeEventListener('abort', upstreamAbortHandler);
        }
        currentReader?.cancel().catch(() => {});
        currentReader = undefined;

        /*
         * If we're exiting because the caller aborted (or the consumer
         * cancelled the stream) before the agent finished, fire the cancel
         * request so the run stops on Google's side. Skipped when `complete`
         * is set -- a terminal `interaction.completed` / `error` event was
         * already received.
         */
        if (effectiveSignal.aborted && !complete) {
          await cancelGoogleInteraction({
            baseURL,
            interactionId,
            headers,
            fetch,
          });
        }
      }
    },

    cancel() {
      internalAbort.abort();
      currentReader?.cancel().catch(() => {});
      currentReader = undefined;
    },
  });
}
@@ -0,0 +1,185 @@
1
+ import type {
2
+ LanguageModelV4FinishReason,
3
+ LanguageModelV4StreamPart,
4
+ SharedV4ProviderMetadata,
5
+ SharedV4Warning,
6
+ } from '@ai-sdk/provider';
7
+ import { convertGoogleInteractionsUsage } from './convert-google-interactions-usage';
8
+ import { type GoogleInteractionsResponse } from './google-interactions-api';
9
+ import { mapGoogleInteractionsFinishReason } from './map-google-interactions-finish-reason';
10
+ import { parseGoogleInteractionsOutputs } from './parse-google-interactions-outputs';
11
+
12
/**
 * Replays an already-resolved Interaction `response` as a
 * `LanguageModelV4StreamPart` stream.
 *
 * The emitted sequence is: `stream-start`, `response-metadata`, an optional
 * `raw` part carrying the whole response (when `includeRawChunks` is set),
 * the parts derived from `parseGoogleInteractionsOutputs`, and a final
 * `finish` part. Text and reasoning blocks are each emitted as
 * start / single delta / end (the delta is omitted for empty text); tool
 * calls are emitted as input-start / input-delta / input-end followed by the
 * `tool-call` part itself. Content part types other than text, reasoning,
 * tool-call, tool-result, source and file are ignored.
 *
 * Everything is enqueued synchronously in `start`, after which the stream is
 * closed.
 *
 * @param response - The resolved Interaction response to replay.
 * @param warnings - Warnings attached to the `stream-start` part.
 * @param generateId - Id generator passed to `parseGoogleInteractionsOutputs`.
 * @param includeRawChunks - When truthy, emits the response as a `raw` part.
 * @param headerServiceTier - Fallback service tier used when the response
 *   body carries no `service_tier`.
 * @returns A stream that replays the response and then closes.
 */
export function synthesizeGoogleInteractionsAgentStream({
  response,
  warnings,
  generateId,
  includeRawChunks,
  headerServiceTier,
}: {
  response: GoogleInteractionsResponse;
  warnings: Array<SharedV4Warning>;
  generateId: () => string;
  includeRawChunks?: boolean;
  headerServiceTier?: string;
}): ReadableStream<LanguageModelV4StreamPart> {
  return new ReadableStream<LanguageModelV4StreamPart>({
    start(controller) {
      const emit = (streamPart: LanguageModelV4StreamPart): void => {
        controller.enqueue(streamPart);
      };

      emit({ type: 'stream-start', warnings });

      // Only a non-empty string counts as a usable interaction id.
      const rawId = response.id;
      const interactionId =
        typeof rawId === 'string' && rawId.length > 0 ? rawId : undefined;

      // Keep the creation time only when it parses to a valid date.
      const rawCreated = response.created;
      const createdAt =
        typeof rawCreated === 'string' ? new Date(rawCreated) : undefined;
      const timestamp =
        createdAt != null && !Number.isNaN(createdAt.getTime())
          ? createdAt
          : undefined;

      emit({
        type: 'response-metadata',
        ...(interactionId != null ? { id: interactionId } : {}),
        modelId: response.model ?? undefined,
        ...(timestamp != null ? { timestamp } : {}),
      });

      if (includeRawChunks) {
        emit({ type: 'raw', rawValue: response });
      }

      const parsed = parseGoogleInteractionsOutputs({
        steps: response.steps ?? null,
        generateId,
        interactionId,
      });

      // Block ids are `<interaction id or "agent">:<running index>`.
      const blockIdPrefix = interactionId ?? 'agent';
      let nextBlockIndex = 0;
      const allocateBlockId = (): string =>
        `${blockIdPrefix}:${nextBlockIndex++}`;

      for (const part of parsed.content) {
        if (part.type === 'text') {
          const id = allocateBlockId();
          emit({ type: 'text-start', id });
          if (part.text.length > 0) {
            emit({ type: 'text-delta', id, delta: part.text });
          }
          emit({
            type: 'text-end',
            id,
            ...(part.providerMetadata
              ? { providerMetadata: part.providerMetadata }
              : {}),
          });
        } else if (part.type === 'reasoning') {
          const id = allocateBlockId();
          emit({ type: 'reasoning-start', id });
          if (part.text.length > 0) {
            emit({ type: 'reasoning-delta', id, delta: part.text });
          }
          emit({
            type: 'reasoning-end',
            id,
            ...(part.providerMetadata
              ? { providerMetadata: part.providerMetadata }
              : {}),
          });
        } else if (part.type === 'tool-call') {
          const { toolCallId, toolName, input } = part;
          emit({
            type: 'tool-input-start',
            id: toolCallId,
            toolName,
            ...(part.providerExecuted
              ? { providerExecuted: part.providerExecuted }
              : {}),
          });
          emit({ type: 'tool-input-delta', id: toolCallId, delta: input });
          emit({ type: 'tool-input-end', id: toolCallId });
          emit({
            type: 'tool-call',
            toolCallId,
            toolName,
            input,
            ...(part.providerExecuted
              ? { providerExecuted: part.providerExecuted }
              : {}),
            ...(part.providerMetadata
              ? { providerMetadata: part.providerMetadata }
              : {}),
          });
        } else if (part.type === 'tool-result') {
          emit({
            type: 'tool-result',
            toolCallId: part.toolCallId,
            toolName: part.toolName,
            result: part.result,
          });
        } else if (part.type === 'source' || part.type === 'file') {
          // Source and file parts are forwarded unchanged.
          emit(part);
        }
        // Any other part type is intentionally skipped.
      }

      // The response body's tier takes precedence over the header's.
      const serviceTier = response.service_tier ?? headerServiceTier;

      const googleMetadata = {
        ...(interactionId != null ? { interactionId } : {}),
        ...(serviceTier != null ? { serviceTier } : {}),
      };
      const providerMetadata: SharedV4ProviderMetadata = {
        google: googleMetadata,
      };

      const finishReason: LanguageModelV4FinishReason = {
        unified: mapGoogleInteractionsFinishReason({
          status: response.status,
          hasFunctionCall: parsed.hasFunctionCall,
        }),
        raw: response.status,
      };

      emit({
        type: 'finish',
        finishReason,
        usage: convertGoogleInteractionsUsage(response.usage),
        providerMetadata,
      });

      controller.close();
    },
  });
}
@@ -1,3 +1,4 @@
1
- export * from '../google-generative-ai-language-model';
1
+ export * from '../google-language-model';
2
+ export * from '../google-speech-model';
2
3
  export { googleTools } from '../google-tools';
3
- export type { GoogleGenerativeAIModelId } from '../google-generative-ai-options';
4
+ export type { GoogleModelId } from '../google-language-model-options';
@@ -1,12 +1,12 @@
1
- import { LanguageModelV3FinishReason } from '@ai-sdk/provider';
1
+ import type { LanguageModelV4FinishReason } from '@ai-sdk/provider';
2
2
 
3
- export function mapGoogleGenerativeAIFinishReason({
3
+ export function mapGoogleFinishReason({
4
4
  finishReason,
5
5
  hasToolCalls,
6
6
  }: {
7
7
  finishReason: string | null | undefined;
8
8
  hasToolCalls: boolean;
9
- }): LanguageModelV3FinishReason['unified'] {
9
+ }): LanguageModelV4FinishReason['unified'] {
10
10
  switch (finishReason) {
11
11
  case 'STOP':
12
12
  return hasToolCalls ? 'tool-calls' : 'stop';