@ai-sdk/google 4.0.0-beta.7 → 4.0.0-beta.82

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71)
  1. package/CHANGELOG.md +614 -5
  2. package/README.md +6 -4
  3. package/dist/index.d.ts +301 -50
  4. package/dist/index.js +5410 -639
  5. package/dist/index.js.map +1 -1
  6. package/dist/internal/index.d.ts +100 -26
  7. package/dist/internal/index.js +1653 -451
  8. package/dist/internal/index.js.map +1 -1
  9. package/docs/{15-google-generative-ai.mdx → 15-google.mdx} +784 -69
  10. package/package.json +16 -17
  11. package/src/{convert-google-generative-ai-usage.ts → convert-google-usage.ts} +13 -5
  12. package/src/convert-json-schema-to-openapi-schema.ts +1 -1
  13. package/src/convert-to-google-messages.ts +647 -0
  14. package/src/{google-generative-ai-embedding-options.ts → google-embedding-model-options.ts} +9 -2
  15. package/src/{google-generative-ai-embedding-model.ts → google-embedding-model.ts} +31 -18
  16. package/src/google-error.ts +1 -1
  17. package/src/google-files.ts +225 -0
  18. package/src/google-image-model-options.ts +35 -0
  19. package/src/{google-generative-ai-image-model.ts → google-image-model.ts} +116 -65
  20. package/src/{google-generative-ai-image-settings.ts → google-image-settings.ts} +2 -2
  21. package/src/google-json-accumulator.ts +371 -0
  22. package/src/{google-generative-ai-options.ts → google-language-model-options.ts} +50 -5
  23. package/src/{google-generative-ai-language-model.ts → google-language-model.ts} +701 -219
  24. package/src/google-prepare-tools.ts +72 -12
  25. package/src/google-prompt.ts +86 -0
  26. package/src/google-provider.ts +157 -53
  27. package/src/google-speech-api.ts +36 -0
  28. package/src/google-speech-model-options.ts +48 -0
  29. package/src/google-speech-model.ts +311 -0
  30. package/src/google-video-model-options.ts +43 -0
  31. package/src/{google-generative-ai-video-model.ts → google-video-model.ts} +25 -60
  32. package/src/{google-generative-ai-video-settings.ts → google-video-settings.ts} +2 -1
  33. package/src/index.ts +40 -9
  34. package/src/interactions/build-google-interactions-stream-transform.ts +818 -0
  35. package/src/interactions/cancel-google-interaction.ts +60 -0
  36. package/src/interactions/convert-google-interactions-usage.ts +47 -0
  37. package/src/interactions/convert-to-google-interactions-input.ts +557 -0
  38. package/src/interactions/extract-google-interactions-sources.ts +252 -0
  39. package/src/interactions/google-interactions-agent.ts +15 -0
  40. package/src/interactions/google-interactions-api.ts +530 -0
  41. package/src/interactions/google-interactions-language-model-options.ts +262 -0
  42. package/src/interactions/google-interactions-language-model.ts +776 -0
  43. package/src/interactions/google-interactions-prompt.ts +582 -0
  44. package/src/interactions/google-interactions-provider-metadata.ts +23 -0
  45. package/src/interactions/map-google-interactions-finish-reason.ts +31 -0
  46. package/src/interactions/parse-google-interactions-outputs.ts +252 -0
  47. package/src/interactions/poll-google-interactions.ts +129 -0
  48. package/src/interactions/prepare-google-interactions-tools.ts +245 -0
  49. package/src/interactions/stream-google-interactions.ts +242 -0
  50. package/src/interactions/synthesize-google-interactions-agent-stream.ts +185 -0
  51. package/src/internal/index.ts +3 -2
  52. package/src/{map-google-generative-ai-finish-reason.ts → map-google-finish-reason.ts} +3 -3
  53. package/src/realtime/google-realtime-event-mapper.ts +383 -0
  54. package/src/realtime/google-realtime-model-options.ts +3 -0
  55. package/src/realtime/google-realtime-model.ts +160 -0
  56. package/src/realtime/index.ts +2 -0
  57. package/src/tool/code-execution.ts +2 -2
  58. package/src/tool/enterprise-web-search.ts +9 -3
  59. package/src/tool/file-search.ts +5 -7
  60. package/src/tool/google-maps.ts +3 -2
  61. package/src/tool/google-search.ts +11 -12
  62. package/src/tool/url-context.ts +4 -2
  63. package/src/tool/vertex-rag-store.ts +9 -6
  64. package/dist/index.d.mts +0 -376
  65. package/dist/index.mjs +0 -2517
  66. package/dist/index.mjs.map +0 -1
  67. package/dist/internal/index.d.mts +0 -284
  68. package/dist/internal/index.mjs +0 -1706
  69. package/dist/internal/index.mjs.map +0 -1
  70. package/src/convert-to-google-generative-ai-messages.ts +0 -239
  71. package/src/google-generative-ai-prompt.ts +0 -38
@@ -0,0 +1,60 @@
1
+ import {
2
+ combineHeaders,
3
+ getRuntimeEnvironmentUserAgent,
4
+ withUserAgentSuffix,
5
+ type FetchFunction,
6
+ } from '@ai-sdk/provider-utils';
7
+
8
/**
 * Returns whatever `globalThis.fetch` is at the moment of the call. The lookup
 * happens on every invocation instead of being captured once at module load.
 */
function getOriginalFetch() {
  return globalThis.fetch;
}
9
+
10
/**
 * Best-effort cancellation of a background interaction on Google's side after
 * the caller has aborted locally. Sends `POST /interactions/{id}/cancel` with
 * an empty JSON object as the body.
 *
 * The returned promise never rejects: a failing `fetch`, a failure while
 * assembling the headers, and a failure while reading the response body are
 * all swallowed, and the response status is never inspected. That way a cancel
 * failure cannot mask the abort the caller is already handling.
 *
 * No request is made when `interactionId` is `null`, `undefined`, or empty --
 * e.g. when the interaction was created with `store: false` and the API did
 * not return an id.
 *
 * @param baseURL - Prefix that `/interactions/{id}/cancel` is appended to verbatim.
 * @param interactionId - Id of the interaction to cancel; URI-encoded before use.
 * @param headers - Extra request headers, combined with a JSON `Content-Type`.
 * @param fetch - Fetch implementation; defaults to the current global `fetch`.
 */
export async function cancelGoogleInteraction({
  baseURL,
  interactionId,
  headers,
  fetch = getOriginalFetch(),
}: {
  baseURL: string;
  interactionId: string | null | undefined;
  headers: Record<string, string | undefined>;
  fetch?: FetchFunction;
}): Promise<void> {
  // Nothing to cancel without an id.
  if (interactionId == null || interactionId === '') {
    return;
  }

  const cancelUrl = `${baseURL}/interactions/${encodeURIComponent(interactionId)}/cancel`;

  try {
    const requestHeaders = withUserAgentSuffix(
      combineHeaders({ 'Content-Type': 'application/json' }, headers),
      getRuntimeEnvironmentUserAgent(),
    );
    const response = await fetch(cancelUrl, {
      method: 'POST',
      headers: requestHeaders,
      body: '{}',
    });

    /*
     * Drain the body so undici/Node can return the connection to the pool.
     * A failure while draining lands in the same catch as a failed request.
     */
    await response.text();
  } catch {
    // ignore -- cancel is best-effort and must not throw past the caller
  }
}
@@ -0,0 +1,47 @@
1
+ import type { JSONObject, LanguageModelV4Usage } from '@ai-sdk/provider';
2
+ import type { GoogleInteractionsUsage } from './google-interactions-api';
3
+
4
/**
 * Maps the usage counters reported by the Interactions API onto the AI SDK
 * `LanguageModelV4Usage` shape.
 *
 * - A missing `usage` yields a record whose every field (including `raw`) is
 *   `undefined`.
 * - A counter that is `null` or absent is reported as `undefined`.
 * - `inputTokens.noCache` is input minus cached (a missing cached count is
 *   treated as 0) and is only reported when the input count is present.
 * - `outputTokens.total` is output plus thought tokens (a missing side is
 *   treated as 0) and is only reported when at least one of them is present.
 * - `raw` carries the original usage object unchanged.
 */
export function convertGoogleInteractionsUsage(
  usage: GoogleInteractionsUsage | undefined | null,
): LanguageModelV4Usage {
  if (usage === null || usage === undefined) {
    return {
      inputTokens: {
        total: undefined,
        noCache: undefined,
        cacheRead: undefined,
        cacheWrite: undefined,
      },
      outputTokens: {
        total: undefined,
        text: undefined,
        reasoning: undefined,
      },
      raw: undefined,
    };
  }

  const {
    total_input_tokens: inputCount,
    total_output_tokens: outputCount,
    total_thought_tokens: thoughtCount,
    total_cached_tokens: cachedCount,
  } = usage;

  const uncachedInput =
    inputCount == null ? undefined : inputCount - (cachedCount ?? 0);

  const hasAnyOutputCount = outputCount != null || thoughtCount != null;
  const combinedOutput = hasAnyOutputCount
    ? (outputCount ?? 0) + (thoughtCount ?? 0)
    : undefined;

  return {
    inputTokens: {
      total: inputCount ?? undefined,
      noCache: uncachedInput,
      cacheRead: cachedCount ?? undefined,
      cacheWrite: undefined,
    },
    outputTokens: {
      total: combinedOutput,
      text: outputCount ?? undefined,
      reasoning: thoughtCount ?? undefined,
    },
    raw: usage as unknown as JSONObject,
  };
}
@@ -0,0 +1,557 @@
1
+ import type {
2
+ LanguageModelV4FilePart,
3
+ LanguageModelV4Prompt,
4
+ LanguageModelV4ToolResultOutput,
5
+ SharedV4Warning,
6
+ } from '@ai-sdk/provider';
7
+ import {
8
+ convertToBase64,
9
+ getTopLevelMediaType,
10
+ isFullMediaType,
11
+ resolveFullMediaType,
12
+ resolveProviderReference,
13
+ } from '@ai-sdk/provider-utils';
14
+ import type {
15
+ GoogleInteractionsContent,
16
+ GoogleInteractionsContentBlock,
17
+ GoogleInteractionsFunctionResultContent,
18
+ GoogleInteractionsImageContent,
19
+ GoogleInteractionsInput,
20
+ GoogleInteractionsStep,
21
+ GoogleInteractionsTextContent,
22
+ } from './google-interactions-prompt';
23
+
24
/**
 * Resolution levels accepted for the `mediaResolution` option. In this file
 * the value is forwarded as the `resolution` field of image and video content
 * blocks.
 */
export type GoogleInteractionsMediaResolution = 'low' | 'medium' | 'high' | 'ultra_high';
29
+
30
/** Result of `convertToGoogleInteractionsInput`. */
export type ConvertToGoogleInteractionsInputResult = {
  /** Steps built from the prompt's user, assistant and tool messages. */
  input: GoogleInteractionsInput;
  /** All system messages joined by a blank line; `undefined` when there are none. */
  systemInstruction: string | undefined;
  /** Warnings collected while converting (dropped parts, incoherent options). */
  warnings: Array<SharedV4Warning>;
};
35
+
36
/**
 * Converts an AI SDK `LanguageModelV4Prompt` into the pieces of a Gemini
 * Interactions request: an ordered list of steps plus a system instruction.
 *
 * Mapping per message role:
 * - `system`    → collected and joined with a blank line into `systemInstruction`.
 * - `user`      → one `user_input` step holding text and file blocks; adjacent
 *                 text blocks are merged. Empty turns emit no step.
 * - `assistant` → fans out into several steps, in part order:
 *     - runs of text / file parts → one `model_output` step,
 *     - reasoning                → `thought` step (`signature` + `summary`),
 *     - tool-call                → `function_call` step,
 *     - anything else            → dropped with a warning.
 * - `tool`      → one `user_input` step holding a `function_result` block per
 *                 tool-result part; other part types are dropped with a warning.
 *
 * Compaction: when `previousInteractionId` is set and `store` is not `false`,
 * assistant turns tagged with that id in `providerOptions.google.interactionId`
 * (and the tool results answering their tool calls) are removed before
 * conversion. When `previousInteractionId` is combined with `store: false`, a
 * warning is recorded and the full history is converted instead.
 *
 * @returns the steps, the system instruction (or `undefined`), and all
 *   warnings gathered along the way.
 */
export function convertToGoogleInteractionsInput({
  prompt,
  previousInteractionId,
  store,
  mediaResolution,
}: {
  prompt: LanguageModelV4Prompt;
  previousInteractionId?: string;
  store?: boolean;
  mediaResolution?: GoogleInteractionsMediaResolution;
}): ConvertToGoogleInteractionsInputResult {
  const warnings: Array<SharedV4Warning> = [];
  const storeDisabled = store === false;

  // Linking to a prior interaction while opting out of storage is incoherent:
  // warn, and fall through to the "no compaction" path below.
  if (previousInteractionId != null && storeDisabled) {
    warnings.push({
      type: 'other',
      message:
        'google.interactions: providerOptions.google.previousInteractionId was set together with store: false. These are incoherent (the prior interaction cannot be referenced when nothing was stored on the server); the full history will be sent and previous_interaction_id will still be emitted.',
    });
  }

  // Compact only when a prior interaction is linked AND storage is not disabled.
  const effectivePrompt =
    previousInteractionId != null && !storeDisabled
      ? compactPromptForPreviousInteraction({ prompt, previousInteractionId })
      : prompt;

  const systemSegments: Array<string> = [];
  const steps: Array<GoogleInteractionsStep> = [];

  for (const message of effectivePrompt) {
    switch (message.role) {
      case 'system':
        systemSegments.push(message.content);
        break;

      case 'user': {
        const blocks: Array<GoogleInteractionsContentBlock> = [];
        for (const part of message.content) {
          // Only text and file parts are forwarded; other part types are skipped.
          switch (part.type) {
            case 'text':
              blocks.push({ type: 'text', text: part.text });
              break;
            case 'file': {
              const converted = convertFilePartToContent({
                part,
                warnings,
                mediaResolution,
              });
              if (converted != null) {
                blocks.push(converted);
              }
              break;
            }
          }
        }
        const userContent = mergeAdjacentTextContent(blocks);
        if (userContent.length > 0) {
          steps.push({ type: 'user_input', content: userContent });
        }
        break;
      }

      case 'assistant': {
        /*
         * Text and file blocks accumulate in `buffered` and are emitted as a
         * single `model_output` step as soon as a reasoning or tool-call part
         * interrupts the run (and once more at the end of the message).
         */
        let buffered: Array<GoogleInteractionsContentBlock> = [];
        const emitBuffered = (): void => {
          if (buffered.length === 0) {
            return;
          }
          steps.push({ type: 'model_output', content: buffered });
          buffered = [];
        };

        for (const part of message.content) {
          switch (part.type) {
            case 'text':
              buffered.push({ type: 'text', text: part.text });
              break;

            case 'reasoning': {
              emitBuffered();
              const signature = part.providerOptions?.google?.signature as
                | string
                | undefined;
              steps.push({
                type: 'thought',
                ...(signature != null ? { signature } : {}),
                // An empty reasoning text carries no summary.
                summary:
                  part.text.length > 0
                    ? [{ type: 'text', text: part.text }]
                    : undefined,
              });
              break;
            }

            case 'file': {
              const converted = convertFilePartToContent({
                part,
                warnings,
                mediaResolution,
              });
              if (converted != null) {
                buffered.push(converted);
              }
              break;
            }

            case 'tool-call': {
              emitBuffered();
              const signature = part.providerOptions?.google?.signature as
                | string
                | undefined;
              // String inputs are parsed; anything else is passed through
              // as-is (with `{}` standing in for a nullish input).
              const callArguments =
                typeof part.input === 'string'
                  ? safeParseToolArgs(part.input)
                  : ((part.input ?? {}) as Record<string, unknown>);
              steps.push({
                type: 'function_call',
                id: part.toolCallId,
                name: part.toolName,
                arguments: callArguments,
                ...(signature != null ? { signature } : {}),
              });
              break;
            }

            default:
              warnings.push({
                type: 'other',
                message: `google.interactions: unsupported assistant content part type "${part.type}"; part dropped.`,
              });
          }
        }
        emitBuffered();
        break;
      }

      case 'tool': {
        /*
         * `function_result` is a content-block type, not a step type: the
         * results of one tool message travel together inside one `user_input`
         * step.
         */
        const results: Array<GoogleInteractionsContentBlock> = [];
        for (const part of message.content) {
          if (part.type === 'tool-result') {
            results.push(
              convertToolResultPart({
                toolCallId: part.toolCallId,
                toolName: part.toolName,
                output: part.output,
                signature: part.providerOptions?.google?.signature as
                  | string
                  | undefined,
                warnings,
              }),
            );
            continue;
          }
          warnings.push({
            type: 'other',
            message: `google.interactions: unsupported tool message part type "${part.type}"; part dropped.`,
          });
        }
        if (results.length > 0) {
          steps.push({ type: 'user_input', content: results });
        }
        break;
      }
    }
  }

  const systemInstruction =
    systemSegments.length === 0 ? undefined : systemSegments.join('\n\n');

  return { input: steps, systemInstruction, warnings };
}
231
+
232
/**
 * Turns one AI SDK `LanguageModelV4FilePart` into a Gemini Interactions
 * content block.
 *
 * - A part whose `data.type` is `'text'` becomes a plain `text` block,
 *   regardless of its media type.
 * - Otherwise the top-level media type picks the block kind: `image`, `audio`
 *   and `video` map to themselves and `application` maps to `document`. Any
 *   other top-level type records a warning and yields `undefined`.
 * - `mediaResolution`, when given, is attached as `resolution` to image and
 *   video blocks only.
 * - Inline data is sent base64-encoded with a resolved `mime_type`; URL and
 *   provider-reference parts are sent as a `uri`, with `mime_type` included
 *   only when the part declares a full media type.
 */
function convertFilePartToContent({
  part,
  warnings,
  mediaResolution,
}: {
  part: LanguageModelV4FilePart;
  warnings: Array<SharedV4Warning>;
  mediaResolution?: GoogleInteractionsMediaResolution;
}): GoogleInteractionsContent | undefined {
  const payload = part.data;

  if (payload.type === 'text') {
    return { type: 'text', text: payload.text };
  }

  const topLevel = getTopLevelMediaType(part.mediaType);
  let kind: 'image' | 'audio' | 'video' | 'document';
  if (topLevel === 'image' || topLevel === 'audio' || topLevel === 'video') {
    kind = topLevel;
  } else if (topLevel === 'application') {
    kind = 'document';
  } else {
    warnings.push({
      type: 'other',
      message: `google.interactions: unsupported file media type "${part.mediaType}"; part dropped.`,
    });
    return undefined;
  }

  const acceptsResolution = kind === 'image' || kind === 'video';
  const resolutionField =
    mediaResolution != null && acceptsResolution
      ? { resolution: mediaResolution }
      : {};

  // Evaluated lazily so the media-type check runs at the same point as before.
  const declaredMimeType = () =>
    isFullMediaType(part.mediaType) ? { mime_type: part.mediaType } : {};

  if (payload.type === 'data') {
    const mimeType = resolveFullMediaType({ part });
    return {
      type: kind,
      data: convertToBase64(payload.data),
      mime_type: mimeType,
      ...resolutionField,
    };
  }

  if (payload.type === 'url') {
    return {
      type: kind,
      uri: payload.url.toString(),
      ...declaredMimeType(),
      ...resolutionField,
    };
  }

  if (payload.type === 'reference') {
    const uri = resolveProviderReference({
      reference: payload.reference,
      provider: 'google',
    });
    return {
      type: kind,
      uri,
      ...declaredMimeType(),
      ...resolutionField,
    };
  }

  return undefined;
}
320
+
321
/*
 * Removes history that belongs to the linked interaction
 * (`previousInteractionId`):
 *
 * - An assistant message is dropped when at least one of its parts carries
 *   `providerOptions.google.interactionId === previousInteractionId`.
 * - Tool-result parts answering a tool call from a dropped assistant message
 *   are pruned; a tool message left with no parts is dropped entirely.
 * - Every other message is passed through untouched.
 *
 * The input prompt is not mutated; surviving tool messages are shallow copies.
 *
 * NOTE(review): user messages that preceded a dropped assistant turn are kept.
 * Confirm whether the API expects them to be re-sent alongside
 * `previous_interaction_id`.
 */
function compactPromptForPreviousInteraction({
  prompt,
  previousInteractionId,
}: {
  prompt: LanguageModelV4Prompt;
  previousInteractionId: string;
}): LanguageModelV4Prompt {
  const prunedToolCallIds = new Set<string>();
  const kept: LanguageModelV4Prompt = [];

  const isFromLinkedInteraction = (part: unknown): boolean =>
    (part as { providerOptions?: { google?: { interactionId?: string } } })
      .providerOptions?.google?.interactionId === previousInteractionId;

  for (const message of prompt) {
    switch (message.role) {
      case 'assistant': {
        if (!message.content.some(part => isFromLinkedInteraction(part))) {
          kept.push(message);
          break;
        }
        // Remember the dropped turn's tool calls so their results go too.
        for (const part of message.content) {
          if (part.type === 'tool-call') {
            prunedToolCallIds.add(part.toolCallId);
          }
        }
        break;
      }

      case 'tool': {
        const surviving = message.content.filter(
          part =>
            part.type !== 'tool-result' ||
            !prunedToolCallIds.has(part.toolCallId),
        );
        if (surviving.length > 0) {
          kept.push({
            ...message,
            content: surviving as typeof message.content,
          });
        }
        break;
      }

      default:
        kept.push(message);
    }
  }

  return kept;
}
376
+
377
/**
 * Parses a tool call's serialized input into an arguments object, never
 * throwing.
 *
 * - Empty or whitespace-only input yields `{}`. Such input is not valid JSON,
 *   so it would otherwise be reported as `{ value: '' }`, inventing a `value`
 *   argument the call never had.
 * - JSON that decodes to a plain object is returned as-is.
 * - JSON that decodes to anything else (array, number, string, boolean,
 *   `null`) is wrapped as `{ value: <decoded> }`.
 * - Input that is not valid JSON is wrapped verbatim as `{ value: input }`.
 *
 * @param input - the raw string input of a tool call.
 * @returns an object suitable for the `arguments` field of a `function_call`.
 */
function safeParseToolArgs(input: string): Record<string, unknown> {
  if (input.trim().length === 0) {
    return {};
  }

  let decoded: unknown;
  try {
    decoded = JSON.parse(input);
  } catch {
    // Not JSON: keep the original text instead of losing it.
    return { value: input };
  }

  const isPlainObject =
    typeof decoded === 'object' && decoded !== null && !Array.isArray(decoded);

  return isPlainObject
    ? (decoded as Record<string, unknown>)
    : { value: decoded };
}
392
+
393
/**
 * Builds the `function_result` content block for one tool result.
 *
 * Every block carries `call_id` and `name`, plus `signature` when one is
 * given. The `result` depends on the output variant:
 * - `text` / `error-text`      → the string value,
 * - `json` / `error-json`      → the value serialized with `JSON.stringify`,
 * - `execution-denied`         → the reason, or a default denial sentence,
 * - `content`                  → an array of text and image blocks; files
 *                                that are not images and unknown item types
 *                                are dropped with a warning.
 *
 * The two `error-*` variants and `execution-denied` additionally set
 * `is_error: true`.
 */
function convertToolResultPart({
  toolCallId,
  toolName,
  output,
  signature,
  warnings,
}: {
  toolCallId: string;
  toolName: string;
  output: LanguageModelV4ToolResultOutput;
  signature: string | undefined;
  warnings: Array<SharedV4Warning>;
}): GoogleInteractionsFunctionResultContent {
  const shared = {
    type: 'function_result' as const,
    call_id: toolCallId,
    name: toolName,
    ...(signature != null ? { signature } : {}),
  };

  // Assembles the final block; `failed` adds the `is_error` marker.
  const reply = (
    result: GoogleInteractionsFunctionResultContent['result'],
    failed: boolean,
  ): GoogleInteractionsFunctionResultContent =>
    failed ? { ...shared, is_error: true, result } : { ...shared, result };

  switch (output.type) {
    case 'text':
      return reply(output.value, false);

    case 'json':
      return reply(JSON.stringify(output.value), false);

    case 'error-text':
      return reply(output.value, true);

    case 'error-json':
      return reply(JSON.stringify(output.value), true);

    case 'execution-denied':
      return reply(output.reason ?? 'Tool execution denied by user.', true);

    case 'content': {
      const collected: Array<
        GoogleInteractionsTextContent | GoogleInteractionsImageContent
      > = [];

      for (const item of output.value) {
        if (item.type === 'text') {
          collected.push({ type: 'text', text: item.text });
          continue;
        }

        if (item.type !== 'file') {
          warnings.push({
            type: 'other',
            message: `google.interactions: tool-result content part type "${(item as { type: string }).type}" is not supported; part dropped.`,
          });
          continue;
        }

        if (getTopLevelMediaType(item.mediaType) !== 'image') {
          warnings.push({
            type: 'other',
            message: `google.interactions: tool-result file with mediaType "${item.mediaType}" is not supported (Interactions \`function_result.result\` accepts only text and image content); part dropped.`,
          });
          continue;
        }

        const image = filePartToImageBlock({ part: item, warnings });
        if (image != null) {
          collected.push(image);
        }
      }

      return reply(collected, false);
    }
  }
}
459
+
460
/**
 * Converts a file item from a tool result's `content` output into an `image`
 * content block.
 *
 * - Inline data is base64-encoded; its `mime_type` is the declared media type
 *   when that is already a full media type, otherwise it is resolved via
 *   `resolveFullMediaType`.
 * - URL and provider-reference items are sent as a `uri`, with `mime_type`
 *   included only when the declared media type is a full one.
 * - Items whose `data.type` is `'text'` cannot be expressed as an image: a
 *   warning is recorded and `undefined` is returned.
 */
function filePartToImageBlock({
  part,
  warnings,
}: {
  part: {
    type: 'file';
    mediaType: string;
    data:
      | { type: 'data'; data: Uint8Array | string }
      | { type: 'url'; url: URL }
      | { type: 'reference'; reference: Record<string, string> }
      | { type: 'text'; text: string };
    filename?: string;
  };
  warnings: Array<SharedV4Warning>;
}): GoogleInteractionsImageContent | undefined {
  const payload = part.data;

  if (payload.type === 'text') {
    warnings.push({
      type: 'other',
      message:
        'google.interactions: tool-result image part with `data.type === "text"` is not representable as an image; part dropped.',
    });
    return undefined;
  }

  if (payload.type === 'data') {
    let mimeType;
    if (isFullMediaType(part.mediaType)) {
      mimeType = part.mediaType;
    } else {
      mimeType = resolveFullMediaType({
        part: {
          type: 'file',
          mediaType: part.mediaType,
          data: payload,
        } as LanguageModelV4FilePart,
      });
    }
    return {
      type: 'image',
      data: convertToBase64(payload.data),
      mime_type: mimeType,
    };
  }

  if (payload.type === 'url') {
    return {
      type: 'image',
      uri: payload.url.toString(),
      ...(isFullMediaType(part.mediaType)
        ? { mime_type: part.mediaType }
        : {}),
    };
  }

  if (payload.type === 'reference') {
    const uri = resolveProviderReference({
      reference: payload.reference,
      provider: 'google',
    });
    return {
      type: 'image',
      uri,
      ...(isFullMediaType(part.mediaType)
        ? { mime_type: part.mediaType }
        : {}),
    };
  }

  return undefined;
}
524
+
525
/*
 * Joins each run of neighbouring text blocks into a single text block whose
 * text is the run's texts separated by a blank line. A text block that carries
 * `annotations` neither absorbs nor is absorbed (annotations are tied to
 * specific text spans), and non-text blocks always break a run.
 *
 * Inputs with fewer than two blocks are returned as the same array instance;
 * otherwise a new array is returned. A merged block holds only `type` and
 * `text`.
 */
function mergeAdjacentTextContent(
  content: Array<GoogleInteractionsContentBlock>,
): Array<GoogleInteractionsContentBlock> {
  if (content.length < 2) {
    return content;
  }

  const isMergeableText = (
    candidate: GoogleInteractionsContentBlock | undefined,
  ): boolean =>
    candidate != null &&
    candidate.type === 'text' &&
    (candidate as GoogleInteractionsTextContent).annotations == null;

  const collapsed: Array<GoogleInteractionsContentBlock> = [];

  for (const current of content) {
    const tailIndex = collapsed.length - 1;
    const previous = collapsed[tailIndex];

    if (!isMergeableText(current) || !isMergeableText(previous)) {
      collapsed.push(current);
      continue;
    }

    const previousText = (previous as GoogleInteractionsTextContent).text;
    const currentText = (current as GoogleInteractionsTextContent).text;
    const combined: GoogleInteractionsTextContent = {
      type: 'text',
      text: `${previousText}\n\n${currentText}`,
    };
    collapsed[tailIndex] = combined;
  }

  return collapsed;
}