universal-llm-client 4.2.0 → 4.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (108) hide show
  1. package/CHANGELOG.md +142 -103
  2. package/LICENSE +21 -21
  3. package/README.md +640 -591
  4. package/dist/ai-model.d.ts +12 -1
  5. package/dist/ai-model.d.ts.map +1 -1
  6. package/dist/ai-model.js +36 -1
  7. package/dist/ai-model.js.map +1 -1
  8. package/dist/gemma-channel.d.ts +14 -0
  9. package/dist/gemma-channel.d.ts.map +1 -0
  10. package/dist/gemma-channel.js +38 -0
  11. package/dist/gemma-channel.js.map +1 -0
  12. package/dist/gemma-diffusion.d.ts +49 -0
  13. package/dist/gemma-diffusion.d.ts.map +1 -0
  14. package/dist/gemma-diffusion.js +147 -0
  15. package/dist/gemma-diffusion.js.map +1 -0
  16. package/dist/http.d.ts +4 -0
  17. package/dist/http.d.ts.map +1 -1
  18. package/dist/http.js +14 -1
  19. package/dist/http.js.map +1 -1
  20. package/dist/index.d.ts +2 -1
  21. package/dist/index.d.ts.map +1 -1
  22. package/dist/index.js +4 -0
  23. package/dist/index.js.map +1 -1
  24. package/dist/interfaces.d.ts +183 -7
  25. package/dist/interfaces.d.ts.map +1 -1
  26. package/dist/interfaces.js.map +1 -1
  27. package/dist/providers/anthropic.d.ts.map +1 -1
  28. package/dist/providers/anthropic.js +28 -3
  29. package/dist/providers/anthropic.js.map +1 -1
  30. package/dist/providers/google.d.ts +22 -1
  31. package/dist/providers/google.d.ts.map +1 -1
  32. package/dist/providers/google.js +225 -13
  33. package/dist/providers/google.js.map +1 -1
  34. package/dist/providers/ollama.d.ts +2 -0
  35. package/dist/providers/ollama.d.ts.map +1 -1
  36. package/dist/providers/ollama.js +59 -30
  37. package/dist/providers/ollama.js.map +1 -1
  38. package/dist/providers/openai.d.ts +14 -0
  39. package/dist/providers/openai.d.ts.map +1 -1
  40. package/dist/providers/openai.js +200 -22
  41. package/dist/providers/openai.js.map +1 -1
  42. package/dist/router.d.ts +2 -0
  43. package/dist/router.d.ts.map +1 -1
  44. package/dist/router.js +4 -0
  45. package/dist/router.js.map +1 -1
  46. package/dist/stream-decoder.d.ts +12 -0
  47. package/dist/stream-decoder.d.ts.map +1 -1
  48. package/dist/stream-decoder.js +182 -5
  49. package/dist/stream-decoder.js.map +1 -1
  50. package/dist/thinking.d.ts +36 -0
  51. package/dist/thinking.d.ts.map +1 -0
  52. package/dist/thinking.js +52 -0
  53. package/dist/thinking.js.map +1 -0
  54. package/package.json +118 -116
  55. package/src/ai-model.ts +400 -350
  56. package/src/auditor.ts +213 -213
  57. package/src/client.ts +402 -402
  58. package/src/debug/debug-google-streaming.ts +1 -1
  59. package/src/demos/basic/universal-llm-examples.ts +3 -3
  60. package/src/demos/diffusion-gemma/.env +29 -0
  61. package/src/demos/diffusion-gemma/.env.example +27 -0
  62. package/src/demos/diffusion-gemma/CLAUDE.md +95 -0
  63. package/src/demos/diffusion-gemma/README.md +59 -0
  64. package/src/demos/diffusion-gemma/canvas.ts +1606 -0
  65. package/src/demos/diffusion-gemma/docker-compose.yml +29 -0
  66. package/src/demos/diffusion-gemma/probe-stream.ts +51 -0
  67. package/src/demos/diffusion-gemma/probe-tools.ts +55 -0
  68. package/src/demos/diffusion-gemma/server.ts +1205 -0
  69. package/src/demos/diffusion-gemma/start-vllm.sh +98 -0
  70. package/src/gemma-channel.ts +47 -0
  71. package/src/gemma-diffusion.ts +167 -0
  72. package/src/http.ts +261 -247
  73. package/src/index.ts +180 -161
  74. package/src/interfaces.ts +843 -657
  75. package/src/mcp.ts +345 -345
  76. package/src/providers/anthropic.ts +796 -762
  77. package/src/providers/google.ts +840 -620
  78. package/src/providers/index.ts +8 -8
  79. package/src/providers/ollama.ts +503 -469
  80. package/src/providers/openai.ts +587 -392
  81. package/src/router.ts +785 -780
  82. package/src/stream-decoder.ts +535 -361
  83. package/src/structured-output.ts +759 -759
  84. package/src/test-scripts/test-google-deep-research.ts +33 -0
  85. package/src/test-scripts/test-google-streaming-enhanced.ts +147 -147
  86. package/src/test-scripts/test-google-streaming.ts +1 -1
  87. package/src/test-scripts/test-google-system-prompt-comprehensive.ts +189 -189
  88. package/src/test-scripts/test-google-thinking.ts +46 -0
  89. package/src/test-scripts/test-system-message-positions.ts +163 -163
  90. package/src/test-scripts/test-system-prompt-improvement-demo.ts +83 -83
  91. package/src/test-scripts/test-vllm-qwen36.ts +256 -0
  92. package/src/tests/ai-model.test.ts +1614 -1614
  93. package/src/tests/auditor.test.ts +224 -224
  94. package/src/tests/gemma-diffusion.test.ts +115 -0
  95. package/src/tests/http.test.ts +200 -200
  96. package/src/tests/interfaces.test.ts +117 -117
  97. package/src/tests/providers/anthropic.test.ts +118 -0
  98. package/src/tests/providers/google.test.ts +841 -660
  99. package/src/tests/providers/ollama.test.ts +1034 -954
  100. package/src/tests/providers/openai.test.ts +1511 -1122
  101. package/src/tests/router.test.ts +254 -254
  102. package/src/tests/stream-decoder.test.ts +263 -179
  103. package/src/tests/structured-output.test.ts +1450 -1450
  104. package/src/tests/thinking.test.ts +65 -0
  105. package/src/tests/tools.test.ts +175 -175
  106. package/src/thinking.ts +73 -0
  107. package/src/tools.ts +246 -246
  108. package/src/zod-adapter.ts +72 -72
@@ -1,620 +1,840 @@
1
- /**
2
- * Universal LLM Client v3 — Google Provider
3
- *
4
- * Implements BaseLLMClient for Google AI Studio and Vertex AI.
5
- * Supports Gemini and Gemma models with full tool calling,
6
- * streaming, embeddings, and system prompt handling.
7
- */
8
-
9
- import { BaseLLMClient } from '../client.js';
10
- import { httpRequest, httpStream } from '../http.js';
11
- import { StandardChatDecoder } from '../stream-decoder.js';
12
- import {
13
- normalizeJsonSchema,
14
- stripUnsupportedFeatures,
15
- getJsonSchemaFromConfig,
16
- type JSONSchema,
17
- } from '../structured-output.js';
18
- import type {
19
- LLMClientOptions,
20
- LLMChatMessage,
21
- LLMChatResponse,
22
- ChatOptions,
23
- LLMToolDefinition,
24
- LLMToolCall,
25
- LLMContentPart,
26
- LLMTextContent,
27
- GooglePart,
28
- GoogleContent,
29
- GoogleRequest,
30
- GoogleResponse,
31
- GoogleFunctionDeclaration,
32
- TokenUsageInfo,
33
- AIModelApiType,
34
- } from '../interfaces.js';
35
- import type { DecodedEvent } from '../stream-decoder.js';
36
- import type { Auditor } from '../auditor.js';
37
-
38
- export class GoogleClient extends BaseLLMClient {
39
- private isVertex: boolean;
40
- private apiVersion: string;
41
-
42
- constructor(options: LLMClientOptions, auditor?: Auditor) {
43
- super(options, auditor);
44
- this.isVertex = options.apiType === ('vertex' as AIModelApiType);
45
- this.apiVersion = options.apiVersion ?? 'v1beta';
46
- }
47
-
48
- // ========================================================================
49
- // URL Building
50
- // ========================================================================
51
-
52
- private getBaseUrl(): string {
53
- if (this.isVertex) {
54
- const region = this.options.region ?? 'us-central1';
55
- return `https://${region}-aiplatform.googleapis.com/${this.apiVersion}/projects/-/locations/${region}/publishers/google/models/${this.options.model}`;
56
- }
57
- if (this.options.url) return this.options.url.replace(/\/+$/, '');
58
- return `https://generativelanguage.googleapis.com/${this.apiVersion}/models/${this.options.model}`;
59
- }
60
-
61
- private getChatUrl(): string {
62
- const base = this.getBaseUrl();
63
- if (this.isVertex) {
64
- return `${base}:generateContent`;
65
- }
66
- return `${base}:generateContent?key=${this.options.apiKey}`;
67
- }
68
-
69
- private getStreamUrl(): string {
70
- const base = this.getBaseUrl();
71
- if (this.isVertex) {
72
- return `${base}:streamGenerateContent?alt=sse`;
73
- }
74
- return `${base}:streamGenerateContent?alt=sse&key=${this.options.apiKey}`;
75
- }
76
-
77
- private getEmbedUrl(): string {
78
- if (this.isVertex) {
79
- const region = this.options.region ?? 'us-central1';
80
- return `https://${region}-aiplatform.googleapis.com/${this.apiVersion}/projects/-/locations/${region}/publishers/google/models/${this.options.model}:embedContent`;
81
- }
82
- return `https://generativelanguage.googleapis.com/${this.apiVersion}/models/${this.options.model}:embedContent?key=${this.options.apiKey}`;
83
- }
84
-
85
- private getHeaders(): Record<string, string> {
86
- const headers: Record<string, string> = {
87
- 'Content-Type': 'application/json',
88
- };
89
- if (this.isVertex && this.options.apiKey) {
90
- headers['Authorization'] = `Bearer ${this.options.apiKey}`;
91
- }
92
- return headers;
93
- }
94
-
95
- // ========================================================================
96
- // Chat
97
- // ========================================================================
98
-
99
- async chat(
100
- messages: LLMChatMessage[],
101
- options?: ChatOptions,
102
- ): Promise<LLMChatResponse> {
103
- // Structured output and tools can now be used together.\n // The provider sends both responseSchema and tools in the request.\n // The Router handles skipping validation when the response contains tool calls.
104
-
105
- const url = this.getChatUrl();
106
- const body = this.buildRequestBody(messages, options);
107
-
108
- // Flex tier: increase timeout (Google recommends 600s+) and use retry logic
109
- const tier = options?.serviceTier;
110
- const effectiveTimeout = tier === 'flex'
111
- ? Math.max(this.options.timeout ?? 60000, 600_000)
112
- : (this.options.timeout ?? 60000);
113
-
114
- const start = Date.now();
115
- this.auditor.record({
116
- timestamp: start,
117
- type: 'request',
118
- provider: this.isVertex ? 'vertex' : 'google',
119
- model: this.options.model,
120
- });
121
-
122
- const reqOptions = {
123
- method: 'POST' as const,
124
- headers: this.getHeaders(),
125
- body,
126
- timeout: effectiveTimeout,
127
- };
128
-
129
- const response = tier === 'flex'
130
- ? await this.fetchWithFlexRetry<GoogleResponse>(url, reqOptions)
131
- : await httpRequest<GoogleResponse>(url, reqOptions);
132
-
133
- const result = this.parseGoogleResponse(response.data);
134
-
135
- // Surface the tier that actually served the request
136
- const resolvedTier = response.headers?.get('x-gemini-service-tier');
137
- if (resolvedTier) {
138
- result.serviceTier = resolvedTier.toLowerCase() as 'flex' | 'priority' | 'standard';
139
- }
140
-
141
- this.auditor.record({
142
- timestamp: Date.now(),
143
- type: 'response',
144
- provider: this.isVertex ? 'vertex' : 'google',
145
- model: this.options.model,
146
- duration: Date.now() - start,
147
- usage: result.usage,
148
- });
149
-
150
- return result;
151
- }
152
-
153
- // ========================================================================
154
- // Streaming
155
- // ========================================================================
156
-
157
- async *chatStream(
158
- messages: LLMChatMessage[],
159
- options?: ChatOptions,
160
- ): AsyncGenerator<DecodedEvent, LLMChatResponse | void, unknown> {
161
- const url = this.getStreamUrl();
162
- const body = this.buildRequestBody(messages, options);
163
-
164
- // Flex tier: increase timeout (Google recommends 600s+)
165
- const tier = options?.serviceTier;
166
- const effectiveTimeout = tier === 'flex'
167
- ? Math.max(this.options.timeout ?? 120000, 600_000)
168
- : (this.options.timeout ?? 120000);
169
-
170
- const start = Date.now();
171
- this.auditor.record({
172
- timestamp: start,
173
- type: 'stream_start',
174
- provider: this.isVertex ? 'vertex' : 'google',
175
- model: this.options.model,
176
- });
177
-
178
- const decoder = new StandardChatDecoder(() => {});
179
- let usage: TokenUsageInfo | undefined;
180
- const allToolCalls: LLMToolCall[] = [];
181
-
182
- const stream = httpStream(url, {
183
- method: 'POST',
184
- headers: this.getHeaders(),
185
- body,
186
- timeout: effectiveTimeout,
187
- });
188
-
189
- // Google streams SSE with JSON payloads
190
- let buffer = '';
191
- for await (const chunk of stream) {
192
- buffer += chunk;
193
-
194
- // Google SSE uses "data: " prefix
195
- const lines = buffer.split('\n');
196
- buffer = lines.pop() ?? '';
197
-
198
- for (const line of lines) {
199
- if (!line.startsWith('data: ')) continue;
200
- const jsonStr = line.slice(6).trim();
201
- if (!jsonStr || jsonStr === '[DONE]') continue;
202
-
203
- try {
204
- const data = JSON.parse(jsonStr) as GoogleResponse;
205
-
206
- if (data.usageMetadata) {
207
- usage = {
208
- inputTokens: data.usageMetadata.promptTokenCount ?? 0,
209
- outputTokens: data.usageMetadata.candidatesTokenCount ?? 0,
210
- totalTokens: data.usageMetadata.totalTokenCount ?? 0,
211
- cachedTokens: data.usageMetadata.cachedContentTokenCount,
212
- };
213
- }
214
-
215
- const candidate = data.candidates?.[0];
216
- if (!candidate?.content?.parts) continue;
217
-
218
- for (const part of candidate.content.parts) {
219
- if (part.text) {
220
- decoder.push(part.text);
221
- yield { type: 'text', content: part.text };
222
- }
223
- if (part.functionCall) {
224
- const toolCall = this.convertFunctionCallToToolCall(
225
- part.functionCall,
226
- part.thoughtSignature,
227
- );
228
- allToolCalls.push(toolCall);
229
- yield { type: 'tool_call', calls: [toolCall] };
230
- }
231
- }
232
- } catch {
233
- // Skip unparseable JSON
234
- }
235
- }
236
- }
237
-
238
- decoder.flush();
239
-
240
- this.auditor.record({
241
- timestamp: Date.now(),
242
- type: 'stream_end',
243
- provider: this.isVertex ? 'vertex' : 'google',
244
- model: this.options.model,
245
- duration: Date.now() - start,
246
- usage,
247
- });
248
-
249
- return {
250
- message: {
251
- role: 'assistant',
252
- content: decoder.getCleanContent(),
253
- tool_calls: allToolCalls.length > 0 ? allToolCalls : undefined,
254
- },
255
- reasoning: decoder.getReasoning(),
256
- usage,
257
- provider: this.isVertex ? 'vertex' : 'google',
258
- };
259
- }
260
-
261
- // ========================================================================
262
- // Embeddings
263
- // ========================================================================
264
-
265
- async embed(text: string): Promise<number[]> {
266
- const url = this.getEmbedUrl();
267
- const response = await httpRequest<{
268
- embedding: { values: number[] };
269
- }>(url, {
270
- method: 'POST',
271
- headers: this.getHeaders(),
272
- body: {
273
- content: {
274
- parts: [{ text }],
275
- },
276
- },
277
- timeout: this.options.timeout ?? 30000,
278
- });
279
- return response.data.embedding.values;
280
- }
281
-
282
- // ========================================================================
283
- // Model Discovery
284
- // ========================================================================
285
-
286
- async getModels(): Promise<string[]> {
287
- const baseUrl = this.isVertex
288
- ? `https://${this.options.region ?? 'us-central1'}-aiplatform.googleapis.com/${this.apiVersion}/models`
289
- : `https://generativelanguage.googleapis.com/${this.apiVersion}/models?key=${this.options.apiKey}`;
290
-
291
- try {
292
- const response = await httpRequest<{
293
- models: Array<{ name: string }>;
294
- }>(baseUrl, {
295
- headers: this.getHeaders(),
296
- timeout: 10000,
297
- });
298
- return response.data.models.map(m =>
299
- m.name.replace(/^models\//, ''),
300
- );
301
- } catch {
302
- return [];
303
- }
304
- }
305
-
306
- // ========================================================================
307
- // Request Building
308
- // ========================================================================
309
-
310
- private buildRequestBody(
311
- messages: LLMChatMessage[],
312
- options?: ChatOptions,
313
- ): GoogleRequest {
314
- const isGemma = this.options.model.toLowerCase().includes('gemma');
315
- const { systemInstruction, contents } = this.convertToGoogleMessages(messages, isGemma);
316
-
317
- const tools = options?.tools ?? (Object.keys(this.toolRegistry).length > 0 ? this.getToolDefinitions() : undefined);
318
-
319
- const body: GoogleRequest = {
320
- contents,
321
- generationConfig: this.buildGenerationConfig(options),
322
- };
323
-
324
- // System instruction (Gemini supports it, Gemma doesn't)
325
- if (systemInstruction && !isGemma) {
326
- body.systemInstruction = {
327
- parts: [{ text: systemInstruction }],
328
- };
329
- }
330
-
331
- // Tools
332
- if (tools?.length) {
333
- body.tools = [{
334
- functionDeclarations: tools.map(t => this.convertToGoogleTool(t)),
335
- }];
336
- }
337
-
338
- // Inference tier (Flex / Priority)
339
- const tier = options?.serviceTier;
340
- if (tier && tier !== 'standard') {
341
- body.service_tier = tier.toUpperCase() as 'FLEX' | 'PRIORITY';
342
- }
343
-
344
- return body;
345
- }
346
-
347
- private buildGenerationConfig(options?: ChatOptions): Record<string, unknown> {
348
- const config: Record<string, unknown> = {
349
- ...this.options.defaultParameters,
350
- ...options?.parameters,
351
- };
352
- if (options?.temperature !== undefined) config['temperature'] = options.temperature;
353
- if (options?.maxTokens !== undefined) config['maxOutputTokens'] = options.maxTokens;
354
- if (this.options.thinking) {
355
- config['thinkingConfig'] = { thinkingBudget: 8192 };
356
- }
357
-
358
- // Structured output: add responseMimeType and responseSchema
359
- const schemaOptions = this.extractSchemaOptions(options);
360
- if (schemaOptions) {
361
- config['responseMimeType'] = 'application/json';
362
-
363
- // Convert schema to Google-compatible format
364
- let jsonSchema: JSONSchema;
365
- if (schemaOptions.jsonSchema) {
366
- jsonSchema = normalizeJsonSchema(schemaOptions.jsonSchema);
367
- } else if (schemaOptions.schemaConfig) {
368
- jsonSchema = getJsonSchemaFromConfig(schemaOptions.schemaConfig);
369
- } else {
370
- throw new Error('Either schemaConfig or jsonSchema must be provided');
371
- }
372
-
373
- // Strip unsupported features for Google
374
- const googleSchema = stripUnsupportedFeatures(jsonSchema, 'google');
375
- config['responseSchema'] = googleSchema;
376
- }
377
-
378
- return config;
379
- }
380
-
381
- // ========================================================================
382
- // Message Conversion
383
- // ========================================================================
384
-
385
- private convertToGoogleMessages(
386
- messages: LLMChatMessage[],
387
- isGemma: boolean,
388
- ): { systemInstruction?: string; contents: GoogleContent[] } {
389
- let systemInstruction: string | undefined;
390
- const contents: GoogleContent[] = [];
391
-
392
- for (const msg of messages) {
393
- if (msg.role === 'system') {
394
- if (isGemma) {
395
- // Gemma: prepend system message to first user message
396
- systemInstruction = typeof msg.content === 'string'
397
- ? msg.content
398
- : msg.content.filter((p): p is LLMTextContent => p.type === 'text').map(p => p.text).join('');
399
- } else {
400
- systemInstruction = typeof msg.content === 'string'
401
- ? msg.content
402
- : msg.content.filter((p): p is LLMTextContent => p.type === 'text').map(p => p.text).join('');
403
- }
404
- continue;
405
- }
406
-
407
- if (msg.role === 'tool') {
408
- // Convert tool result to Google functionResponse
409
- let responseData: Record<string, unknown>;
410
- try {
411
- responseData = typeof msg.content === 'string'
412
- ? JSON.parse(msg.content)
413
- : { result: msg.content };
414
- } catch {
415
- responseData = { result: typeof msg.content === 'string' ? msg.content : JSON.stringify(msg.content) };
416
- }
417
-
418
- contents.push({
419
- role: 'function',
420
- parts: [{
421
- functionResponse: {
422
- name: msg.tool_call_id ?? 'unknown',
423
- response: responseData,
424
- },
425
- }],
426
- });
427
- continue;
428
- }
429
-
430
- if (msg.role === 'assistant') {
431
- const parts: GooglePart[] = [];
432
- const textContent = typeof msg.content === 'string' ? msg.content : '';
433
- if (textContent) parts.push({ text: textContent });
434
-
435
- // Convert tool calls to functionCall parts
436
- if (msg.tool_calls) {
437
- for (const tc of msg.tool_calls) {
438
- const part: GooglePart = {
439
- functionCall: {
440
- name: tc.function.name,
441
- args: typeof tc.function.arguments === 'string'
442
- ? JSON.parse(tc.function.arguments)
443
- : tc.function.arguments as Record<string, unknown>,
444
- },
445
- };
446
- // Echo thought signature back (required by Gemini 3.x)
447
- if (tc.thoughtSignature) {
448
- part.thoughtSignature = tc.thoughtSignature;
449
- }
450
- parts.push(part);
451
- }
452
- }
453
-
454
- contents.push({ role: 'model', parts });
455
- continue;
456
- }
457
-
458
- // User messages
459
- const parts = this.convertContentToGoogleParts(msg.content);
460
-
461
- // Gemma: prepend system instruction to first user message
462
- if (isGemma && systemInstruction && contents.length === 0) {
463
- const systemParts = [{ text: `[System Instructions]\n${systemInstruction}\n\n[User Message]\n` }];
464
- contents.push({
465
- role: 'user',
466
- parts: [...systemParts, ...parts],
467
- });
468
- systemInstruction = undefined; // Consumed
469
- } else {
470
- contents.push({ role: 'user', parts });
471
- }
472
- }
473
-
474
- return { systemInstruction, contents };
475
- }
476
-
477
- private convertContentToGoogleParts(content: string | LLMContentPart[]): GooglePart[] {
478
- if (typeof content === 'string') {
479
- return [{ text: content }];
480
- }
481
-
482
- return content.map(part => {
483
- if (part.type === 'text') {
484
- return { text: part.text };
485
- }
486
- if (part.type === 'audio') {
487
- return {
488
- inlineData: {
489
- mimeType: part.audio.mimeType,
490
- data: part.audio.data,
491
- },
492
- };
493
- }
494
- // Image content
495
- const url = part.image_url.url;
496
- if (url.startsWith('data:')) {
497
- const match = url.match(/^data:([^;]+);base64,(.+)$/);
498
- if (match) {
499
- return {
500
- inlineData: {
501
- mimeType: match[1]!,
502
- data: match[2]!,
503
- },
504
- };
505
- }
506
- }
507
- // For regular URLs, try inline data format
508
- return { text: `[Image: ${url}]` };
509
- });
510
- }
511
-
512
- // ========================================================================
513
- // Tool Conversion
514
- // ========================================================================
515
-
516
- private convertToGoogleTool(tool: LLMToolDefinition): GoogleFunctionDeclaration {
517
- return {
518
- name: tool.function.name,
519
- description: tool.function.description,
520
- parameters: {
521
- type: 'object',
522
- properties: tool.function.parameters.properties ?? {},
523
- required: tool.function.parameters.required,
524
- },
525
- };
526
- }
527
-
528
- private convertFunctionCallToToolCall(
529
- fc: { name: string; args: Record<string, unknown> },
530
- thoughtSignature?: string,
531
- ): LLMToolCall {
532
- const toolCall: LLMToolCall = {
533
- id: this.generateToolCallId(),
534
- type: 'function',
535
- function: {
536
- name: fc.name,
537
- arguments: JSON.stringify(fc.args),
538
- },
539
- };
540
- if (thoughtSignature) {
541
- toolCall.thoughtSignature = thoughtSignature;
542
- }
543
- return toolCall;
544
- }
545
-
546
- // ========================================================================
547
- // Response Parsing
548
- // ========================================================================
549
-
550
- private parseGoogleResponse(data: GoogleResponse): LLMChatResponse {
551
- const candidate = data.candidates?.[0];
552
- if (!candidate?.content?.parts) {
553
- return {
554
- message: { role: 'assistant', content: '' },
555
- provider: this.isVertex ? 'vertex' : 'google',
556
- };
557
- }
558
-
559
- let textContent = '';
560
- const toolCalls: LLMToolCall[] = [];
561
-
562
- for (const part of candidate.content.parts) {
563
- if (part.text) textContent += part.text;
564
- if (part.functionCall) {
565
- toolCalls.push(this.convertFunctionCallToToolCall(
566
- part.functionCall,
567
- part.thoughtSignature,
568
- ));
569
- }
570
- }
571
-
572
- const usage: TokenUsageInfo | undefined = data.usageMetadata
573
- ? {
574
- inputTokens: data.usageMetadata.promptTokenCount,
575
- outputTokens: data.usageMetadata.candidatesTokenCount,
576
- totalTokens: data.usageMetadata.totalTokenCount,
577
- cachedTokens: data.usageMetadata.cachedContentTokenCount,
578
- }
579
- : undefined;
580
-
581
- return {
582
- message: {
583
- role: 'assistant',
584
- content: textContent,
585
- tool_calls: toolCalls.length > 0 ? toolCalls : undefined,
586
- },
587
- usage,
588
- provider: this.isVertex ? 'vertex' : 'google',
589
- };
590
- }
591
-
592
- // ========================================================================
593
- // Flex Retry Logic
594
- // ========================================================================
595
-
596
- /**
597
- * Retry HTTP requests for Flex tier when receiving 503/429 errors.
598
- * Uses exponential backoff (5s → 10s → 20s) as recommended by Google.
599
- */
600
- private async fetchWithFlexRetry<T>(
601
- url: string,
602
- reqOptions: { method: 'POST'; headers: Record<string, string>; body: unknown; timeout: number },
603
- maxRetries = 3,
604
- baseDelay = 5000,
605
- ): Promise<import('../http.js').HttpResponse<T>> {
606
- for (let attempt = 0; attempt < maxRetries; attempt++) {
607
- try {
608
- return await httpRequest<T>(url, reqOptions);
609
- } catch (error) {
610
- const isRetryable = error instanceof Error
611
- && (error.message.includes('HTTP 503') || error.message.includes('HTTP 429'));
612
- if (!isRetryable || attempt >= maxRetries - 1) throw error;
613
- const delay = baseDelay * (2 ** attempt);
614
- await new Promise(r => setTimeout(r, delay));
615
- }
616
- }
617
- throw new Error('Unreachable');
618
- }
619
-
620
- }
1
+ /**
2
+ * Universal LLM Client v3 — Google Provider
3
+ *
4
+ * Implements BaseLLMClient for Google AI Studio and Vertex AI.
5
+ * Supports Gemini and Gemma models with full tool calling,
6
+ * streaming, embeddings, and system prompt handling.
7
+ */
8
+
9
+ import { BaseLLMClient } from '../client.js';
10
+ import { resolveThinking, geminiThinkingBudget } from '../thinking.js';
11
+ import { httpRequest, httpStream, parseSSE, type HttpRequestOptions } from '../http.js';
12
+ import { StandardChatDecoder } from '../stream-decoder.js';
13
+ import {
14
+ normalizeJsonSchema,
15
+ stripUnsupportedFeatures,
16
+ getJsonSchemaFromConfig,
17
+ type JSONSchema,
18
+ } from '../structured-output.js';
19
+ import type {
20
+ LLMClientOptions,
21
+ LLMChatMessage,
22
+ LLMChatResponse,
23
+ ChatOptions,
24
+ LLMToolDefinition,
25
+ LLMToolCall,
26
+ LLMContentPart,
27
+ LLMTextContent,
28
+ GooglePart,
29
+ GoogleContent,
30
+ GoogleRequest,
31
+ GoogleResponse,
32
+ GoogleFunctionDeclaration,
33
+ TokenUsageInfo,
34
+ AIModelApiType,
35
+ DeepResearchOptions,
36
+ DeepResearchResult,
37
+ DeepResearchStep,
38
+ DeepResearchEvent,
39
+ } from '../interfaces.js';
40
+ import type { DecodedEvent } from '../stream-decoder.js';
41
+ import type { Auditor } from '../auditor.js';
42
+
43
+ export class GoogleClient extends BaseLLMClient {
44
+ private isVertex: boolean;
45
+ private apiVersion: string;
46
+
47
+ constructor(options: LLMClientOptions, auditor?: Auditor) {
48
+ super(options, auditor);
49
+ this.isVertex = options.apiType === ('vertex' as AIModelApiType);
50
+ this.apiVersion = options.apiVersion ?? 'v1beta';
51
+ }
52
+
53
+ // ========================================================================
54
+ // URL Building
55
+ // ========================================================================
56
+
57
+ private getBaseUrl(): string {
58
+ if (this.isVertex) {
59
+ const region = this.options.region ?? 'us-central1';
60
+ return `https://${region}-aiplatform.googleapis.com/${this.apiVersion}/projects/-/locations/${region}/publishers/google/models/${this.options.model}`;
61
+ }
62
+ if (this.options.url) return this.options.url.replace(/\/+$/, '');
63
+ return `https://generativelanguage.googleapis.com/${this.apiVersion}/models/${this.options.model}`;
64
+ }
65
+
66
+ private getChatUrl(): string {
67
+ const base = this.getBaseUrl();
68
+ if (this.isVertex) {
69
+ return `${base}:generateContent`;
70
+ }
71
+ return `${base}:generateContent?key=${this.options.apiKey}`;
72
+ }
73
+
74
+ private getStreamUrl(): string {
75
+ const base = this.getBaseUrl();
76
+ if (this.isVertex) {
77
+ return `${base}:streamGenerateContent?alt=sse`;
78
+ }
79
+ return `${base}:streamGenerateContent?alt=sse&key=${this.options.apiKey}`;
80
+ }
81
+
82
+ private getEmbedUrl(): string {
83
+ if (this.isVertex) {
84
+ const region = this.options.region ?? 'us-central1';
85
+ return `https://${region}-aiplatform.googleapis.com/${this.apiVersion}/projects/-/locations/${region}/publishers/google/models/${this.options.model}:embedContent`;
86
+ }
87
+ return `https://generativelanguage.googleapis.com/${this.apiVersion}/models/${this.options.model}:embedContent?key=${this.options.apiKey}`;
88
+ }
89
+
90
+ private getHeaders(): Record<string, string> {
91
+ const headers: Record<string, string> = {
92
+ 'Content-Type': 'application/json',
93
+ };
94
+ if (this.isVertex && this.options.apiKey) {
95
+ headers['Authorization'] = `Bearer ${this.options.apiKey}`;
96
+ }
97
+ return headers;
98
+ }
99
+
100
+ // ========================================================================
101
+ // Chat
102
+ // ========================================================================
103
+
104
+ async chat(
105
+ messages: LLMChatMessage[],
106
+ options?: ChatOptions,
107
+ ): Promise<LLMChatResponse> {
108
+ // Structured output and tools can now be used together.\n // The provider sends both responseSchema and tools in the request.\n // The Router handles skipping validation when the response contains tool calls.
109
+
110
+ const url = this.getChatUrl();
111
+ const body = this.buildRequestBody(messages, options);
112
+
113
+ // Flex tier: increase timeout (Google recommends 600s+) and use retry logic
114
+ const tier = options?.serviceTier;
115
+ const effectiveTimeout = tier === 'flex'
116
+ ? Math.max(this.options.timeout ?? 60000, 600_000)
117
+ : (this.options.timeout ?? 60000);
118
+
119
+ const start = Date.now();
120
+ this.auditor.record({
121
+ timestamp: start,
122
+ type: 'request',
123
+ provider: this.isVertex ? 'vertex' : 'google',
124
+ model: this.options.model,
125
+ });
126
+
127
+ const reqOptions = {
128
+ method: 'POST' as const,
129
+ headers: this.getHeaders(),
130
+ body,
131
+ timeout: effectiveTimeout,
132
+ };
133
+
134
+ const response = tier === 'flex'
135
+ ? await this.fetchWithFlexRetry<GoogleResponse>(url, reqOptions)
136
+ : await httpRequest<GoogleResponse>(url, reqOptions);
137
+
138
+ const result = this.parseGoogleResponse(response.data);
139
+
140
+ // Surface the tier that actually served the request
141
+ const resolvedTier = response.headers?.get('x-gemini-service-tier');
142
+ if (resolvedTier) {
143
+ result.serviceTier = resolvedTier.toLowerCase() as 'flex' | 'priority' | 'standard';
144
+ }
145
+
146
+ this.auditor.record({
147
+ timestamp: Date.now(),
148
+ type: 'response',
149
+ provider: this.isVertex ? 'vertex' : 'google',
150
+ model: this.options.model,
151
+ duration: Date.now() - start,
152
+ usage: result.usage,
153
+ });
154
+
155
+ return result;
156
+ }
157
+
158
+ // ========================================================================
159
+ // Streaming
160
+ // ========================================================================
161
+
162
/**
 * Stream a chat completion and yield decoded events as they arrive.
 *
 * The request body is built by `buildRequestBody` and POSTed to the URL
 * returned by `getStreamUrl()`. The response is read as SSE: every line
 * starting with `data: ` carries one JSON payload. Thought parts are
 * yielded as `thinking`, other text parts as `text`, and function calls
 * as `tool_call` events.
 *
 * @param messages Conversation to send.
 * @param options  Per-call options. `serviceTier: 'flex'` raises the
 *                 timeout to at least 600s.
 * @returns (generator return value) the assembled assistant message with
 *          accumulated tool calls, reasoning text, usage and provider.
 */
async *chatStream(
    messages: LLMChatMessage[],
    options?: ChatOptions,
): AsyncGenerator<DecodedEvent, LLMChatResponse | void, unknown> {
    const url = this.getStreamUrl();
    const body = this.buildRequestBody(messages, options);

    // Flex tier: increase timeout (Google recommends 600s+)
    const tier = options?.serviceTier;
    const effectiveTimeout = tier === 'flex'
        ? Math.max(this.options.timeout ?? 120000, 600_000)
        : (this.options.timeout ?? 120000);

    const start = Date.now();
    this.auditor.record({
        timestamp: start,
        type: 'stream_start',
        provider: this.isVertex ? 'vertex' : 'google',
        model: this.options.model,
    });

    const decoder = new StandardChatDecoder(() => {});
    let usage: TokenUsageInfo | undefined;
    let reasoningBuffer = '';
    const allToolCalls: LLMToolCall[] = [];

    // Decodes one SSE line into the events it produces, updating the
    // accumulated usage / reasoning / tool-call state as a side effect.
    // Only JSON parsing is guarded: a malformed payload is skipped, while
    // any other failure is allowed to surface instead of being swallowed.
    const decodeLine = (line: string): DecodedEvent[] => {
        const events: DecodedEvent[] = [];
        if (!line.startsWith('data: ')) return events;
        const jsonStr = line.slice(6).trim();
        if (!jsonStr || jsonStr === '[DONE]') return events;

        let data: GoogleResponse;
        try {
            data = JSON.parse(jsonStr) as GoogleResponse;
        } catch {
            // Skip unparseable JSON
            return events;
        }

        if (data.usageMetadata) {
            usage = {
                inputTokens: data.usageMetadata.promptTokenCount ?? 0,
                outputTokens: data.usageMetadata.candidatesTokenCount ?? 0,
                totalTokens: data.usageMetadata.totalTokenCount ?? 0,
                cachedTokens: data.usageMetadata.cachedContentTokenCount,
                reasoningTokens: data.usageMetadata.thoughtsTokenCount,
            };
        }

        const candidate = data.candidates?.[0];
        if (!candidate?.content?.parts) return events;

        for (const part of candidate.content.parts) {
            if (part.text) {
                if (part.thought) {
                    reasoningBuffer += part.text;
                    events.push({ type: 'thinking', content: part.text });
                } else {
                    decoder.push(part.text);
                    events.push({ type: 'text', content: part.text });
                }
            }
            if (part.functionCall) {
                const toolCall = this.convertFunctionCallToToolCall(
                    part.functionCall,
                    part.thoughtSignature,
                );
                allToolCalls.push(toolCall);
                events.push({ type: 'tool_call', calls: [toolCall] });
            }
        }
        return events;
    };

    const stream = httpStream(url, {
        method: 'POST',
        headers: this.getHeaders(),
        body,
        timeout: effectiveTimeout,
    });

    // Google streams SSE with JSON payloads; chunks may split a line, so
    // the incomplete tail is carried over to the next chunk.
    let buffer = '';
    for await (const chunk of stream) {
        buffer += chunk;

        const lines = buffer.split('\n');
        buffer = lines.pop() ?? '';

        for (const line of lines) {
            yield* decodeLine(line);
        }
    }

    // The stream may end without a trailing newline; decode whatever is
    // left so the last payload is not dropped.
    if (buffer) {
        yield* decodeLine(buffer);
    }

    decoder.flush();

    this.auditor.record({
        timestamp: Date.now(),
        type: 'stream_end',
        provider: this.isVertex ? 'vertex' : 'google',
        model: this.options.model,
        duration: Date.now() - start,
        usage,
    });

    return {
        message: {
            role: 'assistant',
            content: decoder.getCleanContent(),
            tool_calls: allToolCalls.length > 0 ? allToolCalls : undefined,
        },
        reasoning: reasoningBuffer || decoder.getReasoning(),
        usage,
        provider: this.isVertex ? 'vertex' : 'google',
    };
}
272
+
273
+ // ========================================================================
274
+ // Deep Research (Gemini interactions API)
275
+ // ========================================================================
276
+
277
/**
 * Reports whether Deep Research can be used with this provider instance.
 * It is offered through Google AI Studio only, so Vertex AI instances
 * answer `false`.
 */
supportsDeepResearch(): boolean {
    if (this.isVertex) {
        return false;
    }
    return true;
}
281
+
282
/**
 * Base URL of the interactions endpoint for the configured API version.
 *
 * @throws Error when the provider is configured for Vertex AI.
 */
private interactionsBase(): string {
    if (!this.isVertex) {
        return `https://generativelanguage.googleapis.com/${this.apiVersion}/interactions`;
    }
    throw new Error('Deep Research is only available via Google AI Studio, not Vertex AI.');
}
288
+
289
/**
 * Headers sent with every Deep Research request: JSON content type, the
 * API key (empty string when none is configured) and a pinned API revision.
 */
private deepResearchHeaders(): Record<string, string> {
    const headers: Record<string, string> = {};
    headers['Content-Type'] = 'application/json';
    headers['x-goog-api-key'] = this.options.apiKey ?? '';
    headers['Api-Revision'] = '2026-05-20';
    return headers;
}
296
+
297
/**
 * Assemble the JSON body used to create a Deep Research interaction.
 *
 * @param input      Research prompt.
 * @param opts       Caller options; `agent` and `thinkingSummaries` fall
 *                   back to defaults, `tools` and `previousInteractionId`
 *                   are only included when provided.
 * @param background Value sent as the `background` flag.
 */
private buildInteractionBody(input: string, opts: DeepResearchOptions, background: boolean): Record<string, unknown> {
    const payload: Record<string, unknown> = {
        input,
        agent: opts.agent ?? 'deep-research-preview-04-2026',
        background,
        agent_config: {
            type: 'deep-research',
            thinking_summaries: opts.thinkingSummaries ?? 'auto',
        },
    };

    if (opts.tools?.length) {
        payload['tools'] = opts.tools.map(toolType => ({ type: toolType }));
    }
    if (opts.previousInteractionId) {
        payload['previous_interaction_id'] = opts.previousInteractionId;
    }
    return payload;
}
310
+
311
/**
 * Normalise a raw interaction object into a `DeepResearchResult`.
 *
 * The report is the first non-empty string among `output_text`,
 * `outputText` and `output`. If none of them is a string, the text of
 * all step content blocks is concatenated instead. Values are type-checked
 * at runtime rather than cast, so a non-string `output`, `id` or `status`
 * can no longer leak out typed as a string.
 *
 * @param i Raw interaction payload, or `undefined` when nothing was received.
 * @returns Result with `id` defaulting to `''` and `status` defaulting to
 *          `'in_progress'`; the untouched payload is kept in `raw`.
 */
private toDeepResearchResult(i: Record<string, unknown> | undefined): DeepResearchResult {
    const obj = i ?? {};
    const steps = obj['steps'] as DeepResearchStep[] | undefined;

    let report = [obj['output_text'], obj['outputText'], obj['output']]
        .find((value): value is string => typeof value === 'string' && value.length > 0);

    // Some responses carry the final report only inside the steps' content
    // blocks (the last step is typically the answer) — concatenate text there.
    if (!report && Array.isArray(steps)) {
        const text = steps
            // Optional chaining tolerates null/undefined entries in the array.
            .flatMap(s => (Array.isArray(s?.content) ? s.content : []))
            .map(c => (c && typeof c === 'object' && typeof (c as { text?: unknown }).text === 'string'
                ? (c as { text: string }).text
                : ''))
            .filter(Boolean)
            .join('\n\n');
        report = text || undefined;
    }

    const idValue = obj['id'];
    const statusValue = obj['status'];
    return {
        id: typeof idValue === 'string' ? idValue : '',
        status: typeof statusValue === 'string' ? statusValue : 'in_progress',
        report,
        steps,
        error: obj['error'],
        raw: obj,
    };
}
336
+
337
/**
 * Issue an `httpRequest` and retry on any failure with a linearly growing
 * pause (1.5s, 3s, 4.5s, ...) — the preview interactions API is flaky (503s).
 *
 * @param url     Target URL.
 * @param init    Request options; `init.signal` also cancels the pauses.
 * @param retries Number of retries after the first attempt.
 * @returns The response payload (`data`) of the first successful attempt.
 * @throws The error of the last failed attempt once retries are exhausted.
 */
private async drRequest(
    url: string,
    init: HttpRequestOptions,
    retries = 3,
): Promise<Record<string, unknown>> {
    let failure: unknown;
    let attempt = 0;
    while (attempt <= retries) {
        try {
            const { data } = await httpRequest<Record<string, unknown>>(url, init);
            return data;
        } catch (err) {
            failure = err;
            // No pause after the final attempt; the error is rethrown below.
            if (attempt < retries) {
                await this.delay(1500 * (attempt + 1), init.signal);
            }
        }
        attempt += 1;
    }
    throw failure;
}
355
+
356
/**
 * Run a Deep Research interaction to completion.
 *
 * Creates the interaction in background mode, then polls it every
 * `opts.pollIntervalMs` (default 5s) while its status is `in_progress`,
 * giving up once `opts.timeoutMs` (default 600s) has elapsed. Poll
 * failures are ignored so a transient error does not end a long run.
 *
 * @returns The latest known interaction converted to a result. It may
 *          still be `in_progress` if the deadline was reached.
 */
async deepResearch(input: string, opts: DeepResearchOptions = {}): Promise<DeepResearchResult> {
    const endpoint = this.interactionsBase();
    const requestHeaders = this.deepResearchHeaders();
    const pollEveryMs = opts.pollIntervalMs ?? 5000;
    const giveUpAt = Date.now() + (opts.timeoutMs ?? 600_000);

    let current = await this.drRequest(endpoint, {
        method: 'POST',
        headers: requestHeaders,
        body: this.buildInteractionBody(input, opts, true),
        timeout: this.options.timeout ?? 60_000,
        signal: opts.signal,
    });

    // Without an id there is nothing to poll; hand back what we received.
    const interactionId = current?.['id'] as string;
    if (!interactionId) {
        return this.toDeepResearchResult(current);
    }

    const isRunning = (): boolean => (current?.['status'] ?? 'in_progress') === 'in_progress';

    while (isRunning() && Date.now() <= giveUpAt) {
        await this.delay(pollEveryMs, opts.signal);
        try {
            current = await this.drRequest(
                `${endpoint}/${interactionId}`,
                { method: 'GET', headers: requestHeaders, timeout: this.options.timeout ?? 60_000, signal: opts.signal },
                2,
            );
        } catch {
            // Tolerate transient errors during a long poll; keep trying until the deadline.
        }
    }

    return this.toDeepResearchResult(current);
}
391
+
392
/**
 * Stream a Deep Research interaction.
 *
 * Yields a `thought`, `text` or `image` event for each delta found on an
 * SSE payload (either top-level `delta` or `step.delta`), plus a `status`
 * event whenever a payload carries a string `status`. Unparseable
 * payloads are skipped.
 *
 * @returns (generator return value) the last parsed payload converted
 *          with `toDeepResearchResult` — best effort if the stream ends early.
 */
async *deepResearchStream(
    input: string,
    opts: DeepResearchOptions = {},
): AsyncGenerator<DeepResearchEvent, DeepResearchResult, unknown> {
    const endpoint = this.interactionsBase();
    const requestHeaders = this.deepResearchHeaders();

    // Streaming long-running research requires background:true AND stream:true
    // in the create body (per the Deep Research Interactions API docs).
    const sse = httpStream(endpoint, {
        method: 'POST',
        headers: requestHeaders,
        body: { ...this.buildInteractionBody(input, opts, true), stream: true },
        timeout: opts.timeoutMs ?? 600_000,
        signal: opts.signal,
    });

    let latest: Record<string, unknown> | undefined;
    for await (const { data: payload } of parseSSE(sse)) {
        if (!payload || payload === '[DONE]') continue;

        let parsed: Record<string, unknown>;
        try {
            parsed = JSON.parse(payload) as Record<string, unknown>;
        } catch {
            continue;
        }
        latest = parsed;

        const delta = (parsed['delta'] ?? (parsed['step'] as Record<string, unknown> | undefined)?.['delta']) as
            | Record<string, unknown> | undefined;
        if (delta) {
            switch (delta['type'] as string | undefined) {
                case 'thought':
                    yield { type: 'thought', content: String(delta['text'] ?? delta['content'] ?? '') };
                    break;
                case 'text':
                    yield { type: 'text', content: String(delta['text'] ?? delta['content'] ?? '') };
                    break;
                case 'image':
                    yield { type: 'image', content: delta['image'] ?? delta['content'] };
                    break;
                default:
                    break;
            }
        }

        if (typeof parsed['status'] === 'string') {
            yield { type: 'status', status: parsed['status'] as string };
        }
    }

    return this.toDeepResearchResult(latest);
}
431
+
432
/**
 * Resolve after `ms` milliseconds, or reject with `Error('aborted')` as
 * soon as `signal` aborts (immediately if it is already aborted).
 *
 * The abort listener is removed when the timer fires. Callers poll with
 * the same signal many times, and leaving one listener behind per call
 * would accumulate listeners on that signal.
 *
 * @param ms     Time to wait in milliseconds.
 * @param signal Optional signal that cancels the wait.
 */
private delay(ms: number, signal?: AbortSignal): Promise<void> {
    return new Promise<void>((resolve, reject) => {
        if (signal?.aborted) {
            reject(new Error('aborted'));
            return;
        }

        const onAbort = (): void => {
            clearTimeout(timer);
            reject(new Error('aborted'));
        };
        const timer = setTimeout(() => {
            signal?.removeEventListener('abort', onAbort);
            resolve();
        }, ms);

        signal?.addEventListener('abort', onAbort, { once: true });
    });
}
439
+
440
+ // ========================================================================
441
+ // Embeddings
442
+ // ========================================================================
443
+
444
/**
 * Request an embedding for a single text.
 *
 * @param text Text to embed; sent as the only part of the request content.
 * @returns The `embedding.values` vector from the response.
 */
async embed(text: string): Promise<number[]> {
    type EmbedResponse = { embedding: { values: number[] } };

    const endpoint = this.getEmbedUrl();
    const { data } = await httpRequest<EmbedResponse>(endpoint, {
        method: 'POST',
        headers: this.getHeaders(),
        body: { content: { parts: [{ text }] } },
        timeout: this.options.timeout ?? 30000,
    });
    return data.embedding.values;
}
460
+
461
+ // ========================================================================
462
+ // Model Discovery
463
+ // ========================================================================
464
+
465
/**
 * List the model names available to this provider.
 *
 * The Vertex URL is built from the configured region (default
 * `us-central1`); the AI Studio URL carries the API key as a query
 * parameter. A leading `models/` prefix is stripped from every name.
 *
 * @returns Model names, or an empty array when the request or parsing
 *          fails for any reason (errors are deliberately swallowed).
 */
async getModels(): Promise<string[]> {
    let listUrl: string;
    if (this.isVertex) {
        listUrl = `https://${this.options.region ?? 'us-central1'}-aiplatform.googleapis.com/${this.apiVersion}/models`;
    } else {
        listUrl = `https://generativelanguage.googleapis.com/${this.apiVersion}/models?key=${this.options.apiKey}`;
    }

    try {
        const { data } = await httpRequest<{
            models: Array<{ name: string }>;
        }>(listUrl, {
            headers: this.getHeaders(),
            timeout: 10000,
        });
        return data.models.map(model => model.name.replace(/^models\//, ''));
    } catch {
        return [];
    }
}
484
+
485
+ // ========================================================================
486
+ // Request Building
487
+ // ========================================================================
488
+
489
/**
 * Build the request body shared by the chat and streaming calls.
 *
 * @param messages Conversation to convert into Google `contents`.
 * @param options  Per-call options. `tools` overrides the registered
 *                 tools, and `serviceTier` is forwarded unless it is
 *                 `'standard'`.
 * @returns Body with contents and generation config, plus system
 *          instruction, tools and service tier when applicable.
 */
private buildRequestBody(
    messages: LLMChatMessage[],
    options?: ChatOptions,
): GoogleRequest {
    const isGemma = this.options.model.toLowerCase().includes('gemma');
    const converted = this.convertToGoogleMessages(messages, isGemma);

    // Explicit per-call tools win; otherwise fall back to the registry when it is non-empty.
    const registeredTools = () =>
        (Object.keys(this.toolRegistry).length > 0 ? this.getToolDefinitions() : undefined);
    const tools = options?.tools ?? registeredTools();

    const body: GoogleRequest = {
        contents: converted.contents,
        generationConfig: this.buildGenerationConfig(options),
    };

    // System instruction (Gemini supports it, Gemma doesn't)
    if (!isGemma && converted.systemInstruction) {
        body.systemInstruction = { parts: [{ text: converted.systemInstruction }] };
    }

    if (tools?.length) {
        const functionDeclarations = tools.map(definition => this.convertToGoogleTool(definition));
        body.tools = [{ functionDeclarations }];
    }

    // Inference tier (Flex / Priority); 'standard' is the default and is not sent.
    const tier = options?.serviceTier;
    if (tier && tier !== 'standard') {
        body.service_tier = tier.toUpperCase() as 'FLEX' | 'PRIORITY';
    }

    return body;
}
525
+
526
/**
 * Build the `generationConfig` object for a request.
 *
 * Starts from the provider's default parameters overlaid with per-call
 * `parameters`, then applies `temperature` / `maxTokens`, the unified
 * thinking setting and structured-output schema options.
 *
 * @throws Error when schema options are present but contain neither a
 *         `jsonSchema` nor a `schemaConfig`.
 */
private buildGenerationConfig(options?: ChatOptions): Record<string, unknown> {
    const config: Record<string, unknown> = {
        ...this.options.defaultParameters,
        ...options?.parameters,
    };

    if (options?.temperature !== undefined) {
        config['temperature'] = options.temperature;
    }
    if (options?.maxTokens !== undefined) {
        config['maxOutputTokens'] = options.maxTokens;
    }

    // Unified thinking flag → Gemini thinkingConfig. The per-call value takes
    // precedence over the model-level one. Gemini 3.x is driven by
    // `thinkingLevel`, older models by `thinkingBudget` (0 = off).
    // `includeThoughts` asks for the reasoning text. A thinkingConfig the
    // caller already supplied through parameters is never overwritten.
    const thinking = resolveThinking(options?.thinking, this.options.thinking);
    const userSuppliedThinking = config['thinkingConfig'] !== undefined;
    if (thinking && !userSuppliedThinking) {
        const isGemini3 = /gemini-3/i.test(this.options.model);
        let thinkingConfig: Record<string, unknown>;
        if (isGemini3) {
            if (!thinking.enabled) {
                thinkingConfig = { thinkingLevel: 'MINIMAL' };
            } else if (thinking.level) {
                thinkingConfig = { thinkingLevel: thinking.level.toUpperCase(), includeThoughts: true };
            } else {
                thinkingConfig = { includeThoughts: true };
            }
        } else if (thinking.enabled) {
            thinkingConfig = { thinkingBudget: geminiThinkingBudget(thinking.level), includeThoughts: true };
        } else {
            thinkingConfig = { thinkingBudget: 0 };
        }
        config['thinkingConfig'] = thinkingConfig;
    }

    // Structured output: responseMimeType + responseSchema
    const schemaOptions = this.extractSchemaOptions(options);
    if (!schemaOptions) {
        return config;
    }

    config['responseMimeType'] = 'application/json';

    // Resolve the schema from whichever form the caller provided.
    let jsonSchema: JSONSchema;
    if (schemaOptions.jsonSchema) {
        jsonSchema = normalizeJsonSchema(schemaOptions.jsonSchema);
    } else if (schemaOptions.schemaConfig) {
        jsonSchema = getJsonSchemaFromConfig(schemaOptions.schemaConfig);
    } else {
        throw new Error('Either schemaConfig or jsonSchema must be provided');
    }

    // Drop schema features Google does not accept before sending.
    config['responseSchema'] = stripUnsupportedFeatures(jsonSchema, 'google');
    return config;
}
577
+
578
+ // ========================================================================
579
+ // Message Conversion
580
+ // ========================================================================
581
+
582
/**
 * Convert provider-agnostic chat messages into Google `contents`.
 *
 * - `system`: the text becomes `systemInstruction` (a later system message
 *   replaces an earlier one). For Gemma the instruction is instead
 *   prepended to the first user message, provided that message is the
 *   first content entry.
 * - `tool`: becomes a `functionResponse` part. The response is always a
 *   JSON object: content that parses to anything else (number, array,
 *   string, null) or fails to parse is wrapped as `{ result: ... }`. The
 *   function name is resolved from the matching earlier assistant tool
 *   call, falling back to the tool-call id and then `'unknown'`.
 * - `assistant`: text (string content or the text parts of array content)
 *   plus one `functionCall` part per tool call, echoing any thought
 *   signature.
 * - anything else is treated as a user message.
 *
 * @param messages Conversation in call order.
 * @param isGemma  Whether the target model is a Gemma model.
 * @returns The system instruction still to be sent separately (if any)
 *          and the converted contents.
 */
private convertToGoogleMessages(
    messages: LLMChatMessage[],
    isGemma: boolean,
): { systemInstruction?: string; contents: GoogleContent[] } {
    let systemInstruction: string | undefined;
    const contents: GoogleContent[] = [];
    // Tool-call id → function name, collected from assistant turns so that
    // tool results can be labelled with the function they answer.
    const toolNamesById = new Map<string, string>();

    // Flattens message content to plain text; non-text parts are ignored.
    const textOf = (content: unknown): string => {
        if (typeof content === 'string') return content;
        if (!Array.isArray(content)) return '';
        return (content as LLMContentPart[])
            .filter((p): p is LLMTextContent => p.type === 'text')
            .map(p => p.text)
            .join('');
    };

    const isJsonObject = (value: unknown): value is Record<string, unknown> =>
        value !== null && typeof value === 'object' && !Array.isArray(value);

    for (const msg of messages) {
        if (msg.role === 'system') {
            // Same extraction for Gemini and Gemma; Gemma's copy is folded
            // into the first user message further below.
            systemInstruction = textOf(msg.content);
            continue;
        }

        if (msg.role === 'tool') {
            // Convert tool result to Google functionResponse
            let responseData: Record<string, unknown>;
            if (typeof msg.content === 'string') {
                let parsed: unknown;
                try {
                    parsed = JSON.parse(msg.content);
                } catch {
                    parsed = msg.content;
                }
                responseData = isJsonObject(parsed) ? parsed : { result: parsed };
            } else {
                responseData = { result: msg.content };
            }

            const callId = msg.tool_call_id;
            const functionName = callId != null ? toolNamesById.get(callId) : undefined;

            contents.push({
                role: 'function',
                parts: [{
                    functionResponse: {
                        name: functionName ?? callId ?? 'unknown',
                        response: responseData,
                    },
                }],
            });
            continue;
        }

        if (msg.role === 'assistant') {
            const parts: GooglePart[] = [];
            const textContent = textOf(msg.content);
            if (textContent) parts.push({ text: textContent });

            // Convert tool calls to functionCall parts
            if (msg.tool_calls) {
                for (const tc of msg.tool_calls) {
                    if (tc.id) toolNamesById.set(tc.id, tc.function.name);
                    const part: GooglePart = {
                        functionCall: {
                            name: tc.function.name,
                            args: this.parseToolArguments(tc.function.arguments),
                        },
                    };
                    // Echo thought signature back (required by Gemini 3.x)
                    if (tc.thoughtSignature) {
                        part.thoughtSignature = tc.thoughtSignature;
                    }
                    parts.push(part);
                }
            }

            contents.push({ role: 'model', parts });
            continue;
        }

        // User messages
        const parts = this.convertContentToGoogleParts(msg.content);

        // Gemma: prepend system instruction to first user message
        if (isGemma && systemInstruction && contents.length === 0) {
            const systemParts = [{ text: `[System Instructions]\n${systemInstruction}\n\n[User Message]\n` }];
            contents.push({
                role: 'user',
                parts: [...systemParts, ...parts],
            });
            systemInstruction = undefined; // Consumed
        } else {
            contents.push({ role: 'user', parts });
        }
    }

    return { systemInstruction, contents };
}
671
+
672
/**
 * Convert message content into Google parts.
 *
 * A plain string becomes a single text part. For array content, text
 * parts map to text, audio parts to `inlineData`, and image parts to
 * `inlineData` when the URL is a base64 `data:` URL. Any other image URL
 * is represented by a textual `[Image: <url>]` placeholder.
 */
private convertContentToGoogleParts(content: string | LLMContentPart[]): GooglePart[] {
    if (typeof content === 'string') {
        return [{ text: content }];
    }

    const converted: GooglePart[] = [];
    for (const item of content) {
        if (item.type === 'text') {
            converted.push({ text: item.text });
            continue;
        }
        if (item.type === 'audio') {
            const { mimeType, data } = item.audio;
            converted.push({ inlineData: { mimeType, data } });
            continue;
        }

        // Image content
        const imageUrl = item.image_url.url;
        const dataUrl = imageUrl.startsWith('data:')
            ? imageUrl.match(/^data:([^;]+);base64,(.+)$/)
            : null;
        if (dataUrl) {
            converted.push({
                inlineData: {
                    mimeType: dataUrl[1]!,
                    data: dataUrl[2]!,
                },
            });
            continue;
        }

        // Remote (or non-base64) URLs are not inlined; send a text reference.
        converted.push({ text: `[Image: ${imageUrl}]` });
    }
    return converted;
}
706
+
707
+ // ========================================================================
708
+ // Tool Conversion
709
+ // ========================================================================
710
+
711
/**
 * Map a tool definition to a Google function declaration. The parameter
 * schema is always emitted as an object type, with `properties`
 * defaulting to an empty object.
 */
private convertToGoogleTool(tool: LLMToolDefinition): GoogleFunctionDeclaration {
    const fn = tool.function;
    return {
        name: fn.name,
        description: fn.description,
        parameters: {
            type: 'object',
            properties: fn.parameters.properties ?? {},
            required: fn.parameters.required,
        },
    };
}
722
+
723
/**
 * Wrap a Google `functionCall` as an `LLMToolCall` with a freshly
 * generated id. Arguments are serialised to a JSON string (`{}` when
 * absent), and the thought signature is attached only when provided.
 */
private convertFunctionCallToToolCall(
    fc: { name?: string; args?: Record<string, unknown> },
    thoughtSignature?: string,
): LLMToolCall {
    const id = this.generateToolCallId();
    const serializedArgs = JSON.stringify(fc.args ?? {});

    const call: LLMToolCall = {
        id,
        type: 'function',
        function: {
            name: fc.name || '',
            arguments: serializedArgs,
        },
    };
    if (!thoughtSignature) {
        return call;
    }
    call.thoughtSignature = thoughtSignature;
    return call;
}
740
+
741
/**
 * Normalise tool-call arguments to a plain object.
 *
 * Objects are returned as-is and `undefined` becomes `{}`. Strings are
 * parsed as JSON; an empty string, invalid JSON, or JSON that is not a
 * non-array object all yield `{}`.
 */
private parseToolArguments(args: string | Record<string, unknown> | undefined): Record<string, unknown> {
    if (typeof args === 'string') {
        if (args.length === 0) {
            return {};
        }

        let decoded: unknown;
        try {
            decoded = JSON.parse(args) as unknown;
        } catch {
            return {};
        }

        if (decoded && typeof decoded === 'object' && !Array.isArray(decoded)) {
            return decoded as Record<string, unknown>;
        }
        return {};
    }

    return args ?? {};
}
757
+
758
+ // ========================================================================
759
+ // Response Parsing
760
+ // ========================================================================
761
+
762
/**
 * Convert a non-streaming Google response into an `LLMChatResponse`.
 *
 * Only the first candidate is read. Thought parts are collected into
 * `reasoning`, other text into the message content, and function calls
 * into `tool_calls`. Token usage is reported whenever the response carries
 * `usageMetadata` — including responses whose candidate has no parts —
 * with missing counts defaulting to 0, matching the streaming path.
 *
 * @param data Parsed response payload.
 * @returns The assistant message plus reasoning, usage and provider name.
 */
private parseGoogleResponse(data: GoogleResponse): LLMChatResponse {
    const provider = this.isVertex ? 'vertex' : 'google';

    const usage: TokenUsageInfo | undefined = data.usageMetadata
        ? {
            inputTokens: data.usageMetadata.promptTokenCount ?? 0,
            outputTokens: data.usageMetadata.candidatesTokenCount ?? 0,
            totalTokens: data.usageMetadata.totalTokenCount ?? 0,
            cachedTokens: data.usageMetadata.cachedContentTokenCount,
            reasoningTokens: data.usageMetadata.thoughtsTokenCount,
        }
        : undefined;

    const candidate = data.candidates?.[0];
    if (!candidate?.content?.parts) {
        // No usable output, but the request may still have consumed tokens;
        // keep the usage so callers can account for it.
        return {
            message: { role: 'assistant', content: '' },
            usage,
            provider,
        };
    }

    let textContent = '';
    let reasoningText = '';
    const toolCalls: LLMToolCall[] = [];

    for (const part of candidate.content.parts) {
        if (part.text) {
            // Thought summaries (includeThoughts) carry the reasoning trace;
            // keep them out of `content` and surface them as `reasoning`.
            if (part.thought) reasoningText += part.text;
            else textContent += part.text;
        }
        if (part.functionCall) {
            toolCalls.push(this.convertFunctionCallToToolCall(
                part.functionCall,
                part.thoughtSignature,
            ));
        }
    }

    return {
        message: {
            role: 'assistant',
            content: textContent,
            tool_calls: toolCalls.length > 0 ? toolCalls : undefined,
        },
        reasoning: reasoningText || undefined,
        usage,
        provider,
    };
}
811
+
812
+ // ========================================================================
813
+ // Flex Retry Logic
814
+ // ========================================================================
815
+
816
/**
 * Send a Flex-tier request, retrying when it fails with HTTP 503 or 429.
 *
 * `maxRetries` counts retries after the first attempt, so the defaults
 * give up to four attempts separated by exponential backoff
 * (5s → 10s → 20s) as recommended by Google. The request is always sent
 * at least once, even when `maxRetries` is 0 or negative.
 *
 * @param url        Target URL.
 * @param reqOptions Request options forwarded unchanged on every attempt.
 * @param maxRetries Maximum number of retries after the initial attempt.
 * @param baseDelay  Delay before the first retry in milliseconds; doubled
 *                   for each subsequent retry.
 * @returns The response of the first successful attempt.
 * @throws The original error when it is not retryable or retries are exhausted.
 */
private async fetchWithFlexRetry<T>(
    url: string,
    reqOptions: { method: 'POST'; headers: Record<string, string>; body: unknown; timeout: number },
    maxRetries = 3,
    baseDelay = 5000,
): Promise<import('../http.js').HttpResponse<T>> {
    const retries = Number.isFinite(maxRetries) ? Math.max(0, Math.floor(maxRetries)) : 0;

    for (let attempt = 0; attempt <= retries; attempt++) {
        try {
            return await httpRequest<T>(url, reqOptions);
        } catch (error) {
            const isRetryable = error instanceof Error
                && (error.message.includes('HTTP 503') || error.message.includes('HTTP 429'));
            if (!isRetryable || attempt >= retries) throw error;
            const delay = baseDelay * (2 ** attempt);
            await new Promise<void>(r => setTimeout(r, delay));
        }
    }
    // The loop always returns or throws; this only satisfies the type checker.
    throw new Error('Unreachable');
}
839
+
840
+ }