universal-llm-client 4.0.0 → 4.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/ai-model.d.ts +20 -22
- package/dist/ai-model.d.ts.map +1 -1
- package/dist/ai-model.js +26 -23
- package/dist/ai-model.js.map +1 -1
- package/dist/client.d.ts +5 -5
- package/dist/client.d.ts.map +1 -1
- package/dist/client.js +17 -9
- package/dist/client.js.map +1 -1
- package/dist/http.d.ts +2 -0
- package/dist/http.d.ts.map +1 -1
- package/dist/http.js +1 -0
- package/dist/http.js.map +1 -1
- package/dist/index.d.ts +3 -3
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +4 -4
- package/dist/index.js.map +1 -1
- package/dist/interfaces.d.ts +49 -11
- package/dist/interfaces.d.ts.map +1 -1
- package/dist/interfaces.js +14 -0
- package/dist/interfaces.js.map +1 -1
- package/dist/providers/anthropic.d.ts +56 -0
- package/dist/providers/anthropic.d.ts.map +1 -0
- package/dist/providers/anthropic.js +524 -0
- package/dist/providers/anthropic.js.map +1 -0
- package/dist/providers/google.d.ts +5 -0
- package/dist/providers/google.d.ts.map +1 -1
- package/dist/providers/google.js +64 -8
- package/dist/providers/google.js.map +1 -1
- package/dist/providers/index.d.ts +1 -0
- package/dist/providers/index.d.ts.map +1 -1
- package/dist/providers/index.js +1 -0
- package/dist/providers/index.js.map +1 -1
- package/dist/providers/ollama.d.ts.map +1 -1
- package/dist/providers/ollama.js +38 -11
- package/dist/providers/ollama.js.map +1 -1
- package/dist/providers/openai.d.ts.map +1 -1
- package/dist/providers/openai.js +9 -7
- package/dist/providers/openai.js.map +1 -1
- package/dist/router.d.ts +13 -33
- package/dist/router.d.ts.map +1 -1
- package/dist/router.js +33 -57
- package/dist/router.js.map +1 -1
- package/dist/stream-decoder.d.ts +29 -2
- package/dist/stream-decoder.d.ts.map +1 -1
- package/dist/stream-decoder.js +39 -11
- package/dist/stream-decoder.js.map +1 -1
- package/dist/structured-output.d.ts +107 -181
- package/dist/structured-output.d.ts.map +1 -1
- package/dist/structured-output.js +137 -192
- package/dist/structured-output.js.map +1 -1
- package/dist/zod-adapter.d.ts +44 -0
- package/dist/zod-adapter.d.ts.map +1 -0
- package/dist/zod-adapter.js +61 -0
- package/dist/zod-adapter.js.map +1 -0
- package/package.json +9 -1
- package/src/ai-model.ts +350 -0
- package/src/auditor.ts +213 -0
- package/src/client.ts +402 -0
- package/src/debug/debug-google-streaming.ts +97 -0
- package/src/debug/debug-tool-execution.ts +86 -0
- package/src/debug/test-lmstudio-tools.ts +155 -0
- package/src/demos/README.md +47 -0
- package/src/demos/basic/universal-llm-examples.ts +161 -0
- package/src/demos/mcp/astrid-memory-demo.ts +295 -0
- package/src/demos/mcp/astrid-persona-memory.ts +357 -0
- package/src/demos/mcp/mcp-mongodb-demo.ts +275 -0
- package/src/demos/mcp/simple-astrid-memory.ts +148 -0
- package/src/demos/mcp/simple-mcp-demo.ts +68 -0
- package/src/demos/mcp/working-mcp-demo.ts +62 -0
- package/src/demos/model-alias-demo.ts +0 -0
- package/src/demos/tools/RAG_MEMORY_INTEGRATION.md +267 -0
- package/src/demos/tools/astrid-memory-demo.ts +270 -0
- package/src/demos/tools/astrid-production-memory-clean.ts +785 -0
- package/src/demos/tools/astrid-production-memory.ts +558 -0
- package/src/demos/tools/basic-translation-test.ts +66 -0
- package/src/demos/tools/chromadb-similarity-tuning.ts +390 -0
- package/src/demos/tools/clean-multilingual-conversation.ts +209 -0
- package/src/demos/tools/clean-translation-test.ts +119 -0
- package/src/demos/tools/clean-universal-multilingual-test.ts +131 -0
- package/src/demos/tools/complete-rag-demo.ts +369 -0
- package/src/demos/tools/complete-tool-demo.ts +132 -0
- package/src/demos/tools/demo-tool-calling.ts +124 -0
- package/src/demos/tools/dynamic-language-switching-test.ts +251 -0
- package/src/demos/tools/hybrid-thinking-test.ts +154 -0
- package/src/demos/tools/memory-integration-test.ts +420 -0
- package/src/demos/tools/multilingual-memory-system.ts +802 -0
- package/src/demos/tools/ondemand-translation-demo.ts +655 -0
- package/src/demos/tools/production-tool-demo.ts +245 -0
- package/src/demos/tools/revolutionary-multilingual-test.ts +151 -0
- package/src/demos/tools/rigorous-language-analysis.ts +218 -0
- package/src/demos/tools/test-universal-memory-system.ts +126 -0
- package/src/demos/tools/translation-integration-guide.ts +346 -0
- package/src/demos/tools/universal-memory-system.ts +560 -0
- package/src/http.ts +247 -0
- package/src/index.ts +161 -0
- package/src/interfaces.ts +657 -0
- package/src/mcp.ts +345 -0
- package/src/providers/anthropic.ts +762 -0
- package/src/providers/google.ts +620 -0
- package/src/providers/index.ts +8 -0
- package/src/providers/ollama.ts +469 -0
- package/src/providers/openai.ts +392 -0
- package/src/router.ts +780 -0
- package/src/stream-decoder.ts +361 -0
- package/src/structured-output.ts +759 -0
- package/src/test-scripts/test-advanced-tools.ts +310 -0
- package/src/test-scripts/test-google-streaming-enhanced.ts +147 -0
- package/src/test-scripts/test-google-streaming.ts +63 -0
- package/src/test-scripts/test-google-system-prompt-comprehensive.ts +189 -0
- package/src/test-scripts/test-mcp-config.ts +28 -0
- package/src/test-scripts/test-mcp-connection.ts +29 -0
- package/src/test-scripts/test-system-message-positions.ts +163 -0
- package/src/test-scripts/test-system-prompt-improvement-demo.ts +83 -0
- package/src/test-scripts/test-tool-calling.ts +231 -0
- package/src/tests/ai-model.test.ts +1614 -0
- package/src/tests/auditor.test.ts +224 -0
- package/src/tests/http.test.ts +200 -0
- package/src/tests/interfaces.test.ts +117 -0
- package/src/tests/providers/google.test.ts +660 -0
- package/src/tests/providers/ollama.test.ts +954 -0
- package/src/tests/providers/openai.test.ts +1122 -0
- package/src/tests/router.test.ts +254 -0
- package/src/tests/stream-decoder.test.ts +179 -0
- package/src/tests/structured-output.test.ts +1450 -0
- package/src/tests/tools.test.ts +175 -0
- package/src/tools.ts +246 -0
- package/src/zod-adapter.ts +72 -0
|
@@ -0,0 +1,620 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Universal LLM Client v3 — Google Provider
|
|
3
|
+
*
|
|
4
|
+
* Implements BaseLLMClient for Google AI Studio and Vertex AI.
|
|
5
|
+
* Supports Gemini and Gemma models with full tool calling,
|
|
6
|
+
* streaming, embeddings, and system prompt handling.
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
import { BaseLLMClient } from '../client.js';
|
|
10
|
+
import { httpRequest, httpStream } from '../http.js';
|
|
11
|
+
import { StandardChatDecoder } from '../stream-decoder.js';
|
|
12
|
+
import {
|
|
13
|
+
normalizeJsonSchema,
|
|
14
|
+
stripUnsupportedFeatures,
|
|
15
|
+
getJsonSchemaFromConfig,
|
|
16
|
+
type JSONSchema,
|
|
17
|
+
} from '../structured-output.js';
|
|
18
|
+
import type {
|
|
19
|
+
LLMClientOptions,
|
|
20
|
+
LLMChatMessage,
|
|
21
|
+
LLMChatResponse,
|
|
22
|
+
ChatOptions,
|
|
23
|
+
LLMToolDefinition,
|
|
24
|
+
LLMToolCall,
|
|
25
|
+
LLMContentPart,
|
|
26
|
+
LLMTextContent,
|
|
27
|
+
GooglePart,
|
|
28
|
+
GoogleContent,
|
|
29
|
+
GoogleRequest,
|
|
30
|
+
GoogleResponse,
|
|
31
|
+
GoogleFunctionDeclaration,
|
|
32
|
+
TokenUsageInfo,
|
|
33
|
+
AIModelApiType,
|
|
34
|
+
} from '../interfaces.js';
|
|
35
|
+
import type { DecodedEvent } from '../stream-decoder.js';
|
|
36
|
+
import type { Auditor } from '../auditor.js';
|
|
37
|
+
|
|
38
|
+
export class GoogleClient extends BaseLLMClient {
|
|
39
|
+
private isVertex: boolean;
|
|
40
|
+
private apiVersion: string;
|
|
41
|
+
|
|
42
|
+
constructor(options: LLMClientOptions, auditor?: Auditor) {
|
|
43
|
+
super(options, auditor);
|
|
44
|
+
this.isVertex = options.apiType === ('vertex' as AIModelApiType);
|
|
45
|
+
this.apiVersion = options.apiVersion ?? 'v1beta';
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
// ========================================================================
|
|
49
|
+
// URL Building
|
|
50
|
+
// ========================================================================
|
|
51
|
+
|
|
52
|
+
private getBaseUrl(): string {
|
|
53
|
+
if (this.isVertex) {
|
|
54
|
+
const region = this.options.region ?? 'us-central1';
|
|
55
|
+
return `https://${region}-aiplatform.googleapis.com/${this.apiVersion}/projects/-/locations/${region}/publishers/google/models/${this.options.model}`;
|
|
56
|
+
}
|
|
57
|
+
if (this.options.url) return this.options.url.replace(/\/+$/, '');
|
|
58
|
+
return `https://generativelanguage.googleapis.com/${this.apiVersion}/models/${this.options.model}`;
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
private getChatUrl(): string {
|
|
62
|
+
const base = this.getBaseUrl();
|
|
63
|
+
if (this.isVertex) {
|
|
64
|
+
return `${base}:generateContent`;
|
|
65
|
+
}
|
|
66
|
+
return `${base}:generateContent?key=${this.options.apiKey}`;
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
private getStreamUrl(): string {
|
|
70
|
+
const base = this.getBaseUrl();
|
|
71
|
+
if (this.isVertex) {
|
|
72
|
+
return `${base}:streamGenerateContent?alt=sse`;
|
|
73
|
+
}
|
|
74
|
+
return `${base}:streamGenerateContent?alt=sse&key=${this.options.apiKey}`;
|
|
75
|
+
}
|
|
76
|
+
|
|
77
|
+
private getEmbedUrl(): string {
|
|
78
|
+
if (this.isVertex) {
|
|
79
|
+
const region = this.options.region ?? 'us-central1';
|
|
80
|
+
return `https://${region}-aiplatform.googleapis.com/${this.apiVersion}/projects/-/locations/${region}/publishers/google/models/${this.options.model}:embedContent`;
|
|
81
|
+
}
|
|
82
|
+
return `https://generativelanguage.googleapis.com/${this.apiVersion}/models/${this.options.model}:embedContent?key=${this.options.apiKey}`;
|
|
83
|
+
}
|
|
84
|
+
|
|
85
|
+
private getHeaders(): Record<string, string> {
|
|
86
|
+
const headers: Record<string, string> = {
|
|
87
|
+
'Content-Type': 'application/json',
|
|
88
|
+
};
|
|
89
|
+
if (this.isVertex && this.options.apiKey) {
|
|
90
|
+
headers['Authorization'] = `Bearer ${this.options.apiKey}`;
|
|
91
|
+
}
|
|
92
|
+
return headers;
|
|
93
|
+
}
|
|
94
|
+
|
|
95
|
+
// ========================================================================
|
|
96
|
+
// Chat
|
|
97
|
+
// ========================================================================
|
|
98
|
+
|
|
99
|
+
  /**
   * Single-shot (non-streaming) chat completion.
   *
   * Structured output and tools can be used together: the request carries both
   * responseSchema and tools, and the Router handles skipping validation when
   * the response contains tool calls.
   *
   * @param messages Conversation history in provider-agnostic form.
   * @param options  Per-call overrides (tools, temperature, serviceTier, ...).
   * @returns Normalized chat response, including usage and (when the
   *          `x-gemini-service-tier` header is present) the resolved tier.
   */
  async chat(
    messages: LLMChatMessage[],
    options?: ChatOptions,
  ): Promise<LLMChatResponse> {
    const url = this.getChatUrl();
    const body = this.buildRequestBody(messages, options);

    // Flex tier: increase timeout (Google recommends 600s+) and use retry logic
    const tier = options?.serviceTier;
    const effectiveTimeout = tier === 'flex'
      ? Math.max(this.options.timeout ?? 60000, 600_000)
      : (this.options.timeout ?? 60000);

    // Audit the outbound request before dispatch.
    const start = Date.now();
    this.auditor.record({
      timestamp: start,
      type: 'request',
      provider: this.isVertex ? 'vertex' : 'google',
      model: this.options.model,
    });

    const reqOptions = {
      method: 'POST' as const,
      headers: this.getHeaders(),
      body,
      timeout: effectiveTimeout,
    };

    // Flex requests go through exponential-backoff retry on HTTP 429/503.
    const response = tier === 'flex'
      ? await this.fetchWithFlexRetry<GoogleResponse>(url, reqOptions)
      : await httpRequest<GoogleResponse>(url, reqOptions);

    const result = this.parseGoogleResponse(response.data);

    // Surface the tier that actually served the request
    const resolvedTier = response.headers?.get('x-gemini-service-tier');
    if (resolvedTier) {
      result.serviceTier = resolvedTier.toLowerCase() as 'flex' | 'priority' | 'standard';
    }

    this.auditor.record({
      timestamp: Date.now(),
      type: 'response',
      provider: this.isVertex ? 'vertex' : 'google',
      model: this.options.model,
      duration: Date.now() - start,
      usage: result.usage,
    });

    return result;
  }
|
|
152
|
+
|
|
153
|
+
// ========================================================================
|
|
154
|
+
// Streaming
|
|
155
|
+
// ========================================================================
|
|
156
|
+
|
|
157
|
+
  /**
   * Streaming chat completion over Google's SSE endpoint.
   *
   * Yields `text` events for each text part and `tool_call` events for each
   * functionCall part as they arrive, then returns the fully accumulated
   * LLMChatResponse (via the generator's return value).
   *
   * @param messages Conversation history in provider-agnostic form.
   * @param options  Per-call overrides (tools, temperature, serviceTier, ...).
   */
  async *chatStream(
    messages: LLMChatMessage[],
    options?: ChatOptions,
  ): AsyncGenerator<DecodedEvent, LLMChatResponse | void, unknown> {
    const url = this.getStreamUrl();
    const body = this.buildRequestBody(messages, options);

    // Flex tier: increase timeout (Google recommends 600s+)
    const tier = options?.serviceTier;
    const effectiveTimeout = tier === 'flex'
      ? Math.max(this.options.timeout ?? 120000, 600_000)
      : (this.options.timeout ?? 120000);

    const start = Date.now();
    this.auditor.record({
      timestamp: start,
      type: 'stream_start',
      provider: this.isVertex ? 'vertex' : 'google',
      model: this.options.model,
    });

    // Decoder accumulates clean content / reasoning; the no-op callback is
    // intentional — events are yielded directly from this loop instead.
    const decoder = new StandardChatDecoder(() => {});
    let usage: TokenUsageInfo | undefined;
    const allToolCalls: LLMToolCall[] = [];

    const stream = httpStream(url, {
      method: 'POST',
      headers: this.getHeaders(),
      body,
      timeout: effectiveTimeout,
    });

    // Google streams SSE with JSON payloads
    let buffer = '';
    for await (const chunk of stream) {
      buffer += chunk;

      // Google SSE uses "data: " prefix; keep the trailing partial line in
      // the buffer until the next chunk completes it.
      // NOTE(review): any bytes still in `buffer` when the stream ends are
      // discarded — assumes the SSE stream terminates with a newline; confirm
      // against httpStream's behavior.
      const lines = buffer.split('\n');
      buffer = lines.pop() ?? '';

      for (const line of lines) {
        if (!line.startsWith('data: ')) continue;
        const jsonStr = line.slice(6).trim();
        if (!jsonStr || jsonStr === '[DONE]') continue;

        try {
          const data = JSON.parse(jsonStr) as GoogleResponse;

          // Usage metadata may arrive on multiple events; last one wins.
          if (data.usageMetadata) {
            usage = {
              inputTokens: data.usageMetadata.promptTokenCount ?? 0,
              outputTokens: data.usageMetadata.candidatesTokenCount ?? 0,
              totalTokens: data.usageMetadata.totalTokenCount ?? 0,
              cachedTokens: data.usageMetadata.cachedContentTokenCount,
            };
          }

          const candidate = data.candidates?.[0];
          if (!candidate?.content?.parts) continue;

          for (const part of candidate.content.parts) {
            if (part.text) {
              decoder.push(part.text);
              yield { type: 'text', content: part.text };
            }
            if (part.functionCall) {
              const toolCall = this.convertFunctionCallToToolCall(
                part.functionCall,
                part.thoughtSignature,
              );
              allToolCalls.push(toolCall);
              yield { type: 'tool_call', calls: [toolCall] };
            }
          }
        } catch {
          // Skip unparseable JSON
        }
      }
    }

    decoder.flush();

    this.auditor.record({
      timestamp: Date.now(),
      type: 'stream_end',
      provider: this.isVertex ? 'vertex' : 'google',
      model: this.options.model,
      duration: Date.now() - start,
      usage,
    });

    // Final aggregated response: decoder-cleaned text plus every tool call
    // collected during the stream.
    return {
      message: {
        role: 'assistant',
        content: decoder.getCleanContent(),
        tool_calls: allToolCalls.length > 0 ? allToolCalls : undefined,
      },
      reasoning: decoder.getReasoning(),
      usage,
      provider: this.isVertex ? 'vertex' : 'google',
    };
  }
|
|
260
|
+
|
|
261
|
+
// ========================================================================
|
|
262
|
+
// Embeddings
|
|
263
|
+
// ========================================================================
|
|
264
|
+
|
|
265
|
+
async embed(text: string): Promise<number[]> {
|
|
266
|
+
const url = this.getEmbedUrl();
|
|
267
|
+
const response = await httpRequest<{
|
|
268
|
+
embedding: { values: number[] };
|
|
269
|
+
}>(url, {
|
|
270
|
+
method: 'POST',
|
|
271
|
+
headers: this.getHeaders(),
|
|
272
|
+
body: {
|
|
273
|
+
content: {
|
|
274
|
+
parts: [{ text }],
|
|
275
|
+
},
|
|
276
|
+
},
|
|
277
|
+
timeout: this.options.timeout ?? 30000,
|
|
278
|
+
});
|
|
279
|
+
return response.data.embedding.values;
|
|
280
|
+
}
|
|
281
|
+
|
|
282
|
+
// ========================================================================
|
|
283
|
+
// Model Discovery
|
|
284
|
+
// ========================================================================
|
|
285
|
+
|
|
286
|
+
async getModels(): Promise<string[]> {
|
|
287
|
+
const baseUrl = this.isVertex
|
|
288
|
+
? `https://${this.options.region ?? 'us-central1'}-aiplatform.googleapis.com/${this.apiVersion}/models`
|
|
289
|
+
: `https://generativelanguage.googleapis.com/${this.apiVersion}/models?key=${this.options.apiKey}`;
|
|
290
|
+
|
|
291
|
+
try {
|
|
292
|
+
const response = await httpRequest<{
|
|
293
|
+
models: Array<{ name: string }>;
|
|
294
|
+
}>(baseUrl, {
|
|
295
|
+
headers: this.getHeaders(),
|
|
296
|
+
timeout: 10000,
|
|
297
|
+
});
|
|
298
|
+
return response.data.models.map(m =>
|
|
299
|
+
m.name.replace(/^models\//, ''),
|
|
300
|
+
);
|
|
301
|
+
} catch {
|
|
302
|
+
return [];
|
|
303
|
+
}
|
|
304
|
+
}
|
|
305
|
+
|
|
306
|
+
// ========================================================================
|
|
307
|
+
// Request Building
|
|
308
|
+
// ========================================================================
|
|
309
|
+
|
|
310
|
+
private buildRequestBody(
|
|
311
|
+
messages: LLMChatMessage[],
|
|
312
|
+
options?: ChatOptions,
|
|
313
|
+
): GoogleRequest {
|
|
314
|
+
const isGemma = this.options.model.toLowerCase().includes('gemma');
|
|
315
|
+
const { systemInstruction, contents } = this.convertToGoogleMessages(messages, isGemma);
|
|
316
|
+
|
|
317
|
+
const tools = options?.tools ?? (Object.keys(this.toolRegistry).length > 0 ? this.getToolDefinitions() : undefined);
|
|
318
|
+
|
|
319
|
+
const body: GoogleRequest = {
|
|
320
|
+
contents,
|
|
321
|
+
generationConfig: this.buildGenerationConfig(options),
|
|
322
|
+
};
|
|
323
|
+
|
|
324
|
+
// System instruction (Gemini supports it, Gemma doesn't)
|
|
325
|
+
if (systemInstruction && !isGemma) {
|
|
326
|
+
body.systemInstruction = {
|
|
327
|
+
parts: [{ text: systemInstruction }],
|
|
328
|
+
};
|
|
329
|
+
}
|
|
330
|
+
|
|
331
|
+
// Tools
|
|
332
|
+
if (tools?.length) {
|
|
333
|
+
body.tools = [{
|
|
334
|
+
functionDeclarations: tools.map(t => this.convertToGoogleTool(t)),
|
|
335
|
+
}];
|
|
336
|
+
}
|
|
337
|
+
|
|
338
|
+
// Inference tier (Flex / Priority)
|
|
339
|
+
const tier = options?.serviceTier;
|
|
340
|
+
if (tier && tier !== 'standard') {
|
|
341
|
+
body.service_tier = tier.toUpperCase() as 'FLEX' | 'PRIORITY';
|
|
342
|
+
}
|
|
343
|
+
|
|
344
|
+
return body;
|
|
345
|
+
}
|
|
346
|
+
|
|
347
|
+
private buildGenerationConfig(options?: ChatOptions): Record<string, unknown> {
|
|
348
|
+
const config: Record<string, unknown> = {
|
|
349
|
+
...this.options.defaultParameters,
|
|
350
|
+
...options?.parameters,
|
|
351
|
+
};
|
|
352
|
+
if (options?.temperature !== undefined) config['temperature'] = options.temperature;
|
|
353
|
+
if (options?.maxTokens !== undefined) config['maxOutputTokens'] = options.maxTokens;
|
|
354
|
+
if (this.options.thinking) {
|
|
355
|
+
config['thinkingConfig'] = { thinkingBudget: 8192 };
|
|
356
|
+
}
|
|
357
|
+
|
|
358
|
+
// Structured output: add responseMimeType and responseSchema
|
|
359
|
+
const schemaOptions = this.extractSchemaOptions(options);
|
|
360
|
+
if (schemaOptions) {
|
|
361
|
+
config['responseMimeType'] = 'application/json';
|
|
362
|
+
|
|
363
|
+
// Convert schema to Google-compatible format
|
|
364
|
+
let jsonSchema: JSONSchema;
|
|
365
|
+
if (schemaOptions.jsonSchema) {
|
|
366
|
+
jsonSchema = normalizeJsonSchema(schemaOptions.jsonSchema);
|
|
367
|
+
} else if (schemaOptions.schemaConfig) {
|
|
368
|
+
jsonSchema = getJsonSchemaFromConfig(schemaOptions.schemaConfig);
|
|
369
|
+
} else {
|
|
370
|
+
throw new Error('Either schemaConfig or jsonSchema must be provided');
|
|
371
|
+
}
|
|
372
|
+
|
|
373
|
+
// Strip unsupported features for Google
|
|
374
|
+
const googleSchema = stripUnsupportedFeatures(jsonSchema, 'google');
|
|
375
|
+
config['responseSchema'] = googleSchema;
|
|
376
|
+
}
|
|
377
|
+
|
|
378
|
+
return config;
|
|
379
|
+
}
|
|
380
|
+
|
|
381
|
+
// ========================================================================
|
|
382
|
+
// Message Conversion
|
|
383
|
+
// ========================================================================
|
|
384
|
+
|
|
385
|
+
private convertToGoogleMessages(
|
|
386
|
+
messages: LLMChatMessage[],
|
|
387
|
+
isGemma: boolean,
|
|
388
|
+
): { systemInstruction?: string; contents: GoogleContent[] } {
|
|
389
|
+
let systemInstruction: string | undefined;
|
|
390
|
+
const contents: GoogleContent[] = [];
|
|
391
|
+
|
|
392
|
+
for (const msg of messages) {
|
|
393
|
+
if (msg.role === 'system') {
|
|
394
|
+
if (isGemma) {
|
|
395
|
+
// Gemma: prepend system message to first user message
|
|
396
|
+
systemInstruction = typeof msg.content === 'string'
|
|
397
|
+
? msg.content
|
|
398
|
+
: msg.content.filter((p): p is LLMTextContent => p.type === 'text').map(p => p.text).join('');
|
|
399
|
+
} else {
|
|
400
|
+
systemInstruction = typeof msg.content === 'string'
|
|
401
|
+
? msg.content
|
|
402
|
+
: msg.content.filter((p): p is LLMTextContent => p.type === 'text').map(p => p.text).join('');
|
|
403
|
+
}
|
|
404
|
+
continue;
|
|
405
|
+
}
|
|
406
|
+
|
|
407
|
+
if (msg.role === 'tool') {
|
|
408
|
+
// Convert tool result to Google functionResponse
|
|
409
|
+
let responseData: Record<string, unknown>;
|
|
410
|
+
try {
|
|
411
|
+
responseData = typeof msg.content === 'string'
|
|
412
|
+
? JSON.parse(msg.content)
|
|
413
|
+
: { result: msg.content };
|
|
414
|
+
} catch {
|
|
415
|
+
responseData = { result: typeof msg.content === 'string' ? msg.content : JSON.stringify(msg.content) };
|
|
416
|
+
}
|
|
417
|
+
|
|
418
|
+
contents.push({
|
|
419
|
+
role: 'function',
|
|
420
|
+
parts: [{
|
|
421
|
+
functionResponse: {
|
|
422
|
+
name: msg.tool_call_id ?? 'unknown',
|
|
423
|
+
response: responseData,
|
|
424
|
+
},
|
|
425
|
+
}],
|
|
426
|
+
});
|
|
427
|
+
continue;
|
|
428
|
+
}
|
|
429
|
+
|
|
430
|
+
if (msg.role === 'assistant') {
|
|
431
|
+
const parts: GooglePart[] = [];
|
|
432
|
+
const textContent = typeof msg.content === 'string' ? msg.content : '';
|
|
433
|
+
if (textContent) parts.push({ text: textContent });
|
|
434
|
+
|
|
435
|
+
// Convert tool calls to functionCall parts
|
|
436
|
+
if (msg.tool_calls) {
|
|
437
|
+
for (const tc of msg.tool_calls) {
|
|
438
|
+
const part: GooglePart = {
|
|
439
|
+
functionCall: {
|
|
440
|
+
name: tc.function.name,
|
|
441
|
+
args: typeof tc.function.arguments === 'string'
|
|
442
|
+
? JSON.parse(tc.function.arguments)
|
|
443
|
+
: tc.function.arguments as Record<string, unknown>,
|
|
444
|
+
},
|
|
445
|
+
};
|
|
446
|
+
// Echo thought signature back (required by Gemini 3.x)
|
|
447
|
+
if (tc.thoughtSignature) {
|
|
448
|
+
part.thoughtSignature = tc.thoughtSignature;
|
|
449
|
+
}
|
|
450
|
+
parts.push(part);
|
|
451
|
+
}
|
|
452
|
+
}
|
|
453
|
+
|
|
454
|
+
contents.push({ role: 'model', parts });
|
|
455
|
+
continue;
|
|
456
|
+
}
|
|
457
|
+
|
|
458
|
+
// User messages
|
|
459
|
+
const parts = this.convertContentToGoogleParts(msg.content);
|
|
460
|
+
|
|
461
|
+
// Gemma: prepend system instruction to first user message
|
|
462
|
+
if (isGemma && systemInstruction && contents.length === 0) {
|
|
463
|
+
const systemParts = [{ text: `[System Instructions]\n${systemInstruction}\n\n[User Message]\n` }];
|
|
464
|
+
contents.push({
|
|
465
|
+
role: 'user',
|
|
466
|
+
parts: [...systemParts, ...parts],
|
|
467
|
+
});
|
|
468
|
+
systemInstruction = undefined; // Consumed
|
|
469
|
+
} else {
|
|
470
|
+
contents.push({ role: 'user', parts });
|
|
471
|
+
}
|
|
472
|
+
}
|
|
473
|
+
|
|
474
|
+
return { systemInstruction, contents };
|
|
475
|
+
}
|
|
476
|
+
|
|
477
|
+
private convertContentToGoogleParts(content: string | LLMContentPart[]): GooglePart[] {
|
|
478
|
+
if (typeof content === 'string') {
|
|
479
|
+
return [{ text: content }];
|
|
480
|
+
}
|
|
481
|
+
|
|
482
|
+
return content.map(part => {
|
|
483
|
+
if (part.type === 'text') {
|
|
484
|
+
return { text: part.text };
|
|
485
|
+
}
|
|
486
|
+
if (part.type === 'audio') {
|
|
487
|
+
return {
|
|
488
|
+
inlineData: {
|
|
489
|
+
mimeType: part.audio.mimeType,
|
|
490
|
+
data: part.audio.data,
|
|
491
|
+
},
|
|
492
|
+
};
|
|
493
|
+
}
|
|
494
|
+
// Image content
|
|
495
|
+
const url = part.image_url.url;
|
|
496
|
+
if (url.startsWith('data:')) {
|
|
497
|
+
const match = url.match(/^data:([^;]+);base64,(.+)$/);
|
|
498
|
+
if (match) {
|
|
499
|
+
return {
|
|
500
|
+
inlineData: {
|
|
501
|
+
mimeType: match[1]!,
|
|
502
|
+
data: match[2]!,
|
|
503
|
+
},
|
|
504
|
+
};
|
|
505
|
+
}
|
|
506
|
+
}
|
|
507
|
+
// For regular URLs, try inline data format
|
|
508
|
+
return { text: `[Image: ${url}]` };
|
|
509
|
+
});
|
|
510
|
+
}
|
|
511
|
+
|
|
512
|
+
// ========================================================================
|
|
513
|
+
// Tool Conversion
|
|
514
|
+
// ========================================================================
|
|
515
|
+
|
|
516
|
+
private convertToGoogleTool(tool: LLMToolDefinition): GoogleFunctionDeclaration {
|
|
517
|
+
return {
|
|
518
|
+
name: tool.function.name,
|
|
519
|
+
description: tool.function.description,
|
|
520
|
+
parameters: {
|
|
521
|
+
type: 'object',
|
|
522
|
+
properties: tool.function.parameters.properties ?? {},
|
|
523
|
+
required: tool.function.parameters.required,
|
|
524
|
+
},
|
|
525
|
+
};
|
|
526
|
+
}
|
|
527
|
+
|
|
528
|
+
private convertFunctionCallToToolCall(
|
|
529
|
+
fc: { name: string; args: Record<string, unknown> },
|
|
530
|
+
thoughtSignature?: string,
|
|
531
|
+
): LLMToolCall {
|
|
532
|
+
const toolCall: LLMToolCall = {
|
|
533
|
+
id: this.generateToolCallId(),
|
|
534
|
+
type: 'function',
|
|
535
|
+
function: {
|
|
536
|
+
name: fc.name,
|
|
537
|
+
arguments: JSON.stringify(fc.args),
|
|
538
|
+
},
|
|
539
|
+
};
|
|
540
|
+
if (thoughtSignature) {
|
|
541
|
+
toolCall.thoughtSignature = thoughtSignature;
|
|
542
|
+
}
|
|
543
|
+
return toolCall;
|
|
544
|
+
}
|
|
545
|
+
|
|
546
|
+
// ========================================================================
|
|
547
|
+
// Response Parsing
|
|
548
|
+
// ========================================================================
|
|
549
|
+
|
|
550
|
+
private parseGoogleResponse(data: GoogleResponse): LLMChatResponse {
|
|
551
|
+
const candidate = data.candidates?.[0];
|
|
552
|
+
if (!candidate?.content?.parts) {
|
|
553
|
+
return {
|
|
554
|
+
message: { role: 'assistant', content: '' },
|
|
555
|
+
provider: this.isVertex ? 'vertex' : 'google',
|
|
556
|
+
};
|
|
557
|
+
}
|
|
558
|
+
|
|
559
|
+
let textContent = '';
|
|
560
|
+
const toolCalls: LLMToolCall[] = [];
|
|
561
|
+
|
|
562
|
+
for (const part of candidate.content.parts) {
|
|
563
|
+
if (part.text) textContent += part.text;
|
|
564
|
+
if (part.functionCall) {
|
|
565
|
+
toolCalls.push(this.convertFunctionCallToToolCall(
|
|
566
|
+
part.functionCall,
|
|
567
|
+
part.thoughtSignature,
|
|
568
|
+
));
|
|
569
|
+
}
|
|
570
|
+
}
|
|
571
|
+
|
|
572
|
+
const usage: TokenUsageInfo | undefined = data.usageMetadata
|
|
573
|
+
? {
|
|
574
|
+
inputTokens: data.usageMetadata.promptTokenCount,
|
|
575
|
+
outputTokens: data.usageMetadata.candidatesTokenCount,
|
|
576
|
+
totalTokens: data.usageMetadata.totalTokenCount,
|
|
577
|
+
cachedTokens: data.usageMetadata.cachedContentTokenCount,
|
|
578
|
+
}
|
|
579
|
+
: undefined;
|
|
580
|
+
|
|
581
|
+
return {
|
|
582
|
+
message: {
|
|
583
|
+
role: 'assistant',
|
|
584
|
+
content: textContent,
|
|
585
|
+
tool_calls: toolCalls.length > 0 ? toolCalls : undefined,
|
|
586
|
+
},
|
|
587
|
+
usage,
|
|
588
|
+
provider: this.isVertex ? 'vertex' : 'google',
|
|
589
|
+
};
|
|
590
|
+
}
|
|
591
|
+
|
|
592
|
+
// ========================================================================
|
|
593
|
+
// Flex Retry Logic
|
|
594
|
+
// ========================================================================
|
|
595
|
+
|
|
596
|
+
/**
|
|
597
|
+
* Retry HTTP requests for Flex tier when receiving 503/429 errors.
|
|
598
|
+
* Uses exponential backoff (5s → 10s → 20s) as recommended by Google.
|
|
599
|
+
*/
|
|
600
|
+
private async fetchWithFlexRetry<T>(
|
|
601
|
+
url: string,
|
|
602
|
+
reqOptions: { method: 'POST'; headers: Record<string, string>; body: unknown; timeout: number },
|
|
603
|
+
maxRetries = 3,
|
|
604
|
+
baseDelay = 5000,
|
|
605
|
+
): Promise<import('../http.js').HttpResponse<T>> {
|
|
606
|
+
for (let attempt = 0; attempt < maxRetries; attempt++) {
|
|
607
|
+
try {
|
|
608
|
+
return await httpRequest<T>(url, reqOptions);
|
|
609
|
+
} catch (error) {
|
|
610
|
+
const isRetryable = error instanceof Error
|
|
611
|
+
&& (error.message.includes('HTTP 503') || error.message.includes('HTTP 429'));
|
|
612
|
+
if (!isRetryable || attempt >= maxRetries - 1) throw error;
|
|
613
|
+
const delay = baseDelay * (2 ** attempt);
|
|
614
|
+
await new Promise(r => setTimeout(r, delay));
|
|
615
|
+
}
|
|
616
|
+
}
|
|
617
|
+
throw new Error('Unreachable');
|
|
618
|
+
}
|
|
619
|
+
|
|
620
|
+
}
|