@tachu/extensions 1.0.0-alpha.5 → 1.0.0-alpha.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +32 -0
- package/README.md +16 -6
- package/README_ZH.md +16 -6
- package/dist/index.d.ts +1 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +1 -1
- package/dist/index.js.map +1 -1
- package/dist/providers/anthropic.d.ts.map +1 -1
- package/dist/providers/anthropic.js +6 -0
- package/dist/providers/anthropic.js.map +1 -1
- package/dist/providers/gemini.d.ts +115 -0
- package/dist/providers/gemini.d.ts.map +1 -0
- package/dist/providers/gemini.js +901 -0
- package/dist/providers/gemini.js.map +1 -0
- package/dist/providers/index.d.ts +1 -0
- package/dist/providers/index.d.ts.map +1 -1
- package/dist/providers/index.js +1 -0
- package/dist/providers/index.js.map +1 -1
- package/dist/providers/openai.d.ts.map +1 -1
- package/dist/providers/openai.js +6 -0
- package/dist/providers/openai.js.map +1 -1
- package/package.json +8 -2
|
@@ -0,0 +1,901 @@
|
|
|
1
|
+
import { GoogleGenAI } from "@google/genai";
|
|
2
|
+
import { ProviderError, TimeoutError, } from "@tachu/core";
|
|
3
|
+
import { withAbortTimeout } from "../common/net";
|
|
4
|
+
/** Request timeout in milliseconds used when the adapter options do not supply `timeoutMs`. */
const DEFAULT_TIMEOUT_MS = 120_000;
/**
 * Static model catalogue returned by `listAvailableModels` when the client
 * cannot list models or the listing yields nothing usable.
 * Every entry owns its own capability object and arrays (nothing is shared).
 */
const FALLBACK_MODELS = [
    // Text-output chat models that share one capability profile.
    ...["gemini-3-pro-preview", "gemini-2.5-pro", "gemini-2.5-flash"].map((modelName) => ({
        modelName,
        capabilities: {
            supportedModalities: ["text", "image", "audio", "video", "file"],
            supportedOutputModalities: ["text"],
            maxContextTokens: 1_000_000,
            supportsStreaming: true,
            supportsFunctionCalling: true,
            supportsStructuredOutput: true,
        },
    })),
    // Image-generating chat model.
    {
        modelName: "gemini-2.5-flash-image",
        capabilities: {
            supportedModalities: ["text", "image", "file"],
            supportedOutputModalities: ["text", "image"],
            maxContextTokens: 128_000,
            supportsStreaming: true,
            supportsFunctionCalling: true,
            supportsStructuredOutput: true,
        },
    },
    // Embedding model (also advertised for rerank).
    {
        modelName: "text-embedding-004",
        capabilities: {
            supportedModalities: ["text"],
            maxContextTokens: 8_192,
            supportsStreaming: false,
            supportsFunctionCalling: false,
            supportsEmbeddings: true,
            supportsRerank: true,
        },
    },
];
|
|
62
|
+
/** Returns true for any non-null object that is not an array. */
const isRecord = (value) => {
    if (value === null || Array.isArray(value)) {
        return false;
    }
    return typeof value === "object";
};
|
|
63
|
+
/** Strips a leading "models/" prefix from a model name, if present. */
const normalizeModelName = (name) => {
    const prefix = "models/";
    return name.startsWith(prefix) ? name.slice(prefix.length) : name;
};
|
|
64
|
+
/**
 * Classifies a MIME type by its top-level type.
 * @param {string} mimeType
 * @returns {"image"|"audio"|"video"|"file"} "file" for anything that is not image/audio/video.
 */
const inferMediaType = (mimeType) => {
    const family = ["image", "audio", "video"].find((kind) => mimeType.startsWith(`${kind}/`));
    return family ?? "file";
};
|
|
73
|
+
/**
 * Guesses an image MIME type from a URL.
 *
 * - `data:` URLs: returns the (lower-cased) media type declared in the URL.
 * - Other URLs: matches a known file extension, case-insensitively. The
 *   query string and fragment are ignored first, so `a.jpg?w=100` resolves to
 *   JPEG; if the path has no known extension the whole URL is checked as
 *   before (keeps `...?file=a.gif` working).
 *
 * @param {string} url
 * @returns {string} The inferred MIME type; "image/png" when nothing matches.
 */
const inferImageMimeTypeFromUrl = (url) => {
    const lowered = url.toLowerCase();
    if (lowered.startsWith("data:")) {
        const match = /^data:([^;,]+)[;,]/.exec(lowered);
        return match?.[1] ?? "image/png";
    }
    const extensionTypes = [
        [".jpg", "image/jpeg"],
        [".jpeg", "image/jpeg"],
        [".webp", "image/webp"],
        [".gif", "image/gif"],
    ];
    // Path without "?query" / "#fragment"; split always yields at least one element.
    const pathOnly = lowered.split(/[?#]/, 1)[0];
    for (const candidate of [pathOnly, lowered]) {
        const hit = extensionTypes.find(([extension]) => candidate.endsWith(extension));
        if (hit) {
            return hit[1];
        }
    }
    return "image/png";
};
|
|
87
|
+
/**
 * Parses a base64 `data:` URL into its media type and payload.
 *
 * Accepts optional media-type parameters between the type and the `;base64`
 * marker (e.g. `data:image/png;charset=utf-8;base64,...`) and matches the
 * scheme / `base64` token case-insensitively. Parameters are discarded.
 *
 * @param {string} value
 * @returns {{ mimeType: string, data: string } | null} null when `value` is
 *   not a base64 data URL or its payload is empty.
 */
const parseDataUrl = (value) => {
    // `s` flag: the payload may contain line breaks.
    const match = /^data:([^;,]+)(?:;[^;,]+)*;base64,(.*)$/is.exec(value);
    if (!match) {
        return null;
    }
    const mimeType = match[1];
    const data = match[2];
    if (!mimeType || !data) {
        return null;
    }
    return { mimeType, data };
};
|
|
99
|
+
/**
 * Flattens message content to a single string.
 * Strings pass through; for part lists, text parts contribute their text and
 * image/file parts contribute a bracketed placeholder. Pieces are joined
 * without a separator.
 */
const toTextContent = (content) => {
    if (typeof content === "string") {
        return content;
    }
    const pieces = [];
    for (const part of content) {
        switch (part.type) {
            case "text":
                pieces.push(part.text);
                break;
            case "image_url":
                pieces.push(`[image:${part.image_url.url}]`);
                break;
            default:
                pieces.push(`[file:${part.file.mimeType}:${part.file.name ?? part.file.uri ?? "inline"}]`);
        }
    }
    return pieces.join("");
};
|
|
113
|
+
/**
 * Converts tool-message content into the object used as a function response.
 * A string that parses as a JSON object is returned as that object; any other
 * string is wrapped as `{ output: <original string> }`. Non-string content is
 * flattened with `toTextContent` and wrapped the same way.
 */
const normalizeToolResponse = (content) => {
    if (typeof content !== "string") {
        return { output: toTextContent(content) };
    }
    const candidate = content.trim();
    if (candidate.length === 0) {
        return { output: content };
    }
    let parsed;
    try {
        parsed = JSON.parse(candidate);
    }
    catch {
        // Not JSON: treat as a plain-text tool result.
        return { output: content };
    }
    return isRecord(parsed) ? parsed : { output: content };
};
|
|
132
|
+
/**
 * Converts message content into an array of Gemini parts.
 *
 * - string -> one text part
 * - text part -> text part
 * - image_url part -> `inlineData` when the URL is a base64 data URL,
 *   otherwise `fileData` with a MIME type guessed from the URL
 * - file part -> `inlineData` (has data), `fileData` (has uri), or a text
 *   placeholder when it has neither
 *
 * An empty part list yields a single empty text part.
 */
const toGeminiUserParts = (content) => {
    if (typeof content === "string") {
        return [{ text: content }];
    }
    const convert = (part) => {
        if (part.type === "text") {
            return { text: part.text };
        }
        if (part.type === "image_url") {
            const { url } = part.image_url;
            const inline = parseDataUrl(url);
            return inline
                ? { inlineData: { mimeType: inline.mimeType, data: inline.data } }
                : { fileData: { mimeType: inferImageMimeTypeFromUrl(url), fileUri: url } };
        }
        const { file } = part;
        if (file.data) {
            return { inlineData: { mimeType: file.mimeType, data: file.data } };
        }
        if (file.uri) {
            const fileData = { mimeType: file.mimeType, fileUri: file.uri };
            if (file.name) {
                fileData.displayName = file.name;
            }
            return { fileData };
        }
        return { text: `[file:${file.mimeType}:${file.name ?? "inline"}]` };
    };
    const converted = Array.from(content, (part) => convert(part));
    return converted.length > 0 ? converted : [{ text: "" }];
};
|
|
178
|
+
/**
 * Copies only the recognised, well-typed thought fields (`text`, `thought`,
 * `thoughtSignature`, `partMetadata`) from an untrusted value.
 * @returns {object|null} The sanitised part, or null when `value` is not a
 *   record or none of the fields qualify.
 */
const safeGeminiPart = (value) => {
    if (!isRecord(value)) {
        return null;
    }
    const { text, thought, thoughtSignature, partMetadata } = value;
    const sanitised = {
        ...(typeof text === "string" ? { text } : {}),
        ...(thought === true ? { thought: true } : {}),
        ...(typeof thoughtSignature === "string" ? { thoughtSignature } : {}),
        ...(isRecord(partMetadata) ? { partMetadata } : {}),
    };
    return Object.keys(sanitised).length === 0 ? null : sanitised;
};
|
|
197
|
+
/**
 * Reads stored thought parts from provider metadata.
 * Looks at `metadata.geminiThoughtParts` first, then at
 * `metadata.gemini.thoughtParts`; entries are sanitised with `safeGeminiPart`
 * and unusable ones are dropped. Returns an empty array when neither exists.
 */
const getThoughtPartsFromMetadata = (metadata) => {
    const topLevel = metadata?.geminiThoughtParts;
    const nested = metadata?.gemini;
    let rawParts = [];
    if (Array.isArray(topLevel)) {
        rawParts = topLevel;
    }
    else if (isRecord(nested) && Array.isArray(nested.thoughtParts)) {
        rawParts = nested.thoughtParts;
    }
    return rawParts
        .map((entry) => safeGeminiPart(entry))
        .filter((entry) => entry !== null);
};
|
|
210
|
+
/**
 * Finds a thought signature string in tool-call provider metadata.
 * Prefers `metadata.thoughtSignature`, then `metadata.gemini.thoughtSignature`.
 * @returns {string|undefined}
 */
const getToolThoughtSignature = (metadata) => {
    if (!metadata) {
        return undefined;
    }
    const nested = metadata.gemini;
    const candidates = [
        metadata.thoughtSignature,
        isRecord(nested) ? nested.thoughtSignature : undefined,
    ];
    return candidates.find((candidate) => typeof candidate === "string");
};
|
|
222
|
+
/**
 * Builds the Gemini parts for an assistant message, in this order:
 * stored thought parts (from `message.providerMetadata`), text parts, then one
 * `functionCall` part per tool call (carrying its thought signature when one
 * is stored). Non-text content parts are ignored. If nothing is produced, a
 * single empty text part is returned.
 */
const toGeminiAssistantParts = (message) => {
    const thoughtParts = getThoughtPartsFromMetadata(message.providerMetadata);
    const { content } = message;
    let textParts;
    if (typeof content === "string") {
        textParts = content.length > 0 ? [{ text: content }] : [];
    }
    else {
        textParts = Array.from(content)
            .filter((part) => part.type === "text")
            .map((part) => ({ text: part.text }));
    }
    const callParts = Array.from(message.toolCalls ?? [], (call) => {
        const signature = getToolThoughtSignature(call.providerMetadata);
        const callPart = {
            functionCall: { id: call.id, name: call.name, args: call.arguments },
        };
        if (signature) {
            callPart.thoughtSignature = signature;
        }
        return callPart;
    });
    const parts = [...thoughtParts, ...textParts, ...callParts];
    return parts.length > 0 ? parts : [{ text: "" }];
};
|
|
251
|
+
/**
 * Maps chat messages to a Gemini request shape.
 *
 * - system messages are pooled into one `systemInstruction`
 * - assistant messages become role "model"
 * - tool messages become role "user" with a single `functionResponse` part
 * - everything else becomes role "user"
 *
 * @returns {{ systemInstruction?: object, contents: object[] }}
 *   `systemInstruction` is present only when a system message was seen.
 */
const toGeminiMessages = (messages) => {
    const systemParts = [];
    const contents = [];
    for (const message of messages) {
        switch (message.role) {
            case "system":
                systemParts.push(...toGeminiUserParts(message.content));
                break;
            case "assistant":
                contents.push({ role: "model", parts: toGeminiAssistantParts(message) });
                break;
            case "tool": {
                const functionResponse = {
                    ...(message.toolCallId ? { id: message.toolCallId } : {}),
                    name: message.name ?? message.toolCallId ?? "tool",
                    response: normalizeToolResponse(message.content),
                };
                contents.push({ role: "user", parts: [{ functionResponse }] });
                break;
            }
            default:
                contents.push({ role: "user", parts: toGeminiUserParts(message.content) });
        }
    }
    if (systemParts.length === 0) {
        return { contents };
    }
    return {
        systemInstruction: { role: "user", parts: systemParts },
        contents,
    };
};
|
|
287
|
+
/**
 * Wraps tool definitions as a single Gemini tool holding function declarations.
 * @returns {Array|undefined} undefined when `tools` is missing or empty.
 */
const toGeminiTools = (tools) => {
    if (!tools || tools.length === 0) {
        return undefined;
    }
    const functionDeclarations = [];
    for (const tool of tools) {
        functionDeclarations.push({
            name: tool.name,
            description: tool.description,
            parametersJsonSchema: tool.inputSchema,
        });
    }
    return [{ functionDeclarations }];
};
|
|
301
|
+
/**
 * Shallow-merges `providerOptions.geminiConfig` into `config` in place.
 * Does nothing unless `geminiConfig` is a record; merged keys overwrite
 * whatever `config` already holds.
 */
const applyProviderConfig = (config, providerOptions) => {
    const overrides = providerOptions?.geminiConfig;
    if (!isRecord(overrides)) {
        return;
    }
    Object.assign(config, overrides);
};
|
|
307
|
+
/**
 * Translates a chat request into a Gemini generation config.
 * Only options present on the request are set. Tools, when given, are sent
 * with function-calling mode "AUTO". `providerOptions.geminiConfig` is merged
 * last and therefore overrides anything derived here.
 */
const buildGenerateConfig = (request) => {
    const { temperature, maxTokens, topP, topK, stop, responseModalities, structuredOutput } = request;
    const config = {
        ...(temperature !== undefined ? { temperature } : {}),
        ...(maxTokens !== undefined ? { maxOutputTokens: maxTokens } : {}),
        ...(topP !== undefined ? { topP } : {}),
        ...(topK !== undefined ? { topK } : {}),
        ...(stop && stop.length > 0 ? { stopSequences: stop } : {}),
        ...(responseModalities && responseModalities.length > 0 ? { responseModalities } : {}),
    };
    if (structuredOutput) {
        config.responseMimeType = structuredOutput.mimeType ?? "application/json";
        config.responseJsonSchema = structuredOutput.schema;
    }
    const geminiTools = toGeminiTools(request.tools);
    if (geminiTools) {
        config.tools = geminiTools;
        config.toolConfig = { functionCallingConfig: { mode: "AUTO" } };
    }
    applyProviderConfig(config, request.providerOptions);
    return config;
};
|
|
340
|
+
/**
 * Maps a raw Gemini finish reason to the adapter's finish reason.
 * Tool calls take precedence over the raw value; any raw value not listed
 * below (including a missing one) maps to "unknown".
 */
const mapGeminiFinishReason = (raw, hasToolCalls) => {
    if (hasToolCalls) {
        return "tool_calls";
    }
    switch (raw) {
        case "STOP":
            return "stop";
        case "MAX_TOKENS":
            return "length";
        case "SAFETY":
        case "RECITATION":
        case "BLOCKLIST":
        case "PROHIBITED_CONTENT":
        case "SPII":
            return "content_filter";
        default:
            return "unknown";
    }
};
|
|
359
|
+
/**
 * Derives token usage from `response.usageMetadata`.
 * Completion tokens are candidate tokens plus thought tokens. The total uses
 * the reported `totalTokenCount` when available, otherwise prompt +
 * completion + tool-use prompt tokens. `cachedPromptTokens` is included only
 * for a positive numeric cached count. Missing counts are treated as 0.
 */
const mapUsage = (response) => {
    const meta = response.usageMetadata;
    const promptTokens = meta?.promptTokenCount ?? 0;
    const candidateTokens = meta?.candidatesTokenCount ?? 0;
    const thoughtTokens = meta?.thoughtsTokenCount ?? 0;
    const completionTokens = candidateTokens + thoughtTokens;
    const computedTotal = promptTokens + completionTokens + (meta?.toolUsePromptTokenCount ?? 0);
    const result = {
        promptTokens,
        completionTokens,
        totalTokens: meta?.totalTokenCount ?? computedTotal,
    };
    const cached = meta?.cachedContentTokenCount;
    if (typeof cached === "number" && cached > 0) {
        result.cachedPromptTokens = cached;
    }
    return result;
};
|
|
373
|
+
/** Returns the content parts of the first candidate, or an empty array when there are none. */
const extractParts = (response) => {
    const firstCandidate = response.candidates?.[0];
    return firstCandidate?.content?.parts ?? [];
};
|
|
374
|
+
/**
 * Concatenates the text of either the thought parts (`thought` truthy) or the
 * non-thought parts (`thought` falsy). A part counts as a thought only when
 * its `thought` field is exactly `true`; parts without text contribute "".
 */
const extractTextFromParts = (parts, thought) => {
    const wantThought = Boolean(thought);
    return parts.reduce((text, part) => {
        const isThought = part.thought === true;
        return isThought === wantThought ? text + (part.text ?? "") : text;
    }, "");
};
|
|
378
|
+
/**
 * Collects media items from response parts.
 * A part with complete `inlineData` (data + MIME type) yields an item carrying
 * `data`; a part with complete `fileData` (URI + MIME type) yields an item
 * carrying `url`. One part can yield both. `index` is the item's position in
 * the returned list.
 */
const extractMedia = (parts, model) => {
    const media = [];
    const origin = (source) => ({ provider: "gemini", model, source });
    for (const { inlineData, fileData } of parts) {
        if (inlineData?.data && inlineData.mimeType) {
            const { mimeType, data } = inlineData;
            media.push({
                type: inferMediaType(mimeType),
                index: media.length,
                mimeType,
                data,
                providerMetadata: origin("inlineData"),
            });
        }
        if (fileData?.fileUri && fileData.mimeType) {
            const { mimeType, fileUri, displayName } = fileData;
            media.push({
                type: inferMediaType(mimeType),
                index: media.length,
                mimeType,
                url: fileUri,
                ...(displayName ? { name: displayName } : {}),
                providerMetadata: origin("fileData"),
            });
        }
    }
    return media;
};
|
|
405
|
+
/**
 * Projects the image entries of a media list into image descriptors.
 * Items without a `url` get a base64 data URL built from their MIME type and
 * data. `index` counts images only.
 */
const mediaToImages = (media) => media
    .filter((item) => item.type === "image")
    .map((item, index) => {
        const image = {
            url: item.url ?? `data:${item.mimeType};base64,${item.data ?? ""}`,
            index,
            mimeType: item.mimeType,
        };
        if (item.sizeBytes !== undefined) {
            image.sizeBytes = item.sizeBytes;
        }
        if (item.providerMetadata) {
            image.providerMetadata = item.providerMetadata;
        }
        return image;
    });
|
|
422
|
+
/**
 * Builds provider metadata for a response (or stream chunk).
 * Includes the response id, model version and raw finish reason when defined,
 * plus `geminiThoughtParts`: a copy of every part that is a thought or carries
 * a thought signature. Returns undefined when none of these is available.
 */
const buildResponseMetadata = (response, parts, finishReason) => {
    // Copies the listed keys whose value is not undefined, in the listed order.
    const copyDefined = (source, keys) => {
        const copy = {};
        for (const key of keys) {
            const value = source[key];
            if (value !== undefined) {
                copy[key] = value;
            }
        }
        return copy;
    };
    const thoughtParts = parts
        .filter((part) => part.thought === true || typeof part.thoughtSignature === "string")
        .map((part) => copyDefined(part, ["text", "thought", "thoughtSignature", "partMetadata"]));
    const header = copyDefined({ responseId: response.responseId, modelVersion: response.modelVersion, finishReason }, ["responseId", "modelVersion", "finishReason"]);
    if (thoughtParts.length === 0 && Object.keys(header).length === 0) {
        return undefined;
    }
    const metadata = { provider: "gemini", ...header };
    if (thoughtParts.length > 0) {
        metadata.geminiThoughtParts = thoughtParts;
    }
    return metadata;
};
|
|
445
|
+
/**
 * Parses model text according to `request.structuredOutput`.
 * @returns undefined when structured output was not requested or the text is
 *   blank; the trimmed text for MIME type "text/x.enum"; otherwise the parsed
 *   JSON value.
 * @throws {ProviderError} PROVIDER_INVALID_INPUT (retryable) when the text is
 *   not valid JSON.
 */
const parseStructuredOutput = (request, content) => {
    const spec = request.structuredOutput;
    if (!spec) {
        return undefined;
    }
    const body = content.trim();
    const mimeType = spec.mimeType ?? "application/json";
    if (mimeType === "text/x.enum") {
        return body;
    }
    if (body.length === 0) {
        return undefined;
    }
    try {
        return JSON.parse(body);
    }
    catch (error) {
        throw new ProviderError("PROVIDER_INVALID_INPUT", "Gemini structured output was not valid JSON", {
            cause: error,
            retryable: true,
        });
    }
};
|
|
466
|
+
/**
 * Extracts tool calls from response parts.
 * Parts without a named `functionCall` are skipped. A call without an id gets
 * `gemini-call-<n>`, where n is the number of calls collected so far from this
 * `parts` array. The raw function call and any thought signature on the part
 * are kept under `providerMetadata.gemini`.
 * @returns {Array|undefined} undefined when no calls are found.
 */
const parseToolCalls = (parts) => {
    const calls = [];
    for (const part of parts) {
        const { functionCall } = part;
        if (functionCall?.name) {
            const gemini = { functionCall };
            if (part.thoughtSignature) {
                gemini.thoughtSignature = part.thoughtSignature;
            }
            calls.push({
                id: functionCall.id ?? `gemini-call-${calls.length}`,
                name: functionCall.name,
                arguments: functionCall.args ?? {},
                providerMetadata: { provider: "gemini", gemini },
            });
        }
    }
    return calls.length === 0 ? undefined : calls;
};
|
|
488
|
+
/**
 * Prepares an embedding input for Gemini: strings pass through unchanged,
 * part lists are wrapped as a "user" content with converted parts.
 */
const embeddingContentToGemini = (input) => (typeof input === "string"
    ? input
    : { role: "user", parts: toGeminiUserParts(input) });
|
|
494
|
+
/**
 * Renders an embedding input as text.
 * Strings pass through; for part lists, text parts contribute their text and
 * image/file parts a bracketed placeholder, joined with newlines.
 */
const stringifyEmbeddingContent = (input) => {
    if (typeof input === "string") {
        return input;
    }
    const lines = [];
    for (const part of input) {
        switch (part.type) {
            case "text":
                lines.push(part.text);
                break;
            case "image_url":
                lines.push(`[image:${part.image_url.url}]`);
                break;
            default:
                lines.push(`[file:${part.file.mimeType}:${part.file.name ?? part.file.uri ?? "inline"}]`);
        }
    }
    return lines.join("\n");
};
|
|
508
|
+
/**
 * Cosine similarity of two numeric vectors.
 * Only the first min(left.length, right.length) components are compared and
 * missing entries count as 0. Returns 0 when there is nothing to compare or
 * either vector has zero magnitude over that range.
 */
const cosineSimilarity = (left, right) => {
    const dims = Math.min(left.length, right.length);
    if (dims === 0) {
        return 0;
    }
    let dot = 0;
    let leftSquares = 0;
    let rightSquares = 0;
    let i = 0;
    while (i < dims) {
        const l = left[i] ?? 0;
        const r = right[i] ?? 0;
        dot += l * r;
        leftSquares += l * l;
        rightSquares += r * r;
        i += 1;
    }
    const degenerate = leftSquares === 0 || rightSquares === 0;
    return degenerate ? 0 : dot / (Math.sqrt(leftSquares) * Math.sqrt(rightSquares));
};
|
|
527
|
+
/**
 * Heuristically derives capabilities from a model name or model descriptor.
 *
 * A model is treated as an embedding model when its name contains "embedding"
 * or one of its supported actions contains "embed"; everything else is treated
 * as a chat model. Output modalities for chat models are guessed from
 * substrings of the (normalised, lower-cased) name.
 *
 * @param {string|object} model A model name, or a descriptor with optional
 *   `name`, `displayName`, `supportedActions` and `inputTokenLimit`.
 */
const inferModelCapabilities = (model) => {
    const givenAsName = typeof model === "string";
    const rawName = givenAsName ? model : (model.name ?? model.displayName ?? "");
    const lowered = normalizeModelName(rawName).toLowerCase();
    const actions = givenAsName
        ? []
        : (model.supportedActions ?? []).map((action) => action.toLowerCase());
    const nameHasAny = (...needles) => needles.some((needle) => lowered.includes(needle));
    const isEmbedding = nameHasAny("embedding") || actions.some((action) => action.includes("embed"));
    const chatModel = !isEmbedding;
    const capabilities = {
        supportedModalities: isEmbedding ? ["text"] : ["text", "image", "audio", "video", "file"],
    };
    if (chatModel) {
        const outputs = ["text"];
        if (nameHasAny("image", "imagen")) {
            outputs.push("image");
        }
        if (nameHasAny("audio", "tts")) {
            outputs.push("audio");
        }
        if (nameHasAny("video", "veo")) {
            outputs.push("video");
        }
        capabilities.supportedOutputModalities = outputs;
    }
    if (givenAsName) {
        // No descriptor to read a limit from: fall back on a name-based guess.
        capabilities.maxContextTokens = nameHasAny("embedding") ? 8_192 : 1_000_000;
    }
    else {
        capabilities.maxContextTokens = model.inputTokenLimit ?? 1_000_000;
    }
    capabilities.supportsStreaming = chatModel;
    capabilities.supportsFunctionCalling = chatModel;
    capabilities.supportsStructuredOutput = chatModel;
    capabilities.supportsEmbeddings = isEmbedding;
    capabilities.supportsRerank = isEmbedding;
    return capabilities;
};
|
|
549
|
+
/**
 * Converts a listed model descriptor into `{ modelName, capabilities }`.
 * Uses `name`, falling back to `displayName`; returns null when neither is set.
 */
const mapModel = (model) => {
    const rawName = model.name ?? model.displayName;
    return rawName
        ? { modelName: normalizeModelName(rawName), capabilities: inferModelCapabilities(model) }
        : null;
};
|
|
560
|
+
/**
 * Normalises anything thrown during a Gemini call into a TimeoutError or
 * ProviderError.
 *
 * Existing TimeoutError / ProviderError instances are returned unchanged.
 * Otherwise the HTTP status (on the error or its `cause`) selects the code:
 * 401/403 auth, 400/404 invalid input, 429 rate limited (retryable), >= 500
 * upstream error (retryable). Without a matching status, the error codes
 * ETIMEDOUT / ECONNABORTED map to a retryable TimeoutError, and everything
 * else to a retryable PROVIDER_CALL_FAILED.
 *
 * @param {unknown} error The thrown value; may be null/undefined or a
 *   primitive, since JavaScript allows throwing any value.
 * @returns {Error} The mapped error, with the original value as `cause`.
 */
const mapProviderError = (error) => {
    if (error instanceof TimeoutError || error instanceof ProviderError) {
        return error;
    }
    // Reading a property of null/undefined would throw inside the caller's
    // catch block and hide the real failure, so substitute an empty record.
    const candidate = error ?? {};
    const status = candidate.status ?? candidate.cause?.status;
    const code = candidate.code ?? candidate.cause?.code;
    const message = candidate.message ?? candidate.cause?.message ?? "Gemini 调用失败";
    if (status === 401 || status === 403) {
        return new ProviderError("PROVIDER_AUTH_FAILED", message, { cause: error });
    }
    if (status === 400 || status === 404) {
        return new ProviderError("PROVIDER_INVALID_INPUT", message, { cause: error });
    }
    if (status === 429) {
        return new ProviderError("PROVIDER_RATE_LIMITED", message, {
            cause: error,
            retryable: true,
        });
    }
    if (typeof status === "number" && status >= 500) {
        return new ProviderError("PROVIDER_UPSTREAM_ERROR", message, {
            cause: error,
            retryable: true,
        });
    }
    if (code === "ETIMEDOUT" || code === "ECONNABORTED") {
        return new TimeoutError("TIMEOUT_PROVIDER_REQUEST", message, {
            cause: error,
            retryable: true,
        });
    }
    return new ProviderError("PROVIDER_CALL_FAILED", message, {
        cause: error,
        retryable: true,
    });
};
|
|
600
|
+
/**
 * Assembles the options object passed to the `GoogleGenAI` constructor.
 *
 * Starts from a copy of `options.extra`, then layers the explicit adapter
 * options on top. `httpOptions` from `extra` is kept but its `timeout`,
 * `baseUrl` and `apiVersion` are overridden by the adapter's values when set.
 */
const buildSdkOptions = (options, apiKey, timeoutMs) => {
    const { baseURL, apiVersion, vertexai, project, location, extra } = options;
    const sdkOptions = { ...(extra ?? {}) };
    if (apiKey) {
        sdkOptions.apiKey = apiKey;
    }
    if (vertexai !== undefined) {
        sdkOptions.vertexai = vertexai;
    }
    if (project) {
        sdkOptions.project = project;
    }
    if (location) {
        sdkOptions.location = location;
    }
    if (apiVersion) {
        sdkOptions.apiVersion = apiVersion;
    }
    const inheritedHttpOptions = isRecord(sdkOptions.httpOptions) ? sdkOptions.httpOptions : {};
    sdkOptions.httpOptions = {
        ...inheritedHttpOptions,
        timeout: timeoutMs,
        ...(baseURL ? { baseUrl: baseURL } : {}),
        ...(apiVersion ? { apiVersion } : {}),
    };
    return sdkOptions;
};
|
|
630
|
+
export class GeminiProviderAdapter {
|
|
631
|
+
id = "gemini";
|
|
632
|
+
name = "Gemini";
|
|
633
|
+
client;
|
|
634
|
+
timeoutMs;
|
|
635
|
+
modelListCacheTtlMs;
|
|
636
|
+
modelListCache = null;
|
|
637
|
+
constructor(options = {}) {
|
|
638
|
+
this.timeoutMs = options.timeoutMs ?? DEFAULT_TIMEOUT_MS;
|
|
639
|
+
this.modelListCacheTtlMs = options.modelListCacheTtlMs ?? 60_000;
|
|
640
|
+
if (options.client) {
|
|
641
|
+
this.client = options.client;
|
|
642
|
+
return;
|
|
643
|
+
}
|
|
644
|
+
const apiKey = options.apiKey ?? process.env.GEMINI_API_KEY ?? process.env.GOOGLE_API_KEY;
|
|
645
|
+
const hasVertexCredentials = options.vertexai === true && Boolean(options.project && options.location);
|
|
646
|
+
if (!apiKey && !hasVertexCredentials) {
|
|
647
|
+
throw new ProviderError("PROVIDER_MISSING_CREDENTIALS", "缺少 GEMINI_API_KEY/GOOGLE_API_KEY 或 Vertex AI project/location");
|
|
648
|
+
}
|
|
649
|
+
this.client = new GoogleGenAI(buildSdkOptions(options, apiKey, this.timeoutMs));
|
|
650
|
+
}
|
|
651
|
+
async listAvailableModels() {
|
|
652
|
+
const ttl = this.modelListCacheTtlMs;
|
|
653
|
+
if (ttl > 0 && this.modelListCache && Date.now() - this.modelListCache.at < ttl) {
|
|
654
|
+
return this.modelListCache.value;
|
|
655
|
+
}
|
|
656
|
+
try {
|
|
657
|
+
if (!this.client.models.list) {
|
|
658
|
+
return FALLBACK_MODELS;
|
|
659
|
+
}
|
|
660
|
+
const pager = await this.client.models.list();
|
|
661
|
+
const source = [];
|
|
662
|
+
const seen = new Set();
|
|
663
|
+
const addModel = (model) => {
|
|
664
|
+
const key = model.name ?? model.displayName;
|
|
665
|
+
if (!key || seen.has(key)) {
|
|
666
|
+
return;
|
|
667
|
+
}
|
|
668
|
+
seen.add(key);
|
|
669
|
+
source.push(model);
|
|
670
|
+
};
|
|
671
|
+
if (Array.isArray(pager.page)) {
|
|
672
|
+
for (const model of pager.page) {
|
|
673
|
+
addModel(model);
|
|
674
|
+
}
|
|
675
|
+
}
|
|
676
|
+
if (Symbol.asyncIterator in Object(pager)) {
|
|
677
|
+
for await (const model of pager) {
|
|
678
|
+
addModel(model);
|
|
679
|
+
}
|
|
680
|
+
}
|
|
681
|
+
const result = source
|
|
682
|
+
.map(mapModel)
|
|
683
|
+
.filter((model) => model !== null);
|
|
684
|
+
const models = result.length > 0 ? result : FALLBACK_MODELS;
|
|
685
|
+
if (ttl > 0) {
|
|
686
|
+
this.modelListCache = { at: Date.now(), value: models };
|
|
687
|
+
}
|
|
688
|
+
return models;
|
|
689
|
+
}
|
|
690
|
+
catch (error) {
|
|
691
|
+
throw mapProviderError(error);
|
|
692
|
+
}
|
|
693
|
+
}
|
|
694
|
+
async chat(request, _ctx, signal) {
|
|
695
|
+
const timeout = withAbortTimeout(signal, this.timeoutMs);
|
|
696
|
+
try {
|
|
697
|
+
const mapped = toGeminiMessages(request.messages);
|
|
698
|
+
const config = buildGenerateConfig(request);
|
|
699
|
+
if (mapped.systemInstruction) {
|
|
700
|
+
config.systemInstruction = mapped.systemInstruction;
|
|
701
|
+
}
|
|
702
|
+
const response = await this.client.models.generateContent({
|
|
703
|
+
model: request.model,
|
|
704
|
+
contents: mapped.contents,
|
|
705
|
+
config: {
|
|
706
|
+
...config,
|
|
707
|
+
abortSignal: timeout.signal,
|
|
708
|
+
},
|
|
709
|
+
});
|
|
710
|
+
const parts = extractParts(response);
|
|
711
|
+
const content = extractTextFromParts(parts, false);
|
|
712
|
+
const reasoningContent = extractTextFromParts(parts, true);
|
|
713
|
+
const toolCalls = parseToolCalls(parts);
|
|
714
|
+
const rawFinish = response.candidates?.[0]?.finishReason;
|
|
715
|
+
const finishReason = mapGeminiFinishReason(rawFinish, (toolCalls?.length ?? 0) > 0);
|
|
716
|
+
const media = extractMedia(parts, request.model);
|
|
717
|
+
const images = mediaToImages(media);
|
|
718
|
+
const structured = parseStructuredOutput(request, content);
|
|
719
|
+
const providerMetadata = buildResponseMetadata(response, parts, rawFinish);
|
|
720
|
+
return {
|
|
721
|
+
content,
|
|
722
|
+
...(structured !== undefined ? { structured } : {}),
|
|
723
|
+
...(providerMetadata !== undefined ? { providerMetadata } : {}),
|
|
724
|
+
...(reasoningContent.length > 0 ? { reasoningContent } : {}),
|
|
725
|
+
...(toolCalls !== undefined ? { toolCalls } : {}),
|
|
726
|
+
finishReason,
|
|
727
|
+
usage: mapUsage(response),
|
|
728
|
+
...(images.length > 0 ? { images } : {}),
|
|
729
|
+
...(media.length > 0 ? { media } : {}),
|
|
730
|
+
};
|
|
731
|
+
}
|
|
732
|
+
catch (error) {
|
|
733
|
+
throw mapProviderError(error);
|
|
734
|
+
}
|
|
735
|
+
finally {
|
|
736
|
+
timeout.cleanup();
|
|
737
|
+
}
|
|
738
|
+
}
|
|
739
|
+
async *chatStream(request, _ctx, signal) {
|
|
740
|
+
const timeout = withAbortTimeout(signal, this.timeoutMs);
|
|
741
|
+
try {
|
|
742
|
+
const mapped = toGeminiMessages(request.messages);
|
|
743
|
+
const config = buildGenerateConfig(request);
|
|
744
|
+
if (mapped.systemInstruction) {
|
|
745
|
+
config.systemInstruction = mapped.systemInstruction;
|
|
746
|
+
}
|
|
747
|
+
const stream = await this.client.models.generateContentStream({
|
|
748
|
+
model: request.model,
|
|
749
|
+
contents: mapped.contents,
|
|
750
|
+
config: {
|
|
751
|
+
...config,
|
|
752
|
+
abortSignal: timeout.signal,
|
|
753
|
+
},
|
|
754
|
+
});
|
|
755
|
+
let finishReason = "unknown";
|
|
756
|
+
let usage;
|
|
757
|
+
let responseMetadata;
|
|
758
|
+
let sawToolCall = false;
|
|
759
|
+
for await (const chunk of stream) {
|
|
760
|
+
if (timeout.signal.aborted) {
|
|
761
|
+
throw timeout.signal.reason ?? new Error("aborted");
|
|
762
|
+
}
|
|
763
|
+
const parts = extractParts(chunk);
|
|
764
|
+
const text = extractTextFromParts(parts, false);
|
|
765
|
+
const reasoning = extractTextFromParts(parts, true);
|
|
766
|
+
if (reasoning.length > 0) {
|
|
767
|
+
yield { type: "reasoning-delta", delta: reasoning };
|
|
768
|
+
}
|
|
769
|
+
if (text.length > 0) {
|
|
770
|
+
yield { type: "text-delta", delta: text };
|
|
771
|
+
if (request.structuredOutput) {
|
|
772
|
+
yield { type: "structured-delta", delta: text };
|
|
773
|
+
}
|
|
774
|
+
}
|
|
775
|
+
for (const item of extractMedia(parts, request.model)) {
|
|
776
|
+
yield { type: "media", media: item };
|
|
777
|
+
}
|
|
778
|
+
const calls = parseToolCalls(parts);
|
|
779
|
+
if (calls) {
|
|
780
|
+
sawToolCall = true;
|
|
781
|
+
for (const call of calls) {
|
|
782
|
+
yield { type: "tool-call-complete", call };
|
|
783
|
+
}
|
|
784
|
+
}
|
|
785
|
+
const rawFinish = chunk.candidates?.[0]?.finishReason;
|
|
786
|
+
if (rawFinish !== undefined) {
|
|
787
|
+
finishReason = mapGeminiFinishReason(rawFinish, sawToolCall);
|
|
788
|
+
}
|
|
789
|
+
usage = mapUsage(chunk);
|
|
790
|
+
responseMetadata = buildResponseMetadata(chunk, parts, rawFinish) ?? responseMetadata;
|
|
791
|
+
}
|
|
792
|
+
yield {
|
|
793
|
+
type: "finish",
|
|
794
|
+
finishReason: sawToolCall ? "tool_calls" : finishReason,
|
|
795
|
+
...(usage !== undefined ? { usage } : {}),
|
|
796
|
+
...(responseMetadata !== undefined ? { providerMetadata: responseMetadata } : {}),
|
|
797
|
+
};
|
|
798
|
+
}
|
|
799
|
+
catch (error) {
|
|
800
|
+
throw mapProviderError(error);
|
|
801
|
+
}
|
|
802
|
+
finally {
|
|
803
|
+
timeout.cleanup();
|
|
804
|
+
}
|
|
805
|
+
}
|
|
806
|
+
async countTokens(messages, model) {
|
|
807
|
+
if (!this.client.models.countTokens) {
|
|
808
|
+
return messages.reduce((sum, message) => sum + toTextContent(message.content).length, 0);
|
|
809
|
+
}
|
|
810
|
+
const timeout = withAbortTimeout(undefined, this.timeoutMs);
|
|
811
|
+
try {
|
|
812
|
+
const mapped = toGeminiMessages(messages);
|
|
813
|
+
const response = await this.client.models.countTokens({
|
|
814
|
+
model,
|
|
815
|
+
contents: mapped.contents,
|
|
816
|
+
config: {
|
|
817
|
+
...(mapped.systemInstruction ? { systemInstruction: mapped.systemInstruction } : {}),
|
|
818
|
+
abortSignal: timeout.signal,
|
|
819
|
+
},
|
|
820
|
+
});
|
|
821
|
+
return response.totalTokens ?? 0;
|
|
822
|
+
}
|
|
823
|
+
catch (error) {
|
|
824
|
+
throw mapProviderError(error);
|
|
825
|
+
}
|
|
826
|
+
finally {
|
|
827
|
+
timeout.cleanup();
|
|
828
|
+
}
|
|
829
|
+
}
|
|
830
|
+
async embed(request, _ctx, signal) {
|
|
831
|
+
const timeout = withAbortTimeout(signal, this.timeoutMs);
|
|
832
|
+
try {
|
|
833
|
+
const config = {};
|
|
834
|
+
if (request.taskType !== undefined) {
|
|
835
|
+
config.taskType = request.taskType;
|
|
836
|
+
}
|
|
837
|
+
if (request.outputDimensionality !== undefined) {
|
|
838
|
+
config.outputDimensionality = request.outputDimensionality;
|
|
839
|
+
}
|
|
840
|
+
applyProviderConfig(config, request.providerOptions);
|
|
841
|
+
const response = await this.client.models.embedContent({
|
|
842
|
+
model: request.model,
|
|
843
|
+
contents: request.inputs.map(embeddingContentToGemini),
|
|
844
|
+
config: {
|
|
845
|
+
...config,
|
|
846
|
+
abortSignal: timeout.signal,
|
|
847
|
+
},
|
|
848
|
+
});
|
|
849
|
+
return {
|
|
850
|
+
embeddings: (response.embeddings ?? []).map((item) => item.values ?? []),
|
|
851
|
+
usage: { promptTokens: 0, completionTokens: 0, totalTokens: 0 },
|
|
852
|
+
providerMetadata: {
|
|
853
|
+
provider: "gemini",
|
|
854
|
+
...(response.metadata ? { metadata: response.metadata } : {}),
|
|
855
|
+
},
|
|
856
|
+
};
|
|
857
|
+
}
|
|
858
|
+
catch (error) {
|
|
859
|
+
throw mapProviderError(error);
|
|
860
|
+
}
|
|
861
|
+
finally {
|
|
862
|
+
timeout.cleanup();
|
|
863
|
+
}
|
|
864
|
+
}
|
|
865
|
+
async rerank(request, ctx, signal) {
|
|
866
|
+
const query = await this.embed({
|
|
867
|
+
model: request.model,
|
|
868
|
+
inputs: [request.query],
|
|
869
|
+
taskType: "RETRIEVAL_QUERY",
|
|
870
|
+
providerOptions: request.providerOptions,
|
|
871
|
+
}, ctx, signal);
|
|
872
|
+
const documents = await this.embed({
|
|
873
|
+
model: request.model,
|
|
874
|
+
inputs: request.documents.map((doc) => doc.text),
|
|
875
|
+
taskType: "RETRIEVAL_DOCUMENT",
|
|
876
|
+
providerOptions: request.providerOptions,
|
|
877
|
+
}, ctx, signal);
|
|
878
|
+
const queryVector = query.embeddings[0] ?? [];
|
|
879
|
+
const results = request.documents
|
|
880
|
+
.map((document, index) => ({
|
|
881
|
+
index,
|
|
882
|
+
score: cosineSimilarity(queryVector, documents.embeddings[index] ?? []),
|
|
883
|
+
document,
|
|
884
|
+
}))
|
|
885
|
+
.sort((left, right) => right.score - left.score)
|
|
886
|
+
.slice(0, request.topK ?? request.documents.length);
|
|
887
|
+
return {
|
|
888
|
+
results,
|
|
889
|
+
usage: {
|
|
890
|
+
promptTokens: (query.usage?.promptTokens ?? 0) + (documents.usage?.promptTokens ?? 0),
|
|
891
|
+
completionTokens: 0,
|
|
892
|
+
totalTokens: (query.usage?.totalTokens ?? 0) + (documents.usage?.totalTokens ?? 0),
|
|
893
|
+
},
|
|
894
|
+
providerMetadata: { provider: "gemini", rerankStrategy: "embedding-cosine" },
|
|
895
|
+
};
|
|
896
|
+
}
|
|
897
|
+
embeddingInputToText(input) {
|
|
898
|
+
return stringifyEmbeddingContent(input);
|
|
899
|
+
}
|
|
900
|
+
}
|
|
901
|
+
//# sourceMappingURL=gemini.js.map
|