@aigne/gemini 0.14.16 → 1.74.0-beta
This diff compares the contents of two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the packages exactly as they appear in their public registry.
- package/README.md +11 -11
- package/dist/gemini-chat-model.cjs +435 -0
- package/dist/gemini-chat-model.d.cts +123 -0
- package/dist/gemini-chat-model.d.cts.map +1 -0
- package/dist/gemini-chat-model.d.mts +123 -0
- package/dist/gemini-chat-model.d.mts.map +1 -0
- package/dist/gemini-chat-model.mjs +436 -0
- package/dist/gemini-chat-model.mjs.map +1 -0
- package/dist/gemini-image-model.cjs +169 -0
- package/dist/gemini-image-model.d.cts +37 -0
- package/dist/gemini-image-model.d.cts.map +1 -0
- package/dist/gemini-image-model.d.mts +37 -0
- package/dist/gemini-image-model.d.mts.map +1 -0
- package/dist/gemini-image-model.mjs +170 -0
- package/dist/gemini-image-model.mjs.map +1 -0
- package/dist/gemini-video-model.cjs +148 -0
- package/dist/gemini-video-model.d.cts +117 -0
- package/dist/gemini-video-model.d.cts.map +1 -0
- package/dist/gemini-video-model.d.mts +117 -0
- package/dist/gemini-video-model.d.mts.map +1 -0
- package/dist/gemini-video-model.mjs +149 -0
- package/dist/gemini-video-model.mjs.map +1 -0
- package/dist/index.cjs +7 -0
- package/dist/index.d.cts +4 -0
- package/dist/index.d.mts +4 -0
- package/dist/index.mjs +5 -0
- package/dist/utils.cjs +34 -0
- package/dist/utils.mjs +35 -0
- package/dist/utils.mjs.map +1 -0
- package/package.json +29 -30
- package/CHANGELOG.md +0 -2672
- package/lib/cjs/gemini-chat-model.d.ts +0 -117
- package/lib/cjs/gemini-chat-model.js +0 -564
- package/lib/cjs/gemini-image-model.d.ts +0 -34
- package/lib/cjs/gemini-image-model.js +0 -171
- package/lib/cjs/gemini-video-model.d.ts +0 -114
- package/lib/cjs/gemini-video-model.js +0 -164
- package/lib/cjs/index.d.ts +0 -3
- package/lib/cjs/index.js +0 -19
- package/lib/cjs/package.json +0 -3
- package/lib/cjs/utils.d.ts +0 -15
- package/lib/cjs/utils.js +0 -37
- package/lib/dts/gemini-chat-model.d.ts +0 -117
- package/lib/dts/gemini-image-model.d.ts +0 -34
- package/lib/dts/gemini-video-model.d.ts +0 -114
- package/lib/dts/index.d.ts +0 -3
- package/lib/dts/utils.d.ts +0 -15
- package/lib/esm/gemini-chat-model.d.ts +0 -117
- package/lib/esm/gemini-chat-model.js +0 -560
- package/lib/esm/gemini-image-model.d.ts +0 -34
- package/lib/esm/gemini-image-model.js +0 -167
- package/lib/esm/gemini-video-model.d.ts +0 -114
- package/lib/esm/gemini-video-model.js +0 -160
- package/lib/esm/index.d.ts +0 -3
- package/lib/esm/index.js +0 -3
- package/lib/esm/package.json +0 -3
- package/lib/esm/utils.d.ts +0 -15
- package/lib/esm/utils.js +0 -34
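
The deleted lib/ trees (lib/cjs, lib/esm, lib/dts) are replaced by a single dist/ directory with explicit .cjs/.mjs bundles and .d.cts/.d.mts declarations, and CHANGELOG.md is no longer shipped. A minimal construction sketch, assuming the root export still exposes the GeminiChatModel class seen in the deleted sources below (how the model is invoked comes from @aigne/core and is not part of this diff):

```ts
// Sketch only: the class name and option fields come from the deleted
// gemini-chat-model.js in this diff; everything else is an assumption.
import { GeminiChatModel } from "@aigne/gemini";

const chatModel = new GeminiChatModel({
  apiKey: process.env.GEMINI_API_KEY, // also read from GEMINI_API_KEY / GOOGLE_API_KEY by default
  model: "gemini-2.0-flash",          // the package's default chat model
});
```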
```diff
@@ -1,560 +0,0 @@
-import { agentProcessResultToObject, ChatModel, StructuredOutputError, safeParseJSON, } from "@aigne/core";
-import { logger } from "@aigne/core/utils/logger.js";
-import { mergeUsage } from "@aigne/core/utils/model-utils.js";
-import { isNil, isNonNullable, isRecord, } from "@aigne/core/utils/type-utils.js";
-import { nodejs } from "@aigne/platform-helpers/nodejs/index.js";
-import { v7 } from "@aigne/uuid";
-import { createPartFromUri, createUserContent, FunctionCallingConfigMode, GoogleGenAI, ThinkingLevel, } from "@google/genai";
-import { parse } from "yaml";
-import { z } from "zod";
-import { zodToJsonSchema } from "zod-to-json-schema";
-const GEMINI_DEFAULT_CHAT_MODEL = "gemini-2.0-flash";
-const OUTPUT_FUNCTION_NAME = "output";
-const NEED_UPLOAD_MAX_FILE_SIZE_MB = 20;
-/**
- * Implementation of the ChatModel interface for Google's Gemini API
- *
- * @example
- * Here's how to create and use a Gemini chat model:
- * {@includeCode ../test/gemini-chat-model.test.ts#example-gemini-chat-model}
- *
- * @example
- * Here's an example with streaming response:
- * {@includeCode ../test/gemini-chat-model.test.ts#example-gemini-chat-model-streaming}
- */
-export class GeminiChatModel extends ChatModel {
-    options;
-    constructor(options) {
-        super({
-            ...options,
-            model: options?.model || GEMINI_DEFAULT_CHAT_MODEL,
-        });
-        this.options = options;
-    }
-    apiKeyEnvName = "GEMINI_API_KEY";
-    _googleClient;
-    get googleClient() {
-        if (this._googleClient)
-            return this._googleClient;
-        const { apiKey } = this.credential;
-        if (!apiKey)
-            throw new Error(`${this.name} requires an API key. Please provide it via \`options.apiKey\`, or set the \`${this.apiKeyEnvName}\` environment variable`);
-        this._googleClient ??= new GoogleGenAI({
-            apiKey,
-            ...this.options?.clientOptions,
-        });
-        return this._googleClient;
-    }
-    get credential() {
-        const apiKey = this.options?.apiKey ||
-            process.env[this.apiKeyEnvName] ||
-            process.env.GEMINI_API_KEY ||
-            process.env.GOOGLE_API_KEY;
-        return {
-            apiKey,
-            model: this.options?.model || GEMINI_DEFAULT_CHAT_MODEL,
-        };
-    }
-    get modelOptions() {
-        return this.options?.modelOptions;
-    }
-    async countTokens(input) {
-        const { model, ...request } = await this.getParameters(input);
-        const contents = [];
-        const { systemInstruction, tools } = request.config ?? {};
-        if (systemInstruction)
-            contents.push(this.contentUnionToContent(systemInstruction));
-        if (tools?.length)
-            contents.push({ role: "system", parts: [{ text: JSON.stringify(tools) }] });
-        contents.push(...[request.contents].flat().map(this.contentUnionToContent));
-        const tokens = (await this.googleClient.models.countTokens({
-            model,
-            contents,
-        })).totalTokens;
-        if (!isNil(tokens))
-            return tokens;
-        return super.countTokens(input);
-    }
-    contentUnionToContent(content) {
-        if (typeof content === "object" && "parts" in content) {
-            return { role: "system", parts: content.parts };
-        }
-        else if (typeof content === "string") {
-            return { role: "system", parts: [{ text: content }] };
-        }
-        else if (Array.isArray(content)) {
-            return {
-                role: "system",
-                parts: content.map((i) => (typeof i === "string" ? { text: i } : i)),
-            };
-        }
-        else {
-            return { role: "system", parts: [content] };
-        }
-    }
-    process(input, options) {
-        return this.processInput(input, options);
-    }
-    // References: https://ai.google.dev/gemini-api/docs/thinking#set-budget
-    thinkingBudgetModelMap = [
-        // NOTE: the gemini-2.5-flash-image-preview model does not support thinking. See: https://github.com/CherryHQ/cherry-studio/issues/9614
-        {
-            pattern: /gemini-2.5-flash-image-preview/,
-            support: false,
-        },
-        {
-            pattern: /gemini-3(?!.*-image-)/,
-            support: true,
-            type: "level",
-        },
-        {
-            pattern: /gemini-2.5-pro/,
-            support: true,
-            min: 128,
-            max: 32768,
-        },
-        {
-            pattern: /gemini-2.5-flash/,
-            support: true,
-            min: 0,
-            max: 24576,
-        },
-        {
-            pattern: /2.5-flash-lite/,
-            support: true,
-            min: 512,
-            max: 24576,
-        },
-        {
-            pattern: /.*/,
-            support: false,
-        },
-    ];
-    thinkingBudgetLevelMap = {
-        high: 100000, // use 100k for high, finally capped by model max
-        medium: 10000,
-        low: 5000,
-        minimal: 200,
-    };
-    thinkingLevelMap = {
-        high: ThinkingLevel.HIGH,
-        medium: ThinkingLevel.HIGH,
-        low: ThinkingLevel.LOW,
-        minimal: ThinkingLevel.LOW,
-    };
-    getThinkingBudget(model, effort) {
-        const m = this.thinkingBudgetModelMap.find((i) => i.pattern.test(model));
-        if (!m?.support)
-            return { support: false };
-        if (m.type === "level") {
-            let level = ThinkingLevel.THINKING_LEVEL_UNSPECIFIED;
-            if (typeof effort === "string") {
-                level = this.thinkingLevelMap[effort];
-            }
-            else if (typeof effort === "number") {
-                level =
-                    effort >= this.thinkingBudgetLevelMap["medium"] ? ThinkingLevel.HIGH : ThinkingLevel.LOW;
-            }
-            return { support: true, level };
-        }
-        let budget = typeof effort === "string" ? this.thinkingBudgetLevelMap[effort] || undefined : effort;
-        if (typeof budget === "undefined")
-            return { support: true };
-        if (typeof m.min === "number")
-            budget = Math.max(m.min, budget);
-        if (typeof m.max === "number")
-            budget = Math.min(m.max, budget);
-        return { support: true, budget };
-    }
-    async getParameters(input) {
-        const { modelOptions = {} } = input;
-        const model = modelOptions.model || this.credential.model;
-        const { contents, config } = await this.buildContents(input);
-        const thinkingBudget = this.getThinkingBudget(model, modelOptions.reasoningEffort);
-        const parameters = {
-            model,
-            contents,
-            config: {
-                thinkingConfig: thinkingBudget.support
-                    ? {
-                        includeThoughts: true,
-                        thinkingBudget: thinkingBudget.budget,
-                        thinkingLevel: thinkingBudget.level,
-                    }
-                    : undefined,
-                responseModalities: modelOptions.modalities,
-                temperature: modelOptions.temperature,
-                topP: modelOptions.topP,
-                frequencyPenalty: modelOptions.frequencyPenalty,
-                presencePenalty: modelOptions.presencePenalty,
-                ...config,
-                ...(await this.buildConfig(input)),
-            },
-        };
-        return parameters;
-    }
-    async *processInput(input, options) {
-        const parameters = await this.getParameters(input);
-        const response = await this.googleClient.models.generateContentStream(parameters);
-        let usage = {
-            inputTokens: 0,
-            outputTokens: 0,
-        };
-        let responseModel;
-        const files = [];
-        const toolCalls = [];
-        let text = "";
-        let json;
-        for await (const chunk of response) {
-            if (!responseModel && chunk.modelVersion) {
-                responseModel = chunk.modelVersion;
-                yield { delta: { json: { model: responseModel } } };
-            }
-            for (const { content } of chunk.candidates ?? []) {
-                if (content?.parts) {
-                    for (const part of content.parts) {
-                        if (part.text) {
-                            if (part.thought) {
-                                yield { delta: { text: { thoughts: part.text } } };
-                            }
-                            else {
-                                text += part.text;
-                                if (input.responseFormat?.type !== "json_schema") {
-                                    yield { delta: { text: { text: part.text } } };
-                                }
-                            }
-                        }
-                        if (part.inlineData?.data) {
-                            files.push({
-                                type: "file",
-                                data: part.inlineData.data,
-                                filename: part.inlineData.displayName,
-                                mimeType: part.inlineData.mimeType,
-                            });
-                        }
-                        if (part.functionCall?.name) {
-                            if (part.functionCall.name === OUTPUT_FUNCTION_NAME) {
-                                json = part.functionCall.args;
-                            }
-                            else {
-                                const toolCall = {
-                                    id: part.functionCall.id || v7(),
-                                    type: "function",
-                                    function: {
-                                        name: part.functionCall.name,
-                                        arguments: part.functionCall.args || {},
-                                    },
-                                };
-                                // Preserve thought_signature for 3.x models
-                                if (part.thoughtSignature && parameters.model.includes("gemini-3")) {
-                                    toolCall.metadata = {
-                                        thoughtSignature: part.thoughtSignature,
-                                    };
-                                }
-                                toolCalls.push(toolCall);
-                            }
-                        }
-                    }
-                }
-            }
-            if (chunk.usageMetadata) {
-                if (chunk.usageMetadata.promptTokenCount)
-                    usage.inputTokens = chunk.usageMetadata.promptTokenCount;
-                if (chunk.usageMetadata.candidatesTokenCount || chunk.usageMetadata.thoughtsTokenCount)
-                    usage.outputTokens =
-                        (chunk.usageMetadata.candidatesTokenCount || 0) +
-                            (chunk.usageMetadata.thoughtsTokenCount || 0);
-                // Parse cache statistics if available
-                if (chunk.usageMetadata.cachedContentTokenCount) {
-                    usage.cacheReadInputTokens = chunk.usageMetadata.cachedContentTokenCount;
-                }
-            }
-        }
-        if (toolCalls.length) {
-            yield { delta: { json: { toolCalls } } };
-        }
-        if (input.responseFormat?.type === "json_schema") {
-            if (json) {
-                yield { delta: { json: { json } } };
-            }
-            else if (text) {
-                yield { delta: { json: { json: safeParseJSON(text) } } };
-            }
-            else if (!toolCalls.length) {
-                throw new StructuredOutputError("No JSON response from the model");
-            }
-        }
-        else if (!toolCalls.length) {
-            // NOTE: gemini-2.5-pro sometimes returns an empty response,
-            // so we check here and retry with structured output mode (empty responses occur less frequently with tool calls)
-            if (!text && !files.length) {
-                logger.warn("Empty response from Gemini, retrying with structured output mode");
-                try {
-                    const outputSchema = z.object({
-                        output: z.string().describe("The final answer from the model"),
-                    });
-                    const response = await this.process({
-                        ...input,
-                        responseFormat: {
-                            type: "json_schema",
-                            jsonSchema: {
-                                name: "output",
-                                schema: zodToJsonSchema(outputSchema),
-                            },
-                        },
-                    }, options);
-                    const result = await agentProcessResultToObject(response);
-                    // Merge retry usage with the original usage
-                    usage = mergeUsage(usage, result.usage);
-                    // Return the tool calls if retry has tool calls
-                    if (result.toolCalls?.length) {
-                        toolCalls.push(...result.toolCalls);
-                        yield { delta: { json: { toolCalls } } };
-                    }
-                    // Return the text from structured output of retry
-                    else {
-                        if (!result.json)
-                            throw new Error("Retrying with structured output mode got no json response");
-                        const parsed = outputSchema.safeParse(result.json);
-                        if (!parsed.success)
-                            throw new Error("Retrying with structured output mode got invalid json response");
-                        text = parsed.data.output;
-                        yield { delta: { text: { text } } };
-                        logger.warn("Empty response from Gemini, retried with structured output mode successfully");
-                    }
-                }
-                catch (error) {
-                    logger.error("Empty response from Gemini, retrying with structured output mode failed", error);
-                    throw new StructuredOutputError("No response from the model");
-                }
-            }
-        }
-        yield {
-            delta: {
-                json: {
-                    usage,
-                    files: files.length ? files : undefined,
-                    modelOptions: {
-                        reasoningEffort: parameters.config?.thinkingConfig?.thinkingLevel ||
-                            parameters.config?.thinkingConfig?.thinkingBudget,
-                    },
-                },
-            },
-        };
-    }
-    async buildConfig(input) {
-        const config = {};
-        const { tools, toolConfig } = await this.buildTools(input);
-        config.tools = tools;
-        config.toolConfig = toolConfig;
-        if (input.responseFormat?.type === "json_schema") {
-            if (config.tools?.length) {
-                config.tools.push({
-                    functionDeclarations: [
-                        {
-                            name: OUTPUT_FUNCTION_NAME,
-                            description: "Output the final response",
-                            parametersJsonSchema: input.responseFormat.jsonSchema.schema,
-                        },
-                    ],
-                });
-                config.toolConfig = {
-                    ...config.toolConfig,
-                    functionCallingConfig: { mode: FunctionCallingConfigMode.ANY },
-                };
-            }
-            else {
-                config.responseJsonSchema = input.responseFormat.jsonSchema.schema;
-                config.responseMimeType = "application/json";
-            }
-        }
-        return config;
-    }
-    async buildTools(input) {
-        const tools = [];
-        for (const tool of input.tools ?? []) {
-            tools.push({
-                functionDeclarations: [
-                    {
-                        name: tool.function.name,
-                        description: tool.function.description,
-                        parametersJsonSchema: tool.function.parameters,
-                    },
-                ],
-            });
-        }
-        const functionCallingConfig = !input.toolChoice
-            ? undefined
-            : input.toolChoice === "auto"
-                ? { mode: FunctionCallingConfigMode.AUTO }
-                : input.toolChoice === "none"
-                    ? { mode: FunctionCallingConfigMode.NONE }
-                    : input.toolChoice === "required"
-                        ? { mode: FunctionCallingConfigMode.ANY }
-                        : {
-                            mode: FunctionCallingConfigMode.ANY,
-                            allowedFunctionNames: [input.toolChoice.function.name],
-                        };
-        return { tools, toolConfig: { functionCallingConfig } };
-    }
-    async buildVideoContentParts(media) {
-        const { path: filePath, mimeType: fileMimeType } = await this.transformFileType("local", media);
-        if (filePath) {
-            const stats = await nodejs.fs.stat(filePath);
-            const fileSizeInBytes = stats.size;
-            const fileSizeMB = fileSizeInBytes / (1024 * 1024);
-            if (fileSizeMB > NEED_UPLOAD_MAX_FILE_SIZE_MB) {
-                const uploadedFile = await this.googleClient.files.upload({
-                    file: filePath,
-                    config: { mimeType: fileMimeType },
-                });
-                let file = uploadedFile;
-                while (file.state === "PROCESSING") {
-                    await new Promise((resolve) => setTimeout(resolve, 1000));
-                    if (file.name) {
-                        file = await this.googleClient.files.get({ name: file.name });
-                    }
-                }
-                if (file.state !== "ACTIVE") {
-                    throw new Error(`File ${file.name} failed to process: ${file.state}`);
-                }
-                if (file.uri && file.mimeType) {
-                    const result = createUserContent([createPartFromUri(file.uri, file.mimeType), ""]);
-                    const part = result.parts?.find((x) => x.fileData);
-                    if (part) {
-                        await nodejs.fs.rm(filePath);
-                        return part;
-                    }
-                }
-            }
-        }
-    }
-    async buildContents(input) {
-        const result = {
-            contents: [],
-        };
-        const systemParts = [];
-        result.contents = (await Promise.all(input.messages.map(async (msg) => {
-            if (msg.role === "system") {
-                if (typeof msg.content === "string") {
-                    systemParts.push({ text: msg.content });
-                }
-                else if (Array.isArray(msg.content)) {
-                    systemParts.push(...msg.content.map((item) => {
-                        if (item.type === "text")
-                            return { text: item.text };
-                        throw new Error(`Unsupported content type: ${item.type}`);
-                    }));
-                }
-                return;
-            }
-            const content = {
-                role: msg.role === "agent" ? "model" : msg.role === "user" ? "user" : undefined,
-            };
-            if (msg.toolCalls) {
-                content.parts = msg.toolCalls.map((call) => {
-                    const part = {
-                        functionCall: {
-                            id: call.id,
-                            name: call.function.name,
-                            args: call.function.arguments,
-                        },
-                    };
-                    // Restore thought_signature for 3.x models
-                    if (call.metadata?.thoughtSignature) {
-                        part.thoughtSignature = call.metadata.thoughtSignature;
-                    }
-                    return part;
-                });
-            }
-            else if (msg.toolCallId) {
-                const call = input.messages
-                    .flatMap((i) => i.toolCalls)
-                    .find((c) => c?.id === msg.toolCallId);
-                if (!call)
-                    throw new Error(`Tool call not found: ${msg.toolCallId}`);
-                if (!msg.content)
-                    throw new Error("Tool call must have content");
-                // parse tool result as a record
-                let toolResult;
-                {
-                    let text;
-                    if (typeof msg.content === "string")
-                        text = msg.content;
-                    else if (msg.content?.length === 1) {
-                        const first = msg.content[0];
-                        if (first?.type === "text")
-                            text = first.text;
-                    }
-                    if (text) {
-                        try {
-                            const obj = parse(text);
-                            if (isRecord(obj))
-                                toolResult = obj;
-                        }
-                        catch {
-                            // ignore
-                        }
-                        if (!toolResult)
-                            toolResult = { result: text };
-                    }
-                }
-                const functionResponse = {
-                    id: msg.toolCallId,
-                    name: call.function.name,
-                };
-                if (toolResult) {
-                    functionResponse.response = toolResult;
-                }
-                else {
-                    functionResponse.parts = await this.contentToParts(msg.content);
-                }
-                content.parts = [{ functionResponse }];
-            }
-            else if (msg.content) {
-                content.parts = await this.contentToParts(msg.content);
-            }
-            return content;
-        }))).filter(isNonNullable);
-        this.ensureMessagesHasUserMessage(systemParts, result.contents);
-        if (systemParts.length) {
-            result.config ??= {};
-            result.config.systemInstruction = systemParts;
-        }
-        return result;
-    }
-    async contentToParts(content) {
-        if (typeof content === "string")
-            return [{ text: content }];
-        return Promise.all(content.map(async (item) => {
-            switch (item.type) {
-                case "text":
-                    return { text: item.text };
-                case "url":
-                    return { fileData: { fileUri: item.url, mimeType: item.mimeType } };
-                case "file": {
-                    const part = await this.buildVideoContentParts(item);
-                    if (part)
-                        return part;
-                    return { inlineData: { data: item.data, mimeType: item.mimeType } };
-                }
-                case "local":
-                    throw new Error(`Unsupported local file: ${item.path}, it should be converted to base64 at ChatModel`);
-            }
-        }));
-    }
-    ensureMessagesHasUserMessage(systems, contents) {
-        // no messages but system messages
-        if (!contents.length && systems.length) {
-            const system = systems.pop();
-            if (system)
-                contents.push({ role: "user", parts: [system] });
-        }
-        // first message is from model
-        if (contents[0]?.role === "model") {
-            const system = systems.pop();
-            if (system)
-                contents.unshift({ role: "user", parts: [system] });
-        }
-    }
-}
```
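The removed chat model translated `reasoningEffort` into Gemini's thinking settings: 3-series models get a `ThinkingLevel`, while 2.5-series models get a numeric `thinkingBudget` clamped to a per-model range. A condensed sketch of just the clamping step, using the gemini-2.5-pro rule from the table above (the types are added for illustration):

```ts
// Mirrors the budget-clamping branch of the deleted getThinkingBudget();
// only the gemini-2.5-pro and catch-all rules are reproduced here.
type BudgetRule = { pattern: RegExp; support: boolean; min?: number; max?: number };

const rules: BudgetRule[] = [
  { pattern: /gemini-2.5-pro/, support: true, min: 128, max: 32768 },
  { pattern: /.*/, support: false },
];

function clampThinkingBudget(model: string, effort: number): number | undefined {
  const rule = rules.find((r) => r.pattern.test(model));
  if (!rule?.support) return undefined; // model does not support thinking
  let budget = effort;
  if (typeof rule.min === "number") budget = Math.max(rule.min, budget);
  if (typeof rule.max === "number") budget = Math.min(rule.max, budget);
  return budget;
}

clampThinkingBudget("gemini-2.5-pro", 50);     // => 128 (raised to the model minimum)
clampThinkingBudget("gemini-2.5-pro", 100000); // => 32768 (capped at the model maximum)
clampThinkingBudget("gemini-1.5-pro", 1024);   // => undefined (catch-all rule: no thinking)
```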
```diff
@@ -1,34 +0,0 @@
-import { type AgentInvokeOptions, ImageModel, type ImageModelInput, type ImageModelOptions, type ImageModelOutput } from "@aigne/core";
-import { type GenerateContentConfig, type GenerateImagesConfig, GoogleGenAI } from "@google/genai";
-export interface GeminiImageModelInput extends ImageModelInput, GenerateImagesConfig, GenerateContentConfig {
-}
-export interface GeminiImageModelOutput extends ImageModelOutput {
-}
-export interface GeminiImageModelOptions extends ImageModelOptions<GeminiImageModelInput, GeminiImageModelOutput> {
-    apiKey?: string;
-    baseURL?: string;
-    model?: string;
-    modelOptions?: Omit<Partial<GeminiImageModelInput>, "model">;
-    clientOptions?: Record<string, any>;
-}
-export declare class GeminiImageModel extends ImageModel<GeminiImageModelInput, GeminiImageModelOutput> {
-    options?: GeminiImageModelOptions | undefined;
-    constructor(options?: GeminiImageModelOptions | undefined);
-    protected _client?: GoogleGenAI;
-    protected apiKeyEnvName: string;
-    get client(): GoogleGenAI;
-    get credential(): {
-        url: string | undefined;
-        apiKey: string | undefined;
-        model: string;
-    };
-    get modelOptions(): Omit<Partial<GeminiImageModelInput>, "model"> | undefined;
-    /**
-     * Process the input and generate a response
-     * @param input The input to process
-     * @returns The generated response
-     */
-    process(input: GeminiImageModelInput, _options: AgentInvokeOptions): Promise<ImageModelOutput>;
-    private generateImageByImagenModel;
-    private generateImageByGeminiModel;
-}
```
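
Per the declaration above, GeminiImageModel handles both Imagen- and Gemini-family models (note the two private generateImageBy* methods). A construction sketch derived from the declared options shape; the model ID and modelOptions value are illustrative examples, not taken from this diff:

```ts
// Sketch based on GeminiImageModelOptions above; the field names are from
// the declaration, the concrete values are hypothetical.
import { GeminiImageModel } from "@aigne/gemini";

const imageModel = new GeminiImageModel({
  apiKey: process.env.GEMINI_API_KEY,
  model: "imagen-3.0-generate-002",    // hypothetical Imagen model ID
  modelOptions: { numberOfImages: 1 }, // GenerateImagesConfig fields flow through GeminiImageModelInput
});
```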