@aigne/gemini 0.14.2-beta → 0.14.2-beta.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,139 @@
1
1
  # Changelog
2
2
 
3
+ ## [0.14.2-beta.10](https://github.com/AIGNE-io/aigne-framework/compare/gemini-v0.14.2-beta.9...gemini-v0.14.2-beta.10) (2025-10-16)
4
+
5
+
6
+ ### Bug Fixes
7
+
8
+ * correct calculate token usage for gemini model ([7fd1328](https://github.com/AIGNE-io/aigne-framework/commit/7fd13289d3d0f8e062211f7c6dd5cb56e5318c1b))
9
+
10
+
11
+ ### Dependencies
12
+
13
+ * The following workspace dependencies were updated
14
+ * dependencies
15
+ * @aigne/core bumped to 1.63.0-beta.10
16
+ * devDependencies
17
+ * @aigne/test-utils bumped to 0.5.55-beta.10
18
+
19
+ ## [0.14.2-beta.9](https://github.com/AIGNE-io/aigne-framework/compare/gemini-v0.14.2-beta.8...gemini-v0.14.2-beta.9) (2025-10-16)
20
+
21
+
22
+ ### Bug Fixes
23
+
24
+ * **models:** auto retry when got empty response from gemini ([#636](https://github.com/AIGNE-io/aigne-framework/issues/636)) ([9367cef](https://github.com/AIGNE-io/aigne-framework/commit/9367cef49ea4c0c87b8a36b454deb2efaee6886f))
25
+ * **models:** enhance gemini model tool use with status fields ([#634](https://github.com/AIGNE-io/aigne-framework/issues/634)) ([067b175](https://github.com/AIGNE-io/aigne-framework/commit/067b175c8e31bb5b1a6d0fc5a5cfb2d070d8d709))
26
+
27
+
28
+ ### Dependencies
29
+
30
+ * The following workspace dependencies were updated
31
+ * dependencies
32
+ * @aigne/core bumped to 1.63.0-beta.9
33
+ * devDependencies
34
+ * @aigne/test-utils bumped to 0.5.55-beta.9
35
+
36
+ ## [0.14.2-beta.8](https://github.com/AIGNE-io/aigne-framework/compare/gemini-v0.14.2-beta.7...gemini-v0.14.2-beta.8) (2025-10-16)
37
+
38
+
39
+ ### Dependencies
40
+
41
+ * The following workspace dependencies were updated
42
+ * dependencies
43
+ * @aigne/openai bumped to 0.16.2-beta.8
44
+ * devDependencies
45
+ * @aigne/core bumped to 1.63.0-beta.8
46
+ * @aigne/test-utils bumped to 0.5.55-beta.8
47
+
48
+ ## [0.14.2-beta.7](https://github.com/AIGNE-io/aigne-framework/compare/gemini-v0.14.2-beta.6...gemini-v0.14.2-beta.7) (2025-10-15)
49
+
50
+
51
+ ### Dependencies
52
+
53
+ * The following workspace dependencies were updated
54
+ * dependencies
55
+ * @aigne/openai bumped to 0.16.2-beta.7
56
+ * devDependencies
57
+ * @aigne/core bumped to 1.63.0-beta.7
58
+ * @aigne/test-utils bumped to 0.5.55-beta.7
59
+
60
+ ## [0.14.2-beta.6](https://github.com/AIGNE-io/aigne-framework/compare/gemini-v0.14.2-beta.5...gemini-v0.14.2-beta.6) (2025-10-15)
61
+
62
+
63
+ ### Bug Fixes
64
+
65
+ * **models:** add imageConfig to gemini image model ([#621](https://github.com/AIGNE-io/aigne-framework/issues/621)) ([252de7a](https://github.com/AIGNE-io/aigne-framework/commit/252de7a10701c4f5302c2fff977c88e5e833b7b1))
66
+
67
+
68
+ ### Dependencies
69
+
70
+ * The following workspace dependencies were updated
71
+ * dependencies
72
+ * @aigne/openai bumped to 0.16.2-beta.6
73
+ * devDependencies
74
+ * @aigne/core bumped to 1.63.0-beta.6
75
+ * @aigne/test-utils bumped to 0.5.55-beta.6
76
+
77
+ ## [0.14.2-beta.5](https://github.com/AIGNE-io/aigne-framework/compare/gemini-v0.14.2-beta.4...gemini-v0.14.2-beta.5) (2025-10-13)
78
+
79
+
80
+ ### Dependencies
81
+
82
+ * The following workspace dependencies were updated
83
+ * dependencies
84
+ * @aigne/openai bumped to 0.16.2-beta.5
85
+ * devDependencies
86
+ * @aigne/core bumped to 1.63.0-beta.5
87
+ * @aigne/test-utils bumped to 0.5.55-beta.5
88
+
89
+ ## [0.14.2-beta.4](https://github.com/AIGNE-io/aigne-framework/compare/gemini-v0.14.2-beta.3...gemini-v0.14.2-beta.4) (2025-10-12)
90
+
91
+
92
+ ### Dependencies
93
+
94
+ * The following workspace dependencies were updated
95
+ * dependencies
96
+ * @aigne/openai bumped to 0.16.2-beta.4
97
+ * devDependencies
98
+ * @aigne/core bumped to 1.63.0-beta.4
99
+ * @aigne/test-utils bumped to 0.5.55-beta.4
100
+
101
+ ## [0.14.2-beta.3](https://github.com/AIGNE-io/aigne-framework/compare/gemini-v0.14.2-beta.2...gemini-v0.14.2-beta.3) (2025-10-11)
102
+
103
+
104
+ ### Dependencies
105
+
106
+ * The following workspace dependencies were updated
107
+ * dependencies
108
+ * @aigne/openai bumped to 0.16.2-beta.3
109
+ * devDependencies
110
+ * @aigne/core bumped to 1.63.0-beta.3
111
+ * @aigne/test-utils bumped to 0.5.55-beta.3
112
+
113
+ ## [0.14.2-beta.2](https://github.com/AIGNE-io/aigne-framework/compare/gemini-v0.14.2-beta.1...gemini-v0.14.2-beta.2) (2025-10-09)
114
+
115
+
116
+ ### Dependencies
117
+
118
+ * The following workspace dependencies were updated
119
+ * dependencies
120
+ * @aigne/openai bumped to 0.16.2-beta.2
121
+ * devDependencies
122
+ * @aigne/core bumped to 1.63.0-beta.2
123
+ * @aigne/test-utils bumped to 0.5.55-beta.2
124
+
125
+ ## [0.14.2-beta.1](https://github.com/AIGNE-io/aigne-framework/compare/gemini-v0.14.2-beta...gemini-v0.14.2-beta.1) (2025-10-09)
126
+
127
+
128
+ ### Dependencies
129
+
130
+ * The following workspace dependencies were updated
131
+ * dependencies
132
+ * @aigne/openai bumped to 0.16.2-beta.1
133
+ * devDependencies
134
+ * @aigne/core bumped to 1.63.0-beta.1
135
+ * @aigne/test-utils bumped to 0.5.55-beta.1
136
+
3
137
  ## [0.14.2-beta](https://github.com/AIGNE-io/aigne-framework/compare/gemini-v0.14.1...gemini-v0.14.2-beta) (2025-10-07)
4
138
 
5
139
 
@@ -1,13 +1,21 @@
1
- import { type AgentInvokeOptions, type AgentProcessResult, type ChatModelInput, type ChatModelOutput } from "@aigne/core";
1
+ import { type AgentProcessResult, ChatModel, type ChatModelInput, type ChatModelOptions, type ChatModelOutput } from "@aigne/core";
2
2
  import { type PromiseOrValue } from "@aigne/core/utils/type-utils.js";
3
- import { OpenAIChatModel, type OpenAIChatModelOptions } from "@aigne/openai";
4
- import { GoogleGenAI } from "@google/genai";
3
+ import { GoogleGenAI, type GoogleGenAIOptions } from "@google/genai";
4
+ export interface GeminiChatModelOptions extends ChatModelOptions {
5
+ /**
6
+ * API key for Gemini API
7
+ *
8
+ * If not provided, will look for GEMINI_API_KEY or GOOGLE_API_KEY in environment variables
9
+ */
10
+ apiKey?: string;
11
+ /**
12
+ * Optional client options for the Gemini SDK
13
+ */
14
+ clientOptions?: Partial<GoogleGenAIOptions>;
15
+ }
5
16
  /**
6
17
  * Implementation of the ChatModel interface for Google's Gemini API
7
18
  *
8
- * This model uses OpenAI-compatible API format to interact with Google's Gemini models,
9
- * providing access to models like Gemini 1.5 and Gemini 2.0.
10
- *
11
19
  * @example
12
20
  * Here's how to create and use a Gemini chat model:
13
21
  * {@includeCode ../test/gemini-chat-model.test.ts#example-gemini-chat-model}
@@ -16,19 +24,20 @@ import { GoogleGenAI } from "@google/genai";
16
24
  * Here's an example with streaming response:
17
25
  * {@includeCode ../test/gemini-chat-model.test.ts#example-gemini-chat-model-streaming}
18
26
  */
19
- export declare class GeminiChatModel extends OpenAIChatModel {
20
- constructor(options?: OpenAIChatModelOptions);
27
+ export declare class GeminiChatModel extends ChatModel {
28
+ options?: GeminiChatModelOptions | undefined;
29
+ constructor(options?: GeminiChatModelOptions | undefined);
21
30
  protected apiKeyEnvName: string;
22
- protected supportsToolsUseWithJsonSchema: boolean;
23
- protected supportsParallelToolCalls: boolean;
24
- protected supportsToolStreaming: boolean;
25
- protected optionalFieldMode: "optional";
26
31
  protected _googleClient?: GoogleGenAI;
27
32
  get googleClient(): GoogleGenAI;
28
- process(input: ChatModelInput, options: AgentInvokeOptions): PromiseOrValue<AgentProcessResult<ChatModelOutput>>;
29
- private handleImageModelProcessing;
33
+ get credential(): {
34
+ apiKey: string | undefined;
35
+ model: string;
36
+ };
37
+ get modelOptions(): Omit<import("@aigne/core").ChatModelInputOptions, "model"> | undefined;
38
+ process(input: ChatModelInput): PromiseOrValue<AgentProcessResult<ChatModelOutput>>;
39
+ private processInput;
30
40
  private buildConfig;
31
41
  private buildTools;
32
42
  private buildContents;
33
- getRunMessages(input: ChatModelInput): ReturnType<OpenAIChatModel["getRunMessages"]>;
34
43
  }
@@ -2,18 +2,15 @@
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
3
  exports.GeminiChatModel = void 0;
4
4
  const core_1 = require("@aigne/core");
5
+ const logger_js_1 = require("@aigne/core/utils/logger.js");
5
6
  const type_utils_js_1 = require("@aigne/core/utils/type-utils.js");
6
- const openai_1 = require("@aigne/openai");
7
7
  const uuid_1 = require("@aigne/uuid");
8
8
  const genai_1 = require("@google/genai");
9
- const GEMINI_BASE_URL = "https://generativelanguage.googleapis.com/v1beta/openai";
10
9
  const GEMINI_DEFAULT_CHAT_MODEL = "gemini-2.0-flash";
10
+ const OUTPUT_JSON_FUNCTION_NAME = "output_json";
11
11
  /**
12
12
  * Implementation of the ChatModel interface for Google's Gemini API
13
13
  *
14
- * This model uses OpenAI-compatible API format to interact with Google's Gemini models,
15
- * providing access to models like Gemini 1.5 and Gemini 2.0.
16
- *
17
14
  * @example
18
15
  * Here's how to create and use a Gemini chat model:
19
16
  * {@includeCode ../test/gemini-chat-model.test.ts#example-gemini-chat-model}
@@ -22,19 +19,16 @@ const GEMINI_DEFAULT_CHAT_MODEL = "gemini-2.0-flash";
22
19
  * Here's an example with streaming response:
23
20
  * {@includeCode ../test/gemini-chat-model.test.ts#example-gemini-chat-model-streaming}
24
21
  */
25
- class GeminiChatModel extends openai_1.OpenAIChatModel {
22
+ class GeminiChatModel extends core_1.ChatModel {
23
+ options;
26
24
  constructor(options) {
27
25
  super({
28
26
  ...options,
29
27
  model: options?.model || GEMINI_DEFAULT_CHAT_MODEL,
30
- baseURL: options?.baseURL || GEMINI_BASE_URL,
31
28
  });
29
+ this.options = options;
32
30
  }
33
31
  apiKeyEnvName = "GEMINI_API_KEY";
34
- supportsToolsUseWithJsonSchema = false;
35
- supportsParallelToolCalls = false;
36
- supportsToolStreaming = false;
37
- optionalFieldMode = "optional";
38
32
  _googleClient;
39
33
  get googleClient() {
40
34
  if (this._googleClient)
@@ -42,20 +36,33 @@ class GeminiChatModel extends openai_1.OpenAIChatModel {
42
36
  const { apiKey } = this.credential;
43
37
  if (!apiKey)
44
38
  throw new Error(`${this.name} requires an API key. Please provide it via \`options.apiKey\`, or set the \`${this.apiKeyEnvName}\` environment variable`);
45
- this._googleClient ??= new genai_1.GoogleGenAI({ apiKey });
39
+ this._googleClient ??= new genai_1.GoogleGenAI({
40
+ apiKey,
41
+ ...this.options?.clientOptions,
42
+ });
46
43
  return this._googleClient;
47
44
  }
48
- process(input, options) {
49
- const model = input.modelOptions?.model || this.credential.model;
50
- if (!model.includes("image"))
51
- return super.process(input, options);
52
- return this.handleImageModelProcessing(input);
45
+ get credential() {
46
+ const apiKey = this.options?.apiKey ||
47
+ process.env[this.apiKeyEnvName] ||
48
+ process.env.GEMINI_API_KEY ||
49
+ process.env.GOOGLE_API_KEY;
50
+ return {
51
+ apiKey,
52
+ model: this.options?.model || GEMINI_DEFAULT_CHAT_MODEL,
53
+ };
53
54
  }
54
- async *handleImageModelProcessing(input) {
55
+ get modelOptions() {
56
+ return this.options?.modelOptions;
57
+ }
58
+ process(input) {
59
+ return this.processInput(input);
60
+ }
61
+ async *processInput(input) {
55
62
  const model = input.modelOptions?.model || this.credential.model;
56
63
  const { contents, config } = await this.buildContents(input);
57
64
  const parameters = {
58
- model: model,
65
+ model,
59
66
  contents,
60
67
  config: {
61
68
  responseModalities: input.modelOptions?.modalities,
@@ -64,7 +71,6 @@ class GeminiChatModel extends openai_1.OpenAIChatModel {
64
71
  frequencyPenalty: input.modelOptions?.frequencyPenalty || this.modelOptions?.frequencyPenalty,
65
72
  presencePenalty: input.modelOptions?.presencePenalty || this.modelOptions?.presencePenalty,
66
73
  ...config,
67
- ...(await this.buildTools(input)),
68
74
  ...(await this.buildConfig(input)),
69
75
  },
70
76
  };
@@ -77,6 +83,7 @@ class GeminiChatModel extends openai_1.OpenAIChatModel {
77
83
  const files = [];
78
84
  const toolCalls = [];
79
85
  let text = "";
86
+ let json;
80
87
  for await (const chunk of response) {
81
88
  if (!responseModel && chunk.modelVersion) {
82
89
  responseModel = chunk.modelVersion;
@@ -100,34 +107,77 @@ class GeminiChatModel extends openai_1.OpenAIChatModel {
100
107
  });
101
108
  }
102
109
  if (part.functionCall?.name) {
103
- toolCalls.push({
104
- id: part.functionCall.id || (0, uuid_1.v7)(),
105
- type: "function",
106
- function: {
107
- name: part.functionCall.name,
108
- arguments: part.functionCall.args || {},
109
- },
110
- });
111
- yield { delta: { json: { toolCalls } } };
110
+ if (part.functionCall.name === OUTPUT_JSON_FUNCTION_NAME) {
111
+ json = part.functionCall.args;
112
+ }
113
+ else {
114
+ toolCalls.push({
115
+ id: part.functionCall.id || (0, uuid_1.v7)(),
116
+ type: "function",
117
+ function: {
118
+ name: part.functionCall.name,
119
+ arguments: part.functionCall.args || {},
120
+ },
121
+ });
122
+ yield { delta: { json: { toolCalls } } };
123
+ }
112
124
  }
113
125
  }
114
126
  }
115
127
  }
116
128
  if (chunk.usageMetadata) {
117
- usage.inputTokens += chunk.usageMetadata.promptTokenCount || 0;
118
- usage.outputTokens += chunk.usageMetadata.candidatesTokenCount || 0;
129
+ if (chunk.usageMetadata.promptTokenCount)
130
+ usage.inputTokens = chunk.usageMetadata.promptTokenCount;
131
+ if (chunk.usageMetadata.candidatesTokenCount)
132
+ usage.outputTokens = chunk.usageMetadata.candidatesTokenCount;
119
133
  }
120
134
  }
121
135
  if (input.responseFormat?.type === "json_schema") {
122
- yield { delta: { json: { json: (0, core_1.safeParseJSON)(text) } } };
136
+ if (json) {
137
+ yield { delta: { json: { json } } };
138
+ }
139
+ else if (text) {
140
+ yield { delta: { json: { json: (0, core_1.safeParseJSON)(text) } } };
141
+ }
142
+ else {
143
+ // NOTE: Trigger retry of chat model
144
+ throw new core_1.StructuredOutputError("No JSON response from the model");
145
+ }
146
+ }
147
+ else if (!toolCalls.length) {
148
+ if (!text) {
149
+ logger_js_1.logger.error("No text response from the model", parameters);
150
+ // NOTE: Trigger retry of chat model
151
+ throw new core_1.StructuredOutputError("No text response from the model");
152
+ }
123
153
  }
124
- yield { delta: { json: { usage, files } } };
154
+ yield { delta: { json: { usage, files: files.length ? files : undefined } } };
125
155
  }
126
156
  async buildConfig(input) {
127
157
  const config = {};
158
+ const { tools, toolConfig } = await this.buildTools(input);
159
+ config.tools = tools;
160
+ config.toolConfig = toolConfig;
128
161
  if (input.responseFormat?.type === "json_schema") {
129
- config.responseJsonSchema = input.responseFormat.jsonSchema.schema;
130
- config.responseMimeType = "application/json";
162
+ if (config.tools?.length) {
163
+ config.tools.push({
164
+ functionDeclarations: [
165
+ {
166
+ name: OUTPUT_JSON_FUNCTION_NAME,
167
+ description: "Output the final response in JSON format",
168
+ parametersJsonSchema: input.responseFormat.jsonSchema.schema,
169
+ },
170
+ ],
171
+ });
172
+ config.toolConfig = {
173
+ ...config.toolConfig,
174
+ functionCallingConfig: { mode: genai_1.FunctionCallingConfigMode.ANY },
175
+ };
176
+ }
177
+ else {
178
+ config.responseJsonSchema = input.responseFormat.jsonSchema.schema;
179
+ config.responseMimeType = "application/json";
180
+ }
131
181
  }
132
182
  return config;
133
183
  }
@@ -178,7 +228,7 @@ class GeminiChatModel extends openai_1.OpenAIChatModel {
178
228
  return;
179
229
  }
180
230
  const content = {
181
- role: msg.role === "agent" ? "model" : "user",
231
+ role: msg.role === "agent" ? "model" : msg.role === "user" ? "user" : undefined,
182
232
  };
183
233
  if (msg.toolCalls) {
184
234
  content.parts = msg.toolCalls.map((call) => ({
@@ -195,12 +245,31 @@ class GeminiChatModel extends openai_1.OpenAIChatModel {
195
245
  .find((c) => c?.id === msg.toolCallId);
196
246
  if (!call)
197
247
  throw new Error(`Tool call not found: ${msg.toolCallId}`);
248
+ const output = JSON.parse(msg.content);
249
+ const isError = "error" in output && Boolean(input.error);
250
+ const response = {
251
+ tool: call.function.name,
252
+ };
253
+ // NOTE: based on the documentation of the Gemini API, the content should include an `output` field for a successful result or an `error` field for a failed result,
254
+ // and based on actual testing, adding a `tool` field containing the tool name improves the LLM's understanding of which tool was called.
255
+ if (isError) {
256
+ Object.assign(response, { status: "error" }, output);
257
+ }
258
+ else {
259
+ Object.assign(response, { status: "success" });
260
+ if ("output" in output) {
261
+ Object.assign(response, output);
262
+ }
263
+ else {
264
+ Object.assign(response, { output });
265
+ }
266
+ }
198
267
  content.parts = [
199
268
  {
200
269
  functionResponse: {
201
270
  id: msg.toolCallId,
202
271
  name: call.function.name,
203
- response: JSON.parse(msg.content),
272
+ response,
204
273
  },
205
274
  },
206
275
  ];
@@ -224,24 +293,17 @@ class GeminiChatModel extends openai_1.OpenAIChatModel {
224
293
  }
225
294
  return content;
226
295
  }))).filter(type_utils_js_1.isNonNullable);
296
+ if (!result.contents.length && systemParts.length) {
297
+ const system = systemParts.pop();
298
+ if (system) {
299
+ result.contents.push({ role: "user", parts: [system] });
300
+ }
301
+ }
227
302
  if (systemParts.length) {
228
303
  result.config ??= {};
229
304
  result.config.systemInstruction = systemParts;
230
305
  }
231
306
  return result;
232
307
  }
233
- async getRunMessages(input) {
234
- const messages = await super.getRunMessages(input);
235
- if (!messages.some((i) => i.role === "user")) {
236
- for (const msg of messages) {
237
- if (msg.role === "system") {
238
- // Ensure the last message is from the user
239
- msg.role = "user";
240
- break;
241
- }
242
- }
243
- }
244
- return messages;
245
- }
246
308
  }
247
309
  exports.GeminiChatModel = GeminiChatModel;
@@ -132,6 +132,7 @@ class GeminiImageModel extends core_1.ImageModel {
132
132
  "tools",
133
133
  "topK",
134
134
  "topP",
135
+ "imageConfig",
135
136
  ];
136
137
  const images = await Promise.all((0, type_utils_js_1.flat)(input.image).map(async (image) => {
137
138
  const { data, mimeType } = await this.transformFileType("file", image, options);
@@ -1,13 +1,21 @@
1
- import { type AgentInvokeOptions, type AgentProcessResult, type ChatModelInput, type ChatModelOutput } from "@aigne/core";
1
+ import { type AgentProcessResult, ChatModel, type ChatModelInput, type ChatModelOptions, type ChatModelOutput } from "@aigne/core";
2
2
  import { type PromiseOrValue } from "@aigne/core/utils/type-utils.js";
3
- import { OpenAIChatModel, type OpenAIChatModelOptions } from "@aigne/openai";
4
- import { GoogleGenAI } from "@google/genai";
3
+ import { GoogleGenAI, type GoogleGenAIOptions } from "@google/genai";
4
+ export interface GeminiChatModelOptions extends ChatModelOptions {
5
+ /**
6
+ * API key for Gemini API
7
+ *
8
+ * If not provided, will look for GEMINI_API_KEY or GOOGLE_API_KEY in environment variables
9
+ */
10
+ apiKey?: string;
11
+ /**
12
+ * Optional client options for the Gemini SDK
13
+ */
14
+ clientOptions?: Partial<GoogleGenAIOptions>;
15
+ }
5
16
  /**
6
17
  * Implementation of the ChatModel interface for Google's Gemini API
7
18
  *
8
- * This model uses OpenAI-compatible API format to interact with Google's Gemini models,
9
- * providing access to models like Gemini 1.5 and Gemini 2.0.
10
- *
11
19
  * @example
12
20
  * Here's how to create and use a Gemini chat model:
13
21
  * {@includeCode ../test/gemini-chat-model.test.ts#example-gemini-chat-model}
@@ -16,19 +24,20 @@ import { GoogleGenAI } from "@google/genai";
16
24
  * Here's an example with streaming response:
17
25
  * {@includeCode ../test/gemini-chat-model.test.ts#example-gemini-chat-model-streaming}
18
26
  */
19
- export declare class GeminiChatModel extends OpenAIChatModel {
20
- constructor(options?: OpenAIChatModelOptions);
27
+ export declare class GeminiChatModel extends ChatModel {
28
+ options?: GeminiChatModelOptions | undefined;
29
+ constructor(options?: GeminiChatModelOptions | undefined);
21
30
  protected apiKeyEnvName: string;
22
- protected supportsToolsUseWithJsonSchema: boolean;
23
- protected supportsParallelToolCalls: boolean;
24
- protected supportsToolStreaming: boolean;
25
- protected optionalFieldMode: "optional";
26
31
  protected _googleClient?: GoogleGenAI;
27
32
  get googleClient(): GoogleGenAI;
28
- process(input: ChatModelInput, options: AgentInvokeOptions): PromiseOrValue<AgentProcessResult<ChatModelOutput>>;
29
- private handleImageModelProcessing;
33
+ get credential(): {
34
+ apiKey: string | undefined;
35
+ model: string;
36
+ };
37
+ get modelOptions(): Omit<import("@aigne/core").ChatModelInputOptions, "model"> | undefined;
38
+ process(input: ChatModelInput): PromiseOrValue<AgentProcessResult<ChatModelOutput>>;
39
+ private processInput;
30
40
  private buildConfig;
31
41
  private buildTools;
32
42
  private buildContents;
33
- getRunMessages(input: ChatModelInput): ReturnType<OpenAIChatModel["getRunMessages"]>;
34
43
  }
@@ -1,13 +1,21 @@
1
- import { type AgentInvokeOptions, type AgentProcessResult, type ChatModelInput, type ChatModelOutput } from "@aigne/core";
1
+ import { type AgentProcessResult, ChatModel, type ChatModelInput, type ChatModelOptions, type ChatModelOutput } from "@aigne/core";
2
2
  import { type PromiseOrValue } from "@aigne/core/utils/type-utils.js";
3
- import { OpenAIChatModel, type OpenAIChatModelOptions } from "@aigne/openai";
4
- import { GoogleGenAI } from "@google/genai";
3
+ import { GoogleGenAI, type GoogleGenAIOptions } from "@google/genai";
4
+ export interface GeminiChatModelOptions extends ChatModelOptions {
5
+ /**
6
+ * API key for Gemini API
7
+ *
8
+ * If not provided, will look for GEMINI_API_KEY or GOOGLE_API_KEY in environment variables
9
+ */
10
+ apiKey?: string;
11
+ /**
12
+ * Optional client options for the Gemini SDK
13
+ */
14
+ clientOptions?: Partial<GoogleGenAIOptions>;
15
+ }
5
16
  /**
6
17
  * Implementation of the ChatModel interface for Google's Gemini API
7
18
  *
8
- * This model uses OpenAI-compatible API format to interact with Google's Gemini models,
9
- * providing access to models like Gemini 1.5 and Gemini 2.0.
10
- *
11
19
  * @example
12
20
  * Here's how to create and use a Gemini chat model:
13
21
  * {@includeCode ../test/gemini-chat-model.test.ts#example-gemini-chat-model}
@@ -16,19 +24,20 @@ import { GoogleGenAI } from "@google/genai";
16
24
  * Here's an example with streaming response:
17
25
  * {@includeCode ../test/gemini-chat-model.test.ts#example-gemini-chat-model-streaming}
18
26
  */
19
- export declare class GeminiChatModel extends OpenAIChatModel {
20
- constructor(options?: OpenAIChatModelOptions);
27
+ export declare class GeminiChatModel extends ChatModel {
28
+ options?: GeminiChatModelOptions | undefined;
29
+ constructor(options?: GeminiChatModelOptions | undefined);
21
30
  protected apiKeyEnvName: string;
22
- protected supportsToolsUseWithJsonSchema: boolean;
23
- protected supportsParallelToolCalls: boolean;
24
- protected supportsToolStreaming: boolean;
25
- protected optionalFieldMode: "optional";
26
31
  protected _googleClient?: GoogleGenAI;
27
32
  get googleClient(): GoogleGenAI;
28
- process(input: ChatModelInput, options: AgentInvokeOptions): PromiseOrValue<AgentProcessResult<ChatModelOutput>>;
29
- private handleImageModelProcessing;
33
+ get credential(): {
34
+ apiKey: string | undefined;
35
+ model: string;
36
+ };
37
+ get modelOptions(): Omit<import("@aigne/core").ChatModelInputOptions, "model"> | undefined;
38
+ process(input: ChatModelInput): PromiseOrValue<AgentProcessResult<ChatModelOutput>>;
39
+ private processInput;
30
40
  private buildConfig;
31
41
  private buildTools;
32
42
  private buildContents;
33
- getRunMessages(input: ChatModelInput): ReturnType<OpenAIChatModel["getRunMessages"]>;
34
43
  }
@@ -1,16 +1,13 @@
1
- import { safeParseJSON, } from "@aigne/core";
1
+ import { ChatModel, StructuredOutputError, safeParseJSON, } from "@aigne/core";
2
+ import { logger } from "@aigne/core/utils/logger.js";
2
3
  import { isNonNullable } from "@aigne/core/utils/type-utils.js";
3
- import { OpenAIChatModel } from "@aigne/openai";
4
4
  import { v7 } from "@aigne/uuid";
5
5
  import { FunctionCallingConfigMode, GoogleGenAI, } from "@google/genai";
6
- const GEMINI_BASE_URL = "https://generativelanguage.googleapis.com/v1beta/openai";
7
6
  const GEMINI_DEFAULT_CHAT_MODEL = "gemini-2.0-flash";
7
+ const OUTPUT_JSON_FUNCTION_NAME = "output_json";
8
8
  /**
9
9
  * Implementation of the ChatModel interface for Google's Gemini API
10
10
  *
11
- * This model uses OpenAI-compatible API format to interact with Google's Gemini models,
12
- * providing access to models like Gemini 1.5 and Gemini 2.0.
13
- *
14
11
  * @example
15
12
  * Here's how to create and use a Gemini chat model:
16
13
  * {@includeCode ../test/gemini-chat-model.test.ts#example-gemini-chat-model}
@@ -19,19 +16,16 @@ const GEMINI_DEFAULT_CHAT_MODEL = "gemini-2.0-flash";
19
16
  * Here's an example with streaming response:
20
17
  * {@includeCode ../test/gemini-chat-model.test.ts#example-gemini-chat-model-streaming}
21
18
  */
22
- export class GeminiChatModel extends OpenAIChatModel {
19
+ export class GeminiChatModel extends ChatModel {
20
+ options;
23
21
  constructor(options) {
24
22
  super({
25
23
  ...options,
26
24
  model: options?.model || GEMINI_DEFAULT_CHAT_MODEL,
27
- baseURL: options?.baseURL || GEMINI_BASE_URL,
28
25
  });
26
+ this.options = options;
29
27
  }
30
28
  apiKeyEnvName = "GEMINI_API_KEY";
31
- supportsToolsUseWithJsonSchema = false;
32
- supportsParallelToolCalls = false;
33
- supportsToolStreaming = false;
34
- optionalFieldMode = "optional";
35
29
  _googleClient;
36
30
  get googleClient() {
37
31
  if (this._googleClient)
@@ -39,20 +33,33 @@ export class GeminiChatModel extends OpenAIChatModel {
39
33
  const { apiKey } = this.credential;
40
34
  if (!apiKey)
41
35
  throw new Error(`${this.name} requires an API key. Please provide it via \`options.apiKey\`, or set the \`${this.apiKeyEnvName}\` environment variable`);
42
- this._googleClient ??= new GoogleGenAI({ apiKey });
36
+ this._googleClient ??= new GoogleGenAI({
37
+ apiKey,
38
+ ...this.options?.clientOptions,
39
+ });
43
40
  return this._googleClient;
44
41
  }
45
- process(input, options) {
46
- const model = input.modelOptions?.model || this.credential.model;
47
- if (!model.includes("image"))
48
- return super.process(input, options);
49
- return this.handleImageModelProcessing(input);
42
+ get credential() {
43
+ const apiKey = this.options?.apiKey ||
44
+ process.env[this.apiKeyEnvName] ||
45
+ process.env.GEMINI_API_KEY ||
46
+ process.env.GOOGLE_API_KEY;
47
+ return {
48
+ apiKey,
49
+ model: this.options?.model || GEMINI_DEFAULT_CHAT_MODEL,
50
+ };
50
51
  }
51
- async *handleImageModelProcessing(input) {
52
+ get modelOptions() {
53
+ return this.options?.modelOptions;
54
+ }
55
+ process(input) {
56
+ return this.processInput(input);
57
+ }
58
+ async *processInput(input) {
52
59
  const model = input.modelOptions?.model || this.credential.model;
53
60
  const { contents, config } = await this.buildContents(input);
54
61
  const parameters = {
55
- model: model,
62
+ model,
56
63
  contents,
57
64
  config: {
58
65
  responseModalities: input.modelOptions?.modalities,
@@ -61,7 +68,6 @@ export class GeminiChatModel extends OpenAIChatModel {
61
68
  frequencyPenalty: input.modelOptions?.frequencyPenalty || this.modelOptions?.frequencyPenalty,
62
69
  presencePenalty: input.modelOptions?.presencePenalty || this.modelOptions?.presencePenalty,
63
70
  ...config,
64
- ...(await this.buildTools(input)),
65
71
  ...(await this.buildConfig(input)),
66
72
  },
67
73
  };
@@ -74,6 +80,7 @@ export class GeminiChatModel extends OpenAIChatModel {
74
80
  const files = [];
75
81
  const toolCalls = [];
76
82
  let text = "";
83
+ let json;
77
84
  for await (const chunk of response) {
78
85
  if (!responseModel && chunk.modelVersion) {
79
86
  responseModel = chunk.modelVersion;
@@ -97,34 +104,77 @@ export class GeminiChatModel extends OpenAIChatModel {
97
104
  });
98
105
  }
99
106
  if (part.functionCall?.name) {
100
- toolCalls.push({
101
- id: part.functionCall.id || v7(),
102
- type: "function",
103
- function: {
104
- name: part.functionCall.name,
105
- arguments: part.functionCall.args || {},
106
- },
107
- });
108
- yield { delta: { json: { toolCalls } } };
107
+ if (part.functionCall.name === OUTPUT_JSON_FUNCTION_NAME) {
108
+ json = part.functionCall.args;
109
+ }
110
+ else {
111
+ toolCalls.push({
112
+ id: part.functionCall.id || v7(),
113
+ type: "function",
114
+ function: {
115
+ name: part.functionCall.name,
116
+ arguments: part.functionCall.args || {},
117
+ },
118
+ });
119
+ yield { delta: { json: { toolCalls } } };
120
+ }
109
121
  }
110
122
  }
111
123
  }
112
124
  }
113
125
  if (chunk.usageMetadata) {
114
- usage.inputTokens += chunk.usageMetadata.promptTokenCount || 0;
115
- usage.outputTokens += chunk.usageMetadata.candidatesTokenCount || 0;
126
+ if (chunk.usageMetadata.promptTokenCount)
127
+ usage.inputTokens = chunk.usageMetadata.promptTokenCount;
128
+ if (chunk.usageMetadata.candidatesTokenCount)
129
+ usage.outputTokens = chunk.usageMetadata.candidatesTokenCount;
116
130
  }
117
131
  }
118
132
  if (input.responseFormat?.type === "json_schema") {
119
- yield { delta: { json: { json: safeParseJSON(text) } } };
133
+ if (json) {
134
+ yield { delta: { json: { json } } };
135
+ }
136
+ else if (text) {
137
+ yield { delta: { json: { json: safeParseJSON(text) } } };
138
+ }
139
+ else {
140
+ // NOTE: Trigger retry of chat model
141
+ throw new StructuredOutputError("No JSON response from the model");
142
+ }
143
+ }
144
+ else if (!toolCalls.length) {
145
+ if (!text) {
146
+ logger.error("No text response from the model", parameters);
147
+ // NOTE: Trigger retry of chat model
148
+ throw new StructuredOutputError("No text response from the model");
149
+ }
120
150
  }
121
- yield { delta: { json: { usage, files } } };
151
+ yield { delta: { json: { usage, files: files.length ? files : undefined } } };
122
152
  }
123
153
  async buildConfig(input) {
124
154
  const config = {};
155
+ const { tools, toolConfig } = await this.buildTools(input);
156
+ config.tools = tools;
157
+ config.toolConfig = toolConfig;
125
158
  if (input.responseFormat?.type === "json_schema") {
126
- config.responseJsonSchema = input.responseFormat.jsonSchema.schema;
127
- config.responseMimeType = "application/json";
159
+ if (config.tools?.length) {
160
+ config.tools.push({
161
+ functionDeclarations: [
162
+ {
163
+ name: OUTPUT_JSON_FUNCTION_NAME,
164
+ description: "Output the final response in JSON format",
165
+ parametersJsonSchema: input.responseFormat.jsonSchema.schema,
166
+ },
167
+ ],
168
+ });
169
+ config.toolConfig = {
170
+ ...config.toolConfig,
171
+ functionCallingConfig: { mode: FunctionCallingConfigMode.ANY },
172
+ };
173
+ }
174
+ else {
175
+ config.responseJsonSchema = input.responseFormat.jsonSchema.schema;
176
+ config.responseMimeType = "application/json";
177
+ }
128
178
  }
129
179
  return config;
130
180
  }
@@ -175,7 +225,7 @@ export class GeminiChatModel extends OpenAIChatModel {
175
225
  return;
176
226
  }
177
227
  const content = {
178
- role: msg.role === "agent" ? "model" : "user",
228
+ role: msg.role === "agent" ? "model" : msg.role === "user" ? "user" : undefined,
179
229
  };
180
230
  if (msg.toolCalls) {
181
231
  content.parts = msg.toolCalls.map((call) => ({
@@ -192,12 +242,31 @@ export class GeminiChatModel extends OpenAIChatModel {
192
242
  .find((c) => c?.id === msg.toolCallId);
193
243
  if (!call)
194
244
  throw new Error(`Tool call not found: ${msg.toolCallId}`);
245
+ const output = JSON.parse(msg.content);
246
+ const isError = "error" in output && Boolean(input.error);
247
+ const response = {
248
+ tool: call.function.name,
249
+ };
250
+ // NOTE: based on the documentation of the Gemini API, the content should include an `output` field for a successful result or an `error` field for a failed result,
251
+ // and based on actual testing, adding a `tool` field containing the tool name improves the LLM's understanding of which tool was called.
252
+ if (isError) {
253
+ Object.assign(response, { status: "error" }, output);
254
+ }
255
+ else {
256
+ Object.assign(response, { status: "success" });
257
+ if ("output" in output) {
258
+ Object.assign(response, output);
259
+ }
260
+ else {
261
+ Object.assign(response, { output });
262
+ }
263
+ }
195
264
  content.parts = [
196
265
  {
197
266
  functionResponse: {
198
267
  id: msg.toolCallId,
199
268
  name: call.function.name,
200
- response: JSON.parse(msg.content),
269
+ response,
201
270
  },
202
271
  },
203
272
  ];
@@ -221,23 +290,16 @@ export class GeminiChatModel extends OpenAIChatModel {
221
290
  }
222
291
  return content;
223
292
  }))).filter(isNonNullable);
293
+ if (!result.contents.length && systemParts.length) {
294
+ const system = systemParts.pop();
295
+ if (system) {
296
+ result.contents.push({ role: "user", parts: [system] });
297
+ }
298
+ }
224
299
  if (systemParts.length) {
225
300
  result.config ??= {};
226
301
  result.config.systemInstruction = systemParts;
227
302
  }
228
303
  return result;
229
304
  }
230
- async getRunMessages(input) {
231
- const messages = await super.getRunMessages(input);
232
- if (!messages.some((i) => i.role === "user")) {
233
- for (const msg of messages) {
234
- if (msg.role === "system") {
235
- // Ensure the last message is from the user
236
- msg.role = "user";
237
- break;
238
- }
239
- }
240
- }
241
- return messages;
242
- }
243
305
  }
@@ -129,6 +129,7 @@ export class GeminiImageModel extends ImageModel {
129
129
  "tools",
130
130
  "topK",
131
131
  "topP",
132
+ "imageConfig",
132
133
  ];
133
134
  const images = await Promise.all(flat(input.image).map(async (image) => {
134
135
  const { data, mimeType } = await this.transformFileType("file", image, options);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@aigne/gemini",
3
- "version": "0.14.2-beta",
3
+ "version": "0.14.2-beta.10",
4
4
  "description": "AIGNE Gemini SDK for integrating with Google's Gemini AI models",
5
5
  "publishConfig": {
6
6
  "access": "public"
@@ -36,9 +36,9 @@
36
36
  },
37
37
  "dependencies": {
38
38
  "@aigne/uuid": "^13.0.1",
39
- "@google/genai": "^1.20.0",
39
+ "@google/genai": "^1.24.0",
40
40
  "zod": "^3.25.67",
41
- "@aigne/openai": "^0.16.2-beta",
41
+ "@aigne/core": "^1.63.0-beta.10",
42
42
  "@aigne/platform-helpers": "^0.6.3"
43
43
  },
44
44
  "devDependencies": {
@@ -47,8 +47,7 @@
47
47
  "npm-run-all": "^4.1.5",
48
48
  "rimraf": "^6.0.1",
49
49
  "typescript": "^5.9.2",
50
- "@aigne/core": "^1.63.0-beta",
51
- "@aigne/test-utils": "^0.5.55-beta"
50
+ "@aigne/test-utils": "^0.5.55-beta.10"
52
51
  },
53
52
  "scripts": {
54
53
  "lint": "tsc --noEmit",