@aigne/gemini 0.14.2-beta.8 → 0.14.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,76 @@
  # Changelog
 
+ ## [0.14.2](https://github.com/AIGNE-io/aigne-framework/compare/gemini-v0.14.2-beta.12...gemini-v0.14.2) (2025-10-19)
+
+
+ ### Dependencies
+
+ * The following workspace dependencies were updated
+   * dependencies
+     * @aigne/core bumped to 1.63.0
+   * devDependencies
+     * @aigne/test-utils bumped to 0.5.55
+
+ ## [0.14.2-beta.12](https://github.com/AIGNE-io/aigne-framework/compare/gemini-v0.14.2-beta.11...gemini-v0.14.2-beta.12) (2025-10-17)
+
+
+ ### Dependencies
+
+ * The following workspace dependencies were updated
+   * dependencies
+     * @aigne/core bumped to 1.63.0-beta.12
+   * devDependencies
+     * @aigne/test-utils bumped to 0.5.55-beta.12
+
+ ## [0.14.2-beta.11](https://github.com/AIGNE-io/aigne-framework/compare/gemini-v0.14.2-beta.10...gemini-v0.14.2-beta.11) (2025-10-17)
+
+
+ ### Bug Fixes
+
+ * **gemini:** implement retry mechanism for empty responses with structured output fallback ([#638](https://github.com/AIGNE-io/aigne-framework/issues/638)) ([d33c8bb](https://github.com/AIGNE-io/aigne-framework/commit/d33c8bb9711aadddef9687d6cf472a179cd8ed9c))
+
+
+ ### Dependencies
+
+ * The following workspace dependencies were updated
+   * dependencies
+     * @aigne/core bumped to 1.63.0-beta.11
+   * devDependencies
+     * @aigne/test-utils bumped to 0.5.55-beta.11
+
+ ## [0.14.2-beta.10](https://github.com/AIGNE-io/aigne-framework/compare/gemini-v0.14.2-beta.9...gemini-v0.14.2-beta.10) (2025-10-16)
+
+
+ ### Bug Fixes
+
+ * correctly calculate token usage for gemini model ([7fd1328](https://github.com/AIGNE-io/aigne-framework/commit/7fd13289d3d0f8e062211f7c6dd5cb56e5318c1b))
+
+
+ ### Dependencies
+
+ * The following workspace dependencies were updated
+   * dependencies
+     * @aigne/core bumped to 1.63.0-beta.10
+   * devDependencies
+     * @aigne/test-utils bumped to 0.5.55-beta.10
+
+ ## [0.14.2-beta.9](https://github.com/AIGNE-io/aigne-framework/compare/gemini-v0.14.2-beta.8...gemini-v0.14.2-beta.9) (2025-10-16)
+
+
+ ### Bug Fixes
+
+ * **models:** auto retry when getting an empty response from gemini ([#636](https://github.com/AIGNE-io/aigne-framework/issues/636)) ([9367cef](https://github.com/AIGNE-io/aigne-framework/commit/9367cef49ea4c0c87b8a36b454deb2efaee6886f))
+ * **models:** enhance gemini model tool use with status fields ([#634](https://github.com/AIGNE-io/aigne-framework/issues/634)) ([067b175](https://github.com/AIGNE-io/aigne-framework/commit/067b175c8e31bb5b1a6d0fc5a5cfb2d070d8d709))
+
+
+ ### Dependencies
+
+ * The following workspace dependencies were updated
+   * dependencies
+     * @aigne/core bumped to 1.63.0-beta.9
+   * devDependencies
+     * @aigne/test-utils bumped to 0.5.55-beta.9
+
  ## [0.14.2-beta.8](https://github.com/AIGNE-io/aigne-framework/compare/gemini-v0.14.2-beta.7...gemini-v0.14.2-beta.8) (2025-10-16)
 
 
@@ -1,13 +1,21 @@
- import { type AgentInvokeOptions, type AgentProcessResult, type ChatModelInput, type ChatModelOutput } from "@aigne/core";
+ import { type AgentProcessResult, ChatModel, type ChatModelInput, type ChatModelOptions, type ChatModelOutput } from "@aigne/core";
  import { type PromiseOrValue } from "@aigne/core/utils/type-utils.js";
- import { OpenAIChatModel, type OpenAIChatModelOptions } from "@aigne/openai";
- import { GoogleGenAI } from "@google/genai";
+ import { GoogleGenAI, type GoogleGenAIOptions } from "@google/genai";
+ export interface GeminiChatModelOptions extends ChatModelOptions {
+     /**
+      * API key for Gemini API
+      *
+      * If not provided, will look for GEMINI_API_KEY or GOOGLE_API_KEY in environment variables
+      */
+     apiKey?: string;
+     /**
+      * Optional client options for the Gemini SDK
+      */
+     clientOptions?: Partial<GoogleGenAIOptions>;
+ }
  /**
   * Implementation of the ChatModel interface for Google's Gemini API
   *
-  * This model uses OpenAI-compatible API format to interact with Google's Gemini models,
-  * providing access to models like Gemini 1.5 and Gemini 2.0.
-  *
   * @example
   * Here's how to create and use a Gemini chat model:
   * {@includeCode ../test/gemini-chat-model.test.ts#example-gemini-chat-model}
@@ -16,19 +24,20 @@ import { GoogleGenAI } from "@google/genai";
   * Here's an example with streaming response:
   * {@includeCode ../test/gemini-chat-model.test.ts#example-gemini-chat-model-streaming}
   */
- export declare class GeminiChatModel extends OpenAIChatModel {
-     constructor(options?: OpenAIChatModelOptions);
+ export declare class GeminiChatModel extends ChatModel {
+     options?: GeminiChatModelOptions | undefined;
+     constructor(options?: GeminiChatModelOptions | undefined);
      protected apiKeyEnvName: string;
-     protected supportsToolsUseWithJsonSchema: boolean;
-     protected supportsParallelToolCalls: boolean;
-     protected supportsToolStreaming: boolean;
-     protected optionalFieldMode: "optional";
      protected _googleClient?: GoogleGenAI;
      get googleClient(): GoogleGenAI;
-     process(input: ChatModelInput, options: AgentInvokeOptions): PromiseOrValue<AgentProcessResult<ChatModelOutput>>;
-     private handleImageModelProcessing;
+     get credential(): {
+         apiKey: string | undefined;
+         model: string;
+     };
+     get modelOptions(): Omit<import("@aigne/core").ChatModelInputOptions, "model"> | undefined;
+     process(input: ChatModelInput): PromiseOrValue<AgentProcessResult<ChatModelOutput>>;
+     private processInput;
      private buildConfig;
      private buildTools;
      private buildContents;
-     getRunMessages(input: ChatModelInput): ReturnType<OpenAIChatModel["getRunMessages"]>;
  }
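The new `GeminiChatModelOptions` interface above replaces the OpenAI-compatible options that were removed. A minimal construction sketch based on that interface; the `@aigne/gemini` import is simply the package name, and treating `httpOptions.timeout` as a valid `GoogleGenAIOptions` field is an assumption about the `@google/genai` SDK:

```ts
import { GeminiChatModel } from "@aigne/gemini";

// apiKey may be omitted; per the interface docs above, the model then
// falls back to the GEMINI_API_KEY or GOOGLE_API_KEY environment variables.
const model = new GeminiChatModel({
  model: "gemini-2.0-flash",
  apiKey: process.env.GEMINI_API_KEY,
  // clientOptions are spread into the GoogleGenAI client constructor.
  clientOptions: { httpOptions: { timeout: 30_000 } },
});
```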
@@ -2,18 +2,18 @@
  Object.defineProperty(exports, "__esModule", { value: true });
  exports.GeminiChatModel = void 0;
  const core_1 = require("@aigne/core");
+ const logger_js_1 = require("@aigne/core/utils/logger.js");
+ const model_utils_js_1 = require("@aigne/core/utils/model-utils.js");
  const type_utils_js_1 = require("@aigne/core/utils/type-utils.js");
- const openai_1 = require("@aigne/openai");
  const uuid_1 = require("@aigne/uuid");
  const genai_1 = require("@google/genai");
- const GEMINI_BASE_URL = "https://generativelanguage.googleapis.com/v1beta/openai";
+ const zod_1 = require("zod");
+ const zod_to_json_schema_1 = require("zod-to-json-schema");
  const GEMINI_DEFAULT_CHAT_MODEL = "gemini-2.0-flash";
+ const OUTPUT_FUNCTION_NAME = "output";
  /**
   * Implementation of the ChatModel interface for Google's Gemini API
   *
-  * This model uses OpenAI-compatible API format to interact with Google's Gemini models,
-  * providing access to models like Gemini 1.5 and Gemini 2.0.
-  *
   * @example
   * Here's how to create and use a Gemini chat model:
   * {@includeCode ../test/gemini-chat-model.test.ts#example-gemini-chat-model}
@@ -22,19 +22,16 @@ const GEMINI_DEFAULT_CHAT_MODEL = "gemini-2.0-flash";
   * Here's an example with streaming response:
   * {@includeCode ../test/gemini-chat-model.test.ts#example-gemini-chat-model-streaming}
   */
- class GeminiChatModel extends openai_1.OpenAIChatModel {
+ class GeminiChatModel extends core_1.ChatModel {
+     options;
      constructor(options) {
          super({
              ...options,
              model: options?.model || GEMINI_DEFAULT_CHAT_MODEL,
-             baseURL: options?.baseURL || GEMINI_BASE_URL,
          });
+         this.options = options;
      }
      apiKeyEnvName = "GEMINI_API_KEY";
-     supportsToolsUseWithJsonSchema = false;
-     supportsParallelToolCalls = false;
-     supportsToolStreaming = false;
-     optionalFieldMode = "optional";
      _googleClient;
      get googleClient() {
          if (this._googleClient)
@@ -42,20 +39,33 @@ class GeminiChatModel extends openai_1.OpenAIChatModel {
          const { apiKey } = this.credential;
          if (!apiKey)
              throw new Error(`${this.name} requires an API key. Please provide it via \`options.apiKey\`, or set the \`${this.apiKeyEnvName}\` environment variable`);
-         this._googleClient ??= new genai_1.GoogleGenAI({ apiKey });
+         this._googleClient ??= new genai_1.GoogleGenAI({
+             apiKey,
+             ...this.options?.clientOptions,
+         });
          return this._googleClient;
      }
-     process(input, options) {
-         const model = input.modelOptions?.model || this.credential.model;
-         if (!model.includes("image"))
-             return super.process(input, options);
-         return this.handleImageModelProcessing(input);
+     get credential() {
+         const apiKey = this.options?.apiKey ||
+             process.env[this.apiKeyEnvName] ||
+             process.env.GEMINI_API_KEY ||
+             process.env.GOOGLE_API_KEY;
+         return {
+             apiKey,
+             model: this.options?.model || GEMINI_DEFAULT_CHAT_MODEL,
+         };
      }
-     async *handleImageModelProcessing(input) {
+     get modelOptions() {
+         return this.options?.modelOptions;
+     }
+     process(input) {
+         return this.processInput(input);
+     }
+     async *processInput(input) {
          const model = input.modelOptions?.model || this.credential.model;
          const { contents, config } = await this.buildContents(input);
          const parameters = {
-             model: model,
+             model,
              contents,
              config: {
                  responseModalities: input.modelOptions?.modalities,
@@ -64,12 +74,11 @@ class GeminiChatModel extends openai_1.OpenAIChatModel {
                  frequencyPenalty: input.modelOptions?.frequencyPenalty || this.modelOptions?.frequencyPenalty,
                  presencePenalty: input.modelOptions?.presencePenalty || this.modelOptions?.presencePenalty,
                  ...config,
-                 ...(await this.buildTools(input)),
                  ...(await this.buildConfig(input)),
              },
          };
          const response = await this.googleClient.models.generateContentStream(parameters);
-         const usage = {
+         let usage = {
              inputTokens: 0,
              outputTokens: 0,
          };
@@ -77,6 +86,7 @@
          const files = [];
          const toolCalls = [];
          let text = "";
+         let json;
          for await (const chunk of response) {
              if (!responseModel && chunk.modelVersion) {
                  responseModel = chunk.modelVersion;
@@ -100,34 +110,114 @@
                              });
                          }
                          if (part.functionCall?.name) {
-                             toolCalls.push({
-                                 id: part.functionCall.id || (0, uuid_1.v7)(),
-                                 type: "function",
-                                 function: {
-                                     name: part.functionCall.name,
-                                     arguments: part.functionCall.args || {},
-                                 },
-                             });
-                             yield { delta: { json: { toolCalls } } };
+                             if (part.functionCall.name === OUTPUT_FUNCTION_NAME) {
+                                 json = part.functionCall.args;
+                             }
+                             else {
+                                 toolCalls.push({
+                                     id: part.functionCall.id || (0, uuid_1.v7)(),
+                                     type: "function",
+                                     function: {
+                                         name: part.functionCall.name,
+                                         arguments: part.functionCall.args || {},
+                                     },
+                                 });
+                                 yield { delta: { json: { toolCalls } } };
+                             }
                          }
                      }
                  }
             }
             if (chunk.usageMetadata) {
-                 usage.inputTokens += chunk.usageMetadata.promptTokenCount || 0;
-                 usage.outputTokens += chunk.usageMetadata.candidatesTokenCount || 0;
+                 if (chunk.usageMetadata.promptTokenCount)
+                     usage.inputTokens = chunk.usageMetadata.promptTokenCount;
+                 if (chunk.usageMetadata.candidatesTokenCount)
+                     usage.outputTokens = chunk.usageMetadata.candidatesTokenCount;
             }
         }
         if (input.responseFormat?.type === "json_schema") {
-             yield { delta: { json: { json: (0, core_1.safeParseJSON)(text) } } };
+             if (json) {
+                 yield { delta: { json: { json } } };
+             }
+             else if (text) {
+                 yield { delta: { json: { json: (0, core_1.safeParseJSON)(text) } } };
+             }
+             else if (!toolCalls.length) {
+                 throw new Error("No JSON response from the model");
+             }
+         }
+         else if (!toolCalls.length) {
+             // NOTE: gemini-2.5-pro sometimes returns an empty response,
+             // so we check here and retry with structured output mode (empty responses occur less frequently with tool calls)
+             if (!text) {
+                 logger_js_1.logger.warn("Empty response from Gemini, retrying with structured output mode");
+                 try {
+                     const outputSchema = zod_1.z.object({
+                         output: zod_1.z.string().describe("The final answer from the model"),
+                     });
+                     const response = await this.process({
+                         ...input,
+                         responseFormat: {
+                             type: "json_schema",
+                             jsonSchema: {
+                                 name: "output",
+                                 schema: (0, zod_to_json_schema_1.zodToJsonSchema)(outputSchema),
+                             },
+                         },
+                     });
+                     const result = await (0, core_1.agentProcessResultToObject)(response);
+                     // Merge the retry usage with the original usage
+                     usage = (0, model_utils_js_1.mergeUsage)(usage, result.usage);
+                     // Return the tool calls if the retry produced tool calls
+                     if (result.toolCalls?.length) {
+                         toolCalls.push(...result.toolCalls);
+                         yield { delta: { json: { toolCalls } } };
+                     }
+                     // Return the text from the structured output of the retry
+                     else {
+                         if (!result.json)
+                             throw new Error("Retrying with structured output mode got no json response");
+                         const parsed = outputSchema.safeParse(result.json);
+                         if (!parsed.success)
+                             throw new Error("Retrying with structured output mode got invalid json response");
+                         text = parsed.data.output;
+                         yield { delta: { text: { text } } };
+                         logger_js_1.logger.warn("Empty response from Gemini, retried with structured output mode successfully");
+                     }
+                 }
+                 catch (error) {
+                     logger_js_1.logger.error("Empty response from Gemini, retrying with structured output mode failed", error);
+                     throw new core_1.StructuredOutputError("No response from the model");
+                 }
+             }
         }
-         yield { delta: { json: { usage, files } } };
+         yield { delta: { json: { usage, files: files.length ? files : undefined } } };
     }
     async buildConfig(input) {
         const config = {};
+         const { tools, toolConfig } = await this.buildTools(input);
+         config.tools = tools;
+         config.toolConfig = toolConfig;
         if (input.responseFormat?.type === "json_schema") {
-             config.responseJsonSchema = input.responseFormat.jsonSchema.schema;
-             config.responseMimeType = "application/json";
+             if (config.tools?.length) {
+                 config.tools.push({
+                     functionDeclarations: [
+                         {
+                             name: OUTPUT_FUNCTION_NAME,
+                             description: "Output the final response",
+                             parametersJsonSchema: input.responseFormat.jsonSchema.schema,
+                         },
+                     ],
+                 });
+                 config.toolConfig = {
+                     ...config.toolConfig,
+                     functionCallingConfig: { mode: genai_1.FunctionCallingConfigMode.ANY },
+                 };
+             }
+             else {
+                 config.responseJsonSchema = input.responseFormat.jsonSchema.schema;
+                 config.responseMimeType = "application/json";
+             }
         }
         return config;
     }
@@ -178,7 +268,7 @@ class GeminiChatModel extends openai_1.OpenAIChatModel {
                 return;
             }
             const content = {
-                 role: msg.role === "agent" ? "model" : "user",
+                 role: msg.role === "agent" ? "model" : msg.role === "user" ? "user" : undefined,
             };
             if (msg.toolCalls) {
                 content.parts = msg.toolCalls.map((call) => ({
@@ -195,12 +285,31 @@ class GeminiChatModel extends openai_1.OpenAIChatModel {
                     .find((c) => c?.id === msg.toolCallId);
                 if (!call)
                     throw new Error(`Tool call not found: ${msg.toolCallId}`);
+                 const output = JSON.parse(msg.content);
+                 const isError = "error" in output && Boolean(output.error);
+                 const response = {
+                     tool: call.function.name,
+                 };
+                 // NOTE: based on the Gemini API documentation, the response should include an `output` field for a successful result or an `error` field for a failed result.
+                 // Based on actual testing, adding a `tool` field with the tool name helps the LLM understand which tool was called.
+                 if (isError) {
+                     Object.assign(response, { status: "error" }, output);
+                 }
+                 else {
+                     Object.assign(response, { status: "success" });
+                     if ("output" in output) {
+                         Object.assign(response, output);
+                     }
+                     else {
+                         Object.assign(response, { output });
+                     }
+                 }
                 content.parts = [
                     {
                         functionResponse: {
                             id: msg.toolCallId,
                             name: call.function.name,
-                             response: JSON.parse(msg.content),
+                             response,
                         },
                     },
                 ];
@@ -224,24 +333,17 @@ class GeminiChatModel extends openai_1.OpenAIChatModel {
             }
             return content;
         }))).filter(type_utils_js_1.isNonNullable);
+         if (!result.contents.length && systemParts.length) {
+             const system = systemParts.pop();
+             if (system) {
+                 result.contents.push({ role: "user", parts: [system] });
+             }
+         }
         if (systemParts.length) {
             result.config ??= {};
             result.config.systemInstruction = systemParts;
         }
         return result;
     }
-     async getRunMessages(input) {
-         const messages = await super.getRunMessages(input);
-         if (!messages.some((i) => i.role === "user")) {
-             for (const msg of messages) {
-                 if (msg.role === "system") {
-                     // Ensure the last message is from the user
-                     msg.role = "user";
-                     break;
-                 }
-             }
-         }
-         return messages;
-     }
  }
  exports.GeminiChatModel = GeminiChatModel;
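One behavioral change in this file is worth spelling out: when a `json_schema` response format is requested while tools are configured, `buildConfig` now exposes the schema as an extra function declaration named `output` and forces function calling with `FunctionCallingConfigMode.ANY`, so the structured result arrives as a function call; `responseJsonSchema` is only used when no tools are present. A standalone sketch of that pattern against `@google/genai` (the model name and schema here are illustrative, not taken from this package):

```ts
import { FunctionCallingConfigMode, GoogleGenAI } from "@google/genai";

const client = new GoogleGenAI({ apiKey: process.env.GEMINI_API_KEY });

const response = await client.models.generateContent({
  model: "gemini-2.0-flash",
  contents: "What is the capital of France?",
  config: {
    tools: [
      {
        functionDeclarations: [
          {
            // The desired output schema is registered as a pseudo-tool...
            name: "output",
            description: "Output the final response",
            parametersJsonSchema: {
              type: "object",
              properties: { answer: { type: "string" } },
              required: ["answer"],
            },
          },
        ],
      },
    ],
    // ...and ANY mode forces the model to call some function, so the
    // structured result comes back as functionCall.args.
    toolConfig: { functionCallingConfig: { mode: FunctionCallingConfigMode.ANY } },
  },
});

console.log(response.functionCalls?.[0]?.args); // e.g. { answer: "Paris" }
```

The other notable change is the empty-response fallback: `processInput` re-invokes `process` with a one-field `{ output: string }` schema, merges the retry's token usage into the original counters via `mergeUsage`, and only throws `StructuredOutputError` if the retry also comes back empty.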
@@ -1,13 +1,21 @@
- import { type AgentInvokeOptions, type AgentProcessResult, type ChatModelInput, type ChatModelOutput } from "@aigne/core";
+ import { type AgentProcessResult, ChatModel, type ChatModelInput, type ChatModelOptions, type ChatModelOutput } from "@aigne/core";
  import { type PromiseOrValue } from "@aigne/core/utils/type-utils.js";
- import { OpenAIChatModel, type OpenAIChatModelOptions } from "@aigne/openai";
- import { GoogleGenAI } from "@google/genai";
+ import { GoogleGenAI, type GoogleGenAIOptions } from "@google/genai";
+ export interface GeminiChatModelOptions extends ChatModelOptions {
+     /**
+      * API key for Gemini API
+      *
+      * If not provided, will look for GEMINI_API_KEY or GOOGLE_API_KEY in environment variables
+      */
+     apiKey?: string;
+     /**
+      * Optional client options for the Gemini SDK
+      */
+     clientOptions?: Partial<GoogleGenAIOptions>;
+ }
  /**
   * Implementation of the ChatModel interface for Google's Gemini API
   *
-  * This model uses OpenAI-compatible API format to interact with Google's Gemini models,
-  * providing access to models like Gemini 1.5 and Gemini 2.0.
-  *
   * @example
   * Here's how to create and use a Gemini chat model:
   * {@includeCode ../test/gemini-chat-model.test.ts#example-gemini-chat-model}
@@ -16,19 +24,20 @@ import { GoogleGenAI } from "@google/genai";
   * Here's an example with streaming response:
   * {@includeCode ../test/gemini-chat-model.test.ts#example-gemini-chat-model-streaming}
   */
- export declare class GeminiChatModel extends OpenAIChatModel {
-     constructor(options?: OpenAIChatModelOptions);
+ export declare class GeminiChatModel extends ChatModel {
+     options?: GeminiChatModelOptions | undefined;
+     constructor(options?: GeminiChatModelOptions | undefined);
      protected apiKeyEnvName: string;
-     protected supportsToolsUseWithJsonSchema: boolean;
-     protected supportsParallelToolCalls: boolean;
-     protected supportsToolStreaming: boolean;
-     protected optionalFieldMode: "optional";
      protected _googleClient?: GoogleGenAI;
      get googleClient(): GoogleGenAI;
-     process(input: ChatModelInput, options: AgentInvokeOptions): PromiseOrValue<AgentProcessResult<ChatModelOutput>>;
-     private handleImageModelProcessing;
+     get credential(): {
+         apiKey: string | undefined;
+         model: string;
+     };
+     get modelOptions(): Omit<import("@aigne/core").ChatModelInputOptions, "model"> | undefined;
+     process(input: ChatModelInput): PromiseOrValue<AgentProcessResult<ChatModelOutput>>;
+     private processInput;
      private buildConfig;
      private buildTools;
      private buildContents;
-     getRunMessages(input: ChatModelInput): ReturnType<OpenAIChatModel["getRunMessages"]>;
  }
@@ -1,13 +1,21 @@
- import { type AgentInvokeOptions, type AgentProcessResult, type ChatModelInput, type ChatModelOutput } from "@aigne/core";
+ import { type AgentProcessResult, ChatModel, type ChatModelInput, type ChatModelOptions, type ChatModelOutput } from "@aigne/core";
  import { type PromiseOrValue } from "@aigne/core/utils/type-utils.js";
- import { OpenAIChatModel, type OpenAIChatModelOptions } from "@aigne/openai";
- import { GoogleGenAI } from "@google/genai";
+ import { GoogleGenAI, type GoogleGenAIOptions } from "@google/genai";
+ export interface GeminiChatModelOptions extends ChatModelOptions {
+     /**
+      * API key for Gemini API
+      *
+      * If not provided, will look for GEMINI_API_KEY or GOOGLE_API_KEY in environment variables
+      */
+     apiKey?: string;
+     /**
+      * Optional client options for the Gemini SDK
+      */
+     clientOptions?: Partial<GoogleGenAIOptions>;
+ }
  /**
   * Implementation of the ChatModel interface for Google's Gemini API
   *
-  * This model uses OpenAI-compatible API format to interact with Google's Gemini models,
-  * providing access to models like Gemini 1.5 and Gemini 2.0.
-  *
   * @example
   * Here's how to create and use a Gemini chat model:
   * {@includeCode ../test/gemini-chat-model.test.ts#example-gemini-chat-model}
@@ -16,19 +24,20 @@ import { GoogleGenAI } from "@google/genai";
   * Here's an example with streaming response:
   * {@includeCode ../test/gemini-chat-model.test.ts#example-gemini-chat-model-streaming}
   */
- export declare class GeminiChatModel extends OpenAIChatModel {
-     constructor(options?: OpenAIChatModelOptions);
+ export declare class GeminiChatModel extends ChatModel {
+     options?: GeminiChatModelOptions | undefined;
+     constructor(options?: GeminiChatModelOptions | undefined);
      protected apiKeyEnvName: string;
-     protected supportsToolsUseWithJsonSchema: boolean;
-     protected supportsParallelToolCalls: boolean;
-     protected supportsToolStreaming: boolean;
-     protected optionalFieldMode: "optional";
      protected _googleClient?: GoogleGenAI;
      get googleClient(): GoogleGenAI;
-     process(input: ChatModelInput, options: AgentInvokeOptions): PromiseOrValue<AgentProcessResult<ChatModelOutput>>;
-     private handleImageModelProcessing;
+     get credential(): {
+         apiKey: string | undefined;
+         model: string;
+     };
+     get modelOptions(): Omit<import("@aigne/core").ChatModelInputOptions, "model"> | undefined;
+     process(input: ChatModelInput): PromiseOrValue<AgentProcessResult<ChatModelOutput>>;
+     private processInput;
      private buildConfig;
      private buildTools;
      private buildContents;
-     getRunMessages(input: ChatModelInput): ReturnType<OpenAIChatModel["getRunMessages"]>;
  }
@@ -1,16 +1,16 @@
- import { safeParseJSON, } from "@aigne/core";
+ import { agentProcessResultToObject, ChatModel, StructuredOutputError, safeParseJSON, } from "@aigne/core";
+ import { logger } from "@aigne/core/utils/logger.js";
+ import { mergeUsage } from "@aigne/core/utils/model-utils.js";
  import { isNonNullable } from "@aigne/core/utils/type-utils.js";
- import { OpenAIChatModel } from "@aigne/openai";
  import { v7 } from "@aigne/uuid";
  import { FunctionCallingConfigMode, GoogleGenAI, } from "@google/genai";
- const GEMINI_BASE_URL = "https://generativelanguage.googleapis.com/v1beta/openai";
+ import { z } from "zod";
+ import { zodToJsonSchema } from "zod-to-json-schema";
  const GEMINI_DEFAULT_CHAT_MODEL = "gemini-2.0-flash";
+ const OUTPUT_FUNCTION_NAME = "output";
  /**
   * Implementation of the ChatModel interface for Google's Gemini API
   *
-  * This model uses OpenAI-compatible API format to interact with Google's Gemini models,
-  * providing access to models like Gemini 1.5 and Gemini 2.0.
-  *
   * @example
   * Here's how to create and use a Gemini chat model:
   * {@includeCode ../test/gemini-chat-model.test.ts#example-gemini-chat-model}
@@ -19,19 +19,16 @@ const GEMINI_DEFAULT_CHAT_MODEL = "gemini-2.0-flash";
   * Here's an example with streaming response:
   * {@includeCode ../test/gemini-chat-model.test.ts#example-gemini-chat-model-streaming}
   */
- export class GeminiChatModel extends OpenAIChatModel {
+ export class GeminiChatModel extends ChatModel {
+     options;
      constructor(options) {
          super({
              ...options,
              model: options?.model || GEMINI_DEFAULT_CHAT_MODEL,
-             baseURL: options?.baseURL || GEMINI_BASE_URL,
          });
+         this.options = options;
      }
      apiKeyEnvName = "GEMINI_API_KEY";
-     supportsToolsUseWithJsonSchema = false;
-     supportsParallelToolCalls = false;
-     supportsToolStreaming = false;
-     optionalFieldMode = "optional";
      _googleClient;
      get googleClient() {
          if (this._googleClient)
@@ -39,20 +36,33 @@ export class GeminiChatModel extends OpenAIChatModel {
          const { apiKey } = this.credential;
          if (!apiKey)
              throw new Error(`${this.name} requires an API key. Please provide it via \`options.apiKey\`, or set the \`${this.apiKeyEnvName}\` environment variable`);
-         this._googleClient ??= new GoogleGenAI({ apiKey });
+         this._googleClient ??= new GoogleGenAI({
+             apiKey,
+             ...this.options?.clientOptions,
+         });
          return this._googleClient;
      }
-     process(input, options) {
-         const model = input.modelOptions?.model || this.credential.model;
-         if (!model.includes("image"))
-             return super.process(input, options);
-         return this.handleImageModelProcessing(input);
+     get credential() {
+         const apiKey = this.options?.apiKey ||
+             process.env[this.apiKeyEnvName] ||
+             process.env.GEMINI_API_KEY ||
+             process.env.GOOGLE_API_KEY;
+         return {
+             apiKey,
+             model: this.options?.model || GEMINI_DEFAULT_CHAT_MODEL,
+         };
      }
-     async *handleImageModelProcessing(input) {
+     get modelOptions() {
+         return this.options?.modelOptions;
+     }
+     process(input) {
+         return this.processInput(input);
+     }
+     async *processInput(input) {
          const model = input.modelOptions?.model || this.credential.model;
          const { contents, config } = await this.buildContents(input);
          const parameters = {
-             model: model,
+             model,
              contents,
              config: {
                  responseModalities: input.modelOptions?.modalities,
@@ -61,12 +71,11 @@ export class GeminiChatModel extends OpenAIChatModel {
                  frequencyPenalty: input.modelOptions?.frequencyPenalty || this.modelOptions?.frequencyPenalty,
                  presencePenalty: input.modelOptions?.presencePenalty || this.modelOptions?.presencePenalty,
                  ...config,
-                 ...(await this.buildTools(input)),
                  ...(await this.buildConfig(input)),
              },
          };
          const response = await this.googleClient.models.generateContentStream(parameters);
-         const usage = {
+         let usage = {
              inputTokens: 0,
              outputTokens: 0,
          };
@@ -74,6 +83,7 @@
          const files = [];
          const toolCalls = [];
          let text = "";
+         let json;
          for await (const chunk of response) {
              if (!responseModel && chunk.modelVersion) {
                  responseModel = chunk.modelVersion;
@@ -97,34 +107,114 @@
                              });
                          }
                          if (part.functionCall?.name) {
-                             toolCalls.push({
-                                 id: part.functionCall.id || v7(),
-                                 type: "function",
-                                 function: {
-                                     name: part.functionCall.name,
-                                     arguments: part.functionCall.args || {},
-                                 },
-                             });
-                             yield { delta: { json: { toolCalls } } };
+                             if (part.functionCall.name === OUTPUT_FUNCTION_NAME) {
+                                 json = part.functionCall.args;
+                             }
+                             else {
+                                 toolCalls.push({
+                                     id: part.functionCall.id || v7(),
+                                     type: "function",
+                                     function: {
+                                         name: part.functionCall.name,
+                                         arguments: part.functionCall.args || {},
+                                     },
+                                 });
+                                 yield { delta: { json: { toolCalls } } };
+                             }
                          }
                      }
                  }
             }
             if (chunk.usageMetadata) {
-                 usage.inputTokens += chunk.usageMetadata.promptTokenCount || 0;
-                 usage.outputTokens += chunk.usageMetadata.candidatesTokenCount || 0;
+                 if (chunk.usageMetadata.promptTokenCount)
+                     usage.inputTokens = chunk.usageMetadata.promptTokenCount;
+                 if (chunk.usageMetadata.candidatesTokenCount)
+                     usage.outputTokens = chunk.usageMetadata.candidatesTokenCount;
             }
         }
         if (input.responseFormat?.type === "json_schema") {
-             yield { delta: { json: { json: safeParseJSON(text) } } };
+             if (json) {
+                 yield { delta: { json: { json } } };
+             }
+             else if (text) {
+                 yield { delta: { json: { json: safeParseJSON(text) } } };
+             }
+             else if (!toolCalls.length) {
+                 throw new Error("No JSON response from the model");
+             }
+         }
+         else if (!toolCalls.length) {
+             // NOTE: gemini-2.5-pro sometimes returns an empty response,
+             // so we check here and retry with structured output mode (empty responses occur less frequently with tool calls)
+             if (!text) {
+                 logger.warn("Empty response from Gemini, retrying with structured output mode");
+                 try {
+                     const outputSchema = z.object({
+                         output: z.string().describe("The final answer from the model"),
+                     });
+                     const response = await this.process({
+                         ...input,
+                         responseFormat: {
+                             type: "json_schema",
+                             jsonSchema: {
+                                 name: "output",
+                                 schema: zodToJsonSchema(outputSchema),
+                             },
+                         },
+                     });
+                     const result = await agentProcessResultToObject(response);
+                     // Merge the retry usage with the original usage
+                     usage = mergeUsage(usage, result.usage);
+                     // Return the tool calls if the retry produced tool calls
+                     if (result.toolCalls?.length) {
+                         toolCalls.push(...result.toolCalls);
+                         yield { delta: { json: { toolCalls } } };
+                     }
+                     // Return the text from the structured output of the retry
+                     else {
+                         if (!result.json)
+                             throw new Error("Retrying with structured output mode got no json response");
+                         const parsed = outputSchema.safeParse(result.json);
+                         if (!parsed.success)
+                             throw new Error("Retrying with structured output mode got invalid json response");
+                         text = parsed.data.output;
+                         yield { delta: { text: { text } } };
+                         logger.warn("Empty response from Gemini, retried with structured output mode successfully");
+                     }
+                 }
+                 catch (error) {
+                     logger.error("Empty response from Gemini, retrying with structured output mode failed", error);
+                     throw new StructuredOutputError("No response from the model");
+                 }
+             }
         }
-         yield { delta: { json: { usage, files } } };
+         yield { delta: { json: { usage, files: files.length ? files : undefined } } };
     }
     async buildConfig(input) {
         const config = {};
+         const { tools, toolConfig } = await this.buildTools(input);
+         config.tools = tools;
+         config.toolConfig = toolConfig;
         if (input.responseFormat?.type === "json_schema") {
-             config.responseJsonSchema = input.responseFormat.jsonSchema.schema;
-             config.responseMimeType = "application/json";
+             if (config.tools?.length) {
+                 config.tools.push({
+                     functionDeclarations: [
+                         {
+                             name: OUTPUT_FUNCTION_NAME,
+                             description: "Output the final response",
+                             parametersJsonSchema: input.responseFormat.jsonSchema.schema,
+                         },
+                     ],
+                 });
+                 config.toolConfig = {
+                     ...config.toolConfig,
+                     functionCallingConfig: { mode: FunctionCallingConfigMode.ANY },
+                 };
+             }
+             else {
+                 config.responseJsonSchema = input.responseFormat.jsonSchema.schema;
+                 config.responseMimeType = "application/json";
+             }
         }
         return config;
     }
@@ -175,7 +265,7 @@ export class GeminiChatModel extends OpenAIChatModel {
                 return;
             }
             const content = {
-                 role: msg.role === "agent" ? "model" : "user",
+                 role: msg.role === "agent" ? "model" : msg.role === "user" ? "user" : undefined,
             };
             if (msg.toolCalls) {
                 content.parts = msg.toolCalls.map((call) => ({
@@ -192,12 +282,31 @@ export class GeminiChatModel extends OpenAIChatModel {
                     .find((c) => c?.id === msg.toolCallId);
                 if (!call)
                     throw new Error(`Tool call not found: ${msg.toolCallId}`);
+                 const output = JSON.parse(msg.content);
+                 const isError = "error" in output && Boolean(output.error);
+                 const response = {
+                     tool: call.function.name,
+                 };
+                 // NOTE: based on the Gemini API documentation, the response should include an `output` field for a successful result or an `error` field for a failed result.
+                 // Based on actual testing, adding a `tool` field with the tool name helps the LLM understand which tool was called.
+                 if (isError) {
+                     Object.assign(response, { status: "error" }, output);
+                 }
+                 else {
+                     Object.assign(response, { status: "success" });
+                     if ("output" in output) {
+                         Object.assign(response, output);
+                     }
+                     else {
+                         Object.assign(response, { output });
+                     }
+                 }
                 content.parts = [
                     {
                         functionResponse: {
                             id: msg.toolCallId,
                             name: call.function.name,
-                             response: JSON.parse(msg.content),
+                             response,
                         },
                     },
                 ];
@@ -221,23 +330,16 @@ export class GeminiChatModel extends OpenAIChatModel {
             }
             return content;
         }))).filter(isNonNullable);
+         if (!result.contents.length && systemParts.length) {
+             const system = systemParts.pop();
+             if (system) {
+                 result.contents.push({ role: "user", parts: [system] });
+             }
+         }
         if (systemParts.length) {
             result.config ??= {};
             result.config.systemInstruction = systemParts;
         }
         return result;
     }
-     async getRunMessages(input) {
-         const messages = await super.getRunMessages(input);
-         if (!messages.some((i) => i.role === "user")) {
-             for (const msg of messages) {
-                 if (msg.role === "system") {
-                     // Ensure the last message is from the user
-                     msg.role = "user";
-                     break;
-                 }
-             }
-         }
-         return messages;
-     }
  }
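The other notable change in `buildContents` is the shape of tool results: instead of passing the parsed tool output through as the `functionResponse.response` verbatim, it is now wrapped with explicit `tool` and `status` fields. A small illustrative helper (the function name is hypothetical; the wrapping logic mirrors the code above):

```ts
// Hypothetical helper mirroring the wrapping logic in buildContents above.
function buildFunctionResponsePayload(
  toolName: string,
  result: Record<string, unknown>,
): Record<string, unknown> {
  const isError = "error" in result && Boolean(result.error);
  if (isError) {
    // Failed results keep their `error` field and are marked as errors.
    return { tool: toolName, status: "error", ...result };
  }
  // Successful results are nested under `output` unless they already have one.
  return "output" in result
    ? { tool: toolName, status: "success", ...result }
    : { tool: toolName, status: "success", output: result };
}

// { tool: "get_weather", status: "success", output: { tempC: 21 } }
console.log(buildFunctionResponsePayload("get_weather", { tempC: 21 }));

// { tool: "get_weather", status: "error", error: "city not found" }
console.log(buildFunctionResponsePayload("get_weather", { error: "city not found" }));
```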
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
    "name": "@aigne/gemini",
-   "version": "0.14.2-beta.8",
+   "version": "0.14.2",
    "description": "AIGNE Gemini SDK for integrating with Google's Gemini AI models",
    "publishConfig": {
      "access": "public"
@@ -38,8 +38,9 @@
      "@aigne/uuid": "^13.0.1",
      "@google/genai": "^1.24.0",
      "zod": "^3.25.67",
-     "@aigne/platform-helpers": "^0.6.3",
-     "@aigne/openai": "^0.16.2-beta.8"
+     "zod-to-json-schema": "^3.24.6",
+     "@aigne/core": "^1.63.0",
+     "@aigne/platform-helpers": "^0.6.3"
    },
    "devDependencies": {
      "@types/bun": "^1.2.22",
@@ -47,8 +48,7 @@
      "npm-run-all": "^4.1.5",
      "rimraf": "^6.0.1",
      "typescript": "^5.9.2",
-     "@aigne/core": "^1.63.0-beta.8",
-     "@aigne/test-utils": "^0.5.55-beta.8"
+     "@aigne/test-utils": "^0.5.55"
    },
    "scripts": {
      "lint": "tsc --noEmit",