@aigne/gemini 0.14.2-beta.8 → 0.14.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,76 @@
  # Changelog
 
+ ## [0.14.2](https://github.com/AIGNE-io/aigne-framework/compare/gemini-v0.14.2-beta.12...gemini-v0.14.2) (2025-10-19)
+
+
+ ### Dependencies
+
+ * The following workspace dependencies were updated
+   * dependencies
+     * @aigne/core bumped to 1.63.0
+   * devDependencies
+     * @aigne/test-utils bumped to 0.5.55
+
+ ## [0.14.2-beta.12](https://github.com/AIGNE-io/aigne-framework/compare/gemini-v0.14.2-beta.11...gemini-v0.14.2-beta.12) (2025-10-17)
+
+
+ ### Dependencies
+
+ * The following workspace dependencies were updated
+   * dependencies
+     * @aigne/core bumped to 1.63.0-beta.12
+   * devDependencies
+     * @aigne/test-utils bumped to 0.5.55-beta.12
+
+ ## [0.14.2-beta.11](https://github.com/AIGNE-io/aigne-framework/compare/gemini-v0.14.2-beta.10...gemini-v0.14.2-beta.11) (2025-10-17)
+
+
+ ### Bug Fixes
+
+ * **gemini:** implement retry mechanism for empty responses with structured output fallback ([#638](https://github.com/AIGNE-io/aigne-framework/issues/638)) ([d33c8bb](https://github.com/AIGNE-io/aigne-framework/commit/d33c8bb9711aadddef9687d6cf472a179cd8ed9c))
+
+
+ ### Dependencies
+
+ * The following workspace dependencies were updated
+   * dependencies
+     * @aigne/core bumped to 1.63.0-beta.11
+   * devDependencies
+     * @aigne/test-utils bumped to 0.5.55-beta.11
+
+ ## [0.14.2-beta.10](https://github.com/AIGNE-io/aigne-framework/compare/gemini-v0.14.2-beta.9...gemini-v0.14.2-beta.10) (2025-10-16)
+
+
+ ### Bug Fixes
+
+ * correctly calculate token usage for gemini model ([7fd1328](https://github.com/AIGNE-io/aigne-framework/commit/7fd13289d3d0f8e062211f7c6dd5cb56e5318c1b))
+
+
+ ### Dependencies
+
+ * The following workspace dependencies were updated
+   * dependencies
+     * @aigne/core bumped to 1.63.0-beta.10
+   * devDependencies
+     * @aigne/test-utils bumped to 0.5.55-beta.10
+
+ ## [0.14.2-beta.9](https://github.com/AIGNE-io/aigne-framework/compare/gemini-v0.14.2-beta.8...gemini-v0.14.2-beta.9) (2025-10-16)
+
+
+ ### Bug Fixes
+
+ * **models:** auto retry when getting an empty response from gemini ([#636](https://github.com/AIGNE-io/aigne-framework/issues/636)) ([9367cef](https://github.com/AIGNE-io/aigne-framework/commit/9367cef49ea4c0c87b8a36b454deb2efaee6886f))
+ * **models:** enhance gemini model tool use with status fields ([#634](https://github.com/AIGNE-io/aigne-framework/issues/634)) ([067b175](https://github.com/AIGNE-io/aigne-framework/commit/067b175c8e31bb5b1a6d0fc5a5cfb2d070d8d709))
+
+
+ ### Dependencies
+
+ * The following workspace dependencies were updated
+   * dependencies
+     * @aigne/core bumped to 1.63.0-beta.9
+   * devDependencies
+     * @aigne/test-utils bumped to 0.5.55-beta.9
+
  ## [0.14.2-beta.8](https://github.com/AIGNE-io/aigne-framework/compare/gemini-v0.14.2-beta.7...gemini-v0.14.2-beta.8) (2025-10-16)
 
 
@@ -1,13 +1,21 @@
- import { type AgentInvokeOptions, type AgentProcessResult, type ChatModelInput, type ChatModelOutput } from "@aigne/core";
+ import { type AgentProcessResult, ChatModel, type ChatModelInput, type ChatModelOptions, type ChatModelOutput } from "@aigne/core";
  import { type PromiseOrValue } from "@aigne/core/utils/type-utils.js";
- import { OpenAIChatModel, type OpenAIChatModelOptions } from "@aigne/openai";
- import { GoogleGenAI } from "@google/genai";
+ import { GoogleGenAI, type GoogleGenAIOptions } from "@google/genai";
+ export interface GeminiChatModelOptions extends ChatModelOptions {
+     /**
+      * API key for Gemini API
+      *
+      * If not provided, will look for GEMINI_API_KEY or GOOGLE_API_KEY in environment variables
+      */
+     apiKey?: string;
+     /**
+      * Optional client options for the Gemini SDK
+      */
+     clientOptions?: Partial<GoogleGenAIOptions>;
+ }
  /**
   * Implementation of the ChatModel interface for Google's Gemini API
   *
-  * This model uses OpenAI-compatible API format to interact with Google's Gemini models,
-  * providing access to models like Gemini 1.5 and Gemini 2.0.
-  *
   * @example
   * Here's how to create and use a Gemini chat model:
   * {@includeCode ../test/gemini-chat-model.test.ts#example-gemini-chat-model}
@@ -16,19 +24,20 @@ import { GoogleGenAI } from "@google/genai";
   * Here's an example with streaming response:
   * {@includeCode ../test/gemini-chat-model.test.ts#example-gemini-chat-model-streaming}
   */
- export declare class GeminiChatModel extends OpenAIChatModel {
-     constructor(options?: OpenAIChatModelOptions);
+ export declare class GeminiChatModel extends ChatModel {
+     options?: GeminiChatModelOptions | undefined;
+     constructor(options?: GeminiChatModelOptions | undefined);
      protected apiKeyEnvName: string;
-     protected supportsToolsUseWithJsonSchema: boolean;
-     protected supportsParallelToolCalls: boolean;
-     protected supportsToolStreaming: boolean;
-     protected optionalFieldMode: "optional";
      protected _googleClient?: GoogleGenAI;
      get googleClient(): GoogleGenAI;
-     process(input: ChatModelInput, options: AgentInvokeOptions): PromiseOrValue<AgentProcessResult<ChatModelOutput>>;
-     private handleImageModelProcessing;
+     get credential(): {
+         apiKey: string | undefined;
+         model: string;
+     };
+     get modelOptions(): Omit<import("@aigne/core").ChatModelInputOptions, "model"> | undefined;
+     process(input: ChatModelInput): PromiseOrValue<AgentProcessResult<ChatModelOutput>>;
+     private processInput;
      private buildConfig;
      private buildTools;
      private buildContents;
-     getRunMessages(input: ChatModelInput): ReturnType<OpenAIChatModel["getRunMessages"]>;
  }
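The new `GeminiChatModelOptions` interface above replaces the OpenAI-compatible options that were removed. A minimal construction sketch based on that interface; the `@aigne/gemini` import is simply the package name, and treating `httpOptions.timeout` as a valid `GoogleGenAIOptions` field is an assumption about the `@google/genai` SDK:

```ts
import { GeminiChatModel } from "@aigne/gemini";

// apiKey may be omitted; per the interface docs above, the model then
// falls back to the GEMINI_API_KEY or GOOGLE_API_KEY environment variables.
const model = new GeminiChatModel({
  model: "gemini-2.0-flash",
  apiKey: process.env.GEMINI_API_KEY,
  // clientOptions are spread into the GoogleGenAI client constructor.
  clientOptions: { httpOptions: { timeout: 30_000 } },
});
```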
@@ -2,18 +2,18 @@
  Object.defineProperty(exports, "__esModule", { value: true });
  exports.GeminiChatModel = void 0;
  const core_1 = require("@aigne/core");
+ const logger_js_1 = require("@aigne/core/utils/logger.js");
+ const model_utils_js_1 = require("@aigne/core/utils/model-utils.js");
  const type_utils_js_1 = require("@aigne/core/utils/type-utils.js");
- const openai_1 = require("@aigne/openai");
  const uuid_1 = require("@aigne/uuid");
  const genai_1 = require("@google/genai");
- const GEMINI_BASE_URL = "https://generativelanguage.googleapis.com/v1beta/openai";
+ const zod_1 = require("zod");
+ const zod_to_json_schema_1 = require("zod-to-json-schema");
  const GEMINI_DEFAULT_CHAT_MODEL = "gemini-2.0-flash";
+ const OUTPUT_FUNCTION_NAME = "output";
  /**
   * Implementation of the ChatModel interface for Google's Gemini API
   *
-  * This model uses OpenAI-compatible API format to interact with Google's Gemini models,
-  * providing access to models like Gemini 1.5 and Gemini 2.0.
-  *
   * @example
   * Here's how to create and use a Gemini chat model:
   * {@includeCode ../test/gemini-chat-model.test.ts#example-gemini-chat-model}
@@ -22,19 +22,16 @@ const GEMINI_DEFAULT_CHAT_MODEL = "gemini-2.0-flash";
   * Here's an example with streaming response:
   * {@includeCode ../test/gemini-chat-model.test.ts#example-gemini-chat-model-streaming}
   */
- class GeminiChatModel extends openai_1.OpenAIChatModel {
+ class GeminiChatModel extends core_1.ChatModel {
+     options;
      constructor(options) {
          super({
              ...options,
              model: options?.model || GEMINI_DEFAULT_CHAT_MODEL,
-             baseURL: options?.baseURL || GEMINI_BASE_URL,
          });
+         this.options = options;
      }
      apiKeyEnvName = "GEMINI_API_KEY";
-     supportsToolsUseWithJsonSchema = false;
-     supportsParallelToolCalls = false;
-     supportsToolStreaming = false;
-     optionalFieldMode = "optional";
      _googleClient;
      get googleClient() {
          if (this._googleClient)
@@ -42,20 +39,33 @@ class GeminiChatModel extends openai_1.OpenAIChatModel {
          const { apiKey } = this.credential;
          if (!apiKey)
              throw new Error(`${this.name} requires an API key. Please provide it via \`options.apiKey\`, or set the \`${this.apiKeyEnvName}\` environment variable`);
-         this._googleClient ??= new genai_1.GoogleGenAI({ apiKey });
+         this._googleClient ??= new genai_1.GoogleGenAI({
+             apiKey,
+             ...this.options?.clientOptions,
+         });
          return this._googleClient;
      }
-     process(input, options) {
-         const model = input.modelOptions?.model || this.credential.model;
-         if (!model.includes("image"))
-             return super.process(input, options);
-         return this.handleImageModelProcessing(input);
+     get credential() {
+         const apiKey = this.options?.apiKey ||
+             process.env[this.apiKeyEnvName] ||
+             process.env.GEMINI_API_KEY ||
+             process.env.GOOGLE_API_KEY;
+         return {
+             apiKey,
+             model: this.options?.model || GEMINI_DEFAULT_CHAT_MODEL,
+         };
      }
-     async *handleImageModelProcessing(input) {
+     get modelOptions() {
+         return this.options?.modelOptions;
+     }
+     process(input) {
+         return this.processInput(input);
+     }
+     async *processInput(input) {
          const model = input.modelOptions?.model || this.credential.model;
          const { contents, config } = await this.buildContents(input);
          const parameters = {
-             model: model,
+             model,
              contents,
              config: {
                  responseModalities: input.modelOptions?.modalities,
@@ -64,12 +74,11 @@ class GeminiChatModel extends openai_1.OpenAIChatModel {
                  frequencyPenalty: input.modelOptions?.frequencyPenalty || this.modelOptions?.frequencyPenalty,
                  presencePenalty: input.modelOptions?.presencePenalty || this.modelOptions?.presencePenalty,
                  ...config,
-                 ...(await this.buildTools(input)),
                  ...(await this.buildConfig(input)),
              },
          };
          const response = await this.googleClient.models.generateContentStream(parameters);
-         const usage = {
+         let usage = {
              inputTokens: 0,
              outputTokens: 0,
          };
@@ -77,6 +86,7 @@
          const files = [];
          const toolCalls = [];
          let text = "";
+         let json;
          for await (const chunk of response) {
              if (!responseModel && chunk.modelVersion) {
                  responseModel = chunk.modelVersion;
@@ -100,34 +110,114 @@
                              });
                          }
                          if (part.functionCall?.name) {
-                             toolCalls.push({
-                                 id: part.functionCall.id || (0, uuid_1.v7)(),
-                                 type: "function",
-                                 function: {
-                                     name: part.functionCall.name,
-                                     arguments: part.functionCall.args || {},
-                                 },
-                             });
-                             yield { delta: { json: { toolCalls } } };
+                             if (part.functionCall.name === OUTPUT_FUNCTION_NAME) {
+                                 json = part.functionCall.args;
+                             }
+                             else {
+                                 toolCalls.push({
+                                     id: part.functionCall.id || (0, uuid_1.v7)(),
+                                     type: "function",
+                                     function: {
+                                         name: part.functionCall.name,
+                                         arguments: part.functionCall.args || {},
+                                     },
+                                 });
+                                 yield { delta: { json: { toolCalls } } };
+                             }
                          }
                      }
                  }
             }
             if (chunk.usageMetadata) {
-                 usage.inputTokens += chunk.usageMetadata.promptTokenCount || 0;
-                 usage.outputTokens += chunk.usageMetadata.candidatesTokenCount || 0;
+                 if (chunk.usageMetadata.promptTokenCount)
+                     usage.inputTokens = chunk.usageMetadata.promptTokenCount;
+                 if (chunk.usageMetadata.candidatesTokenCount)
+                     usage.outputTokens = chunk.usageMetadata.candidatesTokenCount;
             }
         }
         if (input.responseFormat?.type === "json_schema") {
-             yield { delta: { json: { json: (0, core_1.safeParseJSON)(text) } } };
+             if (json) {
+                 yield { delta: { json: { json } } };
+             }
+             else if (text) {
+                 yield { delta: { json: { json: (0, core_1.safeParseJSON)(text) } } };
+             }
+             else if (!toolCalls.length) {
+                 throw new Error("No JSON response from the model");
+             }
+         }
+         else if (!toolCalls.length) {
+             // NOTE: gemini-2.5-pro sometimes returns an empty response,
+             // so we check here and retry with structured output mode (empty responses occur less frequently with tool calls)
+             if (!text) {
+                 logger_js_1.logger.warn("Empty response from Gemini, retrying with structured output mode");
+                 try {
+                     const outputSchema = zod_1.z.object({
+                         output: zod_1.z.string().describe("The final answer from the model"),
+                     });
+                     const response = await this.process({
+                         ...input,
+                         responseFormat: {
+                             type: "json_schema",
+                             jsonSchema: {
+                                 name: "output",
+                                 schema: (0, zod_to_json_schema_1.zodToJsonSchema)(outputSchema),
+                             },
+                         },
+                     });
+                     const result = await (0, core_1.agentProcessResultToObject)(response);
+                     // Merge the retry usage with the original usage
+                     usage = (0, model_utils_js_1.mergeUsage)(usage, result.usage);
+                     // Return the tool calls if the retry produced tool calls
+                     if (result.toolCalls?.length) {
+                         toolCalls.push(...result.toolCalls);
+                         yield { delta: { json: { toolCalls } } };
+                     }
+                     // Return the text from the structured output of the retry
+                     else {
+                         if (!result.json)
+                             throw new Error("Retrying with structured output mode got no json response");
+                         const parsed = outputSchema.safeParse(result.json);
+                         if (!parsed.success)
+                             throw new Error("Retrying with structured output mode got invalid json response");
+                         text = parsed.data.output;
+                         yield { delta: { text: { text } } };
+                         logger_js_1.logger.warn("Empty response from Gemini, retried with structured output mode successfully");
+                     }
+                 }
+                 catch (error) {
+                     logger_js_1.logger.error("Empty response from Gemini, retrying with structured output mode failed", error);
+                     throw new core_1.StructuredOutputError("No response from the model");
+                 }
+             }
         }
-         yield { delta: { json: { usage, files } } };
+         yield { delta: { json: { usage, files: files.length ? files : undefined } } };
     }
     async buildConfig(input) {
         const config = {};
+         const { tools, toolConfig } = await this.buildTools(input);
+         config.tools = tools;
+         config.toolConfig = toolConfig;
         if (input.responseFormat?.type === "json_schema") {
-             config.responseJsonSchema = input.responseFormat.jsonSchema.schema;
-             config.responseMimeType = "application/json";
+             if (config.tools?.length) {
+                 config.tools.push({
+                     functionDeclarations: [
+                         {
+                             name: OUTPUT_FUNCTION_NAME,
+                             description: "Output the final response",
+                             parametersJsonSchema: input.responseFormat.jsonSchema.schema,
+                         },
+                     ],
+                 });
+                 config.toolConfig = {
+                     ...config.toolConfig,
+                     functionCallingConfig: { mode: genai_1.FunctionCallingConfigMode.ANY },
+                 };
+             }
+             else {
+                 config.responseJsonSchema = input.responseFormat.jsonSchema.schema;
+                 config.responseMimeType = "application/json";
+             }
         }
         return config;
     }
@@ -178,7 +268,7 @@ class GeminiChatModel extends openai_1.OpenAIChatModel {
                 return;
             }
             const content = {
-                 role: msg.role === "agent" ? "model" : "user",
+                 role: msg.role === "agent" ? "model" : msg.role === "user" ? "user" : undefined,
             };
             if (msg.toolCalls) {
                 content.parts = msg.toolCalls.map((call) => ({
@@ -195,12 +285,31 @@ class GeminiChatModel extends openai_1.OpenAIChatModel {
                     .find((c) => c?.id === msg.toolCallId);
                 if (!call)
                     throw new Error(`Tool call not found: ${msg.toolCallId}`);
+                 const output = JSON.parse(msg.content);
+                 const isError = "error" in output && Boolean(output.error);
+                 const response = {
+                     tool: call.function.name,
+                 };
+                 // NOTE: based on the Gemini API documentation, the response should include an `output` field for a successful result or an `error` field for a failed result.
+                 // Based on actual testing, adding a `tool` field with the tool name helps the LLM understand which tool was called.
+                 if (isError) {
+                     Object.assign(response, { status: "error" }, output);
+                 }
+                 else {
+                     Object.assign(response, { status: "success" });
+                     if ("output" in output) {
+                         Object.assign(response, output);
+                     }
+                     else {
+                         Object.assign(response, { output });
+                     }
+                 }
                 content.parts = [
                     {
                         functionResponse: {
                             id: msg.toolCallId,
                             name: call.function.name,
-                             response: JSON.parse(msg.content),
+                             response,
                         },
                     },
                 ];
@@ -224,24 +333,17 @@ class GeminiChatModel extends openai_1.OpenAIChatModel {
             }
             return content;
         }))).filter(type_utils_js_1.isNonNullable);
+         if (!result.contents.length && systemParts.length) {
+             const system = systemParts.pop();
+             if (system) {
+                 result.contents.push({ role: "user", parts: [system] });
+             }
+         }
         if (systemParts.length) {
             result.config ??= {};
             result.config.systemInstruction = systemParts;
         }
         return result;
     }
-     async getRunMessages(input) {
-         const messages = await super.getRunMessages(input);
-         if (!messages.some((i) => i.role === "user")) {
-             for (const msg of messages) {
-                 if (msg.role === "system") {
-                     // Ensure the last message is from the user
-                     msg.role = "user";
-                     break;
-                 }
-             }
-         }
-         return messages;
-     }
  }
  exports.GeminiChatModel = GeminiChatModel;
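One behavioral change in this file is worth spelling out: when a `json_schema` response format is requested while tools are configured, `buildConfig` now exposes the schema as an extra function declaration named `output` and forces function calling with `FunctionCallingConfigMode.ANY`, so the structured result arrives as a function call; `responseJsonSchema` is only used when no tools are present. A standalone sketch of that pattern against `@google/genai` (the model name and schema here are illustrative, not taken from this package):

```ts
import { FunctionCallingConfigMode, GoogleGenAI } from "@google/genai";

const client = new GoogleGenAI({ apiKey: process.env.GEMINI_API_KEY });

const response = await client.models.generateContent({
  model: "gemini-2.0-flash",
  contents: "What is the capital of France?",
  config: {
    tools: [
      {
        functionDeclarations: [
          {
            // The desired output schema is registered as a pseudo-tool...
            name: "output",
            description: "Output the final response",
            parametersJsonSchema: {
              type: "object",
              properties: { answer: { type: "string" } },
              required: ["answer"],
            },
          },
        ],
      },
    ],
    // ...and ANY mode forces the model to call some function, so the
    // structured result comes back as functionCall.args.
    toolConfig: { functionCallingConfig: { mode: FunctionCallingConfigMode.ANY } },
  },
});

console.log(response.functionCalls?.[0]?.args); // e.g. { answer: "Paris" }
```

The other notable change is the empty-response fallback: `processInput` re-invokes `process` with a one-field `{ output: string }` schema, merges the retry's token usage into the original counters via `mergeUsage`, and only throws `StructuredOutputError` if the retry also comes back empty.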
@@ -1,13 +1,21 @@
- import { type AgentInvokeOptions, type AgentProcessResult, type ChatModelInput, type ChatModelOutput } from "@aigne/core";
+ import { type AgentProcessResult, ChatModel, type ChatModelInput, type ChatModelOptions, type ChatModelOutput } from "@aigne/core";
  import { type PromiseOrValue } from "@aigne/core/utils/type-utils.js";
- import { OpenAIChatModel, type OpenAIChatModelOptions } from "@aigne/openai";
- import { GoogleGenAI } from "@google/genai";
+ import { GoogleGenAI, type GoogleGenAIOptions } from "@google/genai";
+ export interface GeminiChatModelOptions extends ChatModelOptions {
+     /**
+      * API key for Gemini API
+      *
+      * If not provided, will look for GEMINI_API_KEY or GOOGLE_API_KEY in environment variables
+      */
+     apiKey?: string;
+     /**
+      * Optional client options for the Gemini SDK
+      */
+     clientOptions?: Partial<GoogleGenAIOptions>;
+ }
  /**
   * Implementation of the ChatModel interface for Google's Gemini API
   *
-  * This model uses OpenAI-compatible API format to interact with Google's Gemini models,
-  * providing access to models like Gemini 1.5 and Gemini 2.0.
-  *
   * @example
   * Here's how to create and use a Gemini chat model:
   * {@includeCode ../test/gemini-chat-model.test.ts#example-gemini-chat-model}
@@ -16,19 +24,20 @@ import { GoogleGenAI } from "@google/genai";
   * Here's an example with streaming response:
   * {@includeCode ../test/gemini-chat-model.test.ts#example-gemini-chat-model-streaming}
   */
- export declare class GeminiChatModel extends OpenAIChatModel {
-     constructor(options?: OpenAIChatModelOptions);
+ export declare class GeminiChatModel extends ChatModel {
+     options?: GeminiChatModelOptions | undefined;
+     constructor(options?: GeminiChatModelOptions | undefined);
      protected apiKeyEnvName: string;
-     protected supportsToolsUseWithJsonSchema: boolean;
-     protected supportsParallelToolCalls: boolean;
-     protected supportsToolStreaming: boolean;
-     protected optionalFieldMode: "optional";
      protected _googleClient?: GoogleGenAI;
      get googleClient(): GoogleGenAI;
-     process(input: ChatModelInput, options: AgentInvokeOptions): PromiseOrValue<AgentProcessResult<ChatModelOutput>>;
-     private handleImageModelProcessing;
+     get credential(): {
+         apiKey: string | undefined;
+         model: string;
+     };
+     get modelOptions(): Omit<import("@aigne/core").ChatModelInputOptions, "model"> | undefined;
+     process(input: ChatModelInput): PromiseOrValue<AgentProcessResult<ChatModelOutput>>;
+     private processInput;
      private buildConfig;
      private buildTools;
      private buildContents;
-     getRunMessages(input: ChatModelInput): ReturnType<OpenAIChatModel["getRunMessages"]>;
  }
@@ -1,13 +1,21 @@
- import { type AgentInvokeOptions, type AgentProcessResult, type ChatModelInput, type ChatModelOutput } from "@aigne/core";
+ import { type AgentProcessResult, ChatModel, type ChatModelInput, type ChatModelOptions, type ChatModelOutput } from "@aigne/core";
  import { type PromiseOrValue } from "@aigne/core/utils/type-utils.js";
- import { OpenAIChatModel, type OpenAIChatModelOptions } from "@aigne/openai";
- import { GoogleGenAI } from "@google/genai";
+ import { GoogleGenAI, type GoogleGenAIOptions } from "@google/genai";
+ export interface GeminiChatModelOptions extends ChatModelOptions {
+     /**
+      * API key for Gemini API
+      *
+      * If not provided, will look for GEMINI_API_KEY or GOOGLE_API_KEY in environment variables
+      */
+     apiKey?: string;
+     /**
+      * Optional client options for the Gemini SDK
+      */
+     clientOptions?: Partial<GoogleGenAIOptions>;
+ }
  /**
   * Implementation of the ChatModel interface for Google's Gemini API
   *
-  * This model uses OpenAI-compatible API format to interact with Google's Gemini models,
-  * providing access to models like Gemini 1.5 and Gemini 2.0.
-  *
   * @example
   * Here's how to create and use a Gemini chat model:
   * {@includeCode ../test/gemini-chat-model.test.ts#example-gemini-chat-model}
@@ -16,19 +24,20 @@ import { GoogleGenAI } from "@google/genai";
   * Here's an example with streaming response:
   * {@includeCode ../test/gemini-chat-model.test.ts#example-gemini-chat-model-streaming}
   */
- export declare class GeminiChatModel extends OpenAIChatModel {
-     constructor(options?: OpenAIChatModelOptions);
+ export declare class GeminiChatModel extends ChatModel {
+     options?: GeminiChatModelOptions | undefined;
+     constructor(options?: GeminiChatModelOptions | undefined);
      protected apiKeyEnvName: string;
-     protected supportsToolsUseWithJsonSchema: boolean;
-     protected supportsParallelToolCalls: boolean;
-     protected supportsToolStreaming: boolean;
-     protected optionalFieldMode: "optional";
      protected _googleClient?: GoogleGenAI;
      get googleClient(): GoogleGenAI;
-     process(input: ChatModelInput, options: AgentInvokeOptions): PromiseOrValue<AgentProcessResult<ChatModelOutput>>;
-     private handleImageModelProcessing;
+     get credential(): {
+         apiKey: string | undefined;
+         model: string;
+     };
+     get modelOptions(): Omit<import("@aigne/core").ChatModelInputOptions, "model"> | undefined;
+     process(input: ChatModelInput): PromiseOrValue<AgentProcessResult<ChatModelOutput>>;
+     private processInput;
      private buildConfig;
      private buildTools;
      private buildContents;
-     getRunMessages(input: ChatModelInput): ReturnType<OpenAIChatModel["getRunMessages"]>;
  }
@@ -1,16 +1,16 @@
- import { safeParseJSON, } from "@aigne/core";
+ import { agentProcessResultToObject, ChatModel, StructuredOutputError, safeParseJSON, } from "@aigne/core";
+ import { logger } from "@aigne/core/utils/logger.js";
+ import { mergeUsage } from "@aigne/core/utils/model-utils.js";
  import { isNonNullable } from "@aigne/core/utils/type-utils.js";
- import { OpenAIChatModel } from "@aigne/openai";
  import { v7 } from "@aigne/uuid";
  import { FunctionCallingConfigMode, GoogleGenAI, } from "@google/genai";
- const GEMINI_BASE_URL = "https://generativelanguage.googleapis.com/v1beta/openai";
+ import { z } from "zod";
+ import { zodToJsonSchema } from "zod-to-json-schema";
  const GEMINI_DEFAULT_CHAT_MODEL = "gemini-2.0-flash";
+ const OUTPUT_FUNCTION_NAME = "output";
  /**
   * Implementation of the ChatModel interface for Google's Gemini API
   *
-  * This model uses OpenAI-compatible API format to interact with Google's Gemini models,
-  * providing access to models like Gemini 1.5 and Gemini 2.0.
-  *
   * @example
   * Here's how to create and use a Gemini chat model:
   * {@includeCode ../test/gemini-chat-model.test.ts#example-gemini-chat-model}
@@ -19,19 +19,16 @@ const GEMINI_DEFAULT_CHAT_MODEL = "gemini-2.0-flash";
   * Here's an example with streaming response:
   * {@includeCode ../test/gemini-chat-model.test.ts#example-gemini-chat-model-streaming}
   */
- export class GeminiChatModel extends OpenAIChatModel {
+ export class GeminiChatModel extends ChatModel {
+     options;
      constructor(options) {
          super({
              ...options,
              model: options?.model || GEMINI_DEFAULT_CHAT_MODEL,
-             baseURL: options?.baseURL || GEMINI_BASE_URL,
          });
+         this.options = options;
      }
      apiKeyEnvName = "GEMINI_API_KEY";
-     supportsToolsUseWithJsonSchema = false;
-     supportsParallelToolCalls = false;
-     supportsToolStreaming = false;
-     optionalFieldMode = "optional";
      _googleClient;
      get googleClient() {
          if (this._googleClient)
@@ -39,20 +36,33 @@ export class GeminiChatModel extends OpenAIChatModel {
          const { apiKey } = this.credential;
          if (!apiKey)
              throw new Error(`${this.name} requires an API key. Please provide it via \`options.apiKey\`, or set the \`${this.apiKeyEnvName}\` environment variable`);
-         this._googleClient ??= new GoogleGenAI({ apiKey });
+         this._googleClient ??= new GoogleGenAI({
+             apiKey,
+             ...this.options?.clientOptions,
+         });
          return this._googleClient;
      }
-     process(input, options) {
-         const model = input.modelOptions?.model || this.credential.model;
-         if (!model.includes("image"))
-             return super.process(input, options);
-         return this.handleImageModelProcessing(input);
+     get credential() {
+         const apiKey = this.options?.apiKey ||
+             process.env[this.apiKeyEnvName] ||
+             process.env.GEMINI_API_KEY ||
+             process.env.GOOGLE_API_KEY;
+         return {
+             apiKey,
+             model: this.options?.model || GEMINI_DEFAULT_CHAT_MODEL,
+         };
      }
-     async *handleImageModelProcessing(input) {
+     get modelOptions() {
+         return this.options?.modelOptions;
+     }
+     process(input) {
+         return this.processInput(input);
+     }
+     async *processInput(input) {
          const model = input.modelOptions?.model || this.credential.model;
          const { contents, config } = await this.buildContents(input);
          const parameters = {
-             model: model,
+             model,
              contents,
              config: {
                  responseModalities: input.modelOptions?.modalities,
@@ -61,12 +71,11 @@ export class GeminiChatModel extends OpenAIChatModel {
                  frequencyPenalty: input.modelOptions?.frequencyPenalty || this.modelOptions?.frequencyPenalty,
                  presencePenalty: input.modelOptions?.presencePenalty || this.modelOptions?.presencePenalty,
                  ...config,
-                 ...(await this.buildTools(input)),
                  ...(await this.buildConfig(input)),
              },
          };
          const response = await this.googleClient.models.generateContentStream(parameters);
-         const usage = {
+         let usage = {
              inputTokens: 0,
              outputTokens: 0,
          };
@@ -74,6 +83,7 @@
          const files = [];
          const toolCalls = [];
          let text = "";
+         let json;
          for await (const chunk of response) {
              if (!responseModel && chunk.modelVersion) {
                  responseModel = chunk.modelVersion;
@@ -97,34 +107,114 @@
                              });
                          }
                          if (part.functionCall?.name) {
-                             toolCalls.push({
-                                 id: part.functionCall.id || v7(),
-                                 type: "function",
-                                 function: {
-                                     name: part.functionCall.name,
-                                     arguments: part.functionCall.args || {},
-                                 },
-                             });
-                             yield { delta: { json: { toolCalls } } };
+                             if (part.functionCall.name === OUTPUT_FUNCTION_NAME) {
+                                 json = part.functionCall.args;
+                             }
+                             else {
+                                 toolCalls.push({
+                                     id: part.functionCall.id || v7(),
+                                     type: "function",
+                                     function: {
+                                         name: part.functionCall.name,
+                                         arguments: part.functionCall.args || {},
+                                     },
+                                 });
+                                 yield { delta: { json: { toolCalls } } };
+                             }
                          }
                      }
                  }
             }
             if (chunk.usageMetadata) {
-                 usage.inputTokens += chunk.usageMetadata.promptTokenCount || 0;
-                 usage.outputTokens += chunk.usageMetadata.candidatesTokenCount || 0;
+                 if (chunk.usageMetadata.promptTokenCount)
+                     usage.inputTokens = chunk.usageMetadata.promptTokenCount;
+                 if (chunk.usageMetadata.candidatesTokenCount)
+                     usage.outputTokens = chunk.usageMetadata.candidatesTokenCount;
             }
         }
         if (input.responseFormat?.type === "json_schema") {
-             yield { delta: { json: { json: safeParseJSON(text) } } };
+             if (json) {
+                 yield { delta: { json: { json } } };
+             }
+             else if (text) {
+                 yield { delta: { json: { json: safeParseJSON(text) } } };
+             }
+             else if (!toolCalls.length) {
+                 throw new Error("No JSON response from the model");
+             }
+         }
+         else if (!toolCalls.length) {
+             // NOTE: gemini-2.5-pro sometimes returns an empty response,
+             // so we check here and retry with structured output mode (empty responses occur less frequently with tool calls)
+             if (!text) {
+                 logger.warn("Empty response from Gemini, retrying with structured output mode");
+                 try {
+                     const outputSchema = z.object({
+                         output: z.string().describe("The final answer from the model"),
+                     });
+                     const response = await this.process({
+                         ...input,
+                         responseFormat: {
+                             type: "json_schema",
+                             jsonSchema: {
+                                 name: "output",
+                                 schema: zodToJsonSchema(outputSchema),
+                             },
+                         },
+                     });
+                     const result = await agentProcessResultToObject(response);
+                     // Merge the retry usage with the original usage
+                     usage = mergeUsage(usage, result.usage);
+                     // Return the tool calls if the retry produced tool calls
+                     if (result.toolCalls?.length) {
+                         toolCalls.push(...result.toolCalls);
+                         yield { delta: { json: { toolCalls } } };
+                     }
+                     // Return the text from the structured output of the retry
+                     else {
+                         if (!result.json)
+                             throw new Error("Retrying with structured output mode got no json response");
+                         const parsed = outputSchema.safeParse(result.json);
+                         if (!parsed.success)
+                             throw new Error("Retrying with structured output mode got invalid json response");
+                         text = parsed.data.output;
+                         yield { delta: { text: { text } } };
+                         logger.warn("Empty response from Gemini, retried with structured output mode successfully");
+                     }
+                 }
+                 catch (error) {
+                     logger.error("Empty response from Gemini, retrying with structured output mode failed", error);
+                     throw new StructuredOutputError("No response from the model");
+                 }
+             }
         }
-         yield { delta: { json: { usage, files } } };
+         yield { delta: { json: { usage, files: files.length ? files : undefined } } };
     }
     async buildConfig(input) {
         const config = {};
+         const { tools, toolConfig } = await this.buildTools(input);
+         config.tools = tools;
+         config.toolConfig = toolConfig;
         if (input.responseFormat?.type === "json_schema") {
-             config.responseJsonSchema = input.responseFormat.jsonSchema.schema;
-             config.responseMimeType = "application/json";
+             if (config.tools?.length) {
+                 config.tools.push({
+                     functionDeclarations: [
+                         {
+                             name: OUTPUT_FUNCTION_NAME,
+                             description: "Output the final response",
+                             parametersJsonSchema: input.responseFormat.jsonSchema.schema,
+                         },
+                     ],
+                 });
+                 config.toolConfig = {
+                     ...config.toolConfig,
+                     functionCallingConfig: { mode: FunctionCallingConfigMode.ANY },
+                 };
+             }
+             else {
+                 config.responseJsonSchema = input.responseFormat.jsonSchema.schema;
+                 config.responseMimeType = "application/json";
+             }
         }
         return config;
     }
@@ -175,7 +265,7 @@ export class GeminiChatModel extends OpenAIChatModel {
                 return;
             }
             const content = {
-                 role: msg.role === "agent" ? "model" : "user",
+                 role: msg.role === "agent" ? "model" : msg.role === "user" ? "user" : undefined,
             };
             if (msg.toolCalls) {
                 content.parts = msg.toolCalls.map((call) => ({
@@ -192,12 +282,31 @@ export class GeminiChatModel extends OpenAIChatModel {
                     .find((c) => c?.id === msg.toolCallId);
                 if (!call)
                     throw new Error(`Tool call not found: ${msg.toolCallId}`);
+                 const output = JSON.parse(msg.content);
+                 const isError = "error" in output && Boolean(output.error);
+                 const response = {
+                     tool: call.function.name,
+                 };
+                 // NOTE: based on the Gemini API documentation, the response should include an `output` field for a successful result or an `error` field for a failed result.
+                 // Based on actual testing, adding a `tool` field with the tool name helps the LLM understand which tool was called.
+                 if (isError) {
+                     Object.assign(response, { status: "error" }, output);
+                 }
+                 else {
+                     Object.assign(response, { status: "success" });
+                     if ("output" in output) {
+                         Object.assign(response, output);
+                     }
+                     else {
+                         Object.assign(response, { output });
+                     }
+                 }
                 content.parts = [
                     {
                         functionResponse: {
                             id: msg.toolCallId,
                             name: call.function.name,
-                             response: JSON.parse(msg.content),
+                             response,
                         },
                     },
                 ];
@@ -221,23 +330,16 @@ export class GeminiChatModel extends OpenAIChatModel {
             }
             return content;
         }))).filter(isNonNullable);
+         if (!result.contents.length && systemParts.length) {
+             const system = systemParts.pop();
+             if (system) {
+                 result.contents.push({ role: "user", parts: [system] });
+             }
+         }
         if (systemParts.length) {
             result.config ??= {};
             result.config.systemInstruction = systemParts;
         }
         return result;
     }
-     async getRunMessages(input) {
-         const messages = await super.getRunMessages(input);
-         if (!messages.some((i) => i.role === "user")) {
-             for (const msg of messages) {
-                 if (msg.role === "system") {
-                     // Ensure the last message is from the user
-                     msg.role = "user";
-                     break;
-                 }
-             }
-         }
-         return messages;
-     }
  }
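The other notable change in `buildContents` is the shape of tool results: instead of passing the parsed tool output through as the `functionResponse.response` verbatim, it is now wrapped with explicit `tool` and `status` fields. A small illustrative helper (the function name is hypothetical; the wrapping logic mirrors the code above):

```ts
// Hypothetical helper mirroring the wrapping logic in buildContents above.
function buildFunctionResponsePayload(
  toolName: string,
  result: Record<string, unknown>,
): Record<string, unknown> {
  const isError = "error" in result && Boolean(result.error);
  if (isError) {
    // Failed results keep their `error` field and are marked as errors.
    return { tool: toolName, status: "error", ...result };
  }
  // Successful results are nested under `output` unless they already have one.
  return "output" in result
    ? { tool: toolName, status: "success", ...result }
    : { tool: toolName, status: "success", output: result };
}

// { tool: "get_weather", status: "success", output: { tempC: 21 } }
console.log(buildFunctionResponsePayload("get_weather", { tempC: 21 }));

// { tool: "get_weather", status: "error", error: "city not found" }
console.log(buildFunctionResponsePayload("get_weather", { error: "city not found" }));
```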
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
    "name": "@aigne/gemini",
-   "version": "0.14.2-beta.8",
+   "version": "0.14.2",
    "description": "AIGNE Gemini SDK for integrating with Google's Gemini AI models",
    "publishConfig": {
      "access": "public"
@@ -38,8 +38,9 @@
      "@aigne/uuid": "^13.0.1",
      "@google/genai": "^1.24.0",
      "zod": "^3.25.67",
-     "@aigne/platform-helpers": "^0.6.3",
-     "@aigne/openai": "^0.16.2-beta.8"
+     "zod-to-json-schema": "^3.24.6",
+     "@aigne/core": "^1.63.0",
+     "@aigne/platform-helpers": "^0.6.3"
    },
    "devDependencies": {
      "@types/bun": "^1.2.22",
@@ -47,8 +48,7 @@
      "npm-run-all": "^4.1.5",
      "rimraf": "^6.0.1",
      "typescript": "^5.9.2",
-     "@aigne/core": "^1.63.0-beta.8",
-     "@aigne/test-utils": "^0.5.55-beta.8"
+     "@aigne/test-utils": "^0.5.55"
    },
    "scripts": {
      "lint": "tsc --noEmit",