@aigne/gemini 0.11.6 → 0.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,22 @@
  # Changelog
  
+ ## [0.12.0](https://github.com/AIGNE-io/aigne-framework/compare/gemini-v0.11.6...gemini-v0.12.0) (2025-09-05)
+
+
+ ### Features
+
+ * add modalities support for chat model ([#454](https://github.com/AIGNE-io/aigne-framework/issues/454)) ([70d1bf6](https://github.com/AIGNE-io/aigne-framework/commit/70d1bf631f4e711235d89c6df8ee210a19179b30))
+
+
+ ### Dependencies
+
+ * The following workspace dependencies were updated
+   * dependencies
+     * @aigne/openai bumped to 0.14.0
+   * devDependencies
+     * @aigne/core bumped to 1.58.0
+     * @aigne/test-utils bumped to 0.5.44
+
  ## [0.11.6](https://github.com/AIGNE-io/aigne-framework/compare/gemini-v0.11.5...gemini-v0.11.6) (2025-09-01)
  
  
@@ -1,5 +1,7 @@
- import type { ChatModelInput } from "@aigne/core";
+ import { type AgentInvokeOptions, type AgentProcessResult, type ChatModelInput, type ChatModelOutput } from "@aigne/core";
+ import { type PromiseOrValue } from "@aigne/core/utils/type-utils.js";
  import { OpenAIChatModel, type OpenAIChatModelOptions } from "@aigne/openai";
+ import { GoogleGenAI } from "@google/genai";
  /**
   * Implementation of the ChatModel interface for Google's Gemini API
   *
@@ -20,5 +22,12 @@ export declare class GeminiChatModel extends OpenAIChatModel {
  protected supportsToolsUseWithJsonSchema: boolean;
  protected supportsParallelToolCalls: boolean;
  protected supportsToolStreaming: boolean;
+ protected _googleClient?: GoogleGenAI;
+ get googleClient(): GoogleGenAI;
+ process(input: ChatModelInput, options: AgentInvokeOptions): PromiseOrValue<AgentProcessResult<ChatModelOutput>>;
+ private handleImageModelProcessing;
+ private buildConfig;
+ private buildTools;
+ private buildContents;
  getRunMessages(input: ChatModelInput): ReturnType<OpenAIChatModel["getRunMessages"]>;
  }
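The declaration above is the new public surface: a lazily constructed, cached `GoogleGenAI` client behind the `googleClient` getter, and a `process` override that picks a backend per call. A small sketch of the dispatch rule, which mirrors the compiled source below:

```ts
// Models whose name contains "image" go through Google's native client;
// everything else keeps the OpenAI-compatible path inherited from @aigne/openai.
function usesNativeGeminiClient(modelName: string): boolean {
  return modelName.includes("image");
}

usesNativeGeminiClient("gemini-2.0-flash");                          // false → OpenAI-compatible endpoint
usesNativeGeminiClient("gemini-2.0-flash-preview-image-generation"); // true → @google/genai streaming path
```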
@@ -1,7 +1,12 @@
  "use strict";
  Object.defineProperty(exports, "__esModule", { value: true });
  exports.GeminiChatModel = void 0;
+ const core_1 = require("@aigne/core");
+ const type_utils_js_1 = require("@aigne/core/utils/type-utils.js");
  const openai_1 = require("@aigne/openai");
+ const index_js_1 = require("@aigne/platform-helpers/nodejs/index.js");
+ const genai_1 = require("@google/genai");
+ const uuid_1 = require("uuid");
  const GEMINI_BASE_URL = "https://generativelanguage.googleapis.com/v1beta/openai";
  const GEMINI_DEFAULT_CHAT_MODEL = "gemini-2.0-flash";
  /**
@@ -30,6 +35,207 @@ class GeminiChatModel extends openai_1.OpenAIChatModel {
  supportsToolsUseWithJsonSchema = false;
  supportsParallelToolCalls = false;
  supportsToolStreaming = false;
+ _googleClient;
+ get googleClient() {
+     if (this._googleClient)
+         return this._googleClient;
+     const { apiKey } = this.credential;
+     if (!apiKey)
+         throw new Error(`${this.name} requires an API key. Please provide it via \`options.apiKey\`, or set the \`${this.apiKeyEnvName}\` environment variable`);
+     this._googleClient ??= new genai_1.GoogleGenAI({ apiKey });
+     return this._googleClient;
+ }
+ process(input, options) {
+     const model = input.modelOptions?.model || this.credential.model;
+     if (!model.includes("image"))
+         return super.process(input, options);
+     return this.handleImageModelProcessing(input, options);
+ }
+ async *handleImageModelProcessing(input, options) {
+     const model = input.modelOptions?.model || this.credential.model;
+     const { contents, config } = await this.buildContents(input);
+     const parameters = {
+         model: model,
+         contents,
+         config: {
+             responseModalities: input.modelOptions?.modalities,
+             temperature: input.modelOptions?.temperature || this.modelOptions?.temperature,
+             topP: input.modelOptions?.topP || this.modelOptions?.topP,
+             frequencyPenalty: input.modelOptions?.frequencyPenalty || this.modelOptions?.frequencyPenalty,
+             presencePenalty: input.modelOptions?.presencePenalty || this.modelOptions?.presencePenalty,
+             ...config,
+             ...(await this.buildTools(input)),
+             ...(await this.buildConfig(input)),
+         },
+     };
+     const response = await this.googleClient.models.generateContentStream(parameters);
+     const usage = {
+         inputTokens: 0,
+         outputTokens: 0,
+     };
+     let responseModel;
+     const files = [];
+     const toolCalls = [];
+     let text = "";
+     for await (const chunk of response) {
+         if (!responseModel && chunk.modelVersion) {
+             responseModel = chunk.modelVersion;
+             yield { delta: { json: { model: responseModel } } };
+         }
+         for (const { content } of chunk.candidates ?? []) {
+             if (content?.parts) {
+                 for (const part of content.parts) {
+                     if (part.text) {
+                         text += part.text;
+                         if (input.responseFormat?.type !== "json_schema") {
+                             yield { delta: { text: { text: part.text } } };
+                         }
+                     }
+                     if (part.inlineData?.data) {
+                         files.push(await this.transformFileOutput(input, {
+                             type: "file",
+                             data: part.inlineData.data,
+                             filename: part.inlineData.displayName,
+                             mimeType: part.inlineData.mimeType,
+                         }, options));
+                         yield { delta: { json: { files } } };
+                     }
+                     if (part.functionCall?.name) {
+                         toolCalls.push({
+                             id: part.functionCall.id || (0, uuid_1.v7)(),
+                             type: "function",
+                             function: {
+                                 name: part.functionCall.name,
+                                 arguments: part.functionCall.args || {},
+                             },
+                         });
+                         yield { delta: { json: { toolCalls } } };
+                     }
+                 }
+             }
+         }
+         if (chunk.usageMetadata) {
+             usage.inputTokens += chunk.usageMetadata.promptTokenCount || 0;
+             usage.outputTokens += chunk.usageMetadata.candidatesTokenCount || 0;
+             yield { delta: { json: { usage } } };
+         }
+     }
+     if (input.responseFormat?.type === "json_schema") {
+         yield { delta: { json: { json: (0, core_1.safeParseJSON)(text) } } };
+     }
+ }
+ async buildConfig(input) {
+     const config = {};
+     if (input.responseFormat?.type === "json_schema") {
+         config.responseJsonSchema = input.responseFormat.jsonSchema.schema;
+         config.responseMimeType = "application/json";
+     }
+     return config;
+ }
+ async buildTools(input) {
+     const tools = [];
+     for (const tool of input.tools ?? []) {
+         tools.push({
+             functionDeclarations: [
+                 {
+                     name: tool.function.name,
+                     description: tool.function.description,
+                     parametersJsonSchema: tool.function.parameters,
+                 },
+             ],
+         });
+     }
+     const functionCallingConfig = !input.toolChoice
+         ? undefined
+         : input.toolChoice === "auto"
+             ? { mode: genai_1.FunctionCallingConfigMode.AUTO }
+             : input.toolChoice === "none"
+                 ? { mode: genai_1.FunctionCallingConfigMode.NONE }
+                 : input.toolChoice === "required"
+                     ? { mode: genai_1.FunctionCallingConfigMode.ANY }
+                     : {
+                         mode: genai_1.FunctionCallingConfigMode.ANY,
+                         allowedFunctionNames: [input.toolChoice.function.name],
+                     };
+     return { tools, toolConfig: { functionCallingConfig } };
+ }
+ async buildContents(input) {
+     const result = {
+         contents: [],
+     };
+     const systemParts = [];
+     result.contents = (await Promise.all(input.messages.map(async (msg) => {
+         if (msg.role === "system") {
+             if (typeof msg.content === "string") {
+                 systemParts.push({ text: msg.content });
+             }
+             else if (Array.isArray(msg.content)) {
+                 systemParts.push(...msg.content.map((item) => {
+                     if (item.type === "text")
+                         return { text: item.text };
+                     throw new Error(`Unsupported content type: ${item.type}`);
+                 }));
+             }
+             return;
+         }
+         const content = {
+             role: msg.role === "agent" ? "model" : "user",
+         };
+         if (msg.toolCalls) {
+             content.parts = msg.toolCalls.map((call) => ({
+                 functionCall: {
+                     id: call.id,
+                     name: call.function.name,
+                     args: call.function.arguments,
+                 },
+             }));
+         }
+         else if (msg.toolCallId) {
+             const call = input.messages
+                 .flatMap((i) => i.toolCalls)
+                 .find((c) => c?.id === msg.toolCallId);
+             if (!call)
+                 throw new Error(`Tool call not found: ${msg.toolCallId}`);
+             content.parts = [
+                 {
+                     functionResponse: {
+                         id: msg.toolCallId,
+                         name: call.function.name,
+                         response: JSON.parse(msg.content),
+                     },
+                 },
+             ];
+         }
+         else if (typeof msg.content === "string") {
+             content.parts = [{ text: msg.content }];
+         }
+         else if (Array.isArray(msg.content)) {
+             content.parts = await Promise.all(msg.content.map(async (item) => {
+                 switch (item.type) {
+                     case "text":
+                         return { text: item.text };
+                     case "url":
+                         return { fileData: { fileUri: item.url, mimeType: item.mimeType } };
+                     case "file":
+                         return { inlineData: { data: item.data, mimeType: item.mimeType } };
+                     case "local":
+                         return {
+                             inlineData: {
+                                 data: await index_js_1.nodejs.fs.readFile(item.path, "base64"),
+                                 mimeType: item.mimeType,
+                             },
+                         };
+                 }
+             }));
+         }
+         return content;
+     }))).filter(type_utils_js_1.isNonNullable);
+     if (systemParts) {
+         result.config ??= {};
+         result.config.systemInstruction = systemParts;
+     }
+     return result;
+ }
  async getRunMessages(input) {
      const messages = await super.getRunMessages(input);
      const lastMessage = messages.at(-1);
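The ternary chain in `buildTools` maps AIGNE's `toolChoice` onto Gemini's `FunctionCallingConfigMode`. An equivalent, more explicit sketch of the same mapping; the `ToolChoice` union here is a local stand-in, not the real @aigne/core type:

```ts
import { FunctionCallingConfigMode } from "@google/genai";

// Local stand-in for the toolChoice union accepted by ChatModelInput (assumption).
type ToolChoice = "auto" | "none" | "required" | { type: "function"; function: { name: string } };

function toFunctionCallingConfig(toolChoice?: ToolChoice) {
  if (!toolChoice) return undefined; // no preference: omit the config entirely
  if (toolChoice === "auto") return { mode: FunctionCallingConfigMode.AUTO };
  if (toolChoice === "none") return { mode: FunctionCallingConfigMode.NONE };
  // Gemini has no literal "required" mode; ANY means "must call some function".
  if (toolChoice === "required") return { mode: FunctionCallingConfigMode.ANY };
  // A specific tool: ANY restricted to a single allowed function name.
  return {
    mode: FunctionCallingConfigMode.ANY,
    allowedFunctionNames: [toolChoice.function.name],
  };
}
```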
@@ -1,5 +1,7 @@
- import type { ChatModelInput } from "@aigne/core";
+ import { type AgentInvokeOptions, type AgentProcessResult, type ChatModelInput, type ChatModelOutput } from "@aigne/core";
+ import { type PromiseOrValue } from "@aigne/core/utils/type-utils.js";
  import { OpenAIChatModel, type OpenAIChatModelOptions } from "@aigne/openai";
+ import { GoogleGenAI } from "@google/genai";
  /**
   * Implementation of the ChatModel interface for Google's Gemini API
   *
@@ -20,5 +22,12 @@ export declare class GeminiChatModel extends OpenAIChatModel {
  protected supportsToolsUseWithJsonSchema: boolean;
  protected supportsParallelToolCalls: boolean;
  protected supportsToolStreaming: boolean;
+ protected _googleClient?: GoogleGenAI;
+ get googleClient(): GoogleGenAI;
+ process(input: ChatModelInput, options: AgentInvokeOptions): PromiseOrValue<AgentProcessResult<ChatModelOutput>>;
+ private handleImageModelProcessing;
+ private buildConfig;
+ private buildTools;
+ private buildContents;
  getRunMessages(input: ChatModelInput): ReturnType<OpenAIChatModel["getRunMessages"]>;
  }
@@ -1,5 +1,7 @@
- import type { ChatModelInput } from "@aigne/core";
+ import { type AgentInvokeOptions, type AgentProcessResult, type ChatModelInput, type ChatModelOutput } from "@aigne/core";
+ import { type PromiseOrValue } from "@aigne/core/utils/type-utils.js";
  import { OpenAIChatModel, type OpenAIChatModelOptions } from "@aigne/openai";
+ import { GoogleGenAI } from "@google/genai";
  /**
   * Implementation of the ChatModel interface for Google's Gemini API
   *
@@ -20,5 +22,12 @@ export declare class GeminiChatModel extends OpenAIChatModel {
  protected supportsToolsUseWithJsonSchema: boolean;
  protected supportsParallelToolCalls: boolean;
  protected supportsToolStreaming: boolean;
+ protected _googleClient?: GoogleGenAI;
+ get googleClient(): GoogleGenAI;
+ process(input: ChatModelInput, options: AgentInvokeOptions): PromiseOrValue<AgentProcessResult<ChatModelOutput>>;
+ private handleImageModelProcessing;
+ private buildConfig;
+ private buildTools;
+ private buildContents;
  getRunMessages(input: ChatModelInput): ReturnType<OpenAIChatModel["getRunMessages"]>;
  }
@@ -1,4 +1,9 @@
+ import { safeParseJSON, } from "@aigne/core";
+ import { isNonNullable } from "@aigne/core/utils/type-utils.js";
  import { OpenAIChatModel } from "@aigne/openai";
+ import { nodejs } from "@aigne/platform-helpers/nodejs/index.js";
+ import { FunctionCallingConfigMode, GoogleGenAI, } from "@google/genai";
+ import { v7 } from "uuid";
  const GEMINI_BASE_URL = "https://generativelanguage.googleapis.com/v1beta/openai";
  const GEMINI_DEFAULT_CHAT_MODEL = "gemini-2.0-flash";
  /**
@@ -27,6 +32,207 @@ export class GeminiChatModel extends OpenAIChatModel {
  supportsToolsUseWithJsonSchema = false;
  supportsParallelToolCalls = false;
  supportsToolStreaming = false;
+ _googleClient;
+ get googleClient() {
+     if (this._googleClient)
+         return this._googleClient;
+     const { apiKey } = this.credential;
+     if (!apiKey)
+         throw new Error(`${this.name} requires an API key. Please provide it via \`options.apiKey\`, or set the \`${this.apiKeyEnvName}\` environment variable`);
+     this._googleClient ??= new GoogleGenAI({ apiKey });
+     return this._googleClient;
+ }
+ process(input, options) {
+     const model = input.modelOptions?.model || this.credential.model;
+     if (!model.includes("image"))
+         return super.process(input, options);
+     return this.handleImageModelProcessing(input, options);
+ }
+ async *handleImageModelProcessing(input, options) {
+     const model = input.modelOptions?.model || this.credential.model;
+     const { contents, config } = await this.buildContents(input);
+     const parameters = {
+         model: model,
+         contents,
+         config: {
+             responseModalities: input.modelOptions?.modalities,
+             temperature: input.modelOptions?.temperature || this.modelOptions?.temperature,
+             topP: input.modelOptions?.topP || this.modelOptions?.topP,
+             frequencyPenalty: input.modelOptions?.frequencyPenalty || this.modelOptions?.frequencyPenalty,
+             presencePenalty: input.modelOptions?.presencePenalty || this.modelOptions?.presencePenalty,
+             ...config,
+             ...(await this.buildTools(input)),
+             ...(await this.buildConfig(input)),
+         },
+     };
+     const response = await this.googleClient.models.generateContentStream(parameters);
+     const usage = {
+         inputTokens: 0,
+         outputTokens: 0,
+     };
+     let responseModel;
+     const files = [];
+     const toolCalls = [];
+     let text = "";
+     for await (const chunk of response) {
+         if (!responseModel && chunk.modelVersion) {
+             responseModel = chunk.modelVersion;
+             yield { delta: { json: { model: responseModel } } };
+         }
+         for (const { content } of chunk.candidates ?? []) {
+             if (content?.parts) {
+                 for (const part of content.parts) {
+                     if (part.text) {
+                         text += part.text;
+                         if (input.responseFormat?.type !== "json_schema") {
+                             yield { delta: { text: { text: part.text } } };
+                         }
+                     }
+                     if (part.inlineData?.data) {
+                         files.push(await this.transformFileOutput(input, {
+                             type: "file",
+                             data: part.inlineData.data,
+                             filename: part.inlineData.displayName,
+                             mimeType: part.inlineData.mimeType,
+                         }, options));
+                         yield { delta: { json: { files } } };
+                     }
+                     if (part.functionCall?.name) {
+                         toolCalls.push({
+                             id: part.functionCall.id || v7(),
+                             type: "function",
+                             function: {
+                                 name: part.functionCall.name,
+                                 arguments: part.functionCall.args || {},
+                             },
+                         });
+                         yield { delta: { json: { toolCalls } } };
+                     }
+                 }
+             }
+         }
+         if (chunk.usageMetadata) {
+             usage.inputTokens += chunk.usageMetadata.promptTokenCount || 0;
+             usage.outputTokens += chunk.usageMetadata.candidatesTokenCount || 0;
+             yield { delta: { json: { usage } } };
+         }
+     }
+     if (input.responseFormat?.type === "json_schema") {
+         yield { delta: { json: { json: safeParseJSON(text) } } };
+     }
+ }
+ async buildConfig(input) {
+     const config = {};
+     if (input.responseFormat?.type === "json_schema") {
+         config.responseJsonSchema = input.responseFormat.jsonSchema.schema;
+         config.responseMimeType = "application/json";
+     }
+     return config;
+ }
+ async buildTools(input) {
+     const tools = [];
+     for (const tool of input.tools ?? []) {
+         tools.push({
+             functionDeclarations: [
+                 {
+                     name: tool.function.name,
+                     description: tool.function.description,
+                     parametersJsonSchema: tool.function.parameters,
+                 },
+             ],
+         });
+     }
+     const functionCallingConfig = !input.toolChoice
+         ? undefined
+         : input.toolChoice === "auto"
+             ? { mode: FunctionCallingConfigMode.AUTO }
+             : input.toolChoice === "none"
+                 ? { mode: FunctionCallingConfigMode.NONE }
+                 : input.toolChoice === "required"
+                     ? { mode: FunctionCallingConfigMode.ANY }
+                     : {
+                         mode: FunctionCallingConfigMode.ANY,
+                         allowedFunctionNames: [input.toolChoice.function.name],
+                     };
+     return { tools, toolConfig: { functionCallingConfig } };
+ }
+ async buildContents(input) {
+     const result = {
+         contents: [],
+     };
+     const systemParts = [];
+     result.contents = (await Promise.all(input.messages.map(async (msg) => {
+         if (msg.role === "system") {
+             if (typeof msg.content === "string") {
+                 systemParts.push({ text: msg.content });
+             }
+             else if (Array.isArray(msg.content)) {
+                 systemParts.push(...msg.content.map((item) => {
+                     if (item.type === "text")
+                         return { text: item.text };
+                     throw new Error(`Unsupported content type: ${item.type}`);
+                 }));
+             }
+             return;
+         }
+         const content = {
+             role: msg.role === "agent" ? "model" : "user",
+         };
+         if (msg.toolCalls) {
+             content.parts = msg.toolCalls.map((call) => ({
+                 functionCall: {
+                     id: call.id,
+                     name: call.function.name,
+                     args: call.function.arguments,
+                 },
+             }));
+         }
+         else if (msg.toolCallId) {
+             const call = input.messages
+                 .flatMap((i) => i.toolCalls)
+                 .find((c) => c?.id === msg.toolCallId);
+             if (!call)
+                 throw new Error(`Tool call not found: ${msg.toolCallId}`);
+             content.parts = [
+                 {
+                     functionResponse: {
+                         id: msg.toolCallId,
+                         name: call.function.name,
+                         response: JSON.parse(msg.content),
+                     },
+                 },
+             ];
+         }
+         else if (typeof msg.content === "string") {
+             content.parts = [{ text: msg.content }];
+         }
+         else if (Array.isArray(msg.content)) {
+             content.parts = await Promise.all(msg.content.map(async (item) => {
+                 switch (item.type) {
+                     case "text":
+                         return { text: item.text };
+                     case "url":
+                         return { fileData: { fileUri: item.url, mimeType: item.mimeType } };
+                     case "file":
+                         return { inlineData: { data: item.data, mimeType: item.mimeType } };
+                     case "local":
+                         return {
+                             inlineData: {
+                                 data: await nodejs.fs.readFile(item.path, "base64"),
+                                 mimeType: item.mimeType,
+                             },
+                         };
+                 }
+             }));
+         }
+         return content;
+     }))).filter(isNonNullable);
+     if (systemParts) {
+         result.config ??= {};
+         result.config.systemInstruction = systemParts;
+     }
+     return result;
+ }
  async getRunMessages(input) {
      const messages = await super.getRunMessages(input);
      const lastMessage = messages.at(-1);
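When `responseFormat.type === "json_schema"`, this path suppresses incremental text deltas, requests `application/json` output via `responseJsonSchema` in `buildConfig`, and yields the parsed object once the stream completes. (One detail worth noting: `if (systemParts)` is always truthy for an array, so `systemInstruction` is set even when no system messages exist.) A hedged usage sketch; it assumes an image-capable model as configured in the first sketch, and the invoke/schema shapes follow the code above rather than any published docs:

```ts
const result = await model.invoke({
  messages: [{ role: "user", content: "Describe the generated image as JSON." }],
  responseFormat: {
    type: "json_schema",
    jsonSchema: {
      name: "description", // hypothetical schema name
      schema: {
        type: "object",
        properties: { caption: { type: "string" } },
        required: ["caption"],
      },
    },
  },
});
// The final stream delta carries { json: { json: <parsed object> } },
// produced by safeParseJSON over the accumulated text.
```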
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
    "name": "@aigne/gemini",
-   "version": "0.11.6",
+   "version": "0.12.0",
    "description": "AIGNE Gemini SDK for integrating with Google's Gemini AI models",
    "publishConfig": {
      "access": "public"
@@ -36,8 +36,10 @@
    },
    "dependencies": {
      "@google/genai": "^1.15.0",
+     "uuid": "^11.1.0",
      "zod": "^3.25.67",
-     "@aigne/openai": "^0.13.7"
+     "@aigne/platform-helpers": "^0.6.2",
+     "@aigne/openai": "^0.14.0"
    },
    "devDependencies": {
      "@types/bun": "^1.2.18",
@@ -45,8 +47,8 @@
      "npm-run-all": "^4.1.5",
      "rimraf": "^6.0.1",
      "typescript": "^5.8.3",
-     "@aigne/test-utils": "^0.5.43",
-     "@aigne/core": "^1.57.5"
+     "@aigne/core": "^1.58.0",
+     "@aigne/test-utils": "^0.5.44"
    },
    "scripts": {
      "lint": "tsc --noEmit",