npm - @aigne/gemini - Versions diffs - 0.11.5 → 0.12.0 - Mend

@aigne/gemini 0.11.5 → 0.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10) hide show

package/CHANGELOG.md +34 -0
package/README.md +120 -1
package/lib/cjs/gemini-chat-model.d.ts +10 -1
package/lib/cjs/gemini-chat-model.js +206 -0
package/lib/cjs/gemini-image-model.js +2 -2
package/lib/dts/gemini-chat-model.d.ts +10 -1
package/lib/esm/gemini-chat-model.d.ts +10 -1
package/lib/esm/gemini-chat-model.js +206 -0
package/lib/esm/gemini-image-model.js +2 -2
package/package.json +6 -4

package/CHANGELOG.md CHANGED Viewed

@@ -1,5 +1,39 @@
 # Changelog
+## [0.12.0](https://github.com/AIGNE-io/aigne-framework/compare/gemini-v0.11.6...gemini-v0.12.0) (2025-09-05)
+### Features
+* add modalities support for chat model ([#454](https://github.com/AIGNE-io/aigne-framework/issues/454)) ([70d1bf6](https://github.com/AIGNE-io/aigne-framework/commit/70d1bf631f4e711235d89c6df8ee210a19179b30))
+### Dependencies
+* The following workspace dependencies were updated
+  * dependencies
+    * @aigne/openai bumped to 0.14.0
+  * devDependencies
+    * @aigne/core bumped to 1.58.0
+    * @aigne/test-utils bumped to 0.5.44
+## [0.11.6](https://github.com/AIGNE-io/aigne-framework/compare/gemini-v0.11.5...gemini-v0.11.6) (2025-09-01)
+### Bug Fixes
+* **transport:** improve HTTP client option handling and error serialization ([#445](https://github.com/AIGNE-io/aigne-framework/issues/445)) ([d3bcdd2](https://github.com/AIGNE-io/aigne-framework/commit/d3bcdd23ab8011a7d40fc157fd61eb240494c7a5))
+### Dependencies
+* The following workspace dependencies were updated
+  * dependencies
+    * @aigne/openai bumped to 0.13.7
+  * devDependencies
+    * @aigne/core bumped to 1.57.5
+    * @aigne/test-utils bumped to 0.5.43
 ## [0.11.5](https://github.com/AIGNE-io/aigne-framework/compare/gemini-v0.11.4...gemini-v0.11.5) (2025-08-30)

package/README.md CHANGED Viewed

@@ -23,13 +23,14 @@ AIGNE Gemini SDK for integrating with Google's Gemini AI models within the [AIGN
 <picture>
   <source srcset="https://raw.githubusercontent.com/AIGNE-io/aigne-framework/main/assets/aigne-gemini-dark.png" media="(prefers-color-scheme: dark)">
   <source srcset="https://raw.githubusercontent.com/AIGNE-io/aigne-framework/main/assets/aigne-gemini.png" media="(prefers-color-scheme: light)">
-  <img src="https://raw.githubusercontent.com/AIGNE-io/aigne-framework/main/aigne-gemini.png" alt="AIGNE Arch" />
+  <img src="https://raw.githubusercontent.com/AIGNE-io/aigne-framework/main/assets/aigne-gemini.png" alt="AIGNE Arch" />
 </picture>
 ## Features
 * **Google Gemini API Integration**: Direct connection to Google's Gemini API services
 * **Chat Completions**: Support for Gemini's chat completions API with all available models
+* **Image Generation**: Support for both Imagen and Gemini image generation models
 * **Multimodal Support**: Built-in support for handling both text and image inputs
 * **Function Calling**: Support for function calling capabilities
 * **Streaming Responses**: Support for streaming responses for more responsive applications
@@ -60,6 +61,8 @@ pnpm add @aigne/gemini @aigne/core
 ## Basic Usage
+### Chat Model
 ```typescript file="test/gemini-chat-model.test.ts" region="example-gemini-chat-model"
 import { GeminiChatModel } from "@aigne/gemini";
@@ -86,6 +89,38 @@ console.log(result);
   */
 ```
+### Image Generation Model
+```typescript
+import { GeminiImageModel } from "@aigne/gemini";
+const model = new GeminiImageModel({
+  apiKey: "your-api-key", // Optional if set in env variables
+  model: "imagen-4.0-generate-001", // Default Imagen model
+});
+const result = await model.invoke({
+  prompt: "A serene mountain landscape at sunset with golden light",
+  n: 1,
+});
+console.log(result);
+/* Output:
+  {
+    images: [
+      {
+        base64: "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAA..."
+      }
+    ],
+    usage: {
+      inputTokens: 0,
+      outputTokens: 0
+    },
+    model: "imagen-4.0-generate-001"
+  }
+  */
+```
 ## Streaming Responses
 ```typescript file="test/gemini-chat-model.test.ts" region="example-gemini-chat-model-streaming"
@@ -119,6 +154,90 @@ console.log(fullText); // Output: "Hello from Gemini! I'm Google's helpful AI as
 console.log(json); // { model: "gemini-1.5-flash" }
 ```
+## Image Generation Parameters
+The `GeminiImageModel` supports different parameters depending on the model type:
+### Imagen Models (e.g., `imagen-4.0-generate-001`)
+- **`prompt`** (string): The text description of the image you want to generate
+- **`n`** (number): Number of images to generate (defaults to 1)
+- **`seed`** (number): Random seed for reproducible generation
+- **`safetyFilterLevel`** (string): Safety filter level for content moderation
+- **`personGeneration`** (string): Person generation settings
+- **`outputMimeType`** (string): Output image format (e.g., "image/png", "image/jpeg")
+- **`outputGcsUri`** (string): Google Cloud Storage URI for output
+- **`outputCompressionQuality`** (number): JPEG compression quality (1-100)
+- **`negativePrompt`** (string): Description of what to exclude from the image
+- **`language`** (string): Language for the prompt
+- **`includeSafetyAttributes`** (boolean): Include safety attributes in response
+- **`includeRaiReason`** (boolean): Include RAI reasoning in response
+- **`imageSize`** (string): Size of the generated image
+- **`guidanceScale`** (number): Guidance scale for generation
+- **`aspectRatio`** (string): Aspect ratio of the image
+- **`addWatermark`** (boolean): Add watermark to generated images
+### Gemini Models (e.g., `gemini-2.5-flash-image-preview`)
+- **`prompt`** (string): The text description of the image you want to generate
+- **`n`** (number): Number of images to generate (defaults to 1)
+- **`temperature`** (number): Controls randomness in generation (0.0 to 2.0 on current Gemini models; lower is more deterministic)
+- **`maxOutputTokens`** (number): Maximum number of tokens in response
+- **`topP`** (number): Nucleus sampling parameter
+- **`topK`** (number): Top-k sampling parameter
+- **`safetySettings`** (array): Safety settings for content generation
+- **`seed`** (number): Random seed for reproducible generation
+- **`stopSequences`** (array): Sequences that stop generation
+- **`systemInstruction`** (string): System-level instructions
+### Advanced Image Generation Example
+```typescript
+const result = await model.invoke({
+  prompt: "A futuristic cityscape with neon lights and flying cars",
+  model: "imagen-4.0-generate-001",
+  n: 2,
+  imageSize: "1024x1024",
+  aspectRatio: "1:1",
+  guidanceScale: 7.5,
+  negativePrompt: "blurry, low quality, distorted",
+  seed: 12345,
+  includeSafetyAttributes: true,
+  outputMimeType: "image/png"
+});
+```
+## Model Options
+You can also set default options when creating the model:
+```typescript
+const model = new GeminiImageModel({
+  apiKey: "your-api-key",
+  model: "imagen-4.0-generate-001",
+  modelOptions: {
+    safetyFilterLevel: "BLOCK_MEDIUM_AND_ABOVE",
+    includeSafetyAttributes: true,
+    outputMimeType: "image/png"
+  }
+});
+```
+## Environment Variables
+Set the following environment variable for automatic API key detection:
+```bash
+export GEMINI_API_KEY="your-gemini-api-key"
+```
+## API Reference
+For complete parameter details and advanced features:
+- **Imagen Models**: Refer to [Google GenAI Models.generateImages()](https://googleapis.github.io/js-genai/release_docs/classes/models.Models.html#generateimages)
+- **Gemini Models**: Refer to [Google GenAI Models.generateContent()](https://googleapis.github.io/js-genai/release_docs/classes/models.Models.html#generatecontent)
 ## License
 Elastic-2.0

package/lib/cjs/gemini-chat-model.d.ts CHANGED Viewed

@@ -1,5 +1,7 @@
-import type { ChatModelInput } from "@aigne/core";
+import { type AgentInvokeOptions, type AgentProcessResult, type ChatModelInput, type ChatModelOutput } from "@aigne/core";
+import { type PromiseOrValue } from "@aigne/core/utils/type-utils.js";
 import { OpenAIChatModel, type OpenAIChatModelOptions } from "@aigne/openai";
+import { GoogleGenAI } from "@google/genai";
 /**
  * Implementation of the ChatModel interface for Google's Gemini API
  *
@@ -20,5 +22,12 @@ export declare class GeminiChatModel extends OpenAIChatModel {
     protected supportsToolsUseWithJsonSchema: boolean;
     protected supportsParallelToolCalls: boolean;
     protected supportsToolStreaming: boolean;
+    protected _googleClient?: GoogleGenAI;
+    get googleClient(): GoogleGenAI;
+    process(input: ChatModelInput, options: AgentInvokeOptions): PromiseOrValue<AgentProcessResult<ChatModelOutput>>;
+    private handleImageModelProcessing;
+    private buildConfig;
+    private buildTools;
+    private buildContents;
     getRunMessages(input: ChatModelInput): ReturnType<OpenAIChatModel["getRunMessages"]>;
 }

package/lib/cjs/gemini-chat-model.js CHANGED Viewed

@@ -1,7 +1,12 @@
 "use strict";
 Object.defineProperty(exports, "__esModule", { value: true });
 exports.GeminiChatModel = void 0;
+const core_1 = require("@aigne/core");
+const type_utils_js_1 = require("@aigne/core/utils/type-utils.js");
 const openai_1 = require("@aigne/openai");
+const index_js_1 = require("@aigne/platform-helpers/nodejs/index.js");
+const genai_1 = require("@google/genai");
+const uuid_1 = require("uuid");
 const GEMINI_BASE_URL = "https://generativelanguage.googleapis.com/v1beta/openai";
 const GEMINI_DEFAULT_CHAT_MODEL = "gemini-2.0-flash";
 /**
@@ -30,6 +35,207 @@ class GeminiChatModel extends openai_1.OpenAIChatModel {
     supportsToolsUseWithJsonSchema = false;
     supportsParallelToolCalls = false;
     supportsToolStreaming = false;
+    _googleClient;
+    get googleClient() {
+        if (this._googleClient)
+            return this._googleClient;
+        const { apiKey } = this.credential;
+        if (!apiKey)
+            throw new Error(`${this.name} requires an API key. Please provide it via \`options.apiKey\`, or set the \`${this.apiKeyEnvName}\` environment variable`);
+        this._googleClient ??= new genai_1.GoogleGenAI({ apiKey });
+        return this._googleClient;
+    }
+    process(input, options) {
+        const model = input.modelOptions?.model || this.credential.model;
+        if (!model.includes("image"))
+            return super.process(input, options);
+        return this.handleImageModelProcessing(input, options);
+    }
+    async *handleImageModelProcessing(input, options) {
+        const model = input.modelOptions?.model || this.credential.model;
+        const { contents, config } = await this.buildContents(input);
+        const parameters = {
+            model: model,
+            contents,
+            config: {
+                responseModalities: input.modelOptions?.modalities,
+                temperature: input.modelOptions?.temperature || this.modelOptions?.temperature,
+                topP: input.modelOptions?.topP || this.modelOptions?.topP,
+                frequencyPenalty: input.modelOptions?.frequencyPenalty || this.modelOptions?.frequencyPenalty,
+                presencePenalty: input.modelOptions?.presencePenalty || this.modelOptions?.presencePenalty,
+                ...config,
+                ...(await this.buildTools(input)),
+                ...(await this.buildConfig(input)),
+            },
+        };
+        const response = await this.googleClient.models.generateContentStream(parameters);
+        const usage = {
+            inputTokens: 0,
+            outputTokens: 0,
+        };
+        let responseModel;
+        const files = [];
+        const toolCalls = [];
+        let text = "";
+        for await (const chunk of response) {
+            if (!responseModel && chunk.modelVersion) {
+                responseModel = chunk.modelVersion;
+                yield { delta: { json: { model: responseModel } } };
+            }
+            for (const { content } of chunk.candidates ?? []) {
+                if (content?.parts) {
+                    for (const part of content.parts) {
+                        if (part.text) {
+                            text += part.text;
+                            if (input.responseFormat?.type !== "json_schema") {
+                                yield { delta: { text: { text: part.text } } };
+                            }
+                        }
+                        if (part.inlineData?.data) {
+                            files.push(await this.transformFileOutput(input, {
+                                type: "file",
+                                data: part.inlineData.data,
+                                filename: part.inlineData.displayName,
+                                mimeType: part.inlineData.mimeType,
+                            }, options));
+                            yield { delta: { json: { files } } };
+                        }
+                        if (part.functionCall?.name) {
+                            toolCalls.push({
+                                id: part.functionCall.id || (0, uuid_1.v7)(),
+                                type: "function",
+                                function: {
+                                    name: part.functionCall.name,
+                                    arguments: part.functionCall.args || {},
+                                },
+                            });
+                            yield { delta: { json: { toolCalls } } };
+                        }
+                    }
+                }
+            }
+            if (chunk.usageMetadata) {
+                usage.inputTokens += chunk.usageMetadata.promptTokenCount || 0;
+                usage.outputTokens += chunk.usageMetadata.candidatesTokenCount || 0;
+                yield { delta: { json: { usage } } };
+            }
+        }
+        if (input.responseFormat?.type === "json_schema") {
+            yield { delta: { json: { json: (0, core_1.safeParseJSON)(text) } } };
+        }
+    }
+    async buildConfig(input) {
+        const config = {};
+        if (input.responseFormat?.type === "json_schema") {
+            config.responseJsonSchema = input.responseFormat.jsonSchema.schema;
+            config.responseMimeType = "application/json";
+        }
+        return config;
+    }
+    async buildTools(input) {
+        const tools = [];
+        for (const tool of input.tools ?? []) {
+            tools.push({
+                functionDeclarations: [
+                    {
+                        name: tool.function.name,
+                        description: tool.function.description,
+                        parametersJsonSchema: tool.function.parameters,
+                    },
+                ],
+            });
+        }
+        const functionCallingConfig = !input.toolChoice
+            ? undefined
+            : input.toolChoice === "auto"
+                ? { mode: genai_1.FunctionCallingConfigMode.AUTO }
+                : input.toolChoice === "none"
+                    ? { mode: genai_1.FunctionCallingConfigMode.NONE }
+                    : input.toolChoice === "required"
+                        ? { mode: genai_1.FunctionCallingConfigMode.ANY }
+                        : {
+                            mode: genai_1.FunctionCallingConfigMode.ANY,
+                            allowedFunctionNames: [input.toolChoice.function.name],
+                        };
+        return { tools, toolConfig: { functionCallingConfig } };
+    }
+    async buildContents(input) {
+        const result = {
+            contents: [],
+        };
+        const systemParts = [];
+        result.contents = (await Promise.all(input.messages.map(async (msg) => {
+            if (msg.role === "system") {
+                if (typeof msg.content === "string") {
+                    systemParts.push({ text: msg.content });
+                }
+                else if (Array.isArray(msg.content)) {
+                    systemParts.push(...msg.content.map((item) => {
+                        if (item.type === "text")
+                            return { text: item.text };
+                        throw new Error(`Unsupported content type: ${item.type}`);
+                    }));
+                }
+                return;
+            }
+            const content = {
+                role: msg.role === "agent" ? "model" : "user",
+            };
+            if (msg.toolCalls) {
+                content.parts = msg.toolCalls.map((call) => ({
+                    functionCall: {
+                        id: call.id,
+                        name: call.function.name,
+                        args: call.function.arguments,
+                    },
+                }));
+            }
+            else if (msg.toolCallId) {
+                const call = input.messages
+                    .flatMap((i) => i.toolCalls)
+                    .find((c) => c?.id === msg.toolCallId);
+                if (!call)
+                    throw new Error(`Tool call not found: ${msg.toolCallId}`);
+                content.parts = [
+                    {
+                        functionResponse: {
+                            id: msg.toolCallId,
+                            name: call.function.name,
+                            response: JSON.parse(msg.content),
+                        },
+                    },
+                ];
+            }
+            else if (typeof msg.content === "string") {
+                content.parts = [{ text: msg.content }];
+            }
+            else if (Array.isArray(msg.content)) {
+                content.parts = await Promise.all(msg.content.map(async (item) => {
+                    switch (item.type) {
+                        case "text":
+                            return { text: item.text };
+                        case "url":
+                            return { fileData: { fileUri: item.url, mimeType: item.mimeType } };
+                        case "file":
+                            return { inlineData: { data: item.data, mimeType: item.mimeType } };
+                        case "local":
+                            return {
+                                inlineData: {
+                                    data: await index_js_1.nodejs.fs.readFile(item.path, "base64"),
+                                    mimeType: item.mimeType,
+                                },
+                            };
+                    }
+                }));
+            }
+            return content;
+        }))).filter(type_utils_js_1.isNonNullable);
+        if (systemParts) {
+            result.config ??= {};
+            result.config.systemInstruction = systemParts;
+        }
+        return result;
+    }
     async getRunMessages(input) {
         const messages = await super.getRunMessages(input);
         const lastMessage = messages.at(-1);

package/lib/cjs/gemini-image-model.js CHANGED Viewed

@@ -142,8 +142,8 @@ class GeminiImageModel extends core_1.ImageModel {
         });
         const allImages = (response.candidates ?? [])
             .flatMap((candidate) => candidate.content?.parts ?? [])
-            .filter((part) => part?.inlineData?.data)
-            .map((part) => ({ base64: part.inlineData.data }));
+            .map((part) => (part.inlineData?.data ? { base64: part.inlineData?.data } : null))
+            .filter(type_utils_js_1.isNonNullable);
         return {
             images: allImages,
             usage: {

package/lib/dts/gemini-chat-model.d.ts CHANGED Viewed

@@ -1,5 +1,7 @@
-import type { ChatModelInput } from "@aigne/core";
+import { type AgentInvokeOptions, type AgentProcessResult, type ChatModelInput, type ChatModelOutput } from "@aigne/core";
+import { type PromiseOrValue } from "@aigne/core/utils/type-utils.js";
 import { OpenAIChatModel, type OpenAIChatModelOptions } from "@aigne/openai";
+import { GoogleGenAI } from "@google/genai";
 /**
  * Implementation of the ChatModel interface for Google's Gemini API
  *
@@ -20,5 +22,12 @@ export declare class GeminiChatModel extends OpenAIChatModel {
     protected supportsToolsUseWithJsonSchema: boolean;
     protected supportsParallelToolCalls: boolean;
     protected supportsToolStreaming: boolean;
+    protected _googleClient?: GoogleGenAI;
+    get googleClient(): GoogleGenAI;
+    process(input: ChatModelInput, options: AgentInvokeOptions): PromiseOrValue<AgentProcessResult<ChatModelOutput>>;
+    private handleImageModelProcessing;
+    private buildConfig;
+    private buildTools;
+    private buildContents;
     getRunMessages(input: ChatModelInput): ReturnType<OpenAIChatModel["getRunMessages"]>;
 }

package/lib/esm/gemini-chat-model.d.ts CHANGED Viewed

@@ -1,5 +1,7 @@
-import type { ChatModelInput } from "@aigne/core";
+import { type AgentInvokeOptions, type AgentProcessResult, type ChatModelInput, type ChatModelOutput } from "@aigne/core";
+import { type PromiseOrValue } from "@aigne/core/utils/type-utils.js";
 import { OpenAIChatModel, type OpenAIChatModelOptions } from "@aigne/openai";
+import { GoogleGenAI } from "@google/genai";
 /**
  * Implementation of the ChatModel interface for Google's Gemini API
  *
@@ -20,5 +22,12 @@ export declare class GeminiChatModel extends OpenAIChatModel {
     protected supportsToolsUseWithJsonSchema: boolean;
     protected supportsParallelToolCalls: boolean;
     protected supportsToolStreaming: boolean;
+    protected _googleClient?: GoogleGenAI;
+    get googleClient(): GoogleGenAI;
+    process(input: ChatModelInput, options: AgentInvokeOptions): PromiseOrValue<AgentProcessResult<ChatModelOutput>>;
+    private handleImageModelProcessing;
+    private buildConfig;
+    private buildTools;
+    private buildContents;
     getRunMessages(input: ChatModelInput): ReturnType<OpenAIChatModel["getRunMessages"]>;
 }

package/lib/esm/gemini-chat-model.js CHANGED Viewed

@@ -1,4 +1,9 @@
+import { safeParseJSON, } from "@aigne/core";
+import { isNonNullable } from "@aigne/core/utils/type-utils.js";
 import { OpenAIChatModel } from "@aigne/openai";
+import { nodejs } from "@aigne/platform-helpers/nodejs/index.js";
+import { FunctionCallingConfigMode, GoogleGenAI, } from "@google/genai";
+import { v7 } from "uuid";
 const GEMINI_BASE_URL = "https://generativelanguage.googleapis.com/v1beta/openai";
 const GEMINI_DEFAULT_CHAT_MODEL = "gemini-2.0-flash";
 /**
@@ -27,6 +32,207 @@ export class GeminiChatModel extends OpenAIChatModel {
     supportsToolsUseWithJsonSchema = false;
     supportsParallelToolCalls = false;
     supportsToolStreaming = false;
+    _googleClient;
+    get googleClient() {
+        if (this._googleClient)
+            return this._googleClient;
+        const { apiKey } = this.credential;
+        if (!apiKey)
+            throw new Error(`${this.name} requires an API key. Please provide it via \`options.apiKey\`, or set the \`${this.apiKeyEnvName}\` environment variable`);
+        this._googleClient ??= new GoogleGenAI({ apiKey });
+        return this._googleClient;
+    }
+    process(input, options) {
+        const model = input.modelOptions?.model || this.credential.model;
+        if (!model.includes("image"))
+            return super.process(input, options);
+        return this.handleImageModelProcessing(input, options);
+    }
+    async *handleImageModelProcessing(input, options) {
+        const model = input.modelOptions?.model || this.credential.model;
+        const { contents, config } = await this.buildContents(input);
+        const parameters = {
+            model: model,
+            contents,
+            config: {
+                responseModalities: input.modelOptions?.modalities,
+                temperature: input.modelOptions?.temperature || this.modelOptions?.temperature,
+                topP: input.modelOptions?.topP || this.modelOptions?.topP,
+                frequencyPenalty: input.modelOptions?.frequencyPenalty || this.modelOptions?.frequencyPenalty,
+                presencePenalty: input.modelOptions?.presencePenalty || this.modelOptions?.presencePenalty,
+                ...config,
+                ...(await this.buildTools(input)),
+                ...(await this.buildConfig(input)),
+            },
+        };
+        const response = await this.googleClient.models.generateContentStream(parameters);
+        const usage = {
+            inputTokens: 0,
+            outputTokens: 0,
+        };
+        let responseModel;
+        const files = [];
+        const toolCalls = [];
+        let text = "";
+        for await (const chunk of response) {
+            if (!responseModel && chunk.modelVersion) {
+                responseModel = chunk.modelVersion;
+                yield { delta: { json: { model: responseModel } } };
+            }
+            for (const { content } of chunk.candidates ?? []) {
+                if (content?.parts) {
+                    for (const part of content.parts) {
+                        if (part.text) {
+                            text += part.text;
+                            if (input.responseFormat?.type !== "json_schema") {
+                                yield { delta: { text: { text: part.text } } };
+                            }
+                        }
+                        if (part.inlineData?.data) {
+                            files.push(await this.transformFileOutput(input, {
+                                type: "file",
+                                data: part.inlineData.data,
+                                filename: part.inlineData.displayName,
+                                mimeType: part.inlineData.mimeType,
+                            }, options));
+                            yield { delta: { json: { files } } };
+                        }
+                        if (part.functionCall?.name) {
+                            toolCalls.push({
+                                id: part.functionCall.id || v7(),
+                                type: "function",
+                                function: {
+                                    name: part.functionCall.name,
+                                    arguments: part.functionCall.args || {},
+                                },
+                            });
+                            yield { delta: { json: { toolCalls } } };
+                        }
+                    }
+                }
+            }
+            if (chunk.usageMetadata) {
+                usage.inputTokens += chunk.usageMetadata.promptTokenCount || 0;
+                usage.outputTokens += chunk.usageMetadata.candidatesTokenCount || 0;
+                yield { delta: { json: { usage } } };
+            }
+        }
+        if (input.responseFormat?.type === "json_schema") {
+            yield { delta: { json: { json: safeParseJSON(text) } } };
+        }
+    }
+    async buildConfig(input) {
+        const config = {};
+        if (input.responseFormat?.type === "json_schema") {
+            config.responseJsonSchema = input.responseFormat.jsonSchema.schema;
+            config.responseMimeType = "application/json";
+        }
+        return config;
+    }
+    async buildTools(input) {
+        const tools = [];
+        for (const tool of input.tools ?? []) {
+            tools.push({
+                functionDeclarations: [
+                    {
+                        name: tool.function.name,
+                        description: tool.function.description,
+                        parametersJsonSchema: tool.function.parameters,
+                    },
+                ],
+            });
+        }
+        const functionCallingConfig = !input.toolChoice
+            ? undefined
+            : input.toolChoice === "auto"
+                ? { mode: FunctionCallingConfigMode.AUTO }
+                : input.toolChoice === "none"
+                    ? { mode: FunctionCallingConfigMode.NONE }
+                    : input.toolChoice === "required"
+                        ? { mode: FunctionCallingConfigMode.ANY }
+                        : {
+                            mode: FunctionCallingConfigMode.ANY,
+                            allowedFunctionNames: [input.toolChoice.function.name],
+                        };
+        return { tools, toolConfig: { functionCallingConfig } };
+    }
+    async buildContents(input) {
+        const result = {
+            contents: [],
+        };
+        const systemParts = [];
+        result.contents = (await Promise.all(input.messages.map(async (msg) => {
+            if (msg.role === "system") {
+                if (typeof msg.content === "string") {
+                    systemParts.push({ text: msg.content });
+                }
+                else if (Array.isArray(msg.content)) {
+                    systemParts.push(...msg.content.map((item) => {
+                        if (item.type === "text")
+                            return { text: item.text };
+                        throw new Error(`Unsupported content type: ${item.type}`);
+                    }));
+                }
+                return;
+            }
+            const content = {
+                role: msg.role === "agent" ? "model" : "user",
+            };
+            if (msg.toolCalls) {
+                content.parts = msg.toolCalls.map((call) => ({
+                    functionCall: {
+                        id: call.id,
+                        name: call.function.name,
+                        args: call.function.arguments,
+                    },
+                }));
+            }
+            else if (msg.toolCallId) {
+                const call = input.messages
+                    .flatMap((i) => i.toolCalls)
+                    .find((c) => c?.id === msg.toolCallId);
+                if (!call)
+                    throw new Error(`Tool call not found: ${msg.toolCallId}`);
+                content.parts = [
+                    {
+                        functionResponse: {
+                            id: msg.toolCallId,
+                            name: call.function.name,
+                            response: JSON.parse(msg.content),
+                        },
+                    },
+                ];
+            }
+            else if (typeof msg.content === "string") {
+                content.parts = [{ text: msg.content }];
+            }
+            else if (Array.isArray(msg.content)) {
+                content.parts = await Promise.all(msg.content.map(async (item) => {
+                    switch (item.type) {
+                        case "text":
+                            return { text: item.text };
+                        case "url":
+                            return { fileData: { fileUri: item.url, mimeType: item.mimeType } };
+                        case "file":
+                            return { inlineData: { data: item.data, mimeType: item.mimeType } };
+                        case "local":
+                            return {
+                                inlineData: {
+                                    data: await nodejs.fs.readFile(item.path, "base64"),
+                                    mimeType: item.mimeType,
+                                },
+                            };
+                    }
+                }));
+            }
+            return content;
+        }))).filter(isNonNullable);
+        if (systemParts) {
+            result.config ??= {};
+            result.config.systemInstruction = systemParts;
+        }
+        return result;
+    }
     async getRunMessages(input) {
         const messages = await super.getRunMessages(input);
         const lastMessage = messages.at(-1);

package/lib/esm/gemini-image-model.js CHANGED Viewed

@@ -139,8 +139,8 @@ export class GeminiImageModel extends ImageModel {
         });
         const allImages = (response.candidates ?? [])
             .flatMap((candidate) => candidate.content?.parts ?? [])
-            .filter((part) => part?.inlineData?.data)
-            .map((part) => ({ base64: part.inlineData.data }));
+            .map((part) => (part.inlineData?.data ? { base64: part.inlineData?.data } : null))
+            .filter(isNonNullable);
         return {
             images: allImages,
             usage: {

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@aigne/gemini",
-  "version": "0.11.5",
+  "version": "0.12.0",
   "description": "AIGNE Gemini SDK for integrating with Google's Gemini AI models",
   "publishConfig": {
     "access": "public"
@@ -36,8 +36,10 @@
   },
   "dependencies": {
     "@google/genai": "^1.15.0",
+    "uuid": "^11.1.0",
     "zod": "^3.25.67",
-    "@aigne/openai": "^0.13.6"
+    "@aigne/platform-helpers": "^0.6.2",
+    "@aigne/openai": "^0.14.0"
   },
   "devDependencies": {
     "@types/bun": "^1.2.18",
@@ -45,8 +47,8 @@
     "npm-run-all": "^4.1.5",
     "rimraf": "^6.0.1",
     "typescript": "^5.8.3",
-    "@aigne/core": "^1.57.4",
-    "@aigne/test-utils": "^0.5.42"
+    "@aigne/core": "^1.58.0",
+    "@aigne/test-utils": "^0.5.44"
   },
   "scripts": {
     "lint": "tsc --noEmit",