@aigne/gemini 0.11.5 → 0.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,39 @@
1
1
  # Changelog
2
2
 
3
+ ## [0.12.0](https://github.com/AIGNE-io/aigne-framework/compare/gemini-v0.11.6...gemini-v0.12.0) (2025-09-05)
4
+
5
+
6
+ ### Features
7
+
8
+ * add modalities support for chat model ([#454](https://github.com/AIGNE-io/aigne-framework/issues/454)) ([70d1bf6](https://github.com/AIGNE-io/aigne-framework/commit/70d1bf631f4e711235d89c6df8ee210a19179b30))
9
+
10
+
11
+ ### Dependencies
12
+
13
+ * The following workspace dependencies were updated
14
+ * dependencies
15
+ * @aigne/openai bumped to 0.14.0
16
+ * devDependencies
17
+ * @aigne/core bumped to 1.58.0
18
+ * @aigne/test-utils bumped to 0.5.44
19
+
20
+ ## [0.11.6](https://github.com/AIGNE-io/aigne-framework/compare/gemini-v0.11.5...gemini-v0.11.6) (2025-09-01)
21
+
22
+
23
+ ### Bug Fixes
24
+
25
+ * **transport:** improve HTTP client option handling and error serialization ([#445](https://github.com/AIGNE-io/aigne-framework/issues/445)) ([d3bcdd2](https://github.com/AIGNE-io/aigne-framework/commit/d3bcdd23ab8011a7d40fc157fd61eb240494c7a5))
26
+
27
+
28
+ ### Dependencies
29
+
30
+ * The following workspace dependencies were updated
31
+ * dependencies
32
+ * @aigne/openai bumped to 0.13.7
33
+ * devDependencies
34
+ * @aigne/core bumped to 1.57.5
35
+ * @aigne/test-utils bumped to 0.5.43
36
+
3
37
  ## [0.11.5](https://github.com/AIGNE-io/aigne-framework/compare/gemini-v0.11.4...gemini-v0.11.5) (2025-08-30)
4
38
 
5
39
 
package/README.md CHANGED
@@ -23,13 +23,14 @@ AIGNE Gemini SDK for integrating with Google's Gemini AI models within the [AIGN
23
23
  <picture>
24
24
  <source srcset="https://raw.githubusercontent.com/AIGNE-io/aigne-framework/main/assets/aigne-gemini-dark.png" media="(prefers-color-scheme: dark)">
25
25
  <source srcset="https://raw.githubusercontent.com/AIGNE-io/aigne-framework/main/assets/aigne-gemini.png" media="(prefers-color-scheme: light)">
26
- <img src="https://raw.githubusercontent.com/AIGNE-io/aigne-framework/main/aigne-gemini.png" alt="AIGNE Arch" />
26
+ <img src="https://raw.githubusercontent.com/AIGNE-io/aigne-framework/main/assets/aigne-gemini.png" alt="AIGNE Arch" />
27
27
  </picture>
28
28
 
29
29
  ## Features
30
30
 
31
31
  * **Google Gemini API Integration**: Direct connection to Google's Gemini API services
32
32
  * **Chat Completions**: Support for Gemini's chat completions API with all available models
33
+ * **Image Generation**: Support for both Imagen and Gemini image generation models
33
34
  * **Multimodal Support**: Built-in support for handling both text and image inputs
34
35
  * **Function Calling**: Support for function calling capabilities
35
36
  * **Streaming Responses**: Support for streaming responses for more responsive applications
@@ -60,6 +61,8 @@ pnpm add @aigne/gemini @aigne/core
60
61
 
61
62
  ## Basic Usage
62
63
 
64
+ ### Chat Model
65
+
63
66
  ```typescript file="test/gemini-chat-model.test.ts" region="example-gemini-chat-model"
64
67
  import { GeminiChatModel } from "@aigne/gemini";
65
68
 
@@ -86,6 +89,38 @@ console.log(result);
86
89
  */
87
90
  ```
88
91
 
92
+ ### Image Generation Model
93
+
94
+ ```typescript
95
+ import { GeminiImageModel } from "@aigne/gemini";
96
+
97
+ const model = new GeminiImageModel({
98
+ apiKey: "your-api-key", // Optional if set in env variables
99
+ model: "imagen-4.0-generate-001", // Default Imagen model
100
+ });
101
+
102
+ const result = await model.invoke({
103
+ prompt: "A serene mountain landscape at sunset with golden light",
104
+ n: 1,
105
+ });
106
+
107
+ console.log(result);
108
+ /* Output:
109
+ {
110
+ images: [
111
+ {
112
+ base64: "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAA..."
113
+ }
114
+ ],
115
+ usage: {
116
+ inputTokens: 0,
117
+ outputTokens: 0
118
+ },
119
+ model: "imagen-4.0-generate-001"
120
+ }
121
+ */
122
+ ```
123
+
89
124
  ## Streaming Responses
90
125
 
91
126
  ```typescript file="test/gemini-chat-model.test.ts" region="example-gemini-chat-model-streaming"
@@ -119,6 +154,90 @@ console.log(fullText); // Output: "Hello from Gemini! I'm Google's helpful AI as
119
154
  console.log(json); // { model: "gemini-1.5-flash" }
120
155
  ```
121
156
 
157
+ ## Image Generation Parameters
158
+
159
+ The `GeminiImageModel` supports different parameters depending on the model type:
160
+
161
+ ### Imagen Models (e.g., `imagen-4.0-generate-001`)
162
+
163
+ - **`prompt`** (string): The text description of the image you want to generate
164
+ - **`n`** (number): Number of images to generate (defaults to 1)
165
+ - **`seed`** (number): Random seed for reproducible generation
166
+ - **`safetyFilterLevel`** (string): Safety filter level for content moderation
167
+ - **`personGeneration`** (string): Person generation settings
168
+ - **`outputMimeType`** (string): Output image format (e.g., "image/png", "image/jpeg")
169
+ - **`outputGcsUri`** (string): Google Cloud Storage URI for output
170
+ - **`outputCompressionQuality`** (number): JPEG compression quality (1-100)
171
+ - **`negativePrompt`** (string): Description of what to exclude from the image
172
+ - **`language`** (string): Language for the prompt
173
+ - **`includeSafetyAttributes`** (boolean): Include safety attributes in response
174
+ - **`includeRaiReason`** (boolean): Include RAI reasoning in response
175
+ - **`imageSize`** (string): Size of the generated image
176
+ - **`guidanceScale`** (number): Guidance scale for generation
177
+ - **`aspectRatio`** (string): Aspect ratio of the image
178
+ - **`addWatermark`** (boolean): Add watermark to generated images
179
+
180
+ ### Gemini Models (e.g., `gemini-2.5-flash-image-preview`)
181
+
182
+ - **`prompt`** (string): The text description of the image you want to generate
183
+ - **`n`** (number): Number of images to generate (defaults to 1)
184
+ - **`temperature`** (number): Controls randomness in generation (0.0 to 1.0)
185
+ - **`maxOutputTokens`** (number): Maximum number of tokens in response
186
+ - **`topP`** (number): Nucleus sampling parameter
187
+ - **`topK`** (number): Top-k sampling parameter
188
+ - **`safetySettings`** (array): Safety settings for content generation
189
+ - **`seed`** (number): Random seed for reproducible generation
190
+ - **`stopSequences`** (array): Sequences that stop generation
191
+ - **`systemInstruction`** (string): System-level instructions
192
+
193
+ ### Advanced Image Generation Example
194
+
195
+ ```typescript
196
+ const result = await model.invoke({
197
+ prompt: "A futuristic cityscape with neon lights and flying cars",
198
+ model: "imagen-4.0-generate-001",
199
+ n: 2,
200
+ imageSize: "1024x1024",
201
+ aspectRatio: "1:1",
202
+ guidanceScale: 7.5,
203
+ negativePrompt: "blurry, low quality, distorted",
204
+ seed: 12345,
205
+ includeSafetyAttributes: true,
206
+ outputMimeType: "image/png"
207
+ });
208
+ ```
209
+
210
+ ## Model Options
211
+
212
+ You can also set default options when creating the model:
213
+
214
+ ```typescript
215
+ const model = new GeminiImageModel({
216
+ apiKey: "your-api-key",
217
+ model: "imagen-4.0-generate-001",
218
+ modelOptions: {
219
+ safetyFilterLevel: "BLOCK_MEDIUM_AND_ABOVE",
220
+ includeSafetyAttributes: true,
221
+ outputMimeType: "image/png"
222
+ }
223
+ });
224
+ ```
225
+
226
+ ## Environment Variables
227
+
228
+ Set the following environment variable for automatic API key detection:
229
+
230
+ ```bash
231
+ export GEMINI_API_KEY="your-gemini-api-key"
232
+ ```
233
+
234
+ ## API Reference
235
+
236
+ For complete parameter details and advanced features, see the Google GenAI SDK reference:
237
+
238
+ - **Imagen Models**: Refer to [Google GenAI Models.generateImages()](https://googleapis.github.io/js-genai/release_docs/classes/models.Models.html#generateimages)
239
+ - **Gemini Models**: Refer to [Google GenAI Models.generateContent()](https://googleapis.github.io/js-genai/release_docs/classes/models.Models.html#generatecontent)
240
+
122
241
  ## License
123
242
 
124
243
  Elastic-2.0
@@ -1,5 +1,7 @@
1
- import type { ChatModelInput } from "@aigne/core";
1
+ import { type AgentInvokeOptions, type AgentProcessResult, type ChatModelInput, type ChatModelOutput } from "@aigne/core";
2
+ import { type PromiseOrValue } from "@aigne/core/utils/type-utils.js";
2
3
  import { OpenAIChatModel, type OpenAIChatModelOptions } from "@aigne/openai";
4
+ import { GoogleGenAI } from "@google/genai";
3
5
  /**
4
6
  * Implementation of the ChatModel interface for Google's Gemini API
5
7
  *
@@ -20,5 +22,12 @@ export declare class GeminiChatModel extends OpenAIChatModel {
20
22
  protected supportsToolsUseWithJsonSchema: boolean;
21
23
  protected supportsParallelToolCalls: boolean;
22
24
  protected supportsToolStreaming: boolean;
25
+ protected _googleClient?: GoogleGenAI;
26
+ get googleClient(): GoogleGenAI;
27
+ process(input: ChatModelInput, options: AgentInvokeOptions): PromiseOrValue<AgentProcessResult<ChatModelOutput>>;
28
+ private handleImageModelProcessing;
29
+ private buildConfig;
30
+ private buildTools;
31
+ private buildContents;
23
32
  getRunMessages(input: ChatModelInput): ReturnType<OpenAIChatModel["getRunMessages"]>;
24
33
  }
@@ -1,7 +1,12 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
3
  exports.GeminiChatModel = void 0;
4
+ const core_1 = require("@aigne/core");
5
+ const type_utils_js_1 = require("@aigne/core/utils/type-utils.js");
4
6
  const openai_1 = require("@aigne/openai");
7
+ const index_js_1 = require("@aigne/platform-helpers/nodejs/index.js");
8
+ const genai_1 = require("@google/genai");
9
+ const uuid_1 = require("uuid");
5
10
  const GEMINI_BASE_URL = "https://generativelanguage.googleapis.com/v1beta/openai";
6
11
  const GEMINI_DEFAULT_CHAT_MODEL = "gemini-2.0-flash";
7
12
  /**
@@ -30,6 +35,207 @@ class GeminiChatModel extends openai_1.OpenAIChatModel {
30
35
  supportsToolsUseWithJsonSchema = false;
31
36
  supportsParallelToolCalls = false;
32
37
  supportsToolStreaming = false;
38
+ _googleClient;
39
+ get googleClient() {
40
+ if (this._googleClient)
41
+ return this._googleClient;
42
+ const { apiKey } = this.credential;
43
+ if (!apiKey)
44
+ throw new Error(`${this.name} requires an API key. Please provide it via \`options.apiKey\`, or set the \`${this.apiKeyEnvName}\` environment variable`);
45
+ this._googleClient ??= new genai_1.GoogleGenAI({ apiKey });
46
+ return this._googleClient;
47
+ }
48
+ process(input, options) {
49
+ const model = input.modelOptions?.model || this.credential.model;
50
+ if (!model.includes("image"))
51
+ return super.process(input, options);
52
+ return this.handleImageModelProcessing(input, options);
53
+ }
54
+ async *handleImageModelProcessing(input, options) {
55
+ const model = input.modelOptions?.model || this.credential.model;
56
+ const { contents, config } = await this.buildContents(input);
57
+ const parameters = {
58
+ model: model,
59
+ contents,
60
+ config: {
61
+ responseModalities: input.modelOptions?.modalities,
62
+ temperature: input.modelOptions?.temperature || this.modelOptions?.temperature,
63
+ topP: input.modelOptions?.topP || this.modelOptions?.topP,
64
+ frequencyPenalty: input.modelOptions?.frequencyPenalty || this.modelOptions?.frequencyPenalty,
65
+ presencePenalty: input.modelOptions?.presencePenalty || this.modelOptions?.presencePenalty,
66
+ ...config,
67
+ ...(await this.buildTools(input)),
68
+ ...(await this.buildConfig(input)),
69
+ },
70
+ };
71
+ const response = await this.googleClient.models.generateContentStream(parameters);
72
+ const usage = {
73
+ inputTokens: 0,
74
+ outputTokens: 0,
75
+ };
76
+ let responseModel;
77
+ const files = [];
78
+ const toolCalls = [];
79
+ let text = "";
80
+ for await (const chunk of response) {
81
+ if (!responseModel && chunk.modelVersion) {
82
+ responseModel = chunk.modelVersion;
83
+ yield { delta: { json: { model: responseModel } } };
84
+ }
85
+ for (const { content } of chunk.candidates ?? []) {
86
+ if (content?.parts) {
87
+ for (const part of content.parts) {
88
+ if (part.text) {
89
+ text += part.text;
90
+ if (input.responseFormat?.type !== "json_schema") {
91
+ yield { delta: { text: { text: part.text } } };
92
+ }
93
+ }
94
+ if (part.inlineData?.data) {
95
+ files.push(await this.transformFileOutput(input, {
96
+ type: "file",
97
+ data: part.inlineData.data,
98
+ filename: part.inlineData.displayName,
99
+ mimeType: part.inlineData.mimeType,
100
+ }, options));
101
+ yield { delta: { json: { files } } };
102
+ }
103
+ if (part.functionCall?.name) {
104
+ toolCalls.push({
105
+ id: part.functionCall.id || (0, uuid_1.v7)(),
106
+ type: "function",
107
+ function: {
108
+ name: part.functionCall.name,
109
+ arguments: part.functionCall.args || {},
110
+ },
111
+ });
112
+ yield { delta: { json: { toolCalls } } };
113
+ }
114
+ }
115
+ }
116
+ }
117
+ if (chunk.usageMetadata) {
118
+ usage.inputTokens += chunk.usageMetadata.promptTokenCount || 0;
119
+ usage.outputTokens += chunk.usageMetadata.candidatesTokenCount || 0;
120
+ yield { delta: { json: { usage } } };
121
+ }
122
+ }
123
+ if (input.responseFormat?.type === "json_schema") {
124
+ yield { delta: { json: { json: (0, core_1.safeParseJSON)(text) } } };
125
+ }
126
+ }
127
+ async buildConfig(input) {
128
+ const config = {};
129
+ if (input.responseFormat?.type === "json_schema") {
130
+ config.responseJsonSchema = input.responseFormat.jsonSchema.schema;
131
+ config.responseMimeType = "application/json";
132
+ }
133
+ return config;
134
+ }
135
+ async buildTools(input) {
136
+ const tools = [];
137
+ for (const tool of input.tools ?? []) {
138
+ tools.push({
139
+ functionDeclarations: [
140
+ {
141
+ name: tool.function.name,
142
+ description: tool.function.description,
143
+ parametersJsonSchema: tool.function.parameters,
144
+ },
145
+ ],
146
+ });
147
+ }
148
+ const functionCallingConfig = !input.toolChoice
149
+ ? undefined
150
+ : input.toolChoice === "auto"
151
+ ? { mode: genai_1.FunctionCallingConfigMode.AUTO }
152
+ : input.toolChoice === "none"
153
+ ? { mode: genai_1.FunctionCallingConfigMode.NONE }
154
+ : input.toolChoice === "required"
155
+ ? { mode: genai_1.FunctionCallingConfigMode.ANY }
156
+ : {
157
+ mode: genai_1.FunctionCallingConfigMode.ANY,
158
+ allowedFunctionNames: [input.toolChoice.function.name],
159
+ };
160
+ return { tools, toolConfig: { functionCallingConfig } };
161
+ }
162
+ async buildContents(input) {
163
+ const result = {
164
+ contents: [],
165
+ };
166
+ const systemParts = [];
167
+ result.contents = (await Promise.all(input.messages.map(async (msg) => {
168
+ if (msg.role === "system") {
169
+ if (typeof msg.content === "string") {
170
+ systemParts.push({ text: msg.content });
171
+ }
172
+ else if (Array.isArray(msg.content)) {
173
+ systemParts.push(...msg.content.map((item) => {
174
+ if (item.type === "text")
175
+ return { text: item.text };
176
+ throw new Error(`Unsupported content type: ${item.type}`);
177
+ }));
178
+ }
179
+ return;
180
+ }
181
+ const content = {
182
+ role: msg.role === "agent" ? "model" : "user",
183
+ };
184
+ if (msg.toolCalls) {
185
+ content.parts = msg.toolCalls.map((call) => ({
186
+ functionCall: {
187
+ id: call.id,
188
+ name: call.function.name,
189
+ args: call.function.arguments,
190
+ },
191
+ }));
192
+ }
193
+ else if (msg.toolCallId) {
194
+ const call = input.messages
195
+ .flatMap((i) => i.toolCalls)
196
+ .find((c) => c?.id === msg.toolCallId);
197
+ if (!call)
198
+ throw new Error(`Tool call not found: ${msg.toolCallId}`);
199
+ content.parts = [
200
+ {
201
+ functionResponse: {
202
+ id: msg.toolCallId,
203
+ name: call.function.name,
204
+ response: JSON.parse(msg.content),
205
+ },
206
+ },
207
+ ];
208
+ }
209
+ else if (typeof msg.content === "string") {
210
+ content.parts = [{ text: msg.content }];
211
+ }
212
+ else if (Array.isArray(msg.content)) {
213
+ content.parts = await Promise.all(msg.content.map(async (item) => {
214
+ switch (item.type) {
215
+ case "text":
216
+ return { text: item.text };
217
+ case "url":
218
+ return { fileData: { fileUri: item.url, mimeType: item.mimeType } };
219
+ case "file":
220
+ return { inlineData: { data: item.data, mimeType: item.mimeType } };
221
+ case "local":
222
+ return {
223
+ inlineData: {
224
+ data: await index_js_1.nodejs.fs.readFile(item.path, "base64"),
225
+ mimeType: item.mimeType,
226
+ },
227
+ };
228
+ }
229
+ }));
230
+ }
231
+ return content;
232
+ }))).filter(type_utils_js_1.isNonNullable);
233
+ if (systemParts) {
234
+ result.config ??= {};
235
+ result.config.systemInstruction = systemParts;
236
+ }
237
+ return result;
238
+ }
33
239
  async getRunMessages(input) {
34
240
  const messages = await super.getRunMessages(input);
35
241
  const lastMessage = messages.at(-1);
@@ -142,8 +142,8 @@ class GeminiImageModel extends core_1.ImageModel {
142
142
  });
143
143
  const allImages = (response.candidates ?? [])
144
144
  .flatMap((candidate) => candidate.content?.parts ?? [])
145
- .filter((part) => part?.inlineData?.data)
146
- .map((part) => ({ base64: part.inlineData.data }));
145
+ .map((part) => (part.inlineData?.data ? { base64: part.inlineData?.data } : null))
146
+ .filter(type_utils_js_1.isNonNullable);
147
147
  return {
148
148
  images: allImages,
149
149
  usage: {
@@ -1,5 +1,7 @@
1
- import type { ChatModelInput } from "@aigne/core";
1
+ import { type AgentInvokeOptions, type AgentProcessResult, type ChatModelInput, type ChatModelOutput } from "@aigne/core";
2
+ import { type PromiseOrValue } from "@aigne/core/utils/type-utils.js";
2
3
  import { OpenAIChatModel, type OpenAIChatModelOptions } from "@aigne/openai";
4
+ import { GoogleGenAI } from "@google/genai";
3
5
  /**
4
6
  * Implementation of the ChatModel interface for Google's Gemini API
5
7
  *
@@ -20,5 +22,12 @@ export declare class GeminiChatModel extends OpenAIChatModel {
20
22
  protected supportsToolsUseWithJsonSchema: boolean;
21
23
  protected supportsParallelToolCalls: boolean;
22
24
  protected supportsToolStreaming: boolean;
25
+ protected _googleClient?: GoogleGenAI;
26
+ get googleClient(): GoogleGenAI;
27
+ process(input: ChatModelInput, options: AgentInvokeOptions): PromiseOrValue<AgentProcessResult<ChatModelOutput>>;
28
+ private handleImageModelProcessing;
29
+ private buildConfig;
30
+ private buildTools;
31
+ private buildContents;
23
32
  getRunMessages(input: ChatModelInput): ReturnType<OpenAIChatModel["getRunMessages"]>;
24
33
  }
@@ -1,5 +1,7 @@
1
- import type { ChatModelInput } from "@aigne/core";
1
+ import { type AgentInvokeOptions, type AgentProcessResult, type ChatModelInput, type ChatModelOutput } from "@aigne/core";
2
+ import { type PromiseOrValue } from "@aigne/core/utils/type-utils.js";
2
3
  import { OpenAIChatModel, type OpenAIChatModelOptions } from "@aigne/openai";
4
+ import { GoogleGenAI } from "@google/genai";
3
5
  /**
4
6
  * Implementation of the ChatModel interface for Google's Gemini API
5
7
  *
@@ -20,5 +22,12 @@ export declare class GeminiChatModel extends OpenAIChatModel {
20
22
  protected supportsToolsUseWithJsonSchema: boolean;
21
23
  protected supportsParallelToolCalls: boolean;
22
24
  protected supportsToolStreaming: boolean;
25
+ protected _googleClient?: GoogleGenAI;
26
+ get googleClient(): GoogleGenAI;
27
+ process(input: ChatModelInput, options: AgentInvokeOptions): PromiseOrValue<AgentProcessResult<ChatModelOutput>>;
28
+ private handleImageModelProcessing;
29
+ private buildConfig;
30
+ private buildTools;
31
+ private buildContents;
23
32
  getRunMessages(input: ChatModelInput): ReturnType<OpenAIChatModel["getRunMessages"]>;
24
33
  }
@@ -1,4 +1,9 @@
1
+ import { safeParseJSON, } from "@aigne/core";
2
+ import { isNonNullable } from "@aigne/core/utils/type-utils.js";
1
3
  import { OpenAIChatModel } from "@aigne/openai";
4
+ import { nodejs } from "@aigne/platform-helpers/nodejs/index.js";
5
+ import { FunctionCallingConfigMode, GoogleGenAI, } from "@google/genai";
6
+ import { v7 } from "uuid";
2
7
  const GEMINI_BASE_URL = "https://generativelanguage.googleapis.com/v1beta/openai";
3
8
  const GEMINI_DEFAULT_CHAT_MODEL = "gemini-2.0-flash";
4
9
  /**
@@ -27,6 +32,207 @@ export class GeminiChatModel extends OpenAIChatModel {
27
32
  supportsToolsUseWithJsonSchema = false;
28
33
  supportsParallelToolCalls = false;
29
34
  supportsToolStreaming = false;
35
+ _googleClient;
36
+ get googleClient() {
37
+ if (this._googleClient)
38
+ return this._googleClient;
39
+ const { apiKey } = this.credential;
40
+ if (!apiKey)
41
+ throw new Error(`${this.name} requires an API key. Please provide it via \`options.apiKey\`, or set the \`${this.apiKeyEnvName}\` environment variable`);
42
+ this._googleClient ??= new GoogleGenAI({ apiKey });
43
+ return this._googleClient;
44
+ }
45
+ process(input, options) {
46
+ const model = input.modelOptions?.model || this.credential.model;
47
+ if (!model.includes("image"))
48
+ return super.process(input, options);
49
+ return this.handleImageModelProcessing(input, options);
50
+ }
51
+ async *handleImageModelProcessing(input, options) {
52
+ const model = input.modelOptions?.model || this.credential.model;
53
+ const { contents, config } = await this.buildContents(input);
54
+ const parameters = {
55
+ model: model,
56
+ contents,
57
+ config: {
58
+ responseModalities: input.modelOptions?.modalities,
59
+ temperature: input.modelOptions?.temperature || this.modelOptions?.temperature,
60
+ topP: input.modelOptions?.topP || this.modelOptions?.topP,
61
+ frequencyPenalty: input.modelOptions?.frequencyPenalty || this.modelOptions?.frequencyPenalty,
62
+ presencePenalty: input.modelOptions?.presencePenalty || this.modelOptions?.presencePenalty,
63
+ ...config,
64
+ ...(await this.buildTools(input)),
65
+ ...(await this.buildConfig(input)),
66
+ },
67
+ };
68
+ const response = await this.googleClient.models.generateContentStream(parameters);
69
+ const usage = {
70
+ inputTokens: 0,
71
+ outputTokens: 0,
72
+ };
73
+ let responseModel;
74
+ const files = [];
75
+ const toolCalls = [];
76
+ let text = "";
77
+ for await (const chunk of response) {
78
+ if (!responseModel && chunk.modelVersion) {
79
+ responseModel = chunk.modelVersion;
80
+ yield { delta: { json: { model: responseModel } } };
81
+ }
82
+ for (const { content } of chunk.candidates ?? []) {
83
+ if (content?.parts) {
84
+ for (const part of content.parts) {
85
+ if (part.text) {
86
+ text += part.text;
87
+ if (input.responseFormat?.type !== "json_schema") {
88
+ yield { delta: { text: { text: part.text } } };
89
+ }
90
+ }
91
+ if (part.inlineData?.data) {
92
+ files.push(await this.transformFileOutput(input, {
93
+ type: "file",
94
+ data: part.inlineData.data,
95
+ filename: part.inlineData.displayName,
96
+ mimeType: part.inlineData.mimeType,
97
+ }, options));
98
+ yield { delta: { json: { files } } };
99
+ }
100
+ if (part.functionCall?.name) {
101
+ toolCalls.push({
102
+ id: part.functionCall.id || v7(),
103
+ type: "function",
104
+ function: {
105
+ name: part.functionCall.name,
106
+ arguments: part.functionCall.args || {},
107
+ },
108
+ });
109
+ yield { delta: { json: { toolCalls } } };
110
+ }
111
+ }
112
+ }
113
+ }
114
+ if (chunk.usageMetadata) {
115
+ usage.inputTokens += chunk.usageMetadata.promptTokenCount || 0;
116
+ usage.outputTokens += chunk.usageMetadata.candidatesTokenCount || 0;
117
+ yield { delta: { json: { usage } } };
118
+ }
119
+ }
120
+ if (input.responseFormat?.type === "json_schema") {
121
+ yield { delta: { json: { json: safeParseJSON(text) } } };
122
+ }
123
+ }
124
+ async buildConfig(input) {
125
+ const config = {};
126
+ if (input.responseFormat?.type === "json_schema") {
127
+ config.responseJsonSchema = input.responseFormat.jsonSchema.schema;
128
+ config.responseMimeType = "application/json";
129
+ }
130
+ return config;
131
+ }
132
+ async buildTools(input) {
133
+ const tools = [];
134
+ for (const tool of input.tools ?? []) {
135
+ tools.push({
136
+ functionDeclarations: [
137
+ {
138
+ name: tool.function.name,
139
+ description: tool.function.description,
140
+ parametersJsonSchema: tool.function.parameters,
141
+ },
142
+ ],
143
+ });
144
+ }
145
+ const functionCallingConfig = !input.toolChoice
146
+ ? undefined
147
+ : input.toolChoice === "auto"
148
+ ? { mode: FunctionCallingConfigMode.AUTO }
149
+ : input.toolChoice === "none"
150
+ ? { mode: FunctionCallingConfigMode.NONE }
151
+ : input.toolChoice === "required"
152
+ ? { mode: FunctionCallingConfigMode.ANY }
153
+ : {
154
+ mode: FunctionCallingConfigMode.ANY,
155
+ allowedFunctionNames: [input.toolChoice.function.name],
156
+ };
157
+ return { tools, toolConfig: { functionCallingConfig } };
158
+ }
159
+ async buildContents(input) {
160
+ const result = {
161
+ contents: [],
162
+ };
163
+ const systemParts = [];
164
+ result.contents = (await Promise.all(input.messages.map(async (msg) => {
165
+ if (msg.role === "system") {
166
+ if (typeof msg.content === "string") {
167
+ systemParts.push({ text: msg.content });
168
+ }
169
+ else if (Array.isArray(msg.content)) {
170
+ systemParts.push(...msg.content.map((item) => {
171
+ if (item.type === "text")
172
+ return { text: item.text };
173
+ throw new Error(`Unsupported content type: ${item.type}`);
174
+ }));
175
+ }
176
+ return;
177
+ }
178
+ const content = {
179
+ role: msg.role === "agent" ? "model" : "user",
180
+ };
181
+ if (msg.toolCalls) {
182
+ content.parts = msg.toolCalls.map((call) => ({
183
+ functionCall: {
184
+ id: call.id,
185
+ name: call.function.name,
186
+ args: call.function.arguments,
187
+ },
188
+ }));
189
+ }
190
+ else if (msg.toolCallId) {
191
+ const call = input.messages
192
+ .flatMap((i) => i.toolCalls)
193
+ .find((c) => c?.id === msg.toolCallId);
194
+ if (!call)
195
+ throw new Error(`Tool call not found: ${msg.toolCallId}`);
196
+ content.parts = [
197
+ {
198
+ functionResponse: {
199
+ id: msg.toolCallId,
200
+ name: call.function.name,
201
+ response: JSON.parse(msg.content),
202
+ },
203
+ },
204
+ ];
205
+ }
206
+ else if (typeof msg.content === "string") {
207
+ content.parts = [{ text: msg.content }];
208
+ }
209
+ else if (Array.isArray(msg.content)) {
210
+ content.parts = await Promise.all(msg.content.map(async (item) => {
211
+ switch (item.type) {
212
+ case "text":
213
+ return { text: item.text };
214
+ case "url":
215
+ return { fileData: { fileUri: item.url, mimeType: item.mimeType } };
216
+ case "file":
217
+ return { inlineData: { data: item.data, mimeType: item.mimeType } };
218
+ case "local":
219
+ return {
220
+ inlineData: {
221
+ data: await nodejs.fs.readFile(item.path, "base64"),
222
+ mimeType: item.mimeType,
223
+ },
224
+ };
225
+ }
226
+ }));
227
+ }
228
+ return content;
229
+ }))).filter(isNonNullable);
230
+ if (systemParts) {
231
+ result.config ??= {};
232
+ result.config.systemInstruction = systemParts;
233
+ }
234
+ return result;
235
+ }
30
236
  async getRunMessages(input) {
31
237
  const messages = await super.getRunMessages(input);
32
238
  const lastMessage = messages.at(-1);
@@ -139,8 +139,8 @@ export class GeminiImageModel extends ImageModel {
139
139
  });
140
140
  const allImages = (response.candidates ?? [])
141
141
  .flatMap((candidate) => candidate.content?.parts ?? [])
142
- .filter((part) => part?.inlineData?.data)
143
- .map((part) => ({ base64: part.inlineData.data }));
142
+ .map((part) => (part.inlineData?.data ? { base64: part.inlineData?.data } : null))
143
+ .filter(isNonNullable);
144
144
  return {
145
145
  images: allImages,
146
146
  usage: {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@aigne/gemini",
3
- "version": "0.11.5",
3
+ "version": "0.12.0",
4
4
  "description": "AIGNE Gemini SDK for integrating with Google's Gemini AI models",
5
5
  "publishConfig": {
6
6
  "access": "public"
@@ -36,8 +36,10 @@
36
36
  },
37
37
  "dependencies": {
38
38
  "@google/genai": "^1.15.0",
39
+ "uuid": "^11.1.0",
39
40
  "zod": "^3.25.67",
40
- "@aigne/openai": "^0.13.6"
41
+ "@aigne/platform-helpers": "^0.6.2",
42
+ "@aigne/openai": "^0.14.0"
41
43
  },
42
44
  "devDependencies": {
43
45
  "@types/bun": "^1.2.18",
@@ -45,8 +47,8 @@
45
47
  "npm-run-all": "^4.1.5",
46
48
  "rimraf": "^6.0.1",
47
49
  "typescript": "^5.8.3",
48
- "@aigne/core": "^1.57.4",
49
- "@aigne/test-utils": "^0.5.42"
50
+ "@aigne/core": "^1.58.0",
51
+ "@aigne/test-utils": "^0.5.44"
50
52
  },
51
53
  "scripts": {
52
54
  "lint": "tsc --noEmit",