@aigne/gemini 0.11.6 → 0.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,22 @@
  # Changelog
  
+ ## [0.12.0](https://github.com/AIGNE-io/aigne-framework/compare/gemini-v0.11.6...gemini-v0.12.0) (2025-09-05)
+
+
+ ### Features
+
+ * add modalities support for chat model ([#454](https://github.com/AIGNE-io/aigne-framework/issues/454)) ([70d1bf6](https://github.com/AIGNE-io/aigne-framework/commit/70d1bf631f4e711235d89c6df8ee210a19179b30))
+
+
+ ### Dependencies
+
+ * The following workspace dependencies were updated
+   * dependencies
+     * @aigne/openai bumped to 0.14.0
+   * devDependencies
+     * @aigne/core bumped to 1.58.0
+     * @aigne/test-utils bumped to 0.5.44
+
  ## [0.11.6](https://github.com/AIGNE-io/aigne-framework/compare/gemini-v0.11.5...gemini-v0.11.6) (2025-09-01)
  
  
@@ -1,5 +1,7 @@
- import type { ChatModelInput } from "@aigne/core";
+ import { type AgentInvokeOptions, type AgentProcessResult, type ChatModelInput, type ChatModelOutput } from "@aigne/core";
+ import { type PromiseOrValue } from "@aigne/core/utils/type-utils.js";
  import { OpenAIChatModel, type OpenAIChatModelOptions } from "@aigne/openai";
+ import { GoogleGenAI } from "@google/genai";
  /**
   * Implementation of the ChatModel interface for Google's Gemini API
   *
@@ -20,5 +22,12 @@ export declare class GeminiChatModel extends OpenAIChatModel {
  protected supportsToolsUseWithJsonSchema: boolean;
  protected supportsParallelToolCalls: boolean;
  protected supportsToolStreaming: boolean;
+ protected _googleClient?: GoogleGenAI;
+ get googleClient(): GoogleGenAI;
+ process(input: ChatModelInput, options: AgentInvokeOptions): PromiseOrValue<AgentProcessResult<ChatModelOutput>>;
+ private handleImageModelProcessing;
+ private buildConfig;
+ private buildTools;
+ private buildContents;
  getRunMessages(input: ChatModelInput): ReturnType<OpenAIChatModel["getRunMessages"]>;
  }
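The declaration above is the new public surface: a lazily constructed, cached `GoogleGenAI` client behind the `googleClient` getter, and a `process` override that picks a backend per call. A small sketch of the dispatch rule, which mirrors the compiled source below:

```ts
// Models whose name contains "image" go through Google's native client;
// everything else keeps the OpenAI-compatible path inherited from @aigne/openai.
function usesNativeGeminiClient(modelName: string): boolean {
  return modelName.includes("image");
}

usesNativeGeminiClient("gemini-2.0-flash");                          // false → OpenAI-compatible endpoint
usesNativeGeminiClient("gemini-2.0-flash-preview-image-generation"); // true → @google/genai streaming path
```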
@@ -1,7 +1,12 @@
  "use strict";
  Object.defineProperty(exports, "__esModule", { value: true });
  exports.GeminiChatModel = void 0;
+ const core_1 = require("@aigne/core");
+ const type_utils_js_1 = require("@aigne/core/utils/type-utils.js");
  const openai_1 = require("@aigne/openai");
+ const index_js_1 = require("@aigne/platform-helpers/nodejs/index.js");
+ const genai_1 = require("@google/genai");
+ const uuid_1 = require("uuid");
  const GEMINI_BASE_URL = "https://generativelanguage.googleapis.com/v1beta/openai";
  const GEMINI_DEFAULT_CHAT_MODEL = "gemini-2.0-flash";
  /**
@@ -30,6 +35,207 @@ class GeminiChatModel extends openai_1.OpenAIChatModel {
  supportsToolsUseWithJsonSchema = false;
  supportsParallelToolCalls = false;
  supportsToolStreaming = false;
+ _googleClient;
+ get googleClient() {
+     if (this._googleClient)
+         return this._googleClient;
+     const { apiKey } = this.credential;
+     if (!apiKey)
+         throw new Error(`${this.name} requires an API key. Please provide it via \`options.apiKey\`, or set the \`${this.apiKeyEnvName}\` environment variable`);
+     this._googleClient ??= new genai_1.GoogleGenAI({ apiKey });
+     return this._googleClient;
+ }
+ process(input, options) {
+     const model = input.modelOptions?.model || this.credential.model;
+     if (!model.includes("image"))
+         return super.process(input, options);
+     return this.handleImageModelProcessing(input, options);
+ }
+ async *handleImageModelProcessing(input, options) {
+     const model = input.modelOptions?.model || this.credential.model;
+     const { contents, config } = await this.buildContents(input);
+     const parameters = {
+         model: model,
+         contents,
+         config: {
+             responseModalities: input.modelOptions?.modalities,
+             temperature: input.modelOptions?.temperature || this.modelOptions?.temperature,
+             topP: input.modelOptions?.topP || this.modelOptions?.topP,
+             frequencyPenalty: input.modelOptions?.frequencyPenalty || this.modelOptions?.frequencyPenalty,
+             presencePenalty: input.modelOptions?.presencePenalty || this.modelOptions?.presencePenalty,
+             ...config,
+             ...(await this.buildTools(input)),
+             ...(await this.buildConfig(input)),
+         },
+     };
+     const response = await this.googleClient.models.generateContentStream(parameters);
+     const usage = {
+         inputTokens: 0,
+         outputTokens: 0,
+     };
+     let responseModel;
+     const files = [];
+     const toolCalls = [];
+     let text = "";
+     for await (const chunk of response) {
+         if (!responseModel && chunk.modelVersion) {
+             responseModel = chunk.modelVersion;
+             yield { delta: { json: { model: responseModel } } };
+         }
+         for (const { content } of chunk.candidates ?? []) {
+             if (content?.parts) {
+                 for (const part of content.parts) {
+                     if (part.text) {
+                         text += part.text;
+                         if (input.responseFormat?.type !== "json_schema") {
+                             yield { delta: { text: { text: part.text } } };
+                         }
+                     }
+                     if (part.inlineData?.data) {
+                         files.push(await this.transformFileOutput(input, {
+                             type: "file",
+                             data: part.inlineData.data,
+                             filename: part.inlineData.displayName,
+                             mimeType: part.inlineData.mimeType,
+                         }, options));
+                         yield { delta: { json: { files } } };
+                     }
+                     if (part.functionCall?.name) {
+                         toolCalls.push({
+                             id: part.functionCall.id || (0, uuid_1.v7)(),
+                             type: "function",
+                             function: {
+                                 name: part.functionCall.name,
+                                 arguments: part.functionCall.args || {},
+                             },
+                         });
+                         yield { delta: { json: { toolCalls } } };
+                     }
+                 }
+             }
+         }
+         if (chunk.usageMetadata) {
+             usage.inputTokens += chunk.usageMetadata.promptTokenCount || 0;
+             usage.outputTokens += chunk.usageMetadata.candidatesTokenCount || 0;
+             yield { delta: { json: { usage } } };
+         }
+     }
+     if (input.responseFormat?.type === "json_schema") {
+         yield { delta: { json: { json: (0, core_1.safeParseJSON)(text) } } };
+     }
+ }
+ async buildConfig(input) {
+     const config = {};
+     if (input.responseFormat?.type === "json_schema") {
+         config.responseJsonSchema = input.responseFormat.jsonSchema.schema;
+         config.responseMimeType = "application/json";
+     }
+     return config;
+ }
+ async buildTools(input) {
+     const tools = [];
+     for (const tool of input.tools ?? []) {
+         tools.push({
+             functionDeclarations: [
+                 {
+                     name: tool.function.name,
+                     description: tool.function.description,
+                     parametersJsonSchema: tool.function.parameters,
+                 },
+             ],
+         });
+     }
+     const functionCallingConfig = !input.toolChoice
+         ? undefined
+         : input.toolChoice === "auto"
+             ? { mode: genai_1.FunctionCallingConfigMode.AUTO }
+             : input.toolChoice === "none"
+                 ? { mode: genai_1.FunctionCallingConfigMode.NONE }
+                 : input.toolChoice === "required"
+                     ? { mode: genai_1.FunctionCallingConfigMode.ANY }
+                     : {
+                         mode: genai_1.FunctionCallingConfigMode.ANY,
+                         allowedFunctionNames: [input.toolChoice.function.name],
+                     };
+     return { tools, toolConfig: { functionCallingConfig } };
+ }
+ async buildContents(input) {
+     const result = {
+         contents: [],
+     };
+     const systemParts = [];
+     result.contents = (await Promise.all(input.messages.map(async (msg) => {
+         if (msg.role === "system") {
+             if (typeof msg.content === "string") {
+                 systemParts.push({ text: msg.content });
+             }
+             else if (Array.isArray(msg.content)) {
+                 systemParts.push(...msg.content.map((item) => {
+                     if (item.type === "text")
+                         return { text: item.text };
+                     throw new Error(`Unsupported content type: ${item.type}`);
+                 }));
+             }
+             return;
+         }
+         const content = {
+             role: msg.role === "agent" ? "model" : "user",
+         };
+         if (msg.toolCalls) {
+             content.parts = msg.toolCalls.map((call) => ({
+                 functionCall: {
+                     id: call.id,
+                     name: call.function.name,
+                     args: call.function.arguments,
+                 },
+             }));
+         }
+         else if (msg.toolCallId) {
+             const call = input.messages
+                 .flatMap((i) => i.toolCalls)
+                 .find((c) => c?.id === msg.toolCallId);
+             if (!call)
+                 throw new Error(`Tool call not found: ${msg.toolCallId}`);
+             content.parts = [
+                 {
+                     functionResponse: {
+                         id: msg.toolCallId,
+                         name: call.function.name,
+                         response: JSON.parse(msg.content),
+                     },
+                 },
+             ];
+         }
+         else if (typeof msg.content === "string") {
+             content.parts = [{ text: msg.content }];
+         }
+         else if (Array.isArray(msg.content)) {
+             content.parts = await Promise.all(msg.content.map(async (item) => {
+                 switch (item.type) {
+                     case "text":
+                         return { text: item.text };
+                     case "url":
+                         return { fileData: { fileUri: item.url, mimeType: item.mimeType } };
+                     case "file":
+                         return { inlineData: { data: item.data, mimeType: item.mimeType } };
+                     case "local":
+                         return {
+                             inlineData: {
+                                 data: await index_js_1.nodejs.fs.readFile(item.path, "base64"),
+                                 mimeType: item.mimeType,
+                             },
+                         };
+                 }
+             }));
+         }
+         return content;
+     }))).filter(type_utils_js_1.isNonNullable);
+     if (systemParts) {
+         result.config ??= {};
+         result.config.systemInstruction = systemParts;
+     }
+     return result;
+ }
  async getRunMessages(input) {
      const messages = await super.getRunMessages(input);
      const lastMessage = messages.at(-1);
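The ternary chain in `buildTools` maps AIGNE's `toolChoice` onto Gemini's `FunctionCallingConfigMode`. An equivalent, more explicit sketch of the same mapping; the `ToolChoice` union here is a local stand-in, not the real @aigne/core type:

```ts
import { FunctionCallingConfigMode } from "@google/genai";

// Local stand-in for the toolChoice union accepted by ChatModelInput (assumption).
type ToolChoice = "auto" | "none" | "required" | { type: "function"; function: { name: string } };

function toFunctionCallingConfig(toolChoice?: ToolChoice) {
  if (!toolChoice) return undefined; // no preference: omit the config entirely
  if (toolChoice === "auto") return { mode: FunctionCallingConfigMode.AUTO };
  if (toolChoice === "none") return { mode: FunctionCallingConfigMode.NONE };
  // Gemini has no literal "required" mode; ANY means "must call some function".
  if (toolChoice === "required") return { mode: FunctionCallingConfigMode.ANY };
  // A specific tool: ANY restricted to a single allowed function name.
  return {
    mode: FunctionCallingConfigMode.ANY,
    allowedFunctionNames: [toolChoice.function.name],
  };
}
```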
@@ -1,5 +1,7 @@
- import type { ChatModelInput } from "@aigne/core";
+ import { type AgentInvokeOptions, type AgentProcessResult, type ChatModelInput, type ChatModelOutput } from "@aigne/core";
+ import { type PromiseOrValue } from "@aigne/core/utils/type-utils.js";
  import { OpenAIChatModel, type OpenAIChatModelOptions } from "@aigne/openai";
+ import { GoogleGenAI } from "@google/genai";
  /**
   * Implementation of the ChatModel interface for Google's Gemini API
   *
@@ -20,5 +22,12 @@ export declare class GeminiChatModel extends OpenAIChatModel {
  protected supportsToolsUseWithJsonSchema: boolean;
  protected supportsParallelToolCalls: boolean;
  protected supportsToolStreaming: boolean;
+ protected _googleClient?: GoogleGenAI;
+ get googleClient(): GoogleGenAI;
+ process(input: ChatModelInput, options: AgentInvokeOptions): PromiseOrValue<AgentProcessResult<ChatModelOutput>>;
+ private handleImageModelProcessing;
+ private buildConfig;
+ private buildTools;
+ private buildContents;
  getRunMessages(input: ChatModelInput): ReturnType<OpenAIChatModel["getRunMessages"]>;
  }
@@ -1,5 +1,7 @@
- import type { ChatModelInput } from "@aigne/core";
+ import { type AgentInvokeOptions, type AgentProcessResult, type ChatModelInput, type ChatModelOutput } from "@aigne/core";
+ import { type PromiseOrValue } from "@aigne/core/utils/type-utils.js";
  import { OpenAIChatModel, type OpenAIChatModelOptions } from "@aigne/openai";
+ import { GoogleGenAI } from "@google/genai";
  /**
   * Implementation of the ChatModel interface for Google's Gemini API
   *
@@ -20,5 +22,12 @@ export declare class GeminiChatModel extends OpenAIChatModel {
  protected supportsToolsUseWithJsonSchema: boolean;
  protected supportsParallelToolCalls: boolean;
  protected supportsToolStreaming: boolean;
+ protected _googleClient?: GoogleGenAI;
+ get googleClient(): GoogleGenAI;
+ process(input: ChatModelInput, options: AgentInvokeOptions): PromiseOrValue<AgentProcessResult<ChatModelOutput>>;
+ private handleImageModelProcessing;
+ private buildConfig;
+ private buildTools;
+ private buildContents;
  getRunMessages(input: ChatModelInput): ReturnType<OpenAIChatModel["getRunMessages"]>;
  }
@@ -1,4 +1,9 @@
+ import { safeParseJSON, } from "@aigne/core";
+ import { isNonNullable } from "@aigne/core/utils/type-utils.js";
  import { OpenAIChatModel } from "@aigne/openai";
+ import { nodejs } from "@aigne/platform-helpers/nodejs/index.js";
+ import { FunctionCallingConfigMode, GoogleGenAI, } from "@google/genai";
+ import { v7 } from "uuid";
  const GEMINI_BASE_URL = "https://generativelanguage.googleapis.com/v1beta/openai";
  const GEMINI_DEFAULT_CHAT_MODEL = "gemini-2.0-flash";
  /**
@@ -27,6 +32,207 @@ export class GeminiChatModel extends OpenAIChatModel {
  supportsToolsUseWithJsonSchema = false;
  supportsParallelToolCalls = false;
  supportsToolStreaming = false;
+ _googleClient;
+ get googleClient() {
+     if (this._googleClient)
+         return this._googleClient;
+     const { apiKey } = this.credential;
+     if (!apiKey)
+         throw new Error(`${this.name} requires an API key. Please provide it via \`options.apiKey\`, or set the \`${this.apiKeyEnvName}\` environment variable`);
+     this._googleClient ??= new GoogleGenAI({ apiKey });
+     return this._googleClient;
+ }
+ process(input, options) {
+     const model = input.modelOptions?.model || this.credential.model;
+     if (!model.includes("image"))
+         return super.process(input, options);
+     return this.handleImageModelProcessing(input, options);
+ }
+ async *handleImageModelProcessing(input, options) {
+     const model = input.modelOptions?.model || this.credential.model;
+     const { contents, config } = await this.buildContents(input);
+     const parameters = {
+         model: model,
+         contents,
+         config: {
+             responseModalities: input.modelOptions?.modalities,
+             temperature: input.modelOptions?.temperature || this.modelOptions?.temperature,
+             topP: input.modelOptions?.topP || this.modelOptions?.topP,
+             frequencyPenalty: input.modelOptions?.frequencyPenalty || this.modelOptions?.frequencyPenalty,
+             presencePenalty: input.modelOptions?.presencePenalty || this.modelOptions?.presencePenalty,
+             ...config,
+             ...(await this.buildTools(input)),
+             ...(await this.buildConfig(input)),
+         },
+     };
+     const response = await this.googleClient.models.generateContentStream(parameters);
+     const usage = {
+         inputTokens: 0,
+         outputTokens: 0,
+     };
+     let responseModel;
+     const files = [];
+     const toolCalls = [];
+     let text = "";
+     for await (const chunk of response) {
+         if (!responseModel && chunk.modelVersion) {
+             responseModel = chunk.modelVersion;
+             yield { delta: { json: { model: responseModel } } };
+         }
+         for (const { content } of chunk.candidates ?? []) {
+             if (content?.parts) {
+                 for (const part of content.parts) {
+                     if (part.text) {
+                         text += part.text;
+                         if (input.responseFormat?.type !== "json_schema") {
+                             yield { delta: { text: { text: part.text } } };
+                         }
+                     }
+                     if (part.inlineData?.data) {
+                         files.push(await this.transformFileOutput(input, {
+                             type: "file",
+                             data: part.inlineData.data,
+                             filename: part.inlineData.displayName,
+                             mimeType: part.inlineData.mimeType,
+                         }, options));
+                         yield { delta: { json: { files } } };
+                     }
+                     if (part.functionCall?.name) {
+                         toolCalls.push({
+                             id: part.functionCall.id || v7(),
+                             type: "function",
+                             function: {
+                                 name: part.functionCall.name,
+                                 arguments: part.functionCall.args || {},
+                             },
+                         });
+                         yield { delta: { json: { toolCalls } } };
+                     }
+                 }
+             }
+         }
+         if (chunk.usageMetadata) {
+             usage.inputTokens += chunk.usageMetadata.promptTokenCount || 0;
+             usage.outputTokens += chunk.usageMetadata.candidatesTokenCount || 0;
+             yield { delta: { json: { usage } } };
+         }
+     }
+     if (input.responseFormat?.type === "json_schema") {
+         yield { delta: { json: { json: safeParseJSON(text) } } };
+     }
+ }
+ async buildConfig(input) {
+     const config = {};
+     if (input.responseFormat?.type === "json_schema") {
+         config.responseJsonSchema = input.responseFormat.jsonSchema.schema;
+         config.responseMimeType = "application/json";
+     }
+     return config;
+ }
+ async buildTools(input) {
+     const tools = [];
+     for (const tool of input.tools ?? []) {
+         tools.push({
+             functionDeclarations: [
+                 {
+                     name: tool.function.name,
+                     description: tool.function.description,
+                     parametersJsonSchema: tool.function.parameters,
+                 },
+             ],
+         });
+     }
+     const functionCallingConfig = !input.toolChoice
+         ? undefined
+         : input.toolChoice === "auto"
+             ? { mode: FunctionCallingConfigMode.AUTO }
+             : input.toolChoice === "none"
+                 ? { mode: FunctionCallingConfigMode.NONE }
+                 : input.toolChoice === "required"
+                     ? { mode: FunctionCallingConfigMode.ANY }
+                     : {
+                         mode: FunctionCallingConfigMode.ANY,
+                         allowedFunctionNames: [input.toolChoice.function.name],
+                     };
+     return { tools, toolConfig: { functionCallingConfig } };
+ }
+ async buildContents(input) {
+     const result = {
+         contents: [],
+     };
+     const systemParts = [];
+     result.contents = (await Promise.all(input.messages.map(async (msg) => {
+         if (msg.role === "system") {
+             if (typeof msg.content === "string") {
+                 systemParts.push({ text: msg.content });
+             }
+             else if (Array.isArray(msg.content)) {
+                 systemParts.push(...msg.content.map((item) => {
+                     if (item.type === "text")
+                         return { text: item.text };
+                     throw new Error(`Unsupported content type: ${item.type}`);
+                 }));
+             }
+             return;
+         }
+         const content = {
+             role: msg.role === "agent" ? "model" : "user",
+         };
+         if (msg.toolCalls) {
+             content.parts = msg.toolCalls.map((call) => ({
+                 functionCall: {
+                     id: call.id,
+                     name: call.function.name,
+                     args: call.function.arguments,
+                 },
+             }));
+         }
+         else if (msg.toolCallId) {
+             const call = input.messages
+                 .flatMap((i) => i.toolCalls)
+                 .find((c) => c?.id === msg.toolCallId);
+             if (!call)
+                 throw new Error(`Tool call not found: ${msg.toolCallId}`);
+             content.parts = [
+                 {
+                     functionResponse: {
+                         id: msg.toolCallId,
+                         name: call.function.name,
+                         response: JSON.parse(msg.content),
+                     },
+                 },
+             ];
+         }
+         else if (typeof msg.content === "string") {
+             content.parts = [{ text: msg.content }];
+         }
+         else if (Array.isArray(msg.content)) {
+             content.parts = await Promise.all(msg.content.map(async (item) => {
+                 switch (item.type) {
+                     case "text":
+                         return { text: item.text };
+                     case "url":
+                         return { fileData: { fileUri: item.url, mimeType: item.mimeType } };
+                     case "file":
+                         return { inlineData: { data: item.data, mimeType: item.mimeType } };
+                     case "local":
+                         return {
+                             inlineData: {
+                                 data: await nodejs.fs.readFile(item.path, "base64"),
+                                 mimeType: item.mimeType,
+                             },
+                         };
+                 }
+             }));
+         }
+         return content;
+     }))).filter(isNonNullable);
+     if (systemParts) {
+         result.config ??= {};
+         result.config.systemInstruction = systemParts;
+     }
+     return result;
+ }
  async getRunMessages(input) {
      const messages = await super.getRunMessages(input);
      const lastMessage = messages.at(-1);
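When `responseFormat.type === "json_schema"`, this path suppresses incremental text deltas, requests `application/json` output via `responseJsonSchema` in `buildConfig`, and yields the parsed object once the stream completes. (One detail worth noting: `if (systemParts)` is always truthy for an array, so `systemInstruction` is set even when no system messages exist.) A hedged usage sketch; it assumes an image-capable model as configured in the first sketch, and the invoke/schema shapes follow the code above rather than any published docs:

```ts
const result = await model.invoke({
  messages: [{ role: "user", content: "Describe the generated image as JSON." }],
  responseFormat: {
    type: "json_schema",
    jsonSchema: {
      name: "description", // hypothetical schema name
      schema: {
        type: "object",
        properties: { caption: { type: "string" } },
        required: ["caption"],
      },
    },
  },
});
// The final stream delta carries { json: { json: <parsed object> } },
// produced by safeParseJSON over the accumulated text.
```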
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
    "name": "@aigne/gemini",
-   "version": "0.11.6",
+   "version": "0.12.0",
    "description": "AIGNE Gemini SDK for integrating with Google's Gemini AI models",
    "publishConfig": {
      "access": "public"
@@ -36,8 +36,10 @@
    },
    "dependencies": {
      "@google/genai": "^1.15.0",
+     "uuid": "^11.1.0",
      "zod": "^3.25.67",
-     "@aigne/openai": "^0.13.7"
+     "@aigne/platform-helpers": "^0.6.2",
+     "@aigne/openai": "^0.14.0"
    },
    "devDependencies": {
      "@types/bun": "^1.2.18",
@@ -45,8 +47,8 @@
      "npm-run-all": "^4.1.5",
      "rimraf": "^6.0.1",
      "typescript": "^5.8.3",
-     "@aigne/test-utils": "^0.5.43",
-     "@aigne/core": "^1.57.5"
+     "@aigne/core": "^1.58.0",
+     "@aigne/test-utils": "^0.5.44"
    },
    "scripts": {
      "lint": "tsc --noEmit",