@aigne/gemini 0.13.4 → 0.14.0-beta
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +25 -0
- package/lib/cjs/gemini-chat-model.js +8 -3
- package/lib/cjs/gemini-image-model.d.ts +2 -2
- package/lib/cjs/gemini-image-model.js +22 -9
- package/lib/dts/gemini-image-model.d.ts +2 -2
- package/lib/esm/gemini-chat-model.js +8 -3
- package/lib/esm/gemini-image-model.d.ts +2 -2
- package/lib/esm/gemini-image-model.js +24 -11
- package/package.json +5 -5
package/CHANGELOG.md
CHANGED
@@ -1,5 +1,30 @@
 # Changelog
 
+## [0.14.0-beta](https://github.com/AIGNE-io/aigne-framework/compare/gemini-v0.13.5...gemini-v0.14.0-beta) (2025-09-22)
+
+
+### Features
+
+* improve image model architecture and file handling ([#527](https://github.com/AIGNE-io/aigne-framework/issues/527)) ([4db50aa](https://github.com/AIGNE-io/aigne-framework/commit/4db50aa0387a1a0f045ca11aaa61613e36ca7597))
+
+
+### Dependencies
+
+* The following workspace dependencies were updated
+  * dependencies
+    * @aigne/openai bumped to 0.16.0-beta
+    * @aigne/platform-helpers bumped to 0.6.3-beta
+  * devDependencies
+    * @aigne/core bumped to 1.61.0-beta
+    * @aigne/test-utils bumped to 0.5.53-beta
+
+## [0.13.5](https://github.com/AIGNE-io/aigne-framework/compare/gemini-v0.13.4...gemini-v0.13.5) (2025-09-18)
+
+
+### Bug Fixes
+
+* **gemini:** should include at least one user message ([#521](https://github.com/AIGNE-io/aigne-framework/issues/521)) ([eb2752e](https://github.com/AIGNE-io/aigne-framework/commit/eb2752ed7d78f59c435ecc3ccb7227e804e3781e))
+
 ## [0.13.4](https://github.com/AIGNE-io/aigne-framework/compare/gemini-v0.13.3...gemini-v0.13.4) (2025-09-18)
 

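The 0.14.0-beta feature entry above ("improve image model architecture and file handling", #527) corresponds to the image-model hunks later in this diff: both the Imagen and the Gemini code paths now emit normalized file-style image objects instead of raw SDK results. A rough sketch of that per-image output item, inferred from those hunks; the interface name here is invented for illustration and the authoritative type lives in @aigne/core:

// Sketch of the per-image output shape produced by both paths below.
// "GeneratedImageFile" is an illustrative name, not a published type.
interface GeneratedImageFile {
  type: "file";
  data: string;        // base64 image bytes (Imagen: image.imageBytes, Gemini: inlineData.data)
  mimeType?: string;   // e.g. "image/png"
  filename?: string;   // Gemini path only, taken from inlineData.displayName
}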

package/lib/cjs/gemini-chat-model.js
CHANGED
@@ -232,9 +232,14 @@ class GeminiChatModel extends openai_1.OpenAIChatModel {
     }
     async getRunMessages(input) {
         const messages = await super.getRunMessages(input);
-
-
-
+        if (!messages.some((i) => i.role === "user")) {
+            for (const msg of messages) {
+                if (msg.role === "system") {
+                    // Ensure the last message is from the user
+                    msg.role = "user";
+                    break;
+                }
+            }
         }
         return messages;
     }
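The hunk above makes GeminiChatModel guarantee at least one user-role message: when none is present, the first system message is re-labeled as a user message before the request is sent. A minimal standalone sketch of that behavior, with ChatMessage simplified here for illustration:

// Sketch only: ChatMessage is a simplified stand-in for the framework's message type.
type ChatMessage = { role: "system" | "user" | "assistant"; content: string };

// Mirrors the logic added in getRunMessages: if no user message exists,
// the first system message is re-labeled as a user message.
function ensureUserMessage(messages: ChatMessage[]): ChatMessage[] {
  if (!messages.some((m) => m.role === "user")) {
    for (const msg of messages) {
      if (msg.role === "system") {
        msg.role = "user";
        break;
      }
    }
  }
  return messages;
}

// Example: a system-only prompt becomes a user prompt.
console.log(ensureUserMessage([{ role: "system", content: "You are concise." }]));
// → [{ role: "user", content: "You are concise." }]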

package/lib/cjs/gemini-image-model.d.ts
CHANGED
@@ -1,4 +1,4 @@
-import { ImageModel, type ImageModelInput, type ImageModelOptions, type ImageModelOutput } from "@aigne/core";
+import { type AgentInvokeOptions, ImageModel, type ImageModelInput, type ImageModelOptions, type ImageModelOutput } from "@aigne/core";
 import { type GenerateContentConfig, type GenerateImagesConfig, GoogleGenAI } from "@google/genai";
 export interface GeminiImageModelInput extends ImageModelInput, GenerateImagesConfig, GenerateContentConfig {
 }
@@ -28,7 +28,7 @@ export declare class GeminiImageModel extends ImageModel<GeminiImageModelInput,
      * @param input The input to process
      * @returns The generated response
      */
-    process(input: GeminiImageModelInput): Promise<ImageModelOutput>;
+    process(input: GeminiImageModelInput, options: AgentInvokeOptions): Promise<ImageModelOutput>;
     private generateImageByImagenModel;
     private generateImageByGeminiModel;
 }

package/lib/cjs/gemini-image-model.js
CHANGED
@@ -52,7 +52,7 @@ class GeminiImageModel extends core_1.ImageModel {
      * @param input The input to process
      * @returns The generated response
      */
-    async process(input) {
+    async process(input, options) {
         const model = input.model || this.credential.model;
         const responseFormat = input.responseFormat || "base64";
         if (responseFormat === "url") {
@@ -61,7 +61,7 @@ class GeminiImageModel extends core_1.ImageModel {
         if (model.includes("imagen")) {
             return this.generateImageByImagenModel(input);
         }
-        return this.generateImageByGeminiModel(input);
+        return this.generateImageByGeminiModel(input, options);
     }
     async generateImageByImagenModel(input) {
         const model = input.model || this.credential.model;
@@ -89,7 +89,9 @@ class GeminiImageModel extends core_1.ImageModel {
         });
         return {
             images: response.generatedImages
-                ?.map(({ image }) =>
+                ?.map(({ image }) => image?.imageBytes
+                ? { type: "file", data: image.imageBytes, mimeType: image.mimeType }
+                : undefined)
                 .filter(type_utils_js_1.isNonNullable) || [],
             usage: {
                 inputTokens: 0,
@@ -98,7 +100,7 @@ class GeminiImageModel extends core_1.ImageModel {
             model,
         };
     }
-    async generateImageByGeminiModel(input) {
+    async generateImageByGeminiModel(input, options) {
         const model = input.model || this.credential.model;
         const mergedInput = { ...this.modelOptions, ...input };
         const inputKeys = [
@@ -131,24 +133,35 @@ class GeminiImageModel extends core_1.ImageModel {
             "topK",
             "topP",
         ];
+        const images = await Promise.all((0, type_utils_js_1.flat)(input.image).map(async (image) => {
+            const { data, mimeType } = await this.transformFileOutput(core_1.FileOutputType.file, image, options);
+            return { inlineData: { data, mimeType } };
+        }));
         const response = await this.client.models.generateContent({
             model: model,
-            contents: input.prompt,
+            contents: [{ text: input.prompt }, ...images],
             config: {
-                responseModalities: [genai_1.Modality.
+                responseModalities: [genai_1.Modality.IMAGE],
                 candidateCount: input.n || 1,
                 ...(0, type_utils_js_1.pick)(mergedInput, inputKeys),
             },
         });
         const allImages = (response.candidates ?? [])
             .flatMap((candidate) => candidate.content?.parts ?? [])
-            .map((part) =>
+            .map((part) => part.inlineData?.data
+            ? {
+                type: "file",
+                data: part.inlineData.data,
+                filename: part.inlineData.displayName,
+                mimeType: part.inlineData.mimeType,
+            }
+            : null)
             .filter(type_utils_js_1.isNonNullable);
         return {
             images: allImages,
             usage: {
-                inputTokens: 0,
-                outputTokens: 0,
+                inputTokens: response.usageMetadata?.promptTokenCount || 0,
+                outputTokens: response.usageMetadata?.candidatesTokenCount || 0,
             },
             model,
         };
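The Gemini (non-Imagen) path above now builds a multimodal contents array (one text part plus one inlineData part per input image resolved through transformFileOutput), requests image output via responseModalities, and reads generated images back from the candidates' inlineData parts. A minimal sketch of the same flow against @google/genai directly; the API key handling, model name, and prompt are illustrative assumptions:

import { GoogleGenAI, Modality } from "@google/genai";

async function generateImageSketch() {
  // Illustrative setup; in the package the client and model come from credentials/options.
  const ai = new GoogleGenAI({ apiKey: process.env.GEMINI_API_KEY });
  const inputImageBase64 = "..."; // base64 data, as produced by transformFileOutput

  const response = await ai.models.generateContent({
    model: "gemini-2.5-flash-image-preview", // assumed model name for illustration
    // Text part first, then one inlineData part per input image (mirrors the hunk above).
    contents: [
      { text: "Add a party hat to the cat" },
      { inlineData: { data: inputImageBase64, mimeType: "image/png" } },
    ],
    config: {
      responseModalities: [Modality.IMAGE],
      candidateCount: 1,
    },
  });

  // Collect generated images the same way the package does: keep only inlineData parts.
  const images = (response.candidates ?? [])
    .flatMap((candidate) => candidate.content?.parts ?? [])
    .map((part) => part.inlineData?.data
      ? { type: "file", data: part.inlineData.data, mimeType: part.inlineData.mimeType }
      : null)
    .filter((image) => image !== null);

  console.log(`received ${images.length} image(s)`);
}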

package/lib/dts/gemini-image-model.d.ts
CHANGED
@@ -1,4 +1,4 @@
-import { ImageModel, type ImageModelInput, type ImageModelOptions, type ImageModelOutput } from "@aigne/core";
+import { type AgentInvokeOptions, ImageModel, type ImageModelInput, type ImageModelOptions, type ImageModelOutput } from "@aigne/core";
 import { type GenerateContentConfig, type GenerateImagesConfig, GoogleGenAI } from "@google/genai";
 export interface GeminiImageModelInput extends ImageModelInput, GenerateImagesConfig, GenerateContentConfig {
 }
@@ -28,7 +28,7 @@ export declare class GeminiImageModel extends ImageModel<GeminiImageModelInput,
      * @param input The input to process
      * @returns The generated response
      */
-    process(input: GeminiImageModelInput): Promise<ImageModelOutput>;
+    process(input: GeminiImageModelInput, options: AgentInvokeOptions): Promise<ImageModelOutput>;
     private generateImageByImagenModel;
     private generateImageByGeminiModel;
 }

package/lib/esm/gemini-chat-model.js
CHANGED
@@ -229,9 +229,14 @@ export class GeminiChatModel extends OpenAIChatModel {
     }
     async getRunMessages(input) {
         const messages = await super.getRunMessages(input);
-
-
-
+        if (!messages.some((i) => i.role === "user")) {
+            for (const msg of messages) {
+                if (msg.role === "system") {
+                    // Ensure the last message is from the user
+                    msg.role = "user";
+                    break;
+                }
+            }
         }
         return messages;
     }

package/lib/esm/gemini-image-model.d.ts
CHANGED
@@ -1,4 +1,4 @@
-import { ImageModel, type ImageModelInput, type ImageModelOptions, type ImageModelOutput } from "@aigne/core";
+import { type AgentInvokeOptions, ImageModel, type ImageModelInput, type ImageModelOptions, type ImageModelOutput } from "@aigne/core";
 import { type GenerateContentConfig, type GenerateImagesConfig, GoogleGenAI } from "@google/genai";
 export interface GeminiImageModelInput extends ImageModelInput, GenerateImagesConfig, GenerateContentConfig {
 }
@@ -28,7 +28,7 @@ export declare class GeminiImageModel extends ImageModel<GeminiImageModelInput,
      * @param input The input to process
      * @returns The generated response
      */
-    process(input: GeminiImageModelInput): Promise<ImageModelOutput>;
+    process(input: GeminiImageModelInput, options: AgentInvokeOptions): Promise<ImageModelOutput>;
     private generateImageByImagenModel;
     private generateImageByGeminiModel;
 }

package/lib/esm/gemini-image-model.js
CHANGED
@@ -1,5 +1,5 @@
-import { ImageModel, imageModelInputSchema, } from "@aigne/core";
-import { checkArguments, isNonNullable, pick } from "@aigne/core/utils/type-utils.js";
+import { FileOutputType, ImageModel, imageModelInputSchema, } from "@aigne/core";
+import { checkArguments, flat, isNonNullable, pick } from "@aigne/core/utils/type-utils.js";
 import { GoogleGenAI, Modality, } from "@google/genai";
 import { z } from "zod";
 const DEFAULT_MODEL = "imagen-4.0-generate-001";
@@ -49,7 +49,7 @@ export class GeminiImageModel extends ImageModel {
      * @param input The input to process
      * @returns The generated response
      */
-    async process(input) {
+    async process(input, options) {
         const model = input.model || this.credential.model;
         const responseFormat = input.responseFormat || "base64";
         if (responseFormat === "url") {
@@ -58,7 +58,7 @@ export class GeminiImageModel extends ImageModel {
         if (model.includes("imagen")) {
             return this.generateImageByImagenModel(input);
         }
-        return this.generateImageByGeminiModel(input);
+        return this.generateImageByGeminiModel(input, options);
     }
     async generateImageByImagenModel(input) {
         const model = input.model || this.credential.model;
@@ -86,7 +86,9 @@ export class GeminiImageModel extends ImageModel {
         });
         return {
             images: response.generatedImages
-                ?.map(({ image }) =>
+                ?.map(({ image }) => image?.imageBytes
+                ? { type: "file", data: image.imageBytes, mimeType: image.mimeType }
+                : undefined)
                 .filter(isNonNullable) || [],
             usage: {
                 inputTokens: 0,
@@ -95,7 +97,7 @@ export class GeminiImageModel extends ImageModel {
             model,
         };
     }
-    async generateImageByGeminiModel(input) {
+    async generateImageByGeminiModel(input, options) {
         const model = input.model || this.credential.model;
         const mergedInput = { ...this.modelOptions, ...input };
         const inputKeys = [
@@ -128,24 +130,35 @@ export class GeminiImageModel extends ImageModel {
             "topK",
             "topP",
         ];
+        const images = await Promise.all(flat(input.image).map(async (image) => {
+            const { data, mimeType } = await this.transformFileOutput(FileOutputType.file, image, options);
+            return { inlineData: { data, mimeType } };
+        }));
         const response = await this.client.models.generateContent({
             model: model,
-            contents: input.prompt,
+            contents: [{ text: input.prompt }, ...images],
             config: {
-                responseModalities: [Modality.
+                responseModalities: [Modality.IMAGE],
                 candidateCount: input.n || 1,
                 ...pick(mergedInput, inputKeys),
             },
         });
         const allImages = (response.candidates ?? [])
             .flatMap((candidate) => candidate.content?.parts ?? [])
-            .map((part) =>
+            .map((part) => part.inlineData?.data
+            ? {
+                type: "file",
+                data: part.inlineData.data,
+                filename: part.inlineData.displayName,
+                mimeType: part.inlineData.mimeType,
+            }
+            : null)
             .filter(isNonNullable);
         return {
             images: allImages,
             usage: {
-                inputTokens: 0,
-                outputTokens: 0,
+                inputTokens: response.usageMetadata?.promptTokenCount || 0,
+                outputTokens: response.usageMetadata?.candidatesTokenCount || 0,
             },
             model,
         };
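Both builds above also stop hard-coding zero token counts: the response's usageMetadata is mapped onto the framework's usage fields (promptTokenCount for input, candidatesTokenCount for output), falling back to 0 when the metadata is absent. A small sketch of that mapping; the ImageUsage shape is assumed from the hunks:

import type { GenerateContentResponse } from "@google/genai";

// Assumed usage shape, matching the fields written by the hunks above.
interface ImageUsage {
  inputTokens: number;
  outputTokens: number;
}

// promptTokenCount counts the request input; candidatesTokenCount counts the
// generated candidates. Missing metadata falls back to 0, as in the diff.
function toUsage(response: GenerateContentResponse): ImageUsage {
  return {
    inputTokens: response.usageMetadata?.promptTokenCount || 0,
    outputTokens: response.usageMetadata?.candidatesTokenCount || 0,
  };
}
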
package/package.json
CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@aigne/gemini",
-  "version": "0.
+  "version": "0.14.0-beta",
   "description": "AIGNE Gemini SDK for integrating with Google's Gemini AI models",
   "publishConfig": {
     "access": "public"
@@ -38,8 +38,8 @@
     "@google/genai": "^1.20.0",
     "uuid": "^13.0.0",
     "zod": "^3.25.67",
-    "@aigne/
-    "@aigne/
+    "@aigne/platform-helpers": "^0.6.3-beta",
+    "@aigne/openai": "^0.16.0-beta"
   },
   "devDependencies": {
     "@types/bun": "^1.2.22",
@@ -47,8 +47,8 @@
     "npm-run-all": "^4.1.5",
     "rimraf": "^6.0.1",
     "typescript": "^5.9.2",
-    "@aigne/
-    "@aigne/
+    "@aigne/test-utils": "^0.5.53-beta",
+    "@aigne/core": "^1.61.0-beta"
   },
   "scripts": {
     "lint": "tsc --noEmit",