@aigne/gemini 0.13.5 → 0.14.0-beta
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,23 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## [0.14.0-beta](https://github.com/AIGNE-io/aigne-framework/compare/gemini-v0.13.5...gemini-v0.14.0-beta) (2025-09-22)
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
### Features
|
|
7
|
+
|
|
8
|
+
* improve image model architecture and file handling ([#527](https://github.com/AIGNE-io/aigne-framework/issues/527)) ([4db50aa](https://github.com/AIGNE-io/aigne-framework/commit/4db50aa0387a1a0f045ca11aaa61613e36ca7597))
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
### Dependencies
|
|
12
|
+
|
|
13
|
+
* The following workspace dependencies were updated
|
|
14
|
+
* dependencies
|
|
15
|
+
* @aigne/openai bumped to 0.16.0-beta
|
|
16
|
+
* @aigne/platform-helpers bumped to 0.6.3-beta
|
|
17
|
+
* devDependencies
|
|
18
|
+
* @aigne/core bumped to 1.61.0-beta
|
|
19
|
+
* @aigne/test-utils bumped to 0.5.53-beta
|
|
20
|
+
|
|
3
21
|
## [0.13.5](https://github.com/AIGNE-io/aigne-framework/compare/gemini-v0.13.4...gemini-v0.13.5) (2025-09-18)
|
|
4
22
|
|
|
5
23
|
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { ImageModel, type ImageModelInput, type ImageModelOptions, type ImageModelOutput } from "@aigne/core";
|
|
1
|
+
import { type AgentInvokeOptions, ImageModel, type ImageModelInput, type ImageModelOptions, type ImageModelOutput } from "@aigne/core";
|
|
2
2
|
import { type GenerateContentConfig, type GenerateImagesConfig, GoogleGenAI } from "@google/genai";
|
|
3
3
|
export interface GeminiImageModelInput extends ImageModelInput, GenerateImagesConfig, GenerateContentConfig {
|
|
4
4
|
}
|
|
@@ -28,7 +28,7 @@ export declare class GeminiImageModel extends ImageModel<GeminiImageModelInput,
|
|
|
28
28
|
* @param input The input to process
|
|
29
29
|
* @returns The generated response
|
|
30
30
|
*/
|
|
31
|
-
process(input: GeminiImageModelInput): Promise<ImageModelOutput>;
|
|
31
|
+
process(input: GeminiImageModelInput, options: AgentInvokeOptions): Promise<ImageModelOutput>;
|
|
32
32
|
private generateImageByImagenModel;
|
|
33
33
|
private generateImageByGeminiModel;
|
|
34
34
|
}
|
|
@@ -52,7 +52,7 @@ class GeminiImageModel extends core_1.ImageModel {
|
|
|
52
52
|
* @param input The input to process
|
|
53
53
|
* @returns The generated response
|
|
54
54
|
*/
|
|
55
|
-
async process(input) {
|
|
55
|
+
async process(input, options) {
|
|
56
56
|
const model = input.model || this.credential.model;
|
|
57
57
|
const responseFormat = input.responseFormat || "base64";
|
|
58
58
|
if (responseFormat === "url") {
|
|
@@ -61,7 +61,7 @@ class GeminiImageModel extends core_1.ImageModel {
|
|
|
61
61
|
if (model.includes("imagen")) {
|
|
62
62
|
return this.generateImageByImagenModel(input);
|
|
63
63
|
}
|
|
64
|
-
return this.generateImageByGeminiModel(input);
|
|
64
|
+
return this.generateImageByGeminiModel(input, options);
|
|
65
65
|
}
|
|
66
66
|
async generateImageByImagenModel(input) {
|
|
67
67
|
const model = input.model || this.credential.model;
|
|
@@ -89,7 +89,9 @@ class GeminiImageModel extends core_1.ImageModel {
|
|
|
89
89
|
});
|
|
90
90
|
return {
|
|
91
91
|
images: response.generatedImages
|
|
92
|
-
?.map(({ image }) =>
|
|
92
|
+
?.map(({ image }) => image?.imageBytes
|
|
93
|
+
? { type: "file", data: image.imageBytes, mimeType: image.mimeType }
|
|
94
|
+
: undefined)
|
|
93
95
|
.filter(type_utils_js_1.isNonNullable) || [],
|
|
94
96
|
usage: {
|
|
95
97
|
inputTokens: 0,
|
|
@@ -98,7 +100,7 @@ class GeminiImageModel extends core_1.ImageModel {
|
|
|
98
100
|
model,
|
|
99
101
|
};
|
|
100
102
|
}
|
|
101
|
-
async generateImageByGeminiModel(input) {
|
|
103
|
+
async generateImageByGeminiModel(input, options) {
|
|
102
104
|
const model = input.model || this.credential.model;
|
|
103
105
|
const mergedInput = { ...this.modelOptions, ...input };
|
|
104
106
|
const inputKeys = [
|
|
@@ -131,24 +133,35 @@ class GeminiImageModel extends core_1.ImageModel {
|
|
|
131
133
|
"topK",
|
|
132
134
|
"topP",
|
|
133
135
|
];
|
|
136
|
+
const images = await Promise.all((0, type_utils_js_1.flat)(input.image).map(async (image) => {
|
|
137
|
+
const { data, mimeType } = await this.transformFileOutput(core_1.FileOutputType.file, image, options);
|
|
138
|
+
return { inlineData: { data, mimeType } };
|
|
139
|
+
}));
|
|
134
140
|
const response = await this.client.models.generateContent({
|
|
135
141
|
model: model,
|
|
136
|
-
contents: input.prompt,
|
|
142
|
+
contents: [{ text: input.prompt }, ...images],
|
|
137
143
|
config: {
|
|
138
|
-
responseModalities: [genai_1.Modality.
|
|
144
|
+
responseModalities: [genai_1.Modality.IMAGE],
|
|
139
145
|
candidateCount: input.n || 1,
|
|
140
146
|
...(0, type_utils_js_1.pick)(mergedInput, inputKeys),
|
|
141
147
|
},
|
|
142
148
|
});
|
|
143
149
|
const allImages = (response.candidates ?? [])
|
|
144
150
|
.flatMap((candidate) => candidate.content?.parts ?? [])
|
|
145
|
-
.map((part) =>
|
|
151
|
+
.map((part) => part.inlineData?.data
|
|
152
|
+
? {
|
|
153
|
+
type: "file",
|
|
154
|
+
data: part.inlineData.data,
|
|
155
|
+
filename: part.inlineData.displayName,
|
|
156
|
+
mimeType: part.inlineData.mimeType,
|
|
157
|
+
}
|
|
158
|
+
: null)
|
|
146
159
|
.filter(type_utils_js_1.isNonNullable);
|
|
147
160
|
return {
|
|
148
161
|
images: allImages,
|
|
149
162
|
usage: {
|
|
150
|
-
inputTokens: 0,
|
|
151
|
-
outputTokens: 0,
|
|
163
|
+
inputTokens: response.usageMetadata?.promptTokenCount || 0,
|
|
164
|
+
outputTokens: response.usageMetadata?.candidatesTokenCount || 0,
|
|
152
165
|
},
|
|
153
166
|
model,
|
|
154
167
|
};
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { ImageModel, type ImageModelInput, type ImageModelOptions, type ImageModelOutput } from "@aigne/core";
|
|
1
|
+
import { type AgentInvokeOptions, ImageModel, type ImageModelInput, type ImageModelOptions, type ImageModelOutput } from "@aigne/core";
|
|
2
2
|
import { type GenerateContentConfig, type GenerateImagesConfig, GoogleGenAI } from "@google/genai";
|
|
3
3
|
export interface GeminiImageModelInput extends ImageModelInput, GenerateImagesConfig, GenerateContentConfig {
|
|
4
4
|
}
|
|
@@ -28,7 +28,7 @@ export declare class GeminiImageModel extends ImageModel<GeminiImageModelInput,
|
|
|
28
28
|
* @param input The input to process
|
|
29
29
|
* @returns The generated response
|
|
30
30
|
*/
|
|
31
|
-
process(input: GeminiImageModelInput): Promise<ImageModelOutput>;
|
|
31
|
+
process(input: GeminiImageModelInput, options: AgentInvokeOptions): Promise<ImageModelOutput>;
|
|
32
32
|
private generateImageByImagenModel;
|
|
33
33
|
private generateImageByGeminiModel;
|
|
34
34
|
}
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { ImageModel, type ImageModelInput, type ImageModelOptions, type ImageModelOutput } from "@aigne/core";
|
|
1
|
+
import { type AgentInvokeOptions, ImageModel, type ImageModelInput, type ImageModelOptions, type ImageModelOutput } from "@aigne/core";
|
|
2
2
|
import { type GenerateContentConfig, type GenerateImagesConfig, GoogleGenAI } from "@google/genai";
|
|
3
3
|
export interface GeminiImageModelInput extends ImageModelInput, GenerateImagesConfig, GenerateContentConfig {
|
|
4
4
|
}
|
|
@@ -28,7 +28,7 @@ export declare class GeminiImageModel extends ImageModel<GeminiImageModelInput,
|
|
|
28
28
|
* @param input The input to process
|
|
29
29
|
* @returns The generated response
|
|
30
30
|
*/
|
|
31
|
-
process(input: GeminiImageModelInput): Promise<ImageModelOutput>;
|
|
31
|
+
process(input: GeminiImageModelInput, options: AgentInvokeOptions): Promise<ImageModelOutput>;
|
|
32
32
|
private generateImageByImagenModel;
|
|
33
33
|
private generateImageByGeminiModel;
|
|
34
34
|
}
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { ImageModel, imageModelInputSchema, } from "@aigne/core";
|
|
2
|
-
import { checkArguments, isNonNullable, pick } from "@aigne/core/utils/type-utils.js";
|
|
1
|
+
import { FileOutputType, ImageModel, imageModelInputSchema, } from "@aigne/core";
|
|
2
|
+
import { checkArguments, flat, isNonNullable, pick } from "@aigne/core/utils/type-utils.js";
|
|
3
3
|
import { GoogleGenAI, Modality, } from "@google/genai";
|
|
4
4
|
import { z } from "zod";
|
|
5
5
|
const DEFAULT_MODEL = "imagen-4.0-generate-001";
|
|
@@ -49,7 +49,7 @@ export class GeminiImageModel extends ImageModel {
|
|
|
49
49
|
* @param input The input to process
|
|
50
50
|
* @returns The generated response
|
|
51
51
|
*/
|
|
52
|
-
async process(input) {
|
|
52
|
+
async process(input, options) {
|
|
53
53
|
const model = input.model || this.credential.model;
|
|
54
54
|
const responseFormat = input.responseFormat || "base64";
|
|
55
55
|
if (responseFormat === "url") {
|
|
@@ -58,7 +58,7 @@ export class GeminiImageModel extends ImageModel {
|
|
|
58
58
|
if (model.includes("imagen")) {
|
|
59
59
|
return this.generateImageByImagenModel(input);
|
|
60
60
|
}
|
|
61
|
-
return this.generateImageByGeminiModel(input);
|
|
61
|
+
return this.generateImageByGeminiModel(input, options);
|
|
62
62
|
}
|
|
63
63
|
async generateImageByImagenModel(input) {
|
|
64
64
|
const model = input.model || this.credential.model;
|
|
@@ -86,7 +86,9 @@ export class GeminiImageModel extends ImageModel {
|
|
|
86
86
|
});
|
|
87
87
|
return {
|
|
88
88
|
images: response.generatedImages
|
|
89
|
-
?.map(({ image }) =>
|
|
89
|
+
?.map(({ image }) => image?.imageBytes
|
|
90
|
+
? { type: "file", data: image.imageBytes, mimeType: image.mimeType }
|
|
91
|
+
: undefined)
|
|
90
92
|
.filter(isNonNullable) || [],
|
|
91
93
|
usage: {
|
|
92
94
|
inputTokens: 0,
|
|
@@ -95,7 +97,7 @@ export class GeminiImageModel extends ImageModel {
|
|
|
95
97
|
model,
|
|
96
98
|
};
|
|
97
99
|
}
|
|
98
|
-
async generateImageByGeminiModel(input) {
|
|
100
|
+
async generateImageByGeminiModel(input, options) {
|
|
99
101
|
const model = input.model || this.credential.model;
|
|
100
102
|
const mergedInput = { ...this.modelOptions, ...input };
|
|
101
103
|
const inputKeys = [
|
|
@@ -128,24 +130,35 @@ export class GeminiImageModel extends ImageModel {
|
|
|
128
130
|
"topK",
|
|
129
131
|
"topP",
|
|
130
132
|
];
|
|
133
|
+
const images = await Promise.all(flat(input.image).map(async (image) => {
|
|
134
|
+
const { data, mimeType } = await this.transformFileOutput(FileOutputType.file, image, options);
|
|
135
|
+
return { inlineData: { data, mimeType } };
|
|
136
|
+
}));
|
|
131
137
|
const response = await this.client.models.generateContent({
|
|
132
138
|
model: model,
|
|
133
|
-
contents: input.prompt,
|
|
139
|
+
contents: [{ text: input.prompt }, ...images],
|
|
134
140
|
config: {
|
|
135
|
-
responseModalities: [Modality.
|
|
141
|
+
responseModalities: [Modality.IMAGE],
|
|
136
142
|
candidateCount: input.n || 1,
|
|
137
143
|
...pick(mergedInput, inputKeys),
|
|
138
144
|
},
|
|
139
145
|
});
|
|
140
146
|
const allImages = (response.candidates ?? [])
|
|
141
147
|
.flatMap((candidate) => candidate.content?.parts ?? [])
|
|
142
|
-
.map((part) =>
|
|
148
|
+
.map((part) => part.inlineData?.data
|
|
149
|
+
? {
|
|
150
|
+
type: "file",
|
|
151
|
+
data: part.inlineData.data,
|
|
152
|
+
filename: part.inlineData.displayName,
|
|
153
|
+
mimeType: part.inlineData.mimeType,
|
|
154
|
+
}
|
|
155
|
+
: null)
|
|
143
156
|
.filter(isNonNullable);
|
|
144
157
|
return {
|
|
145
158
|
images: allImages,
|
|
146
159
|
usage: {
|
|
147
|
-
inputTokens: 0,
|
|
148
|
-
outputTokens: 0,
|
|
160
|
+
inputTokens: response.usageMetadata?.promptTokenCount || 0,
|
|
161
|
+
outputTokens: response.usageMetadata?.candidatesTokenCount || 0,
|
|
149
162
|
},
|
|
150
163
|
model,
|
|
151
164
|
};
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@aigne/gemini",
|
|
3
|
-
"version": "0.13.5",
|
|
3
|
+
"version": "0.14.0-beta",
|
|
4
4
|
"description": "AIGNE Gemini SDK for integrating with Google's Gemini AI models",
|
|
5
5
|
"publishConfig": {
|
|
6
6
|
"access": "public"
|
|
@@ -38,8 +38,8 @@
|
|
|
38
38
|
"@google/genai": "^1.20.0",
|
|
39
39
|
"uuid": "^13.0.0",
|
|
40
40
|
"zod": "^3.25.67",
|
|
41
|
-
"@aigne/
|
|
42
|
-
"@aigne/
|
|
41
|
+
"@aigne/platform-helpers": "^0.6.3-beta",
|
|
42
|
+
"@aigne/openai": "^0.16.0-beta"
|
|
43
43
|
},
|
|
44
44
|
"devDependencies": {
|
|
45
45
|
"@types/bun": "^1.2.22",
|
|
@@ -47,8 +47,8 @@
|
|
|
47
47
|
"npm-run-all": "^4.1.5",
|
|
48
48
|
"rimraf": "^6.0.1",
|
|
49
49
|
"typescript": "^5.9.2",
|
|
50
|
-
"@aigne/test-utils": "^0.5.
|
|
51
|
-
"@aigne/core": "^1.
|
|
50
|
+
"@aigne/test-utils": "^0.5.53-beta",
|
|
51
|
+
"@aigne/core": "^1.61.0-beta"
|
|
52
52
|
},
|
|
53
53
|
"scripts": {
|
|
54
54
|
"lint": "tsc --noEmit",
|