@aigne/gemini 0.11.0 → 0.11.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,22 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## [0.11.1](https://github.com/AIGNE-io/aigne-framework/compare/gemini-v0.11.0...gemini-v0.11.1) (2025-08-28)
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
### Bug Fixes
|
|
7
|
+
|
|
8
|
+
* support gemini-2.0-flash model for image model ([#429](https://github.com/AIGNE-io/aigne-framework/issues/429)) ([5a0bba1](https://github.com/AIGNE-io/aigne-framework/commit/5a0bba197cf8785384b70302f86cf702d04b7fc4))
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
### Dependencies
|
|
12
|
+
|
|
13
|
+
* The following workspace dependencies were updated
|
|
14
|
+
* dependencies
|
|
15
|
+
* @aigne/openai bumped to 0.13.2
|
|
16
|
+
* devDependencies
|
|
17
|
+
* @aigne/core bumped to 1.57.0
|
|
18
|
+
* @aigne/test-utils bumped to 0.5.38
|
|
19
|
+
|
|
3
20
|
## [0.11.0](https://github.com/AIGNE-io/aigne-framework/compare/gemini-v0.10.0...gemini-v0.11.0) (2025-08-27)
|
|
4
21
|
|
|
5
22
|
|
|
@@ -1,35 +1,14 @@
|
|
|
1
1
|
import { ImageModel, type ImageModelInput, type ImageModelOptions, type ImageModelOutput } from "@aigne/core";
|
|
2
|
-
import { type GenerateImagesConfig, GoogleGenAI } from "@google/genai";
|
|
3
|
-
export interface GeminiImageModelInput extends ImageModelInput, GenerateImagesConfig {
|
|
2
|
+
import { type GenerateContentConfig, type GenerateImagesConfig, GoogleGenAI } from "@google/genai";
|
|
3
|
+
export interface GeminiImageModelInput extends ImageModelInput, GenerateImagesConfig, GenerateContentConfig {
|
|
4
4
|
}
|
|
5
5
|
export interface GeminiImageModelOutput extends ImageModelOutput {
|
|
6
6
|
}
|
|
7
7
|
export interface GeminiImageModelOptions extends ImageModelOptions<GeminiImageModelInput, GeminiImageModelOutput> {
|
|
8
|
-
/**
|
|
9
|
-
* API key for Gemini API
|
|
10
|
-
*
|
|
11
|
-
* If not provided, will look for GEMINI_API_KEY in environment variables
|
|
12
|
-
*/
|
|
13
8
|
apiKey?: string;
|
|
14
|
-
/**
|
|
15
|
-
* Base URL for Gemini API
|
|
16
|
-
*
|
|
17
|
-
* Useful for proxies or alternate endpoints
|
|
18
|
-
*/
|
|
19
9
|
baseURL?: string;
|
|
20
|
-
/**
|
|
21
|
-
* Gemini model to use
|
|
22
|
-
*
|
|
23
|
-
* Defaults to 'gemini-2.0-flash'
|
|
24
|
-
*/
|
|
25
10
|
model?: string;
|
|
26
|
-
/**
|
|
27
|
-
* Additional model options to control behavior
|
|
28
|
-
*/
|
|
29
11
|
modelOptions?: Omit<Partial<GeminiImageModelInput>, "model">;
|
|
30
|
-
/**
|
|
31
|
-
* Client options for Gemini API
|
|
32
|
-
*/
|
|
33
12
|
clientOptions?: Record<string, any>;
|
|
34
13
|
}
|
|
35
14
|
export declare class GeminiImageModel extends ImageModel<GeminiImageModelInput, GeminiImageModelOutput> {
|
|
@@ -50,4 +29,6 @@ export declare class GeminiImageModel extends ImageModel<GeminiImageModelInput,
|
|
|
50
29
|
* @returns The generated response
|
|
51
30
|
*/
|
|
52
31
|
process(input: GeminiImageModelInput): Promise<ImageModelOutput>;
|
|
32
|
+
private generateImageByImagenModel;
|
|
33
|
+
private generateImageByGeminiModel;
|
|
53
34
|
}
|
|
@@ -58,6 +58,13 @@ class GeminiImageModel extends core_1.ImageModel {
|
|
|
58
58
|
if (responseFormat === "url") {
|
|
59
59
|
throw new Error("Gemini image models currently only support base64 format");
|
|
60
60
|
}
|
|
61
|
+
if (model.includes("imagen")) {
|
|
62
|
+
return this.generateImageByImagenModel(input);
|
|
63
|
+
}
|
|
64
|
+
return this.generateImageByGeminiModel(input);
|
|
65
|
+
}
|
|
66
|
+
async generateImageByImagenModel(input) {
|
|
67
|
+
const model = input.model || this.credential.model;
|
|
61
68
|
const mergedInput = { ...this.modelOptions, ...input };
|
|
62
69
|
const inputKeys = [
|
|
63
70
|
"seed",
|
|
@@ -91,5 +98,60 @@ class GeminiImageModel extends core_1.ImageModel {
|
|
|
91
98
|
model,
|
|
92
99
|
};
|
|
93
100
|
}
|
|
101
|
+
async generateImageByGeminiModel(input) {
|
|
102
|
+
const model = input.model || this.credential.model;
|
|
103
|
+
const mergedInput = { ...this.modelOptions, ...input };
|
|
104
|
+
const inputKeys = [
|
|
105
|
+
"abortSignal",
|
|
106
|
+
"audioTimestamp",
|
|
107
|
+
"automaticFunctionCalling",
|
|
108
|
+
"cachedContent",
|
|
109
|
+
"frequencyPenalty",
|
|
110
|
+
"httpOptions",
|
|
111
|
+
"labels",
|
|
112
|
+
"logprobs",
|
|
113
|
+
"maxOutputTokens",
|
|
114
|
+
"mediaResolution",
|
|
115
|
+
"modelSelectionConfig",
|
|
116
|
+
"presencePenalty",
|
|
117
|
+
"responseJsonSchema",
|
|
118
|
+
"responseLogprobs",
|
|
119
|
+
"responseMimeType",
|
|
120
|
+
"responseSchema",
|
|
121
|
+
"routingConfig",
|
|
122
|
+
"safetySettings",
|
|
123
|
+
"seed",
|
|
124
|
+
"speechConfig",
|
|
125
|
+
"stopSequences",
|
|
126
|
+
"systemInstruction",
|
|
127
|
+
"temperature",
|
|
128
|
+
"thinkingConfig",
|
|
129
|
+
"toolConfig",
|
|
130
|
+
"tools",
|
|
131
|
+
"topK",
|
|
132
|
+
"topP",
|
|
133
|
+
];
|
|
134
|
+
const response = await this.client.models.generateContent({
|
|
135
|
+
model: model,
|
|
136
|
+
contents: input.prompt,
|
|
137
|
+
config: {
|
|
138
|
+
responseModalities: [genai_1.Modality.TEXT, genai_1.Modality.IMAGE],
|
|
139
|
+
candidateCount: input.n || 1,
|
|
140
|
+
...(0, type_utils_js_1.pick)(mergedInput, inputKeys),
|
|
141
|
+
},
|
|
142
|
+
});
|
|
143
|
+
const allImages = (response.candidates ?? [])
|
|
144
|
+
.flatMap((candidate) => candidate.content?.parts ?? [])
|
|
145
|
+
.filter((part) => part?.inlineData?.data)
|
|
146
|
+
.map((part) => ({ base64: part.inlineData.data }));
|
|
147
|
+
return {
|
|
148
|
+
images: allImages,
|
|
149
|
+
usage: {
|
|
150
|
+
inputTokens: 0,
|
|
151
|
+
outputTokens: 0,
|
|
152
|
+
},
|
|
153
|
+
model,
|
|
154
|
+
};
|
|
155
|
+
}
|
|
94
156
|
}
|
|
95
157
|
exports.GeminiImageModel = GeminiImageModel;
|
|
@@ -1,35 +1,14 @@
|
|
|
1
1
|
import { ImageModel, type ImageModelInput, type ImageModelOptions, type ImageModelOutput } from "@aigne/core";
|
|
2
|
-
import { type GenerateImagesConfig, GoogleGenAI } from "@google/genai";
|
|
3
|
-
export interface GeminiImageModelInput extends ImageModelInput, GenerateImagesConfig {
|
|
2
|
+
import { type GenerateContentConfig, type GenerateImagesConfig, GoogleGenAI } from "@google/genai";
|
|
3
|
+
export interface GeminiImageModelInput extends ImageModelInput, GenerateImagesConfig, GenerateContentConfig {
|
|
4
4
|
}
|
|
5
5
|
export interface GeminiImageModelOutput extends ImageModelOutput {
|
|
6
6
|
}
|
|
7
7
|
export interface GeminiImageModelOptions extends ImageModelOptions<GeminiImageModelInput, GeminiImageModelOutput> {
|
|
8
|
-
/**
|
|
9
|
-
* API key for Gemini API
|
|
10
|
-
*
|
|
11
|
-
* If not provided, will look for GEMINI_API_KEY in environment variables
|
|
12
|
-
*/
|
|
13
8
|
apiKey?: string;
|
|
14
|
-
/**
|
|
15
|
-
* Base URL for Gemini API
|
|
16
|
-
*
|
|
17
|
-
* Useful for proxies or alternate endpoints
|
|
18
|
-
*/
|
|
19
9
|
baseURL?: string;
|
|
20
|
-
/**
|
|
21
|
-
* Gemini model to use
|
|
22
|
-
*
|
|
23
|
-
* Defaults to 'gemini-2.0-flash'
|
|
24
|
-
*/
|
|
25
10
|
model?: string;
|
|
26
|
-
/**
|
|
27
|
-
* Additional model options to control behavior
|
|
28
|
-
*/
|
|
29
11
|
modelOptions?: Omit<Partial<GeminiImageModelInput>, "model">;
|
|
30
|
-
/**
|
|
31
|
-
* Client options for Gemini API
|
|
32
|
-
*/
|
|
33
12
|
clientOptions?: Record<string, any>;
|
|
34
13
|
}
|
|
35
14
|
export declare class GeminiImageModel extends ImageModel<GeminiImageModelInput, GeminiImageModelOutput> {
|
|
@@ -50,4 +29,6 @@ export declare class GeminiImageModel extends ImageModel<GeminiImageModelInput,
|
|
|
50
29
|
* @returns The generated response
|
|
51
30
|
*/
|
|
52
31
|
process(input: GeminiImageModelInput): Promise<ImageModelOutput>;
|
|
32
|
+
private generateImageByImagenModel;
|
|
33
|
+
private generateImageByGeminiModel;
|
|
53
34
|
}
|
|
@@ -1,35 +1,14 @@
|
|
|
1
1
|
import { ImageModel, type ImageModelInput, type ImageModelOptions, type ImageModelOutput } from "@aigne/core";
|
|
2
|
-
import { type GenerateImagesConfig, GoogleGenAI } from "@google/genai";
|
|
3
|
-
export interface GeminiImageModelInput extends ImageModelInput, GenerateImagesConfig {
|
|
2
|
+
import { type GenerateContentConfig, type GenerateImagesConfig, GoogleGenAI } from "@google/genai";
|
|
3
|
+
export interface GeminiImageModelInput extends ImageModelInput, GenerateImagesConfig, GenerateContentConfig {
|
|
4
4
|
}
|
|
5
5
|
export interface GeminiImageModelOutput extends ImageModelOutput {
|
|
6
6
|
}
|
|
7
7
|
export interface GeminiImageModelOptions extends ImageModelOptions<GeminiImageModelInput, GeminiImageModelOutput> {
|
|
8
|
-
/**
|
|
9
|
-
* API key for Gemini API
|
|
10
|
-
*
|
|
11
|
-
* If not provided, will look for GEMINI_API_KEY in environment variables
|
|
12
|
-
*/
|
|
13
8
|
apiKey?: string;
|
|
14
|
-
/**
|
|
15
|
-
* Base URL for Gemini API
|
|
16
|
-
*
|
|
17
|
-
* Useful for proxies or alternate endpoints
|
|
18
|
-
*/
|
|
19
9
|
baseURL?: string;
|
|
20
|
-
/**
|
|
21
|
-
* Gemini model to use
|
|
22
|
-
*
|
|
23
|
-
* Defaults to 'gemini-2.0-flash'
|
|
24
|
-
*/
|
|
25
10
|
model?: string;
|
|
26
|
-
/**
|
|
27
|
-
* Additional model options to control behavior
|
|
28
|
-
*/
|
|
29
11
|
modelOptions?: Omit<Partial<GeminiImageModelInput>, "model">;
|
|
30
|
-
/**
|
|
31
|
-
* Client options for Gemini API
|
|
32
|
-
*/
|
|
33
12
|
clientOptions?: Record<string, any>;
|
|
34
13
|
}
|
|
35
14
|
export declare class GeminiImageModel extends ImageModel<GeminiImageModelInput, GeminiImageModelOutput> {
|
|
@@ -50,4 +29,6 @@ export declare class GeminiImageModel extends ImageModel<GeminiImageModelInput,
|
|
|
50
29
|
* @returns The generated response
|
|
51
30
|
*/
|
|
52
31
|
process(input: GeminiImageModelInput): Promise<ImageModelOutput>;
|
|
32
|
+
private generateImageByImagenModel;
|
|
33
|
+
private generateImageByGeminiModel;
|
|
53
34
|
}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { ImageModel, imageModelInputSchema, } from "@aigne/core";
|
|
2
2
|
import { checkArguments, isNonNullable, pick } from "@aigne/core/utils/type-utils.js";
|
|
3
|
-
import { GoogleGenAI } from "@google/genai";
|
|
3
|
+
import { GoogleGenAI, Modality, } from "@google/genai";
|
|
4
4
|
import { z } from "zod";
|
|
5
5
|
const DEFAULT_MODEL = "imagen-4.0-generate-001";
|
|
6
6
|
const geminiImageModelInputSchema = imageModelInputSchema.extend({});
|
|
@@ -55,6 +55,13 @@ export class GeminiImageModel extends ImageModel {
|
|
|
55
55
|
if (responseFormat === "url") {
|
|
56
56
|
throw new Error("Gemini image models currently only support base64 format");
|
|
57
57
|
}
|
|
58
|
+
if (model.includes("imagen")) {
|
|
59
|
+
return this.generateImageByImagenModel(input);
|
|
60
|
+
}
|
|
61
|
+
return this.generateImageByGeminiModel(input);
|
|
62
|
+
}
|
|
63
|
+
async generateImageByImagenModel(input) {
|
|
64
|
+
const model = input.model || this.credential.model;
|
|
58
65
|
const mergedInput = { ...this.modelOptions, ...input };
|
|
59
66
|
const inputKeys = [
|
|
60
67
|
"seed",
|
|
@@ -88,4 +95,59 @@ export class GeminiImageModel extends ImageModel {
|
|
|
88
95
|
model,
|
|
89
96
|
};
|
|
90
97
|
}
|
|
98
|
+
async generateImageByGeminiModel(input) {
|
|
99
|
+
const model = input.model || this.credential.model;
|
|
100
|
+
const mergedInput = { ...this.modelOptions, ...input };
|
|
101
|
+
const inputKeys = [
|
|
102
|
+
"abortSignal",
|
|
103
|
+
"audioTimestamp",
|
|
104
|
+
"automaticFunctionCalling",
|
|
105
|
+
"cachedContent",
|
|
106
|
+
"frequencyPenalty",
|
|
107
|
+
"httpOptions",
|
|
108
|
+
"labels",
|
|
109
|
+
"logprobs",
|
|
110
|
+
"maxOutputTokens",
|
|
111
|
+
"mediaResolution",
|
|
112
|
+
"modelSelectionConfig",
|
|
113
|
+
"presencePenalty",
|
|
114
|
+
"responseJsonSchema",
|
|
115
|
+
"responseLogprobs",
|
|
116
|
+
"responseMimeType",
|
|
117
|
+
"responseSchema",
|
|
118
|
+
"routingConfig",
|
|
119
|
+
"safetySettings",
|
|
120
|
+
"seed",
|
|
121
|
+
"speechConfig",
|
|
122
|
+
"stopSequences",
|
|
123
|
+
"systemInstruction",
|
|
124
|
+
"temperature",
|
|
125
|
+
"thinkingConfig",
|
|
126
|
+
"toolConfig",
|
|
127
|
+
"tools",
|
|
128
|
+
"topK",
|
|
129
|
+
"topP",
|
|
130
|
+
];
|
|
131
|
+
const response = await this.client.models.generateContent({
|
|
132
|
+
model: model,
|
|
133
|
+
contents: input.prompt,
|
|
134
|
+
config: {
|
|
135
|
+
responseModalities: [Modality.TEXT, Modality.IMAGE],
|
|
136
|
+
candidateCount: input.n || 1,
|
|
137
|
+
...pick(mergedInput, inputKeys),
|
|
138
|
+
},
|
|
139
|
+
});
|
|
140
|
+
const allImages = (response.candidates ?? [])
|
|
141
|
+
.flatMap((candidate) => candidate.content?.parts ?? [])
|
|
142
|
+
.filter((part) => part?.inlineData?.data)
|
|
143
|
+
.map((part) => ({ base64: part.inlineData.data }));
|
|
144
|
+
return {
|
|
145
|
+
images: allImages,
|
|
146
|
+
usage: {
|
|
147
|
+
inputTokens: 0,
|
|
148
|
+
outputTokens: 0,
|
|
149
|
+
},
|
|
150
|
+
model,
|
|
151
|
+
};
|
|
152
|
+
}
|
|
91
153
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@aigne/gemini",
|
|
3
|
-
"version": "0.11.0",
|
|
3
|
+
"version": "0.11.1",
|
|
4
4
|
"description": "AIGNE Gemini SDK for integrating with Google's Gemini AI models",
|
|
5
5
|
"publishConfig": {
|
|
6
6
|
"access": "public"
|
|
@@ -37,7 +37,7 @@
|
|
|
37
37
|
"dependencies": {
|
|
38
38
|
"@google/genai": "^1.15.0",
|
|
39
39
|
"zod": "^3.25.67",
|
|
40
|
-
"@aigne/openai": "^0.13.
|
|
40
|
+
"@aigne/openai": "^0.13.2"
|
|
41
41
|
},
|
|
42
42
|
"devDependencies": {
|
|
43
43
|
"@types/bun": "^1.2.18",
|
|
@@ -45,8 +45,8 @@
|
|
|
45
45
|
"npm-run-all": "^4.1.5",
|
|
46
46
|
"rimraf": "^6.0.1",
|
|
47
47
|
"typescript": "^5.8.3",
|
|
48
|
-
"@aigne/core": "^1.
|
|
49
|
-
"@aigne/test-utils": "^0.5.
|
|
48
|
+
"@aigne/core": "^1.57.0",
|
|
49
|
+
"@aigne/test-utils": "^0.5.38"
|
|
50
50
|
},
|
|
51
51
|
"scripts": {
|
|
52
52
|
"lint": "tsc --noEmit",
|