@aigne/gemini 0.13.5 → 0.14.0-beta.1

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,37 @@
1
1
  # Changelog
2
2
 
3
+ ## [0.14.0-beta.1](https://github.com/AIGNE-io/aigne-framework/compare/gemini-v0.14.0-beta...gemini-v0.14.0-beta.1) (2025-09-23)
4
+
5
+
6
+ ### Bug Fixes
7
+
8
+ * improve image model parameters ([#530](https://github.com/AIGNE-io/aigne-framework/issues/530)) ([d66b5ca](https://github.com/AIGNE-io/aigne-framework/commit/d66b5ca01e14baad2712cc1a84930cdb63703232))
9
+
10
+
11
+ ### Dependencies
12
+
13
+ * The following workspace dependencies were updated
14
+   * dependencies
15
+     * @aigne/openai bumped to 0.16.0-beta.1
16
+
17
+ ## [0.14.0-beta](https://github.com/AIGNE-io/aigne-framework/compare/gemini-v0.13.5...gemini-v0.14.0-beta) (2025-09-22)
18
+
19
+
20
+ ### Features
21
+
22
+ * improve image model architecture and file handling ([#527](https://github.com/AIGNE-io/aigne-framework/issues/527)) ([4db50aa](https://github.com/AIGNE-io/aigne-framework/commit/4db50aa0387a1a0f045ca11aaa61613e36ca7597))
23
+
24
+
25
+ ### Dependencies
26
+
27
+ * The following workspace dependencies were updated
28
+   * dependencies
29
+     * @aigne/openai bumped to 0.16.0-beta
30
+     * @aigne/platform-helpers bumped to 0.6.3-beta
31
+   * devDependencies
32
+     * @aigne/core bumped to 1.61.0-beta
33
+     * @aigne/test-utils bumped to 0.5.53-beta
34
+
3
35
  ## [0.13.5](https://github.com/AIGNE-io/aigne-framework/compare/gemini-v0.13.4...gemini-v0.13.5) (2025-09-18)
4
36
 
5
37
 
@@ -1,4 +1,4 @@
1
- import { ImageModel, type ImageModelInput, type ImageModelOptions, type ImageModelOutput } from "@aigne/core";
1
+ import { type AgentInvokeOptions, ImageModel, type ImageModelInput, type ImageModelOptions, type ImageModelOutput } from "@aigne/core";
2
2
  import { type GenerateContentConfig, type GenerateImagesConfig, GoogleGenAI } from "@google/genai";
3
3
  export interface GeminiImageModelInput extends ImageModelInput, GenerateImagesConfig, GenerateContentConfig {
4
4
  }
@@ -28,7 +28,7 @@ export declare class GeminiImageModel extends ImageModel<GeminiImageModelInput,
28
28
  * @param input The input to process
29
29
  * @returns The generated response
30
30
  */
31
- process(input: GeminiImageModelInput): Promise<ImageModelOutput>;
31
+ process(input: GeminiImageModelInput, options: AgentInvokeOptions): Promise<ImageModelOutput>;
32
32
  private generateImageByImagenModel;
33
33
  private generateImageByGeminiModel;
34
34
  }
@@ -52,7 +52,7 @@ class GeminiImageModel extends core_1.ImageModel {
52
52
  * @param input The input to process
53
53
  * @returns The generated response
54
54
  */
55
- async process(input) {
55
+ async process(input, options) {
56
56
  const model = input.model || this.credential.model;
57
57
  const responseFormat = input.responseFormat || "base64";
58
58
  if (responseFormat === "url") {
@@ -61,7 +61,7 @@ class GeminiImageModel extends core_1.ImageModel {
61
61
  if (model.includes("imagen")) {
62
62
  return this.generateImageByImagenModel(input);
63
63
  }
64
- return this.generateImageByGeminiModel(input);
64
+ return this.generateImageByGeminiModel(input, options);
65
65
  }
66
66
  async generateImageByImagenModel(input) {
67
67
  const model = input.model || this.credential.model;
@@ -89,7 +89,9 @@ class GeminiImageModel extends core_1.ImageModel {
89
89
  });
90
90
  return {
91
91
  images: response.generatedImages
92
- ?.map(({ image }) => (image?.imageBytes ? { base64: image.imageBytes } : undefined))
92
+ ?.map(({ image }) => image?.imageBytes
93
+ ? { type: "file", data: image.imageBytes, mimeType: image.mimeType }
94
+ : undefined)
93
95
  .filter(type_utils_js_1.isNonNullable) || [],
94
96
  usage: {
95
97
  inputTokens: 0,
@@ -98,7 +100,7 @@ class GeminiImageModel extends core_1.ImageModel {
98
100
  model,
99
101
  };
100
102
  }
101
- async generateImageByGeminiModel(input) {
103
+ async generateImageByGeminiModel(input, options) {
102
104
  const model = input.model || this.credential.model;
103
105
  const mergedInput = { ...this.modelOptions, ...input };
104
106
  const inputKeys = [
@@ -131,9 +133,13 @@ class GeminiImageModel extends core_1.ImageModel {
131
133
  "topK",
132
134
  "topP",
133
135
  ];
136
+ const images = await Promise.all((0, type_utils_js_1.flat)(input.image).map(async (image) => {
137
+ const { data, mimeType } = await this.transformFileOutput(core_1.FileOutputType.file, image, options);
138
+ return { inlineData: { data, mimeType } };
139
+ }));
134
140
  const response = await this.client.models.generateContent({
135
141
  model: model,
136
- contents: input.prompt,
142
+ contents: [{ text: input.prompt }, ...images],
137
143
  config: {
138
144
  responseModalities: [genai_1.Modality.TEXT, genai_1.Modality.IMAGE],
139
145
  candidateCount: input.n || 1,
@@ -142,13 +148,20 @@ class GeminiImageModel extends core_1.ImageModel {
142
148
  });
143
149
  const allImages = (response.candidates ?? [])
144
150
  .flatMap((candidate) => candidate.content?.parts ?? [])
145
- .map((part) => (part.inlineData?.data ? { base64: part.inlineData?.data } : null))
151
+ .map((part) => part.inlineData?.data
152
+ ? {
153
+ type: "file",
154
+ data: part.inlineData.data,
155
+ filename: part.inlineData.displayName,
156
+ mimeType: part.inlineData.mimeType,
157
+ }
158
+ : null)
146
159
  .filter(type_utils_js_1.isNonNullable);
147
160
  return {
148
161
  images: allImages,
149
162
  usage: {
150
- inputTokens: 0,
151
- outputTokens: 0,
163
+ inputTokens: response.usageMetadata?.promptTokenCount || 0,
164
+ outputTokens: response.usageMetadata?.candidatesTokenCount || 0,
152
165
  },
153
166
  model,
154
167
  };
@@ -1,4 +1,4 @@
1
- import { ImageModel, type ImageModelInput, type ImageModelOptions, type ImageModelOutput } from "@aigne/core";
1
+ import { type AgentInvokeOptions, ImageModel, type ImageModelInput, type ImageModelOptions, type ImageModelOutput } from "@aigne/core";
2
2
  import { type GenerateContentConfig, type GenerateImagesConfig, GoogleGenAI } from "@google/genai";
3
3
  export interface GeminiImageModelInput extends ImageModelInput, GenerateImagesConfig, GenerateContentConfig {
4
4
  }
@@ -28,7 +28,7 @@ export declare class GeminiImageModel extends ImageModel<GeminiImageModelInput,
28
28
  * @param input The input to process
29
29
  * @returns The generated response
30
30
  */
31
- process(input: GeminiImageModelInput): Promise<ImageModelOutput>;
31
+ process(input: GeminiImageModelInput, options: AgentInvokeOptions): Promise<ImageModelOutput>;
32
32
  private generateImageByImagenModel;
33
33
  private generateImageByGeminiModel;
34
34
  }
@@ -1,4 +1,4 @@
1
- import { ImageModel, type ImageModelInput, type ImageModelOptions, type ImageModelOutput } from "@aigne/core";
1
+ import { type AgentInvokeOptions, ImageModel, type ImageModelInput, type ImageModelOptions, type ImageModelOutput } from "@aigne/core";
2
2
  import { type GenerateContentConfig, type GenerateImagesConfig, GoogleGenAI } from "@google/genai";
3
3
  export interface GeminiImageModelInput extends ImageModelInput, GenerateImagesConfig, GenerateContentConfig {
4
4
  }
@@ -28,7 +28,7 @@ export declare class GeminiImageModel extends ImageModel<GeminiImageModelInput,
28
28
  * @param input The input to process
29
29
  * @returns The generated response
30
30
  */
31
- process(input: GeminiImageModelInput): Promise<ImageModelOutput>;
31
+ process(input: GeminiImageModelInput, options: AgentInvokeOptions): Promise<ImageModelOutput>;
32
32
  private generateImageByImagenModel;
33
33
  private generateImageByGeminiModel;
34
34
  }
@@ -1,5 +1,5 @@
1
- import { ImageModel, imageModelInputSchema, } from "@aigne/core";
2
- import { checkArguments, isNonNullable, pick } from "@aigne/core/utils/type-utils.js";
1
+ import { FileOutputType, ImageModel, imageModelInputSchema, } from "@aigne/core";
2
+ import { checkArguments, flat, isNonNullable, pick } from "@aigne/core/utils/type-utils.js";
3
3
  import { GoogleGenAI, Modality, } from "@google/genai";
4
4
  import { z } from "zod";
5
5
  const DEFAULT_MODEL = "imagen-4.0-generate-001";
@@ -49,7 +49,7 @@ export class GeminiImageModel extends ImageModel {
49
49
  * @param input The input to process
50
50
  * @returns The generated response
51
51
  */
52
- async process(input) {
52
+ async process(input, options) {
53
53
  const model = input.model || this.credential.model;
54
54
  const responseFormat = input.responseFormat || "base64";
55
55
  if (responseFormat === "url") {
@@ -58,7 +58,7 @@ export class GeminiImageModel extends ImageModel {
58
58
  if (model.includes("imagen")) {
59
59
  return this.generateImageByImagenModel(input);
60
60
  }
61
- return this.generateImageByGeminiModel(input);
61
+ return this.generateImageByGeminiModel(input, options);
62
62
  }
63
63
  async generateImageByImagenModel(input) {
64
64
  const model = input.model || this.credential.model;
@@ -86,7 +86,9 @@ export class GeminiImageModel extends ImageModel {
86
86
  });
87
87
  return {
88
88
  images: response.generatedImages
89
- ?.map(({ image }) => (image?.imageBytes ? { base64: image.imageBytes } : undefined))
89
+ ?.map(({ image }) => image?.imageBytes
90
+ ? { type: "file", data: image.imageBytes, mimeType: image.mimeType }
91
+ : undefined)
90
92
  .filter(isNonNullable) || [],
91
93
  usage: {
92
94
  inputTokens: 0,
@@ -95,7 +97,7 @@ export class GeminiImageModel extends ImageModel {
95
97
  model,
96
98
  };
97
99
  }
98
- async generateImageByGeminiModel(input) {
100
+ async generateImageByGeminiModel(input, options) {
99
101
  const model = input.model || this.credential.model;
100
102
  const mergedInput = { ...this.modelOptions, ...input };
101
103
  const inputKeys = [
@@ -128,9 +130,13 @@ export class GeminiImageModel extends ImageModel {
128
130
  "topK",
129
131
  "topP",
130
132
  ];
133
+ const images = await Promise.all(flat(input.image).map(async (image) => {
134
+ const { data, mimeType } = await this.transformFileOutput(FileOutputType.file, image, options);
135
+ return { inlineData: { data, mimeType } };
136
+ }));
131
137
  const response = await this.client.models.generateContent({
132
138
  model: model,
133
- contents: input.prompt,
139
+ contents: [{ text: input.prompt }, ...images],
134
140
  config: {
135
141
  responseModalities: [Modality.TEXT, Modality.IMAGE],
136
142
  candidateCount: input.n || 1,
@@ -139,13 +145,20 @@ export class GeminiImageModel extends ImageModel {
139
145
  });
140
146
  const allImages = (response.candidates ?? [])
141
147
  .flatMap((candidate) => candidate.content?.parts ?? [])
142
- .map((part) => (part.inlineData?.data ? { base64: part.inlineData?.data } : null))
148
+ .map((part) => part.inlineData?.data
149
+ ? {
150
+ type: "file",
151
+ data: part.inlineData.data,
152
+ filename: part.inlineData.displayName,
153
+ mimeType: part.inlineData.mimeType,
154
+ }
155
+ : null)
143
156
  .filter(isNonNullable);
144
157
  return {
145
158
  images: allImages,
146
159
  usage: {
147
- inputTokens: 0,
148
- outputTokens: 0,
160
+ inputTokens: response.usageMetadata?.promptTokenCount || 0,
161
+ outputTokens: response.usageMetadata?.candidatesTokenCount || 0,
149
162
  },
150
163
  model,
151
164
  };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@aigne/gemini",
3
- "version": "0.13.5",
3
+ "version": "0.14.0-beta.1",
4
4
  "description": "AIGNE Gemini SDK for integrating with Google's Gemini AI models",
5
5
  "publishConfig": {
6
6
  "access": "public"
@@ -38,8 +38,8 @@
38
38
  "@google/genai": "^1.20.0",
39
39
  "uuid": "^13.0.0",
40
40
  "zod": "^3.25.67",
41
- "@aigne/openai": "^0.15.4",
42
- "@aigne/platform-helpers": "^0.6.2"
41
+ "@aigne/openai": "^0.16.0-beta.1",
42
+ "@aigne/platform-helpers": "^0.6.3-beta"
43
43
  },
44
44
  "devDependencies": {
45
45
  "@types/bun": "^1.2.22",
@@ -47,8 +47,8 @@
47
47
  "npm-run-all": "^4.1.5",
48
48
  "rimraf": "^6.0.1",
49
49
  "typescript": "^5.9.2",
50
- "@aigne/test-utils": "^0.5.52",
51
- "@aigne/core": "^1.60.3"
50
+ "@aigne/core": "^1.61.0-beta",
51
+ "@aigne/test-utils": "^0.5.53-beta"
52
52
  },
53
53
  "scripts": {
54
54
  "lint": "tsc --noEmit",