@aigne/gemini 0.11.5 → 0.12.0
This diff shows the changes between publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
- package/CHANGELOG.md +34 -0
- package/README.md +120 -1
- package/lib/cjs/gemini-chat-model.d.ts +10 -1
- package/lib/cjs/gemini-chat-model.js +206 -0
- package/lib/cjs/gemini-image-model.js +2 -2
- package/lib/dts/gemini-chat-model.d.ts +10 -1
- package/lib/esm/gemini-chat-model.d.ts +10 -1
- package/lib/esm/gemini-chat-model.js +206 -0
- package/lib/esm/gemini-image-model.js +2 -2
- package/package.json +6 -4
package/CHANGELOG.md
CHANGED
@@ -1,5 +1,39 @@
 # Changelog
 
+## [0.12.0](https://github.com/AIGNE-io/aigne-framework/compare/gemini-v0.11.6...gemini-v0.12.0) (2025-09-05)
+
+
+### Features
+
+* add modalities support for chat model ([#454](https://github.com/AIGNE-io/aigne-framework/issues/454)) ([70d1bf6](https://github.com/AIGNE-io/aigne-framework/commit/70d1bf631f4e711235d89c6df8ee210a19179b30))
+
+
+### Dependencies
+
+* The following workspace dependencies were updated
+  * dependencies
+    * @aigne/openai bumped to 0.14.0
+  * devDependencies
+    * @aigne/core bumped to 1.58.0
+    * @aigne/test-utils bumped to 0.5.44
+
+## [0.11.6](https://github.com/AIGNE-io/aigne-framework/compare/gemini-v0.11.5...gemini-v0.11.6) (2025-09-01)
+
+
+### Bug Fixes
+
+* **transport:** improve HTTP client option handling and error serialization ([#445](https://github.com/AIGNE-io/aigne-framework/issues/445)) ([d3bcdd2](https://github.com/AIGNE-io/aigne-framework/commit/d3bcdd23ab8011a7d40fc157fd61eb240494c7a5))
+
+
+### Dependencies
+
+* The following workspace dependencies were updated
+  * dependencies
+    * @aigne/openai bumped to 0.13.7
+  * devDependencies
+    * @aigne/core bumped to 1.57.5
+    * @aigne/test-utils bumped to 0.5.43
+
 ## [0.11.5](https://github.com/AIGNE-io/aigne-framework/compare/gemini-v0.11.4...gemini-v0.11.5) (2025-08-30)
 
 
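The 0.12.0 feature note above ("add modalities support for chat model") corresponds to the new `handleImageModelProcessing` path in the gemini-chat-model diffs further down, where `responseModalities` is read from `modelOptions.modalities` and models whose name contains "image" bypass the OpenAI-compatible endpoint. A minimal usage sketch, assuming the `invoke` API shown in the package README; the model name and the accepted modality values are placeholders, not taken from the package:

```typescript
import { GeminiChatModel } from "@aigne/gemini";

// Hypothetical model id; per the diff, any model name containing "image"
// is routed through the new @google/genai streaming path.
const model = new GeminiChatModel({
  apiKey: "your-api-key", // optional if GEMINI_API_KEY is set
  model: "gemini-2.0-flash-preview-image-generation",
});

const result = await model.invoke({
  messages: [{ role: "user", content: "Draw a red bicycle and describe it in one sentence" }],
  // New in 0.12.0: forwarded to @google/genai as responseModalities
  // (the exact accepted values are an assumption here).
  modelOptions: { modalities: ["TEXT", "IMAGE"] },
});

console.log(result); // text plus any generated images surfaced as file outputs
```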
package/README.md
CHANGED
@@ -23,13 +23,14 @@ AIGNE Gemini SDK for integrating with Google's Gemini AI models within the [AIGN
 <picture>
   <source srcset="https://raw.githubusercontent.com/AIGNE-io/aigne-framework/main/assets/aigne-gemini-dark.png" media="(prefers-color-scheme: dark)">
   <source srcset="https://raw.githubusercontent.com/AIGNE-io/aigne-framework/main/assets/aigne-gemini.png" media="(prefers-color-scheme: light)">
-  <img src="https://raw.githubusercontent.com/AIGNE-io/aigne-framework/main/aigne-gemini.png" alt="AIGNE Arch" />
+  <img src="https://raw.githubusercontent.com/AIGNE-io/aigne-framework/main/assets/aigne-gemini.png" alt="AIGNE Arch" />
 </picture>
 
 ## Features
 
 * **Google Gemini API Integration**: Direct connection to Google's Gemini API services
 * **Chat Completions**: Support for Gemini's chat completions API with all available models
+* **Image Generation**: Support for both Imagen and Gemini image generation models
 * **Multimodal Support**: Built-in support for handling both text and image inputs
 * **Function Calling**: Support for function calling capabilities
 * **Streaming Responses**: Support for streaming responses for more responsive applications
@@ -60,6 +61,8 @@ pnpm add @aigne/gemini @aigne/core
 
 ## Basic Usage
 
+### Chat Model
+
 ```typescript file="test/gemini-chat-model.test.ts" region="example-gemini-chat-model"
 import { GeminiChatModel } from "@aigne/gemini";
 
@@ -86,6 +89,38 @@ console.log(result);
 */
 ```
 
+### Image Generation Model
+
+```typescript
+import { GeminiImageModel } from "@aigne/gemini";
+
+const model = new GeminiImageModel({
+  apiKey: "your-api-key", // Optional if set in env variables
+  model: "imagen-4.0-generate-001", // Default Imagen model
+});
+
+const result = await model.invoke({
+  prompt: "A serene mountain landscape at sunset with golden light",
+  n: 1,
+});
+
+console.log(result);
+/* Output:
+{
+  images: [
+    {
+      base64: "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAA..."
+    }
+  ],
+  usage: {
+    inputTokens: 0,
+    outputTokens: 0
+  },
+  model: "imagen-4.0-generate-001"
+}
+*/
+```
+
 ## Streaming Responses
 
 ```typescript file="test/gemini-chat-model.test.ts" region="example-gemini-chat-model-streaming"
@@ -119,6 +154,90 @@ console.log(fullText); // Output: "Hello from Gemini! I'm Google's helpful AI as
 console.log(json); // { model: "gemini-1.5-flash" }
 ```
 
+## Image Generation Parameters
+
+The `GeminiImageModel` supports different parameters depending on the model type:
+
+### Imagen Models (e.g., `imagen-4.0-generate-001`)
+
+- **`prompt`** (string): The text description of the image you want to generate
+- **`n`** (number): Number of images to generate (defaults to 1)
+- **`seed`** (number): Random seed for reproducible generation
+- **`safetyFilterLevel`** (string): Safety filter level for content moderation
+- **`personGeneration`** (string): Person generation settings
+- **`outputMimeType`** (string): Output image format (e.g., "image/png", "image/jpeg")
+- **`outputGcsUri`** (string): Google Cloud Storage URI for output
+- **`outputCompressionQuality`** (number): JPEG compression quality (1-100)
+- **`negativePrompt`** (string): Description of what to exclude from the image
+- **`language`** (string): Language for the prompt
+- **`includeSafetyAttributes`** (boolean): Include safety attributes in response
+- **`includeRaiReason`** (boolean): Include RAI reasoning in response
+- **`imageSize`** (string): Size of the generated image
+- **`guidanceScale`** (number): Guidance scale for generation
+- **`aspectRatio`** (string): Aspect ratio of the image
+- **`addWatermark`** (boolean): Add watermark to generated images
+
+### Gemini Models (e.g., `gemini-1.5-pro`)
+
+- **`prompt`** (string): The text description of the image you want to generate
+- **`n`** (number): Number of images to generate (defaults to 1)
+- **`temperature`** (number): Controls randomness in generation (0.0 to 1.0)
+- **`maxOutputTokens`** (number): Maximum number of tokens in response
+- **`topP`** (number): Nucleus sampling parameter
+- **`topK`** (number): Top-k sampling parameter
+- **`safetySettings`** (array): Safety settings for content generation
+- **`seed`** (number): Random seed for reproducible generation
+- **`stopSequences`** (array): Sequences that stop generation
+- **`systemInstruction`** (string): System-level instructions
+
+### Advanced Image Generation Example
+
+```typescript
+const result = await model.invoke({
+  prompt: "A futuristic cityscape with neon lights and flying cars",
+  model: "imagen-4.0-generate-001",
+  n: 2,
+  imageSize: "1024x1024",
+  aspectRatio: "1:1",
+  guidanceScale: 7.5,
+  negativePrompt: "blurry, low quality, distorted",
+  seed: 12345,
+  includeSafetyAttributes: true,
+  outputMimeType: "image/png"
+});
+```
+
+## Model Options
+
+You can also set default options when creating the model:
+
+```typescript
+const model = new GeminiImageModel({
+  apiKey: "your-api-key",
+  model: "imagen-4.0-generate-001",
+  modelOptions: {
+    safetyFilterLevel: "BLOCK_MEDIUM_AND_ABOVE",
+    includeSafetyAttributes: true,
+    outputMimeType: "image/png"
+  }
+});
+```
+
+## Environment Variables
+
+Set the following environment variable for automatic API key detection:
+
+```bash
+export GEMINI_API_KEY="your-gemini-api-key"
+```
+
+## API Reference
+
+For complete parameter details and advanced features:
+
+- **Imagen Models**: Refer to [Google GenAI Models.generateImages()](https://googleapis.github.io/js-genai/release_docs/classes/models.Models.html#generateimages)
+- **Gemini Models**: Refer to [Google GenAI Models.generateContent()](https://googleapis.github.io/js-genai/release_docs/classes/models.Models.html#generatecontent)
+
 ## License
 
 Elastic-2.0
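The README section above lists parameters for Gemini-family image models but only demonstrates the Imagen path. A short sketch of what the corresponding Gemini-model call might look like, assuming the same `invoke` API; the model name and the way the listed parameters are passed are assumptions, not taken from the package:

```typescript
import { GeminiImageModel } from "@aigne/gemini";

const model = new GeminiImageModel({
  apiKey: "your-api-key",
  model: "gemini-2.0-flash-preview-image-generation", // assumed image-capable Gemini model id
});

const result = await model.invoke({
  prompt: "A watercolor lighthouse in a storm",
  n: 1,
  temperature: 0.7, // Gemini-model parameters from the list above (assumed top-level options)
  topP: 0.9,
});

console.log(result.images.length); // images are returned as { base64: ... } entries
```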
package/lib/cjs/gemini-chat-model.d.ts
CHANGED

@@ -1,5 +1,7 @@
-import type
+import { type AgentInvokeOptions, type AgentProcessResult, type ChatModelInput, type ChatModelOutput } from "@aigne/core";
+import { type PromiseOrValue } from "@aigne/core/utils/type-utils.js";
 import { OpenAIChatModel, type OpenAIChatModelOptions } from "@aigne/openai";
+import { GoogleGenAI } from "@google/genai";
 /**
  * Implementation of the ChatModel interface for Google's Gemini API
  *
@@ -20,5 +22,12 @@ export declare class GeminiChatModel extends OpenAIChatModel {
     protected supportsToolsUseWithJsonSchema: boolean;
     protected supportsParallelToolCalls: boolean;
     protected supportsToolStreaming: boolean;
+    protected _googleClient?: GoogleGenAI;
+    get googleClient(): GoogleGenAI;
+    process(input: ChatModelInput, options: AgentInvokeOptions): PromiseOrValue<AgentProcessResult<ChatModelOutput>>;
+    private handleImageModelProcessing;
+    private buildConfig;
+    private buildTools;
+    private buildContents;
     getRunMessages(input: ChatModelInput): ReturnType<OpenAIChatModel["getRunMessages"]>;
 }
package/lib/cjs/gemini-chat-model.js
CHANGED

@@ -1,7 +1,12 @@
 "use strict";
 Object.defineProperty(exports, "__esModule", { value: true });
 exports.GeminiChatModel = void 0;
+const core_1 = require("@aigne/core");
+const type_utils_js_1 = require("@aigne/core/utils/type-utils.js");
 const openai_1 = require("@aigne/openai");
+const index_js_1 = require("@aigne/platform-helpers/nodejs/index.js");
+const genai_1 = require("@google/genai");
+const uuid_1 = require("uuid");
 const GEMINI_BASE_URL = "https://generativelanguage.googleapis.com/v1beta/openai";
 const GEMINI_DEFAULT_CHAT_MODEL = "gemini-2.0-flash";
 /**
@@ -30,6 +35,207 @@ class GeminiChatModel extends openai_1.OpenAIChatModel {
     supportsToolsUseWithJsonSchema = false;
     supportsParallelToolCalls = false;
     supportsToolStreaming = false;
+    _googleClient;
+    get googleClient() {
+        if (this._googleClient)
+            return this._googleClient;
+        const { apiKey } = this.credential;
+        if (!apiKey)
+            throw new Error(`${this.name} requires an API key. Please provide it via \`options.apiKey\`, or set the \`${this.apiKeyEnvName}\` environment variable`);
+        this._googleClient ??= new genai_1.GoogleGenAI({ apiKey });
+        return this._googleClient;
+    }
+    process(input, options) {
+        const model = input.modelOptions?.model || this.credential.model;
+        if (!model.includes("image"))
+            return super.process(input, options);
+        return this.handleImageModelProcessing(input, options);
+    }
+    async *handleImageModelProcessing(input, options) {
+        const model = input.modelOptions?.model || this.credential.model;
+        const { contents, config } = await this.buildContents(input);
+        const parameters = {
+            model: model,
+            contents,
+            config: {
+                responseModalities: input.modelOptions?.modalities,
+                temperature: input.modelOptions?.temperature || this.modelOptions?.temperature,
+                topP: input.modelOptions?.topP || this.modelOptions?.topP,
+                frequencyPenalty: input.modelOptions?.frequencyPenalty || this.modelOptions?.frequencyPenalty,
+                presencePenalty: input.modelOptions?.presencePenalty || this.modelOptions?.presencePenalty,
+                ...config,
+                ...(await this.buildTools(input)),
+                ...(await this.buildConfig(input)),
+            },
+        };
+        const response = await this.googleClient.models.generateContentStream(parameters);
+        const usage = {
+            inputTokens: 0,
+            outputTokens: 0,
+        };
+        let responseModel;
+        const files = [];
+        const toolCalls = [];
+        let text = "";
+        for await (const chunk of response) {
+            if (!responseModel && chunk.modelVersion) {
+                responseModel = chunk.modelVersion;
+                yield { delta: { json: { model: responseModel } } };
+            }
+            for (const { content } of chunk.candidates ?? []) {
+                if (content?.parts) {
+                    for (const part of content.parts) {
+                        if (part.text) {
+                            text += part.text;
+                            if (input.responseFormat?.type !== "json_schema") {
+                                yield { delta: { text: { text: part.text } } };
+                            }
+                        }
+                        if (part.inlineData?.data) {
+                            files.push(await this.transformFileOutput(input, {
+                                type: "file",
+                                data: part.inlineData.data,
+                                filename: part.inlineData.displayName,
+                                mimeType: part.inlineData.mimeType,
+                            }, options));
+                            yield { delta: { json: { files } } };
+                        }
+                        if (part.functionCall?.name) {
+                            toolCalls.push({
+                                id: part.functionCall.id || (0, uuid_1.v7)(),
+                                type: "function",
+                                function: {
+                                    name: part.functionCall.name,
+                                    arguments: part.functionCall.args || {},
+                                },
+                            });
+                            yield { delta: { json: { toolCalls } } };
+                        }
+                    }
+                }
+            }
+            if (chunk.usageMetadata) {
+                usage.inputTokens += chunk.usageMetadata.promptTokenCount || 0;
+                usage.outputTokens += chunk.usageMetadata.candidatesTokenCount || 0;
+                yield { delta: { json: { usage } } };
+            }
+        }
+        if (input.responseFormat?.type === "json_schema") {
+            yield { delta: { json: { json: (0, core_1.safeParseJSON)(text) } } };
+        }
+    }
+    async buildConfig(input) {
+        const config = {};
+        if (input.responseFormat?.type === "json_schema") {
+            config.responseJsonSchema = input.responseFormat.jsonSchema.schema;
+            config.responseMimeType = "application/json";
+        }
+        return config;
+    }
+    async buildTools(input) {
+        const tools = [];
+        for (const tool of input.tools ?? []) {
+            tools.push({
+                functionDeclarations: [
+                    {
+                        name: tool.function.name,
+                        description: tool.function.description,
+                        parametersJsonSchema: tool.function.parameters,
+                    },
+                ],
+            });
+        }
+        const functionCallingConfig = !input.toolChoice
+            ? undefined
+            : input.toolChoice === "auto"
+                ? { mode: genai_1.FunctionCallingConfigMode.AUTO }
+                : input.toolChoice === "none"
+                    ? { mode: genai_1.FunctionCallingConfigMode.NONE }
+                    : input.toolChoice === "required"
+                        ? { mode: genai_1.FunctionCallingConfigMode.ANY }
+                        : {
+                            mode: genai_1.FunctionCallingConfigMode.ANY,
+                            allowedFunctionNames: [input.toolChoice.function.name],
+                        };
+        return { tools, toolConfig: { functionCallingConfig } };
+    }
+    async buildContents(input) {
+        const result = {
+            contents: [],
+        };
+        const systemParts = [];
+        result.contents = (await Promise.all(input.messages.map(async (msg) => {
+            if (msg.role === "system") {
+                if (typeof msg.content === "string") {
+                    systemParts.push({ text: msg.content });
+                }
+                else if (Array.isArray(msg.content)) {
+                    systemParts.push(...msg.content.map((item) => {
+                        if (item.type === "text")
+                            return { text: item.text };
+                        throw new Error(`Unsupported content type: ${item.type}`);
+                    }));
+                }
+                return;
+            }
+            const content = {
+                role: msg.role === "agent" ? "model" : "user",
+            };
+            if (msg.toolCalls) {
+                content.parts = msg.toolCalls.map((call) => ({
+                    functionCall: {
+                        id: call.id,
+                        name: call.function.name,
+                        args: call.function.arguments,
+                    },
+                }));
+            }
+            else if (msg.toolCallId) {
+                const call = input.messages
+                    .flatMap((i) => i.toolCalls)
+                    .find((c) => c?.id === msg.toolCallId);
+                if (!call)
+                    throw new Error(`Tool call not found: ${msg.toolCallId}`);
+                content.parts = [
+                    {
+                        functionResponse: {
+                            id: msg.toolCallId,
+                            name: call.function.name,
+                            response: JSON.parse(msg.content),
+                        },
+                    },
+                ];
+            }
+            else if (typeof msg.content === "string") {
+                content.parts = [{ text: msg.content }];
+            }
+            else if (Array.isArray(msg.content)) {
+                content.parts = await Promise.all(msg.content.map(async (item) => {
+                    switch (item.type) {
+                        case "text":
+                            return { text: item.text };
+                        case "url":
+                            return { fileData: { fileUri: item.url, mimeType: item.mimeType } };
+                        case "file":
+                            return { inlineData: { data: item.data, mimeType: item.mimeType } };
+                        case "local":
+                            return {
+                                inlineData: {
+                                    data: await index_js_1.nodejs.fs.readFile(item.path, "base64"),
+                                    mimeType: item.mimeType,
+                                },
+                            };
+                    }
+                }));
+            }
+            return content;
+        }))).filter(type_utils_js_1.isNonNullable);
+        if (systemParts) {
+            result.config ??= {};
+            result.config.systemInstruction = systemParts;
+        }
+        return result;
+    }
     async getRunMessages(input) {
         const messages = await super.getRunMessages(input);
         const lastMessage = messages.at(-1);
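For reference, a sketch of the `ChatModelInput` fields that the new `buildTools`/`buildContents` helpers above consume; the shape is inferred from the compiled code, so treat it as an illustration rather than the framework's documented API:

```typescript
const input = {
  messages: [
    { role: "system", content: "You are a weather assistant." },
    { role: "user", content: "What's the weather in Paris?" },
  ],
  tools: [
    {
      type: "function",
      function: {
        name: "get_weather",
        description: "Look up current weather for a city",
        parameters: {
          type: "object",
          properties: { city: { type: "string" } },
          required: ["city"],
        },
      },
    },
  ],
  // "auto" -> FunctionCallingConfigMode.AUTO, "none" -> NONE,
  // "required" -> ANY, { function: { name } } -> ANY + allowedFunctionNames
  toolChoice: "auto",
};
```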
package/lib/cjs/gemini-image-model.js
CHANGED

@@ -142,8 +142,8 @@ class GeminiImageModel extends core_1.ImageModel {
         });
         const allImages = (response.candidates ?? [])
             .flatMap((candidate) => candidate.content?.parts ?? [])
-            .
-            .
+            .map((part) => (part.inlineData?.data ? { base64: part.inlineData?.data } : null))
+            .filter(type_utils_js_1.isNonNullable);
         return {
             images: allImages,
             usage: {
package/lib/dts/gemini-chat-model.d.ts
CHANGED

@@ -1,5 +1,7 @@
-import type
+import { type AgentInvokeOptions, type AgentProcessResult, type ChatModelInput, type ChatModelOutput } from "@aigne/core";
+import { type PromiseOrValue } from "@aigne/core/utils/type-utils.js";
 import { OpenAIChatModel, type OpenAIChatModelOptions } from "@aigne/openai";
+import { GoogleGenAI } from "@google/genai";
 /**
  * Implementation of the ChatModel interface for Google's Gemini API
  *
@@ -20,5 +22,12 @@ export declare class GeminiChatModel extends OpenAIChatModel {
     protected supportsToolsUseWithJsonSchema: boolean;
     protected supportsParallelToolCalls: boolean;
     protected supportsToolStreaming: boolean;
+    protected _googleClient?: GoogleGenAI;
+    get googleClient(): GoogleGenAI;
+    process(input: ChatModelInput, options: AgentInvokeOptions): PromiseOrValue<AgentProcessResult<ChatModelOutput>>;
+    private handleImageModelProcessing;
+    private buildConfig;
+    private buildTools;
+    private buildContents;
     getRunMessages(input: ChatModelInput): ReturnType<OpenAIChatModel["getRunMessages"]>;
 }
package/lib/esm/gemini-chat-model.d.ts
CHANGED

@@ -1,5 +1,7 @@
-import type
+import { type AgentInvokeOptions, type AgentProcessResult, type ChatModelInput, type ChatModelOutput } from "@aigne/core";
+import { type PromiseOrValue } from "@aigne/core/utils/type-utils.js";
 import { OpenAIChatModel, type OpenAIChatModelOptions } from "@aigne/openai";
+import { GoogleGenAI } from "@google/genai";
 /**
  * Implementation of the ChatModel interface for Google's Gemini API
  *
@@ -20,5 +22,12 @@ export declare class GeminiChatModel extends OpenAIChatModel {
     protected supportsToolsUseWithJsonSchema: boolean;
     protected supportsParallelToolCalls: boolean;
     protected supportsToolStreaming: boolean;
+    protected _googleClient?: GoogleGenAI;
+    get googleClient(): GoogleGenAI;
+    process(input: ChatModelInput, options: AgentInvokeOptions): PromiseOrValue<AgentProcessResult<ChatModelOutput>>;
+    private handleImageModelProcessing;
+    private buildConfig;
+    private buildTools;
+    private buildContents;
     getRunMessages(input: ChatModelInput): ReturnType<OpenAIChatModel["getRunMessages"]>;
 }
package/lib/esm/gemini-chat-model.js
CHANGED

@@ -1,4 +1,9 @@
+import { safeParseJSON, } from "@aigne/core";
+import { isNonNullable } from "@aigne/core/utils/type-utils.js";
 import { OpenAIChatModel } from "@aigne/openai";
+import { nodejs } from "@aigne/platform-helpers/nodejs/index.js";
+import { FunctionCallingConfigMode, GoogleGenAI, } from "@google/genai";
+import { v7 } from "uuid";
 const GEMINI_BASE_URL = "https://generativelanguage.googleapis.com/v1beta/openai";
 const GEMINI_DEFAULT_CHAT_MODEL = "gemini-2.0-flash";
 /**
@@ -27,6 +32,207 @@ export class GeminiChatModel extends OpenAIChatModel {
     supportsToolsUseWithJsonSchema = false;
     supportsParallelToolCalls = false;
     supportsToolStreaming = false;
+    _googleClient;
+    get googleClient() {
+        if (this._googleClient)
+            return this._googleClient;
+        const { apiKey } = this.credential;
+        if (!apiKey)
+            throw new Error(`${this.name} requires an API key. Please provide it via \`options.apiKey\`, or set the \`${this.apiKeyEnvName}\` environment variable`);
+        this._googleClient ??= new GoogleGenAI({ apiKey });
+        return this._googleClient;
+    }
+    process(input, options) {
+        const model = input.modelOptions?.model || this.credential.model;
+        if (!model.includes("image"))
+            return super.process(input, options);
+        return this.handleImageModelProcessing(input, options);
+    }
+    async *handleImageModelProcessing(input, options) {
+        const model = input.modelOptions?.model || this.credential.model;
+        const { contents, config } = await this.buildContents(input);
+        const parameters = {
+            model: model,
+            contents,
+            config: {
+                responseModalities: input.modelOptions?.modalities,
+                temperature: input.modelOptions?.temperature || this.modelOptions?.temperature,
+                topP: input.modelOptions?.topP || this.modelOptions?.topP,
+                frequencyPenalty: input.modelOptions?.frequencyPenalty || this.modelOptions?.frequencyPenalty,
+                presencePenalty: input.modelOptions?.presencePenalty || this.modelOptions?.presencePenalty,
+                ...config,
+                ...(await this.buildTools(input)),
+                ...(await this.buildConfig(input)),
+            },
+        };
+        const response = await this.googleClient.models.generateContentStream(parameters);
+        const usage = {
+            inputTokens: 0,
+            outputTokens: 0,
+        };
+        let responseModel;
+        const files = [];
+        const toolCalls = [];
+        let text = "";
+        for await (const chunk of response) {
+            if (!responseModel && chunk.modelVersion) {
+                responseModel = chunk.modelVersion;
+                yield { delta: { json: { model: responseModel } } };
+            }
+            for (const { content } of chunk.candidates ?? []) {
+                if (content?.parts) {
+                    for (const part of content.parts) {
+                        if (part.text) {
+                            text += part.text;
+                            if (input.responseFormat?.type !== "json_schema") {
+                                yield { delta: { text: { text: part.text } } };
+                            }
+                        }
+                        if (part.inlineData?.data) {
+                            files.push(await this.transformFileOutput(input, {
+                                type: "file",
+                                data: part.inlineData.data,
+                                filename: part.inlineData.displayName,
+                                mimeType: part.inlineData.mimeType,
+                            }, options));
+                            yield { delta: { json: { files } } };
+                        }
+                        if (part.functionCall?.name) {
+                            toolCalls.push({
+                                id: part.functionCall.id || v7(),
+                                type: "function",
+                                function: {
+                                    name: part.functionCall.name,
+                                    arguments: part.functionCall.args || {},
+                                },
+                            });
+                            yield { delta: { json: { toolCalls } } };
+                        }
+                    }
+                }
+            }
+            if (chunk.usageMetadata) {
+                usage.inputTokens += chunk.usageMetadata.promptTokenCount || 0;
+                usage.outputTokens += chunk.usageMetadata.candidatesTokenCount || 0;
+                yield { delta: { json: { usage } } };
+            }
+        }
+        if (input.responseFormat?.type === "json_schema") {
+            yield { delta: { json: { json: safeParseJSON(text) } } };
+        }
+    }
+    async buildConfig(input) {
+        const config = {};
+        if (input.responseFormat?.type === "json_schema") {
+            config.responseJsonSchema = input.responseFormat.jsonSchema.schema;
+            config.responseMimeType = "application/json";
+        }
+        return config;
+    }
+    async buildTools(input) {
+        const tools = [];
+        for (const tool of input.tools ?? []) {
+            tools.push({
+                functionDeclarations: [
+                    {
+                        name: tool.function.name,
+                        description: tool.function.description,
+                        parametersJsonSchema: tool.function.parameters,
+                    },
+                ],
+            });
+        }
+        const functionCallingConfig = !input.toolChoice
+            ? undefined
+            : input.toolChoice === "auto"
+                ? { mode: FunctionCallingConfigMode.AUTO }
+                : input.toolChoice === "none"
+                    ? { mode: FunctionCallingConfigMode.NONE }
+                    : input.toolChoice === "required"
+                        ? { mode: FunctionCallingConfigMode.ANY }
+                        : {
+                            mode: FunctionCallingConfigMode.ANY,
+                            allowedFunctionNames: [input.toolChoice.function.name],
+                        };
+        return { tools, toolConfig: { functionCallingConfig } };
+    }
+    async buildContents(input) {
+        const result = {
+            contents: [],
+        };
+        const systemParts = [];
+        result.contents = (await Promise.all(input.messages.map(async (msg) => {
+            if (msg.role === "system") {
+                if (typeof msg.content === "string") {
+                    systemParts.push({ text: msg.content });
+                }
+                else if (Array.isArray(msg.content)) {
+                    systemParts.push(...msg.content.map((item) => {
+                        if (item.type === "text")
+                            return { text: item.text };
+                        throw new Error(`Unsupported content type: ${item.type}`);
+                    }));
+                }
+                return;
+            }
+            const content = {
+                role: msg.role === "agent" ? "model" : "user",
+            };
+            if (msg.toolCalls) {
+                content.parts = msg.toolCalls.map((call) => ({
+                    functionCall: {
+                        id: call.id,
+                        name: call.function.name,
+                        args: call.function.arguments,
+                    },
+                }));
+            }
+            else if (msg.toolCallId) {
+                const call = input.messages
+                    .flatMap((i) => i.toolCalls)
+                    .find((c) => c?.id === msg.toolCallId);
+                if (!call)
+                    throw new Error(`Tool call not found: ${msg.toolCallId}`);
+                content.parts = [
+                    {
+                        functionResponse: {
+                            id: msg.toolCallId,
+                            name: call.function.name,
+                            response: JSON.parse(msg.content),
+                        },
+                    },
+                ];
+            }
+            else if (typeof msg.content === "string") {
+                content.parts = [{ text: msg.content }];
+            }
+            else if (Array.isArray(msg.content)) {
+                content.parts = await Promise.all(msg.content.map(async (item) => {
+                    switch (item.type) {
+                        case "text":
+                            return { text: item.text };
+                        case "url":
+                            return { fileData: { fileUri: item.url, mimeType: item.mimeType } };
+                        case "file":
+                            return { inlineData: { data: item.data, mimeType: item.mimeType } };
+                        case "local":
+                            return {
+                                inlineData: {
+                                    data: await nodejs.fs.readFile(item.path, "base64"),
+                                    mimeType: item.mimeType,
+                                },
+                            };
+                    }
+                }));
+            }
+            return content;
+        }))).filter(isNonNullable);
+        if (systemParts) {
+            result.config ??= {};
+            result.config.systemInstruction = systemParts;
+        }
+        return result;
+    }
     async getRunMessages(input) {
         const messages = await super.getRunMessages(input);
         const lastMessage = messages.at(-1);
package/lib/esm/gemini-image-model.js
CHANGED

@@ -139,8 +139,8 @@ export class GeminiImageModel extends ImageModel {
         });
         const allImages = (response.candidates ?? [])
             .flatMap((candidate) => candidate.content?.parts ?? [])
-            .
-            .
+            .map((part) => (part.inlineData?.data ? { base64: part.inlineData?.data } : null))
+            .filter(isNonNullable);
         return {
             images: allImages,
             usage: {
package/package.json
CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@aigne/gemini",
-  "version": "0.
+  "version": "0.12.0",
   "description": "AIGNE Gemini SDK for integrating with Google's Gemini AI models",
   "publishConfig": {
     "access": "public"
@@ -36,8 +36,10 @@
   },
   "dependencies": {
     "@google/genai": "^1.15.0",
+    "uuid": "^11.1.0",
     "zod": "^3.25.67",
-    "@aigne/
+    "@aigne/platform-helpers": "^0.6.2",
+    "@aigne/openai": "^0.14.0"
   },
   "devDependencies": {
     "@types/bun": "^1.2.18",
@@ -45,8 +47,8 @@
     "npm-run-all": "^4.1.5",
     "rimraf": "^6.0.1",
     "typescript": "^5.8.3",
-    "@aigne/core": "^1.
-    "@aigne/test-utils": "^0.5.
+    "@aigne/core": "^1.58.0",
+    "@aigne/test-utils": "^0.5.44"
   },
   "scripts": {
     "lint": "tsc --noEmit",