@aigne/gemini 0.14.4-beta.6 → 0.14.4-beta.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +32 -0
- package/lib/cjs/gemini-chat-model.d.ts +23 -3
- package/lib/cjs/gemini-chat-model.js +61 -4
- package/lib/cjs/gemini-video-model.d.ts +14 -7
- package/lib/cjs/gemini-video-model.js +38 -2
- package/lib/cjs/utils.d.ts +15 -0
- package/lib/cjs/utils.js +37 -0
- package/lib/dts/gemini-chat-model.d.ts +23 -3
- package/lib/dts/gemini-video-model.d.ts +14 -7
- package/lib/dts/utils.d.ts +15 -0
- package/lib/esm/gemini-chat-model.d.ts +23 -3
- package/lib/esm/gemini-chat-model.js +61 -4
- package/lib/esm/gemini-video-model.d.ts +14 -7
- package/lib/esm/gemini-video-model.js +39 -3
- package/lib/esm/utils.d.ts +15 -0
- package/lib/esm/utils.js +34 -0
- package/package.json +3 -3
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,37 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## [0.14.4-beta.8](https://github.com/AIGNE-io/aigne-framework/compare/gemini-v0.14.4-beta.7...gemini-v0.14.4-beta.8) (2025-10-31)
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
### Bug Fixes
|
|
7
|
+
|
|
8
|
+
* **models:** add image parameters support for video generation ([#684](https://github.com/AIGNE-io/aigne-framework/issues/684)) ([b048b7f](https://github.com/AIGNE-io/aigne-framework/commit/b048b7f92bd7a532dbdbeb6fb5fa5499bae6b953))
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
### Dependencies
|
|
12
|
+
|
|
13
|
+
* The following workspace dependencies were updated
|
|
14
|
+
* dependencies
|
|
15
|
+
* @aigne/core bumped to 1.65.0-beta.5
|
|
16
|
+
* devDependencies
|
|
17
|
+
* @aigne/test-utils bumped to 0.5.57-beta.6
|
|
18
|
+
|
|
19
|
+
## [0.14.4-beta.7](https://github.com/AIGNE-io/aigne-framework/compare/gemini-v0.14.4-beta.6...gemini-v0.14.4-beta.7) (2025-10-29)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
### Features
|
|
23
|
+
|
|
24
|
+
* add reasoningEffort option for chat model ([#680](https://github.com/AIGNE-io/aigne-framework/issues/680)) ([f69d232](https://github.com/AIGNE-io/aigne-framework/commit/f69d232d714d4a3e4946bdc8c6598747c9bcbd57))
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
### Dependencies
|
|
28
|
+
|
|
29
|
+
* The following workspace dependencies were updated
|
|
30
|
+
* dependencies
|
|
31
|
+
* @aigne/core bumped to 1.65.0-beta.4
|
|
32
|
+
* devDependencies
|
|
33
|
+
* @aigne/test-utils bumped to 0.5.57-beta.5
|
|
34
|
+
|
|
3
35
|
## [0.14.4-beta.6](https://github.com/AIGNE-io/aigne-framework/compare/gemini-v0.14.4-beta.5...gemini-v0.14.4-beta.6) (2025-10-28)
|
|
4
36
|
|
|
5
37
|
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { type AgentProcessResult, ChatModel, type ChatModelInput, type ChatModelOptions, type ChatModelOutput } from "@aigne/core";
|
|
1
|
+
import { type AgentProcessResult, ChatModel, type ChatModelInput, type ChatModelInputOptions, type ChatModelOptions, type ChatModelOutput } from "@aigne/core";
|
|
2
2
|
import { type PromiseOrValue } from "@aigne/core/utils/type-utils.js";
|
|
3
3
|
import { GoogleGenAI, type GoogleGenAIOptions } from "@google/genai";
|
|
4
4
|
export interface GeminiChatModelOptions extends ChatModelOptions {
|
|
@@ -34,10 +34,30 @@ export declare class GeminiChatModel extends ChatModel {
|
|
|
34
34
|
apiKey: string | undefined;
|
|
35
35
|
model: string;
|
|
36
36
|
};
|
|
37
|
-
get modelOptions(): Omit<
|
|
37
|
+
get modelOptions(): Omit<ChatModelInputOptions, "model"> | undefined;
|
|
38
38
|
process(input: ChatModelInput): PromiseOrValue<AgentProcessResult<ChatModelOutput>>;
|
|
39
|
+
protected thinkingBudgetModelMap: ({
|
|
40
|
+
pattern: RegExp;
|
|
41
|
+
support: boolean;
|
|
42
|
+
min: number;
|
|
43
|
+
max: number;
|
|
44
|
+
} | {
|
|
45
|
+
pattern: RegExp;
|
|
46
|
+
support: boolean;
|
|
47
|
+
min?: undefined;
|
|
48
|
+
max?: undefined;
|
|
49
|
+
})[];
|
|
50
|
+
protected thinkingBudgetLevelMap: {
|
|
51
|
+
high: number;
|
|
52
|
+
medium: number;
|
|
53
|
+
low: number;
|
|
54
|
+
minimal: number;
|
|
55
|
+
};
|
|
56
|
+
protected getThinkingBudget(model: string, effort: ChatModelInputOptions["reasoningEffort"]): {
|
|
57
|
+
support: boolean;
|
|
58
|
+
budget?: number;
|
|
59
|
+
};
|
|
39
60
|
private processInput;
|
|
40
|
-
protected supportThinkingModels: string[];
|
|
41
61
|
private buildConfig;
|
|
42
62
|
private buildTools;
|
|
43
63
|
private buildContents;
|
|
@@ -61,15 +61,63 @@ class GeminiChatModel extends core_1.ChatModel {
|
|
|
61
61
|
process(input) {
|
|
62
62
|
return this.processInput(input);
|
|
63
63
|
}
|
|
64
|
+
// References: https://ai.google.dev/gemini-api/docs/thinking#set-budget
|
|
65
|
+
thinkingBudgetModelMap = [
|
|
66
|
+
{
|
|
67
|
+
pattern: /gemini-2.5-pro/,
|
|
68
|
+
support: true,
|
|
69
|
+
min: 128,
|
|
70
|
+
max: 32768,
|
|
71
|
+
},
|
|
72
|
+
{
|
|
73
|
+
pattern: /gemini-2.5-flash/,
|
|
74
|
+
support: true,
|
|
75
|
+
min: 0,
|
|
76
|
+
max: 24576,
|
|
77
|
+
},
|
|
78
|
+
{
|
|
79
|
+
pattern: /2.5-flash-lite/,
|
|
80
|
+
support: true,
|
|
81
|
+
min: 512,
|
|
82
|
+
max: 24576,
|
|
83
|
+
},
|
|
84
|
+
{
|
|
85
|
+
pattern: /.*/,
|
|
86
|
+
support: false,
|
|
87
|
+
},
|
|
88
|
+
];
|
|
89
|
+
thinkingBudgetLevelMap = {
|
|
90
|
+
high: 100000, // use 100k for high, finally capped by model max
|
|
91
|
+
medium: 10000,
|
|
92
|
+
low: 5000,
|
|
93
|
+
minimal: 200,
|
|
94
|
+
};
|
|
95
|
+
getThinkingBudget(model, effort) {
|
|
96
|
+
const m = this.thinkingBudgetModelMap.find((i) => i.pattern.test(model));
|
|
97
|
+
if (!m?.support)
|
|
98
|
+
return { support: false };
|
|
99
|
+
let budget = typeof effort === "string" ? this.thinkingBudgetLevelMap[effort] || undefined : effort;
|
|
100
|
+
if (typeof budget === "undefined")
|
|
101
|
+
return { support: true };
|
|
102
|
+
if (typeof m.min === "number")
|
|
103
|
+
budget = Math.max(m.min, budget);
|
|
104
|
+
if (typeof m.max === "number")
|
|
105
|
+
budget = Math.min(m.max, budget);
|
|
106
|
+
return { support: true, budget };
|
|
107
|
+
}
|
|
64
108
|
async *processInput(input) {
|
|
65
109
|
const model = input.modelOptions?.model || this.credential.model;
|
|
66
110
|
const { contents, config } = await this.buildContents(input);
|
|
111
|
+
const thinkingBudget = this.getThinkingBudget(model, input.modelOptions?.reasoningEffort ?? this.modelOptions?.reasoningEffort);
|
|
67
112
|
const parameters = {
|
|
68
113
|
model,
|
|
69
114
|
contents,
|
|
70
115
|
config: {
|
|
71
|
-
thinkingConfig:
|
|
72
|
-
? {
|
|
116
|
+
thinkingConfig: thinkingBudget.support
|
|
117
|
+
? {
|
|
118
|
+
includeThoughts: true,
|
|
119
|
+
thinkingBudget: thinkingBudget.budget,
|
|
120
|
+
}
|
|
73
121
|
: undefined,
|
|
74
122
|
responseModalities: input.modelOptions?.modalities,
|
|
75
123
|
temperature: input.modelOptions?.temperature || this.modelOptions?.temperature,
|
|
@@ -201,9 +249,18 @@ class GeminiChatModel extends core_1.ChatModel {
|
|
|
201
249
|
}
|
|
202
250
|
}
|
|
203
251
|
}
|
|
204
|
-
yield {
|
|
252
|
+
yield {
|
|
253
|
+
delta: {
|
|
254
|
+
json: {
|
|
255
|
+
usage,
|
|
256
|
+
files: files.length ? files : undefined,
|
|
257
|
+
modelOptions: {
|
|
258
|
+
reasoningEffort: parameters.config?.thinkingConfig?.thinkingBudget,
|
|
259
|
+
},
|
|
260
|
+
},
|
|
261
|
+
},
|
|
262
|
+
};
|
|
205
263
|
}
|
|
206
|
-
supportThinkingModels = ["gemini-2.5-pro", "gemini-2.5-flash"];
|
|
207
264
|
async buildConfig(input) {
|
|
208
265
|
const config = {};
|
|
209
266
|
const { tools, toolConfig } = await this.buildTools(input);
|
|
@@ -1,4 +1,5 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import type { AgentInvokeOptions, FileUnionContent, VideoModelInput, VideoModelOptions, VideoModelOutput } from "@aigne/core";
|
|
2
|
+
import { VideoModel } from "@aigne/core";
|
|
2
3
|
import { GoogleGenAI } from "@google/genai";
|
|
3
4
|
/**
|
|
4
5
|
* Input options for Gemini Video Model
|
|
@@ -13,25 +14,22 @@ export interface GeminiVideoModelInput extends VideoModelInput {
|
|
|
13
14
|
*
|
|
14
15
|
* Veo 3.1: "16:9" (default, 720p and 1080p), "9:16" (720p and 1080p)
|
|
15
16
|
* Veo 3: "16:9" (default, 720p and 1080p), "9:16" (720p and 1080p)
|
|
16
|
-
* Veo 2: "16:9" (default, 720p), "9:16" (720p)
|
|
17
17
|
*/
|
|
18
|
-
aspectRatio?:
|
|
18
|
+
aspectRatio?: "16:9" | "9:16";
|
|
19
19
|
/**
|
|
20
20
|
* Resolution of the video
|
|
21
21
|
*
|
|
22
22
|
* Veo 3.1: "720p" (default), "1080p" (only supports 8 seconds duration)
|
|
23
23
|
* Veo 3: "720p" (default), "1080p" (16:9 only)
|
|
24
|
-
* Veo 2: Not supported
|
|
25
24
|
*/
|
|
26
|
-
size?:
|
|
25
|
+
size?: "720p" | "1080p";
|
|
27
26
|
/**
|
|
28
27
|
* Duration of the generated video in seconds
|
|
29
28
|
*
|
|
30
29
|
* Veo 3.1: "4", "6", "8"
|
|
31
30
|
* Veo 3: "4", "6", "8"
|
|
32
|
-
* Veo 2: "5", "6", "8"
|
|
33
31
|
*/
|
|
34
|
-
seconds?:
|
|
32
|
+
seconds?: "4" | "6" | "8";
|
|
35
33
|
/**
|
|
36
34
|
* Control person generation
|
|
37
35
|
*
|
|
@@ -41,6 +39,15 @@ export interface GeminiVideoModelInput extends VideoModelInput {
|
|
|
41
39
|
* - Veo 2: "allow_all", "allow_adult", "dont_allow"
|
|
42
40
|
*/
|
|
43
41
|
personGeneration?: string;
|
|
42
|
+
/**
|
|
43
|
+
* Last frame for video generation (frame interpolation)
|
|
44
|
+
*/
|
|
45
|
+
lastFrame?: FileUnionContent;
|
|
46
|
+
/**
|
|
47
|
+
* Reference images for video generation
|
|
48
|
+
* Only supported in Veo 3.1 models
|
|
49
|
+
*/
|
|
50
|
+
referenceImages?: FileUnionContent[];
|
|
44
51
|
}
|
|
45
52
|
/**
|
|
46
53
|
* Output from Gemini Video Model
|
|
@@ -7,12 +7,17 @@ const type_utils_js_1 = require("@aigne/core/utils/type-utils.js");
|
|
|
7
7
|
const index_js_1 = require("@aigne/platform-helpers/nodejs/index.js");
|
|
8
8
|
const genai_1 = require("@google/genai");
|
|
9
9
|
const zod_1 = require("zod");
|
|
10
|
+
const utils_js_1 = require("./utils.js");
|
|
10
11
|
const DEFAULT_MODEL = "veo-3.1-generate-preview";
|
|
11
12
|
const DEFAULT_SECONDS = 8;
|
|
12
13
|
const geminiVideoModelInputSchema = core_1.videoModelInputSchema.extend({
|
|
13
14
|
negativePrompt: zod_1.z.string().optional(),
|
|
14
|
-
aspectRatio: zod_1.z.
|
|
15
|
+
aspectRatio: zod_1.z.enum(["16:9", "9:16"]).optional(),
|
|
16
|
+
size: zod_1.z.enum(["720p", "1080p"]).optional(),
|
|
17
|
+
seconds: zod_1.z.enum(["4", "6", "8"]).optional(),
|
|
15
18
|
personGeneration: zod_1.z.string().optional(),
|
|
19
|
+
lastFrame: core_1.fileUnionContentSchema.optional(),
|
|
20
|
+
referenceImages: core_1.fileUnionContentSchema.array().optional(),
|
|
16
21
|
});
|
|
17
22
|
const geminiVideoModelOptionsSchema = zod_1.z.object({
|
|
18
23
|
apiKey: zod_1.z.string().optional(),
|
|
@@ -61,13 +66,16 @@ class GeminiVideoModel extends core_1.VideoModel {
|
|
|
61
66
|
const localPath = index_js_1.nodejs.path.join(dir, `${videoId}.mp4`);
|
|
62
67
|
await this.client.files.download({ file: videoFile, downloadPath: localPath });
|
|
63
68
|
logger_js_1.logger.debug(`Generated video saved to ${localPath}`);
|
|
64
|
-
await
|
|
69
|
+
await (0, utils_js_1.waitFileSizeStable)(localPath);
|
|
65
70
|
const buffer = await index_js_1.nodejs.fs.readFile(localPath);
|
|
66
71
|
return buffer.toString("base64");
|
|
67
72
|
}
|
|
68
73
|
async process(input, options) {
|
|
69
74
|
const model = input.model ?? input.modelOptions?.model ?? this.credential.model;
|
|
70
75
|
const mergedInput = { ...this.modelOptions, ...input };
|
|
76
|
+
if (mergedInput.referenceImages && !model.includes("veo-3.1")) {
|
|
77
|
+
throw new Error("referenceImages is only supported in Veo 3.1 models");
|
|
78
|
+
}
|
|
71
79
|
const config = {};
|
|
72
80
|
if (mergedInput.negativePrompt)
|
|
73
81
|
config.negativePrompt = mergedInput.negativePrompt;
|
|
@@ -79,11 +87,39 @@ class GeminiVideoModel extends core_1.VideoModel {
|
|
|
79
87
|
config.durationSeconds = parseInt(mergedInput.seconds, 10);
|
|
80
88
|
if (mergedInput.personGeneration)
|
|
81
89
|
config.personGeneration = mergedInput.personGeneration;
|
|
90
|
+
if (mergedInput.lastFrame) {
|
|
91
|
+
config.lastFrame = await this.transformFileType("file", mergedInput.lastFrame, options).then((file) => {
|
|
92
|
+
return {
|
|
93
|
+
imageBytes: file.data,
|
|
94
|
+
mimeType: file.mimeType,
|
|
95
|
+
};
|
|
96
|
+
});
|
|
97
|
+
}
|
|
98
|
+
if (mergedInput.referenceImages) {
|
|
99
|
+
config.referenceImages = await Promise.all(mergedInput.referenceImages.map(async (image) => {
|
|
100
|
+
return await this.transformFileType("file", image, options).then((file) => {
|
|
101
|
+
return {
|
|
102
|
+
image: {
|
|
103
|
+
imageBytes: file.data,
|
|
104
|
+
mimeType: file.mimeType,
|
|
105
|
+
},
|
|
106
|
+
};
|
|
107
|
+
});
|
|
108
|
+
}));
|
|
109
|
+
}
|
|
82
110
|
const params = {
|
|
83
111
|
model,
|
|
84
112
|
prompt: mergedInput.prompt,
|
|
85
113
|
config,
|
|
86
114
|
};
|
|
115
|
+
if (mergedInput.image) {
|
|
116
|
+
params.image = await this.transformFileType("file", mergedInput.image, options).then((file) => {
|
|
117
|
+
return {
|
|
118
|
+
imageBytes: file.data,
|
|
119
|
+
mimeType: file.mimeType,
|
|
120
|
+
};
|
|
121
|
+
});
|
|
122
|
+
}
|
|
87
123
|
// Start video generation
|
|
88
124
|
let operation = await this.client.models.generateVideos(params);
|
|
89
125
|
logger_js_1.logger.debug("Video generation started...");
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Wait for file size to stabilize, ensuring the file download is complete.
|
|
3
|
+
*
|
|
4
|
+
* @param filePath - The path to the file to check
|
|
5
|
+
* @param options - Configuration options
|
|
6
|
+
* @param options.checkInterval - Check interval in milliseconds (default: 500ms)
|
|
7
|
+
* @param options.stableCount - Number of consecutive checks with same size to consider stable (default: 3)
|
|
8
|
+
* @param options.timeout - Timeout in milliseconds (default: 60000ms)
|
|
9
|
+
* @throws Error when timeout is reached
|
|
10
|
+
*/
|
|
11
|
+
export declare function waitFileSizeStable(filePath: string, options?: {
|
|
12
|
+
checkInterval?: number;
|
|
13
|
+
stableCount?: number;
|
|
14
|
+
timeout?: number;
|
|
15
|
+
}): Promise<void>;
|
package/lib/cjs/utils.js
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.waitFileSizeStable = waitFileSizeStable;
|
|
4
|
+
const index_js_1 = require("@aigne/platform-helpers/nodejs/index.js");
|
|
5
|
+
/**
|
|
6
|
+
* Wait for file size to stabilize, ensuring the file download is complete.
|
|
7
|
+
*
|
|
8
|
+
* @param filePath - The path to the file to check
|
|
9
|
+
* @param options - Configuration options
|
|
10
|
+
* @param options.checkInterval - Check interval in milliseconds (default: 500ms)
|
|
11
|
+
* @param options.stableCount - Number of consecutive checks with same size to consider stable (default: 3)
|
|
12
|
+
* @param options.timeout - Timeout in milliseconds (default: 60000ms)
|
|
13
|
+
* @throws Error when timeout is reached
|
|
14
|
+
*/
|
|
15
|
+
async function waitFileSizeStable(filePath, options) {
|
|
16
|
+
const checkInterval = options?.checkInterval ?? 500;
|
|
17
|
+
const requiredStableCount = options?.stableCount ?? 3;
|
|
18
|
+
const timeout = options?.timeout ?? 60000;
|
|
19
|
+
const startTime = Date.now();
|
|
20
|
+
let previousSize = 0;
|
|
21
|
+
let stableCount = 0;
|
|
22
|
+
while (stableCount < requiredStableCount) {
|
|
23
|
+
if (Date.now() - startTime > timeout) {
|
|
24
|
+
throw new Error(`Timeout waiting for file to stabilize: ${filePath}`);
|
|
25
|
+
}
|
|
26
|
+
await new Promise((resolve) => setTimeout(resolve, checkInterval));
|
|
27
|
+
const stats = await index_js_1.nodejs.fs.stat(filePath);
|
|
28
|
+
const currentSize = stats.size;
|
|
29
|
+
if (currentSize === previousSize && currentSize > 0) {
|
|
30
|
+
stableCount++;
|
|
31
|
+
}
|
|
32
|
+
else {
|
|
33
|
+
stableCount = 0;
|
|
34
|
+
previousSize = currentSize;
|
|
35
|
+
}
|
|
36
|
+
}
|
|
37
|
+
}
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { type AgentProcessResult, ChatModel, type ChatModelInput, type ChatModelOptions, type ChatModelOutput } from "@aigne/core";
|
|
1
|
+
import { type AgentProcessResult, ChatModel, type ChatModelInput, type ChatModelInputOptions, type ChatModelOptions, type ChatModelOutput } from "@aigne/core";
|
|
2
2
|
import { type PromiseOrValue } from "@aigne/core/utils/type-utils.js";
|
|
3
3
|
import { GoogleGenAI, type GoogleGenAIOptions } from "@google/genai";
|
|
4
4
|
export interface GeminiChatModelOptions extends ChatModelOptions {
|
|
@@ -34,10 +34,30 @@ export declare class GeminiChatModel extends ChatModel {
|
|
|
34
34
|
apiKey: string | undefined;
|
|
35
35
|
model: string;
|
|
36
36
|
};
|
|
37
|
-
get modelOptions(): Omit<
|
|
37
|
+
get modelOptions(): Omit<ChatModelInputOptions, "model"> | undefined;
|
|
38
38
|
process(input: ChatModelInput): PromiseOrValue<AgentProcessResult<ChatModelOutput>>;
|
|
39
|
+
protected thinkingBudgetModelMap: ({
|
|
40
|
+
pattern: RegExp;
|
|
41
|
+
support: boolean;
|
|
42
|
+
min: number;
|
|
43
|
+
max: number;
|
|
44
|
+
} | {
|
|
45
|
+
pattern: RegExp;
|
|
46
|
+
support: boolean;
|
|
47
|
+
min?: undefined;
|
|
48
|
+
max?: undefined;
|
|
49
|
+
})[];
|
|
50
|
+
protected thinkingBudgetLevelMap: {
|
|
51
|
+
high: number;
|
|
52
|
+
medium: number;
|
|
53
|
+
low: number;
|
|
54
|
+
minimal: number;
|
|
55
|
+
};
|
|
56
|
+
protected getThinkingBudget(model: string, effort: ChatModelInputOptions["reasoningEffort"]): {
|
|
57
|
+
support: boolean;
|
|
58
|
+
budget?: number;
|
|
59
|
+
};
|
|
39
60
|
private processInput;
|
|
40
|
-
protected supportThinkingModels: string[];
|
|
41
61
|
private buildConfig;
|
|
42
62
|
private buildTools;
|
|
43
63
|
private buildContents;
|
|
@@ -1,4 +1,5 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import type { AgentInvokeOptions, FileUnionContent, VideoModelInput, VideoModelOptions, VideoModelOutput } from "@aigne/core";
|
|
2
|
+
import { VideoModel } from "@aigne/core";
|
|
2
3
|
import { GoogleGenAI } from "@google/genai";
|
|
3
4
|
/**
|
|
4
5
|
* Input options for Gemini Video Model
|
|
@@ -13,25 +14,22 @@ export interface GeminiVideoModelInput extends VideoModelInput {
|
|
|
13
14
|
*
|
|
14
15
|
* Veo 3.1: "16:9" (default, 720p and 1080p), "9:16" (720p and 1080p)
|
|
15
16
|
* Veo 3: "16:9" (default, 720p and 1080p), "9:16" (720p and 1080p)
|
|
16
|
-
* Veo 2: "16:9" (default, 720p), "9:16" (720p)
|
|
17
17
|
*/
|
|
18
|
-
aspectRatio?:
|
|
18
|
+
aspectRatio?: "16:9" | "9:16";
|
|
19
19
|
/**
|
|
20
20
|
* Resolution of the video
|
|
21
21
|
*
|
|
22
22
|
* Veo 3.1: "720p" (default), "1080p" (only supports 8 seconds duration)
|
|
23
23
|
* Veo 3: "720p" (default), "1080p" (16:9 only)
|
|
24
|
-
* Veo 2: Not supported
|
|
25
24
|
*/
|
|
26
|
-
size?:
|
|
25
|
+
size?: "720p" | "1080p";
|
|
27
26
|
/**
|
|
28
27
|
* Duration of the generated video in seconds
|
|
29
28
|
*
|
|
30
29
|
* Veo 3.1: "4", "6", "8"
|
|
31
30
|
* Veo 3: "4", "6", "8"
|
|
32
|
-
* Veo 2: "5", "6", "8"
|
|
33
31
|
*/
|
|
34
|
-
seconds?:
|
|
32
|
+
seconds?: "4" | "6" | "8";
|
|
35
33
|
/**
|
|
36
34
|
* Control person generation
|
|
37
35
|
*
|
|
@@ -41,6 +39,15 @@ export interface GeminiVideoModelInput extends VideoModelInput {
|
|
|
41
39
|
* - Veo 2: "allow_all", "allow_adult", "dont_allow"
|
|
42
40
|
*/
|
|
43
41
|
personGeneration?: string;
|
|
42
|
+
/**
|
|
43
|
+
* Last frame for video generation (frame interpolation)
|
|
44
|
+
*/
|
|
45
|
+
lastFrame?: FileUnionContent;
|
|
46
|
+
/**
|
|
47
|
+
* Reference images for video generation
|
|
48
|
+
* Only supported in Veo 3.1 models
|
|
49
|
+
*/
|
|
50
|
+
referenceImages?: FileUnionContent[];
|
|
44
51
|
}
|
|
45
52
|
/**
|
|
46
53
|
* Output from Gemini Video Model
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Wait for file size to stabilize, ensuring the file download is complete.
|
|
3
|
+
*
|
|
4
|
+
* @param filePath - The path to the file to check
|
|
5
|
+
* @param options - Configuration options
|
|
6
|
+
* @param options.checkInterval - Check interval in milliseconds (default: 500ms)
|
|
7
|
+
* @param options.stableCount - Number of consecutive checks with same size to consider stable (default: 3)
|
|
8
|
+
* @param options.timeout - Timeout in milliseconds (default: 60000ms)
|
|
9
|
+
* @throws Error when timeout is reached
|
|
10
|
+
*/
|
|
11
|
+
export declare function waitFileSizeStable(filePath: string, options?: {
|
|
12
|
+
checkInterval?: number;
|
|
13
|
+
stableCount?: number;
|
|
14
|
+
timeout?: number;
|
|
15
|
+
}): Promise<void>;
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { type AgentProcessResult, ChatModel, type ChatModelInput, type ChatModelOptions, type ChatModelOutput } from "@aigne/core";
|
|
1
|
+
import { type AgentProcessResult, ChatModel, type ChatModelInput, type ChatModelInputOptions, type ChatModelOptions, type ChatModelOutput } from "@aigne/core";
|
|
2
2
|
import { type PromiseOrValue } from "@aigne/core/utils/type-utils.js";
|
|
3
3
|
import { GoogleGenAI, type GoogleGenAIOptions } from "@google/genai";
|
|
4
4
|
export interface GeminiChatModelOptions extends ChatModelOptions {
|
|
@@ -34,10 +34,30 @@ export declare class GeminiChatModel extends ChatModel {
|
|
|
34
34
|
apiKey: string | undefined;
|
|
35
35
|
model: string;
|
|
36
36
|
};
|
|
37
|
-
get modelOptions(): Omit<
|
|
37
|
+
get modelOptions(): Omit<ChatModelInputOptions, "model"> | undefined;
|
|
38
38
|
process(input: ChatModelInput): PromiseOrValue<AgentProcessResult<ChatModelOutput>>;
|
|
39
|
+
protected thinkingBudgetModelMap: ({
|
|
40
|
+
pattern: RegExp;
|
|
41
|
+
support: boolean;
|
|
42
|
+
min: number;
|
|
43
|
+
max: number;
|
|
44
|
+
} | {
|
|
45
|
+
pattern: RegExp;
|
|
46
|
+
support: boolean;
|
|
47
|
+
min?: undefined;
|
|
48
|
+
max?: undefined;
|
|
49
|
+
})[];
|
|
50
|
+
protected thinkingBudgetLevelMap: {
|
|
51
|
+
high: number;
|
|
52
|
+
medium: number;
|
|
53
|
+
low: number;
|
|
54
|
+
minimal: number;
|
|
55
|
+
};
|
|
56
|
+
protected getThinkingBudget(model: string, effort: ChatModelInputOptions["reasoningEffort"]): {
|
|
57
|
+
support: boolean;
|
|
58
|
+
budget?: number;
|
|
59
|
+
};
|
|
39
60
|
private processInput;
|
|
40
|
-
protected supportThinkingModels: string[];
|
|
41
61
|
private buildConfig;
|
|
42
62
|
private buildTools;
|
|
43
63
|
private buildContents;
|
|
@@ -58,15 +58,63 @@ export class GeminiChatModel extends ChatModel {
|
|
|
58
58
|
process(input) {
|
|
59
59
|
return this.processInput(input);
|
|
60
60
|
}
|
|
61
|
+
// References: https://ai.google.dev/gemini-api/docs/thinking#set-budget
|
|
62
|
+
thinkingBudgetModelMap = [
|
|
63
|
+
{
|
|
64
|
+
pattern: /gemini-2.5-pro/,
|
|
65
|
+
support: true,
|
|
66
|
+
min: 128,
|
|
67
|
+
max: 32768,
|
|
68
|
+
},
|
|
69
|
+
{
|
|
70
|
+
pattern: /gemini-2.5-flash/,
|
|
71
|
+
support: true,
|
|
72
|
+
min: 0,
|
|
73
|
+
max: 24576,
|
|
74
|
+
},
|
|
75
|
+
{
|
|
76
|
+
pattern: /2.5-flash-lite/,
|
|
77
|
+
support: true,
|
|
78
|
+
min: 512,
|
|
79
|
+
max: 24576,
|
|
80
|
+
},
|
|
81
|
+
{
|
|
82
|
+
pattern: /.*/,
|
|
83
|
+
support: false,
|
|
84
|
+
},
|
|
85
|
+
];
|
|
86
|
+
thinkingBudgetLevelMap = {
|
|
87
|
+
high: 100000, // use 100k for high, finally capped by model max
|
|
88
|
+
medium: 10000,
|
|
89
|
+
low: 5000,
|
|
90
|
+
minimal: 200,
|
|
91
|
+
};
|
|
92
|
+
getThinkingBudget(model, effort) {
|
|
93
|
+
const m = this.thinkingBudgetModelMap.find((i) => i.pattern.test(model));
|
|
94
|
+
if (!m?.support)
|
|
95
|
+
return { support: false };
|
|
96
|
+
let budget = typeof effort === "string" ? this.thinkingBudgetLevelMap[effort] || undefined : effort;
|
|
97
|
+
if (typeof budget === "undefined")
|
|
98
|
+
return { support: true };
|
|
99
|
+
if (typeof m.min === "number")
|
|
100
|
+
budget = Math.max(m.min, budget);
|
|
101
|
+
if (typeof m.max === "number")
|
|
102
|
+
budget = Math.min(m.max, budget);
|
|
103
|
+
return { support: true, budget };
|
|
104
|
+
}
|
|
61
105
|
async *processInput(input) {
|
|
62
106
|
const model = input.modelOptions?.model || this.credential.model;
|
|
63
107
|
const { contents, config } = await this.buildContents(input);
|
|
108
|
+
const thinkingBudget = this.getThinkingBudget(model, input.modelOptions?.reasoningEffort ?? this.modelOptions?.reasoningEffort);
|
|
64
109
|
const parameters = {
|
|
65
110
|
model,
|
|
66
111
|
contents,
|
|
67
112
|
config: {
|
|
68
|
-
thinkingConfig:
|
|
69
|
-
? {
|
|
113
|
+
thinkingConfig: thinkingBudget.support
|
|
114
|
+
? {
|
|
115
|
+
includeThoughts: true,
|
|
116
|
+
thinkingBudget: thinkingBudget.budget,
|
|
117
|
+
}
|
|
70
118
|
: undefined,
|
|
71
119
|
responseModalities: input.modelOptions?.modalities,
|
|
72
120
|
temperature: input.modelOptions?.temperature || this.modelOptions?.temperature,
|
|
@@ -198,9 +246,18 @@ export class GeminiChatModel extends ChatModel {
|
|
|
198
246
|
}
|
|
199
247
|
}
|
|
200
248
|
}
|
|
201
|
-
yield {
|
|
249
|
+
yield {
|
|
250
|
+
delta: {
|
|
251
|
+
json: {
|
|
252
|
+
usage,
|
|
253
|
+
files: files.length ? files : undefined,
|
|
254
|
+
modelOptions: {
|
|
255
|
+
reasoningEffort: parameters.config?.thinkingConfig?.thinkingBudget,
|
|
256
|
+
},
|
|
257
|
+
},
|
|
258
|
+
},
|
|
259
|
+
};
|
|
202
260
|
}
|
|
203
|
-
supportThinkingModels = ["gemini-2.5-pro", "gemini-2.5-flash"];
|
|
204
261
|
async buildConfig(input) {
|
|
205
262
|
const config = {};
|
|
206
263
|
const { tools, toolConfig } = await this.buildTools(input);
|
|
@@ -1,4 +1,5 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import type { AgentInvokeOptions, FileUnionContent, VideoModelInput, VideoModelOptions, VideoModelOutput } from "@aigne/core";
|
|
2
|
+
import { VideoModel } from "@aigne/core";
|
|
2
3
|
import { GoogleGenAI } from "@google/genai";
|
|
3
4
|
/**
|
|
4
5
|
* Input options for Gemini Video Model
|
|
@@ -13,25 +14,22 @@ export interface GeminiVideoModelInput extends VideoModelInput {
|
|
|
13
14
|
*
|
|
14
15
|
* Veo 3.1: "16:9" (default, 720p and 1080p), "9:16" (720p and 1080p)
|
|
15
16
|
* Veo 3: "16:9" (default, 720p and 1080p), "9:16" (720p and 1080p)
|
|
16
|
-
* Veo 2: "16:9" (default, 720p), "9:16" (720p)
|
|
17
17
|
*/
|
|
18
|
-
aspectRatio?:
|
|
18
|
+
aspectRatio?: "16:9" | "9:16";
|
|
19
19
|
/**
|
|
20
20
|
* Resolution of the video
|
|
21
21
|
*
|
|
22
22
|
* Veo 3.1: "720p" (default), "1080p" (only supports 8 seconds duration)
|
|
23
23
|
* Veo 3: "720p" (default), "1080p" (16:9 only)
|
|
24
|
-
* Veo 2: Not supported
|
|
25
24
|
*/
|
|
26
|
-
size?:
|
|
25
|
+
size?: "720p" | "1080p";
|
|
27
26
|
/**
|
|
28
27
|
* Duration of the generated video in seconds
|
|
29
28
|
*
|
|
30
29
|
* Veo 3.1: "4", "6", "8"
|
|
31
30
|
* Veo 3: "4", "6", "8"
|
|
32
|
-
* Veo 2: "5", "6", "8"
|
|
33
31
|
*/
|
|
34
|
-
seconds?:
|
|
32
|
+
seconds?: "4" | "6" | "8";
|
|
35
33
|
/**
|
|
36
34
|
* Control person generation
|
|
37
35
|
*
|
|
@@ -41,6 +39,15 @@ export interface GeminiVideoModelInput extends VideoModelInput {
|
|
|
41
39
|
* - Veo 2: "allow_all", "allow_adult", "dont_allow"
|
|
42
40
|
*/
|
|
43
41
|
personGeneration?: string;
|
|
42
|
+
/**
|
|
43
|
+
* Last frame for video generation (frame interpolation)
|
|
44
|
+
*/
|
|
45
|
+
lastFrame?: FileUnionContent;
|
|
46
|
+
/**
|
|
47
|
+
* Reference images for video generation
|
|
48
|
+
* Only supported in Veo 3.1 models
|
|
49
|
+
*/
|
|
50
|
+
referenceImages?: FileUnionContent[];
|
|
44
51
|
}
|
|
45
52
|
/**
|
|
46
53
|
* Output from Gemini Video Model
|
|
@@ -1,15 +1,20 @@
|
|
|
1
|
-
import { VideoModel, videoModelInputSchema
|
|
1
|
+
import { fileUnionContentSchema, VideoModel, videoModelInputSchema } from "@aigne/core";
|
|
2
2
|
import { logger } from "@aigne/core/utils/logger.js";
|
|
3
3
|
import { checkArguments } from "@aigne/core/utils/type-utils.js";
|
|
4
4
|
import { nodejs } from "@aigne/platform-helpers/nodejs/index.js";
|
|
5
5
|
import { GoogleGenAI } from "@google/genai";
|
|
6
6
|
import { z } from "zod";
|
|
7
|
+
import { waitFileSizeStable } from "./utils.js";
|
|
7
8
|
const DEFAULT_MODEL = "veo-3.1-generate-preview";
|
|
8
9
|
const DEFAULT_SECONDS = 8;
|
|
9
10
|
const geminiVideoModelInputSchema = videoModelInputSchema.extend({
|
|
10
11
|
negativePrompt: z.string().optional(),
|
|
11
|
-
aspectRatio: z.
|
|
12
|
+
aspectRatio: z.enum(["16:9", "9:16"]).optional(),
|
|
13
|
+
size: z.enum(["720p", "1080p"]).optional(),
|
|
14
|
+
seconds: z.enum(["4", "6", "8"]).optional(),
|
|
12
15
|
personGeneration: z.string().optional(),
|
|
16
|
+
lastFrame: fileUnionContentSchema.optional(),
|
|
17
|
+
referenceImages: fileUnionContentSchema.array().optional(),
|
|
13
18
|
});
|
|
14
19
|
const geminiVideoModelOptionsSchema = z.object({
|
|
15
20
|
apiKey: z.string().optional(),
|
|
@@ -58,13 +63,16 @@ export class GeminiVideoModel extends VideoModel {
|
|
|
58
63
|
const localPath = nodejs.path.join(dir, `${videoId}.mp4`);
|
|
59
64
|
await this.client.files.download({ file: videoFile, downloadPath: localPath });
|
|
60
65
|
logger.debug(`Generated video saved to ${localPath}`);
|
|
61
|
-
await
|
|
66
|
+
await waitFileSizeStable(localPath);
|
|
62
67
|
const buffer = await nodejs.fs.readFile(localPath);
|
|
63
68
|
return buffer.toString("base64");
|
|
64
69
|
}
|
|
65
70
|
async process(input, options) {
|
|
66
71
|
const model = input.model ?? input.modelOptions?.model ?? this.credential.model;
|
|
67
72
|
const mergedInput = { ...this.modelOptions, ...input };
|
|
73
|
+
if (mergedInput.referenceImages && !model.includes("veo-3.1")) {
|
|
74
|
+
throw new Error("referenceImages is only supported in Veo 3.1 models");
|
|
75
|
+
}
|
|
68
76
|
const config = {};
|
|
69
77
|
if (mergedInput.negativePrompt)
|
|
70
78
|
config.negativePrompt = mergedInput.negativePrompt;
|
|
@@ -76,11 +84,39 @@ export class GeminiVideoModel extends VideoModel {
|
|
|
76
84
|
config.durationSeconds = parseInt(mergedInput.seconds, 10);
|
|
77
85
|
if (mergedInput.personGeneration)
|
|
78
86
|
config.personGeneration = mergedInput.personGeneration;
|
|
87
|
+
if (mergedInput.lastFrame) {
|
|
88
|
+
config.lastFrame = await this.transformFileType("file", mergedInput.lastFrame, options).then((file) => {
|
|
89
|
+
return {
|
|
90
|
+
imageBytes: file.data,
|
|
91
|
+
mimeType: file.mimeType,
|
|
92
|
+
};
|
|
93
|
+
});
|
|
94
|
+
}
|
|
95
|
+
if (mergedInput.referenceImages) {
|
|
96
|
+
config.referenceImages = await Promise.all(mergedInput.referenceImages.map(async (image) => {
|
|
97
|
+
return await this.transformFileType("file", image, options).then((file) => {
|
|
98
|
+
return {
|
|
99
|
+
image: {
|
|
100
|
+
imageBytes: file.data,
|
|
101
|
+
mimeType: file.mimeType,
|
|
102
|
+
},
|
|
103
|
+
};
|
|
104
|
+
});
|
|
105
|
+
}));
|
|
106
|
+
}
|
|
79
107
|
const params = {
|
|
80
108
|
model,
|
|
81
109
|
prompt: mergedInput.prompt,
|
|
82
110
|
config,
|
|
83
111
|
};
|
|
112
|
+
if (mergedInput.image) {
|
|
113
|
+
params.image = await this.transformFileType("file", mergedInput.image, options).then((file) => {
|
|
114
|
+
return {
|
|
115
|
+
imageBytes: file.data,
|
|
116
|
+
mimeType: file.mimeType,
|
|
117
|
+
};
|
|
118
|
+
});
|
|
119
|
+
}
|
|
84
120
|
// Start video generation
|
|
85
121
|
let operation = await this.client.models.generateVideos(params);
|
|
86
122
|
logger.debug("Video generation started...");
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Wait for file size to stabilize, ensuring the file download is complete.
|
|
3
|
+
*
|
|
4
|
+
* @param filePath - The path to the file to check
|
|
5
|
+
* @param options - Configuration options
|
|
6
|
+
* @param options.checkInterval - Check interval in milliseconds (default: 500ms)
|
|
7
|
+
* @param options.stableCount - Number of consecutive checks with same size to consider stable (default: 3)
|
|
8
|
+
* @param options.timeout - Timeout in milliseconds (default: 60000ms)
|
|
9
|
+
* @throws Error when timeout is reached
|
|
10
|
+
*/
|
|
11
|
+
export declare function waitFileSizeStable(filePath: string, options?: {
|
|
12
|
+
checkInterval?: number;
|
|
13
|
+
stableCount?: number;
|
|
14
|
+
timeout?: number;
|
|
15
|
+
}): Promise<void>;
|
package/lib/esm/utils.js
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
import { nodejs } from "@aigne/platform-helpers/nodejs/index.js";
|
|
2
|
+
/**
|
|
3
|
+
* Wait for file size to stabilize, ensuring the file download is complete.
|
|
4
|
+
*
|
|
5
|
+
* @param filePath - The path to the file to check
|
|
6
|
+
* @param options - Configuration options
|
|
7
|
+
* @param options.checkInterval - Check interval in milliseconds (default: 500ms)
|
|
8
|
+
* @param options.stableCount - Number of consecutive checks with same size to consider stable (default: 3)
|
|
9
|
+
* @param options.timeout - Timeout in milliseconds (default: 60000ms)
|
|
10
|
+
* @throws Error when timeout is reached
|
|
11
|
+
*/
|
|
12
|
+
export async function waitFileSizeStable(filePath, options) {
|
|
13
|
+
const checkInterval = options?.checkInterval ?? 500;
|
|
14
|
+
const requiredStableCount = options?.stableCount ?? 3;
|
|
15
|
+
const timeout = options?.timeout ?? 60000;
|
|
16
|
+
const startTime = Date.now();
|
|
17
|
+
let previousSize = 0;
|
|
18
|
+
let stableCount = 0;
|
|
19
|
+
while (stableCount < requiredStableCount) {
|
|
20
|
+
if (Date.now() - startTime > timeout) {
|
|
21
|
+
throw new Error(`Timeout waiting for file to stabilize: ${filePath}`);
|
|
22
|
+
}
|
|
23
|
+
await new Promise((resolve) => setTimeout(resolve, checkInterval));
|
|
24
|
+
const stats = await nodejs.fs.stat(filePath);
|
|
25
|
+
const currentSize = stats.size;
|
|
26
|
+
if (currentSize === previousSize && currentSize > 0) {
|
|
27
|
+
stableCount++;
|
|
28
|
+
}
|
|
29
|
+
else {
|
|
30
|
+
stableCount = 0;
|
|
31
|
+
previousSize = currentSize;
|
|
32
|
+
}
|
|
33
|
+
}
|
|
34
|
+
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@aigne/gemini",
|
|
3
|
-
"version": "0.14.4-beta.6",
|
|
3
|
+
"version": "0.14.4-beta.8",
|
|
4
4
|
"description": "AIGNE Gemini SDK for integrating with Google's Gemini AI models",
|
|
5
5
|
"publishConfig": {
|
|
6
6
|
"access": "public"
|
|
@@ -39,7 +39,7 @@
|
|
|
39
39
|
"@google/genai": "^1.24.0",
|
|
40
40
|
"zod": "^3.25.67",
|
|
41
41
|
"zod-to-json-schema": "^3.24.6",
|
|
42
|
-
"@aigne/core": "^1.65.0-beta.
|
|
42
|
+
"@aigne/core": "^1.65.0-beta.5",
|
|
43
43
|
"@aigne/platform-helpers": "^0.6.3"
|
|
44
44
|
},
|
|
45
45
|
"devDependencies": {
|
|
@@ -48,7 +48,7 @@
|
|
|
48
48
|
"npm-run-all": "^4.1.5",
|
|
49
49
|
"rimraf": "^6.0.1",
|
|
50
50
|
"typescript": "^5.9.2",
|
|
51
|
-
"@aigne/test-utils": "^0.5.57-beta.
|
|
51
|
+
"@aigne/test-utils": "^0.5.57-beta.6"
|
|
52
52
|
},
|
|
53
53
|
"scripts": {
|
|
54
54
|
"lint": "tsc --noEmit",
|