@aigne/gemini 0.14.3 → 0.14.4-beta.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +44 -0
- package/lib/cjs/gemini-chat-model.d.ts +1 -0
- package/lib/cjs/gemini-chat-model.js +16 -7
- package/lib/cjs/gemini-video-model.d.ts +107 -0
- package/lib/cjs/gemini-video-model.js +128 -0
- package/lib/cjs/index.d.ts +1 -0
- package/lib/cjs/index.js +1 -0
- package/lib/dts/gemini-chat-model.d.ts +1 -0
- package/lib/dts/gemini-video-model.d.ts +107 -0
- package/lib/dts/index.d.ts +1 -0
- package/lib/esm/gemini-chat-model.d.ts +1 -0
- package/lib/esm/gemini-chat-model.js +16 -7
- package/lib/esm/gemini-video-model.d.ts +107 -0
- package/lib/esm/gemini-video-model.js +124 -0
- package/lib/esm/index.d.ts +1 -0
- package/lib/esm/index.js +1 -0
- package/package.json +3 -3
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,49 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## [0.14.4-beta.2](https://github.com/AIGNE-io/aigne-framework/compare/gemini-v0.14.4-beta.1...gemini-v0.14.4-beta.2) (2025-10-24)
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
### Features
|
|
7
|
+
|
|
8
|
+
* **model:** support video model ([#647](https://github.com/AIGNE-io/aigne-framework/issues/647)) ([de81742](https://github.com/AIGNE-io/aigne-framework/commit/de817421ef1dd3246d0d8c51ff12f0a855658f9f))
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
### Bug Fixes
|
|
12
|
+
|
|
13
|
+
* **model:** updated default video duration settings for AI video models ([#663](https://github.com/AIGNE-io/aigne-framework/issues/663)) ([1203941](https://github.com/AIGNE-io/aigne-framework/commit/12039411aaef77ba665e8edfb0fe6f8097c43e39))
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
### Dependencies
|
|
17
|
+
|
|
18
|
+
* The following workspace dependencies were updated
|
|
19
|
+
* dependencies
|
|
20
|
+
* @aigne/core bumped to 1.65.0-beta
|
|
21
|
+
* devDependencies
|
|
22
|
+
* @aigne/test-utils bumped to 0.5.57-beta.1
|
|
23
|
+
|
|
24
|
+
## [0.14.4-beta.1](https://github.com/AIGNE-io/aigne-framework/compare/gemini-v0.14.4-beta...gemini-v0.14.4-beta.1) (2025-10-24)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
### Bug Fixes
|
|
28
|
+
|
|
29
|
+
* **gemini:** use StructuredOutputError to trigger retry for missing JSON response ([#660](https://github.com/AIGNE-io/aigne-framework/issues/660)) ([e8826ed](https://github.com/AIGNE-io/aigne-framework/commit/e8826ed96db57bfcce0b577881bf0d2fd828c269))
|
|
30
|
+
|
|
31
|
+
## [0.14.4-beta](https://github.com/AIGNE-io/aigne-framework/compare/gemini-v0.14.3...gemini-v0.14.4-beta) (2025-10-23)
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
### Bug Fixes
|
|
35
|
+
|
|
36
|
+
* **models:** improve message structure handling and enable auto-message options ([#657](https://github.com/AIGNE-io/aigne-framework/issues/657)) ([233d70c](https://github.com/AIGNE-io/aigne-framework/commit/233d70cb292b937200fada8434f33d957d766ad6))
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
### Dependencies
|
|
40
|
+
|
|
41
|
+
* The following workspace dependencies were updated
|
|
42
|
+
* dependencies
|
|
43
|
+
* @aigne/core bumped to 1.64.1-beta
|
|
44
|
+
* devDependencies
|
|
45
|
+
* @aigne/test-utils bumped to 0.5.57-beta
|
|
46
|
+
|
|
3
47
|
## [0.14.3](https://github.com/AIGNE-io/aigne-framework/compare/gemini-v0.14.3-beta.1...gemini-v0.14.3) (2025-10-22)
|
|
4
48
|
|
|
5
49
|
|
|
@@ -151,7 +151,7 @@ class GeminiChatModel extends core_1.ChatModel {
|
|
|
151
151
|
yield { delta: { json: { json: (0, core_1.safeParseJSON)(text) } } };
|
|
152
152
|
}
|
|
153
153
|
else if (!toolCalls.length) {
|
|
154
|
-
throw new Error("No JSON response from the model");
|
|
154
|
+
throw new core_1.StructuredOutputError("No JSON response from the model");
|
|
155
155
|
}
|
|
156
156
|
}
|
|
157
157
|
else if (!toolCalls.length) {
|
|
@@ -342,17 +342,26 @@ class GeminiChatModel extends core_1.ChatModel {
|
|
|
342
342
|
}
|
|
343
343
|
return content;
|
|
344
344
|
}))).filter(type_utils_js_1.isNonNullable);
|
|
345
|
-
|
|
346
|
-
const system = systemParts.pop();
|
|
347
|
-
if (system) {
|
|
348
|
-
result.contents.push({ role: "user", parts: [system] });
|
|
349
|
-
}
|
|
350
|
-
}
|
|
345
|
+
this.ensureMessagesHasUserMessage(systemParts, result.contents);
|
|
351
346
|
if (systemParts.length) {
|
|
352
347
|
result.config ??= {};
|
|
353
348
|
result.config.systemInstruction = systemParts;
|
|
354
349
|
}
|
|
355
350
|
return result;
|
|
356
351
|
}
|
|
352
|
+
ensureMessagesHasUserMessage(systems, contents) {
|
|
353
|
+
// no messages but system messages
|
|
354
|
+
if (!contents.length && systems.length) {
|
|
355
|
+
const system = systems.pop();
|
|
356
|
+
if (system)
|
|
357
|
+
contents.push({ role: "user", parts: [system] });
|
|
358
|
+
}
|
|
359
|
+
// first message is from model
|
|
360
|
+
if (contents[0]?.role === "model") {
|
|
361
|
+
const system = systems.pop();
|
|
362
|
+
if (system)
|
|
363
|
+
contents.unshift({ role: "user", parts: [system] });
|
|
364
|
+
}
|
|
365
|
+
}
|
|
357
366
|
}
|
|
358
367
|
exports.GeminiChatModel = GeminiChatModel;
|
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
import { type AgentInvokeOptions, VideoModel, type VideoModelInput, type VideoModelOptions, type VideoModelOutput } from "@aigne/core";
|
|
2
|
+
import { GoogleGenAI } from "@google/genai";
|
|
3
|
+
/**
|
|
4
|
+
* Input options for Gemini Video Model
|
|
5
|
+
*/
|
|
6
|
+
export interface GeminiVideoModelInput extends VideoModelInput {
|
|
7
|
+
/**
|
|
8
|
+
* Text describing content that should not appear in the video
|
|
9
|
+
*/
|
|
10
|
+
negativePrompt?: string;
|
|
11
|
+
/**
|
|
12
|
+
* Aspect ratio of the video
|
|
13
|
+
*
|
|
14
|
+
* Veo 3.1: "16:9" (default, 720p and 1080p), "9:16" (720p and 1080p)
|
|
15
|
+
* Veo 3: "16:9" (default, 720p and 1080p), "9:16" (720p and 1080p)
|
|
16
|
+
* Veo 2: "16:9" (default, 720p), "9:16" (720p)
|
|
17
|
+
*/
|
|
18
|
+
aspectRatio?: string;
|
|
19
|
+
/**
|
|
20
|
+
* Resolution of the video
|
|
21
|
+
*
|
|
22
|
+
* Veo 3.1: "720p" (default), "1080p" (only supports 8 seconds duration)
|
|
23
|
+
* Veo 3: "720p" (default), "1080p" (16:9 only)
|
|
24
|
+
* Veo 2: Not supported
|
|
25
|
+
*/
|
|
26
|
+
size?: string;
|
|
27
|
+
/**
|
|
28
|
+
* Duration of the generated video in seconds
|
|
29
|
+
*
|
|
30
|
+
* Veo 3.1: "4", "6", "8"
|
|
31
|
+
* Veo 3: "4", "6", "8"
|
|
32
|
+
* Veo 2: "5", "6", "8"
|
|
33
|
+
*/
|
|
34
|
+
seconds?: string;
|
|
35
|
+
/**
|
|
36
|
+
* Control person generation
|
|
37
|
+
*
|
|
38
|
+
* For text-to-video and image-to-video:
|
|
39
|
+
* - Veo 3.1: "allow_all" for image-to-video, frame interpolation and reference images; only "allow_adult" for text-to-video
|
|
40
|
+
* - Veo 3: "allow_all" for image-to-video; only "allow_adult" for text-to-video
|
|
41
|
+
* - Veo 2: "allow_all", "allow_adult", "dont_allow"
|
|
42
|
+
*/
|
|
43
|
+
personGeneration?: string;
|
|
44
|
+
}
|
|
45
|
+
/**
|
|
46
|
+
* Output from Gemini Video Model
|
|
47
|
+
*/
|
|
48
|
+
export interface GeminiVideoModelOutput extends VideoModelOutput {
|
|
49
|
+
}
|
|
50
|
+
/**
|
|
51
|
+
* Configuration options for Gemini Video Model
|
|
52
|
+
*/
|
|
53
|
+
export interface GeminiVideoModelOptions extends VideoModelOptions<GeminiVideoModelInput, GeminiVideoModelOutput> {
|
|
54
|
+
/**
|
|
55
|
+
* API key for Gemini API
|
|
56
|
+
*
|
|
57
|
+
* If not provided, will look for GEMINI_API_KEY in environment variables
|
|
58
|
+
*/
|
|
59
|
+
apiKey?: string;
|
|
60
|
+
/**
|
|
61
|
+
* Base URL for Gemini API
|
|
62
|
+
*
|
|
63
|
+
* Useful for proxies or alternate endpoints
|
|
64
|
+
*/
|
|
65
|
+
baseURL?: string;
|
|
66
|
+
/**
|
|
67
|
+
* Gemini model to use
|
|
68
|
+
*
|
|
69
|
+
* Defaults to 'veo-3.1-generate-preview'
|
|
70
|
+
*/
|
|
71
|
+
model?: string;
|
|
72
|
+
/**
|
|
73
|
+
* Additional model options to control behavior
|
|
74
|
+
*/
|
|
75
|
+
modelOptions?: Omit<Partial<GeminiVideoModelInput>, "model">;
|
|
76
|
+
/**
|
|
77
|
+
* Client options for Gemini API
|
|
78
|
+
*/
|
|
79
|
+
clientOptions?: Record<string, any>;
|
|
80
|
+
/**
|
|
81
|
+
* Polling interval in milliseconds for checking video generation status
|
|
82
|
+
*
|
|
83
|
+
* Defaults to 10000ms (10 seconds)
|
|
84
|
+
*/
|
|
85
|
+
pollingInterval?: number;
|
|
86
|
+
}
|
|
87
|
+
export declare class GeminiVideoModel extends VideoModel<GeminiVideoModelInput, GeminiVideoModelOutput> {
|
|
88
|
+
options?: GeminiVideoModelOptions | undefined;
|
|
89
|
+
constructor(options?: GeminiVideoModelOptions | undefined);
|
|
90
|
+
/**
|
|
91
|
+
* @hidden
|
|
92
|
+
*/
|
|
93
|
+
protected _client?: GoogleGenAI;
|
|
94
|
+
protected apiKeyEnvName: string;
|
|
95
|
+
get client(): GoogleGenAI;
|
|
96
|
+
get credential(): {
|
|
97
|
+
url: string | undefined;
|
|
98
|
+
apiKey: string | undefined;
|
|
99
|
+
model: string;
|
|
100
|
+
};
|
|
101
|
+
get modelOptions(): Omit<Partial<GeminiVideoModelInput>, "model"> | undefined;
|
|
102
|
+
downloadToFile(dir: string, videoId: string, videoFile: {
|
|
103
|
+
uri?: string;
|
|
104
|
+
videoBytes?: any;
|
|
105
|
+
}): Promise<string>;
|
|
106
|
+
process(input: GeminiVideoModelInput, options: AgentInvokeOptions): Promise<GeminiVideoModelOutput>;
|
|
107
|
+
}
|
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.GeminiVideoModel = void 0;
|
|
4
|
+
const core_1 = require("@aigne/core");
|
|
5
|
+
const logger_js_1 = require("@aigne/core/utils/logger.js");
|
|
6
|
+
const type_utils_js_1 = require("@aigne/core/utils/type-utils.js");
|
|
7
|
+
const index_js_1 = require("@aigne/platform-helpers/nodejs/index.js");
|
|
8
|
+
const genai_1 = require("@google/genai");
|
|
9
|
+
const zod_1 = require("zod");
|
|
10
|
+
const DEFAULT_MODEL = "veo-3.1-generate-preview";
|
|
11
|
+
const DEFAULT_SECONDS = 8;
|
|
12
|
+
const geminiVideoModelInputSchema = core_1.videoModelInputSchema.extend({
|
|
13
|
+
negativePrompt: zod_1.z.string().optional(),
|
|
14
|
+
aspectRatio: zod_1.z.string().optional(),
|
|
15
|
+
personGeneration: zod_1.z.string().optional(),
|
|
16
|
+
});
|
|
17
|
+
const geminiVideoModelOptionsSchema = zod_1.z.object({
|
|
18
|
+
apiKey: zod_1.z.string().optional(),
|
|
19
|
+
baseURL: zod_1.z.string().optional(),
|
|
20
|
+
model: zod_1.z.string().optional(),
|
|
21
|
+
modelOptions: zod_1.z.object({}).optional(),
|
|
22
|
+
clientOptions: zod_1.z.object({}).optional(),
|
|
23
|
+
pollingInterval: zod_1.z.number().optional(),
|
|
24
|
+
});
|
|
25
|
+
class GeminiVideoModel extends core_1.VideoModel {
|
|
26
|
+
options;
|
|
27
|
+
constructor(options) {
|
|
28
|
+
super({
|
|
29
|
+
...options,
|
|
30
|
+
description: options?.description ?? "Generate videos using Google Gemini Veo models",
|
|
31
|
+
inputSchema: geminiVideoModelInputSchema,
|
|
32
|
+
});
|
|
33
|
+
this.options = options;
|
|
34
|
+
if (options)
|
|
35
|
+
(0, type_utils_js_1.checkArguments)(this.name, geminiVideoModelOptionsSchema, options);
|
|
36
|
+
}
|
|
37
|
+
/**
|
|
38
|
+
* @hidden
|
|
39
|
+
*/
|
|
40
|
+
_client;
|
|
41
|
+
apiKeyEnvName = "GEMINI_API_KEY";
|
|
42
|
+
get client() {
|
|
43
|
+
const { apiKey } = this.credential;
|
|
44
|
+
if (!apiKey)
|
|
45
|
+
throw new Error(`${this.name} requires an API key. Please provide it via \`options.apiKey\`, or set the \`${this.apiKeyEnvName}\` environment variable`);
|
|
46
|
+
this._client ??= new genai_1.GoogleGenAI({ apiKey, ...this.options?.clientOptions });
|
|
47
|
+
return this._client;
|
|
48
|
+
}
|
|
49
|
+
get credential() {
|
|
50
|
+
return {
|
|
51
|
+
url: this.options?.baseURL || process.env.GEMINI_BASE_URL,
|
|
52
|
+
apiKey: this.options?.apiKey || process.env[this.apiKeyEnvName],
|
|
53
|
+
model: this.options?.model || DEFAULT_MODEL,
|
|
54
|
+
};
|
|
55
|
+
}
|
|
56
|
+
get modelOptions() {
|
|
57
|
+
return this.options?.modelOptions;
|
|
58
|
+
}
|
|
59
|
+
async downloadToFile(dir, videoId, videoFile) {
|
|
60
|
+
logger_js_1.logger.debug("Downloading video content...");
|
|
61
|
+
const localPath = index_js_1.nodejs.path.join(dir, `${videoId}.mp4`);
|
|
62
|
+
await this.client.files.download({ file: videoFile, downloadPath: localPath });
|
|
63
|
+
logger_js_1.logger.debug(`Generated video saved to ${localPath}`);
|
|
64
|
+
await new Promise((resolve) => setTimeout(resolve, 300));
|
|
65
|
+
const buffer = await index_js_1.nodejs.fs.readFile(localPath);
|
|
66
|
+
const base64 = buffer.toString("base64");
|
|
67
|
+
const dataUrl = `data:video/mp4;base64,${base64}`;
|
|
68
|
+
return dataUrl;
|
|
69
|
+
}
|
|
70
|
+
async process(input, options) {
|
|
71
|
+
const model = input.model ?? this.credential.model;
|
|
72
|
+
const mergedInput = { ...this.modelOptions, ...input };
|
|
73
|
+
const config = {};
|
|
74
|
+
if (mergedInput.negativePrompt)
|
|
75
|
+
config.negativePrompt = mergedInput.negativePrompt;
|
|
76
|
+
if (mergedInput.aspectRatio)
|
|
77
|
+
config.aspectRatio = mergedInput.aspectRatio;
|
|
78
|
+
if (mergedInput.size)
|
|
79
|
+
config.resolution = mergedInput.size;
|
|
80
|
+
if (mergedInput.seconds)
|
|
81
|
+
config.durationSeconds = parseInt(mergedInput.seconds, 10);
|
|
82
|
+
if (mergedInput.personGeneration)
|
|
83
|
+
config.personGeneration = mergedInput.personGeneration;
|
|
84
|
+
const params = {
|
|
85
|
+
model,
|
|
86
|
+
prompt: mergedInput.prompt,
|
|
87
|
+
config,
|
|
88
|
+
};
|
|
89
|
+
// Start video generation
|
|
90
|
+
let operation = await this.client.models.generateVideos(params);
|
|
91
|
+
logger_js_1.logger.debug("Video generation started...");
|
|
92
|
+
// Poll operation status until complete
|
|
93
|
+
const pollingInterval = this.options?.pollingInterval ?? 10000;
|
|
94
|
+
while (!operation.done) {
|
|
95
|
+
logger_js_1.logger.debug("Waiting for video generation to complete...");
|
|
96
|
+
await new Promise((resolve) => setTimeout(resolve, pollingInterval));
|
|
97
|
+
operation = await this.client.operations.getVideosOperation({ operation });
|
|
98
|
+
}
|
|
99
|
+
if (!operation.response?.generatedVideos?.[0]?.video) {
|
|
100
|
+
throw new Error("Video generation failed: No video generated");
|
|
101
|
+
}
|
|
102
|
+
// Download the generated video
|
|
103
|
+
const generatedVideo = operation.response.generatedVideos[0];
|
|
104
|
+
const videoFile = generatedVideo.video;
|
|
105
|
+
if (!videoFile) {
|
|
106
|
+
throw new Error("Video generation failed: No video file returned");
|
|
107
|
+
}
|
|
108
|
+
// Save to temporary directory
|
|
109
|
+
const dir = index_js_1.nodejs.path.join(index_js_1.nodejs.os.tmpdir(), options?.context?.id || "");
|
|
110
|
+
await index_js_1.nodejs.fs.mkdir(dir, { recursive: true });
|
|
111
|
+
const videoId = Date.now().toString();
|
|
112
|
+
return {
|
|
113
|
+
videos: [
|
|
114
|
+
{
|
|
115
|
+
type: "file",
|
|
116
|
+
data: await this.downloadToFile(dir, videoId, videoFile),
|
|
117
|
+
},
|
|
118
|
+
],
|
|
119
|
+
usage: {
|
|
120
|
+
inputTokens: 0,
|
|
121
|
+
outputTokens: 0,
|
|
122
|
+
},
|
|
123
|
+
model,
|
|
124
|
+
seconds: mergedInput.seconds ? parseInt(mergedInput.seconds, 10) : DEFAULT_SECONDS,
|
|
125
|
+
};
|
|
126
|
+
}
|
|
127
|
+
}
|
|
128
|
+
exports.GeminiVideoModel = GeminiVideoModel;
|
package/lib/cjs/index.d.ts
CHANGED
package/lib/cjs/index.js
CHANGED
|
@@ -16,3 +16,4 @@ var __exportStar = (this && this.__exportStar) || function(m, exports) {
|
|
|
16
16
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
17
17
|
__exportStar(require("./gemini-chat-model.js"), exports);
|
|
18
18
|
__exportStar(require("./gemini-image-model.js"), exports);
|
|
19
|
+
__exportStar(require("./gemini-video-model.js"), exports);
|
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
import { type AgentInvokeOptions, VideoModel, type VideoModelInput, type VideoModelOptions, type VideoModelOutput } from "@aigne/core";
|
|
2
|
+
import { GoogleGenAI } from "@google/genai";
|
|
3
|
+
/**
|
|
4
|
+
* Input options for Gemini Video Model
|
|
5
|
+
*/
|
|
6
|
+
export interface GeminiVideoModelInput extends VideoModelInput {
|
|
7
|
+
/**
|
|
8
|
+
* Text describing content that should not appear in the video
|
|
9
|
+
*/
|
|
10
|
+
negativePrompt?: string;
|
|
11
|
+
/**
|
|
12
|
+
* Aspect ratio of the video
|
|
13
|
+
*
|
|
14
|
+
* Veo 3.1: "16:9" (default, 720p and 1080p), "9:16" (720p and 1080p)
|
|
15
|
+
* Veo 3: "16:9" (default, 720p and 1080p), "9:16" (720p and 1080p)
|
|
16
|
+
* Veo 2: "16:9" (default, 720p), "9:16" (720p)
|
|
17
|
+
*/
|
|
18
|
+
aspectRatio?: string;
|
|
19
|
+
/**
|
|
20
|
+
* Resolution of the video
|
|
21
|
+
*
|
|
22
|
+
* Veo 3.1: "720p" (default), "1080p" (only supports 8 seconds duration)
|
|
23
|
+
* Veo 3: "720p" (default), "1080p" (16:9 only)
|
|
24
|
+
* Veo 2: Not supported
|
|
25
|
+
*/
|
|
26
|
+
size?: string;
|
|
27
|
+
/**
|
|
28
|
+
* Duration of the generated video in seconds
|
|
29
|
+
*
|
|
30
|
+
* Veo 3.1: "4", "6", "8"
|
|
31
|
+
* Veo 3: "4", "6", "8"
|
|
32
|
+
* Veo 2: "5", "6", "8"
|
|
33
|
+
*/
|
|
34
|
+
seconds?: string;
|
|
35
|
+
/**
|
|
36
|
+
* Control person generation
|
|
37
|
+
*
|
|
38
|
+
* For text-to-video and image-to-video:
|
|
39
|
+
* - Veo 3.1: "allow_all" for image-to-video, frame interpolation and reference images; only "allow_adult" for text-to-video
|
|
40
|
+
* - Veo 3: "allow_all" for image-to-video; only "allow_adult" for text-to-video
|
|
41
|
+
* - Veo 2: "allow_all", "allow_adult", "dont_allow"
|
|
42
|
+
*/
|
|
43
|
+
personGeneration?: string;
|
|
44
|
+
}
|
|
45
|
+
/**
|
|
46
|
+
* Output from Gemini Video Model
|
|
47
|
+
*/
|
|
48
|
+
export interface GeminiVideoModelOutput extends VideoModelOutput {
|
|
49
|
+
}
|
|
50
|
+
/**
|
|
51
|
+
* Configuration options for Gemini Video Model
|
|
52
|
+
*/
|
|
53
|
+
export interface GeminiVideoModelOptions extends VideoModelOptions<GeminiVideoModelInput, GeminiVideoModelOutput> {
|
|
54
|
+
/**
|
|
55
|
+
* API key for Gemini API
|
|
56
|
+
*
|
|
57
|
+
* If not provided, will look for GEMINI_API_KEY in environment variables
|
|
58
|
+
*/
|
|
59
|
+
apiKey?: string;
|
|
60
|
+
/**
|
|
61
|
+
* Base URL for Gemini API
|
|
62
|
+
*
|
|
63
|
+
* Useful for proxies or alternate endpoints
|
|
64
|
+
*/
|
|
65
|
+
baseURL?: string;
|
|
66
|
+
/**
|
|
67
|
+
* Gemini model to use
|
|
68
|
+
*
|
|
69
|
+
* Defaults to 'veo-3.1-generate-preview'
|
|
70
|
+
*/
|
|
71
|
+
model?: string;
|
|
72
|
+
/**
|
|
73
|
+
* Additional model options to control behavior
|
|
74
|
+
*/
|
|
75
|
+
modelOptions?: Omit<Partial<GeminiVideoModelInput>, "model">;
|
|
76
|
+
/**
|
|
77
|
+
* Client options for Gemini API
|
|
78
|
+
*/
|
|
79
|
+
clientOptions?: Record<string, any>;
|
|
80
|
+
/**
|
|
81
|
+
* Polling interval in milliseconds for checking video generation status
|
|
82
|
+
*
|
|
83
|
+
* Defaults to 10000ms (10 seconds)
|
|
84
|
+
*/
|
|
85
|
+
pollingInterval?: number;
|
|
86
|
+
}
|
|
87
|
+
export declare class GeminiVideoModel extends VideoModel<GeminiVideoModelInput, GeminiVideoModelOutput> {
|
|
88
|
+
options?: GeminiVideoModelOptions | undefined;
|
|
89
|
+
constructor(options?: GeminiVideoModelOptions | undefined);
|
|
90
|
+
/**
|
|
91
|
+
* @hidden
|
|
92
|
+
*/
|
|
93
|
+
protected _client?: GoogleGenAI;
|
|
94
|
+
protected apiKeyEnvName: string;
|
|
95
|
+
get client(): GoogleGenAI;
|
|
96
|
+
get credential(): {
|
|
97
|
+
url: string | undefined;
|
|
98
|
+
apiKey: string | undefined;
|
|
99
|
+
model: string;
|
|
100
|
+
};
|
|
101
|
+
get modelOptions(): Omit<Partial<GeminiVideoModelInput>, "model"> | undefined;
|
|
102
|
+
downloadToFile(dir: string, videoId: string, videoFile: {
|
|
103
|
+
uri?: string;
|
|
104
|
+
videoBytes?: any;
|
|
105
|
+
}): Promise<string>;
|
|
106
|
+
process(input: GeminiVideoModelInput, options: AgentInvokeOptions): Promise<GeminiVideoModelOutput>;
|
|
107
|
+
}
|
package/lib/dts/index.d.ts
CHANGED
|
@@ -148,7 +148,7 @@ export class GeminiChatModel extends ChatModel {
|
|
|
148
148
|
yield { delta: { json: { json: safeParseJSON(text) } } };
|
|
149
149
|
}
|
|
150
150
|
else if (!toolCalls.length) {
|
|
151
|
-
throw new Error("No JSON response from the model");
|
|
151
|
+
throw new StructuredOutputError("No JSON response from the model");
|
|
152
152
|
}
|
|
153
153
|
}
|
|
154
154
|
else if (!toolCalls.length) {
|
|
@@ -339,16 +339,25 @@ export class GeminiChatModel extends ChatModel {
|
|
|
339
339
|
}
|
|
340
340
|
return content;
|
|
341
341
|
}))).filter(isNonNullable);
|
|
342
|
-
|
|
343
|
-
const system = systemParts.pop();
|
|
344
|
-
if (system) {
|
|
345
|
-
result.contents.push({ role: "user", parts: [system] });
|
|
346
|
-
}
|
|
347
|
-
}
|
|
342
|
+
this.ensureMessagesHasUserMessage(systemParts, result.contents);
|
|
348
343
|
if (systemParts.length) {
|
|
349
344
|
result.config ??= {};
|
|
350
345
|
result.config.systemInstruction = systemParts;
|
|
351
346
|
}
|
|
352
347
|
return result;
|
|
353
348
|
}
|
|
349
|
+
ensureMessagesHasUserMessage(systems, contents) {
|
|
350
|
+
// no messages but system messages
|
|
351
|
+
if (!contents.length && systems.length) {
|
|
352
|
+
const system = systems.pop();
|
|
353
|
+
if (system)
|
|
354
|
+
contents.push({ role: "user", parts: [system] });
|
|
355
|
+
}
|
|
356
|
+
// first message is from model
|
|
357
|
+
if (contents[0]?.role === "model") {
|
|
358
|
+
const system = systems.pop();
|
|
359
|
+
if (system)
|
|
360
|
+
contents.unshift({ role: "user", parts: [system] });
|
|
361
|
+
}
|
|
362
|
+
}
|
|
354
363
|
}
|
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
import { type AgentInvokeOptions, VideoModel, type VideoModelInput, type VideoModelOptions, type VideoModelOutput } from "@aigne/core";
|
|
2
|
+
import { GoogleGenAI } from "@google/genai";
|
|
3
|
+
/**
|
|
4
|
+
* Input options for Gemini Video Model
|
|
5
|
+
*/
|
|
6
|
+
export interface GeminiVideoModelInput extends VideoModelInput {
|
|
7
|
+
/**
|
|
8
|
+
* Text describing content that should not appear in the video
|
|
9
|
+
*/
|
|
10
|
+
negativePrompt?: string;
|
|
11
|
+
/**
|
|
12
|
+
* Aspect ratio of the video
|
|
13
|
+
*
|
|
14
|
+
* Veo 3.1: "16:9" (default, 720p and 1080p), "9:16" (720p and 1080p)
|
|
15
|
+
* Veo 3: "16:9" (default, 720p and 1080p), "9:16" (720p and 1080p)
|
|
16
|
+
* Veo 2: "16:9" (default, 720p), "9:16" (720p)
|
|
17
|
+
*/
|
|
18
|
+
aspectRatio?: string;
|
|
19
|
+
/**
|
|
20
|
+
* Resolution of the video
|
|
21
|
+
*
|
|
22
|
+
* Veo 3.1: "720p" (default), "1080p" (only supports 8 seconds duration)
|
|
23
|
+
* Veo 3: "720p" (default), "1080p" (16:9 only)
|
|
24
|
+
* Veo 2: Not supported
|
|
25
|
+
*/
|
|
26
|
+
size?: string;
|
|
27
|
+
/**
|
|
28
|
+
* Duration of the generated video in seconds
|
|
29
|
+
*
|
|
30
|
+
* Veo 3.1: "4", "6", "8"
|
|
31
|
+
* Veo 3: "4", "6", "8"
|
|
32
|
+
* Veo 2: "5", "6", "8"
|
|
33
|
+
*/
|
|
34
|
+
seconds?: string;
|
|
35
|
+
/**
|
|
36
|
+
* Control person generation
|
|
37
|
+
*
|
|
38
|
+
* For text-to-video and image-to-video:
|
|
39
|
+
* - Veo 3.1: "allow_all" for image-to-video, frame interpolation and reference images; only "allow_adult" for text-to-video
|
|
40
|
+
* - Veo 3: "allow_all" for image-to-video; only "allow_adult" for text-to-video
|
|
41
|
+
* - Veo 2: "allow_all", "allow_adult", "dont_allow"
|
|
42
|
+
*/
|
|
43
|
+
personGeneration?: string;
|
|
44
|
+
}
|
|
45
|
+
/**
|
|
46
|
+
* Output from Gemini Video Model
|
|
47
|
+
*/
|
|
48
|
+
export interface GeminiVideoModelOutput extends VideoModelOutput {
|
|
49
|
+
}
|
|
50
|
+
/**
|
|
51
|
+
* Configuration options for Gemini Video Model
|
|
52
|
+
*/
|
|
53
|
+
export interface GeminiVideoModelOptions extends VideoModelOptions<GeminiVideoModelInput, GeminiVideoModelOutput> {
|
|
54
|
+
/**
|
|
55
|
+
* API key for Gemini API
|
|
56
|
+
*
|
|
57
|
+
* If not provided, will look for GEMINI_API_KEY in environment variables
|
|
58
|
+
*/
|
|
59
|
+
apiKey?: string;
|
|
60
|
+
/**
|
|
61
|
+
* Base URL for Gemini API
|
|
62
|
+
*
|
|
63
|
+
* Useful for proxies or alternate endpoints
|
|
64
|
+
*/
|
|
65
|
+
baseURL?: string;
|
|
66
|
+
/**
|
|
67
|
+
* Gemini model to use
|
|
68
|
+
*
|
|
69
|
+
* Defaults to 'veo-3.1-generate-preview'
|
|
70
|
+
*/
|
|
71
|
+
model?: string;
|
|
72
|
+
/**
|
|
73
|
+
* Additional model options to control behavior
|
|
74
|
+
*/
|
|
75
|
+
modelOptions?: Omit<Partial<GeminiVideoModelInput>, "model">;
|
|
76
|
+
/**
|
|
77
|
+
* Client options for Gemini API
|
|
78
|
+
*/
|
|
79
|
+
clientOptions?: Record<string, any>;
|
|
80
|
+
/**
|
|
81
|
+
* Polling interval in milliseconds for checking video generation status
|
|
82
|
+
*
|
|
83
|
+
* Defaults to 10000ms (10 seconds)
|
|
84
|
+
*/
|
|
85
|
+
pollingInterval?: number;
|
|
86
|
+
}
|
|
87
|
+
export declare class GeminiVideoModel extends VideoModel<GeminiVideoModelInput, GeminiVideoModelOutput> {
|
|
88
|
+
options?: GeminiVideoModelOptions | undefined;
|
|
89
|
+
constructor(options?: GeminiVideoModelOptions | undefined);
|
|
90
|
+
/**
|
|
91
|
+
* @hidden
|
|
92
|
+
*/
|
|
93
|
+
protected _client?: GoogleGenAI;
|
|
94
|
+
protected apiKeyEnvName: string;
|
|
95
|
+
get client(): GoogleGenAI;
|
|
96
|
+
get credential(): {
|
|
97
|
+
url: string | undefined;
|
|
98
|
+
apiKey: string | undefined;
|
|
99
|
+
model: string;
|
|
100
|
+
};
|
|
101
|
+
get modelOptions(): Omit<Partial<GeminiVideoModelInput>, "model"> | undefined;
|
|
102
|
+
downloadToFile(dir: string, videoId: string, videoFile: {
|
|
103
|
+
uri?: string;
|
|
104
|
+
videoBytes?: any;
|
|
105
|
+
}): Promise<string>;
|
|
106
|
+
process(input: GeminiVideoModelInput, options: AgentInvokeOptions): Promise<GeminiVideoModelOutput>;
|
|
107
|
+
}
|
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
import { VideoModel, videoModelInputSchema, } from "@aigne/core";
|
|
2
|
+
import { logger } from "@aigne/core/utils/logger.js";
|
|
3
|
+
import { checkArguments } from "@aigne/core/utils/type-utils.js";
|
|
4
|
+
import { nodejs } from "@aigne/platform-helpers/nodejs/index.js";
|
|
5
|
+
import { GoogleGenAI } from "@google/genai";
|
|
6
|
+
import { z } from "zod";
|
|
7
|
+
const DEFAULT_MODEL = "veo-3.1-generate-preview";
|
|
8
|
+
const DEFAULT_SECONDS = 8;
|
|
9
|
+
const geminiVideoModelInputSchema = videoModelInputSchema.extend({
|
|
10
|
+
negativePrompt: z.string().optional(),
|
|
11
|
+
aspectRatio: z.string().optional(),
|
|
12
|
+
personGeneration: z.string().optional(),
|
|
13
|
+
});
|
|
14
|
+
const geminiVideoModelOptionsSchema = z.object({
|
|
15
|
+
apiKey: z.string().optional(),
|
|
16
|
+
baseURL: z.string().optional(),
|
|
17
|
+
model: z.string().optional(),
|
|
18
|
+
modelOptions: z.object({}).optional(),
|
|
19
|
+
clientOptions: z.object({}).optional(),
|
|
20
|
+
pollingInterval: z.number().optional(),
|
|
21
|
+
});
|
|
22
|
+
export class GeminiVideoModel extends VideoModel {
|
|
23
|
+
options;
|
|
24
|
+
constructor(options) {
|
|
25
|
+
super({
|
|
26
|
+
...options,
|
|
27
|
+
description: options?.description ?? "Generate videos using Google Gemini Veo models",
|
|
28
|
+
inputSchema: geminiVideoModelInputSchema,
|
|
29
|
+
});
|
|
30
|
+
this.options = options;
|
|
31
|
+
if (options)
|
|
32
|
+
checkArguments(this.name, geminiVideoModelOptionsSchema, options);
|
|
33
|
+
}
|
|
34
|
+
/**
|
|
35
|
+
* @hidden
|
|
36
|
+
*/
|
|
37
|
+
_client;
|
|
38
|
+
apiKeyEnvName = "GEMINI_API_KEY";
|
|
39
|
+
get client() {
|
|
40
|
+
const { apiKey } = this.credential;
|
|
41
|
+
if (!apiKey)
|
|
42
|
+
throw new Error(`${this.name} requires an API key. Please provide it via \`options.apiKey\`, or set the \`${this.apiKeyEnvName}\` environment variable`);
|
|
43
|
+
this._client ??= new GoogleGenAI({ apiKey, ...this.options?.clientOptions });
|
|
44
|
+
return this._client;
|
|
45
|
+
}
|
|
46
|
+
get credential() {
|
|
47
|
+
return {
|
|
48
|
+
url: this.options?.baseURL || process.env.GEMINI_BASE_URL,
|
|
49
|
+
apiKey: this.options?.apiKey || process.env[this.apiKeyEnvName],
|
|
50
|
+
model: this.options?.model || DEFAULT_MODEL,
|
|
51
|
+
};
|
|
52
|
+
}
|
|
53
|
+
get modelOptions() {
|
|
54
|
+
return this.options?.modelOptions;
|
|
55
|
+
}
|
|
56
|
+
async downloadToFile(dir, videoId, videoFile) {
|
|
57
|
+
logger.debug("Downloading video content...");
|
|
58
|
+
const localPath = nodejs.path.join(dir, `${videoId}.mp4`);
|
|
59
|
+
await this.client.files.download({ file: videoFile, downloadPath: localPath });
|
|
60
|
+
logger.debug(`Generated video saved to ${localPath}`);
|
|
61
|
+
await new Promise((resolve) => setTimeout(resolve, 300));
|
|
62
|
+
const buffer = await nodejs.fs.readFile(localPath);
|
|
63
|
+
const base64 = buffer.toString("base64");
|
|
64
|
+
const dataUrl = `data:video/mp4;base64,${base64}`;
|
|
65
|
+
return dataUrl;
|
|
66
|
+
}
|
|
67
|
+
async process(input, options) {
|
|
68
|
+
const model = input.model ?? this.credential.model;
|
|
69
|
+
const mergedInput = { ...this.modelOptions, ...input };
|
|
70
|
+
const config = {};
|
|
71
|
+
if (mergedInput.negativePrompt)
|
|
72
|
+
config.negativePrompt = mergedInput.negativePrompt;
|
|
73
|
+
if (mergedInput.aspectRatio)
|
|
74
|
+
config.aspectRatio = mergedInput.aspectRatio;
|
|
75
|
+
if (mergedInput.size)
|
|
76
|
+
config.resolution = mergedInput.size;
|
|
77
|
+
if (mergedInput.seconds)
|
|
78
|
+
config.durationSeconds = parseInt(mergedInput.seconds, 10);
|
|
79
|
+
if (mergedInput.personGeneration)
|
|
80
|
+
config.personGeneration = mergedInput.personGeneration;
|
|
81
|
+
const params = {
|
|
82
|
+
model,
|
|
83
|
+
prompt: mergedInput.prompt,
|
|
84
|
+
config,
|
|
85
|
+
};
|
|
86
|
+
// Start video generation
|
|
87
|
+
let operation = await this.client.models.generateVideos(params);
|
|
88
|
+
logger.debug("Video generation started...");
|
|
89
|
+
// Poll operation status until complete
|
|
90
|
+
const pollingInterval = this.options?.pollingInterval ?? 10000;
|
|
91
|
+
while (!operation.done) {
|
|
92
|
+
logger.debug("Waiting for video generation to complete...");
|
|
93
|
+
await new Promise((resolve) => setTimeout(resolve, pollingInterval));
|
|
94
|
+
operation = await this.client.operations.getVideosOperation({ operation });
|
|
95
|
+
}
|
|
96
|
+
if (!operation.response?.generatedVideos?.[0]?.video) {
|
|
97
|
+
throw new Error("Video generation failed: No video generated");
|
|
98
|
+
}
|
|
99
|
+
// Download the generated video
|
|
100
|
+
const generatedVideo = operation.response.generatedVideos[0];
|
|
101
|
+
const videoFile = generatedVideo.video;
|
|
102
|
+
if (!videoFile) {
|
|
103
|
+
throw new Error("Video generation failed: No video file returned");
|
|
104
|
+
}
|
|
105
|
+
// Save to temporary directory
|
|
106
|
+
const dir = nodejs.path.join(nodejs.os.tmpdir(), options?.context?.id || "");
|
|
107
|
+
await nodejs.fs.mkdir(dir, { recursive: true });
|
|
108
|
+
const videoId = Date.now().toString();
|
|
109
|
+
return {
|
|
110
|
+
videos: [
|
|
111
|
+
{
|
|
112
|
+
type: "file",
|
|
113
|
+
data: await this.downloadToFile(dir, videoId, videoFile),
|
|
114
|
+
},
|
|
115
|
+
],
|
|
116
|
+
usage: {
|
|
117
|
+
inputTokens: 0,
|
|
118
|
+
outputTokens: 0,
|
|
119
|
+
},
|
|
120
|
+
model,
|
|
121
|
+
seconds: mergedInput.seconds ? parseInt(mergedInput.seconds, 10) : DEFAULT_SECONDS,
|
|
122
|
+
};
|
|
123
|
+
}
|
|
124
|
+
}
|
package/lib/esm/index.d.ts
CHANGED
package/lib/esm/index.js
CHANGED
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@aigne/gemini",
|
|
3
|
-
"version": "0.14.3",
|
|
3
|
+
"version": "0.14.4-beta.2",
|
|
4
4
|
"description": "AIGNE Gemini SDK for integrating with Google's Gemini AI models",
|
|
5
5
|
"publishConfig": {
|
|
6
6
|
"access": "public"
|
|
@@ -39,7 +39,7 @@
|
|
|
39
39
|
"@google/genai": "^1.24.0",
|
|
40
40
|
"zod": "^3.25.67",
|
|
41
41
|
"zod-to-json-schema": "^3.24.6",
|
|
42
|
-
"@aigne/core": "^1.
|
|
42
|
+
"@aigne/core": "^1.65.0-beta",
|
|
43
43
|
"@aigne/platform-helpers": "^0.6.3"
|
|
44
44
|
},
|
|
45
45
|
"devDependencies": {
|
|
@@ -48,7 +48,7 @@
|
|
|
48
48
|
"npm-run-all": "^4.1.5",
|
|
49
49
|
"rimraf": "^6.0.1",
|
|
50
50
|
"typescript": "^5.9.2",
|
|
51
|
-
"@aigne/test-utils": "^0.5.
|
|
51
|
+
"@aigne/test-utils": "^0.5.57-beta.1"
|
|
52
52
|
},
|
|
53
53
|
"scripts": {
|
|
54
54
|
"lint": "tsc --noEmit",
|