@aigne/gemini 0.14.4-beta.1 → 0.14.4-beta.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +38 -0
- package/lib/cjs/gemini-video-model.d.ts +107 -0
- package/lib/cjs/gemini-video-model.js +128 -0
- package/lib/cjs/index.d.ts +1 -0
- package/lib/cjs/index.js +1 -0
- package/lib/dts/gemini-video-model.d.ts +107 -0
- package/lib/dts/index.d.ts +1 -0
- package/lib/esm/gemini-video-model.d.ts +107 -0
- package/lib/esm/gemini-video-model.js +124 -0
- package/lib/esm/index.d.ts +1 -0
- package/lib/esm/index.js +1 -0
- package/package.json +3 -3
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,43 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## [0.14.4-beta.3](https://github.com/AIGNE-io/aigne-framework/compare/gemini-v0.14.4-beta.2...gemini-v0.14.4-beta.3) (2025-10-26)
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
### Bug Fixes
|
|
7
|
+
|
|
8
|
+
* **models:** add mineType for transform file ([#667](https://github.com/AIGNE-io/aigne-framework/issues/667)) ([155a173](https://github.com/AIGNE-io/aigne-framework/commit/155a173e75aff1dbe870a1305455a4300942e07a))
|
|
9
|
+
* **models:** aigne hub video params ([#665](https://github.com/AIGNE-io/aigne-framework/issues/665)) ([d00f836](https://github.com/AIGNE-io/aigne-framework/commit/d00f8368422d8e3707b974e1aff06714731ebb28))
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
### Dependencies
|
|
13
|
+
|
|
14
|
+
* The following workspace dependencies were updated
|
|
15
|
+
* dependencies
|
|
16
|
+
* @aigne/core bumped to 1.65.0-beta.1
|
|
17
|
+
* devDependencies
|
|
18
|
+
* @aigne/test-utils bumped to 0.5.57-beta.2
|
|
19
|
+
|
|
20
|
+
## [0.14.4-beta.2](https://github.com/AIGNE-io/aigne-framework/compare/gemini-v0.14.4-beta.1...gemini-v0.14.4-beta.2) (2025-10-24)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
### Features
|
|
24
|
+
|
|
25
|
+
* **model:** support video model ([#647](https://github.com/AIGNE-io/aigne-framework/issues/647)) ([de81742](https://github.com/AIGNE-io/aigne-framework/commit/de817421ef1dd3246d0d8c51ff12f0a855658f9f))
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
### Bug Fixes
|
|
29
|
+
|
|
30
|
+
* **model:** updated default video duration settings for AI video models ([#663](https://github.com/AIGNE-io/aigne-framework/issues/663)) ([1203941](https://github.com/AIGNE-io/aigne-framework/commit/12039411aaef77ba665e8edfb0fe6f8097c43e39))
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
### Dependencies
|
|
34
|
+
|
|
35
|
+
* The following workspace dependencies were updated
|
|
36
|
+
* dependencies
|
|
37
|
+
* @aigne/core bumped to 1.65.0-beta
|
|
38
|
+
* devDependencies
|
|
39
|
+
* @aigne/test-utils bumped to 0.5.57-beta.1
|
|
40
|
+
|
|
3
41
|
## [0.14.4-beta.1](https://github.com/AIGNE-io/aigne-framework/compare/gemini-v0.14.4-beta...gemini-v0.14.4-beta.1) (2025-10-24)
|
|
4
42
|
|
|
5
43
|
|
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
import { type AgentInvokeOptions, VideoModel, type VideoModelInput, type VideoModelOptions, type VideoModelOutput } from "@aigne/core";
|
|
2
|
+
import { GoogleGenAI } from "@google/genai";
|
|
3
|
+
/**
|
|
4
|
+
* Input options for Gemini Video Model
|
|
5
|
+
*/
|
|
6
|
+
export interface GeminiVideoModelInput extends VideoModelInput {
|
|
7
|
+
/**
|
|
8
|
+
* Text describing content that should not appear in the video
|
|
9
|
+
*/
|
|
10
|
+
negativePrompt?: string;
|
|
11
|
+
/**
|
|
12
|
+
* Aspect ratio of the video
|
|
13
|
+
*
|
|
14
|
+
* Veo 3.1: "16:9" (default, 720p and 1080p), "9:16" (720p and 1080p)
|
|
15
|
+
* Veo 3: "16:9" (default, 720p and 1080p), "9:16" (720p and 1080p)
|
|
16
|
+
* Veo 2: "16:9" (default, 720p), "9:16" (720p)
|
|
17
|
+
*/
|
|
18
|
+
aspectRatio?: string;
|
|
19
|
+
/**
|
|
20
|
+
* Resolution of the video
|
|
21
|
+
*
|
|
22
|
+
* Veo 3.1: "720p" (default), "1080p" (only supports 8 seconds duration)
|
|
23
|
+
* Veo 3: "720p" (default), "1080p" (16:9 only)
|
|
24
|
+
* Veo 2: Not supported
|
|
25
|
+
*/
|
|
26
|
+
size?: string;
|
|
27
|
+
/**
|
|
28
|
+
* Duration of the generated video in seconds
|
|
29
|
+
*
|
|
30
|
+
* Veo 3.1: "4", "6", "8"
|
|
31
|
+
* Veo 3: "4", "6", "8"
|
|
32
|
+
* Veo 2: "5", "6", "8"
|
|
33
|
+
*/
|
|
34
|
+
seconds?: string;
|
|
35
|
+
/**
|
|
36
|
+
* Control person generation
|
|
37
|
+
*
|
|
38
|
+
* For text-to-video and image-to-video:
|
|
39
|
+
* - Veo 3.1: "allow_all" for image-to-video, frame interpolation and reference images; only "allow_adult" for text-to-video
|
|
40
|
+
* - Veo 3: "allow_all" for image-to-video; only "allow_adult" for text-to-video
|
|
41
|
+
* - Veo 2: "allow_all", "allow_adult", "dont_allow"
|
|
42
|
+
*/
|
|
43
|
+
personGeneration?: string;
|
|
44
|
+
}
|
|
45
|
+
/**
|
|
46
|
+
* Output from Gemini Video Model
|
|
47
|
+
*/
|
|
48
|
+
export interface GeminiVideoModelOutput extends VideoModelOutput {
|
|
49
|
+
}
|
|
50
|
+
/**
|
|
51
|
+
* Configuration options for Gemini Video Model
|
|
52
|
+
*/
|
|
53
|
+
export interface GeminiVideoModelOptions extends VideoModelOptions<GeminiVideoModelInput, GeminiVideoModelOutput> {
|
|
54
|
+
/**
|
|
55
|
+
* API key for Gemini API
|
|
56
|
+
*
|
|
57
|
+
* If not provided, will look for GEMINI_API_KEY in environment variables
|
|
58
|
+
*/
|
|
59
|
+
apiKey?: string;
|
|
60
|
+
/**
|
|
61
|
+
* Base URL for Gemini API
|
|
62
|
+
*
|
|
63
|
+
* Useful for proxies or alternate endpoints
|
|
64
|
+
*/
|
|
65
|
+
baseURL?: string;
|
|
66
|
+
/**
|
|
67
|
+
* Gemini model to use
|
|
68
|
+
*
|
|
69
|
+
* Defaults to 'veo-3.1-generate-preview'
|
|
70
|
+
*/
|
|
71
|
+
model?: string;
|
|
72
|
+
/**
|
|
73
|
+
* Additional model options to control behavior
|
|
74
|
+
*/
|
|
75
|
+
modelOptions?: Omit<Partial<GeminiVideoModelInput>, "model">;
|
|
76
|
+
/**
|
|
77
|
+
* Client options for Gemini API
|
|
78
|
+
*/
|
|
79
|
+
clientOptions?: Record<string, any>;
|
|
80
|
+
/**
|
|
81
|
+
* Polling interval in milliseconds for checking video generation status
|
|
82
|
+
*
|
|
83
|
+
* Defaults to 10000ms (10 seconds)
|
|
84
|
+
*/
|
|
85
|
+
pollingInterval?: number;
|
|
86
|
+
}
|
|
87
|
+
export declare class GeminiVideoModel extends VideoModel<GeminiVideoModelInput, GeminiVideoModelOutput> {
|
|
88
|
+
options?: GeminiVideoModelOptions | undefined;
|
|
89
|
+
constructor(options?: GeminiVideoModelOptions | undefined);
|
|
90
|
+
/**
|
|
91
|
+
* @hidden
|
|
92
|
+
*/
|
|
93
|
+
protected _client?: GoogleGenAI;
|
|
94
|
+
protected apiKeyEnvName: string;
|
|
95
|
+
get client(): GoogleGenAI;
|
|
96
|
+
get credential(): {
|
|
97
|
+
url: string | undefined;
|
|
98
|
+
apiKey: string | undefined;
|
|
99
|
+
model: string;
|
|
100
|
+
};
|
|
101
|
+
get modelOptions(): Omit<Partial<GeminiVideoModelInput>, "model"> | undefined;
|
|
102
|
+
downloadToFile(dir: string, videoId: string, videoFile: {
|
|
103
|
+
uri?: string;
|
|
104
|
+
videoBytes?: any;
|
|
105
|
+
}): Promise<string>;
|
|
106
|
+
process(input: GeminiVideoModelInput, options: AgentInvokeOptions): Promise<GeminiVideoModelOutput>;
|
|
107
|
+
}
|
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.GeminiVideoModel = void 0;
|
|
4
|
+
const core_1 = require("@aigne/core");
|
|
5
|
+
const logger_js_1 = require("@aigne/core/utils/logger.js");
|
|
6
|
+
const type_utils_js_1 = require("@aigne/core/utils/type-utils.js");
|
|
7
|
+
const index_js_1 = require("@aigne/platform-helpers/nodejs/index.js");
|
|
8
|
+
const genai_1 = require("@google/genai");
|
|
9
|
+
const zod_1 = require("zod");
|
|
10
|
+
const DEFAULT_MODEL = "veo-3.1-generate-preview";
|
|
11
|
+
const DEFAULT_SECONDS = 8;
|
|
12
|
+
const geminiVideoModelInputSchema = core_1.videoModelInputSchema.extend({
|
|
13
|
+
negativePrompt: zod_1.z.string().optional(),
|
|
14
|
+
aspectRatio: zod_1.z.string().optional(),
|
|
15
|
+
personGeneration: zod_1.z.string().optional(),
|
|
16
|
+
});
|
|
17
|
+
const geminiVideoModelOptionsSchema = zod_1.z.object({
|
|
18
|
+
apiKey: zod_1.z.string().optional(),
|
|
19
|
+
baseURL: zod_1.z.string().optional(),
|
|
20
|
+
model: zod_1.z.string().optional(),
|
|
21
|
+
modelOptions: zod_1.z.object({}).optional(),
|
|
22
|
+
clientOptions: zod_1.z.object({}).optional(),
|
|
23
|
+
pollingInterval: zod_1.z.number().optional(),
|
|
24
|
+
});
|
|
25
|
+
class GeminiVideoModel extends core_1.VideoModel {
|
|
26
|
+
options;
|
|
27
|
+
constructor(options) {
|
|
28
|
+
super({
|
|
29
|
+
...options,
|
|
30
|
+
description: options?.description ?? "Generate videos using Google Gemini Veo models",
|
|
31
|
+
inputSchema: geminiVideoModelInputSchema,
|
|
32
|
+
});
|
|
33
|
+
this.options = options;
|
|
34
|
+
if (options)
|
|
35
|
+
(0, type_utils_js_1.checkArguments)(this.name, geminiVideoModelOptionsSchema, options);
|
|
36
|
+
}
|
|
37
|
+
/**
|
|
38
|
+
* @hidden
|
|
39
|
+
*/
|
|
40
|
+
_client;
|
|
41
|
+
apiKeyEnvName = "GEMINI_API_KEY";
|
|
42
|
+
get client() {
|
|
43
|
+
const { apiKey } = this.credential;
|
|
44
|
+
if (!apiKey)
|
|
45
|
+
throw new Error(`${this.name} requires an API key. Please provide it via \`options.apiKey\`, or set the \`${this.apiKeyEnvName}\` environment variable`);
|
|
46
|
+
this._client ??= new genai_1.GoogleGenAI({ apiKey, ...this.options?.clientOptions });
|
|
47
|
+
return this._client;
|
|
48
|
+
}
|
|
49
|
+
get credential() {
|
|
50
|
+
return {
|
|
51
|
+
url: this.options?.baseURL || process.env.GEMINI_BASE_URL,
|
|
52
|
+
apiKey: this.options?.apiKey || process.env[this.apiKeyEnvName],
|
|
53
|
+
model: this.options?.model || DEFAULT_MODEL,
|
|
54
|
+
};
|
|
55
|
+
}
|
|
56
|
+
get modelOptions() {
|
|
57
|
+
return this.options?.modelOptions;
|
|
58
|
+
}
|
|
59
|
+
async downloadToFile(dir, videoId, videoFile) {
|
|
60
|
+
logger_js_1.logger.debug("Downloading video content...");
|
|
61
|
+
const localPath = index_js_1.nodejs.path.join(dir, `${videoId}.mp4`);
|
|
62
|
+
await this.client.files.download({ file: videoFile, downloadPath: localPath });
|
|
63
|
+
logger_js_1.logger.debug(`Generated video saved to ${localPath}`);
|
|
64
|
+
await new Promise((resolve) => setTimeout(resolve, 300));
|
|
65
|
+
const buffer = await index_js_1.nodejs.fs.readFile(localPath);
|
|
66
|
+
return buffer.toString("base64");
|
|
67
|
+
}
|
|
68
|
+
async process(input, options) {
|
|
69
|
+
const model = input.model ?? input.modelOptions?.model ?? this.credential.model;
|
|
70
|
+
const mergedInput = { ...this.modelOptions, ...input };
|
|
71
|
+
const config = {};
|
|
72
|
+
if (mergedInput.negativePrompt)
|
|
73
|
+
config.negativePrompt = mergedInput.negativePrompt;
|
|
74
|
+
if (mergedInput.aspectRatio)
|
|
75
|
+
config.aspectRatio = mergedInput.aspectRatio;
|
|
76
|
+
if (mergedInput.size)
|
|
77
|
+
config.resolution = mergedInput.size;
|
|
78
|
+
if (mergedInput.seconds)
|
|
79
|
+
config.durationSeconds = parseInt(mergedInput.seconds, 10);
|
|
80
|
+
if (mergedInput.personGeneration)
|
|
81
|
+
config.personGeneration = mergedInput.personGeneration;
|
|
82
|
+
const params = {
|
|
83
|
+
model,
|
|
84
|
+
prompt: mergedInput.prompt,
|
|
85
|
+
config,
|
|
86
|
+
};
|
|
87
|
+
// Start video generation
|
|
88
|
+
let operation = await this.client.models.generateVideos(params);
|
|
89
|
+
logger_js_1.logger.debug("Video generation started...");
|
|
90
|
+
// Poll operation status until complete
|
|
91
|
+
const pollingInterval = this.options?.pollingInterval ?? 10000;
|
|
92
|
+
while (!operation.done) {
|
|
93
|
+
logger_js_1.logger.debug("Waiting for video generation to complete...");
|
|
94
|
+
await new Promise((resolve) => setTimeout(resolve, pollingInterval));
|
|
95
|
+
operation = await this.client.operations.getVideosOperation({ operation });
|
|
96
|
+
}
|
|
97
|
+
if (!operation.response?.generatedVideos?.[0]?.video) {
|
|
98
|
+
throw new Error("Video generation failed: No video generated");
|
|
99
|
+
}
|
|
100
|
+
// Download the generated video
|
|
101
|
+
const generatedVideo = operation.response.generatedVideos[0];
|
|
102
|
+
const videoFile = generatedVideo.video;
|
|
103
|
+
if (!videoFile) {
|
|
104
|
+
throw new Error("Video generation failed: No video file returned");
|
|
105
|
+
}
|
|
106
|
+
// Save to temporary directory
|
|
107
|
+
const dir = index_js_1.nodejs.path.join(index_js_1.nodejs.os.tmpdir(), options?.context?.id || "");
|
|
108
|
+
await index_js_1.nodejs.fs.mkdir(dir, { recursive: true });
|
|
109
|
+
const videoId = Date.now().toString();
|
|
110
|
+
return {
|
|
111
|
+
videos: [
|
|
112
|
+
{
|
|
113
|
+
type: "file",
|
|
114
|
+
data: await this.downloadToFile(dir, videoId, videoFile),
|
|
115
|
+
mimeType: "video/mp4",
|
|
116
|
+
filename: `${videoId}.mp4`,
|
|
117
|
+
},
|
|
118
|
+
],
|
|
119
|
+
usage: {
|
|
120
|
+
inputTokens: 0,
|
|
121
|
+
outputTokens: 0,
|
|
122
|
+
},
|
|
123
|
+
model,
|
|
124
|
+
seconds: mergedInput.seconds ? parseInt(mergedInput.seconds, 10) : DEFAULT_SECONDS,
|
|
125
|
+
};
|
|
126
|
+
}
|
|
127
|
+
}
|
|
128
|
+
exports.GeminiVideoModel = GeminiVideoModel;
|
package/lib/cjs/index.d.ts
CHANGED
package/lib/cjs/index.js
CHANGED
|
@@ -16,3 +16,4 @@ var __exportStar = (this && this.__exportStar) || function(m, exports) {
|
|
|
16
16
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
17
17
|
__exportStar(require("./gemini-chat-model.js"), exports);
|
|
18
18
|
__exportStar(require("./gemini-image-model.js"), exports);
|
|
19
|
+
__exportStar(require("./gemini-video-model.js"), exports);
|
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
import { type AgentInvokeOptions, VideoModel, type VideoModelInput, type VideoModelOptions, type VideoModelOutput } from "@aigne/core";
|
|
2
|
+
import { GoogleGenAI } from "@google/genai";
|
|
3
|
+
/**
|
|
4
|
+
* Input options for Gemini Video Model
|
|
5
|
+
*/
|
|
6
|
+
export interface GeminiVideoModelInput extends VideoModelInput {
|
|
7
|
+
/**
|
|
8
|
+
* Text describing content that should not appear in the video
|
|
9
|
+
*/
|
|
10
|
+
negativePrompt?: string;
|
|
11
|
+
/**
|
|
12
|
+
* Aspect ratio of the video
|
|
13
|
+
*
|
|
14
|
+
* Veo 3.1: "16:9" (default, 720p and 1080p), "9:16" (720p and 1080p)
|
|
15
|
+
* Veo 3: "16:9" (default, 720p and 1080p), "9:16" (720p and 1080p)
|
|
16
|
+
* Veo 2: "16:9" (default, 720p), "9:16" (720p)
|
|
17
|
+
*/
|
|
18
|
+
aspectRatio?: string;
|
|
19
|
+
/**
|
|
20
|
+
* Resolution of the video
|
|
21
|
+
*
|
|
22
|
+
* Veo 3.1: "720p" (default), "1080p" (only supports 8 seconds duration)
|
|
23
|
+
* Veo 3: "720p" (default), "1080p" (16:9 only)
|
|
24
|
+
* Veo 2: Not supported
|
|
25
|
+
*/
|
|
26
|
+
size?: string;
|
|
27
|
+
/**
|
|
28
|
+
* Duration of the generated video in seconds
|
|
29
|
+
*
|
|
30
|
+
* Veo 3.1: "4", "6", "8"
|
|
31
|
+
* Veo 3: "4", "6", "8"
|
|
32
|
+
* Veo 2: "5", "6", "8"
|
|
33
|
+
*/
|
|
34
|
+
seconds?: string;
|
|
35
|
+
/**
|
|
36
|
+
* Control person generation
|
|
37
|
+
*
|
|
38
|
+
* For text-to-video and image-to-video:
|
|
39
|
+
* - Veo 3.1: "allow_all" for image-to-video, frame interpolation and reference images; only "allow_adult" for text-to-video
|
|
40
|
+
* - Veo 3: "allow_all" for image-to-video; only "allow_adult" for text-to-video
|
|
41
|
+
* - Veo 2: "allow_all", "allow_adult", "dont_allow"
|
|
42
|
+
*/
|
|
43
|
+
personGeneration?: string;
|
|
44
|
+
}
|
|
45
|
+
/**
|
|
46
|
+
* Output from Gemini Video Model
|
|
47
|
+
*/
|
|
48
|
+
export interface GeminiVideoModelOutput extends VideoModelOutput {
|
|
49
|
+
}
|
|
50
|
+
/**
|
|
51
|
+
* Configuration options for Gemini Video Model
|
|
52
|
+
*/
|
|
53
|
+
export interface GeminiVideoModelOptions extends VideoModelOptions<GeminiVideoModelInput, GeminiVideoModelOutput> {
|
|
54
|
+
/**
|
|
55
|
+
* API key for Gemini API
|
|
56
|
+
*
|
|
57
|
+
* If not provided, will look for GEMINI_API_KEY in environment variables
|
|
58
|
+
*/
|
|
59
|
+
apiKey?: string;
|
|
60
|
+
/**
|
|
61
|
+
* Base URL for Gemini API
|
|
62
|
+
*
|
|
63
|
+
* Useful for proxies or alternate endpoints
|
|
64
|
+
*/
|
|
65
|
+
baseURL?: string;
|
|
66
|
+
/**
|
|
67
|
+
* Gemini model to use
|
|
68
|
+
*
|
|
69
|
+
* Defaults to 'veo-3.1-generate-preview'
|
|
70
|
+
*/
|
|
71
|
+
model?: string;
|
|
72
|
+
/**
|
|
73
|
+
* Additional model options to control behavior
|
|
74
|
+
*/
|
|
75
|
+
modelOptions?: Omit<Partial<GeminiVideoModelInput>, "model">;
|
|
76
|
+
/**
|
|
77
|
+
* Client options for Gemini API
|
|
78
|
+
*/
|
|
79
|
+
clientOptions?: Record<string, any>;
|
|
80
|
+
/**
|
|
81
|
+
* Polling interval in milliseconds for checking video generation status
|
|
82
|
+
*
|
|
83
|
+
* Defaults to 10000ms (10 seconds)
|
|
84
|
+
*/
|
|
85
|
+
pollingInterval?: number;
|
|
86
|
+
}
|
|
87
|
+
export declare class GeminiVideoModel extends VideoModel<GeminiVideoModelInput, GeminiVideoModelOutput> {
|
|
88
|
+
options?: GeminiVideoModelOptions | undefined;
|
|
89
|
+
constructor(options?: GeminiVideoModelOptions | undefined);
|
|
90
|
+
/**
|
|
91
|
+
* @hidden
|
|
92
|
+
*/
|
|
93
|
+
protected _client?: GoogleGenAI;
|
|
94
|
+
protected apiKeyEnvName: string;
|
|
95
|
+
get client(): GoogleGenAI;
|
|
96
|
+
get credential(): {
|
|
97
|
+
url: string | undefined;
|
|
98
|
+
apiKey: string | undefined;
|
|
99
|
+
model: string;
|
|
100
|
+
};
|
|
101
|
+
get modelOptions(): Omit<Partial<GeminiVideoModelInput>, "model"> | undefined;
|
|
102
|
+
downloadToFile(dir: string, videoId: string, videoFile: {
|
|
103
|
+
uri?: string;
|
|
104
|
+
videoBytes?: any;
|
|
105
|
+
}): Promise<string>;
|
|
106
|
+
process(input: GeminiVideoModelInput, options: AgentInvokeOptions): Promise<GeminiVideoModelOutput>;
|
|
107
|
+
}
|
package/lib/dts/index.d.ts
CHANGED
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
import { type AgentInvokeOptions, VideoModel, type VideoModelInput, type VideoModelOptions, type VideoModelOutput } from "@aigne/core";
|
|
2
|
+
import { GoogleGenAI } from "@google/genai";
|
|
3
|
+
/**
|
|
4
|
+
* Input options for Gemini Video Model
|
|
5
|
+
*/
|
|
6
|
+
export interface GeminiVideoModelInput extends VideoModelInput {
|
|
7
|
+
/**
|
|
8
|
+
* Text describing content that should not appear in the video
|
|
9
|
+
*/
|
|
10
|
+
negativePrompt?: string;
|
|
11
|
+
/**
|
|
12
|
+
* Aspect ratio of the video
|
|
13
|
+
*
|
|
14
|
+
* Veo 3.1: "16:9" (default, 720p and 1080p), "9:16" (720p and 1080p)
|
|
15
|
+
* Veo 3: "16:9" (default, 720p and 1080p), "9:16" (720p and 1080p)
|
|
16
|
+
* Veo 2: "16:9" (default, 720p), "9:16" (720p)
|
|
17
|
+
*/
|
|
18
|
+
aspectRatio?: string;
|
|
19
|
+
/**
|
|
20
|
+
* Resolution of the video
|
|
21
|
+
*
|
|
22
|
+
* Veo 3.1: "720p" (default), "1080p" (only supports 8 seconds duration)
|
|
23
|
+
* Veo 3: "720p" (default), "1080p" (16:9 only)
|
|
24
|
+
* Veo 2: Not supported
|
|
25
|
+
*/
|
|
26
|
+
size?: string;
|
|
27
|
+
/**
|
|
28
|
+
* Duration of the generated video in seconds
|
|
29
|
+
*
|
|
30
|
+
* Veo 3.1: "4", "6", "8"
|
|
31
|
+
* Veo 3: "4", "6", "8"
|
|
32
|
+
* Veo 2: "5", "6", "8"
|
|
33
|
+
*/
|
|
34
|
+
seconds?: string;
|
|
35
|
+
/**
|
|
36
|
+
* Control person generation
|
|
37
|
+
*
|
|
38
|
+
* For text-to-video and image-to-video:
|
|
39
|
+
* - Veo 3.1: "allow_all" for image-to-video, frame interpolation and reference images; only "allow_adult" for text-to-video
|
|
40
|
+
* - Veo 3: "allow_all" for image-to-video; only "allow_adult" for text-to-video
|
|
41
|
+
* - Veo 2: "allow_all", "allow_adult", "dont_allow"
|
|
42
|
+
*/
|
|
43
|
+
personGeneration?: string;
|
|
44
|
+
}
|
|
45
|
+
/**
|
|
46
|
+
* Output from Gemini Video Model
|
|
47
|
+
*/
|
|
48
|
+
export interface GeminiVideoModelOutput extends VideoModelOutput {
|
|
49
|
+
}
|
|
50
|
+
/**
|
|
51
|
+
* Configuration options for Gemini Video Model
|
|
52
|
+
*/
|
|
53
|
+
export interface GeminiVideoModelOptions extends VideoModelOptions<GeminiVideoModelInput, GeminiVideoModelOutput> {
|
|
54
|
+
/**
|
|
55
|
+
* API key for Gemini API
|
|
56
|
+
*
|
|
57
|
+
* If not provided, will look for GEMINI_API_KEY in environment variables
|
|
58
|
+
*/
|
|
59
|
+
apiKey?: string;
|
|
60
|
+
/**
|
|
61
|
+
* Base URL for Gemini API
|
|
62
|
+
*
|
|
63
|
+
* Useful for proxies or alternate endpoints
|
|
64
|
+
*/
|
|
65
|
+
baseURL?: string;
|
|
66
|
+
/**
|
|
67
|
+
* Gemini model to use
|
|
68
|
+
*
|
|
69
|
+
* Defaults to 'veo-3.1-generate-preview'
|
|
70
|
+
*/
|
|
71
|
+
model?: string;
|
|
72
|
+
/**
|
|
73
|
+
* Additional model options to control behavior
|
|
74
|
+
*/
|
|
75
|
+
modelOptions?: Omit<Partial<GeminiVideoModelInput>, "model">;
|
|
76
|
+
/**
|
|
77
|
+
* Client options for Gemini API
|
|
78
|
+
*/
|
|
79
|
+
clientOptions?: Record<string, any>;
|
|
80
|
+
/**
|
|
81
|
+
* Polling interval in milliseconds for checking video generation status
|
|
82
|
+
*
|
|
83
|
+
* Defaults to 10000ms (10 seconds)
|
|
84
|
+
*/
|
|
85
|
+
pollingInterval?: number;
|
|
86
|
+
}
|
|
87
|
+
export declare class GeminiVideoModel extends VideoModel<GeminiVideoModelInput, GeminiVideoModelOutput> {
|
|
88
|
+
options?: GeminiVideoModelOptions | undefined;
|
|
89
|
+
constructor(options?: GeminiVideoModelOptions | undefined);
|
|
90
|
+
/**
|
|
91
|
+
* @hidden
|
|
92
|
+
*/
|
|
93
|
+
protected _client?: GoogleGenAI;
|
|
94
|
+
protected apiKeyEnvName: string;
|
|
95
|
+
get client(): GoogleGenAI;
|
|
96
|
+
get credential(): {
|
|
97
|
+
url: string | undefined;
|
|
98
|
+
apiKey: string | undefined;
|
|
99
|
+
model: string;
|
|
100
|
+
};
|
|
101
|
+
get modelOptions(): Omit<Partial<GeminiVideoModelInput>, "model"> | undefined;
|
|
102
|
+
downloadToFile(dir: string, videoId: string, videoFile: {
|
|
103
|
+
uri?: string;
|
|
104
|
+
videoBytes?: any;
|
|
105
|
+
}): Promise<string>;
|
|
106
|
+
process(input: GeminiVideoModelInput, options: AgentInvokeOptions): Promise<GeminiVideoModelOutput>;
|
|
107
|
+
}
|
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
import { VideoModel, videoModelInputSchema, } from "@aigne/core";
|
|
2
|
+
import { logger } from "@aigne/core/utils/logger.js";
|
|
3
|
+
import { checkArguments } from "@aigne/core/utils/type-utils.js";
|
|
4
|
+
import { nodejs } from "@aigne/platform-helpers/nodejs/index.js";
|
|
5
|
+
import { GoogleGenAI } from "@google/genai";
|
|
6
|
+
import { z } from "zod";
|
|
7
|
+
const DEFAULT_MODEL = "veo-3.1-generate-preview";
|
|
8
|
+
const DEFAULT_SECONDS = 8;
|
|
9
|
+
const geminiVideoModelInputSchema = videoModelInputSchema.extend({
|
|
10
|
+
negativePrompt: z.string().optional(),
|
|
11
|
+
aspectRatio: z.string().optional(),
|
|
12
|
+
personGeneration: z.string().optional(),
|
|
13
|
+
});
|
|
14
|
+
const geminiVideoModelOptionsSchema = z.object({
|
|
15
|
+
apiKey: z.string().optional(),
|
|
16
|
+
baseURL: z.string().optional(),
|
|
17
|
+
model: z.string().optional(),
|
|
18
|
+
modelOptions: z.object({}).optional(),
|
|
19
|
+
clientOptions: z.object({}).optional(),
|
|
20
|
+
pollingInterval: z.number().optional(),
|
|
21
|
+
});
|
|
22
|
+
export class GeminiVideoModel extends VideoModel {
|
|
23
|
+
options;
|
|
24
|
+
constructor(options) {
|
|
25
|
+
super({
|
|
26
|
+
...options,
|
|
27
|
+
description: options?.description ?? "Generate videos using Google Gemini Veo models",
|
|
28
|
+
inputSchema: geminiVideoModelInputSchema,
|
|
29
|
+
});
|
|
30
|
+
this.options = options;
|
|
31
|
+
if (options)
|
|
32
|
+
checkArguments(this.name, geminiVideoModelOptionsSchema, options);
|
|
33
|
+
}
|
|
34
|
+
/**
|
|
35
|
+
* @hidden
|
|
36
|
+
*/
|
|
37
|
+
_client;
|
|
38
|
+
apiKeyEnvName = "GEMINI_API_KEY";
|
|
39
|
+
get client() {
|
|
40
|
+
const { apiKey } = this.credential;
|
|
41
|
+
if (!apiKey)
|
|
42
|
+
throw new Error(`${this.name} requires an API key. Please provide it via \`options.apiKey\`, or set the \`${this.apiKeyEnvName}\` environment variable`);
|
|
43
|
+
this._client ??= new GoogleGenAI({ apiKey, ...this.options?.clientOptions });
|
|
44
|
+
return this._client;
|
|
45
|
+
}
|
|
46
|
+
get credential() {
|
|
47
|
+
return {
|
|
48
|
+
url: this.options?.baseURL || process.env.GEMINI_BASE_URL,
|
|
49
|
+
apiKey: this.options?.apiKey || process.env[this.apiKeyEnvName],
|
|
50
|
+
model: this.options?.model || DEFAULT_MODEL,
|
|
51
|
+
};
|
|
52
|
+
}
|
|
53
|
+
get modelOptions() {
|
|
54
|
+
return this.options?.modelOptions;
|
|
55
|
+
}
|
|
56
|
+
async downloadToFile(dir, videoId, videoFile) {
|
|
57
|
+
logger.debug("Downloading video content...");
|
|
58
|
+
const localPath = nodejs.path.join(dir, `${videoId}.mp4`);
|
|
59
|
+
await this.client.files.download({ file: videoFile, downloadPath: localPath });
|
|
60
|
+
logger.debug(`Generated video saved to ${localPath}`);
|
|
61
|
+
await new Promise((resolve) => setTimeout(resolve, 300));
|
|
62
|
+
const buffer = await nodejs.fs.readFile(localPath);
|
|
63
|
+
return buffer.toString("base64");
|
|
64
|
+
}
|
|
65
|
+
async process(input, options) {
|
|
66
|
+
const model = input.model ?? input.modelOptions?.model ?? this.credential.model;
|
|
67
|
+
const mergedInput = { ...this.modelOptions, ...input };
|
|
68
|
+
const config = {};
|
|
69
|
+
if (mergedInput.negativePrompt)
|
|
70
|
+
config.negativePrompt = mergedInput.negativePrompt;
|
|
71
|
+
if (mergedInput.aspectRatio)
|
|
72
|
+
config.aspectRatio = mergedInput.aspectRatio;
|
|
73
|
+
if (mergedInput.size)
|
|
74
|
+
config.resolution = mergedInput.size;
|
|
75
|
+
if (mergedInput.seconds)
|
|
76
|
+
config.durationSeconds = parseInt(mergedInput.seconds, 10);
|
|
77
|
+
if (mergedInput.personGeneration)
|
|
78
|
+
config.personGeneration = mergedInput.personGeneration;
|
|
79
|
+
const params = {
|
|
80
|
+
model,
|
|
81
|
+
prompt: mergedInput.prompt,
|
|
82
|
+
config,
|
|
83
|
+
};
|
|
84
|
+
// Start video generation
|
|
85
|
+
let operation = await this.client.models.generateVideos(params);
|
|
86
|
+
logger.debug("Video generation started...");
|
|
87
|
+
// Poll operation status until complete
|
|
88
|
+
const pollingInterval = this.options?.pollingInterval ?? 10000;
|
|
89
|
+
while (!operation.done) {
|
|
90
|
+
logger.debug("Waiting for video generation to complete...");
|
|
91
|
+
await new Promise((resolve) => setTimeout(resolve, pollingInterval));
|
|
92
|
+
operation = await this.client.operations.getVideosOperation({ operation });
|
|
93
|
+
}
|
|
94
|
+
if (!operation.response?.generatedVideos?.[0]?.video) {
|
|
95
|
+
throw new Error("Video generation failed: No video generated");
|
|
96
|
+
}
|
|
97
|
+
// Download the generated video
|
|
98
|
+
const generatedVideo = operation.response.generatedVideos[0];
|
|
99
|
+
const videoFile = generatedVideo.video;
|
|
100
|
+
if (!videoFile) {
|
|
101
|
+
throw new Error("Video generation failed: No video file returned");
|
|
102
|
+
}
|
|
103
|
+
// Save to temporary directory
|
|
104
|
+
const dir = nodejs.path.join(nodejs.os.tmpdir(), options?.context?.id || "");
|
|
105
|
+
await nodejs.fs.mkdir(dir, { recursive: true });
|
|
106
|
+
const videoId = Date.now().toString();
|
|
107
|
+
return {
|
|
108
|
+
videos: [
|
|
109
|
+
{
|
|
110
|
+
type: "file",
|
|
111
|
+
data: await this.downloadToFile(dir, videoId, videoFile),
|
|
112
|
+
mimeType: "video/mp4",
|
|
113
|
+
filename: `${videoId}.mp4`,
|
|
114
|
+
},
|
|
115
|
+
],
|
|
116
|
+
usage: {
|
|
117
|
+
inputTokens: 0,
|
|
118
|
+
outputTokens: 0,
|
|
119
|
+
},
|
|
120
|
+
model,
|
|
121
|
+
seconds: mergedInput.seconds ? parseInt(mergedInput.seconds, 10) : DEFAULT_SECONDS,
|
|
122
|
+
};
|
|
123
|
+
}
|
|
124
|
+
}
|
package/lib/esm/index.d.ts
CHANGED
package/lib/esm/index.js
CHANGED
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@aigne/gemini",
|
|
3
|
-
"version": "0.14.4-beta.1",
|
|
3
|
+
"version": "0.14.4-beta.3",
|
|
4
4
|
"description": "AIGNE Gemini SDK for integrating with Google's Gemini AI models",
|
|
5
5
|
"publishConfig": {
|
|
6
6
|
"access": "public"
|
|
@@ -39,7 +39,7 @@
|
|
|
39
39
|
"@google/genai": "^1.24.0",
|
|
40
40
|
"zod": "^3.25.67",
|
|
41
41
|
"zod-to-json-schema": "^3.24.6",
|
|
42
|
-
"@aigne/core": "^1.
|
|
42
|
+
"@aigne/core": "^1.65.0-beta.1",
|
|
43
43
|
"@aigne/platform-helpers": "^0.6.3"
|
|
44
44
|
},
|
|
45
45
|
"devDependencies": {
|
|
@@ -48,7 +48,7 @@
|
|
|
48
48
|
"npm-run-all": "^4.1.5",
|
|
49
49
|
"rimraf": "^6.0.1",
|
|
50
50
|
"typescript": "^5.9.2",
|
|
51
|
-
"@aigne/test-utils": "^0.5.57-beta"
|
|
51
|
+
"@aigne/test-utils": "^0.5.57-beta.2"
|
|
52
52
|
},
|
|
53
53
|
"scripts": {
|
|
54
54
|
"lint": "tsc --noEmit",
|