@aigne/gemini 0.14.3 → 0.14.4-beta.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,49 @@
  # Changelog

+ ## [0.14.4-beta.2](https://github.com/AIGNE-io/aigne-framework/compare/gemini-v0.14.4-beta.1...gemini-v0.14.4-beta.2) (2025-10-24)
+
+
+ ### Features
+
+ * **model:** support video model ([#647](https://github.com/AIGNE-io/aigne-framework/issues/647)) ([de81742](https://github.com/AIGNE-io/aigne-framework/commit/de817421ef1dd3246d0d8c51ff12f0a855658f9f))
+
+
+ ### Bug Fixes
+
+ * **model:** updated default video duration settings for AI video models ([#663](https://github.com/AIGNE-io/aigne-framework/issues/663)) ([1203941](https://github.com/AIGNE-io/aigne-framework/commit/12039411aaef77ba665e8edfb0fe6f8097c43e39))
+
+
+ ### Dependencies
+
+ * The following workspace dependencies were updated
+   * dependencies
+     * @aigne/core bumped to 1.65.0-beta
+   * devDependencies
+     * @aigne/test-utils bumped to 0.5.57-beta.1
+
+ ## [0.14.4-beta.1](https://github.com/AIGNE-io/aigne-framework/compare/gemini-v0.14.4-beta...gemini-v0.14.4-beta.1) (2025-10-24)
+
+
+ ### Bug Fixes
+
+ * **gemini:** use StructuredOutputError to trigger retry for missing JSON response ([#660](https://github.com/AIGNE-io/aigne-framework/issues/660)) ([e8826ed](https://github.com/AIGNE-io/aigne-framework/commit/e8826ed96db57bfcce0b577881bf0d2fd828c269))
+
+ ## [0.14.4-beta](https://github.com/AIGNE-io/aigne-framework/compare/gemini-v0.14.3...gemini-v0.14.4-beta) (2025-10-23)
+
+
+ ### Bug Fixes
+
+ * **models:** improve message structure handling and enable auto-message options ([#657](https://github.com/AIGNE-io/aigne-framework/issues/657)) ([233d70c](https://github.com/AIGNE-io/aigne-framework/commit/233d70cb292b937200fada8434f33d957d766ad6))
+
+
+ ### Dependencies
+
+ * The following workspace dependencies were updated
+   * dependencies
+     * @aigne/core bumped to 1.64.1-beta
+   * devDependencies
+     * @aigne/test-utils bumped to 0.5.57-beta
+
  ## [0.14.3](https://github.com/AIGNE-io/aigne-framework/compare/gemini-v0.14.3-beta.1...gemini-v0.14.3) (2025-10-22)


@@ -41,4 +41,5 @@ export declare class GeminiChatModel extends ChatModel {
  private buildConfig;
  private buildTools;
  private buildContents;
+ private ensureMessagesHasUserMessage;
  }
@@ -151,7 +151,7 @@ class GeminiChatModel extends core_1.ChatModel {
  yield { delta: { json: { json: (0, core_1.safeParseJSON)(text) } } };
  }
  else if (!toolCalls.length) {
- throw new Error("No JSON response from the model");
+ throw new core_1.StructuredOutputError("No JSON response from the model");
  }
  }
  else if (!toolCalls.length) {
@@ -342,17 +342,26 @@ class GeminiChatModel extends core_1.ChatModel {
  }
  return content;
  }))).filter(type_utils_js_1.isNonNullable);
- if (!result.contents.length && systemParts.length) {
- const system = systemParts.pop();
- if (system) {
- result.contents.push({ role: "user", parts: [system] });
- }
- }
+ this.ensureMessagesHasUserMessage(systemParts, result.contents);
  if (systemParts.length) {
  result.config ??= {};
  result.config.systemInstruction = systemParts;
  }
  return result;
  }
+ ensureMessagesHasUserMessage(systems, contents) {
+ // no messages but system messages
+ if (!contents.length && systems.length) {
+ const system = systems.pop();
+ if (system)
+ contents.push({ role: "user", parts: [system] });
+ }
+ // first message is from model
+ if (contents[0]?.role === "model") {
+ const system = systems.pop();
+ if (system)
+ contents.unshift({ role: "user", parts: [system] });
+ }
+ }
  }
  exports.GeminiChatModel = GeminiChatModel;
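
The `ensureMessagesHasUserMessage` helper added above is the substance of the message-structure fix from #657: before the request is built, it guarantees that the `contents` array starts with a `user` turn, promoting a system part into a synthetic user message when the conversation is empty or begins with a `model` turn. A minimal TypeScript sketch of the same logic as a standalone function (the `Content`/`Part` types come from `@google/genai`; in the package itself this is a private method on `GeminiChatModel`):

```ts
import type { Content, Part } from "@google/genai";

// Sketch of the private helper shipped in gemini-chat-model.js (not part of the public API).
function ensureMessagesHasUserMessage(systems: Part[], contents: Content[]): void {
  // No conversation messages at all, but system parts exist:
  // turn one system part into the opening user message.
  if (!contents.length && systems.length) {
    const system = systems.pop();
    if (system) contents.push({ role: "user", parts: [system] });
  }
  // Conversation starts with a model turn:
  // prepend a user message built from a system part.
  if (contents[0]?.role === "model") {
    const system = systems.pop();
    if (system) contents.unshift({ role: "user", parts: [system] });
  }
}
```

Any system parts that remain after this normalization are still sent as `config.systemInstruction`, as the hunk above shows.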
@@ -0,0 +1,107 @@
+ import { type AgentInvokeOptions, VideoModel, type VideoModelInput, type VideoModelOptions, type VideoModelOutput } from "@aigne/core";
+ import { GoogleGenAI } from "@google/genai";
+ /**
+ * Input options for Gemini Video Model
+ */
+ export interface GeminiVideoModelInput extends VideoModelInput {
+ /**
+ * Text describing content that should not appear in the video
+ */
+ negativePrompt?: string;
+ /**
+ * Aspect ratio of the video
+ *
+ * Veo 3.1: "16:9" (default, 720p and 1080p), "9:16" (720p and 1080p)
+ * Veo 3: "16:9" (default, 720p and 1080p), "9:16" (720p and 1080p)
+ * Veo 2: "16:9" (default, 720p), "9:16" (720p)
+ */
+ aspectRatio?: string;
+ /**
+ * Resolution of the video
+ *
+ * Veo 3.1: "720p" (default), "1080p" (only supports 8 seconds duration)
+ * Veo 3: "720p" (default), "1080p" (16:9 only)
+ * Veo 2: Not supported
+ */
+ size?: string;
+ /**
+ * Duration of the generated video in seconds
+ *
+ * Veo 3.1: "4", "6", "8"
+ * Veo 3: "4", "6", "8"
+ * Veo 2: "5", "6", "8"
+ */
+ seconds?: string;
+ /**
+ * Control person generation
+ *
+ * For text-to-video and image-to-video:
+ * - Veo 3.1: "allow_all" for image-to-video, frame interpolation and reference images; only "allow_adult" for text-to-video
+ * - Veo 3: "allow_all" for image-to-video; only "allow_adult" for text-to-video
+ * - Veo 2: "allow_all", "allow_adult", "dont_allow"
+ */
+ personGeneration?: string;
+ }
+ /**
+ * Output from Gemini Video Model
+ */
+ export interface GeminiVideoModelOutput extends VideoModelOutput {
+ }
+ /**
+ * Configuration options for Gemini Video Model
+ */
+ export interface GeminiVideoModelOptions extends VideoModelOptions<GeminiVideoModelInput, GeminiVideoModelOutput> {
+ /**
+ * API key for Gemini API
+ *
+ * If not provided, will look for GEMINI_API_KEY in environment variables
+ */
+ apiKey?: string;
+ /**
+ * Base URL for Gemini API
+ *
+ * Useful for proxies or alternate endpoints
+ */
+ baseURL?: string;
+ /**
+ * Gemini model to use
+ *
+ * Defaults to 'veo-3.1-generate-preview'
+ */
+ model?: string;
+ /**
+ * Additional model options to control behavior
+ */
+ modelOptions?: Omit<Partial<GeminiVideoModelInput>, "model">;
+ /**
+ * Client options for Gemini API
+ */
+ clientOptions?: Record<string, any>;
+ /**
+ * Polling interval in milliseconds for checking video generation status
+ *
+ * Defaults to 10000ms (10 seconds)
+ */
+ pollingInterval?: number;
+ }
+ export declare class GeminiVideoModel extends VideoModel<GeminiVideoModelInput, GeminiVideoModelOutput> {
+ options?: GeminiVideoModelOptions | undefined;
+ constructor(options?: GeminiVideoModelOptions | undefined);
+ /**
+ * @hidden
+ */
+ protected _client?: GoogleGenAI;
+ protected apiKeyEnvName: string;
+ get client(): GoogleGenAI;
+ get credential(): {
+ url: string | undefined;
+ apiKey: string | undefined;
+ model: string;
+ };
+ get modelOptions(): Omit<Partial<GeminiVideoModelInput>, "model"> | undefined;
+ downloadToFile(dir: string, videoId: string, videoFile: {
+ uri?: string;
+ videoBytes?: any;
+ }): Promise<string>;
+ process(input: GeminiVideoModelInput, options: AgentInvokeOptions): Promise<GeminiVideoModelOutput>;
+ }
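
Based on the declaration above, the new video model is constructed like the other AIGNE models in this package. A minimal usage sketch, assuming `GEMINI_API_KEY` is set and that the model is invoked through the standard `invoke` entry point inherited from `@aigne/core` (the option and input fields are the ones declared in this file; the output fields are the ones returned by the implementation shown further below):

```ts
import { GeminiVideoModel } from "@aigne/gemini";

const videoModel = new GeminiVideoModel({
  model: "veo-3.1-generate-preview", // default per the declaration above
  pollingInterval: 15_000, // check the long-running operation every 15s instead of the 10s default
});

const result = await videoModel.invoke({
  prompt: "A timelapse of clouds rolling over a mountain ridge",
  aspectRatio: "16:9",
  seconds: "8",
});

// The implementation returns the generated clip as a base64 data: URL in result.videos[0].data.
console.log(result.model, result.seconds);
```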
@@ -0,0 +1,128 @@
+ "use strict";
+ Object.defineProperty(exports, "__esModule", { value: true });
+ exports.GeminiVideoModel = void 0;
+ const core_1 = require("@aigne/core");
+ const logger_js_1 = require("@aigne/core/utils/logger.js");
+ const type_utils_js_1 = require("@aigne/core/utils/type-utils.js");
+ const index_js_1 = require("@aigne/platform-helpers/nodejs/index.js");
+ const genai_1 = require("@google/genai");
+ const zod_1 = require("zod");
+ const DEFAULT_MODEL = "veo-3.1-generate-preview";
+ const DEFAULT_SECONDS = 8;
+ const geminiVideoModelInputSchema = core_1.videoModelInputSchema.extend({
+ negativePrompt: zod_1.z.string().optional(),
+ aspectRatio: zod_1.z.string().optional(),
+ personGeneration: zod_1.z.string().optional(),
+ });
+ const geminiVideoModelOptionsSchema = zod_1.z.object({
+ apiKey: zod_1.z.string().optional(),
+ baseURL: zod_1.z.string().optional(),
+ model: zod_1.z.string().optional(),
+ modelOptions: zod_1.z.object({}).optional(),
+ clientOptions: zod_1.z.object({}).optional(),
+ pollingInterval: zod_1.z.number().optional(),
+ });
+ class GeminiVideoModel extends core_1.VideoModel {
+ options;
+ constructor(options) {
+ super({
+ ...options,
+ description: options?.description ?? "Generate videos using Google Gemini Veo models",
+ inputSchema: geminiVideoModelInputSchema,
+ });
+ this.options = options;
+ if (options)
+ (0, type_utils_js_1.checkArguments)(this.name, geminiVideoModelOptionsSchema, options);
+ }
+ /**
+ * @hidden
+ */
+ _client;
+ apiKeyEnvName = "GEMINI_API_KEY";
+ get client() {
+ const { apiKey } = this.credential;
+ if (!apiKey)
+ throw new Error(`${this.name} requires an API key. Please provide it via \`options.apiKey\`, or set the \`${this.apiKeyEnvName}\` environment variable`);
+ this._client ??= new genai_1.GoogleGenAI({ apiKey, ...this.options?.clientOptions });
+ return this._client;
+ }
+ get credential() {
+ return {
+ url: this.options?.baseURL || process.env.GEMINI_BASE_URL,
+ apiKey: this.options?.apiKey || process.env[this.apiKeyEnvName],
+ model: this.options?.model || DEFAULT_MODEL,
+ };
+ }
+ get modelOptions() {
+ return this.options?.modelOptions;
+ }
+ async downloadToFile(dir, videoId, videoFile) {
+ logger_js_1.logger.debug("Downloading video content...");
+ const localPath = index_js_1.nodejs.path.join(dir, `${videoId}.mp4`);
+ await this.client.files.download({ file: videoFile, downloadPath: localPath });
+ logger_js_1.logger.debug(`Generated video saved to ${localPath}`);
+ await new Promise((resolve) => setTimeout(resolve, 300));
+ const buffer = await index_js_1.nodejs.fs.readFile(localPath);
+ const base64 = buffer.toString("base64");
+ const dataUrl = `data:video/mp4;base64,${base64}`;
+ return dataUrl;
+ }
+ async process(input, options) {
+ const model = input.model ?? this.credential.model;
+ const mergedInput = { ...this.modelOptions, ...input };
+ const config = {};
+ if (mergedInput.negativePrompt)
+ config.negativePrompt = mergedInput.negativePrompt;
+ if (mergedInput.aspectRatio)
+ config.aspectRatio = mergedInput.aspectRatio;
+ if (mergedInput.size)
+ config.resolution = mergedInput.size;
+ if (mergedInput.seconds)
+ config.durationSeconds = parseInt(mergedInput.seconds, 10);
+ if (mergedInput.personGeneration)
+ config.personGeneration = mergedInput.personGeneration;
+ const params = {
+ model,
+ prompt: mergedInput.prompt,
+ config,
+ };
+ // Start video generation
+ let operation = await this.client.models.generateVideos(params);
+ logger_js_1.logger.debug("Video generation started...");
+ // Poll operation status until complete
+ const pollingInterval = this.options?.pollingInterval ?? 10000;
+ while (!operation.done) {
+ logger_js_1.logger.debug("Waiting for video generation to complete...");
+ await new Promise((resolve) => setTimeout(resolve, pollingInterval));
+ operation = await this.client.operations.getVideosOperation({ operation });
+ }
+ if (!operation.response?.generatedVideos?.[0]?.video) {
+ throw new Error("Video generation failed: No video generated");
+ }
+ // Download the generated video
+ const generatedVideo = operation.response.generatedVideos[0];
+ const videoFile = generatedVideo.video;
+ if (!videoFile) {
+ throw new Error("Video generation failed: No video file returned");
+ }
+ // Save to temporary directory
+ const dir = index_js_1.nodejs.path.join(index_js_1.nodejs.os.tmpdir(), options?.context?.id || "");
+ await index_js_1.nodejs.fs.mkdir(dir, { recursive: true });
+ const videoId = Date.now().toString();
+ return {
+ videos: [
+ {
+ type: "file",
+ data: await this.downloadToFile(dir, videoId, videoFile),
+ },
+ ],
+ usage: {
+ inputTokens: 0,
+ outputTokens: 0,
+ },
+ model,
+ seconds: mergedInput.seconds ? parseInt(mergedInput.seconds, 10) : DEFAULT_SECONDS,
+ };
+ }
+ }
+ exports.GeminiVideoModel = GeminiVideoModel;
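
The `process` implementation above follows the Veo long-running-operation pattern: `models.generateVideos` returns an operation handle that is refreshed with `operations.getVideosOperation` until `done` is set, and the finished file is fetched with `files.download`. A condensed sketch of that loop against `@google/genai` directly, using the same calls as the code above (the `sleep` helper and the hard-coded prompt and config values are illustrative only):

```ts
import { GoogleGenAI } from "@google/genai";

const ai = new GoogleGenAI({ apiKey: process.env.GEMINI_API_KEY });
const sleep = (ms: number) => new Promise((resolve) => setTimeout(resolve, ms));

// Kick off generation, then poll the returned long-running operation until it completes.
let operation = await ai.models.generateVideos({
  model: "veo-3.1-generate-preview",
  prompt: "Waves crashing on a rocky shore at sunset",
  config: { durationSeconds: 8, aspectRatio: "16:9" },
});
while (!operation.done) {
  await sleep(10_000); // the package uses options.pollingInterval, default 10000ms
  operation = await ai.operations.getVideosOperation({ operation });
}

const video = operation.response?.generatedVideos?.[0]?.video;
if (!video) throw new Error("No video generated");
// GeminiVideoModel downloads the file to a temp dir and returns it as a data: URL;
// here it is simply written to disk.
await ai.files.download({ file: video, downloadPath: "output.mp4" });
```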
@@ -1,2 +1,3 @@
  export * from "./gemini-chat-model.js";
  export * from "./gemini-image-model.js";
+ export * from "./gemini-video-model.js";
package/lib/cjs/index.js CHANGED
@@ -16,3 +16,4 @@ var __exportStar = (this && this.__exportStar) || function(m, exports) {
  Object.defineProperty(exports, "__esModule", { value: true });
  __exportStar(require("./gemini-chat-model.js"), exports);
  __exportStar(require("./gemini-image-model.js"), exports);
+ __exportStar(require("./gemini-video-model.js"), exports);
@@ -41,4 +41,5 @@ export declare class GeminiChatModel extends ChatModel {
  private buildConfig;
  private buildTools;
  private buildContents;
+ private ensureMessagesHasUserMessage;
  }
@@ -0,0 +1,107 @@
+ import { type AgentInvokeOptions, VideoModel, type VideoModelInput, type VideoModelOptions, type VideoModelOutput } from "@aigne/core";
+ import { GoogleGenAI } from "@google/genai";
+ /**
+ * Input options for Gemini Video Model
+ */
+ export interface GeminiVideoModelInput extends VideoModelInput {
+ /**
+ * Text describing content that should not appear in the video
+ */
+ negativePrompt?: string;
+ /**
+ * Aspect ratio of the video
+ *
+ * Veo 3.1: "16:9" (default, 720p and 1080p), "9:16" (720p and 1080p)
+ * Veo 3: "16:9" (default, 720p and 1080p), "9:16" (720p and 1080p)
+ * Veo 2: "16:9" (default, 720p), "9:16" (720p)
+ */
+ aspectRatio?: string;
+ /**
+ * Resolution of the video
+ *
+ * Veo 3.1: "720p" (default), "1080p" (only supports 8 seconds duration)
+ * Veo 3: "720p" (default), "1080p" (16:9 only)
+ * Veo 2: Not supported
+ */
+ size?: string;
+ /**
+ * Duration of the generated video in seconds
+ *
+ * Veo 3.1: "4", "6", "8"
+ * Veo 3: "4", "6", "8"
+ * Veo 2: "5", "6", "8"
+ */
+ seconds?: string;
+ /**
+ * Control person generation
+ *
+ * For text-to-video and image-to-video:
+ * - Veo 3.1: "allow_all" for image-to-video, frame interpolation and reference images; only "allow_adult" for text-to-video
+ * - Veo 3: "allow_all" for image-to-video; only "allow_adult" for text-to-video
+ * - Veo 2: "allow_all", "allow_adult", "dont_allow"
+ */
+ personGeneration?: string;
+ }
+ /**
+ * Output from Gemini Video Model
+ */
+ export interface GeminiVideoModelOutput extends VideoModelOutput {
+ }
+ /**
+ * Configuration options for Gemini Video Model
+ */
+ export interface GeminiVideoModelOptions extends VideoModelOptions<GeminiVideoModelInput, GeminiVideoModelOutput> {
+ /**
+ * API key for Gemini API
+ *
+ * If not provided, will look for GEMINI_API_KEY in environment variables
+ */
+ apiKey?: string;
+ /**
+ * Base URL for Gemini API
+ *
+ * Useful for proxies or alternate endpoints
+ */
+ baseURL?: string;
+ /**
+ * Gemini model to use
+ *
+ * Defaults to 'veo-3.1-generate-preview'
+ */
+ model?: string;
+ /**
+ * Additional model options to control behavior
+ */
+ modelOptions?: Omit<Partial<GeminiVideoModelInput>, "model">;
+ /**
+ * Client options for Gemini API
+ */
+ clientOptions?: Record<string, any>;
+ /**
+ * Polling interval in milliseconds for checking video generation status
+ *
+ * Defaults to 10000ms (10 seconds)
+ */
+ pollingInterval?: number;
+ }
+ export declare class GeminiVideoModel extends VideoModel<GeminiVideoModelInput, GeminiVideoModelOutput> {
+ options?: GeminiVideoModelOptions | undefined;
+ constructor(options?: GeminiVideoModelOptions | undefined);
+ /**
+ * @hidden
+ */
+ protected _client?: GoogleGenAI;
+ protected apiKeyEnvName: string;
+ get client(): GoogleGenAI;
+ get credential(): {
+ url: string | undefined;
+ apiKey: string | undefined;
+ model: string;
+ };
+ get modelOptions(): Omit<Partial<GeminiVideoModelInput>, "model"> | undefined;
+ downloadToFile(dir: string, videoId: string, videoFile: {
+ uri?: string;
+ videoBytes?: any;
+ }): Promise<string>;
+ process(input: GeminiVideoModelInput, options: AgentInvokeOptions): Promise<GeminiVideoModelOutput>;
+ }
@@ -1,2 +1,3 @@
  export * from "./gemini-chat-model.js";
  export * from "./gemini-image-model.js";
+ export * from "./gemini-video-model.js";
@@ -41,4 +41,5 @@ export declare class GeminiChatModel extends ChatModel {
  private buildConfig;
  private buildTools;
  private buildContents;
+ private ensureMessagesHasUserMessage;
  }
@@ -148,7 +148,7 @@ export class GeminiChatModel extends ChatModel {
  yield { delta: { json: { json: safeParseJSON(text) } } };
  }
  else if (!toolCalls.length) {
- throw new Error("No JSON response from the model");
+ throw new StructuredOutputError("No JSON response from the model");
  }
  }
  else if (!toolCalls.length) {
@@ -339,16 +339,25 @@ export class GeminiChatModel extends ChatModel {
  }
  return content;
  }))).filter(isNonNullable);
- if (!result.contents.length && systemParts.length) {
- const system = systemParts.pop();
- if (system) {
- result.contents.push({ role: "user", parts: [system] });
- }
- }
+ this.ensureMessagesHasUserMessage(systemParts, result.contents);
  if (systemParts.length) {
  result.config ??= {};
  result.config.systemInstruction = systemParts;
  }
  return result;
  }
+ ensureMessagesHasUserMessage(systems, contents) {
+ // no messages but system messages
+ if (!contents.length && systems.length) {
+ const system = systems.pop();
+ if (system)
+ contents.push({ role: "user", parts: [system] });
+ }
+ // first message is from model
+ if (contents[0]?.role === "model") {
+ const system = systems.pop();
+ if (system)
+ contents.unshift({ role: "user", parts: [system] });
+ }
+ }
  }
@@ -0,0 +1,107 @@
+ import { type AgentInvokeOptions, VideoModel, type VideoModelInput, type VideoModelOptions, type VideoModelOutput } from "@aigne/core";
+ import { GoogleGenAI } from "@google/genai";
+ /**
+ * Input options for Gemini Video Model
+ */
+ export interface GeminiVideoModelInput extends VideoModelInput {
+ /**
+ * Text describing content that should not appear in the video
+ */
+ negativePrompt?: string;
+ /**
+ * Aspect ratio of the video
+ *
+ * Veo 3.1: "16:9" (default, 720p and 1080p), "9:16" (720p and 1080p)
+ * Veo 3: "16:9" (default, 720p and 1080p), "9:16" (720p and 1080p)
+ * Veo 2: "16:9" (default, 720p), "9:16" (720p)
+ */
+ aspectRatio?: string;
+ /**
+ * Resolution of the video
+ *
+ * Veo 3.1: "720p" (default), "1080p" (only supports 8 seconds duration)
+ * Veo 3: "720p" (default), "1080p" (16:9 only)
+ * Veo 2: Not supported
+ */
+ size?: string;
+ /**
+ * Duration of the generated video in seconds
+ *
+ * Veo 3.1: "4", "6", "8"
+ * Veo 3: "4", "6", "8"
+ * Veo 2: "5", "6", "8"
+ */
+ seconds?: string;
+ /**
+ * Control person generation
+ *
+ * For text-to-video and image-to-video:
+ * - Veo 3.1: "allow_all" for image-to-video, frame interpolation and reference images; only "allow_adult" for text-to-video
+ * - Veo 3: "allow_all" for image-to-video; only "allow_adult" for text-to-video
+ * - Veo 2: "allow_all", "allow_adult", "dont_allow"
+ */
+ personGeneration?: string;
+ }
+ /**
+ * Output from Gemini Video Model
+ */
+ export interface GeminiVideoModelOutput extends VideoModelOutput {
+ }
+ /**
+ * Configuration options for Gemini Video Model
+ */
+ export interface GeminiVideoModelOptions extends VideoModelOptions<GeminiVideoModelInput, GeminiVideoModelOutput> {
+ /**
+ * API key for Gemini API
+ *
+ * If not provided, will look for GEMINI_API_KEY in environment variables
+ */
+ apiKey?: string;
+ /**
+ * Base URL for Gemini API
+ *
+ * Useful for proxies or alternate endpoints
+ */
+ baseURL?: string;
+ /**
+ * Gemini model to use
+ *
+ * Defaults to 'veo-3.1-generate-preview'
+ */
+ model?: string;
+ /**
+ * Additional model options to control behavior
+ */
+ modelOptions?: Omit<Partial<GeminiVideoModelInput>, "model">;
+ /**
+ * Client options for Gemini API
+ */
+ clientOptions?: Record<string, any>;
+ /**
+ * Polling interval in milliseconds for checking video generation status
+ *
+ * Defaults to 10000ms (10 seconds)
+ */
+ pollingInterval?: number;
+ }
+ export declare class GeminiVideoModel extends VideoModel<GeminiVideoModelInput, GeminiVideoModelOutput> {
+ options?: GeminiVideoModelOptions | undefined;
+ constructor(options?: GeminiVideoModelOptions | undefined);
+ /**
+ * @hidden
+ */
+ protected _client?: GoogleGenAI;
+ protected apiKeyEnvName: string;
+ get client(): GoogleGenAI;
+ get credential(): {
+ url: string | undefined;
+ apiKey: string | undefined;
+ model: string;
+ };
+ get modelOptions(): Omit<Partial<GeminiVideoModelInput>, "model"> | undefined;
+ downloadToFile(dir: string, videoId: string, videoFile: {
+ uri?: string;
+ videoBytes?: any;
+ }): Promise<string>;
+ process(input: GeminiVideoModelInput, options: AgentInvokeOptions): Promise<GeminiVideoModelOutput>;
+ }
@@ -0,0 +1,124 @@
+ import { VideoModel, videoModelInputSchema, } from "@aigne/core";
+ import { logger } from "@aigne/core/utils/logger.js";
+ import { checkArguments } from "@aigne/core/utils/type-utils.js";
+ import { nodejs } from "@aigne/platform-helpers/nodejs/index.js";
+ import { GoogleGenAI } from "@google/genai";
+ import { z } from "zod";
+ const DEFAULT_MODEL = "veo-3.1-generate-preview";
+ const DEFAULT_SECONDS = 8;
+ const geminiVideoModelInputSchema = videoModelInputSchema.extend({
+ negativePrompt: z.string().optional(),
+ aspectRatio: z.string().optional(),
+ personGeneration: z.string().optional(),
+ });
+ const geminiVideoModelOptionsSchema = z.object({
+ apiKey: z.string().optional(),
+ baseURL: z.string().optional(),
+ model: z.string().optional(),
+ modelOptions: z.object({}).optional(),
+ clientOptions: z.object({}).optional(),
+ pollingInterval: z.number().optional(),
+ });
+ export class GeminiVideoModel extends VideoModel {
+ options;
+ constructor(options) {
+ super({
+ ...options,
+ description: options?.description ?? "Generate videos using Google Gemini Veo models",
+ inputSchema: geminiVideoModelInputSchema,
+ });
+ this.options = options;
+ if (options)
+ checkArguments(this.name, geminiVideoModelOptionsSchema, options);
+ }
+ /**
+ * @hidden
+ */
+ _client;
+ apiKeyEnvName = "GEMINI_API_KEY";
+ get client() {
+ const { apiKey } = this.credential;
+ if (!apiKey)
+ throw new Error(`${this.name} requires an API key. Please provide it via \`options.apiKey\`, or set the \`${this.apiKeyEnvName}\` environment variable`);
+ this._client ??= new GoogleGenAI({ apiKey, ...this.options?.clientOptions });
+ return this._client;
+ }
+ get credential() {
+ return {
+ url: this.options?.baseURL || process.env.GEMINI_BASE_URL,
+ apiKey: this.options?.apiKey || process.env[this.apiKeyEnvName],
+ model: this.options?.model || DEFAULT_MODEL,
+ };
+ }
+ get modelOptions() {
+ return this.options?.modelOptions;
+ }
+ async downloadToFile(dir, videoId, videoFile) {
+ logger.debug("Downloading video content...");
+ const localPath = nodejs.path.join(dir, `${videoId}.mp4`);
+ await this.client.files.download({ file: videoFile, downloadPath: localPath });
+ logger.debug(`Generated video saved to ${localPath}`);
+ await new Promise((resolve) => setTimeout(resolve, 300));
+ const buffer = await nodejs.fs.readFile(localPath);
+ const base64 = buffer.toString("base64");
+ const dataUrl = `data:video/mp4;base64,${base64}`;
+ return dataUrl;
+ }
+ async process(input, options) {
+ const model = input.model ?? this.credential.model;
+ const mergedInput = { ...this.modelOptions, ...input };
+ const config = {};
+ if (mergedInput.negativePrompt)
+ config.negativePrompt = mergedInput.negativePrompt;
+ if (mergedInput.aspectRatio)
+ config.aspectRatio = mergedInput.aspectRatio;
+ if (mergedInput.size)
+ config.resolution = mergedInput.size;
+ if (mergedInput.seconds)
+ config.durationSeconds = parseInt(mergedInput.seconds, 10);
+ if (mergedInput.personGeneration)
+ config.personGeneration = mergedInput.personGeneration;
+ const params = {
+ model,
+ prompt: mergedInput.prompt,
+ config,
+ };
+ // Start video generation
+ let operation = await this.client.models.generateVideos(params);
+ logger.debug("Video generation started...");
+ // Poll operation status until complete
+ const pollingInterval = this.options?.pollingInterval ?? 10000;
+ while (!operation.done) {
+ logger.debug("Waiting for video generation to complete...");
+ await new Promise((resolve) => setTimeout(resolve, pollingInterval));
+ operation = await this.client.operations.getVideosOperation({ operation });
+ }
+ if (!operation.response?.generatedVideos?.[0]?.video) {
+ throw new Error("Video generation failed: No video generated");
+ }
+ // Download the generated video
+ const generatedVideo = operation.response.generatedVideos[0];
+ const videoFile = generatedVideo.video;
+ if (!videoFile) {
+ throw new Error("Video generation failed: No video file returned");
+ }
+ // Save to temporary directory
+ const dir = nodejs.path.join(nodejs.os.tmpdir(), options?.context?.id || "");
+ await nodejs.fs.mkdir(dir, { recursive: true });
+ const videoId = Date.now().toString();
+ return {
+ videos: [
+ {
+ type: "file",
+ data: await this.downloadToFile(dir, videoId, videoFile),
+ },
+ ],
+ usage: {
+ inputTokens: 0,
+ outputTokens: 0,
+ },
+ model,
+ seconds: mergedInput.seconds ? parseInt(mergedInput.seconds, 10) : DEFAULT_SECONDS,
+ };
+ }
+ }
@@ -1,2 +1,3 @@
  export * from "./gemini-chat-model.js";
  export * from "./gemini-image-model.js";
+ export * from "./gemini-video-model.js";
package/lib/esm/index.js CHANGED
@@ -1,2 +1,3 @@
  export * from "./gemini-chat-model.js";
  export * from "./gemini-image-model.js";
+ export * from "./gemini-video-model.js";
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "@aigne/gemini",
- "version": "0.14.3",
+ "version": "0.14.4-beta.2",
  "description": "AIGNE Gemini SDK for integrating with Google's Gemini AI models",
  "publishConfig": {
  "access": "public"
@@ -39,7 +39,7 @@
  "@google/genai": "^1.24.0",
  "zod": "^3.25.67",
  "zod-to-json-schema": "^3.24.6",
- "@aigne/core": "^1.64.0",
+ "@aigne/core": "^1.65.0-beta",
  "@aigne/platform-helpers": "^0.6.3"
  },
  "devDependencies": {
@@ -48,7 +48,7 @@
  "npm-run-all": "^4.1.5",
  "rimraf": "^6.0.1",
  "typescript": "^5.9.2",
- "@aigne/test-utils": "^0.5.56"
+ "@aigne/test-utils": "^0.5.57-beta.1"
  },
  "scripts": {
  "lint": "tsc --noEmit",