@aigne/gemini 0.14.4-beta.7 → 0.14.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,32 @@
1
1
  # Changelog
2
2
 
3
+ ## [0.14.4](https://github.com/AIGNE-io/aigne-framework/compare/gemini-v0.14.4-beta.8...gemini-v0.14.4) (2025-10-31)
4
+
5
+
6
+ ### Dependencies
7
+
8
+ * The following workspace dependencies were updated
9
+ * dependencies
10
+ * @aigne/core bumped to 1.65.0
11
+ * devDependencies
12
+ * @aigne/test-utils bumped to 0.5.57
13
+
14
+ ## [0.14.4-beta.8](https://github.com/AIGNE-io/aigne-framework/compare/gemini-v0.14.4-beta.7...gemini-v0.14.4-beta.8) (2025-10-31)
15
+
16
+
17
+ ### Bug Fixes
18
+
19
+ * **models:** add image parameters support for video generation ([#684](https://github.com/AIGNE-io/aigne-framework/issues/684)) ([b048b7f](https://github.com/AIGNE-io/aigne-framework/commit/b048b7f92bd7a532dbdbeb6fb5fa5499bae6b953))
20
+
21
+
22
+ ### Dependencies
23
+
24
+ * The following workspace dependencies were updated
25
+ * dependencies
26
+ * @aigne/core bumped to 1.65.0-beta.5
27
+ * devDependencies
28
+ * @aigne/test-utils bumped to 0.5.57-beta.6
29
+
3
30
  ## [0.14.4-beta.7](https://github.com/AIGNE-io/aigne-framework/compare/gemini-v0.14.4-beta.6...gemini-v0.14.4-beta.7) (2025-10-29)
4
31
 
5
32
 
@@ -1,4 +1,5 @@
1
- import { type AgentInvokeOptions, VideoModel, type VideoModelInput, type VideoModelOptions, type VideoModelOutput } from "@aigne/core";
1
+ import type { AgentInvokeOptions, FileUnionContent, VideoModelInput, VideoModelOptions, VideoModelOutput } from "@aigne/core";
2
+ import { VideoModel } from "@aigne/core";
2
3
  import { GoogleGenAI } from "@google/genai";
3
4
  /**
4
5
  * Input options for Gemini Video Model
@@ -13,25 +14,22 @@ export interface GeminiVideoModelInput extends VideoModelInput {
13
14
  *
14
15
  * Veo 3.1: "16:9" (default, 720p and 1080p), "9:16" (720p and 1080p)
15
16
  * Veo 3: "16:9" (default, 720p and 1080p), "9:16" (720p and 1080p)
16
- * Veo 2: "16:9" (default, 720p), "9:16" (720p)
17
17
  */
18
- aspectRatio?: string;
18
+ aspectRatio?: "16:9" | "9:16";
19
19
  /**
20
20
  * Resolution of the video
21
21
  *
22
22
  * Veo 3.1: "720p" (default), "1080p" (only supports 8 seconds duration)
23
23
  * Veo 3: "720p" (default), "1080p" (16:9 only)
24
- * Veo 2: Not supported
25
24
  */
26
- size?: string;
25
+ size?: "720p" | "1080p";
27
26
  /**
28
27
  * Duration of the generated video in seconds
29
28
  *
30
29
  * Veo 3.1: "4", "6", "8"
31
30
  * Veo 3: "4", "6", "8"
32
- * Veo 2: "5", "6", "8"
33
31
  */
34
- seconds?: string;
32
+ seconds?: "4" | "6" | "8";
35
33
  /**
36
34
  * Control person generation
37
35
  *
@@ -41,6 +39,15 @@ export interface GeminiVideoModelInput extends VideoModelInput {
41
39
  * - Veo 2: "allow_all", "allow_adult", "dont_allow"
42
40
  */
43
41
  personGeneration?: string;
42
+ /**
43
+ * Last frame for video generation (frame interpolation)
44
+ */
45
+ lastFrame?: FileUnionContent;
46
+ /**
47
+ * Reference images for video generation
48
+ * Only supported in Veo 3.1 models
49
+ */
50
+ referenceImages?: FileUnionContent[];
44
51
  }
45
52
  /**
46
53
  * Output from Gemini Video Model
@@ -7,12 +7,17 @@ const type_utils_js_1 = require("@aigne/core/utils/type-utils.js");
7
7
  const index_js_1 = require("@aigne/platform-helpers/nodejs/index.js");
8
8
  const genai_1 = require("@google/genai");
9
9
  const zod_1 = require("zod");
10
+ const utils_js_1 = require("./utils.js");
10
11
  const DEFAULT_MODEL = "veo-3.1-generate-preview";
11
12
  const DEFAULT_SECONDS = 8;
12
13
  const geminiVideoModelInputSchema = core_1.videoModelInputSchema.extend({
13
14
  negativePrompt: zod_1.z.string().optional(),
14
- aspectRatio: zod_1.z.string().optional(),
15
+ aspectRatio: zod_1.z.enum(["16:9", "9:16"]).optional(),
16
+ size: zod_1.z.enum(["720p", "1080p"]).optional(),
17
+ seconds: zod_1.z.enum(["4", "6", "8"]).optional(),
15
18
  personGeneration: zod_1.z.string().optional(),
19
+ lastFrame: core_1.fileUnionContentSchema.optional(),
20
+ referenceImages: core_1.fileUnionContentSchema.array().optional(),
16
21
  });
17
22
  const geminiVideoModelOptionsSchema = zod_1.z.object({
18
23
  apiKey: zod_1.z.string().optional(),
@@ -61,13 +66,16 @@ class GeminiVideoModel extends core_1.VideoModel {
61
66
  const localPath = index_js_1.nodejs.path.join(dir, `${videoId}.mp4`);
62
67
  await this.client.files.download({ file: videoFile, downloadPath: localPath });
63
68
  logger_js_1.logger.debug(`Generated video saved to ${localPath}`);
64
- await new Promise((resolve) => setTimeout(resolve, 300));
69
+ await (0, utils_js_1.waitFileSizeStable)(localPath);
65
70
  const buffer = await index_js_1.nodejs.fs.readFile(localPath);
66
71
  return buffer.toString("base64");
67
72
  }
68
73
  async process(input, options) {
69
74
  const model = input.model ?? input.modelOptions?.model ?? this.credential.model;
70
75
  const mergedInput = { ...this.modelOptions, ...input };
76
+ if (mergedInput.referenceImages && !model.includes("veo-3.1")) {
77
+ throw new Error("referenceImages is only supported in Veo 3.1 models");
78
+ }
71
79
  const config = {};
72
80
  if (mergedInput.negativePrompt)
73
81
  config.negativePrompt = mergedInput.negativePrompt;
@@ -79,11 +87,39 @@ class GeminiVideoModel extends core_1.VideoModel {
79
87
  config.durationSeconds = parseInt(mergedInput.seconds, 10);
80
88
  if (mergedInput.personGeneration)
81
89
  config.personGeneration = mergedInput.personGeneration;
90
+ if (mergedInput.lastFrame) {
91
+ config.lastFrame = await this.transformFileType("file", mergedInput.lastFrame, options).then((file) => {
92
+ return {
93
+ imageBytes: file.data,
94
+ mimeType: file.mimeType,
95
+ };
96
+ });
97
+ }
98
+ if (mergedInput.referenceImages) {
99
+ config.referenceImages = await Promise.all(mergedInput.referenceImages.map(async (image) => {
100
+ return await this.transformFileType("file", image, options).then((file) => {
101
+ return {
102
+ image: {
103
+ imageBytes: file.data,
104
+ mimeType: file.mimeType,
105
+ },
106
+ };
107
+ });
108
+ }));
109
+ }
82
110
  const params = {
83
111
  model,
84
112
  prompt: mergedInput.prompt,
85
113
  config,
86
114
  };
115
+ if (mergedInput.image) {
116
+ params.image = await this.transformFileType("file", mergedInput.image, options).then((file) => {
117
+ return {
118
+ imageBytes: file.data,
119
+ mimeType: file.mimeType,
120
+ };
121
+ });
122
+ }
87
123
  // Start video generation
88
124
  let operation = await this.client.models.generateVideos(params);
89
125
  logger_js_1.logger.debug("Video generation started...");
@@ -0,0 +1,15 @@
1
+ /**
2
+ * Wait for file size to stabilize, ensuring the file download is complete.
3
+ *
4
+ * @param filePath - The path to the file to check
5
+ * @param options - Configuration options
6
+ * @param options.checkInterval - Check interval in milliseconds (default: 500ms)
7
+ * @param options.stableCount - Number of consecutive checks with same size to consider stable (default: 3)
8
+ * @param options.timeout - Timeout in milliseconds (default: 60000ms)
9
+ * @throws Error when timeout is reached
10
+ */
11
+ export declare function waitFileSizeStable(filePath: string, options?: {
12
+ checkInterval?: number;
13
+ stableCount?: number;
14
+ timeout?: number;
15
+ }): Promise<void>;
@@ -0,0 +1,37 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.waitFileSizeStable = waitFileSizeStable;
4
+ const index_js_1 = require("@aigne/platform-helpers/nodejs/index.js");
5
+ /**
6
+ * Wait for file size to stabilize, ensuring the file download is complete.
7
+ *
8
+ * @param filePath - The path to the file to check
9
+ * @param options - Configuration options
10
+ * @param options.checkInterval - Check interval in milliseconds (default: 500ms)
11
+ * @param options.stableCount - Number of consecutive checks with same size to consider stable (default: 3)
12
+ * @param options.timeout - Timeout in milliseconds (default: 60000ms)
13
+ * @throws Error when timeout is reached
14
+ */
15
+ async function waitFileSizeStable(filePath, options) {
16
+ const checkInterval = options?.checkInterval ?? 500;
17
+ const requiredStableCount = options?.stableCount ?? 3;
18
+ const timeout = options?.timeout ?? 60000;
19
+ const startTime = Date.now();
20
+ let previousSize = 0;
21
+ let stableCount = 0;
22
+ while (stableCount < requiredStableCount) {
23
+ if (Date.now() - startTime > timeout) {
24
+ throw new Error(`Timeout waiting for file to stabilize: ${filePath}`);
25
+ }
26
+ await new Promise((resolve) => setTimeout(resolve, checkInterval));
27
+ const stats = await index_js_1.nodejs.fs.stat(filePath);
28
+ const currentSize = stats.size;
29
+ if (currentSize === previousSize && currentSize > 0) {
30
+ stableCount++;
31
+ }
32
+ else {
33
+ stableCount = 0;
34
+ previousSize = currentSize;
35
+ }
36
+ }
37
+ }
@@ -1,4 +1,5 @@
1
- import { type AgentInvokeOptions, VideoModel, type VideoModelInput, type VideoModelOptions, type VideoModelOutput } from "@aigne/core";
1
+ import type { AgentInvokeOptions, FileUnionContent, VideoModelInput, VideoModelOptions, VideoModelOutput } from "@aigne/core";
2
+ import { VideoModel } from "@aigne/core";
2
3
  import { GoogleGenAI } from "@google/genai";
3
4
  /**
4
5
  * Input options for Gemini Video Model
@@ -13,25 +14,22 @@ export interface GeminiVideoModelInput extends VideoModelInput {
13
14
  *
14
15
  * Veo 3.1: "16:9" (default, 720p and 1080p), "9:16" (720p and 1080p)
15
16
  * Veo 3: "16:9" (default, 720p and 1080p), "9:16" (720p and 1080p)
16
- * Veo 2: "16:9" (default, 720p), "9:16" (720p)
17
17
  */
18
- aspectRatio?: string;
18
+ aspectRatio?: "16:9" | "9:16";
19
19
  /**
20
20
  * Resolution of the video
21
21
  *
22
22
  * Veo 3.1: "720p" (default), "1080p" (only supports 8 seconds duration)
23
23
  * Veo 3: "720p" (default), "1080p" (16:9 only)
24
- * Veo 2: Not supported
25
24
  */
26
- size?: string;
25
+ size?: "720p" | "1080p";
27
26
  /**
28
27
  * Duration of the generated video in seconds
29
28
  *
30
29
  * Veo 3.1: "4", "6", "8"
31
30
  * Veo 3: "4", "6", "8"
32
- * Veo 2: "5", "6", "8"
33
31
  */
34
- seconds?: string;
32
+ seconds?: "4" | "6" | "8";
35
33
  /**
36
34
  * Control person generation
37
35
  *
@@ -41,6 +39,15 @@ export interface GeminiVideoModelInput extends VideoModelInput {
41
39
  * - Veo 2: "allow_all", "allow_adult", "dont_allow"
42
40
  */
43
41
  personGeneration?: string;
42
+ /**
43
+ * Last frame for video generation (frame interpolation)
44
+ */
45
+ lastFrame?: FileUnionContent;
46
+ /**
47
+ * Reference images for video generation
48
+ * Only supported in Veo 3.1 models
49
+ */
50
+ referenceImages?: FileUnionContent[];
44
51
  }
45
52
  /**
46
53
  * Output from Gemini Video Model
@@ -0,0 +1,15 @@
1
+ /**
2
+ * Wait for file size to stabilize, ensuring the file download is complete.
3
+ *
4
+ * @param filePath - The path to the file to check
5
+ * @param options - Configuration options
6
+ * @param options.checkInterval - Check interval in milliseconds (default: 500ms)
7
+ * @param options.stableCount - Number of consecutive checks with same size to consider stable (default: 3)
8
+ * @param options.timeout - Timeout in milliseconds (default: 60000ms)
9
+ * @throws Error when timeout is reached
10
+ */
11
+ export declare function waitFileSizeStable(filePath: string, options?: {
12
+ checkInterval?: number;
13
+ stableCount?: number;
14
+ timeout?: number;
15
+ }): Promise<void>;
@@ -1,4 +1,5 @@
1
- import { type AgentInvokeOptions, VideoModel, type VideoModelInput, type VideoModelOptions, type VideoModelOutput } from "@aigne/core";
1
+ import type { AgentInvokeOptions, FileUnionContent, VideoModelInput, VideoModelOptions, VideoModelOutput } from "@aigne/core";
2
+ import { VideoModel } from "@aigne/core";
2
3
  import { GoogleGenAI } from "@google/genai";
3
4
  /**
4
5
  * Input options for Gemini Video Model
@@ -13,25 +14,22 @@ export interface GeminiVideoModelInput extends VideoModelInput {
13
14
  *
14
15
  * Veo 3.1: "16:9" (default, 720p and 1080p), "9:16" (720p and 1080p)
15
16
  * Veo 3: "16:9" (default, 720p and 1080p), "9:16" (720p and 1080p)
16
- * Veo 2: "16:9" (default, 720p), "9:16" (720p)
17
17
  */
18
- aspectRatio?: string;
18
+ aspectRatio?: "16:9" | "9:16";
19
19
  /**
20
20
  * Resolution of the video
21
21
  *
22
22
  * Veo 3.1: "720p" (default), "1080p" (only supports 8 seconds duration)
23
23
  * Veo 3: "720p" (default), "1080p" (16:9 only)
24
- * Veo 2: Not supported
25
24
  */
26
- size?: string;
25
+ size?: "720p" | "1080p";
27
26
  /**
28
27
  * Duration of the generated video in seconds
29
28
  *
30
29
  * Veo 3.1: "4", "6", "8"
31
30
  * Veo 3: "4", "6", "8"
32
- * Veo 2: "5", "6", "8"
33
31
  */
34
- seconds?: string;
32
+ seconds?: "4" | "6" | "8";
35
33
  /**
36
34
  * Control person generation
37
35
  *
@@ -41,6 +39,15 @@ export interface GeminiVideoModelInput extends VideoModelInput {
41
39
  * - Veo 2: "allow_all", "allow_adult", "dont_allow"
42
40
  */
43
41
  personGeneration?: string;
42
+ /**
43
+ * Last frame for video generation (frame interpolation)
44
+ */
45
+ lastFrame?: FileUnionContent;
46
+ /**
47
+ * Reference images for video generation
48
+ * Only supported in Veo 3.1 models
49
+ */
50
+ referenceImages?: FileUnionContent[];
44
51
  }
45
52
  /**
46
53
  * Output from Gemini Video Model
@@ -1,15 +1,20 @@
1
- import { VideoModel, videoModelInputSchema, } from "@aigne/core";
1
+ import { fileUnionContentSchema, VideoModel, videoModelInputSchema } from "@aigne/core";
2
2
  import { logger } from "@aigne/core/utils/logger.js";
3
3
  import { checkArguments } from "@aigne/core/utils/type-utils.js";
4
4
  import { nodejs } from "@aigne/platform-helpers/nodejs/index.js";
5
5
  import { GoogleGenAI } from "@google/genai";
6
6
  import { z } from "zod";
7
+ import { waitFileSizeStable } from "./utils.js";
7
8
  const DEFAULT_MODEL = "veo-3.1-generate-preview";
8
9
  const DEFAULT_SECONDS = 8;
9
10
  const geminiVideoModelInputSchema = videoModelInputSchema.extend({
10
11
  negativePrompt: z.string().optional(),
11
- aspectRatio: z.string().optional(),
12
+ aspectRatio: z.enum(["16:9", "9:16"]).optional(),
13
+ size: z.enum(["720p", "1080p"]).optional(),
14
+ seconds: z.enum(["4", "6", "8"]).optional(),
12
15
  personGeneration: z.string().optional(),
16
+ lastFrame: fileUnionContentSchema.optional(),
17
+ referenceImages: fileUnionContentSchema.array().optional(),
13
18
  });
14
19
  const geminiVideoModelOptionsSchema = z.object({
15
20
  apiKey: z.string().optional(),
@@ -58,13 +63,16 @@ export class GeminiVideoModel extends VideoModel {
58
63
  const localPath = nodejs.path.join(dir, `${videoId}.mp4`);
59
64
  await this.client.files.download({ file: videoFile, downloadPath: localPath });
60
65
  logger.debug(`Generated video saved to ${localPath}`);
61
- await new Promise((resolve) => setTimeout(resolve, 300));
66
+ await waitFileSizeStable(localPath);
62
67
  const buffer = await nodejs.fs.readFile(localPath);
63
68
  return buffer.toString("base64");
64
69
  }
65
70
  async process(input, options) {
66
71
  const model = input.model ?? input.modelOptions?.model ?? this.credential.model;
67
72
  const mergedInput = { ...this.modelOptions, ...input };
73
+ if (mergedInput.referenceImages && !model.includes("veo-3.1")) {
74
+ throw new Error("referenceImages is only supported in Veo 3.1 models");
75
+ }
68
76
  const config = {};
69
77
  if (mergedInput.negativePrompt)
70
78
  config.negativePrompt = mergedInput.negativePrompt;
@@ -76,11 +84,39 @@ export class GeminiVideoModel extends VideoModel {
76
84
  config.durationSeconds = parseInt(mergedInput.seconds, 10);
77
85
  if (mergedInput.personGeneration)
78
86
  config.personGeneration = mergedInput.personGeneration;
87
+ if (mergedInput.lastFrame) {
88
+ config.lastFrame = await this.transformFileType("file", mergedInput.lastFrame, options).then((file) => {
89
+ return {
90
+ imageBytes: file.data,
91
+ mimeType: file.mimeType,
92
+ };
93
+ });
94
+ }
95
+ if (mergedInput.referenceImages) {
96
+ config.referenceImages = await Promise.all(mergedInput.referenceImages.map(async (image) => {
97
+ return await this.transformFileType("file", image, options).then((file) => {
98
+ return {
99
+ image: {
100
+ imageBytes: file.data,
101
+ mimeType: file.mimeType,
102
+ },
103
+ };
104
+ });
105
+ }));
106
+ }
79
107
  const params = {
80
108
  model,
81
109
  prompt: mergedInput.prompt,
82
110
  config,
83
111
  };
112
+ if (mergedInput.image) {
113
+ params.image = await this.transformFileType("file", mergedInput.image, options).then((file) => {
114
+ return {
115
+ imageBytes: file.data,
116
+ mimeType: file.mimeType,
117
+ };
118
+ });
119
+ }
84
120
  // Start video generation
85
121
  let operation = await this.client.models.generateVideos(params);
86
122
  logger.debug("Video generation started...");
@@ -0,0 +1,15 @@
1
+ /**
2
+ * Wait for file size to stabilize, ensuring the file download is complete.
3
+ *
4
+ * @param filePath - The path to the file to check
5
+ * @param options - Configuration options
6
+ * @param options.checkInterval - Check interval in milliseconds (default: 500ms)
7
+ * @param options.stableCount - Number of consecutive checks with same size to consider stable (default: 3)
8
+ * @param options.timeout - Timeout in milliseconds (default: 60000ms)
9
+ * @throws Error when timeout is reached
10
+ */
11
+ export declare function waitFileSizeStable(filePath: string, options?: {
12
+ checkInterval?: number;
13
+ stableCount?: number;
14
+ timeout?: number;
15
+ }): Promise<void>;
@@ -0,0 +1,34 @@
1
+ import { nodejs } from "@aigne/platform-helpers/nodejs/index.js";
2
+ /**
3
+ * Wait for file size to stabilize, ensuring the file download is complete.
4
+ *
5
+ * @param filePath - The path to the file to check
6
+ * @param options - Configuration options
7
+ * @param options.checkInterval - Check interval in milliseconds (default: 500ms)
8
+ * @param options.stableCount - Number of consecutive checks with same size to consider stable (default: 3)
9
+ * @param options.timeout - Timeout in milliseconds (default: 60000ms)
10
+ * @throws Error when timeout is reached
11
+ */
12
+ export async function waitFileSizeStable(filePath, options) {
13
+ const checkInterval = options?.checkInterval ?? 500;
14
+ const requiredStableCount = options?.stableCount ?? 3;
15
+ const timeout = options?.timeout ?? 60000;
16
+ const startTime = Date.now();
17
+ let previousSize = 0;
18
+ let stableCount = 0;
19
+ while (stableCount < requiredStableCount) {
20
+ if (Date.now() - startTime > timeout) {
21
+ throw new Error(`Timeout waiting for file to stabilize: ${filePath}`);
22
+ }
23
+ await new Promise((resolve) => setTimeout(resolve, checkInterval));
24
+ const stats = await nodejs.fs.stat(filePath);
25
+ const currentSize = stats.size;
26
+ if (currentSize === previousSize && currentSize > 0) {
27
+ stableCount++;
28
+ }
29
+ else {
30
+ stableCount = 0;
31
+ previousSize = currentSize;
32
+ }
33
+ }
34
+ }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@aigne/gemini",
3
- "version": "0.14.4-beta.7",
3
+ "version": "0.14.4",
4
4
  "description": "AIGNE Gemini SDK for integrating with Google's Gemini AI models",
5
5
  "publishConfig": {
6
6
  "access": "public"
@@ -39,7 +39,7 @@
39
39
  "@google/genai": "^1.24.0",
40
40
  "zod": "^3.25.67",
41
41
  "zod-to-json-schema": "^3.24.6",
42
- "@aigne/core": "^1.65.0-beta.4",
42
+ "@aigne/core": "^1.65.0",
43
43
  "@aigne/platform-helpers": "^0.6.3"
44
44
  },
45
45
  "devDependencies": {
@@ -48,7 +48,7 @@
48
48
  "npm-run-all": "^4.1.5",
49
49
  "rimraf": "^6.0.1",
50
50
  "typescript": "^5.9.2",
51
- "@aigne/test-utils": "^0.5.57-beta.5"
51
+ "@aigne/test-utils": "^0.5.57"
52
52
  },
53
53
  "scripts": {
54
54
  "lint": "tsc --noEmit",