@mixio-pro/kalaasetu-mcp 1.0.3 → 1.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@mixio-pro/kalaasetu-mcp",
3
- "version": "1.0.3",
3
+ "version": "1.0.4",
4
4
  "description": "A powerful Model Context Protocol server providing AI tools for content generation and analysis",
5
5
  "type": "module",
6
6
  "module": "src/index.ts",
@@ -0,0 +1,161 @@
1
+ import { z } from "zod";
2
+ import * as fs from "fs";
3
+ import * as path from "path";
4
+ import { GoogleGenAI } from "@google/genai";
5
+
6
/**
 * Pauses the caller for the given number of milliseconds.
 *
 * @param ms - Delay in milliseconds, handed unchanged to `setTimeout`.
 * @returns A promise that settles (with no value) once the timer fires.
 */
async function wait(ms: number): Promise<void> {
  await new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}
9
+
10
/**
 * Reads a file from disk and returns its contents base64-encoded together
 * with a MIME type guessed from the file extension.
 *
 * Recognised extensions (case-insensitive): `.jpg`/`.jpeg`, `.png`, `.webp`.
 * Any other extension, or none at all, falls back to `image/png`.
 *
 * @param filePath - Path of the file to read.
 * @returns `data` (base64 text of the file bytes) and `mimeType`.
 * @throws Error with message `File not found: <path>` when the path does not exist.
 */
function fileToBase64(filePath: string): { data: string; mimeType: string } {
  if (!fs.existsSync(filePath)) {
    throw new Error(`File not found: ${filePath}`);
  }

  // readFileSync without an encoding already yields a Buffer, so encode it
  // directly instead of copying it into a second Buffer first.
  const data = fs.readFileSync(filePath).toString("base64");

  // Detect mime type from extension
  const mimeByExtension: Record<string, string> = {
    ".jpg": "image/jpeg",
    ".jpeg": "image/jpeg",
    ".png": "image/png",
    ".webp": "image/webp",
  };
  const ext = path.extname(filePath).toLowerCase();
  const mimeType = mimeByExtension[ext] ?? "image/png";

  return { data, mimeType };
}
23
+
24
/**
 * Tool definition that generates video with a Veo model through the Google
 * GenAI SDK, polls the long-running operation until it finishes, downloads
 * every generated video and writes it to disk as an MP4 file.
 *
 * `execute` behaviour:
 * - Resolves the API key from `gemini_api_key` or the `GEMINI_API_KEY`
 *   environment variable and throws when neither is set.
 * - When `image_path` is given, the image is read from disk and sent as the
 *   starting frame.
 * - `duration_seconds` (default 6), `aspect_ratio`, `resolution`,
 *   `negative_prompt` and `person_generation` are forwarded in the SDK config
 *   using the SDK's camelCase field names.
 * - `reference_images` is accepted by the schema but is not forwarded by this
 *   implementation.
 * - Polls every 10 seconds, at most 60 times (10 minutes).
 *
 * Returns a human-readable summary string: the list of saved file paths, or a
 * diagnostic message when the operation completed without usable videos.
 * Throws `Error("Video generation failed: ...")` for any failure after the
 * request has been started (API error, timeout, download or write failure).
 */
export const imageToVideo = {
  name: "imageToVideo",
  description: "Generate videos from an image as starting first frame using Gemini Veo models via the Google GenAI SDK with a Gemini API key.",
  parameters: z.object({
    prompt: z.string().describe("Text description for the video"),
    image_path: z.string().optional().describe("Path to source image for image-to-video generation"),
    aspect_ratio: z.string().optional().describe("Video aspect ratio: '16:9' or '9:16' (default: '9:16')"),
    duration_seconds: z.number().optional().describe("Video duration in seconds: 4, 6, or 8 (default: 6)"),
    resolution: z.string().optional().describe("Video resolution: '720p' or '1080p' (default: '720p')"),
    negative_prompt: z.string().optional().describe("Text describing what not to include in the video"),
    person_generation: z.string().optional().describe("Controls generation of people: 'allow_adult' (default for image-to-video) or 'allow_all'"),
    reference_images: z.array(z.string()).optional().describe("Additional image paths for reference (max 3)"),
    output_path: z.string().optional().describe("Output MP4 file path (if multiple predictions, index suffix is added)"),
    gemini_api_key: z.string().optional().describe("Gemini API key (uses GEMINI_API_KEY env var if not provided)"),
    model_id: z.string().optional().describe("Model ID (default: veo-2.0-generate-001)"),
  }),
  execute: async (args: {
    prompt: string;
    image_path?: string;
    aspect_ratio?: string;
    duration_seconds?: number;
    resolution?: string;
    negative_prompt?: string;
    person_generation?: string;
    reference_images?: string[];
    output_path?: string;
    gemini_api_key?: string;
    model_id?: string;
  }): Promise<string> => {
    // `||` on purpose: an empty-string key must fall through to the env var.
    const apiKey = args.gemini_api_key || process.env.GEMINI_API_KEY;
    if (!apiKey) {
      throw new Error("Gemini API key is required. Set GEMINI_API_KEY environment variable or pass gemini_api_key parameter. Get one at https://aistudio.google.com/app/apikey");
    }

    const model = args.model_id || "veo-2.0-generate-001";
    const pollIntervalMs = 10000;
    const maxTries = 60; // 10 minutes with 10s intervals

    // Initialize Google GenAI client
    const genai = new GoogleGenAI({ apiKey });

    // Build config for video generation. The JS SDK reads camelCase keys;
    // snake_case keys are not part of its config type.
    // Kept as `any` because the SDK's config type is not visible from here.
    const config: any = {
      durationSeconds: args.duration_seconds ?? 6, // default
    };
    if (args.aspect_ratio) {
      config.aspectRatio = args.aspect_ratio;
    }
    if (args.resolution) {
      config.resolution = args.resolution;
    }
    if (args.negative_prompt) {
      config.negativePrompt = args.negative_prompt;
    }
    if (args.person_generation) {
      config.personGeneration = args.person_generation;
    }

    // Read the starting frame up front so a bad path fails before any API call.
    let image: { imageBytes: string; mimeType: string } | undefined;
    if (args.image_path) {
      const { data, mimeType } = fileToBase64(args.image_path);
      image = { imageBytes: data, mimeType };
    }

    try {
      // Start video generation operation
      // NOTE(review): console.log writes to stdout; if this server speaks MCP
      // over stdio these logs may need to go to stderr instead — confirm.
      console.log(`Starting video generation with model: ${model}`);
      let operation = await genai.models.generateVideos({
        model,
        prompt: args.prompt,
        ...(image ? { image } : {}),
        config,
      });

      console.log("Operation started, waiting for completion...");

      // Poll until operation is complete (max 10 minutes)
      let tries = 0;
      while (!operation.done && tries < maxTries) {
        await wait(pollIntervalMs);
        tries++;
        console.log(`Polling attempt ${tries}/${maxTries}...`);

        operation = await genai.operations.getVideosOperation({
          operation: operation,
        });
      }

      if (!operation.done) {
        throw new Error("Video generation timed out after 10 minutes");
      }

      // A finished operation can carry an error instead of a response;
      // surface it rather than reporting "no videos found".
      if (operation.error) {
        throw new Error(`Operation returned an error: ${JSON.stringify(operation.error)}`);
      }

      console.log("Operation completed!");
      // JSON.stringify(undefined) is undefined, so guard before slicing below.
      const respStr = JSON.stringify(operation.response, null, 2) ?? "";
      console.log("Full Response:", respStr);

      // Extract generated videos from response
      const generatedVideos = operation.response?.generatedVideos ?? [];

      if (generatedVideos.length === 0) {
        return `Video generation completed but no videos found in response.\n\nFull Response:\n${respStr.slice(0, 2000)}${respStr.length > 2000 ? '\n...(truncated)' : ''}`;
      }

      // Download and save videos
      const outputs: string[] = [];

      for (const [i, generatedVideo] of generatedVideos.entries()) {
        const videoUri = generatedVideo?.video?.uri;

        if (!videoUri) {
          console.warn(`Video ${i} has no URI`);
          continue;
        }

        console.log(`Downloading video ${i + 1}/${generatedVideos.length}...`);

        // Authenticate with a header: works whether or not the URI already
        // has a query string, and keeps the key out of the URL.
        const response = await fetch(videoUri, {
          method: "GET",
          headers: {
            "x-goog-api-key": apiKey,
          },
        });

        if (!response.ok) {
          throw new Error(`Failed to download video: ${response.status} ${response.statusText}`);
        }

        const buffer = await response.arrayBuffer();

        // Save video to file. The index suffix goes before the extension,
        // whatever it is, so later videos never overwrite the first one.
        let filePath: string;
        if (args.output_path) {
          if (i === 0) {
            filePath = args.output_path;
          } else {
            const parsed = path.parse(args.output_path);
            filePath = path.join(parsed.dir, `${parsed.name}_${i}${parsed.ext}`);
          }
        } else {
          filePath = `video_output_${Date.now()}${i === 0 ? '' : '_' + i}.mp4`;
        }
        const absPath = path.resolve(filePath);

        // Create the target directory so a missing folder does not discard
        // a video that has already been generated.
        fs.mkdirSync(path.dirname(absPath), { recursive: true });
        fs.writeFileSync(absPath, Buffer.from(buffer));
        outputs.push(absPath);
        console.log(`Saved video to: ${absPath}`);
      }

      if (outputs.length > 0) {
        return `Video(s) saved successfully:\n${outputs.map((p, i) => `${i + 1}. ${p}`).join('\n')}`;
      }

      return "Video generation completed but no videos were saved.";
    } catch (error: unknown) {
      const detail = error instanceof Error && error.message
        ? error.message
        : JSON.stringify(error);
      throw new Error(`Video generation failed: ${detail}`);
    }
  },
};
@@ -1,7 +1,6 @@
1
1
  import { z } from "zod";
2
2
  import * as fs from "fs";
3
3
  import * as path from "path";
4
- import { GoogleGenAI } from "@google/genai";
5
4
 
6
5
  async function wait(ms: number): Promise<void> {
7
6
  return new Promise((resolve) => setTimeout(resolve, ms));
@@ -23,25 +22,27 @@ function fileToBase64(filePath: string): { data: string; mimeType: string } {
23
22
 
24
23
  export const imageToVideo = {
25
24
  name: "imageToVideo",
26
- description: "Generate videos from an image as starting first frame using Vertex Veo models (predictLongRunning + fetchPredictOperation).",
25
+ description: "Generate videos from an image as starting first frame using Gemini Veo models via HTTP API with Gemini API key.",
27
26
  parameters: z.object({
28
27
  prompt: z.string().describe("Text description for the video"),
29
28
  image_path: z.string().optional().describe("Path to source image for image-to-video generation"),
29
+ last_frame_path: z.string().optional().describe("Path to last frame image to guide ending frame (optional)"),
30
30
  aspect_ratio: z.string().optional().describe("Video aspect ratio: '16:9' or '9:16' (default: '9:16')"),
31
- duration_seconds: z.number().optional().describe("Video duration in seconds: 4, 6, or 8 (default: 6)"),
31
+ duration_seconds: z.union([z.string(), z.number()]).optional().describe("Video duration in seconds: 4, 6, or 8 (default: 6)"),
32
32
  resolution: z.string().optional().describe("Video resolution: '720p' or '1080p' (default: '720p')"),
33
33
  negative_prompt: z.string().optional().describe("Text describing what not to include in the video"),
34
34
  person_generation: z.string().optional().describe("Controls generation of people: 'allow_adult' (default for image-to-video) or 'allow_all'"),
35
35
  reference_images: z.array(z.string()).optional().describe("Additional image paths for reference (max 3)"),
36
36
  output_path: z.string().optional().describe("Output MP4 file path (if multiple predictions, index suffix is added)"),
37
37
  gemini_api_key: z.string().optional().describe("Gemini API key (uses GEMINI_API_KEY env var if not provided)"),
38
- model_id: z.string().optional().describe("Model ID (default: veo-2.0-generate-001)"),
38
+ model_id: z.string().optional().describe("Model ID (default: veo-3.1-generate-preview)"),
39
39
  }),
40
40
  execute: async (args: {
41
41
  prompt: string;
42
42
  image_path?: string;
43
+ last_frame_path?: string;
43
44
  aspect_ratio?: string;
44
- duration_seconds?: number;
45
+ duration_seconds?: string | number;
45
46
  resolution?: string;
46
47
  negative_prompt?: string;
47
48
  person_generation?: string;
@@ -55,87 +56,188 @@ export const imageToVideo = {
55
56
  throw new Error("Gemini API key is required. Set GEMINI_API_KEY environment variable or pass gemini_api_key parameter. Get one at https://aistudio.google.com/app/apikey");
56
57
  }
57
58
 
58
- const model = args.model_id || "veo-2.0-generate-001";
59
-
60
- // Initialize Google GenAI client
61
- const genai = new GoogleGenAI({ apiKey });
62
-
63
- // Build config for video generation
64
- const config: any = {};
65
-
66
- if (args.duration_seconds !== undefined) {
67
- config.duration_seconds = args.duration_seconds;
68
- } else {
69
- config.duration_seconds = 6; // default
70
- }
71
-
72
- if (args.aspect_ratio) {
73
- config.aspect_ratio = args.aspect_ratio;
74
- }
59
+ const modelId = args.model_id || "veo-3.1-generate-preview";
60
+ const baseUrl = "https://generativelanguage.googleapis.com/v1beta";
61
+
62
+ // Convert duration_seconds to number, handling both string and number inputs
63
+ const durationSeconds = args.duration_seconds
64
+ ? (typeof args.duration_seconds === 'string' ? parseInt(args.duration_seconds) : args.duration_seconds)
65
+ : 6; // default
75
66
 
76
67
  try {
77
- // Start video generation operation
78
- console.log(`Starting video generation with model: ${model}`);
79
- let operation = await genai.models.generateVideos({
80
- model,
81
- prompt: args.prompt,
82
- config,
68
+ // Build the request body for predictLongRunning
69
+ const instances: any[] = [
70
+ {
71
+ prompt: args.prompt,
72
+ },
73
+ ];
74
+
75
+ // Add image if provided (first frame)
76
+ if (args.image_path) {
77
+ const { data, mimeType } = fileToBase64(args.image_path);
78
+ instances[0].image = {
79
+ bytesBase64Encoded: data,
80
+ mimeType,
81
+ };
82
+ }
83
+
84
+ // Add last frame if provided (for interpolation)
85
+ if (args.last_frame_path) {
86
+ const { data, mimeType } = fileToBase64(args.last_frame_path);
87
+ instances[0].lastFrame = {
88
+ bytesBase64Encoded: data,
89
+ mimeType,
90
+ };
91
+ }
92
+
93
+ // Add reference images if provided
94
+ if (args.reference_images && args.reference_images.length > 0) {
95
+ const refImages = args.reference_images.slice(0, 3).map((imgPath) => {
96
+ const { data, mimeType } = fileToBase64(imgPath);
97
+ return {
98
+ image: {
99
+ bytesBase64Encoded: data,
100
+ mimeType,
101
+ },
102
+ referenceType: "asset",
103
+ };
104
+ });
105
+ instances[0].referenceImages = refImages;
106
+ }
107
+
108
+ // Build parameters - NOTE: Parameters go in "parameters" object, not in instances
109
+ const parameters: any = {};
110
+
111
+ if (args.aspect_ratio) {
112
+ parameters.aspectRatio = args.aspect_ratio;
113
+ }
114
+
115
+ if (durationSeconds) {
116
+ parameters.durationSeconds = durationSeconds;
117
+ }
118
+
119
+ if (args.resolution) {
120
+ parameters.resolution = args.resolution;
121
+ }
122
+
123
+ if (args.negative_prompt) {
124
+ parameters.negativePrompt = args.negative_prompt;
125
+ }
126
+
127
+ if (args.person_generation) {
128
+ parameters.personGeneration = args.person_generation;
129
+ }
130
+
131
+ const requestBody: any = {
132
+ instances,
133
+ };
134
+
135
+ // Only add parameters if we have any
136
+ if (Object.keys(parameters).length > 0) {
137
+ requestBody.parameters = parameters;
138
+ }
139
+
140
+ console.log(`Starting video generation with model: ${modelId}`);
141
+
142
+ // Step 1: Start the long-running operation
143
+ const url = `${baseUrl}/models/${modelId}:predictLongRunning`;
144
+ const response = await fetch(url, {
145
+ method: "POST",
146
+ headers: {
147
+ "x-goog-api-key": apiKey,
148
+ "Content-Type": "application/json",
149
+ },
150
+ body: JSON.stringify(requestBody),
83
151
  });
84
152
 
85
- console.log("Operation started, waiting for completion...");
153
+ if (!response.ok) {
154
+ const errorText = await response.text();
155
+ throw new Error(`Video generation request failed: ${response.status} ${errorText}`);
156
+ }
157
+
158
+ const operation = await response.json() as any;
159
+ const operationName: string = operation.name || operation.operation || "";
86
160
 
87
- // Poll until operation is complete (max 10 minutes)
161
+ if (!operationName) {
162
+ throw new Error("No operation name returned from API");
163
+ }
164
+
165
+ console.log(`Operation started: ${operationName}`);
166
+
167
+ // Step 2: Poll the operation status by getting the operation directly
168
+ let currentOp: any = operation;
169
+ let done = !!operation.done;
88
170
  let tries = 0;
89
171
  const maxTries = 60; // 10 minutes with 10s intervals
90
-
91
- while (!operation.done && tries < maxTries) {
172
+
173
+ while (!done && tries < maxTries) {
92
174
  await wait(10000); // Wait 10 seconds
93
175
  tries++;
94
176
  console.log(`Polling attempt ${tries}/${maxTries}...`);
95
-
96
- operation = await genai.operations.getVideosOperation({
97
- operation: operation,
177
+
178
+ // Poll by getting the operation status directly
179
+ const pollResponse = await fetch(`${baseUrl}/${operationName}`, {
180
+ method: "GET",
181
+ headers: {
182
+ "x-goog-api-key": apiKey,
183
+ },
98
184
  });
185
+
186
+ if (!pollResponse.ok) {
187
+ const errorText = await pollResponse.text();
188
+ throw new Error(`Operation polling failed: ${pollResponse.status} ${errorText}`);
189
+ }
190
+
191
+ currentOp = await pollResponse.json() as any;
192
+ done = !!currentOp.done || !!currentOp.response;
99
193
  }
100
194
 
101
- if (!operation.done) {
195
+ if (!done) {
102
196
  throw new Error("Video generation timed out after 10 minutes");
103
197
  }
104
198
 
105
199
  console.log("Operation completed!");
106
- console.log("Full Response:", JSON.stringify(operation.response, null, 2));
107
200
 
108
- // Extract generated videos from response
109
- const generatedVideos = operation.response?.generatedVideos || [];
201
+ // Step 3: Extract and download videos
202
+ const resp = currentOp.response || currentOp;
110
203
 
111
- if (!generatedVideos || generatedVideos.length === 0) {
112
- const respStr = JSON.stringify(operation.response, null, 2);
113
- return `Video generation completed but no videos found in response.\n\nFull Response:\n${respStr.slice(0, 2000)}${respStr.length > 2000 ? '\n...(truncated)' : ''}`;
204
+ // The response structure is: response.generateVideoResponse.generatedSamples[].video.uri
205
+ const generateVideoResponse = resp?.generateVideoResponse;
206
+ const generatedSamples = generateVideoResponse?.generatedSamples || [];
207
+
208
+ if (!generatedSamples || generatedSamples.length === 0) {
209
+ let jsonStr = "";
210
+ try { jsonStr = JSON.stringify(resp, null, 2); } catch {}
211
+ return `Video generation completed but no generatedSamples found.\n\nFull Response:\n${jsonStr.slice(0, 1000)}${jsonStr.length > 1000 ? '\n...(truncated)' : ''}`;
114
212
  }
115
213
 
116
- // Download and save videos
117
214
  const outputs: string[] = [];
118
-
119
- for (let i = 0; i < generatedVideos.length; i++) {
120
- const generatedVideo = generatedVideos[i];
121
- const videoUri = generatedVideo?.video?.uri;
215
+
216
+ // Download videos from URIs
217
+ for (let i = 0; i < generatedSamples.length; i++) {
218
+ const sample = generatedSamples[i];
219
+ const videoUri = sample?.video?.uri;
122
220
 
123
221
  if (!videoUri) {
124
- console.warn(`Video ${i} has no URI`);
222
+ console.warn(`Sample ${i} has no video URI`);
125
223
  continue;
126
224
  }
127
225
 
128
- console.log(`Downloading video ${i + 1}/${generatedVideos.length}...`);
226
+ console.log(`Downloading video ${i + 1}/${generatedSamples.length} from ${videoUri}...`);
129
227
 
130
- // Download video from URI
131
- const videoUrl = `${videoUri}&key=${apiKey}`;
132
- const response = await fetch(videoUrl);
133
-
134
- if (!response.ok) {
135
- throw new Error(`Failed to download video: ${response.status} ${response.statusText}`);
228
+ // Download video from URI with API key
229
+ const videoResponse = await fetch(videoUri, {
230
+ method: "GET",
231
+ headers: {
232
+ "x-goog-api-key": apiKey,
233
+ },
234
+ });
235
+
236
+ if (!videoResponse.ok) {
237
+ throw new Error(`Failed to download video: ${videoResponse.status} ${videoResponse.statusText}`);
136
238
  }
137
-
138
- const buffer = await response.arrayBuffer();
239
+
240
+ const videoBuffer = await videoResponse.arrayBuffer();
139
241
 
140
242
  // Save video to file
141
243
  const filePath = args.output_path
@@ -143,7 +245,7 @@ export const imageToVideo = {
143
245
  : `video_output_${Date.now()}${i === 0 ? '' : '_' + i}.mp4`;
144
246
  const absPath = path.resolve(filePath);
145
247
 
146
- fs.writeFileSync(absPath, Buffer.from(buffer));
248
+ fs.writeFileSync(absPath, Buffer.from(videoBuffer));
147
249
  outputs.push(absPath);
148
250
  console.log(`Saved video to: ${absPath}`);
149
251
  }