@mixio-pro/kalaasetu-mcp 1.0.3 → 1.0.5-beta

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@mixio-pro/kalaasetu-mcp",
3
- "version": "1.0.3",
3
+ "version": "1.0.5-beta",
4
4
  "description": "A powerful Model Context Protocol server providing AI tools for content generation and analysis",
5
5
  "type": "module",
6
6
  "module": "src/index.ts",
@@ -0,0 +1,161 @@
1
+ import { z } from "zod";
2
+ import * as fs from "fs";
3
+ import * as path from "path";
4
+ import { GoogleGenAI } from "@google/genai";
5
+
6
+ async function wait(ms: number): Promise<void> {
7
+ return new Promise((resolve) => setTimeout(resolve, ms));
8
+ }
9
+
10
+ function fileToBase64(filePath: string): { data: string; mimeType: string } {
11
+ if (!fs.existsSync(filePath)) {
12
+ throw new Error(`File not found: ${filePath}`);
13
+ }
14
+ const buf = fs.readFileSync(filePath);
15
+ const data = Buffer.from(buf).toString("base64");
16
+ // Detect mime type from extension
17
+ const ext = path.extname(filePath).toLowerCase();
18
+ const mimeType = ext === '.jpg' || ext === '.jpeg' ? 'image/jpeg' :
19
+ ext === '.png' ? 'image/png' :
20
+ ext === '.webp' ? 'image/webp' : 'image/png';
21
+ return { data, mimeType };
22
+ }
23
+
24
+ export const imageToVideo = {
25
+ name: "imageToVideo",
26
+ description: "Generate videos from an image as starting first frame using Vertex Veo models (predictLongRunning + fetchPredictOperation).",
27
+ parameters: z.object({
28
+ prompt: z.string().describe("Text description for the video"),
29
+ image_path: z.string().optional().describe("Path to source image for image-to-video generation"),
30
+ aspect_ratio: z.string().optional().describe("Video aspect ratio: '16:9' or '9:16' (default: '9:16')"),
31
+ duration_seconds: z.number().optional().describe("Video duration in seconds: 4, 6, or 8 (default: 6)"),
32
+ resolution: z.string().optional().describe("Video resolution: '720p' or '1080p' (default: '720p')"),
33
+ negative_prompt: z.string().optional().describe("Text describing what not to include in the video"),
34
+ person_generation: z.string().optional().describe("Controls generation of people: 'allow_adult' (default for image-to-video) or 'allow_all'"),
35
+ reference_images: z.array(z.string()).optional().describe("Additional image paths for reference (max 3)"),
36
+ output_path: z.string().optional().describe("Output MP4 file path (if multiple predictions, index suffix is added)"),
37
+ gemini_api_key: z.string().optional().describe("Gemini API key (uses GEMINI_API_KEY env var if not provided)"),
38
+ model_id: z.string().optional().describe("Model ID (default: veo-2.0-generate-001)"),
39
+ }),
40
+ execute: async (args: {
41
+ prompt: string;
42
+ image_path?: string;
43
+ aspect_ratio?: string;
44
+ duration_seconds?: number;
45
+ resolution?: string;
46
+ negative_prompt?: string;
47
+ person_generation?: string;
48
+ reference_images?: string[];
49
+ output_path?: string;
50
+ gemini_api_key?: string;
51
+ model_id?: string;
52
+ }) => {
53
+ const apiKey = args.gemini_api_key || process.env.GEMINI_API_KEY;
54
+ if (!apiKey) {
55
+ throw new Error("Gemini API key is required. Set GEMINI_API_KEY environment variable or pass gemini_api_key parameter. Get one at https://aistudio.google.com/app/apikey");
56
+ }
57
+
58
+ const model = args.model_id || "veo-2.0-generate-001";
59
+
60
+ // Initialize Google GenAI client
61
+ const genai = new GoogleGenAI({ apiKey });
62
+
63
+ // Build config for video generation
64
+ const config: any = {};
65
+
66
+ if (args.duration_seconds !== undefined) {
67
+ config.duration_seconds = args.duration_seconds;
68
+ } else {
69
+ config.duration_seconds = 6; // default
70
+ }
71
+
72
+ if (args.aspect_ratio) {
73
+ config.aspect_ratio = args.aspect_ratio;
74
+ }
75
+
76
+ try {
77
+ // Start video generation operation
78
+ console.log(`Starting video generation with model: ${model}`);
79
+ let operation = await genai.models.generateVideos({
80
+ model,
81
+ prompt: args.prompt,
82
+ config,
83
+ });
84
+
85
+ console.log("Operation started, waiting for completion...");
86
+
87
+ // Poll until operation is complete (max 10 minutes)
88
+ let tries = 0;
89
+ const maxTries = 60; // 10 minutes with 10s intervals
90
+
91
+ while (!operation.done && tries < maxTries) {
92
+ await wait(10000); // Wait 10 seconds
93
+ tries++;
94
+ console.log(`Polling attempt ${tries}/${maxTries}...`);
95
+
96
+ operation = await genai.operations.getVideosOperation({
97
+ operation: operation,
98
+ });
99
+ }
100
+
101
+ if (!operation.done) {
102
+ throw new Error("Video generation timed out after 10 minutes");
103
+ }
104
+
105
+ console.log("Operation completed!");
106
+ console.log("Full Response:", JSON.stringify(operation.response, null, 2));
107
+
108
+ // Extract generated videos from response
109
+ const generatedVideos = operation.response?.generatedVideos || [];
110
+
111
+ if (!generatedVideos || generatedVideos.length === 0) {
112
+ const respStr = JSON.stringify(operation.response, null, 2);
113
+ return `Video generation completed but no videos found in response.\n\nFull Response:\n${respStr.slice(0, 2000)}${respStr.length > 2000 ? '\n...(truncated)' : ''}`;
114
+ }
115
+
116
+ // Download and save videos
117
+ const outputs: string[] = [];
118
+
119
+ for (let i = 0; i < generatedVideos.length; i++) {
120
+ const generatedVideo = generatedVideos[i];
121
+ const videoUri = generatedVideo?.video?.uri;
122
+
123
+ if (!videoUri) {
124
+ console.warn(`Video ${i} has no URI`);
125
+ continue;
126
+ }
127
+
128
+ console.log(`Downloading video ${i + 1}/${generatedVideos.length}...`);
129
+
130
+ // Download video from URI
131
+ const videoUrl = `${videoUri}&key=${apiKey}`;
132
+ const response = await fetch(videoUrl);
133
+
134
+ if (!response.ok) {
135
+ throw new Error(`Failed to download video: ${response.status} ${response.statusText}`);
136
+ }
137
+
138
+ const buffer = await response.arrayBuffer();
139
+
140
+ // Save video to file
141
+ const filePath = args.output_path
142
+ ? (i === 0 ? args.output_path : args.output_path.replace(/\.mp4$/i, `_${i}.mp4`))
143
+ : `video_output_${Date.now()}${i === 0 ? '' : '_' + i}.mp4`;
144
+ const absPath = path.resolve(filePath);
145
+
146
+ fs.writeFileSync(absPath, Buffer.from(buffer));
147
+ outputs.push(absPath);
148
+ console.log(`Saved video to: ${absPath}`);
149
+ }
150
+
151
+ if (outputs.length > 0) {
152
+ return `Video(s) saved successfully:\n${outputs.map((p, i) => `${i + 1}. ${p}`).join('\n')}`;
153
+ }
154
+
155
+ return "Video generation completed but no videos were saved.";
156
+
157
+ } catch (error: any) {
158
+ throw new Error(`Video generation failed: ${error.message || JSON.stringify(error)}`);
159
+ }
160
+ },
161
+ };
@@ -1,161 +1,232 @@
1
- import { z } from "zod";
1
+ // @ts-nocheck
2
2
  import * as fs from "fs";
3
+ import { GoogleAuth } from "google-auth-library";
4
+ import { exec } from "child_process";
3
5
  import * as path from "path";
4
- import { GoogleGenAI } from "@google/genai";
6
+ import { z } from "zod";
5
7
 
6
8
  async function wait(ms: number): Promise<void> {
7
9
  return new Promise((resolve) => setTimeout(resolve, ms));
8
10
  }
9
11
 
10
- function fileToBase64(filePath: string): { data: string; mimeType: string } {
11
- if (!fs.existsSync(filePath)) {
12
- throw new Error(`File not found: ${filePath}`);
12
+ async function fetchAccessToken(): Promise<string> {
13
+ try {
14
+ const auth = new GoogleAuth({ scopes: ["https://www.googleapis.com/auth/cloud-platform"] });
15
+ const client = await auth.getClient();
16
+ const token = await client.getAccessToken();
17
+ if (!token || typeof token !== "string") {
18
+ throw new Error("No token from GoogleAuth");
19
+ }
20
+ return token;
21
+ } catch (e) {
22
+ // Fallback to gcloud
23
+ return await new Promise((resolve, reject) => {
24
+ exec("gcloud auth print-access-token", (err, stdout, stderr) => {
25
+ if (err) {
26
+ reject(new Error(`Failed to fetch an access token (ADC and gcloud): ${stderr || err.message}`));
27
+ return;
28
+ }
29
+ const t = (stdout || "").trim();
30
+ if (!t) {
31
+ reject(new Error("Failed to fetch an access token: empty token from gcloud"));
32
+ return;
33
+ }
34
+ resolve(t);
35
+ });
36
+ });
37
+ }
38
+ }
39
+
40
+ function fileToBase64(path: string): { data: string; mimeType: string } {
41
+ if (!fs.existsSync(path)) {
42
+ throw new Error(`File not found: ${path}`);
13
43
  }
14
- const buf = fs.readFileSync(filePath);
44
+ const buf = fs.readFileSync(path);
15
45
  const data = Buffer.from(buf).toString("base64");
16
- // Detect mime type from extension
17
- const ext = path.extname(filePath).toLowerCase();
18
- const mimeType = ext === '.jpg' || ext === '.jpeg' ? 'image/jpeg' :
19
- ext === '.png' ? 'image/png' :
20
- ext === '.webp' ? 'image/webp' : 'image/png';
46
+ // Default to PNG if not sure, similar to existing code
47
+ const mimeType = "image/png";
21
48
  return { data, mimeType };
22
49
  }
23
50
 
24
- export const imageToVideo = {
25
- name: "imageToVideo",
51
+ export const imageToVideo = ({
52
+ name: "image_to_video",
26
53
  description: "Generate videos from an image as starting first frame using Vertex Veo models (predictLongRunning + fetchPredictOperation).",
27
54
  parameters: z.object({
28
55
  prompt: z.string().describe("Text description for the video"),
29
56
  image_path: z.string().optional().describe("Path to source image for image-to-video generation"),
57
+ last_frame_path: z.string().optional().describe("Path to last frame image to guide ending frame (optional)"),
30
58
  aspect_ratio: z.string().optional().describe("Video aspect ratio: '16:9' or '9:16' (default: '9:16')"),
31
- duration_seconds: z.number().optional().describe("Video duration in seconds: 4, 6, or 8 (default: 6)"),
59
+ duration_seconds: z.string().optional().describe("Video duration in seconds: '4', '6', or '8' (default: '6')"),
32
60
  resolution: z.string().optional().describe("Video resolution: '720p' or '1080p' (default: '720p')"),
33
61
  negative_prompt: z.string().optional().describe("Text describing what not to include in the video"),
34
62
  person_generation: z.string().optional().describe("Controls generation of people: 'allow_adult' (default for image-to-video) or 'allow_all'"),
35
63
  reference_images: z.array(z.string()).optional().describe("Additional image paths for reference (max 3)"),
36
64
  output_path: z.string().optional().describe("Output MP4 file path (if multiple predictions, index suffix is added)"),
37
- gemini_api_key: z.string().optional().describe("Gemini API key (uses GEMINI_API_KEY env var if not provided)"),
38
- model_id: z.string().optional().describe("Model ID (default: veo-2.0-generate-001)"),
65
+ project_id: z.string().optional().describe("GCP Project ID (default: mixio-pro)"),
66
+ location_id: z.string().optional().describe("Vertex region (default: us-central1)"),
67
+ model_id: z.string().optional().describe("Model ID (default: veo-3.1-fast-generate-preview)"),
68
+ generate_audio: z.boolean().optional().describe("Boolean flag to enable generation of audio along with the video").default(false)
39
69
  }),
40
- execute: async (args: {
41
- prompt: string;
42
- image_path?: string;
43
- aspect_ratio?: string;
44
- duration_seconds?: number;
45
- resolution?: string;
46
- negative_prompt?: string;
47
- person_generation?: string;
48
- reference_images?: string[];
49
- output_path?: string;
50
- gemini_api_key?: string;
51
- model_id?: string;
52
- }) => {
53
- const apiKey = args.gemini_api_key || process.env.GEMINI_API_KEY;
54
- if (!apiKey) {
55
- throw new Error("Gemini API key is required. Set GEMINI_API_KEY environment variable or pass gemini_api_key parameter. Get one at https://aistudio.google.com/app/apikey");
56
- }
70
+ async execute(args) {
71
+ const projectId = args.project_id || "mixio-pro";
72
+ const location = args.location_id || "us-central1";
73
+ const modelId = args.model_id || "veo-3.1-fast-generate-preview";
74
+
75
+ const token = await fetchAccessToken();
76
+
77
+ const url = `https://${location}-aiplatform.googleapis.com/v1/projects/${projectId}/locations/${location}/publishers/google/models/${modelId}:predictLongRunning`;
57
78
 
58
- const model = args.model_id || "veo-2.0-generate-001";
59
-
60
- // Initialize Google GenAI client
61
- const genai = new GoogleGenAI({ apiKey });
62
-
63
- // Build config for video generation
64
- const config: any = {};
65
-
66
- if (args.duration_seconds !== undefined) {
67
- config.duration_seconds = args.duration_seconds;
68
- } else {
69
- config.duration_seconds = 6; // default
79
+ let imagePart: any = undefined;
80
+ if (args.image_path) {
81
+ const { data, mimeType } = fileToBase64(args.image_path);
82
+ imagePart = {
83
+ image: {
84
+ bytesBase64Encoded: data,
85
+ mimeType,
86
+ },
87
+ };
70
88
  }
71
-
72
- if (args.aspect_ratio) {
73
- config.aspect_ratio = args.aspect_ratio;
89
+
90
+ let lastFramePart: any = undefined;
91
+ if (args.last_frame_path) {
92
+ const { data, mimeType } = fileToBase64(args.last_frame_path);
93
+ lastFramePart = {
94
+ lastFrame: {
95
+ bytesBase64Encoded: data,
96
+ mimeType,
97
+ },
98
+ };
74
99
  }
75
100
 
76
- try {
77
- // Start video generation operation
78
- console.log(`Starting video generation with model: ${model}`);
79
- let operation = await genai.models.generateVideos({
80
- model,
81
- prompt: args.prompt,
82
- config,
83
- });
101
+ let referenceImages: any[] | undefined = undefined;
102
+ if (args.reference_images) {
103
+ let refImages: string[];
104
+ if (typeof args.reference_images === "string") {
105
+ if (args.reference_images.startsWith("[") && args.reference_images.endsWith("]")) {
106
+ try {
107
+ refImages = JSON.parse(args.reference_images);
108
+ } catch {
109
+ throw new Error("Invalid reference_images format");
110
+ }
111
+ } else {
112
+ refImages = [args.reference_images];
113
+ }
114
+ } else if (Array.isArray(args.reference_images)) {
115
+ refImages = args.reference_images;
116
+ } else {
117
+ throw new Error("Invalid reference_images: must be array or string");
118
+ }
84
119
 
85
- console.log("Operation started, waiting for completion...");
86
-
87
- // Poll until operation is complete (max 10 minutes)
88
- let tries = 0;
89
- const maxTries = 60; // 10 minutes with 10s intervals
90
-
91
- while (!operation.done && tries < maxTries) {
92
- await wait(10000); // Wait 10 seconds
93
- tries++;
94
- console.log(`Polling attempt ${tries}/${maxTries}...`);
95
-
96
- operation = await genai.operations.getVideosOperation({
97
- operation: operation,
120
+ if (refImages.length > 0) {
121
+ referenceImages = refImages.slice(0, 3).map((p) => {
122
+ const { data, mimeType } = fileToBase64(p);
123
+ return {
124
+ image: {
125
+ bytesBase64Encoded: data,
126
+ mimeType,
127
+ },
128
+ referenceType: "asset",
129
+ };
98
130
  });
99
131
  }
132
+ }
100
133
 
101
- if (!operation.done) {
102
- throw new Error("Video generation timed out after 10 minutes");
103
- }
134
+ const personGeneration = args.person_generation || (args.image_path ? "allow_adult" : "allow_all");
104
135
 
105
- console.log("Operation completed!");
106
- console.log("Full Response:", JSON.stringify(operation.response, null, 2));
136
+ const instances: any[] = [
137
+ {
138
+ prompt: args.prompt,
139
+ ...(imagePart || {}),
140
+ ...(lastFramePart || {}),
141
+ ...(referenceImages ? { referenceImages } : {}),
142
+ },
143
+ ];
107
144
 
108
- // Extract generated videos from response
109
- const generatedVideos = operation.response?.generatedVideos || [];
110
-
111
- if (!generatedVideos || generatedVideos.length === 0) {
112
- const respStr = JSON.stringify(operation.response, null, 2);
113
- return `Video generation completed but no videos found in response.\n\nFull Response:\n${respStr.slice(0, 2000)}${respStr.length > 2000 ? '\n...(truncated)' : ''}`;
114
- }
145
+ const parameters: any = {
146
+ aspectRatio: args.aspect_ratio || "9:16",
147
+ durationSeconds: parseInt(args.duration_seconds) || 6,
148
+ resolution: args.resolution || "720p",
149
+ negativePrompt: args.negative_prompt,
150
+ generateAudio: args.generate_audio || false,
151
+ personGeneration,
152
+ };
115
153
 
116
- // Download and save videos
117
- const outputs: string[] = [];
118
-
119
- for (let i = 0; i < generatedVideos.length; i++) {
120
- const generatedVideo = generatedVideos[i];
121
- const videoUri = generatedVideo?.video?.uri;
122
-
123
- if (!videoUri) {
124
- console.warn(`Video ${i} has no URI`);
125
- continue;
126
- }
154
+ const res = await fetch(url, {
155
+ method: "POST",
156
+ headers: {
157
+ Authorization: `Bearer ${token}`,
158
+ "Content-Type": "application/json",
159
+ },
160
+ body: JSON.stringify({ instances, parameters }),
161
+ });
127
162
 
128
- console.log(`Downloading video ${i + 1}/${generatedVideos.length}...`);
129
-
130
- // Download video from URI
131
- const videoUrl = `${videoUri}&key=${apiKey}`;
132
- const response = await fetch(videoUrl);
133
-
134
- if (!response.ok) {
135
- throw new Error(`Failed to download video: ${response.status} ${response.statusText}`);
136
- }
137
-
138
- const buffer = await response.arrayBuffer();
139
-
140
- // Save video to file
141
- const filePath = args.output_path
142
- ? (i === 0 ? args.output_path : args.output_path.replace(/\.mp4$/i, `_${i}.mp4`))
143
- : `video_output_${Date.now()}${i === 0 ? '' : '_' + i}.mp4`;
144
- const absPath = path.resolve(filePath);
145
-
146
- fs.writeFileSync(absPath, Buffer.from(buffer));
147
- outputs.push(absPath);
148
- console.log(`Saved video to: ${absPath}`);
149
- }
163
+ if (!res.ok) {
164
+ const text = await res.text();
165
+ throw new Error(`Vertex request failed: ${res.status} ${text}`);
166
+ }
167
+
168
+ const op = await res.json();
169
+ const name: string = op.name || op.operation || "";
170
+ if (!name) {
171
+ throw new Error("Vertex did not return an operation name for long-running request");
172
+ }
150
173
 
151
- if (outputs.length > 0) {
152
- return `Video(s) saved successfully:\n${outputs.map((p, i) => `${i + 1}. ${p}`).join('\n')}`;
174
+ let current = op;
175
+ let done = !!op.done;
176
+ let tries = 0;
177
+
178
+ // Poll using fetchPredictOperation as per Vertex recommendation
179
+ const fetchUrl = `https://${location}-aiplatform.googleapis.com/v1/projects/${projectId}/locations/${location}/publishers/google/models/${modelId}:fetchPredictOperation`;
180
+ while (!done && tries < 60) {
181
+ await wait(10000);
182
+ const poll = await fetch(fetchUrl, {
183
+ method: "POST",
184
+ headers: {
185
+ Authorization: `Bearer ${token}`,
186
+ "Content-Type": "application/json",
187
+ },
188
+ body: JSON.stringify({ operationName: name }),
189
+ });
190
+ if (!poll.ok) {
191
+ const text = await poll.text();
192
+ throw new Error(`Vertex operation poll failed: ${poll.status} ${text}`);
153
193
  }
194
+ current = await poll.json();
195
+ done = !!current.done || !!current.response;
196
+ tries++;
197
+ }
198
+
199
+ const resp = current.response || current;
200
+ // Decode from response.videos[].bytesBase64Encoded only
201
+ const outputs: string[] = [];
202
+ const saveVideo = (base64: string, index: number) => {
203
+ if (!base64) return;
204
+ const filePath = args.output_path
205
+ ? (index === 0 ? args.output_path : args.output_path.replace(/\.mp4$/i, `_${index}.mp4`))
206
+ : `video_output_${Date.now()}${index === 0 ? '' : '_' + index}.mp4`;
207
+ const absPath = path.resolve(filePath);
208
+ const buf = Buffer.from(base64, 'base64');
209
+ fs.writeFileSync(absPath, buf);
210
+ outputs.push(absPath);
211
+ };
154
212
 
155
- return "Video generation completed but no videos were saved.";
156
-
157
- } catch (error: any) {
158
- throw new Error(`Video generation failed: ${error.message || JSON.stringify(error)}`);
213
+ if (Array.isArray(resp?.videos) && resp.videos.length > 0) {
214
+ for (let i = 0; i < resp.videos.length; i++) {
215
+ const v = resp.videos[i] || {};
216
+ if (typeof v.bytesBase64Encoded === 'string') {
217
+ saveVideo(v.bytesBase64Encoded, i);
218
+ }
219
+ }
159
220
  }
221
+ if (outputs.length > 0) {
222
+ return `Video(s) saved: ${outputs.join(', ')}`;
223
+ }
224
+
225
+ // If nothing saved, return a concise summary plus head/tail snippets of JSON
226
+ let jsonStr = "";
227
+ try { jsonStr = JSON.stringify(resp); } catch {}
228
+ const head150 = jsonStr ? jsonStr.slice(0, 150) : "";
229
+ const tail50 = jsonStr ? jsonStr.slice(Math.max(0, jsonStr.length - 50)) : "";
230
+ return `Vertex operation done but no videos array present. operationName=${name}. json_head150=${head150} json_tail50=${tail50}`;
160
231
  },
161
- };
232
+ });