@mixio-pro/kalaasetu-mcp 1.0.4 → 1.0.5-beta

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@mixio-pro/kalaasetu-mcp",
3
- "version": "1.0.4",
3
+ "version": "1.0.5-beta",
4
4
  "description": "A powerful Model Context Protocol server providing AI tools for content generation and analysis",
5
5
  "type": "module",
6
6
  "module": "src/index.ts",
@@ -1,99 +1,125 @@
1
- import { z } from "zod";
1
+ // @ts-nocheck
2
2
  import * as fs from "fs";
3
+ import { GoogleAuth } from "google-auth-library";
4
+ import { exec } from "child_process";
3
5
  import * as path from "path";
6
+ import { z } from "zod";
4
7
 
5
8
  async function wait(ms: number): Promise<void> {
6
9
  return new Promise((resolve) => setTimeout(resolve, ms));
7
10
  }
8
11
 
9
- function fileToBase64(filePath: string): { data: string; mimeType: string } {
10
- if (!fs.existsSync(filePath)) {
11
- throw new Error(`File not found: ${filePath}`);
12
+ async function fetchAccessToken(): Promise<string> {
13
+ try {
14
+ const auth = new GoogleAuth({ scopes: ["https://www.googleapis.com/auth/cloud-platform"] });
15
+ const client = await auth.getClient();
16
+ const token = await client.getAccessToken();
17
+ if (!token || typeof token !== "string") {
18
+ throw new Error("No token from GoogleAuth");
19
+ }
20
+ return token;
21
+ } catch (e) {
22
+ // Fallback to gcloud
23
+ return await new Promise((resolve, reject) => {
24
+ exec("gcloud auth print-access-token", (err, stdout, stderr) => {
25
+ if (err) {
26
+ reject(new Error(`Failed to fetch an access token (ADC and gcloud): ${stderr || err.message}`));
27
+ return;
28
+ }
29
+ const t = (stdout || "").trim();
30
+ if (!t) {
31
+ reject(new Error("Failed to fetch an access token: empty token from gcloud"));
32
+ return;
33
+ }
34
+ resolve(t);
35
+ });
36
+ });
37
+ }
38
+ }
39
+
40
+ function fileToBase64(path: string): { data: string; mimeType: string } {
41
+ if (!fs.existsSync(path)) {
42
+ throw new Error(`File not found: ${path}`);
12
43
  }
13
- const buf = fs.readFileSync(filePath);
44
+ const buf = fs.readFileSync(path);
14
45
  const data = Buffer.from(buf).toString("base64");
15
- // Detect mime type from extension
16
- const ext = path.extname(filePath).toLowerCase();
17
- const mimeType = ext === '.jpg' || ext === '.jpeg' ? 'image/jpeg' :
18
- ext === '.png' ? 'image/png' :
19
- ext === '.webp' ? 'image/webp' : 'image/png';
46
+ // Default to PNG if not sure, similar to existing code
47
+ const mimeType = "image/png";
20
48
  return { data, mimeType };
21
49
  }
22
50
 
23
- export const imageToVideo = {
24
- name: "imageToVideo",
25
- description: "Generate videos from an image as starting first frame using Gemini Veo models via HTTP API with Gemini API key.",
51
+ export const imageToVideo = ({
52
+ name: "image_to_video",
53
+ description: "Generate videos from an image as starting first frame using Vertex Veo models (predictLongRunning + fetchPredictOperation).",
26
54
  parameters: z.object({
27
55
  prompt: z.string().describe("Text description for the video"),
28
56
  image_path: z.string().optional().describe("Path to source image for image-to-video generation"),
29
57
  last_frame_path: z.string().optional().describe("Path to last frame image to guide ending frame (optional)"),
30
58
  aspect_ratio: z.string().optional().describe("Video aspect ratio: '16:9' or '9:16' (default: '9:16')"),
31
- duration_seconds: z.union([z.string(), z.number()]).optional().describe("Video duration in seconds: 4, 6, or 8 (default: 6)"),
59
+ duration_seconds: z.string().optional().describe("Video duration in seconds: '4', '6', or '8' (default: '6')"),
32
60
  resolution: z.string().optional().describe("Video resolution: '720p' or '1080p' (default: '720p')"),
33
61
  negative_prompt: z.string().optional().describe("Text describing what not to include in the video"),
34
62
  person_generation: z.string().optional().describe("Controls generation of people: 'allow_adult' (default for image-to-video) or 'allow_all'"),
35
63
  reference_images: z.array(z.string()).optional().describe("Additional image paths for reference (max 3)"),
36
64
  output_path: z.string().optional().describe("Output MP4 file path (if multiple predictions, index suffix is added)"),
37
- gemini_api_key: z.string().optional().describe("Gemini API key (uses GEMINI_API_KEY env var if not provided)"),
38
- model_id: z.string().optional().describe("Model ID (default: veo-3.1-generate-preview)"),
65
+ project_id: z.string().optional().describe("GCP Project ID (default: mixio-pro)"),
66
+ location_id: z.string().optional().describe("Vertex region (default: us-central1)"),
67
+ model_id: z.string().optional().describe("Model ID (default: veo-3.1-fast-generate-preview)"),
68
+ generate_audio: z.boolean().optional().describe("Boolean flag to enable generation of audio along with the video").default(false)
39
69
  }),
40
- execute: async (args: {
41
- prompt: string;
42
- image_path?: string;
43
- last_frame_path?: string;
44
- aspect_ratio?: string;
45
- duration_seconds?: string | number;
46
- resolution?: string;
47
- negative_prompt?: string;
48
- person_generation?: string;
49
- reference_images?: string[];
50
- output_path?: string;
51
- gemini_api_key?: string;
52
- model_id?: string;
53
- }) => {
54
- const apiKey = args.gemini_api_key || process.env.GEMINI_API_KEY;
55
- if (!apiKey) {
56
- throw new Error("Gemini API key is required. Set GEMINI_API_KEY environment variable or pass gemini_api_key parameter. Get one at https://aistudio.google.com/app/apikey");
57
- }
70
+ async execute(args) {
71
+ const projectId = args.project_id || "mixio-pro";
72
+ const location = args.location_id || "us-central1";
73
+ const modelId = args.model_id || "veo-3.1-fast-generate-preview";
58
74
 
59
- const modelId = args.model_id || "veo-3.1-generate-preview";
60
- const baseUrl = "https://generativelanguage.googleapis.com/v1beta";
75
+ const token = await fetchAccessToken();
61
76
 
62
- // Convert duration_seconds to number, handling both string and number inputs
63
- const durationSeconds = args.duration_seconds
64
- ? (typeof args.duration_seconds === 'string' ? parseInt(args.duration_seconds) : args.duration_seconds)
65
- : 6; // default
66
-
67
- try {
68
- // Build the request body for predictLongRunning
69
- const instances: any[] = [
70
- {
71
- prompt: args.prompt,
72
- },
73
- ];
77
+ const url = `https://${location}-aiplatform.googleapis.com/v1/projects/${projectId}/locations/${location}/publishers/google/models/${modelId}:predictLongRunning`;
74
78
 
75
- // Add image if provided (first frame)
76
- if (args.image_path) {
77
- const { data, mimeType } = fileToBase64(args.image_path);
78
- instances[0].image = {
79
+ let imagePart: any = undefined;
80
+ if (args.image_path) {
81
+ const { data, mimeType } = fileToBase64(args.image_path);
82
+ imagePart = {
83
+ image: {
79
84
  bytesBase64Encoded: data,
80
85
  mimeType,
81
- };
82
- }
86
+ },
87
+ };
88
+ }
83
89
 
84
- // Add last frame if provided (for interpolation)
85
- if (args.last_frame_path) {
86
- const { data, mimeType } = fileToBase64(args.last_frame_path);
87
- instances[0].lastFrame = {
90
+ let lastFramePart: any = undefined;
91
+ if (args.last_frame_path) {
92
+ const { data, mimeType } = fileToBase64(args.last_frame_path);
93
+ lastFramePart = {
94
+ lastFrame: {
88
95
  bytesBase64Encoded: data,
89
96
  mimeType,
90
- };
97
+ },
98
+ };
99
+ }
100
+
101
+ let referenceImages: any[] | undefined = undefined;
102
+ if (args.reference_images) {
103
+ let refImages: string[];
104
+ if (typeof args.reference_images === "string") {
105
+ if (args.reference_images.startsWith("[") && args.reference_images.endsWith("]")) {
106
+ try {
107
+ refImages = JSON.parse(args.reference_images);
108
+ } catch {
109
+ throw new Error("Invalid reference_images format");
110
+ }
111
+ } else {
112
+ refImages = [args.reference_images];
113
+ }
114
+ } else if (Array.isArray(args.reference_images)) {
115
+ refImages = args.reference_images;
116
+ } else {
117
+ throw new Error("Invalid reference_images: must be array or string");
91
118
  }
92
119
 
93
- // Add reference images if provided
94
- if (args.reference_images && args.reference_images.length > 0) {
95
- const refImages = args.reference_images.slice(0, 3).map((imgPath) => {
96
- const { data, mimeType } = fileToBase64(imgPath);
120
+ if (refImages.length > 0) {
121
+ referenceImages = refImages.slice(0, 3).map((p) => {
122
+ const { data, mimeType } = fileToBase64(p);
97
123
  return {
98
124
  image: {
99
125
  bytesBase64Encoded: data,
@@ -102,162 +128,105 @@ export const imageToVideo = {
102
128
  referenceType: "asset",
103
129
  };
104
130
  });
105
- instances[0].referenceImages = refImages;
106
- }
107
-
108
- // Build parameters - NOTE: Parameters go in "parameters" object, not in instances
109
- const parameters: any = {};
110
-
111
- if (args.aspect_ratio) {
112
- parameters.aspectRatio = args.aspect_ratio;
113
- }
114
-
115
- if (durationSeconds) {
116
- parameters.durationSeconds = durationSeconds;
117
- }
118
-
119
- if (args.resolution) {
120
- parameters.resolution = args.resolution;
121
- }
122
-
123
- if (args.negative_prompt) {
124
- parameters.negativePrompt = args.negative_prompt;
125
- }
126
-
127
- if (args.person_generation) {
128
- parameters.personGeneration = args.person_generation;
129
131
  }
132
+ }
130
133
 
131
- const requestBody: any = {
132
- instances,
133
- };
134
+ const personGeneration = args.person_generation || (args.image_path ? "allow_adult" : "allow_all");
135
+
136
+ const instances: any[] = [
137
+ {
138
+ prompt: args.prompt,
139
+ ...(imagePart || {}),
140
+ ...(lastFramePart || {}),
141
+ ...(referenceImages ? { referenceImages } : {}),
142
+ },
143
+ ];
144
+
145
+ const parameters: any = {
146
+ aspectRatio: args.aspect_ratio || "9:16",
147
+ durationSeconds: parseInt(args.duration_seconds) || 6,
148
+ resolution: args.resolution || "720p",
149
+ negativePrompt: args.negative_prompt,
150
+ generateAudio: args.generate_audio || false,
151
+ personGeneration,
152
+ };
153
+
154
+ const res = await fetch(url, {
155
+ method: "POST",
156
+ headers: {
157
+ Authorization: `Bearer ${token}`,
158
+ "Content-Type": "application/json",
159
+ },
160
+ body: JSON.stringify({ instances, parameters }),
161
+ });
162
+
163
+ if (!res.ok) {
164
+ const text = await res.text();
165
+ throw new Error(`Vertex request failed: ${res.status} ${text}`);
166
+ }
134
167
 
135
- // Only add parameters if we have any
136
- if (Object.keys(parameters).length > 0) {
137
- requestBody.parameters = parameters;
138
- }
168
+ const op = await res.json();
169
+ const name: string = op.name || op.operation || "";
170
+ if (!name) {
171
+ throw new Error("Vertex did not return an operation name for long-running request");
172
+ }
139
173
 
140
- console.log(`Starting video generation with model: ${modelId}`);
174
+ let current = op;
175
+ let done = !!op.done;
176
+ let tries = 0;
141
177
 
142
- // Step 1: Start the long-running operation
143
- const url = `${baseUrl}/models/${modelId}:predictLongRunning`;
144
- const response = await fetch(url, {
178
+ // Poll using fetchPredictOperation as per Vertex recommendation
179
+ const fetchUrl = `https://${location}-aiplatform.googleapis.com/v1/projects/${projectId}/locations/${location}/publishers/google/models/${modelId}:fetchPredictOperation`;
180
+ while (!done && tries < 60) {
181
+ await wait(10000);
182
+ const poll = await fetch(fetchUrl, {
145
183
  method: "POST",
146
184
  headers: {
147
- "x-goog-api-key": apiKey,
185
+ Authorization: `Bearer ${token}`,
148
186
  "Content-Type": "application/json",
149
187
  },
150
- body: JSON.stringify(requestBody),
188
+ body: JSON.stringify({ operationName: name }),
151
189
  });
152
-
153
- if (!response.ok) {
154
- const errorText = await response.text();
155
- throw new Error(`Video generation request failed: ${response.status} ${errorText}`);
156
- }
157
-
158
- const operation = await response.json() as any;
159
- const operationName: string = operation.name || operation.operation || "";
160
-
161
- if (!operationName) {
162
- throw new Error("No operation name returned from API");
163
- }
164
-
165
- console.log(`Operation started: ${operationName}`);
166
-
167
- // Step 2: Poll the operation status by getting the operation directly
168
- let currentOp: any = operation;
169
- let done = !!operation.done;
170
- let tries = 0;
171
- const maxTries = 60; // 10 minutes with 10s intervals
172
-
173
- while (!done && tries < maxTries) {
174
- await wait(10000); // Wait 10 seconds
175
- tries++;
176
- console.log(`Polling attempt ${tries}/${maxTries}...`);
177
-
178
- // Poll by getting the operation status directly
179
- const pollResponse = await fetch(`${baseUrl}/${operationName}`, {
180
- method: "GET",
181
- headers: {
182
- "x-goog-api-key": apiKey,
183
- },
184
- });
185
-
186
- if (!pollResponse.ok) {
187
- const errorText = await pollResponse.text();
188
- throw new Error(`Operation polling failed: ${pollResponse.status} ${errorText}`);
189
- }
190
-
191
- currentOp = await pollResponse.json() as any;
192
- done = !!currentOp.done || !!currentOp.response;
193
- }
194
-
195
- if (!done) {
196
- throw new Error("Video generation timed out after 10 minutes");
197
- }
198
-
199
- console.log("Operation completed!");
200
-
201
- // Step 3: Extract and download videos
202
- const resp = currentOp.response || currentOp;
203
-
204
- // The response structure is: response.generateVideoResponse.generatedSamples[].video.uri
205
- const generateVideoResponse = resp?.generateVideoResponse;
206
- const generatedSamples = generateVideoResponse?.generatedSamples || [];
207
-
208
- if (!generatedSamples || generatedSamples.length === 0) {
209
- let jsonStr = "";
210
- try { jsonStr = JSON.stringify(resp, null, 2); } catch {}
211
- return `Video generation completed but no generatedSamples found.\n\nFull Response:\n${jsonStr.slice(0, 1000)}${jsonStr.length > 1000 ? '\n...(truncated)' : ''}`;
190
+ if (!poll.ok) {
191
+ const text = await poll.text();
192
+ throw new Error(`Vertex operation poll failed: ${poll.status} ${text}`);
212
193
  }
194
+ current = await poll.json();
195
+ done = !!current.done || !!current.response;
196
+ tries++;
197
+ }
213
198
 
214
- const outputs: string[] = [];
215
-
216
- // Download videos from URIs
217
- for (let i = 0; i < generatedSamples.length; i++) {
218
- const sample = generatedSamples[i];
219
- const videoUri = sample?.video?.uri;
220
-
221
- if (!videoUri) {
222
- console.warn(`Sample ${i} has no video URI`);
223
- continue;
224
- }
225
-
226
- console.log(`Downloading video ${i + 1}/${generatedSamples.length} from ${videoUri}...`);
227
-
228
- // Download video from URI with API key
229
- const videoResponse = await fetch(videoUri, {
230
- method: "GET",
231
- headers: {
232
- "x-goog-api-key": apiKey,
233
- },
234
- });
235
-
236
- if (!videoResponse.ok) {
237
- throw new Error(`Failed to download video: ${videoResponse.status} ${videoResponse.statusText}`);
199
+ const resp = current.response || current;
200
+ // Decode from response.videos[].bytesBase64Encoded only
201
+ const outputs: string[] = [];
202
+ const saveVideo = (base64: string, index: number) => {
203
+ if (!base64) return;
204
+ const filePath = args.output_path
205
+ ? (index === 0 ? args.output_path : args.output_path.replace(/\.mp4$/i, `_${index}.mp4`))
206
+ : `video_output_${Date.now()}${index === 0 ? '' : '_' + index}.mp4`;
207
+ const absPath = path.resolve(filePath);
208
+ const buf = Buffer.from(base64, 'base64');
209
+ fs.writeFileSync(absPath, buf);
210
+ outputs.push(absPath);
211
+ };
212
+
213
+ if (Array.isArray(resp?.videos) && resp.videos.length > 0) {
214
+ for (let i = 0; i < resp.videos.length; i++) {
215
+ const v = resp.videos[i] || {};
216
+ if (typeof v.bytesBase64Encoded === 'string') {
217
+ saveVideo(v.bytesBase64Encoded, i);
238
218
  }
239
-
240
- const videoBuffer = await videoResponse.arrayBuffer();
241
-
242
- // Save video to file
243
- const filePath = args.output_path
244
- ? (i === 0 ? args.output_path : args.output_path.replace(/\.mp4$/i, `_${i}.mp4`))
245
- : `video_output_${Date.now()}${i === 0 ? '' : '_' + i}.mp4`;
246
- const absPath = path.resolve(filePath);
247
-
248
- fs.writeFileSync(absPath, Buffer.from(videoBuffer));
249
- outputs.push(absPath);
250
- console.log(`Saved video to: ${absPath}`);
251
- }
252
-
253
- if (outputs.length > 0) {
254
- return `Video(s) saved successfully:\n${outputs.map((p, i) => `${i + 1}. ${p}`).join('\n')}`;
255
219
  }
256
-
257
- return "Video generation completed but no videos were saved.";
258
-
259
- } catch (error: any) {
260
- throw new Error(`Video generation failed: ${error.message || JSON.stringify(error)}`);
261
220
  }
221
+ if (outputs.length > 0) {
222
+ return `Video(s) saved: ${outputs.join(', ')}`;
223
+ }
224
+
225
+ // If nothing saved, return a concise summary plus head/tail snippets of JSON
226
+ let jsonStr = "";
227
+ try { jsonStr = JSON.stringify(resp); } catch {}
228
+ const head150 = jsonStr ? jsonStr.slice(0, 150) : "";
229
+ const tail50 = jsonStr ? jsonStr.slice(Math.max(0, jsonStr.length - 50)) : "";
230
+ return `Vertex operation done but no videos array present. operationName=${name}. json_head150=${head150} json_tail50=${tail50}`;
262
231
  },
263
- };
232
+ });