@mixio-pro/kalaasetu-mcp 1.0.4 → 1.0.5-beta
This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their public registry.
- package/package.json +1 -1
- package/src/tools/image-to-video.ts +175 -206
package/package.json
CHANGED
(1 line changed: "version" bumped from 1.0.4 to 1.0.5-beta)

package/src/tools/image-to-video.ts
CHANGED
@@ -1,99 +1,125 @@
-
+ // @ts-nocheck
  import * as fs from "fs";
+ import { GoogleAuth } from "google-auth-library";
+ import { exec } from "child_process";
  import * as path from "path";
+ import { z } from "zod";

  async function wait(ms: number): Promise<void> {
    return new Promise((resolve) => setTimeout(resolve, ms));
  }

- function
-
-
+ async function fetchAccessToken(): Promise<string> {
+   try {
+     const auth = new GoogleAuth({ scopes: ["https://www.googleapis.com/auth/cloud-platform"] });
+     const client = await auth.getClient();
+     const token = await client.getAccessToken();
+     if (!token || typeof token !== "string") {
+       throw new Error("No token from GoogleAuth");
+     }
+     return token;
+   } catch (e) {
+     // Fallback to gcloud
+     return await new Promise((resolve, reject) => {
+       exec("gcloud auth print-access-token", (err, stdout, stderr) => {
+         if (err) {
+           reject(new Error(`Failed to fetch an access token (ADC and gcloud): ${stderr || err.message}`));
+           return;
+         }
+         const t = (stdout || "").trim();
+         if (!t) {
+           reject(new Error("Failed to fetch an access token: empty token from gcloud"));
+           return;
+         }
+         resolve(t);
+       });
+     });
+   }
+ }
+
+ function fileToBase64(path: string): { data: string; mimeType: string } {
+   if (!fs.existsSync(path)) {
+     throw new Error(`File not found: ${path}`);
    }
-   const buf = fs.readFileSync(
+   const buf = fs.readFileSync(path);
    const data = Buffer.from(buf).toString("base64");
-   //
-   const
-   const mimeType = ext === '.jpg' || ext === '.jpeg' ? 'image/jpeg' :
-     ext === '.png' ? 'image/png' :
-     ext === '.webp' ? 'image/webp' : 'image/png';
+   // Default to PNG if not sure, similar to existing code
+   const mimeType = "image/png";
    return { data, mimeType };
  }

- export const imageToVideo = {
-   name: "
-   description: "Generate videos from an image as starting first frame using
+ export const imageToVideo = ({
+   name: "image_to_video",
+   description: "Generate videos from an image as starting first frame using Vertex Veo models (predictLongRunning + fetchPredictOperation).",
    parameters: z.object({
      prompt: z.string().describe("Text description for the video"),
      image_path: z.string().optional().describe("Path to source image for image-to-video generation"),
      last_frame_path: z.string().optional().describe("Path to last frame image to guide ending frame (optional)"),
      aspect_ratio: z.string().optional().describe("Video aspect ratio: '16:9' or '9:16' (default: '9:16')"),
-     duration_seconds: z.
+     duration_seconds: z.string().optional().describe("Video duration in seconds: '4', '6', or '8' (default: '6')"),
      resolution: z.string().optional().describe("Video resolution: '720p' or '1080p' (default: '720p')"),
      negative_prompt: z.string().optional().describe("Text describing what not to include in the video"),
      person_generation: z.string().optional().describe("Controls generation of people: 'allow_adult' (default for image-to-video) or 'allow_all'"),
      reference_images: z.array(z.string()).optional().describe("Additional image paths for reference (max 3)"),
      output_path: z.string().optional().describe("Output MP4 file path (if multiple predictions, index suffix is added)"),
-
-
+     project_id: z.string().optional().describe("GCP Project ID (default: mixio-pro)"),
+     location_id: z.string().optional().describe("Vertex region (default: us-central1)"),
+     model_id: z.string().optional().describe("Model ID (default: veo-3.1-fast-generate-preview)"),
+     generate_audio: z.boolean().optional().describe("Boolean flag to enable generation of audio along with the video").default(false)
    }),
-
-
-
-
-   aspect_ratio?: string;
-   duration_seconds?: string | number;
-   resolution?: string;
-   negative_prompt?: string;
-   person_generation?: string;
-   reference_images?: string[];
-   output_path?: string;
-   gemini_api_key?: string;
-   model_id?: string;
- }) => {
-   const apiKey = args.gemini_api_key || process.env.GEMINI_API_KEY;
-   if (!apiKey) {
-     throw new Error("Gemini API key is required. Set GEMINI_API_KEY environment variable or pass gemini_api_key parameter. Get one at https://aistudio.google.com/app/apikey");
-   }
+   async execute(args) {
+     const projectId = args.project_id || "mixio-pro";
+     const location = args.location_id || "us-central1";
+     const modelId = args.model_id || "veo-3.1-fast-generate-preview";

-     const
-     const baseUrl = "https://generativelanguage.googleapis.com/v1beta";
+     const token = await fetchAccessToken();

-
-     const durationSeconds = args.duration_seconds
-       ? (typeof args.duration_seconds === 'string' ? parseInt(args.duration_seconds) : args.duration_seconds)
-       : 6; // default
-
-     try {
-       // Build the request body for predictLongRunning
-       const instances: any[] = [
-         {
-           prompt: args.prompt,
-         },
-       ];
+     const url = `https://${location}-aiplatform.googleapis.com/v1/projects/${projectId}/locations/${location}/publishers/google/models/${modelId}:predictLongRunning`;

-
-
-
-
+     let imagePart: any = undefined;
+     if (args.image_path) {
+       const { data, mimeType } = fileToBase64(args.image_path);
+       imagePart = {
+         image: {
            bytesBase64Encoded: data,
            mimeType,
-         }
-       }
+         },
+       };
+     }

-
-
-
-
+     let lastFramePart: any = undefined;
+     if (args.last_frame_path) {
+       const { data, mimeType } = fileToBase64(args.last_frame_path);
+       lastFramePart = {
+         lastFrame: {
            bytesBase64Encoded: data,
            mimeType,
-         }
+         },
+       };
+     }
+
+     let referenceImages: any[] | undefined = undefined;
+     if (args.reference_images) {
+       let refImages: string[];
+       if (typeof args.reference_images === "string") {
+         if (args.reference_images.startsWith("[") && args.reference_images.endsWith("]")) {
+           try {
+             refImages = JSON.parse(args.reference_images);
+           } catch {
+             throw new Error("Invalid reference_images format");
+           }
+         } else {
+           refImages = [args.reference_images];
+         }
+       } else if (Array.isArray(args.reference_images)) {
+         refImages = args.reference_images;
+       } else {
+         throw new Error("Invalid reference_images: must be array or string");
        }

-
-
-
-       const { data, mimeType } = fileToBase64(imgPath);
+       if (refImages.length > 0) {
+         referenceImages = refImages.slice(0, 3).map((p) => {
+           const { data, mimeType } = fileToBase64(p);
            return {
              image: {
                bytesBase64Encoded: data,
@@ -102,162 +128,105 @@ export const imageToVideo = {
              referenceType: "asset",
            };
          });
-         instances[0].referenceImages = refImages;
-       }
-
-       // Build parameters - NOTE: Parameters go in "parameters" object, not in instances
-       const parameters: any = {};
-
-       if (args.aspect_ratio) {
-         parameters.aspectRatio = args.aspect_ratio;
-       }
-
-       if (durationSeconds) {
-         parameters.durationSeconds = durationSeconds;
-       }
-
-       if (args.resolution) {
-         parameters.resolution = args.resolution;
-       }
-
-       if (args.negative_prompt) {
-         parameters.negativePrompt = args.negative_prompt;
-       }
-
-       if (args.person_generation) {
-         parameters.personGeneration = args.person_generation;
        }
+     }

-
-
-
+     const personGeneration = args.person_generation || (args.image_path ? "allow_adult" : "allow_all");
+
+     const instances: any[] = [
+       {
+         prompt: args.prompt,
+         ...(imagePart || {}),
+         ...(lastFramePart || {}),
+         ...(referenceImages ? { referenceImages } : {}),
+       },
+     ];
+
+     const parameters: any = {
+       aspectRatio: args.aspect_ratio || "9:16",
+       durationSeconds: parseInt(args.duration_seconds) || 6,
+       resolution: args.resolution || "720p",
+       negativePrompt: args.negative_prompt,
+       generateAudio: args.generate_audio || false,
+       personGeneration,
+     };
+
+     const res = await fetch(url, {
+       method: "POST",
+       headers: {
+         Authorization: `Bearer ${token}`,
+         "Content-Type": "application/json",
+       },
+       body: JSON.stringify({ instances, parameters }),
+     });
+
+     if (!res.ok) {
+       const text = await res.text();
+       throw new Error(`Vertex request failed: ${res.status} ${text}`);
+     }

-
-
-
-
+     const op = await res.json();
+     const name: string = op.name || op.operation || "";
+     if (!name) {
+       throw new Error("Vertex did not return an operation name for long-running request");
+     }

-
+     let current = op;
+     let done = !!op.done;
+     let tries = 0;

-
-
-
+     // Poll using fetchPredictOperation as per Vertex recommendation
+     const fetchUrl = `https://${location}-aiplatform.googleapis.com/v1/projects/${projectId}/locations/${location}/publishers/google/models/${modelId}:fetchPredictOperation`;
+     while (!done && tries < 60) {
+       await wait(10000);
+       const poll = await fetch(fetchUrl, {
          method: "POST",
          headers: {
-
+           Authorization: `Bearer ${token}`,
            "Content-Type": "application/json",
          },
-         body: JSON.stringify(
+         body: JSON.stringify({ operationName: name }),
        });
-
-
-
-       throw new Error(`Video generation request failed: ${response.status} ${errorText}`);
-       }
-
-       const operation = await response.json() as any;
-       const operationName: string = operation.name || operation.operation || "";
-
-       if (!operationName) {
-         throw new Error("No operation name returned from API");
-       }
-
-       console.log(`Operation started: ${operationName}`);
-
-       // Step 2: Poll the operation status by getting the operation directly
-       let currentOp: any = operation;
-       let done = !!operation.done;
-       let tries = 0;
-       const maxTries = 60; // 10 minutes with 10s intervals
-
-       while (!done && tries < maxTries) {
-         await wait(10000); // Wait 10 seconds
-         tries++;
-         console.log(`Polling attempt ${tries}/${maxTries}...`);
-
-         // Poll by getting the operation status directly
-         const pollResponse = await fetch(`${baseUrl}/${operationName}`, {
-           method: "GET",
-           headers: {
-             "x-goog-api-key": apiKey,
-           },
-         });
-
-         if (!pollResponse.ok) {
-           const errorText = await pollResponse.text();
-           throw new Error(`Operation polling failed: ${pollResponse.status} ${errorText}`);
-         }
-
-         currentOp = await pollResponse.json() as any;
-         done = !!currentOp.done || !!currentOp.response;
-       }
-
-       if (!done) {
-         throw new Error("Video generation timed out after 10 minutes");
-       }
-
-       console.log("Operation completed!");
-
-       // Step 3: Extract and download videos
-       const resp = currentOp.response || currentOp;
-
-       // The response structure is: response.generateVideoResponse.generatedSamples[].video.uri
-       const generateVideoResponse = resp?.generateVideoResponse;
-       const generatedSamples = generateVideoResponse?.generatedSamples || [];
-
-       if (!generatedSamples || generatedSamples.length === 0) {
-         let jsonStr = "";
-         try { jsonStr = JSON.stringify(resp, null, 2); } catch {}
-         return `Video generation completed but no generatedSamples found.\n\nFull Response:\n${jsonStr.slice(0, 1000)}${jsonStr.length > 1000 ? '\n...(truncated)' : ''}`;
+       if (!poll.ok) {
+         const text = await poll.text();
+         throw new Error(`Vertex operation poll failed: ${poll.status} ${text}`);
        }
+       current = await poll.json();
+       done = !!current.done || !!current.response;
+       tries++;
+     }

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-         },
-       });
-
-       if (!videoResponse.ok) {
-         throw new Error(`Failed to download video: ${videoResponse.status} ${videoResponse.statusText}`);
+     const resp = current.response || current;
+     // Decode from response.videos[].bytesBase64Encoded only
+     const outputs: string[] = [];
+     const saveVideo = (base64: string, index: number) => {
+       if (!base64) return;
+       const filePath = args.output_path
+         ? (index === 0 ? args.output_path : args.output_path.replace(/\.mp4$/i, `_${index}.mp4`))
+         : `video_output_${Date.now()}${index === 0 ? '' : '_' + index}.mp4`;
+       const absPath = path.resolve(filePath);
+       const buf = Buffer.from(base64, 'base64');
+       fs.writeFileSync(absPath, buf);
+       outputs.push(absPath);
+     };
+
+     if (Array.isArray(resp?.videos) && resp.videos.length > 0) {
+       for (let i = 0; i < resp.videos.length; i++) {
+         const v = resp.videos[i] || {};
+         if (typeof v.bytesBase64Encoded === 'string') {
+           saveVideo(v.bytesBase64Encoded, i);
          }
-
-         const videoBuffer = await videoResponse.arrayBuffer();
-
-         // Save video to file
-         const filePath = args.output_path
-           ? (i === 0 ? args.output_path : args.output_path.replace(/\.mp4$/i, `_${i}.mp4`))
-           : `video_output_${Date.now()}${i === 0 ? '' : '_' + i}.mp4`;
-         const absPath = path.resolve(filePath);
-
-         fs.writeFileSync(absPath, Buffer.from(videoBuffer));
-         outputs.push(absPath);
-         console.log(`Saved video to: ${absPath}`);
-       }
-
-       if (outputs.length > 0) {
-         return `Video(s) saved successfully:\n${outputs.map((p, i) => `${i + 1}. ${p}`).join('\n')}`;
        }
-
-       return "Video generation completed but no videos were saved.";
-
-     } catch (error: any) {
-       throw new Error(`Video generation failed: ${error.message || JSON.stringify(error)}`);
      }
+     if (outputs.length > 0) {
+       return `Video(s) saved: ${outputs.join(', ')}`;
+     }
+
+     // If nothing saved, return a concise summary plus head/tail snippets of JSON
+     let jsonStr = "";
+     try { jsonStr = JSON.stringify(resp); } catch {}
+     const head150 = jsonStr ? jsonStr.slice(0, 150) : "";
+     const tail50 = jsonStr ? jsonStr.slice(Math.max(0, jsonStr.length - 50)) : "";
+     return `Vertex operation done but no videos array present. operationName=${name}. json_head150=${head150} json_tail50=${tail50}`;
    },
- };
+ });
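
For orientation, a minimal sketch (not part of the published diff) of driving the rewritten tool directly in 1.0.5-beta. It assumes Application Default Credentials or a logged-in gcloud CLI with access to a Vertex AI-enabled project, a local first_frame.png, and omits the MCP server wiring; the project ID and file paths below are hypothetical.

// sketch.ts - illustrative only; assumes ADC/gcloud auth and Vertex AI access
import { imageToVideo } from "./src/tools/image-to-video";

async function main() {
  const result = await imageToVideo.execute({
    prompt: "A slow dolly shot across a misty mountain lake at sunrise",
    image_path: "./first_frame.png",   // hypothetical path; sent as instances[0].image (base64)
    aspect_ratio: "16:9",
    duration_seconds: "8",             // schema accepts '4', '6', or '8'; defaults to 6
    output_path: "./lake.mp4",
    project_id: "my-gcp-project",      // hypothetical; the tool defaults to "mixio-pro"
    generate_audio: false,
  });
  console.log(result);                 // e.g. "Video(s) saved: /abs/path/lake.mp4"
}

main().catch(console.error);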