@mixio-pro/kalaasetu-mcp 1.1.2 → 1.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,9 +1,7 @@
1
- import { GoogleAuth } from "google-auth-library";
2
- import { exec } from "child_process";
3
- import * as path from "path";
4
1
  import { z } from "zod";
5
2
  import { getStorage } from "../storage";
6
3
  import { generateTimestampedFilename } from "../utils/filename";
4
+ import { safeToolExecute } from "../utils/tool-wrapper";
7
5
 
8
6
  import { getGoogleAccessToken } from "../utils/google-auth";
9
7
 
@@ -57,10 +55,12 @@ export const imageToVideo = {
57
55
  aspect_ratio: z
58
56
  .string()
59
57
  .optional()
60
- .describe("Video aspect ratio: '16:9' or '9:16' (default: '9:16')"),
58
+ .default("16:9")
59
+ .describe("Video aspect ratio: '16:9' or '9:16'"),
61
60
  duration_seconds: z
62
61
  .string()
63
62
  .optional()
63
+ .default("6")
64
64
  .describe(
65
65
  "Video duration in seconds. MUST be one of: '4', '6', or '8' (default: '6'). Other values will be rejected by Vertex AI."
66
66
  ),
@@ -91,15 +91,18 @@ export const imageToVideo = {
91
91
  project_id: z
92
92
  .string()
93
93
  .optional()
94
+ .default("mixio-pro")
94
95
  .describe("GCP Project ID (default: mixio-pro)"),
95
96
  location_id: z
96
97
  .string()
97
98
  .optional()
99
+ .default("us-central1")
98
100
  .describe("Vertex region (default: us-central1)"),
99
101
  model_id: z
100
102
  .string()
101
103
  .optional()
102
- .describe("Model ID (default: veo-3.1-fast-generate-preview)"),
104
+ .default("veo-3.1-fast-generate-001")
105
+ .describe("Model ID (default: veo-3.1-fast-generate-001)"),
103
106
  generate_audio: z
104
107
  .boolean()
105
108
  .optional()
@@ -108,6 +111,7 @@ export const imageToVideo = {
108
111
  )
109
112
  .default(false),
110
113
  }),
114
+ timeoutMs: 1200000, // 20 minutes
111
115
  async execute(args: {
112
116
  prompt: string;
113
117
  image_path?: string;
@@ -124,223 +128,235 @@ export const imageToVideo = {
124
128
  model_id?: string;
125
129
  generate_audio?: boolean;
126
130
  }) {
127
- const projectId = args.project_id || "mixio-pro";
128
- const location = args.location_id || "us-central1";
129
- const modelId = args.model_id || "veo-3.1-fast-generate-preview";
131
+ return safeToolExecute(async () => {
132
+ const projectId = args.project_id || "mixio-pro";
133
+ const location = args.location_id || "us-central1";
134
+ const modelId = args.model_id || "veo-3.1-fast-generate-preview";
130
135
 
131
- // Validate and parse duration_seconds - snap to nearest 4, 6, or 8
132
- let durationSeconds = parseInt(args.duration_seconds || "6");
133
- if (isNaN(durationSeconds)) durationSeconds = 6;
136
+ // Validate and parse duration_seconds - snap to nearest 4, 6, or 8
137
+ let durationSeconds = parseInt(args.duration_seconds || "6");
138
+ if (isNaN(durationSeconds)) durationSeconds = 6;
134
139
 
135
- const validDurations = [4, 6, 8];
136
- // Find nearest valid duration
137
- durationSeconds = validDurations.reduce((prev, curr) => {
138
- return Math.abs(curr - durationSeconds) < Math.abs(prev - durationSeconds)
139
- ? curr
140
- : prev;
141
- });
140
+ const validDurations = [4, 6, 8];
141
+ // Find nearest valid duration
142
+ durationSeconds = validDurations.reduce((prev, curr) => {
143
+ return Math.abs(curr - durationSeconds) <
144
+ Math.abs(prev - durationSeconds)
145
+ ? curr
146
+ : prev;
147
+ });
142
148
 
143
- // Tie-breaking: if equidistant (e.g. 5), the reduce above keeps the first one (4) because < is strict.
144
- // However, user requested "nearest duration with the ceil", effectively meaning round up if equidistant.
145
- // Let's explicitly handle the equidistant cases or just use a custom finder.
146
- // 5 -> equidistant to 4 and 6. "With ceil" implies 6.
147
- // 7 -> equidistant to 6 and 8. "With ceil" implies 8.
149
+ // Tie-breaking: if equidistant (e.g. 5), the reduce above keeps the first one (4) because < is strict.
150
+ // However, user requested "nearest duration with the ceil", effectively meaning round up if equidistant.
151
+ // Let's explicitly handle the equidistant cases or just use a custom finder.
152
+ // 5 -> equidistant to 4 and 6. "With ceil" implies 6.
153
+ // 7 -> equidistant to 6 and 8. "With ceil" implies 8.
148
154
 
149
- // Simpler logic for these specific values:
150
- if (durationSeconds === 4 && parseInt(args.duration_seconds || "6") === 5) {
151
- durationSeconds = 6;
152
- }
153
- if (durationSeconds === 6 && parseInt(args.duration_seconds || "6") === 7) {
154
- durationSeconds = 8;
155
- }
155
+ // Simpler logic for these specific values:
156
+ if (
157
+ durationSeconds === 4 &&
158
+ parseInt(args.duration_seconds || "6") === 5
159
+ ) {
160
+ durationSeconds = 6;
161
+ }
162
+ if (
163
+ durationSeconds === 6 &&
164
+ parseInt(args.duration_seconds || "6") === 7
165
+ ) {
166
+ durationSeconds = 8;
167
+ }
156
168
 
157
- const token = await getGoogleAccessToken();
169
+ const token = await getGoogleAccessToken();
158
170
 
159
- const url = `https://${location}-aiplatform.googleapis.com/v1/projects/${projectId}/locations/${location}/publishers/google/models/${modelId}:predictLongRunning`;
171
+ const url = `https://${location}-aiplatform.googleapis.com/v1/projects/${projectId}/locations/${location}/publishers/google/models/${modelId}:predictLongRunning`;
160
172
 
161
- let imagePart: any = undefined;
162
- if (args.image_path) {
163
- const { data, mimeType } = await fileToBase64(args.image_path);
164
- imagePart = {
165
- image: {
166
- bytesBase64Encoded: data,
167
- mimeType,
168
- },
169
- };
170
- }
173
+ let imagePart: any = undefined;
174
+ if (args.image_path) {
175
+ const { data, mimeType } = await fileToBase64(args.image_path);
176
+ imagePart = {
177
+ image: {
178
+ bytesBase64Encoded: data,
179
+ mimeType,
180
+ },
181
+ };
182
+ }
171
183
 
172
- let lastFramePart: any = undefined;
173
- if (args.last_frame_path) {
174
- const { data, mimeType } = await fileToBase64(args.last_frame_path);
175
- lastFramePart = {
176
- lastFrame: {
177
- bytesBase64Encoded: data,
178
- mimeType,
179
- },
180
- };
181
- }
184
+ let lastFramePart: any = undefined;
185
+ if (args.last_frame_path) {
186
+ const { data, mimeType } = await fileToBase64(args.last_frame_path);
187
+ lastFramePart = {
188
+ lastFrame: {
189
+ bytesBase64Encoded: data,
190
+ mimeType,
191
+ },
192
+ };
193
+ }
182
194
 
183
- let referenceImages: any[] | undefined = undefined;
184
- if (args.reference_images) {
185
- let refImages: string[];
186
- if (typeof args.reference_images === "string") {
187
- if (
188
- args.reference_images.startsWith("[") &&
189
- args.reference_images.endsWith("]")
190
- ) {
191
- try {
192
- refImages = JSON.parse(args.reference_images);
193
- } catch {
194
- throw new Error("Invalid reference_images format");
195
+ let referenceImages: any[] | undefined = undefined;
196
+ if (args.reference_images) {
197
+ let refImages: string[];
198
+ if (typeof args.reference_images === "string") {
199
+ if (
200
+ args.reference_images.startsWith("[") &&
201
+ args.reference_images.endsWith("]")
202
+ ) {
203
+ try {
204
+ refImages = JSON.parse(args.reference_images);
205
+ } catch {
206
+ throw new Error("Invalid reference_images format");
207
+ }
208
+ } else {
209
+ refImages = [args.reference_images];
195
210
  }
211
+ } else if (Array.isArray(args.reference_images)) {
212
+ refImages = args.reference_images;
196
213
  } else {
197
- refImages = [args.reference_images];
214
+ throw new Error("Invalid reference_images: must be array or string");
198
215
  }
199
- } else if (Array.isArray(args.reference_images)) {
200
- refImages = args.reference_images;
201
- } else {
202
- throw new Error("Invalid reference_images: must be array or string");
203
- }
204
216
 
205
- if (refImages.length > 0) {
206
- referenceImages = await Promise.all(
207
- refImages.slice(0, 3).map(async (p) => {
208
- const { data, mimeType } = await fileToBase64(p);
209
- return {
210
- image: {
211
- bytesBase64Encoded: data,
212
- mimeType,
213
- },
214
- referenceType: "asset",
215
- };
216
- })
217
- );
217
+ if (refImages.length > 0) {
218
+ referenceImages = await Promise.all(
219
+ refImages.slice(0, 3).map(async (p) => {
220
+ const { data, mimeType } = await fileToBase64(p);
221
+ return {
222
+ image: {
223
+ bytesBase64Encoded: data,
224
+ mimeType,
225
+ },
226
+ referenceType: "asset",
227
+ };
228
+ })
229
+ );
230
+ }
218
231
  }
219
- }
220
232
 
221
- const personGeneration =
222
- args.person_generation || (args.image_path ? "allow_adult" : "allow_all");
223
-
224
- const instances: any[] = [
225
- {
226
- prompt: args.prompt,
227
- ...(imagePart || {}),
228
- ...(lastFramePart || {}),
229
- ...(referenceImages ? { referenceImages } : {}),
230
- },
231
- ];
232
-
233
- const parameters: any = {
234
- aspectRatio: args.aspect_ratio || "9:16",
235
- durationSeconds: durationSeconds,
236
- resolution: args.resolution || "720p",
237
- negativePrompt: args.negative_prompt,
238
- generateAudio: args.generate_audio || false,
239
- personGeneration,
240
- };
241
-
242
- const res = await fetch(url, {
243
- method: "POST",
244
- headers: {
245
- Authorization: `Bearer ${token}`,
246
- "Content-Type": "application/json",
247
- },
248
- body: JSON.stringify({ instances, parameters }),
249
- });
250
-
251
- if (!res.ok) {
252
- const text = await res.text();
253
- throw new Error(`Vertex request failed: ${res.status} ${text}`);
254
- }
233
+ const personGeneration =
234
+ args.person_generation ||
235
+ (args.image_path ? "allow_adult" : "allow_all");
255
236
 
256
- const op = (await res.json()) as any;
257
- const name: string = op.name || op.operation || "";
258
- if (!name) {
259
- throw new Error(
260
- "Vertex did not return an operation name for long-running request"
261
- );
262
- }
237
+ const instances: any[] = [
238
+ {
239
+ prompt: args.prompt,
240
+ ...(imagePart || {}),
241
+ ...(lastFramePart || {}),
242
+ ...(referenceImages ? { referenceImages } : {}),
243
+ },
244
+ ];
263
245
 
264
- let current = op;
265
- let done = !!op.done;
266
- let tries = 0;
246
+ const parameters: any = {
247
+ aspectRatio: args.aspect_ratio || "9:16",
248
+ durationSeconds: durationSeconds,
249
+ resolution: args.resolution || "720p",
250
+ negativePrompt: args.negative_prompt,
251
+ generateAudio: args.generate_audio || false,
252
+ personGeneration,
253
+ };
267
254
 
268
- // Poll using fetchPredictOperation as per Vertex recommendation
269
- const fetchUrl = `https://${location}-aiplatform.googleapis.com/v1/projects/${projectId}/locations/${location}/publishers/google/models/${modelId}:fetchPredictOperation`;
270
- while (!done && tries < 60) {
271
- await wait(10000);
272
- const poll = await fetch(fetchUrl, {
255
+ const res = await fetch(url, {
273
256
  method: "POST",
274
257
  headers: {
275
258
  Authorization: `Bearer ${token}`,
276
259
  "Content-Type": "application/json",
277
260
  },
278
- body: JSON.stringify({ operationName: name }),
261
+ body: JSON.stringify({ instances, parameters }),
279
262
  });
280
- if (!poll.ok) {
281
- const text = await poll.text();
282
- throw new Error(`Vertex operation poll failed: ${poll.status} ${text}`);
263
+
264
+ if (!res.ok) {
265
+ const text = await res.text();
266
+ throw new Error(`Vertex request failed: ${res.status} ${text}`);
283
267
  }
284
- current = (await poll.json()) as any;
285
- done = !!current.done || !!current.response;
286
- tries++;
287
- }
288
268
 
289
- const resp = current.response || current;
290
- // Decode from response.videos[].bytesBase64Encoded only
291
- const videos: Array<{ url: string; filename: string; mimeType: string }> =
292
- [];
293
- const saveVideo = async (base64: string, index: number) => {
294
- if (!base64) return;
269
+ const op = (await res.json()) as any;
270
+ const name: string = op.name || op.operation || "";
271
+ if (!name) {
272
+ throw new Error(
273
+ "Vertex did not return an operation name for long-running request"
274
+ );
275
+ }
276
+
277
+ let current = op;
278
+ let done = !!op.done;
279
+ let tries = 0;
295
280
 
296
- // Use provided output path or generate default with timestamp
297
- let filePath: string;
298
- if (args.output_path) {
299
- // User provided path - use as-is for first video, add index for subsequent
300
- filePath =
301
- index === 0
302
- ? args.output_path
303
- : args.output_path.replace(/\.mp4$/i, `_${index}.mp4`);
304
- } else {
305
- // No path provided - generate timestamped default
306
- const defaultName = `video_output${index > 0 ? `_${index}` : ""}.mp4`;
307
- filePath = generateTimestampedFilename(defaultName);
281
+ // Poll using fetchPredictOperation as per Vertex recommendation
282
+ const fetchUrl = `https://${location}-aiplatform.googleapis.com/v1/projects/${projectId}/locations/${location}/publishers/google/models/${modelId}:fetchPredictOperation`;
283
+ while (!done && tries < 60) {
284
+ await wait(10000);
285
+ const poll = await fetch(fetchUrl, {
286
+ method: "POST",
287
+ headers: {
288
+ Authorization: `Bearer ${token}`,
289
+ "Content-Type": "application/json",
290
+ },
291
+ body: JSON.stringify({ operationName: name }),
292
+ });
293
+ if (!poll.ok) {
294
+ const text = await poll.text();
295
+ throw new Error(
296
+ `Vertex operation poll failed: ${poll.status} ${text}`
297
+ );
298
+ }
299
+ current = (await poll.json()) as any;
300
+ done = !!current.done || !!current.response;
301
+ tries++;
308
302
  }
309
303
 
310
- const buf = Buffer.from(base64, "base64");
311
- const storage = getStorage();
312
- const url = await storage.writeFile(filePath, buf);
313
- videos.push({
314
- url,
315
- filename: filePath,
316
- mimeType: "video/mp4",
317
- });
318
- };
304
+ const resp = current.response || current;
305
+ // Decode from response.videos[].bytesBase64Encoded only
306
+ const videos: Array<{ url: string; filename: string; mimeType: string }> =
307
+ [];
308
+ const saveVideo = async (base64: string, index: number) => {
309
+ if (!base64) return;
319
310
 
320
- if (Array.isArray(resp?.videos) && resp.videos.length > 0) {
321
- for (let i = 0; i < resp.videos.length; i++) {
322
- const v = resp.videos[i] || {};
323
- if (typeof v.bytesBase64Encoded === "string") {
324
- await saveVideo(v.bytesBase64Encoded, i);
311
+ // Use provided output path or generate default with timestamp
312
+ let filePath: string;
313
+ if (args.output_path) {
314
+ // User provided path - use as-is for first video, add index for subsequent
315
+ filePath =
316
+ index === 0
317
+ ? args.output_path
318
+ : args.output_path.replace(/\.mp4$/i, `_${index}.mp4`);
319
+ } else {
320
+ // No path provided - generate timestamped default
321
+ const defaultName = `video_output${index > 0 ? `_${index}` : ""}.mp4`;
322
+ filePath = generateTimestampedFilename(defaultName);
323
+ }
324
+
325
+ const buf = Buffer.from(base64, "base64");
326
+ const storage = getStorage();
327
+ const url = await storage.writeFile(filePath, buf);
328
+ videos.push({
329
+ url,
330
+ filename: filePath,
331
+ mimeType: "video/mp4",
332
+ });
333
+ };
334
+
335
+ if (Array.isArray(resp?.videos) && resp.videos.length > 0) {
336
+ for (let i = 0; i < resp.videos.length; i++) {
337
+ const v = resp.videos[i] || {};
338
+ if (typeof v.bytesBase64Encoded === "string") {
339
+ await saveVideo(v.bytesBase64Encoded, i);
340
+ }
325
341
  }
326
342
  }
327
- }
328
- if (videos.length > 0) {
329
- return JSON.stringify({
330
- videos,
331
- message: "Video(s) generated successfully",
332
- });
333
- }
343
+ if (videos.length > 0) {
344
+ return JSON.stringify({
345
+ videos,
346
+ message: "Video(s) generated successfully",
347
+ });
348
+ }
334
349
 
335
- // If nothing saved, return a concise summary plus head/tail snippets of JSON
336
- let jsonStr = "";
337
- try {
338
- jsonStr = JSON.stringify(resp);
339
- } catch {}
340
- const head150 = jsonStr ? jsonStr.slice(0, 150) : "";
341
- const tail50 = jsonStr
342
- ? jsonStr.slice(Math.max(0, jsonStr.length - 50))
343
- : "";
344
- return `Vertex operation done but no videos array present. operationName=${name}. json_head150=${head150} json_tail50=${tail50}`;
350
+ // If nothing saved, return a concise summary plus head/tail snippets of JSON
351
+ let jsonStr = "";
352
+ try {
353
+ jsonStr = JSON.stringify(resp);
354
+ } catch {}
355
+ const head150 = jsonStr ? jsonStr.slice(0, 150) : "";
356
+ const tail50 = jsonStr
357
+ ? jsonStr.slice(Math.max(0, jsonStr.length - 50))
358
+ : "";
359
+ return `Vertex operation done but no videos array present. operationName=${name}. json_head150=${head150} json_tail50=${tail50}`;
360
+ }, "imageToVideo");
345
361
  },
346
362
  };