@mixio-pro/kalaasetu-mcp 1.1.3 → 1.2.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.
@@ -1,6 +1,7 @@
1
1
  import { z } from "zod";
2
2
  import { getStorage } from "../storage";
3
3
  import { generateTimestampedFilename } from "../utils/filename";
4
+ import { safeToolExecute } from "../utils/tool-wrapper";
4
5
 
5
6
  import { getGoogleAccessToken } from "../utils/google-auth";
6
7
 
@@ -110,6 +111,7 @@ export const imageToVideo = {
110
111
  )
111
112
  .default(false),
112
113
  }),
114
+ timeoutMs: 1200000, // 20 minutes
113
115
  async execute(args: {
114
116
  prompt: string;
115
117
  image_path?: string;
@@ -126,223 +128,235 @@ export const imageToVideo = {
126
128
  model_id?: string;
127
129
  generate_audio?: boolean;
128
130
  }) {
129
- const projectId = args.project_id || "mixio-pro";
130
- const location = args.location_id || "us-central1";
131
- const modelId = args.model_id || "veo-3.1-fast-generate-preview";
131
+ return safeToolExecute(async () => {
132
+ const projectId = args.project_id || "mixio-pro";
133
+ const location = args.location_id || "us-central1";
134
+ const modelId = args.model_id || "veo-3.1-fast-generate-preview";
132
135
 
133
- // Validate and parse duration_seconds - snap to nearest 4, 6, or 8
134
- let durationSeconds = parseInt(args.duration_seconds || "6");
135
- if (isNaN(durationSeconds)) durationSeconds = 6;
136
+ // Validate and parse duration_seconds - snap to nearest 4, 6, or 8
137
+ let durationSeconds = parseInt(args.duration_seconds || "6");
138
+ if (isNaN(durationSeconds)) durationSeconds = 6;
136
139
 
137
- const validDurations = [4, 6, 8];
138
- // Find nearest valid duration
139
- durationSeconds = validDurations.reduce((prev, curr) => {
140
- return Math.abs(curr - durationSeconds) < Math.abs(prev - durationSeconds)
141
- ? curr
142
- : prev;
143
- });
140
+ const validDurations = [4, 6, 8];
141
+ // Find nearest valid duration
142
+ durationSeconds = validDurations.reduce((prev, curr) => {
143
+ return Math.abs(curr - durationSeconds) <
144
+ Math.abs(prev - durationSeconds)
145
+ ? curr
146
+ : prev;
147
+ });
144
148
 
145
- // Tie-breaking: if equidistant (e.g. 5), the reduce above keeps the first one (4) because < is strict.
146
- // However, user requested "nearest duration with the ceil", effectively meaning round up if equidistant.
147
- // Let's explicitly handle the equidistant cases or just use a custom finder.
148
- // 5 -> equidistant to 4 and 6. "With ceil" implies 6.
149
- // 7 -> equidistant to 6 and 8. "With ceil" implies 8.
149
+ // Tie-breaking: if equidistant (e.g. 5), the reduce above keeps the first one (4) because < is strict.
150
+ // However, user requested "nearest duration with the ceil", effectively meaning round up if equidistant.
151
+ // Let's explicitly handle the equidistant cases or just use a custom finder.
152
+ // 5 -> equidistant to 4 and 6. "With ceil" implies 6.
153
+ // 7 -> equidistant to 6 and 8. "With ceil" implies 8.
150
154
 
151
- // Simpler logic for these specific values:
152
- if (durationSeconds === 4 && parseInt(args.duration_seconds || "6") === 5) {
153
- durationSeconds = 6;
154
- }
155
- if (durationSeconds === 6 && parseInt(args.duration_seconds || "6") === 7) {
156
- durationSeconds = 8;
157
- }
155
+ // Simpler logic for these specific values:
156
+ if (
157
+ durationSeconds === 4 &&
158
+ parseInt(args.duration_seconds || "6") === 5
159
+ ) {
160
+ durationSeconds = 6;
161
+ }
162
+ if (
163
+ durationSeconds === 6 &&
164
+ parseInt(args.duration_seconds || "6") === 7
165
+ ) {
166
+ durationSeconds = 8;
167
+ }
158
168
 
159
- const token = await getGoogleAccessToken();
169
+ const token = await getGoogleAccessToken();
160
170
 
161
- const url = `https://${location}-aiplatform.googleapis.com/v1/projects/${projectId}/locations/${location}/publishers/google/models/${modelId}:predictLongRunning`;
171
+ const url = `https://${location}-aiplatform.googleapis.com/v1/projects/${projectId}/locations/${location}/publishers/google/models/${modelId}:predictLongRunning`;
162
172
 
163
- let imagePart: any = undefined;
164
- if (args.image_path) {
165
- const { data, mimeType } = await fileToBase64(args.image_path);
166
- imagePart = {
167
- image: {
168
- bytesBase64Encoded: data,
169
- mimeType,
170
- },
171
- };
172
- }
173
+ let imagePart: any = undefined;
174
+ if (args.image_path) {
175
+ const { data, mimeType } = await fileToBase64(args.image_path);
176
+ imagePart = {
177
+ image: {
178
+ bytesBase64Encoded: data,
179
+ mimeType,
180
+ },
181
+ };
182
+ }
173
183
 
174
- let lastFramePart: any = undefined;
175
- if (args.last_frame_path) {
176
- const { data, mimeType } = await fileToBase64(args.last_frame_path);
177
- lastFramePart = {
178
- lastFrame: {
179
- bytesBase64Encoded: data,
180
- mimeType,
181
- },
182
- };
183
- }
184
+ let lastFramePart: any = undefined;
185
+ if (args.last_frame_path) {
186
+ const { data, mimeType } = await fileToBase64(args.last_frame_path);
187
+ lastFramePart = {
188
+ lastFrame: {
189
+ bytesBase64Encoded: data,
190
+ mimeType,
191
+ },
192
+ };
193
+ }
184
194
 
185
- let referenceImages: any[] | undefined = undefined;
186
- if (args.reference_images) {
187
- let refImages: string[];
188
- if (typeof args.reference_images === "string") {
189
- if (
190
- args.reference_images.startsWith("[") &&
191
- args.reference_images.endsWith("]")
192
- ) {
193
- try {
194
- refImages = JSON.parse(args.reference_images);
195
- } catch {
196
- throw new Error("Invalid reference_images format");
195
+ let referenceImages: any[] | undefined = undefined;
196
+ if (args.reference_images) {
197
+ let refImages: string[];
198
+ if (typeof args.reference_images === "string") {
199
+ if (
200
+ args.reference_images.startsWith("[") &&
201
+ args.reference_images.endsWith("]")
202
+ ) {
203
+ try {
204
+ refImages = JSON.parse(args.reference_images);
205
+ } catch {
206
+ throw new Error("Invalid reference_images format");
207
+ }
208
+ } else {
209
+ refImages = [args.reference_images];
197
210
  }
211
+ } else if (Array.isArray(args.reference_images)) {
212
+ refImages = args.reference_images;
198
213
  } else {
199
- refImages = [args.reference_images];
214
+ throw new Error("Invalid reference_images: must be array or string");
200
215
  }
201
- } else if (Array.isArray(args.reference_images)) {
202
- refImages = args.reference_images;
203
- } else {
204
- throw new Error("Invalid reference_images: must be array or string");
205
- }
206
216
 
207
- if (refImages.length > 0) {
208
- referenceImages = await Promise.all(
209
- refImages.slice(0, 3).map(async (p) => {
210
- const { data, mimeType } = await fileToBase64(p);
211
- return {
212
- image: {
213
- bytesBase64Encoded: data,
214
- mimeType,
215
- },
216
- referenceType: "asset",
217
- };
218
- })
219
- );
217
+ if (refImages.length > 0) {
218
+ referenceImages = await Promise.all(
219
+ refImages.slice(0, 3).map(async (p) => {
220
+ const { data, mimeType } = await fileToBase64(p);
221
+ return {
222
+ image: {
223
+ bytesBase64Encoded: data,
224
+ mimeType,
225
+ },
226
+ referenceType: "asset",
227
+ };
228
+ })
229
+ );
230
+ }
220
231
  }
221
- }
222
232
 
223
- const personGeneration =
224
- args.person_generation || (args.image_path ? "allow_adult" : "allow_all");
225
-
226
- const instances: any[] = [
227
- {
228
- prompt: args.prompt,
229
- ...(imagePart || {}),
230
- ...(lastFramePart || {}),
231
- ...(referenceImages ? { referenceImages } : {}),
232
- },
233
- ];
234
-
235
- const parameters: any = {
236
- aspectRatio: args.aspect_ratio || "9:16",
237
- durationSeconds: durationSeconds,
238
- resolution: args.resolution || "720p",
239
- negativePrompt: args.negative_prompt,
240
- generateAudio: args.generate_audio || false,
241
- personGeneration,
242
- };
243
-
244
- const res = await fetch(url, {
245
- method: "POST",
246
- headers: {
247
- Authorization: `Bearer ${token}`,
248
- "Content-Type": "application/json",
249
- },
250
- body: JSON.stringify({ instances, parameters }),
251
- });
252
-
253
- if (!res.ok) {
254
- const text = await res.text();
255
- throw new Error(`Vertex request failed: ${res.status} ${text}`);
256
- }
233
+ const personGeneration =
234
+ args.person_generation ||
235
+ (args.image_path ? "allow_adult" : "allow_all");
257
236
 
258
- const op = (await res.json()) as any;
259
- const name: string = op.name || op.operation || "";
260
- if (!name) {
261
- throw new Error(
262
- "Vertex did not return an operation name for long-running request"
263
- );
264
- }
237
+ const instances: any[] = [
238
+ {
239
+ prompt: args.prompt,
240
+ ...(imagePart || {}),
241
+ ...(lastFramePart || {}),
242
+ ...(referenceImages ? { referenceImages } : {}),
243
+ },
244
+ ];
265
245
 
266
- let current = op;
267
- let done = !!op.done;
268
- let tries = 0;
246
+ const parameters: any = {
247
+ aspectRatio: args.aspect_ratio || "9:16",
248
+ durationSeconds: durationSeconds,
249
+ resolution: args.resolution || "720p",
250
+ negativePrompt: args.negative_prompt,
251
+ generateAudio: args.generate_audio || false,
252
+ personGeneration,
253
+ };
269
254
 
270
- // Poll using fetchPredictOperation as per Vertex recommendation
271
- const fetchUrl = `https://${location}-aiplatform.googleapis.com/v1/projects/${projectId}/locations/${location}/publishers/google/models/${modelId}:fetchPredictOperation`;
272
- while (!done && tries < 60) {
273
- await wait(10000);
274
- const poll = await fetch(fetchUrl, {
255
+ const res = await fetch(url, {
275
256
  method: "POST",
276
257
  headers: {
277
258
  Authorization: `Bearer ${token}`,
278
259
  "Content-Type": "application/json",
279
260
  },
280
- body: JSON.stringify({ operationName: name }),
261
+ body: JSON.stringify({ instances, parameters }),
281
262
  });
282
- if (!poll.ok) {
283
- const text = await poll.text();
284
- throw new Error(`Vertex operation poll failed: ${poll.status} ${text}`);
263
+
264
+ if (!res.ok) {
265
+ const text = await res.text();
266
+ throw new Error(`Vertex request failed: ${res.status} ${text}`);
285
267
  }
286
- current = (await poll.json()) as any;
287
- done = !!current.done || !!current.response;
288
- tries++;
289
- }
290
268
 
291
- const resp = current.response || current;
292
- // Decode from response.videos[].bytesBase64Encoded only
293
- const videos: Array<{ url: string; filename: string; mimeType: string }> =
294
- [];
295
- const saveVideo = async (base64: string, index: number) => {
296
- if (!base64) return;
269
+ const op = (await res.json()) as any;
270
+ const name: string = op.name || op.operation || "";
271
+ if (!name) {
272
+ throw new Error(
273
+ "Vertex did not return an operation name for long-running request"
274
+ );
275
+ }
276
+
277
+ let current = op;
278
+ let done = !!op.done;
279
+ let tries = 0;
297
280
 
298
- // Use provided output path or generate default with timestamp
299
- let filePath: string;
300
- if (args.output_path) {
301
- // User provided path - use as-is for first video, add index for subsequent
302
- filePath =
303
- index === 0
304
- ? args.output_path
305
- : args.output_path.replace(/\.mp4$/i, `_${index}.mp4`);
306
- } else {
307
- // No path provided - generate timestamped default
308
- const defaultName = `video_output${index > 0 ? `_${index}` : ""}.mp4`;
309
- filePath = generateTimestampedFilename(defaultName);
281
+ // Poll using fetchPredictOperation as per Vertex recommendation
282
+ const fetchUrl = `https://${location}-aiplatform.googleapis.com/v1/projects/${projectId}/locations/${location}/publishers/google/models/${modelId}:fetchPredictOperation`;
283
+ while (!done && tries < 60) {
284
+ await wait(10000);
285
+ const poll = await fetch(fetchUrl, {
286
+ method: "POST",
287
+ headers: {
288
+ Authorization: `Bearer ${token}`,
289
+ "Content-Type": "application/json",
290
+ },
291
+ body: JSON.stringify({ operationName: name }),
292
+ });
293
+ if (!poll.ok) {
294
+ const text = await poll.text();
295
+ throw new Error(
296
+ `Vertex operation poll failed: ${poll.status} ${text}`
297
+ );
298
+ }
299
+ current = (await poll.json()) as any;
300
+ done = !!current.done || !!current.response;
301
+ tries++;
310
302
  }
311
303
 
312
- const buf = Buffer.from(base64, "base64");
313
- const storage = getStorage();
314
- const url = await storage.writeFile(filePath, buf);
315
- videos.push({
316
- url,
317
- filename: filePath,
318
- mimeType: "video/mp4",
319
- });
320
- };
304
+ const resp = current.response || current;
305
+ // Decode from response.videos[].bytesBase64Encoded only
306
+ const videos: Array<{ url: string; filename: string; mimeType: string }> =
307
+ [];
308
+ const saveVideo = async (base64: string, index: number) => {
309
+ if (!base64) return;
321
310
 
322
- if (Array.isArray(resp?.videos) && resp.videos.length > 0) {
323
- for (let i = 0; i < resp.videos.length; i++) {
324
- const v = resp.videos[i] || {};
325
- if (typeof v.bytesBase64Encoded === "string") {
326
- await saveVideo(v.bytesBase64Encoded, i);
311
+ // Use provided output path or generate default with timestamp
312
+ let filePath: string;
313
+ if (args.output_path) {
314
+ // User provided path - use as-is for first video, add index for subsequent
315
+ filePath =
316
+ index === 0
317
+ ? args.output_path
318
+ : args.output_path.replace(/\.mp4$/i, `_${index}.mp4`);
319
+ } else {
320
+ // No path provided - generate timestamped default
321
+ const defaultName = `video_output${index > 0 ? `_${index}` : ""}.mp4`;
322
+ filePath = generateTimestampedFilename(defaultName);
323
+ }
324
+
325
+ const buf = Buffer.from(base64, "base64");
326
+ const storage = getStorage();
327
+ const url = await storage.writeFile(filePath, buf);
328
+ videos.push({
329
+ url,
330
+ filename: filePath,
331
+ mimeType: "video/mp4",
332
+ });
333
+ };
334
+
335
+ if (Array.isArray(resp?.videos) && resp.videos.length > 0) {
336
+ for (let i = 0; i < resp.videos.length; i++) {
337
+ const v = resp.videos[i] || {};
338
+ if (typeof v.bytesBase64Encoded === "string") {
339
+ await saveVideo(v.bytesBase64Encoded, i);
340
+ }
327
341
  }
328
342
  }
329
- }
330
- if (videos.length > 0) {
331
- return JSON.stringify({
332
- videos,
333
- message: "Video(s) generated successfully",
334
- });
335
- }
343
+ if (videos.length > 0) {
344
+ return JSON.stringify({
345
+ videos,
346
+ message: "Video(s) generated successfully",
347
+ });
348
+ }
336
349
 
337
- // If nothing saved, return a concise summary plus head/tail snippets of JSON
338
- let jsonStr = "";
339
- try {
340
- jsonStr = JSON.stringify(resp);
341
- } catch {}
342
- const head150 = jsonStr ? jsonStr.slice(0, 150) : "";
343
- const tail50 = jsonStr
344
- ? jsonStr.slice(Math.max(0, jsonStr.length - 50))
345
- : "";
346
- return `Vertex operation done but no videos array present. operationName=${name}. json_head150=${head150} json_tail50=${tail50}`;
350
+ // If nothing saved, return a concise summary plus head/tail snippets of JSON
351
+ let jsonStr = "";
352
+ try {
353
+ jsonStr = JSON.stringify(resp);
354
+ } catch {}
355
+ const head150 = jsonStr ? jsonStr.slice(0, 150) : "";
356
+ const tail50 = jsonStr
357
+ ? jsonStr.slice(Math.max(0, jsonStr.length - 50))
358
+ : "";
359
+ return `Vertex operation done but no videos array present. operationName=${name}. json_head150=${head150} json_tail50=${tail50}`;
360
+ }, "imageToVideo");
347
361
  },
348
362
  };