@mixio-pro/kalaasetu-mcp 1.1.3 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -11,6 +11,7 @@ import * as wav from "wav";
  import { PassThrough } from "stream";
  import { getStorage } from "../storage";
  import { generateTimestampedFilename } from "../utils/filename";
+ import { safeToolExecute } from "../utils/tool-wrapper";

  const ai = new GoogleGenAI({
  apiKey: process.env.GEMINI_API_KEY || "",
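Note: the new ../utils/tool-wrapper module itself is not part of this diff, so only its call shape is visible: each tool below passes safeToolExecute an async callback plus a label such as "gemini-generateImage". A minimal sketch of what such a wrapper might look like, under that assumption (not the package's actual code):

// Hypothetical sketch only; the real safeToolExecute lives in
// ../utils/tool-wrapper and is not shown in this diff.
export async function safeToolExecute(
  fn: () => Promise<string>,
  toolName: string
): Promise<string> {
  try {
    // Run the tool body and return its result unchanged.
    return await fn();
  } catch (error: any) {
    // Report the failure as data instead of letting it propagate,
    // so a single failing tool call cannot take down the MCP server.
    return JSON.stringify({
      error: true,
      tool: toolName,
      message: error?.message ?? String(error),
    });
  }
}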
@@ -128,11 +129,17 @@ async function uploadFileToGemini(filePath: string): Promise<any> {
  fs.unlinkSync(localPath);
  }

- // Wait for file processing to complete
+ // Wait for file processing to complete (max 60 seconds)
  let getFile = await ai.files.get({ name: uploadedFile.name! });
- while (getFile.state === "PROCESSING") {
+ let attempts = 0;
+ while (getFile.state === "PROCESSING" && attempts < 20) {
  await new Promise((resolve) => setTimeout(resolve, 3000));
  getFile = await ai.files.get({ name: uploadedFile.name! });
+ attempts++;
+ }
+
+ if (getFile.state === "PROCESSING") {
+ throw new Error("File processing timed out after 60 seconds");
  }

  if (getFile.state === "FAILED") {
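Note: the new loop bounds polling at 20 attempts of 3 seconds each, which matches the 60-second figure in the comment and the timeout error. The same bounded-polling pattern, pulled out into a standalone helper purely for illustration (the package keeps it inline in uploadFileToGemini, and these names are assumptions):

// Illustrative helper only; names and defaults are assumptions.
async function pollUntilDone<T>(
  fetchState: () => Promise<T>,
  isPending: (value: T) => boolean,
  intervalMs = 3000,
  maxAttempts = 20
): Promise<T> {
  let value = await fetchState();
  let attempts = 0;
  while (isPending(value) && attempts < maxAttempts) {
    // Sleep between polls so the API is not hammered.
    await new Promise((resolve) => setTimeout(resolve, intervalMs));
    value = await fetchState();
    attempts++;
  }
  if (isPending(value)) {
    throw new Error(`Still pending after ${maxAttempts * intervalMs} ms`);
  }
  return value;
}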
@@ -213,74 +220,77 @@ export const geminiTextToImage = {
  .optional()
  .describe("Optional reference image file paths to guide generation"),
  }),
+ timeoutMs: 300000,
  execute: async (args: {
  prompt: string;
  aspect_ratio?: string;
  output_path?: string;
  reference_images?: string[];
  }) => {
- try {
- const contents: any[] = [args.prompt];
+ return safeToolExecute(async () => {
+ try {
+ const contents: any[] = [args.prompt];

- if (args.reference_images && Array.isArray(args.reference_images)) {
- for (const refPath of args.reference_images) {
- contents.push(await fileToGenerativePart(refPath));
+ if (args.reference_images && Array.isArray(args.reference_images)) {
+ for (const refPath of args.reference_images) {
+ contents.push(await fileToGenerativePart(refPath));
+ }
  }
- }

- const response = await ai.models.generateContent({
- model: "gemini-3-pro-image-preview",
- contents: contents,
- config: {
- responseModalities: ["TEXT", "IMAGE"],
- imageConfig: {
- aspectRatio: args.aspect_ratio || "9:16",
+ const response = await ai.models.generateContent({
+ model: "gemini-3-pro-image-preview",
+ contents: contents,
+ config: {
+ responseModalities: ["TEXT", "IMAGE"],
+ imageConfig: {
+ aspectRatio: args.aspect_ratio || "9:16",
+ },
  },
- },
- });
-
- const images = [];
- let textResponse = "";
-
- if (response.candidates && response.candidates[0]?.content?.parts) {
- for (const part of response.candidates[0].content.parts) {
- if (part.text) {
- textResponse += part.text;
- } else if (part.inlineData?.data) {
- const imageData = part.inlineData.data;
- // Always save the image - use provided path or generate one
- const outputPath =
- args.output_path ||
- generateTimestampedFilename("generated_image.png");
- const storage = getStorage();
- const url = await storage.writeFile(
- outputPath,
- Buffer.from(imageData, "base64")
- );
- images.push({
- url,
- filename: outputPath,
- mimeType: "image/png",
- });
+ });
+
+ const images = [];
+ let textResponse = "";
+
+ if (response.candidates && response.candidates[0]?.content?.parts) {
+ for (const part of response.candidates[0].content.parts) {
+ if (part.text) {
+ textResponse += part.text;
+ } else if (part.inlineData?.data) {
+ const imageData = part.inlineData.data;
+ // Always save the image - use provided path or generate one
+ const outputPath =
+ args.output_path ||
+ generateTimestampedFilename("generated_image.png");
+ const storage = getStorage();
+ const url = await storage.writeFile(
+ outputPath,
+ Buffer.from(imageData, "base64")
+ );
+ images.push({
+ url,
+ filename: outputPath,
+ mimeType: "image/png",
+ });
+ }
  }
  }
- }

- if (images.length > 0) {
- // Return the URL directly for easy parsing
- return JSON.stringify({
- url: images?.[0]?.url,
- images,
- message: textResponse || "Image generated successfully",
- });
- }
+ if (images.length > 0) {
+ // Return the URL directly for easy parsing
+ return JSON.stringify({
+ url: images?.[0]?.url,
+ images,
+ message: textResponse || "Image generated successfully",
+ });
+ }

- return (
- textResponse || "Image generation completed but no image was produced"
- );
- } catch (error: any) {
- throw new Error(`Image generation failed: ${error.message}`);
- }
+ return (
+ textResponse || "Image generation completed but no image was produced"
+ );
+ } catch (error: any) {
+ throw new Error(`Image generation failed: ${error.message}`);
+ }
+ }, "gemini-generateImage");
  },
  };
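Note: as the "Return the URL directly for easy parsing" comment indicates, a successful run now yields a JSON string whose top-level url field is the first saved image; when no image is produced, the tool falls back to plain text. A caller could consume that contract roughly as follows; the invocation itself is hypothetical, since how execute is dispatched depends on the MCP host:

// Hypothetical caller; only the returned JSON shape comes from the diff above.
const raw = await geminiTextToImage.execute({
  prompt: "A lighthouse at dusk, watercolor style",
  aspect_ratio: "9:16",
});

try {
  const parsed = JSON.parse(raw);
  // url points at the first generated image; images lists every saved file.
  console.log(parsed.url, parsed.images?.length, parsed.message);
} catch {
  // Plain-text fallback when no image was produced.
  console.log(raw);
}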
@@ -300,63 +310,68 @@ export const geminiEditImage = {
  .optional()
  .describe("Additional image paths for reference"),
  }),
+ timeoutMs: 300000,
  execute: async (args: {
  image_path: string;
  prompt: string;
  output_path?: string;
  reference_images?: string[];
  }) => {
- try {
- const imagePart = await fileToGenerativePart(args.image_path);
- const contents: any[] = [args.prompt, imagePart];
+ return safeToolExecute(async () => {
+ try {
+ const imagePart = await fileToGenerativePart(args.image_path);
+ const contents: any[] = [args.prompt, imagePart];

- if (args.reference_images) {
- for (const refPath of args.reference_images) {
- contents.push(await fileToGenerativePart(refPath));
+ if (args.reference_images) {
+ for (const refPath of args.reference_images) {
+ contents.push(await fileToGenerativePart(refPath));
+ }
  }
- }

- const response = await ai.models.generateContent({
- model: "gemini-3-pro-image-preview",
- contents: contents,
- });
-
- const images = [];
- let textResponse = "";
-
- if (response.candidates && response.candidates[0]?.content?.parts) {
- for (const part of response.candidates[0].content.parts) {
- if (part.text) {
- textResponse += part.text;
- } else if (part.inlineData?.data) {
- const imageData = part.inlineData.data;
- if (args.output_path) {
- const storage = getStorage();
- const url = await storage.writeFile(
- args.output_path,
- Buffer.from(imageData, "base64")
- );
- images.push({
- url,
- filename: args.output_path,
- mimeType: "image/png",
- });
+ const response = await ai.models.generateContent({
+ model: "gemini-3-pro-image-preview",
+ contents: contents,
+ });
+
+ const images = [];
+ let textResponse = "";
+
+ if (response.candidates && response.candidates[0]?.content?.parts) {
+ for (const part of response.candidates[0].content.parts) {
+ if (part.text) {
+ textResponse += part.text;
+ } else if (part.inlineData?.data) {
+ const imageData = part.inlineData.data;
+ if (args.output_path) {
+ const storage = getStorage();
+ const url = await storage.writeFile(
+ args.output_path,
+ Buffer.from(imageData, "base64")
+ );
+ images.push({
+ url,
+ filename: args.output_path,
+ mimeType: "image/png",
+ });
+ }
  }
  }
  }
- }

- if (images.length > 0) {
- return JSON.stringify({
- images,
- message: textResponse || "Image edited successfully",
- });
- }
+ if (images.length > 0) {
+ return JSON.stringify({
+ images,
+ message: textResponse || "Image edited successfully",
+ });
+ }

- return textResponse || "Image editing completed but no response received";
- } catch (error: any) {
- throw new Error(`Image editing failed: ${error.message}`);
- }
+ return (
+ textResponse || "Image editing completed but no response received"
+ );
+ } catch (error: any) {
+ throw new Error(`Image editing failed: ${error.message}`);
+ }
+ }, "gemini-editImage");
  },
  };

@@ -370,59 +385,62 @@ export const geminiAnalyzeImages = {
  .describe("Array of image file paths to analyze"),
  prompt: z.string().describe("Text prompt or question about the images"),
  }),
+ timeoutMs: 300000,
  execute: async (args: { image_paths: string[]; prompt: string }) => {
- try {
- // Handle array parsing
- if (!args.image_paths) {
- throw new Error("Image paths not provided");
- }
+ return safeToolExecute(async () => {
+ try {
+ // Handle array parsing
+ if (!args.image_paths) {
+ throw new Error("Image paths not provided");
+ }

- // Convert to array if passed as string
- let imagePaths: string[];
- if (typeof args.image_paths === "string") {
- const strValue = args.image_paths as string;
- if (strValue.startsWith("[") && strValue.endsWith("]")) {
- try {
- imagePaths = JSON.parse(strValue);
- } catch {
- throw new Error("Invalid image_paths format");
+ // Convert to array if passed as string
+ let imagePaths: string[];
+ if (typeof args.image_paths === "string") {
+ const strValue = args.image_paths as string;
+ if (strValue.startsWith("[") && strValue.endsWith("]")) {
+ try {
+ imagePaths = JSON.parse(strValue);
+ } catch {
+ throw new Error("Invalid image_paths format");
+ }
+ } else {
+ imagePaths = [strValue];
  }
+ } else if (Array.isArray(args.image_paths)) {
+ imagePaths = args.image_paths;
  } else {
- imagePaths = [strValue];
+ throw new Error("Invalid image_paths: must be array or string");
  }
- } else if (Array.isArray(args.image_paths)) {
- imagePaths = args.image_paths;
- } else {
- throw new Error("Invalid image_paths: must be array or string");
- }

- if (imagePaths.length === 0) {
- throw new Error("At least one image path must be provided");
- }
+ if (imagePaths.length === 0) {
+ throw new Error("At least one image path must be provided");
+ }

- const contents: any[] = [args.prompt];
+ const contents: any[] = [args.prompt];

- for (const imagePath of imagePaths) {
- contents.push(await fileToGenerativePart(imagePath));
- }
+ for (const imagePath of imagePaths) {
+ contents.push(await fileToGenerativePart(imagePath));
+ }

- const response = await ai.models.generateContent({
- model: "gemini-2.5-pro",
- contents: contents,
- });
+ const response = await ai.models.generateContent({
+ model: "gemini-2.5-pro",
+ contents: contents,
+ });

- let result = "";
- if (response.candidates && response.candidates[0]?.content?.parts) {
- for (const part of response.candidates[0].content.parts) {
- if (part.text) {
- result += part.text;
+ let result = "";
+ if (response.candidates && response.candidates[0]?.content?.parts) {
+ for (const part of response.candidates[0].content.parts) {
+ if (part.text) {
+ result += part.text;
+ }
  }
  }
+ return result || "Analysis completed but no text response received";
+ } catch (error: any) {
+ throw new Error(`Image analysis failed: ${error.message}`);
  }
- return result || "Analysis completed but no text response received";
- } catch (error: any) {
- throw new Error(`Image analysis failed: ${error.message}`);
- }
+ }, "gemini-analyzeImages");
  },
  };
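Note: geminiAnalyzeImages (and geminiAnalyzeVideos below) accept image_paths / video_inputs either as a real array or as a single string, including a JSON-encoded array, and normalize the value before use. The shared shape of that normalization, extracted here only as an illustration (the package keeps it inline in each tool; this helper is not part of the package):

// Illustrative helper mirroring the inline image_paths / video_inputs handling.
function coerceToStringArray(input: string[] | string, label: string): string[] {
  if (typeof input === "string") {
    // A stringified array such as '["a.png","b.png"]' gets parsed;
    // any other string is treated as a single path.
    if (input.startsWith("[") && input.endsWith("]")) {
      try {
        return JSON.parse(input);
      } catch {
        throw new Error(`Invalid ${label} format`);
      }
    }
    return [input];
  }
  if (Array.isArray(input)) {
    return input;
  }
  throw new Error(`Invalid ${label}: must be array or string`);
}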
@@ -444,53 +462,56 @@ export const geminiSingleSpeakerTts = {
  "Output WAV file path (optional, defaults to timestamp-based filename)"
  ),
  }),
+ timeoutMs: 300000,
  execute: async (args: {
  text: string;
  voice_name: string;
  output_path?: string;
  }) => {
- try {
- const response = await ai.models.generateContent({
- model: "gemini-2.5-pro-preview-tts",
- contents: [{ parts: [{ text: args.text }] }],
- config: {
- responseModalities: ["AUDIO"],
- speechConfig: {
- voiceConfig: {
- prebuiltVoiceConfig: {
- voiceName: args.voice_name || "Despina",
+ return safeToolExecute(async () => {
+ try {
+ const response = await ai.models.generateContent({
+ model: "gemini-2.5-pro-preview-tts",
+ contents: [{ parts: [{ text: args.text }] }],
+ config: {
+ responseModalities: ["AUDIO"],
+ speechConfig: {
+ voiceConfig: {
+ prebuiltVoiceConfig: {
+ voiceName: args.voice_name || "Despina",
+ },
  },
  },
  },
- },
- });
+ });

- const data =
- response.candidates?.[0]?.content?.parts?.[0]?.inlineData?.data;
- if (!data) {
- throw new Error("No audio data received from Gemini API");
- }
+ const data =
+ response.candidates?.[0]?.content?.parts?.[0]?.inlineData?.data;
+ if (!data) {
+ throw new Error("No audio data received from Gemini API");
+ }

- const audioBuffer = Buffer.from(data, "base64");
+ const audioBuffer = Buffer.from(data, "base64");

- // Use provided output path or generate default with timestamp
- const outputPath =
- args.output_path || generateTimestampedFilename("voice_output.wav");
+ // Use provided output path or generate default with timestamp
+ const outputPath =
+ args.output_path || generateTimestampedFilename("voice_output.wav");

- const storage = getStorage();
- const url = await storage.writeFile(outputPath, audioBuffer);
-
- return JSON.stringify({
- audio: {
- url,
- filename: outputPath,
- mimeType: "audio/wav",
- },
- message: "Audio generated successfully",
- });
- } catch (error: any) {
- throw new Error(`Voice generation failed: ${error.message}`);
- }
+ const storage = getStorage();
+ const url = await storage.writeFile(outputPath, audioBuffer);
+
+ return JSON.stringify({
+ audio: {
+ url,
+ filename: outputPath,
+ mimeType: "audio/wav",
+ },
+ message: "Audio generated successfully",
+ });
+ } catch (error: any) {
+ throw new Error(`Voice generation failed: ${error.message}`);
+ }
+ }, "gemini-generateSpeech");
  },
  };

@@ -530,6 +551,7 @@ export const geminiAnalyzeVideos = {
  "Media resolution: 'default' or 'low' (low resolution uses ~100 tokens/sec vs 300 tokens/sec)"
  ),
  }),
+ timeoutMs: 300000,
  execute: async (args: {
  video_inputs: string[];
  prompt: string;
@@ -538,86 +560,90 @@
  end_offset?: string;
  media_resolution?: string;
  }) => {
- try {
- // Handle array parsing
- if (!args.video_inputs) {
- throw new Error("Video inputs not provided");
- }
+ return safeToolExecute(async () => {
+ try {
+ // Handle array parsing
+ if (!args.video_inputs) {
+ throw new Error("Video inputs not provided");
+ }

- // Convert to array if passed as string
- let videoInputs: string[];
- if (typeof args.video_inputs === "string") {
- const strValue = args.video_inputs as string;
- if (strValue.startsWith("[") && strValue.endsWith("]")) {
- try {
- videoInputs = JSON.parse(strValue);
- } catch {
- throw new Error("Invalid video_inputs format");
+ // Convert to array if passed as string
+ let videoInputs: string[];
+ if (typeof args.video_inputs === "string") {
+ const strValue = args.video_inputs as string;
+ if (strValue.startsWith("[") && strValue.endsWith("]")) {
+ try {
+ videoInputs = JSON.parse(strValue);
+ } catch {
+ throw new Error("Invalid video_inputs format");
+ }
+ } else {
+ videoInputs = [strValue];
  }
+ } else if (Array.isArray(args.video_inputs)) {
+ videoInputs = args.video_inputs;
  } else {
- videoInputs = [strValue];
+ throw new Error("Invalid video_inputs: must be array or string");
  }
- } else if (Array.isArray(args.video_inputs)) {
- videoInputs = args.video_inputs;
- } else {
- throw new Error("Invalid video_inputs: must be array or string");
- }

- if (videoInputs.length === 0) {
- throw new Error("At least one video input must be provided");
- }
+ if (videoInputs.length === 0) {
+ throw new Error("At least one video input must be provided");
+ }

- if (videoInputs.length > 10) {
- throw new Error(
- "Maximum 10 videos per request allowed for Gemini 2.5+ models"
- );
- }
+ if (videoInputs.length > 10) {
+ throw new Error(
+ "Maximum 10 videos per request allowed for Gemini 2.5+ models"
+ );
+ }

- // Prepare video parts for content
- const videoParts: any[] = [];
+ // Prepare video parts for content
+ const videoParts: any[] = [];

- // Process each video input
- for (const videoInput of videoInputs) {
- const videoConfig = {
- fps: args.fps || (isYouTubeUrl(videoInput) ? 1 : 5), // Default 5 FPS for local, 1 FPS for YouTube
- startOffset: args.start_offset,
- endOffset: args.end_offset,
- };
+ // Process each video input
+ for (const videoInput of videoInputs) {
+ const videoConfig = {
+ fps: args.fps || (isYouTubeUrl(videoInput) ? 1 : 5), // Default 5 FPS for local, 1 FPS for YouTube
+ startOffset: args.start_offset,
+ endOffset: args.end_offset,
+ };

- const videoPart = await processVideoInput(videoInput, videoConfig);
- videoParts.push(videoPart);
- }
+ const videoPart = await processVideoInput(videoInput, videoConfig);
+ videoParts.push(videoPart);
+ }

- // Build content using createUserContent and createPartFromUri for uploaded files
- const contentParts: any[] = [args.prompt];
+ // Build content using createUserContent and createPartFromUri for uploaded files
+ const contentParts: any[] = [args.prompt];

- for (const videoPart of videoParts) {
- if (videoPart.uri && videoPart.mimeType) {
- contentParts.push(
- createPartFromUri(videoPart.uri, videoPart.mimeType)
- );
+ for (const videoPart of videoParts) {
+ if (videoPart.uri && videoPart.mimeType) {
+ contentParts.push(
+ createPartFromUri(videoPart.uri, videoPart.mimeType)
+ );
+ }
  }
- }

- const finalContents = createUserContent(contentParts);
+ const finalContents = createUserContent(contentParts);

- const response = await ai.models.generateContent({
- model: "gemini-2.5-pro",
- contents: finalContents,
- });
+ const response = await ai.models.generateContent({
+ model: "gemini-2.5-pro",
+ contents: finalContents,
+ });

- let result = "";
- if (response.candidates && response.candidates[0]?.content?.parts) {
- for (const part of response.candidates[0].content.parts) {
- if (part.text) {
- result += part.text;
+ let result = "";
+ if (response.candidates && response.candidates[0]?.content?.parts) {
+ for (const part of response.candidates[0].content.parts) {
+ if (part.text) {
+ result += part.text;
+ }
  }
  }
- }

- return result || "Video analysis completed but no text response received";
- } catch (error: any) {
- throw new Error(`Video analysis failed: ${error.message}`);
- }
+ return (
+ result || "Video analysis completed but no text response received"
+ );
+ } catch (error: any) {
+ throw new Error(`Video analysis failed: ${error.message}`);
+ }
+ }, "gemini-analyzeVideos");
  },
  };
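Note: each tool in this diff also gains timeoutMs: 300000, a five-minute budget per call. Enforcement is left to whatever framework consumes these tool definitions and is not shown here; one common way a host could honour such a field is a Promise.race against a timer, sketched below as an assumption rather than as this package's behaviour:

// Hypothetical enforcement of a tool's timeoutMs by the calling host.
async function runWithTimeout<T>(work: Promise<T>, timeoutMs: number): Promise<T> {
  let timer: ReturnType<typeof setTimeout> | undefined;
  const timeout = new Promise<never>((_, reject) => {
    timer = setTimeout(
      () => reject(new Error(`Tool call timed out after ${timeoutMs} ms`)),
      timeoutMs
    );
  });
  try {
    // Whichever settles first wins; the timer is cleared either way.
    return await Promise.race([work, timeout]);
  } finally {
    clearTimeout(timer);
  }
}

For example, a host could call runWithTimeout(tool.execute(args), tool.timeoutMs).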