@mixio-pro/kalaasetu-mcp 1.1.3 → 1.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -11,6 +11,7 @@ import * as wav from "wav";
 import { PassThrough } from "stream";
 import { getStorage } from "../storage";
 import { generateTimestampedFilename } from "../utils/filename";
+import { safeToolExecute } from "../utils/tool-wrapper";

 const ai = new GoogleGenAI({
   apiKey: process.env.GEMINI_API_KEY || "",
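
The new import pulls in safeToolExecute from ../utils/tool-wrapper, whose implementation is not included in this diff. Inferred only from the call sites in the hunks below (each tool body is passed together with a name such as "gemini-generateImage"), a wrapper of this kind might look roughly like the following sketch; the actual signature and error behaviour in the package may differ.

// Hypothetical sketch only; the real ../utils/tool-wrapper is not shown in this diff.
export async function safeToolExecute<T>(
  fn: () => Promise<T>,
  toolName: string
): Promise<T | string> {
  try {
    return await fn();
  } catch (error: any) {
    // Assumption: failures are reported per tool instead of crashing the MCP server.
    console.error(`[${toolName}] tool execution failed:`, error);
    return `Error in ${toolName}: ${error?.message ?? String(error)}`;
  }
}
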
@@ -128,11 +129,17 @@ async function uploadFileToGemini(filePath: string): Promise<any> {
     fs.unlinkSync(localPath);
   }

-  // Wait for file processing to complete
+  // Wait for file processing to complete (max 60 seconds)
   let getFile = await ai.files.get({ name: uploadedFile.name! });
-  while (getFile.state === "PROCESSING") {
+  let attempts = 0;
+  while (getFile.state === "PROCESSING" && attempts < 20) {
     await new Promise((resolve) => setTimeout(resolve, 3000));
     getFile = await ai.files.get({ name: uploadedFile.name! });
+    attempts++;
+  }
+
+  if (getFile.state === "PROCESSING") {
+    throw new Error("File processing timed out after 60 seconds");
   }

   if (getFile.state === "FAILED") {
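
A note on the new bound: the loop now polls the Files API at most 20 times with a 3000 ms delay between polls, so the wait is capped at 20 × 3 s = 60 s, which is what the updated comment and the "timed out after 60 seconds" error message refer to.
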
@@ -219,68 +226,70 @@ export const geminiTextToImage = {
     output_path?: string;
     reference_images?: string[];
   }) => {
-    try {
-      const contents: any[] = [args.prompt];
+    return safeToolExecute(async () => {
+      try {
+        const contents: any[] = [args.prompt];

-      if (args.reference_images && Array.isArray(args.reference_images)) {
-        for (const refPath of args.reference_images) {
-          contents.push(await fileToGenerativePart(refPath));
+        if (args.reference_images && Array.isArray(args.reference_images)) {
+          for (const refPath of args.reference_images) {
+            contents.push(await fileToGenerativePart(refPath));
+          }
         }
-      }

-      const response = await ai.models.generateContent({
-        model: "gemini-3-pro-image-preview",
-        contents: contents,
-        config: {
-          responseModalities: ["TEXT", "IMAGE"],
-          imageConfig: {
-            aspectRatio: args.aspect_ratio || "9:16",
+        const response = await ai.models.generateContent({
+          model: "gemini-3-pro-image-preview",
+          contents: contents,
+          config: {
+            responseModalities: ["TEXT", "IMAGE"],
+            imageConfig: {
+              aspectRatio: args.aspect_ratio || "9:16",
+            },
           },
-        },
-      });
-
-      const images = [];
-      let textResponse = "";
-
-      if (response.candidates && response.candidates[0]?.content?.parts) {
-        for (const part of response.candidates[0].content.parts) {
-          if (part.text) {
-            textResponse += part.text;
-          } else if (part.inlineData?.data) {
-            const imageData = part.inlineData.data;
-            // Always save the image - use provided path or generate one
-            const outputPath =
-              args.output_path ||
-              generateTimestampedFilename("generated_image.png");
-            const storage = getStorage();
-            const url = await storage.writeFile(
-              outputPath,
-              Buffer.from(imageData, "base64")
-            );
-            images.push({
-              url,
-              filename: outputPath,
-              mimeType: "image/png",
-            });
+        });
+
+        const images = [];
+        let textResponse = "";
+
+        if (response.candidates && response.candidates[0]?.content?.parts) {
+          for (const part of response.candidates[0].content.parts) {
+            if (part.text) {
+              textResponse += part.text;
+            } else if (part.inlineData?.data) {
+              const imageData = part.inlineData.data;
+              // Always save the image - use provided path or generate one
+              const outputPath =
+                args.output_path ||
+                generateTimestampedFilename("generated_image.png");
+              const storage = getStorage();
+              const url = await storage.writeFile(
+                outputPath,
+                Buffer.from(imageData, "base64")
+              );
+              images.push({
+                url,
+                filename: outputPath,
+                mimeType: "image/png",
+              });
+            }
           }
         }
-      }

-      if (images.length > 0) {
-        // Return the URL directly for easy parsing
-        return JSON.stringify({
-          url: images?.[0]?.url,
-          images,
-          message: textResponse || "Image generated successfully",
-        });
-      }
+        if (images.length > 0) {
+          // Return the URL directly for easy parsing
+          return JSON.stringify({
+            url: images?.[0]?.url,
+            images,
+            message: textResponse || "Image generated successfully",
+          });
+        }

-      return (
-        textResponse || "Image generation completed but no image was produced"
-      );
-    } catch (error: any) {
-      throw new Error(`Image generation failed: ${error.message}`);
-    }
+        return (
+          textResponse || "Image generation completed but no image was produced"
+        );
+      } catch (error: any) {
+        throw new Error(`Image generation failed: ${error.message}`);
+      }
+    }, "gemini-generateImage");
   },
 };

@@ -306,57 +315,61 @@ export const geminiEditImage = {
     output_path?: string;
     reference_images?: string[];
   }) => {
-    try {
-      const imagePart = await fileToGenerativePart(args.image_path);
-      const contents: any[] = [args.prompt, imagePart];
+    return safeToolExecute(async () => {
+      try {
+        const imagePart = await fileToGenerativePart(args.image_path);
+        const contents: any[] = [args.prompt, imagePart];

-      if (args.reference_images) {
-        for (const refPath of args.reference_images) {
-          contents.push(await fileToGenerativePart(refPath));
+        if (args.reference_images) {
+          for (const refPath of args.reference_images) {
+            contents.push(await fileToGenerativePart(refPath));
+          }
         }
-      }

-      const response = await ai.models.generateContent({
-        model: "gemini-3-pro-image-preview",
-        contents: contents,
-      });
-
-      const images = [];
-      let textResponse = "";
-
-      if (response.candidates && response.candidates[0]?.content?.parts) {
-        for (const part of response.candidates[0].content.parts) {
-          if (part.text) {
-            textResponse += part.text;
-          } else if (part.inlineData?.data) {
-            const imageData = part.inlineData.data;
-            if (args.output_path) {
-              const storage = getStorage();
-              const url = await storage.writeFile(
-                args.output_path,
-                Buffer.from(imageData, "base64")
-              );
-              images.push({
-                url,
-                filename: args.output_path,
-                mimeType: "image/png",
-              });
+        const response = await ai.models.generateContent({
+          model: "gemini-3-pro-image-preview",
+          contents: contents,
+        });
+
+        const images = [];
+        let textResponse = "";
+
+        if (response.candidates && response.candidates[0]?.content?.parts) {
+          for (const part of response.candidates[0].content.parts) {
+            if (part.text) {
+              textResponse += part.text;
+            } else if (part.inlineData?.data) {
+              const imageData = part.inlineData.data;
+              if (args.output_path) {
+                const storage = getStorage();
+                const url = await storage.writeFile(
+                  args.output_path,
+                  Buffer.from(imageData, "base64")
+                );
+                images.push({
+                  url,
+                  filename: args.output_path,
+                  mimeType: "image/png",
+                });
+              }
             }
           }
         }
-      }

-      if (images.length > 0) {
-        return JSON.stringify({
-          images,
-          message: textResponse || "Image edited successfully",
-        });
-      }
+        if (images.length > 0) {
+          return JSON.stringify({
+            images,
+            message: textResponse || "Image edited successfully",
+          });
+        }

-      return textResponse || "Image editing completed but no response received";
-    } catch (error: any) {
-      throw new Error(`Image editing failed: ${error.message}`);
-    }
+        return (
+          textResponse || "Image editing completed but no response received"
+        );
+      } catch (error: any) {
+        throw new Error(`Image editing failed: ${error.message}`);
+      }
+    }, "gemini-editImage");
   },
 };

@@ -371,58 +384,60 @@ export const geminiAnalyzeImages = {
     prompt: z.string().describe("Text prompt or question about the images"),
   }),
   execute: async (args: { image_paths: string[]; prompt: string }) => {
-    try {
-      // Handle array parsing
-      if (!args.image_paths) {
-        throw new Error("Image paths not provided");
-      }
+    return safeToolExecute(async () => {
+      try {
+        // Handle array parsing
+        if (!args.image_paths) {
+          throw new Error("Image paths not provided");
+        }

-      // Convert to array if passed as string
-      let imagePaths: string[];
-      if (typeof args.image_paths === "string") {
-        const strValue = args.image_paths as string;
-        if (strValue.startsWith("[") && strValue.endsWith("]")) {
-          try {
-            imagePaths = JSON.parse(strValue);
-          } catch {
-            throw new Error("Invalid image_paths format");
+        // Convert to array if passed as string
+        let imagePaths: string[];
+        if (typeof args.image_paths === "string") {
+          const strValue = args.image_paths as string;
+          if (strValue.startsWith("[") && strValue.endsWith("]")) {
+            try {
+              imagePaths = JSON.parse(strValue);
+            } catch {
+              throw new Error("Invalid image_paths format");
+            }
+          } else {
+            imagePaths = [strValue];
           }
+        } else if (Array.isArray(args.image_paths)) {
+          imagePaths = args.image_paths;
         } else {
-          imagePaths = [strValue];
+          throw new Error("Invalid image_paths: must be array or string");
         }
-      } else if (Array.isArray(args.image_paths)) {
-        imagePaths = args.image_paths;
-      } else {
-        throw new Error("Invalid image_paths: must be array or string");
-      }

-      if (imagePaths.length === 0) {
-        throw new Error("At least one image path must be provided");
-      }
+        if (imagePaths.length === 0) {
+          throw new Error("At least one image path must be provided");
+        }

-      const contents: any[] = [args.prompt];
+        const contents: any[] = [args.prompt];

-      for (const imagePath of imagePaths) {
-        contents.push(await fileToGenerativePart(imagePath));
-      }
+        for (const imagePath of imagePaths) {
+          contents.push(await fileToGenerativePart(imagePath));
+        }

-      const response = await ai.models.generateContent({
-        model: "gemini-2.5-pro",
-        contents: contents,
-      });
+        const response = await ai.models.generateContent({
+          model: "gemini-2.5-pro",
+          contents: contents,
+        });

-      let result = "";
-      if (response.candidates && response.candidates[0]?.content?.parts) {
-        for (const part of response.candidates[0].content.parts) {
-          if (part.text) {
-            result += part.text;
+        let result = "";
+        if (response.candidates && response.candidates[0]?.content?.parts) {
+          for (const part of response.candidates[0].content.parts) {
+            if (part.text) {
+              result += part.text;
+            }
           }
         }
+        return result || "Analysis completed but no text response received";
+      } catch (error: any) {
+        throw new Error(`Image analysis failed: ${error.message}`);
       }
-      return result || "Analysis completed but no text response received";
-    } catch (error: any) {
-      throw new Error(`Image analysis failed: ${error.message}`);
-    }
+    }, "gemini-analyzeImages");
   },
 };

@@ -449,48 +464,50 @@ export const geminiSingleSpeakerTts = {
     voice_name: string;
     output_path?: string;
   }) => {
-    try {
-      const response = await ai.models.generateContent({
-        model: "gemini-2.5-pro-preview-tts",
-        contents: [{ parts: [{ text: args.text }] }],
-        config: {
-          responseModalities: ["AUDIO"],
-          speechConfig: {
-            voiceConfig: {
-              prebuiltVoiceConfig: {
-                voiceName: args.voice_name || "Despina",
+    return safeToolExecute(async () => {
+      try {
+        const response = await ai.models.generateContent({
+          model: "gemini-2.5-pro-preview-tts",
+          contents: [{ parts: [{ text: args.text }] }],
+          config: {
+            responseModalities: ["AUDIO"],
+            speechConfig: {
+              voiceConfig: {
+                prebuiltVoiceConfig: {
+                  voiceName: args.voice_name || "Despina",
+                },
               },
             },
           },
-        },
-      });
+        });

-      const data =
-        response.candidates?.[0]?.content?.parts?.[0]?.inlineData?.data;
-      if (!data) {
-        throw new Error("No audio data received from Gemini API");
-      }
+        const data =
+          response.candidates?.[0]?.content?.parts?.[0]?.inlineData?.data;
+        if (!data) {
+          throw new Error("No audio data received from Gemini API");
+        }

-      const audioBuffer = Buffer.from(data, "base64");
+        const audioBuffer = Buffer.from(data, "base64");

-      // Use provided output path or generate default with timestamp
-      const outputPath =
-        args.output_path || generateTimestampedFilename("voice_output.wav");
+        // Use provided output path or generate default with timestamp
+        const outputPath =
+          args.output_path || generateTimestampedFilename("voice_output.wav");

-      const storage = getStorage();
-      const url = await storage.writeFile(outputPath, audioBuffer);
-
-      return JSON.stringify({
-        audio: {
-          url,
-          filename: outputPath,
-          mimeType: "audio/wav",
-        },
-        message: "Audio generated successfully",
-      });
-    } catch (error: any) {
-      throw new Error(`Voice generation failed: ${error.message}`);
-    }
+        const storage = getStorage();
+        const url = await storage.writeFile(outputPath, audioBuffer);
+
+        return JSON.stringify({
+          audio: {
+            url,
+            filename: outputPath,
+            mimeType: "audio/wav",
+          },
+          message: "Audio generated successfully",
+        });
+      } catch (error: any) {
+        throw new Error(`Voice generation failed: ${error.message}`);
+      }
+    }, "gemini-generateSpeech");
   },
 };

@@ -538,86 +555,90 @@ export const geminiAnalyzeVideos = {
     end_offset?: string;
     media_resolution?: string;
   }) => {
-    try {
-      // Handle array parsing
-      if (!args.video_inputs) {
-        throw new Error("Video inputs not provided");
-      }
+    return safeToolExecute(async () => {
+      try {
+        // Handle array parsing
+        if (!args.video_inputs) {
+          throw new Error("Video inputs not provided");
+        }

-      // Convert to array if passed as string
-      let videoInputs: string[];
-      if (typeof args.video_inputs === "string") {
-        const strValue = args.video_inputs as string;
-        if (strValue.startsWith("[") && strValue.endsWith("]")) {
-          try {
-            videoInputs = JSON.parse(strValue);
-          } catch {
-            throw new Error("Invalid video_inputs format");
+        // Convert to array if passed as string
+        let videoInputs: string[];
+        if (typeof args.video_inputs === "string") {
+          const strValue = args.video_inputs as string;
+          if (strValue.startsWith("[") && strValue.endsWith("]")) {
+            try {
+              videoInputs = JSON.parse(strValue);
+            } catch {
+              throw new Error("Invalid video_inputs format");
+            }
+          } else {
+            videoInputs = [strValue];
           }
+        } else if (Array.isArray(args.video_inputs)) {
+          videoInputs = args.video_inputs;
         } else {
-          videoInputs = [strValue];
+          throw new Error("Invalid video_inputs: must be array or string");
         }
-      } else if (Array.isArray(args.video_inputs)) {
-        videoInputs = args.video_inputs;
-      } else {
-        throw new Error("Invalid video_inputs: must be array or string");
-      }

-      if (videoInputs.length === 0) {
-        throw new Error("At least one video input must be provided");
-      }
+        if (videoInputs.length === 0) {
+          throw new Error("At least one video input must be provided");
+        }

-      if (videoInputs.length > 10) {
-        throw new Error(
-          "Maximum 10 videos per request allowed for Gemini 2.5+ models"
-        );
-      }
+        if (videoInputs.length > 10) {
+          throw new Error(
+            "Maximum 10 videos per request allowed for Gemini 2.5+ models"
+          );
+        }

-      // Prepare video parts for content
-      const videoParts: any[] = [];
+        // Prepare video parts for content
+        const videoParts: any[] = [];

-      // Process each video input
-      for (const videoInput of videoInputs) {
-        const videoConfig = {
-          fps: args.fps || (isYouTubeUrl(videoInput) ? 1 : 5), // Default 5 FPS for local, 1 FPS for YouTube
-          startOffset: args.start_offset,
-          endOffset: args.end_offset,
-        };
+        // Process each video input
+        for (const videoInput of videoInputs) {
+          const videoConfig = {
+            fps: args.fps || (isYouTubeUrl(videoInput) ? 1 : 5), // Default 5 FPS for local, 1 FPS for YouTube
+            startOffset: args.start_offset,
+            endOffset: args.end_offset,
+          };

-        const videoPart = await processVideoInput(videoInput, videoConfig);
-        videoParts.push(videoPart);
-      }
+          const videoPart = await processVideoInput(videoInput, videoConfig);
+          videoParts.push(videoPart);
+        }

-      // Build content using createUserContent and createPartFromUri for uploaded files
-      const contentParts: any[] = [args.prompt];
+        // Build content using createUserContent and createPartFromUri for uploaded files
+        const contentParts: any[] = [args.prompt];

-      for (const videoPart of videoParts) {
-        if (videoPart.uri && videoPart.mimeType) {
-          contentParts.push(
-            createPartFromUri(videoPart.uri, videoPart.mimeType)
-          );
+        for (const videoPart of videoParts) {
+          if (videoPart.uri && videoPart.mimeType) {
+            contentParts.push(
+              createPartFromUri(videoPart.uri, videoPart.mimeType)
+            );
+          }
         }
-      }

-      const finalContents = createUserContent(contentParts);
+        const finalContents = createUserContent(contentParts);

-      const response = await ai.models.generateContent({
-        model: "gemini-2.5-pro",
-        contents: finalContents,
-      });
+        const response = await ai.models.generateContent({
+          model: "gemini-2.5-pro",
+          contents: finalContents,
+        });

-      let result = "";
-      if (response.candidates && response.candidates[0]?.content?.parts) {
-        for (const part of response.candidates[0].content.parts) {
-          if (part.text) {
-            result += part.text;
+        let result = "";
+        if (response.candidates && response.candidates[0]?.content?.parts) {
+          for (const part of response.candidates[0].content.parts) {
+            if (part.text) {
+              result += part.text;
+            }
           }
         }
-      }

-      return result || "Video analysis completed but no text response received";
-    } catch (error: any) {
-      throw new Error(`Video analysis failed: ${error.message}`);
-    }
+        return (
+          result || "Video analysis completed but no text response received"
+        );
+      } catch (error: any) {
+        throw new Error(`Video analysis failed: ${error.message}`);
+      }
+    }, "gemini-analyzeVideos");
   },
 };
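
For callers, the success path of the image and audio tools still returns a JSON string (url/images or audio, plus a message), while plain text comes back when nothing was generated. A minimal consumer-side sketch, assuming the tool result arrives as that raw string and reusing the field names from the JSON.stringify payloads above; the helper below is hypothetical and not part of the package.

interface ImageToolResult {
  url?: string;
  images?: { url: string; filename: string; mimeType: string }[];
  message?: string;
}

function parseImageToolResult(raw: string): ImageToolResult | null {
  // Non-JSON results (e.g. "Image generation completed but no image was produced")
  // yield null so the caller can fall back to the raw text.
  try {
    return JSON.parse(raw) as ImageToolResult;
  } catch {
    return null;
  }
}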