@mixio-pro/kalaasetu-mcp 1.1.3 → 1.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/index.ts +10 -9
- package/src/test-context.ts +52 -0
- package/src/test-error-handling.ts +31 -0
- package/src/tools/fal/generate.ts +14 -9
- package/src/tools/fal/storage.ts +61 -58
- package/src/tools/gemini.ts +258 -237
- package/src/tools/image-to-video.ts +199 -185
- package/src/tools/perplexity.ts +192 -154
- package/src/tools/youtube.ts +51 -33
- package/src/utils/tool-wrapper.ts +86 -0
package/src/tools/gemini.ts
CHANGED
@@ -11,6 +11,7 @@ import * as wav from "wav";
 import { PassThrough } from "stream";
 import { getStorage } from "../storage";
 import { generateTimestampedFilename } from "../utils/filename";
+import { safeToolExecute } from "../utils/tool-wrapper";
 
 const ai = new GoogleGenAI({
   apiKey: process.env.GEMINI_API_KEY || "",
@@ -128,11 +129,17 @@ async function uploadFileToGemini(filePath: string): Promise<any> {
     fs.unlinkSync(localPath);
   }
 
-  // Wait for file processing to complete
+  // Wait for file processing to complete (max 60 seconds)
   let getFile = await ai.files.get({ name: uploadedFile.name! });
-  while (getFile.state === "PROCESSING") {
+  let attempts = 0;
+  while (getFile.state === "PROCESSING" && attempts < 20) {
     await new Promise((resolve) => setTimeout(resolve, 3000));
     getFile = await ai.files.get({ name: uploadedFile.name! });
+    attempts++;
+  }
+
+  if (getFile.state === "PROCESSING") {
+    throw new Error("File processing timed out after 60 seconds");
   }
 
   if (getFile.state === "FAILED") {
@@ -219,68 +226,70 @@ export const geminiTextToImage = {
     output_path?: string;
     reference_images?: string[];
   }) => {
+    return safeToolExecute(async () => {
+      try {
+        const contents: any[] = [args.prompt];
 
+        if (args.reference_images && Array.isArray(args.reference_images)) {
+          for (const refPath of args.reference_images) {
+            contents.push(await fileToGenerativePart(refPath));
+          }
         }
-      }
 
+        const response = await ai.models.generateContent({
+          model: "gemini-3-pro-image-preview",
+          contents: contents,
+          config: {
+            responseModalities: ["TEXT", "IMAGE"],
+            imageConfig: {
+              aspectRatio: args.aspect_ratio || "9:16",
+            },
          },
-        }
-      }
+        });
+
+        const images = [];
+        let textResponse = "";
+
+        if (response.candidates && response.candidates[0]?.content?.parts) {
+          for (const part of response.candidates[0].content.parts) {
+            if (part.text) {
+              textResponse += part.text;
+            } else if (part.inlineData?.data) {
+              const imageData = part.inlineData.data;
+              // Always save the image - use provided path or generate one
+              const outputPath =
+                args.output_path ||
+                generateTimestampedFilename("generated_image.png");
+              const storage = getStorage();
+              const url = await storage.writeFile(
+                outputPath,
+                Buffer.from(imageData, "base64")
+              );
+              images.push({
+                url,
+                filename: outputPath,
+                mimeType: "image/png",
+              });
+            }
          }
        }
-      }
 
+        if (images.length > 0) {
+          // Return the URL directly for easy parsing
+          return JSON.stringify({
+            url: images?.[0]?.url,
+            images,
+            message: textResponse || "Image generated successfully",
+          });
+        }
 
+        return (
+          textResponse || "Image generation completed but no image was produced"
+        );
+      } catch (error: any) {
+        throw new Error(`Image generation failed: ${error.message}`);
+      }
+    }, "gemini-generateImage");
  },
 };
 
@@ -306,57 +315,61 @@ export const geminiEditImage = {
     output_path?: string;
     reference_images?: string[];
   }) => {
+    return safeToolExecute(async () => {
+      try {
+        const imagePart = await fileToGenerativePart(args.image_path);
+        const contents: any[] = [args.prompt, imagePart];
 
+        if (args.reference_images) {
+          for (const refPath of args.reference_images) {
+            contents.push(await fileToGenerativePart(refPath));
+          }
        }
-      }
 
+        const response = await ai.models.generateContent({
+          model: "gemini-3-pro-image-preview",
+          contents: contents,
+        });
+
+        const images = [];
+        let textResponse = "";
+
+        if (response.candidates && response.candidates[0]?.content?.parts) {
+          for (const part of response.candidates[0].content.parts) {
+            if (part.text) {
+              textResponse += part.text;
+            } else if (part.inlineData?.data) {
+              const imageData = part.inlineData.data;
+              if (args.output_path) {
+                const storage = getStorage();
+                const url = await storage.writeFile(
+                  args.output_path,
+                  Buffer.from(imageData, "base64")
+                );
+                images.push({
+                  url,
+                  filename: args.output_path,
+                  mimeType: "image/png",
+                });
+              }
            }
          }
        }
-      }
 
+        if (images.length > 0) {
+          return JSON.stringify({
+            images,
+            message: textResponse || "Image edited successfully",
+          });
+        }
 
+        return (
+          textResponse || "Image editing completed but no response received"
+        );
+      } catch (error: any) {
+        throw new Error(`Image editing failed: ${error.message}`);
+      }
+    }, "gemini-editImage");
  },
 };
 
@@ -371,58 +384,60 @@ export const geminiAnalyzeImages = {
     prompt: z.string().describe("Text prompt or question about the images"),
   }),
   execute: async (args: { image_paths: string[]; prompt: string }) => {
+    return safeToolExecute(async () => {
+      try {
+        // Handle array parsing
+        if (!args.image_paths) {
+          throw new Error("Image paths not provided");
+        }
 
+        // Convert to array if passed as string
+        let imagePaths: string[];
+        if (typeof args.image_paths === "string") {
+          const strValue = args.image_paths as string;
+          if (strValue.startsWith("[") && strValue.endsWith("]")) {
+            try {
+              imagePaths = JSON.parse(strValue);
+            } catch {
+              throw new Error("Invalid image_paths format");
+            }
+          } else {
+            imagePaths = [strValue];
          }
+        } else if (Array.isArray(args.image_paths)) {
+          imagePaths = args.image_paths;
        } else {
+          throw new Error("Invalid image_paths: must be array or string");
        }
-      } else if (Array.isArray(args.image_paths)) {
-        imagePaths = args.image_paths;
-      } else {
-        throw new Error("Invalid image_paths: must be array or string");
-      }
 
+        if (imagePaths.length === 0) {
+          throw new Error("At least one image path must be provided");
+        }
 
+        const contents: any[] = [args.prompt];
 
+        for (const imagePath of imagePaths) {
+          contents.push(await fileToGenerativePart(imagePath));
+        }
 
+        const response = await ai.models.generateContent({
+          model: "gemini-2.5-pro",
+          contents: contents,
+        });
 
+        let result = "";
+        if (response.candidates && response.candidates[0]?.content?.parts) {
+          for (const part of response.candidates[0].content.parts) {
+            if (part.text) {
+              result += part.text;
+            }
          }
        }
+        return result || "Analysis completed but no text response received";
+      } catch (error: any) {
+        throw new Error(`Image analysis failed: ${error.message}`);
      }
-    } catch (error: any) {
-      throw new Error(`Image analysis failed: ${error.message}`);
-    }
+    }, "gemini-analyzeImages");
  },
 };
 
@@ -449,48 +464,50 @@ export const geminiSingleSpeakerTts = {
     voice_name: string;
     output_path?: string;
   }) => {
+    return safeToolExecute(async () => {
+      try {
+        const response = await ai.models.generateContent({
+          model: "gemini-2.5-pro-preview-tts",
+          contents: [{ parts: [{ text: args.text }] }],
+          config: {
+            responseModalities: ["AUDIO"],
+            speechConfig: {
+              voiceConfig: {
+                prebuiltVoiceConfig: {
+                  voiceName: args.voice_name || "Despina",
+                },
              },
            },
          },
-      }
-    });
+        });
 
+        const data =
+          response.candidates?.[0]?.content?.parts?.[0]?.inlineData?.data;
+        if (!data) {
+          throw new Error("No audio data received from Gemini API");
+        }
 
+        const audioBuffer = Buffer.from(data, "base64");
 
+        // Use provided output path or generate default with timestamp
+        const outputPath =
+          args.output_path || generateTimestampedFilename("voice_output.wav");
 
+        const storage = getStorage();
+        const url = await storage.writeFile(outputPath, audioBuffer);
+
+        return JSON.stringify({
+          audio: {
+            url,
+            filename: outputPath,
+            mimeType: "audio/wav",
+          },
+          message: "Audio generated successfully",
+        });
+      } catch (error: any) {
+        throw new Error(`Voice generation failed: ${error.message}`);
+      }
+    }, "gemini-generateSpeech");
  },
 };
 
@@ -538,86 +555,90 @@ export const geminiAnalyzeVideos = {
     end_offset?: string;
     media_resolution?: string;
   }) => {
+    return safeToolExecute(async () => {
+      try {
+        // Handle array parsing
+        if (!args.video_inputs) {
+          throw new Error("Video inputs not provided");
+        }
 
+        // Convert to array if passed as string
+        let videoInputs: string[];
+        if (typeof args.video_inputs === "string") {
+          const strValue = args.video_inputs as string;
+          if (strValue.startsWith("[") && strValue.endsWith("]")) {
+            try {
+              videoInputs = JSON.parse(strValue);
+            } catch {
+              throw new Error("Invalid video_inputs format");
+            }
+          } else {
+            videoInputs = [strValue];
          }
+        } else if (Array.isArray(args.video_inputs)) {
+          videoInputs = args.video_inputs;
        } else {
+          throw new Error("Invalid video_inputs: must be array or string");
        }
-      } else if (Array.isArray(args.video_inputs)) {
-        videoInputs = args.video_inputs;
-      } else {
-        throw new Error("Invalid video_inputs: must be array or string");
-      }
 
+        if (videoInputs.length === 0) {
+          throw new Error("At least one video input must be provided");
+        }
 
+        if (videoInputs.length > 10) {
+          throw new Error(
+            "Maximum 10 videos per request allowed for Gemini 2.5+ models"
+          );
+        }
 
+        // Prepare video parts for content
+        const videoParts: any[] = [];
 
+        // Process each video input
+        for (const videoInput of videoInputs) {
+          const videoConfig = {
+            fps: args.fps || (isYouTubeUrl(videoInput) ? 1 : 5), // Default 5 FPS for local, 1 FPS for YouTube
+            startOffset: args.start_offset,
+            endOffset: args.end_offset,
+          };
 
+          const videoPart = await processVideoInput(videoInput, videoConfig);
+          videoParts.push(videoPart);
+        }
 
+        // Build content using createUserContent and createPartFromUri for uploaded files
+        const contentParts: any[] = [args.prompt];
 
+        for (const videoPart of videoParts) {
+          if (videoPart.uri && videoPart.mimeType) {
+            contentParts.push(
+              createPartFromUri(videoPart.uri, videoPart.mimeType)
+            );
+          }
        }
-      }
 
+        const finalContents = createUserContent(contentParts);
 
+        const response = await ai.models.generateContent({
+          model: "gemini-2.5-pro",
+          contents: finalContents,
+        });
 
+        let result = "";
+        if (response.candidates && response.candidates[0]?.content?.parts) {
+          for (const part of response.candidates[0].content.parts) {
+            if (part.text) {
+              result += part.text;
+            }
          }
        }
-      }
 
+        return (
+          result || "Video analysis completed but no text response received"
+        );
+      } catch (error: any) {
+        throw new Error(`Video analysis failed: ${error.message}`);
+      }
+    }, "gemini-analyzeVideos");
  },
 };