@mixio-pro/kalaasetu-mcp 2.0.11-beta → 2.1.1-beta
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +6 -2
- package/src/index.ts +4 -3
- package/src/storage/index.ts +4 -3
- package/src/tools/fal/config.ts +9 -8
- package/src/tools/fal/dynamic-tools.ts +214 -237
- package/src/tools/fal/models.ts +115 -93
- package/src/tools/fal/storage.ts +66 -61
- package/src/tools/gemini.ts +302 -281
- package/src/tools/get-status.ts +50 -46
- package/src/tools/image-to-video.ts +309 -300
- package/src/tools/perplexity.ts +188 -172
- package/src/tools/youtube.ts +45 -41
- package/src/utils/llm-prompt-enhancer.ts +3 -2
- package/src/utils/logger.ts +71 -0
- package/src/utils/openmeter.ts +123 -0
- package/src/utils/prompt-enhancer-presets.ts +7 -5
- package/src/utils/remote-sync.ts +19 -10
- package/src/utils/tool-credits.ts +104 -0
- package/src/utils/tool-wrapper.ts +37 -6
- package/src/utils/url-file.ts +4 -3
- package/src/test-context.ts +0 -52
- package/src/test-error-handling.ts +0 -31
- package/src/tools/image-to-video.sdk-backup.ts +0 -218
package/src/tools/gemini.ts
CHANGED
|
@@ -56,7 +56,7 @@ async function saveWaveFile(
|
|
|
56
56
|
pcmData: Buffer,
|
|
57
57
|
channels = 1,
|
|
58
58
|
rate = 24000,
|
|
59
|
-
sampleWidth = 2
|
|
59
|
+
sampleWidth = 2,
|
|
60
60
|
): Promise<void> {
|
|
61
61
|
return new Promise((resolve, reject) => {
|
|
62
62
|
const writer = new wav.Writer({
|
|
@@ -159,7 +159,7 @@ async function uploadFileToGemini(filePath: string): Promise<any> {
|
|
|
159
159
|
// Helper function to process video input intelligently
|
|
160
160
|
async function processVideoInput(
|
|
161
161
|
input: string,
|
|
162
|
-
config?: { fps?: number; startOffset?: string; endOffset?: string }
|
|
162
|
+
config?: { fps?: number; startOffset?: string; endOffset?: string },
|
|
163
163
|
): Promise<any> {
|
|
164
164
|
if (isYouTubeUrl(input)) {
|
|
165
165
|
return {
|
|
@@ -221,27 +221,27 @@ export const geminiTextToImage = {
|
|
|
221
221
|
.string()
|
|
222
222
|
.optional()
|
|
223
223
|
.describe(
|
|
224
|
-
"Supported ratios: 1:1, 3:4, 4:3, 9:16, or 16:9. Default is 9:16."
|
|
224
|
+
"Supported ratios: 1:1, 3:4, 4:3, 9:16, or 16:9. Default is 9:16.",
|
|
225
225
|
),
|
|
226
226
|
output_path: z
|
|
227
227
|
.string()
|
|
228
228
|
.optional()
|
|
229
229
|
.describe(
|
|
230
230
|
"Optional: specific local path or filename to save the image (e.g., 'outputs/hero.png'). " +
|
|
231
|
-
"If omitted, a timestamped filename is generated automatically."
|
|
231
|
+
"If omitted, a timestamped filename is generated automatically.",
|
|
232
232
|
),
|
|
233
233
|
reference_images: z
|
|
234
234
|
.array(z.string())
|
|
235
235
|
.optional()
|
|
236
236
|
.describe(
|
|
237
|
-
"Optional: local paths or URLs of images to use as visual references for style or composition."
|
|
237
|
+
"Optional: local paths or URLs of images to use as visual references for style or composition.",
|
|
238
238
|
),
|
|
239
239
|
enhancer_preset: z
|
|
240
240
|
.string()
|
|
241
241
|
.optional()
|
|
242
242
|
.describe(
|
|
243
243
|
"Optional: Name of a prompt enhancer preset to apply (e.g., 'cinematic', 'photorealistic', 'anime'). " +
|
|
244
|
-
"Automatically enhances the prompt with professional style modifiers."
|
|
244
|
+
"Automatically enhances the prompt with professional style modifiers.",
|
|
245
245
|
),
|
|
246
246
|
}),
|
|
247
247
|
timeoutMs: 300000,
|
|
@@ -252,79 +252,84 @@ export const geminiTextToImage = {
|
|
|
252
252
|
reference_images?: string[];
|
|
253
253
|
enhancer_preset?: string;
|
|
254
254
|
}) => {
|
|
255
|
-
return safeToolExecute(
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
255
|
+
return safeToolExecute(
|
|
256
|
+
async () => {
|
|
257
|
+
try {
|
|
258
|
+
// Apply prompt enhancement if preset specified
|
|
259
|
+
let enhancedPrompt = args.prompt;
|
|
260
|
+
if (args.enhancer_preset) {
|
|
261
|
+
const enhancer = resolveEnhancer(args.enhancer_preset);
|
|
262
|
+
if (enhancer.hasTransformations()) {
|
|
263
|
+
enhancedPrompt = enhancer.enhance(args.prompt);
|
|
264
|
+
}
|
|
263
265
|
}
|
|
264
|
-
}
|
|
265
266
|
|
|
266
|
-
|
|
267
|
+
const contents: any[] = [enhancedPrompt];
|
|
267
268
|
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
269
|
+
if (args.reference_images && Array.isArray(args.reference_images)) {
|
|
270
|
+
for (const refPath of args.reference_images) {
|
|
271
|
+
contents.push(await fileToGenerativePart(refPath));
|
|
272
|
+
}
|
|
271
273
|
}
|
|
272
|
-
}
|
|
273
274
|
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
275
|
+
const response = await ai.models.generateContent({
|
|
276
|
+
model: "gemini-3-pro-image-preview",
|
|
277
|
+
contents: contents,
|
|
278
|
+
config: {
|
|
279
|
+
responseModalities: ["TEXT", "IMAGE"],
|
|
280
|
+
imageConfig: {
|
|
281
|
+
aspectRatio: args.aspect_ratio || "9:16",
|
|
282
|
+
},
|
|
281
283
|
},
|
|
282
|
-
}
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
}
|
|
284
|
+
});
|
|
285
|
+
|
|
286
|
+
const images = [];
|
|
287
|
+
let textResponse = "";
|
|
288
|
+
|
|
289
|
+
if (response.candidates && response.candidates[0]?.content?.parts) {
|
|
290
|
+
for (const part of response.candidates[0].content.parts) {
|
|
291
|
+
if (part.text) {
|
|
292
|
+
textResponse += part.text;
|
|
293
|
+
} else if (part.inlineData?.data) {
|
|
294
|
+
const imageData = part.inlineData.data;
|
|
295
|
+
// Always save the image - use provided path or generate one
|
|
296
|
+
const outputPath =
|
|
297
|
+
args.output_path ||
|
|
298
|
+
generateTimestampedFilename("generated_image.png");
|
|
299
|
+
const storage = getStorage();
|
|
300
|
+
const url = await storage.writeFile(
|
|
301
|
+
outputPath,
|
|
302
|
+
Buffer.from(imageData, "base64"),
|
|
303
|
+
);
|
|
304
|
+
images.push({
|
|
305
|
+
url,
|
|
306
|
+
filename: outputPath,
|
|
307
|
+
mimeType: "image/png",
|
|
308
|
+
});
|
|
309
|
+
}
|
|
308
310
|
}
|
|
309
311
|
}
|
|
310
|
-
}
|
|
311
312
|
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
313
|
+
if (images.length > 0) {
|
|
314
|
+
// Return the URL directly for easy parsing
|
|
315
|
+
return JSON.stringify({
|
|
316
|
+
url: images?.[0]?.url,
|
|
317
|
+
images,
|
|
318
|
+
message: textResponse || "Image generated successfully",
|
|
319
|
+
});
|
|
320
|
+
}
|
|
320
321
|
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
322
|
+
return (
|
|
323
|
+
textResponse ||
|
|
324
|
+
"Image generation completed but no image was produced"
|
|
325
|
+
);
|
|
326
|
+
} catch (error: any) {
|
|
327
|
+
throw new Error(`Image generation failed: ${error.message}`);
|
|
328
|
+
}
|
|
329
|
+
},
|
|
330
|
+
"gemini-generateImage",
|
|
331
|
+
{ toolName: "generateImage" },
|
|
332
|
+
);
|
|
328
333
|
},
|
|
329
334
|
};
|
|
330
335
|
|
|
@@ -339,31 +344,31 @@ export const geminiEditImage = {
|
|
|
339
344
|
image_path: z
|
|
340
345
|
.string()
|
|
341
346
|
.describe(
|
|
342
|
-
"Absolute local path or URL to the source image file to be edited."
|
|
347
|
+
"Absolute local path or URL to the source image file to be edited.",
|
|
343
348
|
),
|
|
344
349
|
prompt: z
|
|
345
350
|
.string()
|
|
346
351
|
.describe(
|
|
347
|
-
"Instructional text describing the edits or modifications required."
|
|
352
|
+
"Instructional text describing the edits or modifications required.",
|
|
348
353
|
),
|
|
349
354
|
output_path: z
|
|
350
355
|
.string()
|
|
351
356
|
.optional()
|
|
352
357
|
.describe(
|
|
353
|
-
"Optional: specific local path to save the edited result. Defaults to generated timestamp."
|
|
358
|
+
"Optional: specific local path to save the edited result. Defaults to generated timestamp.",
|
|
354
359
|
),
|
|
355
360
|
reference_images: z
|
|
356
361
|
.array(z.string())
|
|
357
362
|
.optional()
|
|
358
363
|
.describe(
|
|
359
|
-
"Optional: additional images to guide the edit (e.g., to reference a specific character or object style)."
|
|
364
|
+
"Optional: additional images to guide the edit (e.g., to reference a specific character or object style).",
|
|
360
365
|
),
|
|
361
366
|
enhancer_preset: z
|
|
362
367
|
.string()
|
|
363
368
|
.optional()
|
|
364
369
|
.describe(
|
|
365
370
|
"Optional: Name of a prompt enhancer preset to apply (e.g., 'cinematic', 'photorealistic'). " +
|
|
366
|
-
"Enhances the edit instructions with professional style modifiers."
|
|
371
|
+
"Enhances the edit instructions with professional style modifiers.",
|
|
367
372
|
),
|
|
368
373
|
}),
|
|
369
374
|
timeoutMs: 300000,
|
|
@@ -374,70 +379,74 @@ export const geminiEditImage = {
|
|
|
374
379
|
reference_images?: string[];
|
|
375
380
|
enhancer_preset?: string;
|
|
376
381
|
}) => {
|
|
377
|
-
return safeToolExecute(
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
382
|
+
return safeToolExecute(
|
|
383
|
+
async () => {
|
|
384
|
+
try {
|
|
385
|
+
// Apply prompt enhancement if preset specified
|
|
386
|
+
let enhancedPrompt = args.prompt;
|
|
387
|
+
if (args.enhancer_preset) {
|
|
388
|
+
const enhancer = resolveEnhancer(args.enhancer_preset);
|
|
389
|
+
if (enhancer.hasTransformations()) {
|
|
390
|
+
enhancedPrompt = enhancer.enhance(args.prompt);
|
|
391
|
+
}
|
|
385
392
|
}
|
|
386
|
-
}
|
|
387
393
|
|
|
388
|
-
|
|
389
|
-
|
|
394
|
+
const imagePart = await fileToGenerativePart(args.image_path);
|
|
395
|
+
const contents: any[] = [enhancedPrompt, imagePart];
|
|
390
396
|
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
397
|
+
if (args.reference_images) {
|
|
398
|
+
for (const refPath of args.reference_images) {
|
|
399
|
+
contents.push(await fileToGenerativePart(refPath));
|
|
400
|
+
}
|
|
394
401
|
}
|
|
395
|
-
}
|
|
396
402
|
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
403
|
+
const response = await ai.models.generateContent({
|
|
404
|
+
model: "gemini-3-pro-image-preview",
|
|
405
|
+
contents: contents,
|
|
406
|
+
});
|
|
407
|
+
|
|
408
|
+
const images = [];
|
|
409
|
+
let textResponse = "";
|
|
410
|
+
|
|
411
|
+
if (response.candidates && response.candidates[0]?.content?.parts) {
|
|
412
|
+
for (const part of response.candidates[0].content.parts) {
|
|
413
|
+
if (part.text) {
|
|
414
|
+
textResponse += part.text;
|
|
415
|
+
} else if (part.inlineData?.data) {
|
|
416
|
+
const imageData = part.inlineData.data;
|
|
417
|
+
if (args.output_path) {
|
|
418
|
+
const storage = getStorage();
|
|
419
|
+
const url = await storage.writeFile(
|
|
420
|
+
args.output_path,
|
|
421
|
+
Buffer.from(imageData, "base64"),
|
|
422
|
+
);
|
|
423
|
+
images.push({
|
|
424
|
+
url,
|
|
425
|
+
filename: args.output_path,
|
|
426
|
+
mimeType: "image/png",
|
|
427
|
+
});
|
|
428
|
+
}
|
|
422
429
|
}
|
|
423
430
|
}
|
|
424
431
|
}
|
|
425
|
-
}
|
|
426
432
|
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
+
if (images.length > 0) {
|
|
434
|
+
return JSON.stringify({
|
|
435
|
+
images,
|
|
436
|
+
message: textResponse || "Image edited successfully",
|
|
437
|
+
});
|
|
438
|
+
}
|
|
433
439
|
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
440
|
+
return (
|
|
441
|
+
textResponse || "Image editing completed but no response received"
|
|
442
|
+
);
|
|
443
|
+
} catch (error: any) {
|
|
444
|
+
throw new Error(`Image editing failed: ${error.message}`);
|
|
445
|
+
}
|
|
446
|
+
},
|
|
447
|
+
"gemini-editImage",
|
|
448
|
+
{ toolName: "editImage" },
|
|
449
|
+
);
|
|
441
450
|
},
|
|
442
451
|
};
|
|
443
452
|
|
|
@@ -452,7 +461,7 @@ export const geminiAnalyzeImages = {
|
|
|
452
461
|
image_paths: z
|
|
453
462
|
.array(z.string())
|
|
454
463
|
.describe(
|
|
455
|
-
"An array of absolute local file paths or publicly accessible URLs to analyze."
|
|
464
|
+
"An array of absolute local file paths or publicly accessible URLs to analyze.",
|
|
456
465
|
),
|
|
457
466
|
prompt: z
|
|
458
467
|
.string()
|
|
@@ -460,60 +469,64 @@ export const geminiAnalyzeImages = {
|
|
|
460
469
|
}),
|
|
461
470
|
timeoutMs: 300000,
|
|
462
471
|
execute: async (args: { image_paths: string[]; prompt: string }) => {
|
|
463
|
-
return safeToolExecute(
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
|
|
472
|
+
return safeToolExecute(
|
|
473
|
+
async () => {
|
|
474
|
+
try {
|
|
475
|
+
// Handle array parsing
|
|
476
|
+
if (!args.image_paths) {
|
|
477
|
+
throw new Error("Image paths not provided");
|
|
478
|
+
}
|
|
469
479
|
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
480
|
+
// Convert to array if passed as string
|
|
481
|
+
let imagePaths: string[];
|
|
482
|
+
if (typeof args.image_paths === "string") {
|
|
483
|
+
const strValue = args.image_paths as string;
|
|
484
|
+
if (strValue.startsWith("[") && strValue.endsWith("]")) {
|
|
485
|
+
try {
|
|
486
|
+
imagePaths = JSON.parse(strValue);
|
|
487
|
+
} catch {
|
|
488
|
+
throw new Error("Invalid image_paths format");
|
|
489
|
+
}
|
|
490
|
+
} else {
|
|
491
|
+
imagePaths = [strValue];
|
|
479
492
|
}
|
|
493
|
+
} else if (Array.isArray(args.image_paths)) {
|
|
494
|
+
imagePaths = args.image_paths;
|
|
480
495
|
} else {
|
|
481
|
-
|
|
496
|
+
throw new Error("Invalid image_paths: must be array or string");
|
|
482
497
|
}
|
|
483
|
-
} else if (Array.isArray(args.image_paths)) {
|
|
484
|
-
imagePaths = args.image_paths;
|
|
485
|
-
} else {
|
|
486
|
-
throw new Error("Invalid image_paths: must be array or string");
|
|
487
|
-
}
|
|
488
498
|
|
|
489
|
-
|
|
490
|
-
|
|
491
|
-
|
|
499
|
+
if (imagePaths.length === 0) {
|
|
500
|
+
throw new Error("At least one image path must be provided");
|
|
501
|
+
}
|
|
492
502
|
|
|
493
|
-
|
|
503
|
+
const contents: any[] = [args.prompt];
|
|
494
504
|
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
|
|
505
|
+
for (const imagePath of imagePaths) {
|
|
506
|
+
contents.push(await fileToGenerativePart(imagePath));
|
|
507
|
+
}
|
|
498
508
|
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
|
|
509
|
+
const response = await ai.models.generateContent({
|
|
510
|
+
model: "gemini-2.5-pro",
|
|
511
|
+
contents: contents,
|
|
512
|
+
});
|
|
503
513
|
|
|
504
|
-
|
|
505
|
-
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
|
|
514
|
+
let result = "";
|
|
515
|
+
if (response.candidates && response.candidates[0]?.content?.parts) {
|
|
516
|
+
for (const part of response.candidates[0].content.parts) {
|
|
517
|
+
if (part.text) {
|
|
518
|
+
result += part.text;
|
|
519
|
+
}
|
|
509
520
|
}
|
|
510
521
|
}
|
|
522
|
+
return result || "Analysis completed but no text response received";
|
|
523
|
+
} catch (error: any) {
|
|
524
|
+
throw new Error(`Image analysis failed: ${error.message}`);
|
|
511
525
|
}
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
|
|
515
|
-
|
|
516
|
-
}, "gemini-analyzeImages");
|
|
526
|
+
},
|
|
527
|
+
"gemini-analyzeImages",
|
|
528
|
+
{ toolName: "analyzeImages" },
|
|
529
|
+
);
|
|
517
530
|
},
|
|
518
531
|
};
|
|
519
532
|
|
|
@@ -529,13 +542,13 @@ export const geminiSingleSpeakerTts = {
|
|
|
529
542
|
voice_name: z
|
|
530
543
|
.string()
|
|
531
544
|
.describe(
|
|
532
|
-
"Supported voices: 'Despina' (Female, versatile), 'Kore' (Female, calm), 'Erinome' (Female, expressive), or 'Enceladus' (Male, neutral)."
|
|
545
|
+
"Supported voices: 'Despina' (Female, versatile), 'Kore' (Female, calm), 'Erinome' (Female, expressive), or 'Enceladus' (Male, neutral).",
|
|
533
546
|
),
|
|
534
547
|
output_path: z
|
|
535
548
|
.string()
|
|
536
549
|
.optional()
|
|
537
550
|
.describe(
|
|
538
|
-
"Optional: Output WAV file path. Defaults to a timestamped filename in the output directory."
|
|
551
|
+
"Optional: Output WAV file path. Defaults to a timestamped filename in the output directory.",
|
|
539
552
|
),
|
|
540
553
|
}),
|
|
541
554
|
timeoutMs: 300000,
|
|
@@ -544,50 +557,54 @@ export const geminiSingleSpeakerTts = {
|
|
|
544
557
|
voice_name: string;
|
|
545
558
|
output_path?: string;
|
|
546
559
|
}) => {
|
|
547
|
-
return safeToolExecute(
|
|
548
|
-
|
|
549
|
-
|
|
550
|
-
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
|
|
554
|
-
|
|
555
|
-
|
|
556
|
-
|
|
557
|
-
|
|
560
|
+
return safeToolExecute(
|
|
561
|
+
async () => {
|
|
562
|
+
try {
|
|
563
|
+
const response = await ai.models.generateContent({
|
|
564
|
+
model: "gemini-2.5-pro-preview-tts",
|
|
565
|
+
contents: [{ parts: [{ text: args.text }] }],
|
|
566
|
+
config: {
|
|
567
|
+
responseModalities: ["AUDIO"],
|
|
568
|
+
speechConfig: {
|
|
569
|
+
voiceConfig: {
|
|
570
|
+
prebuiltVoiceConfig: {
|
|
571
|
+
voiceName: args.voice_name || "Despina",
|
|
572
|
+
},
|
|
558
573
|
},
|
|
559
574
|
},
|
|
560
575
|
},
|
|
561
|
-
}
|
|
562
|
-
});
|
|
576
|
+
});
|
|
563
577
|
|
|
564
|
-
|
|
565
|
-
|
|
566
|
-
|
|
567
|
-
|
|
568
|
-
|
|
578
|
+
const data =
|
|
579
|
+
response.candidates?.[0]?.content?.parts?.[0]?.inlineData?.data;
|
|
580
|
+
if (!data) {
|
|
581
|
+
throw new Error("No audio data received from Gemini API");
|
|
582
|
+
}
|
|
569
583
|
|
|
570
|
-
|
|
584
|
+
const audioBuffer = Buffer.from(data, "base64");
|
|
571
585
|
|
|
572
|
-
|
|
573
|
-
|
|
574
|
-
|
|
586
|
+
// Use provided output path or generate default with timestamp
|
|
587
|
+
const outputPath =
|
|
588
|
+
args.output_path || generateTimestampedFilename("voice_output.wav");
|
|
575
589
|
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
|
|
579
|
-
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
|
|
585
|
-
|
|
586
|
-
|
|
587
|
-
|
|
588
|
-
|
|
589
|
-
|
|
590
|
-
|
|
590
|
+
const storage = getStorage();
|
|
591
|
+
const url = await storage.writeFile(outputPath, audioBuffer);
|
|
592
|
+
|
|
593
|
+
return JSON.stringify({
|
|
594
|
+
audio: {
|
|
595
|
+
url,
|
|
596
|
+
filename: outputPath,
|
|
597
|
+
mimeType: "audio/wav",
|
|
598
|
+
},
|
|
599
|
+
message: "Audio generated successfully",
|
|
600
|
+
});
|
|
601
|
+
} catch (error: any) {
|
|
602
|
+
throw new Error(`Voice generation failed: ${error.message}`);
|
|
603
|
+
}
|
|
604
|
+
},
|
|
605
|
+
"gemini-generateSpeech",
|
|
606
|
+
{ toolName: "generateSpeech" },
|
|
607
|
+
);
|
|
591
608
|
},
|
|
592
609
|
};
|
|
593
610
|
|
|
@@ -603,18 +620,18 @@ export const geminiAnalyzeVideos = {
|
|
|
603
620
|
.array(z.string())
|
|
604
621
|
.describe(
|
|
605
622
|
"An array containing absolute paths to local videos or YouTube URLs. Max 10 per request. " +
|
|
606
|
-
"Note: Local files are automatically optimized for processing."
|
|
623
|
+
"Note: Local files are automatically optimized for processing.",
|
|
607
624
|
),
|
|
608
625
|
prompt: z
|
|
609
626
|
.string()
|
|
610
627
|
.describe(
|
|
611
|
-
"The question or instruction regarding the video. Use MM:SS or HH:MM:SS for precise time references."
|
|
628
|
+
"The question or instruction regarding the video. Use MM:SS or HH:MM:SS for precise time references.",
|
|
612
629
|
),
|
|
613
630
|
fps: z
|
|
614
631
|
.number()
|
|
615
632
|
.optional()
|
|
616
633
|
.describe(
|
|
617
|
-
"Optional: Target frames per second for processing. Lower FPS (1-5) is recommended for long videos to save tokens."
|
|
634
|
+
"Optional: Target frames per second for processing. Lower FPS (1-5) is recommended for long videos to save tokens.",
|
|
618
635
|
),
|
|
619
636
|
start_offset: z
|
|
620
637
|
.string()
|
|
@@ -628,7 +645,7 @@ export const geminiAnalyzeVideos = {
|
|
|
628
645
|
.string()
|
|
629
646
|
.optional()
|
|
630
647
|
.describe(
|
|
631
|
-
"Processing resolution: 'default' or 'low'. 'low' significantly reduces token usage for simple visual tasks."
|
|
648
|
+
"Processing resolution: 'default' or 'low'. 'low' significantly reduces token usage for simple visual tasks.",
|
|
632
649
|
),
|
|
633
650
|
}),
|
|
634
651
|
timeoutMs: 300000,
|
|
@@ -640,90 +657,94 @@ export const geminiAnalyzeVideos = {
|
|
|
640
657
|
end_offset?: string;
|
|
641
658
|
media_resolution?: string;
|
|
642
659
|
}) => {
|
|
643
|
-
return safeToolExecute(
|
|
644
|
-
|
|
645
|
-
|
|
646
|
-
|
|
647
|
-
|
|
648
|
-
|
|
660
|
+
return safeToolExecute(
|
|
661
|
+
async () => {
|
|
662
|
+
try {
|
|
663
|
+
// Handle array parsing
|
|
664
|
+
if (!args.video_inputs) {
|
|
665
|
+
throw new Error("Video inputs not provided");
|
|
666
|
+
}
|
|
649
667
|
|
|
650
|
-
|
|
651
|
-
|
|
652
|
-
|
|
653
|
-
|
|
654
|
-
|
|
655
|
-
|
|
656
|
-
|
|
657
|
-
|
|
658
|
-
|
|
668
|
+
// Convert to array if passed as string
|
|
669
|
+
let videoInputs: string[];
|
|
670
|
+
if (typeof args.video_inputs === "string") {
|
|
671
|
+
const strValue = args.video_inputs as string;
|
|
672
|
+
if (strValue.startsWith("[") && strValue.endsWith("]")) {
|
|
673
|
+
try {
|
|
674
|
+
videoInputs = JSON.parse(strValue);
|
|
675
|
+
} catch {
|
|
676
|
+
throw new Error("Invalid video_inputs format");
|
|
677
|
+
}
|
|
678
|
+
} else {
|
|
679
|
+
videoInputs = [strValue];
|
|
659
680
|
}
|
|
681
|
+
} else if (Array.isArray(args.video_inputs)) {
|
|
682
|
+
videoInputs = args.video_inputs;
|
|
660
683
|
} else {
|
|
661
|
-
|
|
684
|
+
throw new Error("Invalid video_inputs: must be array or string");
|
|
662
685
|
}
|
|
663
|
-
} else if (Array.isArray(args.video_inputs)) {
|
|
664
|
-
videoInputs = args.video_inputs;
|
|
665
|
-
} else {
|
|
666
|
-
throw new Error("Invalid video_inputs: must be array or string");
|
|
667
|
-
}
|
|
668
686
|
|
|
669
|
-
|
|
670
|
-
|
|
671
|
-
|
|
687
|
+
if (videoInputs.length === 0) {
|
|
688
|
+
throw new Error("At least one video input must be provided");
|
|
689
|
+
}
|
|
672
690
|
|
|
673
|
-
|
|
674
|
-
|
|
675
|
-
|
|
676
|
-
|
|
677
|
-
|
|
691
|
+
if (videoInputs.length > 10) {
|
|
692
|
+
throw new Error(
|
|
693
|
+
"Maximum 10 videos per request allowed for Gemini 2.5+ models",
|
|
694
|
+
);
|
|
695
|
+
}
|
|
678
696
|
|
|
679
|
-
|
|
680
|
-
|
|
697
|
+
// Prepare video parts for content
|
|
698
|
+
const videoParts: any[] = [];
|
|
681
699
|
|
|
682
|
-
|
|
683
|
-
|
|
684
|
-
|
|
685
|
-
|
|
686
|
-
|
|
687
|
-
|
|
688
|
-
|
|
700
|
+
// Process each video input
|
|
701
|
+
for (const videoInput of videoInputs) {
|
|
702
|
+
const videoConfig = {
|
|
703
|
+
fps: args.fps || (isYouTubeUrl(videoInput) ? 1 : 5), // Default 5 FPS for local, 1 FPS for YouTube
|
|
704
|
+
startOffset: args.start_offset,
|
|
705
|
+
endOffset: args.end_offset,
|
|
706
|
+
};
|
|
689
707
|
|
|
690
|
-
|
|
691
|
-
|
|
692
|
-
|
|
708
|
+
const videoPart = await processVideoInput(videoInput, videoConfig);
|
|
709
|
+
videoParts.push(videoPart);
|
|
710
|
+
}
|
|
693
711
|
|
|
694
|
-
|
|
695
|
-
|
|
712
|
+
// Build content using createUserContent and createPartFromUri for uploaded files
|
|
713
|
+
const contentParts: any[] = [args.prompt];
|
|
696
714
|
|
|
697
|
-
|
|
698
|
-
|
|
699
|
-
|
|
700
|
-
|
|
701
|
-
|
|
715
|
+
for (const videoPart of videoParts) {
|
|
716
|
+
if (videoPart.uri && videoPart.mimeType) {
|
|
717
|
+
contentParts.push(
|
|
718
|
+
createPartFromUri(videoPart.uri, videoPart.mimeType),
|
|
719
|
+
);
|
|
720
|
+
}
|
|
702
721
|
}
|
|
703
|
-
}
|
|
704
722
|
|
|
705
|
-
|
|
723
|
+
const finalContents = createUserContent(contentParts);
|
|
706
724
|
|
|
707
|
-
|
|
708
|
-
|
|
709
|
-
|
|
710
|
-
|
|
725
|
+
const response = await ai.models.generateContent({
|
|
726
|
+
model: "gemini-2.5-pro",
|
|
727
|
+
contents: finalContents,
|
|
728
|
+
});
|
|
711
729
|
|
|
712
|
-
|
|
713
|
-
|
|
714
|
-
|
|
715
|
-
|
|
716
|
-
|
|
730
|
+
let result = "";
|
|
731
|
+
if (response.candidates && response.candidates[0]?.content?.parts) {
|
|
732
|
+
for (const part of response.candidates[0].content.parts) {
|
|
733
|
+
if (part.text) {
|
|
734
|
+
result += part.text;
|
|
735
|
+
}
|
|
717
736
|
}
|
|
718
737
|
}
|
|
719
|
-
}
|
|
720
738
|
|
|
721
|
-
|
|
722
|
-
|
|
723
|
-
|
|
724
|
-
|
|
725
|
-
|
|
726
|
-
|
|
727
|
-
|
|
739
|
+
return (
|
|
740
|
+
result || "Video analysis completed but no text response received"
|
|
741
|
+
);
|
|
742
|
+
} catch (error: any) {
|
|
743
|
+
throw new Error(`Video analysis failed: ${error.message}`);
|
|
744
|
+
}
|
|
745
|
+
},
|
|
746
|
+
"gemini-analyzeVideos",
|
|
747
|
+
{ toolName: "analyzeVideos" },
|
|
748
|
+
);
|
|
728
749
|
},
|
|
729
750
|
};
|