@mixio-pro/kalaasetu-mcp 1.2.0 → 1.2.2
- package/package.json +1 -1
- package/src/tools/fal/generate.ts +41 -13
- package/src/tools/fal/models.ts +12 -3
- package/src/tools/fal/storage.ts +9 -2
- package/src/tools/gemini.ts +66 -24
- package/src/tools/image-to-video.ts +30 -15
- package/src/tools/perplexity.ts +61 -61
- package/src/tools/youtube.ts +8 -3
package/package.json
CHANGED

package/src/tools/fal/generate.ts
CHANGED

@@ -58,27 +58,36 @@ function sanitizeParameters(
   );
 }

-/**
- * Unified generation tool using presets defined in configuration.
- */
 export const falGenerate = {
   name: "fal_generate",
   description:
-    "
+    "The primary tool for generating AI content (images, videos, etc.) using fal.ai. " +
+    "This tool follows a 'Preset' pattern: you choose a high-level intent (preset_name) and provide optional parameters. " +
+    "Use 'fal_list_presets' to discover available intents and names. " +
+    "PREREQUISITE: If using local files as parameters, you MUST upload them first using 'fal_upload_file' and use the resulting CDN URL. " +
+    "If a task is expected to take longer than 10-20 seconds, set 'queue: true' to receive status and result URLs instead of a direct result. " +
+    "ONLY USE WHEN WORKING WITH FAL MODELS/PRESETS.",
   parameters: z.object({
     preset_name: z
       .string()
-      .describe(
+      .describe(
+        "The unique name of the generation preset (e.g., 'ltx_image_to_video', 'cinematic_image'). Obtain this from 'fal_list_presets'."
+      ),
     parameters: z
       .record(z.string(), z.any())
       .optional()
-      .describe(
+      .describe(
+        "A dictionary of model-specific parameters (e.g., { 'prompt': '...', 'image_url': '...' }). " +
+        "These override the default values defined in the preset. " +
+        "NOTE: For image-to-video or video-to-video tasks, use 'fal_upload_file' first and pass the resulting CDN URL here."
+      ),
     queue: z
       .boolean()
       .optional()
      .default(false)
       .describe(
-        "
+        "Set to true for asynchronous execution. Use this for high-resolution video or complex tasks. " +
+        "When true, returns 'status_url' and 'cancel_url' instead of the final result."
       ),
   }),
   timeoutMs: 300000,

@@ -126,9 +135,17 @@ export const falGenerate = {
  */
 export const falGetResult = {
   name: "fal_get_result",
-  description:
+  description:
+    "Retrieve the final output of a queued/asynchronous fal.ai request. " +
+    "PREREQUISITE: This tool can ONLY be used with request 'response_url's obtained from 'fal_generate' or 'fal_get_status'. " +
+    "Only call this after 'fal_get_status' indicates that the request status is 'COMPLETED'. " +
+    "ONLY USE WHEN WORKING WITH FAL MODELS/PRESETS.",
   parameters: z.object({
-    url: z
+    url: z
+      .string()
+      .describe(
+        "The 'response_url' provided by a queued 'fal_generate' or 'fal_get_status' call."
+      ),
   }),
   timeoutMs: 300000,
   execute: async (args: { url: string }) => {

@@ -142,9 +159,15 @@ export const falGetResult = {
  */
 export const falGetStatus = {
   name: "fal_get_status",
-  description:
+  description:
+    "Check the current progress or status of an asynchronous fal.ai request. " +
+    "PREREQUISITE: This tool can ONLY be used with request 'status_url's obtained from a queued 'fal_generate' call. " +
+    "Use this for polling until the status becomes 'COMPLETED', then use 'fal_get_result' for the final output. " +
+    "ONLY USE WHEN WORKING WITH FAL MODELS/PRESETS.",
   parameters: z.object({
-    url: z
+    url: z
+      .string()
+      .describe("The 'status_url' provided by a queued 'fal_generate' call."),
   }),
   timeoutMs: 300000,
   execute: async (args: { url: string }) => {

@@ -158,9 +181,14 @@ export const falGetStatus = {
  */
 export const falCancelRequest = {
   name: "fal_cancel_request",
-  description:
+  description:
+    "Terminate and cancel an ongoing asynchronous fal.ai request. " +
+    "PREREQUISITE: This tool can ONLY be used with request 'cancel_url's obtained from a queued 'fal_generate' call. " +
+    "ONLY USE WHEN WORKING WITH FAL MODELS/PRESETS.",
   parameters: z.object({
-    url: z
+    url: z
+      .string()
+      .describe("The 'cancel_url' provided by a queued 'fal_generate' call."),
   }),
   timeoutMs: 300000,
   execute: async (args: { url: string }) => {
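Note: the rewritten descriptions define an explicit asynchronous contract across these four tools: a fal_generate call with queue: true returns URLs instead of a result, fal_get_status is polled until 'COMPLETED', and only then is fal_get_result valid. A minimal sketch of that loop from a client's side, assuming a hypothetical callTool helper that invokes this server's tools by name (the exact shape of the queued response is inferred from the descriptions, not confirmed):

// Hypothetical MCP client stub -- stands in for whatever client drives this server.
declare function callTool(name: string, args: Record<string, unknown>): Promise<any>;

async function generateQueued(presetName: string, parameters: Record<string, unknown>) {
  // queue: true -> returns status/cancel/result URLs instead of the final output
  const job = await callTool("fal_generate", {
    preset_name: presetName,
    parameters,
    queue: true,
  });

  // Poll fal_get_status until COMPLETED, as the description instructs
  let status: any;
  do {
    await new Promise((resolve) => setTimeout(resolve, 5000));
    status = await callTool("fal_get_status", { url: job.status_url });
  } while (status.status !== "COMPLETED");

  // Only now is fal_get_result valid, using the response_url
  return callTool("fal_get_result", { url: job.response_url });
}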
package/src/tools/fal/models.ts
CHANGED
@@ -13,7 +13,9 @@ import { safeToolExecute } from "../../utils/tool-wrapper";
 export const falListPresets = {
   name: "fal_list_presets",
   description:
-    "
+    "The entry point for discovering fal.ai capabilities on this server. " +
+    "Lists all available generation presets, including their high-level 'intent' (e.g., 'Generate cinematic video'), " +
+    "and the types of input/output they support. Call this first when you need to perform an AI generation task.",
   parameters: z.object({}),
   timeoutMs: 30000,
   execute: async () => {

@@ -37,9 +39,16 @@ export const falListPresets = {
 export const falGetPresetDetails = {
   name: "fal_get_preset_details",
   description:
-    "
+    "Retrieve full details for a specific generation preset. " +
+    "Use this to see the 'modelId' being used and, most importantly, the 'defaultParams'. " +
+    "The default parameters shown here can be overridden in the 'parameters' argument of 'fal_generate'. " +
+    "ONLY USE WHEN WORKING WITH FAL MODELS/PRESETS.",
   parameters: z.object({
-    preset_name: z
+    preset_name: z
+      .string()
+      .describe(
+        "The name of the preset to inspect (e.g., 'ltx_image_to_video')."
+      ),
   }),
   timeoutMs: 30000,
   execute: async (args: { preset_name: string }) => {
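Together with fal_generate, these descriptions now encode a discovery-first flow: list presets, inspect one, then generate with overrides. A sketch reusing the hypothetical callTool stub from the earlier note ('ltx_image_to_video' is just the example name the descriptions use):

async function discoverAndGenerate() {
  // 1. Discover intents and preset names
  const presets = await callTool("fal_list_presets", {});
  console.log(presets);

  // 2. Inspect modelId and defaultParams for one preset
  const details = await callTool("fal_get_preset_details", {
    preset_name: "ltx_image_to_video",
  });

  // 3. Generate, overriding selected defaults via 'parameters'
  return callTool("fal_generate", {
    preset_name: "ltx_image_to_video",
    parameters: { ...details.defaultParams, prompt: "slow dolly shot at dusk" },
  });
}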
package/src/tools/fal/storage.ts
CHANGED
@@ -38,9 +38,16 @@ function getMimeType(filePath: string): string {
  */
 export const falUploadFile = {
   name: "fal_upload_file",
-  description:
+  description:
+    "Upload a local file (image, video, audio) to fal.ai CDN storage. " +
+    "CRITICAL: You MUST use this tool to upload local files before passing their URLs to generation tools in FAL. ONLY USE WHEN WORKING WITH FAL MODELS/PRESETS" +
+    "It returns a public 'file_url' which should be used as input for 'fal_generate'.",
   parameters: z.object({
-    path: z
+    path: z
+      .string()
+      .describe(
+        "The absolute local path to the file to upload (e.g., '/Users/name/images/input.jpg')."
+      ),
   }),
   timeoutMs: 300000,
   execute: async (args: { path: string }) => {
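This upload prerequisite pairs with fal_generate: local media goes through fal_upload_file first, and the returned 'file_url' is what the presets accept. A sketch under the same hypothetical callTool stub (the 'file_url' field name comes from the description; the preset name is illustrative):

async function animateLocalImage(localPath: string) {
  // Upload first -- generation tools only accept CDN URLs, not local paths
  const upload = await callTool("fal_upload_file", { path: localPath });

  // Pass the returned CDN URL as the preset's image input
  return callTool("fal_generate", {
    preset_name: "ltx_image_to_video", // example preset name from the docs
    parameters: { image_url: upload.file_url },
    queue: true, // video generation easily exceeds 10-20 seconds
  });
}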
package/src/tools/gemini.ts
CHANGED
@@ -202,23 +202,36 @@ async function processVideoInput(
 export const geminiTextToImage = {
   name: "generateImage",
   description:
-    "Generate images from text prompts using
+    "Generate high-quality images from text prompts using Google's Imagen 3 model via Gemini. " +
+    "This tool is highly capable of following complex instructions. " +
+    "Best practices: " +
+    "1. Be descriptive: instead of 'a dog', use 'a golden retriever playing in a sunlit meadow, cinematic lighting'. " +
+    "2. Specify style: e.g., '3D render', 'oil painting', 'minimalist vector art'. " +
+    "3. Use reference images: you can provide existing images to guide the style or content. " +
+    "ONLY USE WHEN WORKING WITH GOOGLE/GEMINI MODELS.",
   parameters: z.object({
-    prompt: z
+    prompt: z
+      .string()
+      .describe("Detailed text description of the image to generate."),
     aspect_ratio: z
       .string()
       .optional()
-      .describe(
+      .describe(
+        "Supported ratios: 1:1, 3:4, 4:3, 9:16, or 16:9. Default is 9:16."
+      ),
     output_path: z
       .string()
       .optional()
       .describe(
-        "
+        "Optional: specific local path or filename to save the image (e.g., 'outputs/hero.png'). " +
+        "If omitted, a timestamped filename is generated automatically."
       ),
     reference_images: z
       .array(z.string())
       .optional()
-      .describe(
+      .describe(
+        "Optional: local paths or URLs of images to use as visual references for style or composition."
+      ),
   }),
   timeoutMs: 300000,
   execute: async (args: {

@@ -297,18 +310,33 @@ export const geminiTextToImage = {
 export const geminiEditImage = {
   name: "editImage",
   description:
-    "
+    "Modify or edit an existing image based on text instructions using Google's Imagen 3 model via Gemini. " +
+    "This can be used for inpainting (changing specific parts), style transfer, or adding/removing elements. " +
+    "Describe the desired changes relative to the source image (e.g., 'Change the white shirt to a blue one' or 'Add a cat sitting on the sofa'). " +
+    "ONLY USE WHEN WORKING WITH GOOGLE/GEMINI MODELS.",
   parameters: z.object({
-    image_path: z
-
+    image_path: z
+      .string()
+      .describe(
+        "Absolute local path or URL to the source image file to be edited."
+      ),
+    prompt: z
+      .string()
+      .describe(
+        "Instructional text describing the edits or modifications required."
+      ),
     output_path: z
       .string()
       .optional()
-      .describe(
+      .describe(
+        "Optional: specific local path to save the edited result. Defaults to generated timestamp."
+      ),
     reference_images: z
       .array(z.string())
       .optional()
-      .describe(
+      .describe(
+        "Optional: additional images to guide the edit (e.g., to reference a specific character or object style)."
+      ),
   }),
   timeoutMs: 300000,
   execute: async (args: {

@@ -378,12 +406,19 @@ export const geminiEditImage = {
 export const geminiAnalyzeImages = {
   name: "analyzeImages",
   description:
-    "
+    "Perform advanced multimodal analysis on one or more images using Google's Gemini 2.5 Pro model. " +
+    "Use this for complex reasoning, visual question answering, OCR, or describing scenes in detail. " +
+    "You can compare multiple images by providing them in the array. " +
+    "ONLY USE WHEN WORKING WITH GOOGLE/GEMINI MODELS.",
   parameters: z.object({
     image_paths: z
       .array(z.string())
-      .describe(
-
+      .describe(
+        "An array of absolute local file paths or publicly accessible URLs to analyze."
+      ),
+    prompt: z
+      .string()
+      .describe("The question, query, or instruction to apply to the images."),
   }),
   timeoutMs: 300000,
   execute: async (args: { image_paths: string[]; prompt: string }) => {

@@ -447,19 +482,22 @@ export const geminiAnalyzeImages = {
 export const geminiSingleSpeakerTts = {
   name: "generateSpeech",
   description:
-    "
+    "Convert text to natural-sounding speech using Google's Gemini 2.5 Pro Preview TTS model. " +
+    "This tool generates a single speaker's voice in a WAV format. " +
+    "Best for long-form narration or simple voiceovers. " +
+    "ONLY USE WHEN WORKING WITH GOOGLE/GEMINI MODELS.",
   parameters: z.object({
-    text: z.string().describe("
+    text: z.string().describe("The text content to be converted into speech."),
     voice_name: z
       .string()
       .describe(
-        "
+        "Supported voices: 'Despina' (Female, versatile), 'Kore' (Female, calm), 'Erinome' (Female, expressive), or 'Enceladus' (Male, neutral)."
       ),
     output_path: z
       .string()
       .optional()
       .describe(
-        "Output WAV file path
+        "Optional: Output WAV file path. Defaults to a timestamped filename in the output directory."
       ),
   }),
   timeoutMs: 300000,

@@ -518,37 +556,41 @@ export const geminiSingleSpeakerTts = {
 export const geminiAnalyzeVideos = {
   name: "analyzeVideos",
   description:
-    "
+    "Comprehensive video understanding using Google's Gemini 2.5 Pro model. " +
+    "Capable of analyzing both longitudinal content (YouTube) and specific local files. " +
+    "Supports time-aware queries (e.g., 'What color is the car at 02:45?'), clipping, and advanced visual reasoning over video streams. " +
+    "ONLY USE WHEN WORKING WITH GOOGLE/GEMINI MODELS.",
   parameters: z.object({
     video_inputs: z
       .array(z.string())
       .describe(
-        "
+        "An array containing absolute paths to local videos or YouTube URLs. Max 10 per request. " +
+        "Note: Local files are automatically optimized for processing."
       ),
     prompt: z
       .string()
       .describe(
-        "
+        "The question or instruction regarding the video. Use MM:SS or HH:MM:SS for precise time references."
       ),
     fps: z
       .number()
       .optional()
       .describe(
-        "
+        "Optional: Target frames per second for processing. Lower FPS (1-5) is recommended for long videos to save tokens."
       ),
     start_offset: z
       .string()
       .optional()
-      .describe("
+      .describe("Start time of the segment to analyze (e.g., '10s', '01:30')."),
     end_offset: z
       .string()
       .optional()
-      .describe("
+      .describe("End time of the segment to analyze (e.g., '20s', '02:00')."),
     media_resolution: z
       .string()
       .optional()
       .describe(
-        "
+        "Processing resolution: 'default' or 'low'. 'low' significantly reduces token usage for simple visual tasks."
       ),
   }),
   timeoutMs: 300000,
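The analyzeVideos parameters gained concrete guidance (segment offsets, fps, media_resolution). A call exercising them might look like this, again via the hypothetical callTool stub; all argument values are illustrative:

await callTool("analyzeVideos", {
  video_inputs: ["/abs/path/demo.mp4"], // local paths or YouTube URLs, max 10
  prompt: "What happens between 01:30 and 02:00?",
  fps: 2,                  // low FPS saves tokens on long videos
  start_offset: "01:30",   // clip the analyzed segment
  end_offset: "02:00",
  media_resolution: "low", // cheaper for simple visual tasks
});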
package/src/tools/image-to-video.ts
CHANGED

@@ -41,73 +41,88 @@ async function fileToBase64(
 export const imageToVideo = {
   name: "generateVideoi2v",
   description:
-    "Generate videos from
+    "Generate professional-quality cinematic videos from a starting image and text prompt using Vertex AI's Veo models. " +
+    "This is a high-latency tool (often takes 5-15 minutes) but produces state-of-the-art results. " +
+    "It supports guided generation with start/end frames and specific durations. " +
+    "ONLY USE WHEN WORKING WITH GOOGLE VERTEX AI MODELS.",
   parameters: z.object({
-    prompt: z
+    prompt: z
+      .string()
+      .describe(
+        "Descriptive text for the video action and style (e.g., 'A robot walking through a neon city at night')."
+      ),
     image_path: z
       .string()
       .optional()
-      .describe("
+      .describe("Absolute local path or URL to the STARTING image frame."),
     last_frame_path: z
       .string()
       .optional()
-      .describe(
+      .describe(
+        "Optional: Absolute local path or URL to the ENDING image frame to guide the video's conclusion."
+      ),
     aspect_ratio: z
       .string()
       .optional()
       .default("16:9")
-      .describe(
+      .describe(
+        "Target aspect ratio: '16:9' (landscape) or '9:16' (vertical)."
+      ),
     duration_seconds: z
       .string()
       .optional()
       .default("6")
       .describe(
-        "
+        "Target duration. Vertex AI ONLY supports exactly '4', '6', or '8' seconds. Other values will be rounded to the nearest supported step."
       ),
     resolution: z
       .string()
       .optional()
-      .describe("
+      .describe("Target resolution: '720p' or '1080p'. Default is '720p'."),
     negative_prompt: z
       .string()
       .optional()
-      .describe(
+      .describe(
+        "Visual elements or styles to EXCLUDE from the generated video."
+      ),
     person_generation: z
       .string()
      .optional()
       .describe(
-        "
+        "Policy for generating people: 'allow_adult' (standard) or 'allow_all'. Note: Gemini 1.5+ safety filters apply."
       ),
     reference_images: z
       .array(z.string())
       .optional()
-      .describe(
+      .describe(
+        "Optional: Additional images (up to 3) to guide style or character consistency."
+      ),
     output_path: z
       .string()
       .optional()
       .describe(
-        "
+        "Optional: Local path to save the resulting .mp4 file. Defaults to timestamped filename."
       ),
     project_id: z
       .string()
       .optional()
       .default("mixio-pro")
-      .describe("GCP Project ID
+      .describe("GCP Project ID for Vertex billing."),
     location_id: z
       .string()
       .optional()
       .default("us-central1")
-      .describe("
+      .describe("GCP region for Vertex AI processing (e.g., 'us-central1')."),
     model_id: z
       .string()
       .optional()
       .default("veo-3.1-fast-generate-001")
-      .describe("
+      .describe("Specific Vertex Veo model ID to use."),
     generate_audio: z
       .boolean()
       .optional()
       .describe(
-        "
+        "If true, Vertex will attempt to synthesize synchronized audio for the video."
       )
       .default(false),
   }),
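The duration_seconds description pins Vertex to exactly '4', '6', or '8' seconds and says other values are rounded. One way a caller might snap inputs up front (my own illustration, not code from this package; tie-breaking toward the lower step is an arbitrary choice):

// Snap an arbitrary duration to the nearest supported Veo step.
function snapDuration(seconds: number): "4" | "6" | "8" {
  const nearest = [4, 6, 8].reduce((best, step) =>
    Math.abs(step - seconds) < Math.abs(best - seconds) ? step : best
  );
  return String(nearest) as "4" | "6" | "8";
}

await callTool("generateVideoi2v", {
  prompt: "A robot walking through a neon city at night",
  image_path: "/abs/path/first-frame.png",
  duration_seconds: snapDuration(7), // 7 snaps to "6" (ties keep the lower step)
  aspect_ratio: "9:16",
});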
package/src/tools/perplexity.ts
CHANGED
@@ -4,21 +4,26 @@ import { safeToolExecute } from "../utils/tool-wrapper";
 export const perplexityImages = {
   name: "perplexityImages",
   description:
-    "
+    "Search for and discover real-world images using the Perplexity Sonar API. " +
+    "This tool provides a summarized text description of findings along with a verified list of image URLs. " +
+    "Citations in the text are mapped [N] to the numbered images in the list. " +
+    "Ideal for finding visual inspiration, reference photos, or stock-like images from the web.",
   parameters: z.object({
-    query: z
+    query: z
+      .string()
+      .describe(
+        "Descriptive search terms (e.g., 'SpaceX Starship launch photos')."
+      ),
     image_domain_filter: z
       .array(z.string())
       .optional()
       .describe(
-        "
+        "Filter results by domain. Use 'domain.com' to include, or '-domain.com' to exclude. (e.g., ['wikimedia.org', '-pinterest.com'])."
       ),
     image_format_filter: z
       .array(z.string())
       .optional()
-      .describe(
-        "A list of allowed image formats. E.g., ['jpg', 'png', 'gif']."
-      ),
+      .describe("Target specific formats: 'jpg', 'png', 'gif', etc."),
   }),
   timeoutMs: 300000,
   execute: async (args: {

@@ -33,28 +38,26 @@ export const perplexityImages = {
     }

     const url = "https://api.perplexity.ai/chat/completions";
-    const headers = {
+    const headers: Record<string, string> = {
       Authorization: `Bearer ${apiKey}`,
       "Content-Type": "application/json",
       accept: "application/json",
     };

-    const payload
+    const payload = {
       model: "sonar",
       messages: [
         { role: "user", content: `Show me images of ${args.query}` },
       ],
       return_images: true,
+      ...(args.image_domain_filter
+        ? { image_domain_filter: args.image_domain_filter }
+        : {}),
+      ...(args.image_format_filter
+        ? { image_format_filter: args.image_format_filter }
+        : {}),
     };

-    if (args.image_domain_filter) {
-      payload.image_domain_filter = args.image_domain_filter;
-    }
-
-    if (args.image_format_filter) {
-      payload.image_format_filter = args.image_format_filter;
-    }
-
     const res = await fetch(url, {
       method: "POST",
       headers: headers,

@@ -66,51 +69,49 @@ export const perplexityImages = {
       throw new Error(`Perplexity API request failed: ${res.status} ${text}`);
     }

-    const data
+    const data = (await res.json()) as any;
     let content = data.choices?.[0]?.message?.content;
-    const images = data.images;
-    const citations = data.citations;
+    const images = (data.images || []) as any[];
+    const citations = (data.citations || []) as string[];

-    if (
+    if (images.length === 0) {
       return `No direct image URLs found in the API response. The text content was: ${content}`;
     }

     // Create a map of origin_url -> new 1-based index
-    const originUrlToImageIndex:
-    images.forEach((img
+    const originUrlToImageIndex: Record<string, number> = {};
+    images.forEach((img, index) => {
       if (img.origin_url) {
         originUrlToImageIndex[img.origin_url] = index + 1;
       }
     });

     // Create a map of old citation index -> new image index
-    const oldToNewCitationMap:
-
-
-
-
-
-    });
-    }
+    const oldToNewCitationMap: Record<number, number> = {};
+    citations.forEach((citationUrl, index) => {
+      if (originUrlToImageIndex[citationUrl]) {
+        oldToNewCitationMap[index + 1] = originUrlToImageIndex[citationUrl];
+      }
+    });

     // Replace citations in the content
     if (content && typeof content === "string") {
       content = content
-        .replace(/\[(\d+)\]/g, (
+        .replace(/\[(\d+)\]/g, (_match, oldIndexStr) => {
           const oldIndex = parseInt(oldIndexStr, 10);
           const newIndex = oldToNewCitationMap[oldIndex];
           if (newIndex) {
             return `[${newIndex}]`;
           }
-          return "";
+          return "";
         })
         .replace(/(\s\s+)/g, " ")
-        .trim();
+        .trim();
     }

     // Build the final formatted output
     let output = content + "\n\n--- Images ---\n";
-    images.forEach((img
+    images.forEach((img, index) => {
       output += `${index + 1}. ${img.image_url}\n (Source: ${
         img.origin_url
       })\n`;

@@ -124,14 +125,16 @@ export const perplexityImages = {
 export const perplexityVideos = {
   name: "perplexityVideos",
   description:
-    "
+    "Search for web videos (e.g., from YouTube, Vimeo) using Perplexity Sonar Pro. " +
+    "Provides a textual summary of the content found and direct links to the videos. " +
+    "Perfect for finding research material or specific clips based on natural language queries.",
   parameters: z.object({
-    query: z.string().describe("The search query for videos."),
+    query: z.string().describe("The natural language search query for videos."),
     search_domain_filter: z
       .array(z.string())
       .optional()
       .describe(
-        "
+        "Optional: Restrict search to specific domains (e.g., ['youtube.com']) or exclude them with '-' prefix."
       ),
   }),
   timeoutMs: 300000,

@@ -143,24 +146,23 @@ export const perplexityVideos = {
     }

     const url = "https://api.perplexity.ai/chat/completions";
-    const headers = {
+    const headers: Record<string, string> = {
       Authorization: `Bearer ${apiKey}`,
       "Content-Type": "application/json",
       accept: "application/json",
     };

-    const payload
+    const payload = {
       model: "sonar-pro",
       messages: [
         { role: "user", content: `Show me videos of ${args.query}` },
       ],
       media_response: { overrides: { return_videos: true } },
+      ...(args.search_domain_filter
+        ? { search_domain_filter: args.search_domain_filter }
+        : {}),
     };

-    if (args.search_domain_filter) {
-      payload.search_domain_filter = args.search_domain_filter;
-    }
-
     const res = await fetch(url, {
       method: "POST",
       headers: headers,

@@ -172,12 +174,12 @@ export const perplexityVideos = {
       throw new Error(`Perplexity API request failed: ${res.status} ${text}`);
     }

-    const data
+    const data = (await res.json()) as any;
     let content = data.choices?.[0]?.message?.content;
-    const videos = data.videos;
-    const citations = data.citations;
+    const videos = (data.videos || []) as any[];
+    const citations = (data.citations || []) as string[];

-    if (
+    if (videos.length === 0) {
       return `No direct video URLs found in the API response. Full API Response: ${JSON.stringify(
         data,
         null,

@@ -186,41 +188,39 @@ export const perplexityVideos = {
     }

     // Create a map of video url -> new 1-based index
-    const urlToVideoIndex:
-    videos.forEach((video
+    const urlToVideoIndex: Record<string, number> = {};
+    videos.forEach((video, index) => {
       if (video.url) {
         urlToVideoIndex[video.url] = index + 1;
       }
     });

     // Create a map of old citation index -> new video index
-    const oldToNewCitationMap:
-
-
-
-
-
-    });
-    }
+    const oldToNewCitationMap: Record<number, number> = {};
+    citations.forEach((citationUrl, index) => {
+      if (urlToVideoIndex[citationUrl]) {
+        oldToNewCitationMap[index + 1] = urlToVideoIndex[citationUrl];
+      }
+    });

     // Replace citations in the content
     if (content && typeof content === "string") {
       content = content
-        .replace(/\[(\d+)\]/g, (
+        .replace(/\[(\d+)\]/g, (_match, oldIndexStr) => {
           const oldIndex = parseInt(oldIndexStr, 10);
           const newIndex = oldToNewCitationMap[oldIndex];
           if (newIndex) {
             return `[${newIndex}]`;
           }
-          return "";
+          return "";
         })
         .replace(/(\s\s+)/g, " ")
-        .trim();
+        .trim();
     }

     // Build the final formatted output
     let output = content + "\n\n--- Videos ---\n";
-    videos.forEach((video
+    videos.forEach((video, index) => {
       output += `${index + 1}. ${video.url}\n`;
     });

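Worth noting in both tools above: the citation remapping renumbers [N] markers to match the returned media list, keyed on URL, and drops markers with no match. A self-contained trace of the same algorithm (the data values are invented for illustration):

const images = [{ image_url: "https://cdn/a.jpg", origin_url: "https://b.com/y" }];
const citations = ["https://a.com/x", "https://b.com/y"];

// origin_url -> 1-based position in the images list
const originUrlToImageIndex: Record<string, number> = {};
images.forEach((img, i) => { originUrlToImageIndex[img.origin_url] = i + 1; });

// old citation number -> new image number (unmatched citations get no entry)
const oldToNew: Record<number, number> = {};
citations.forEach((url, i) => {
  if (originUrlToImageIndex[url]) oldToNew[i + 1] = originUrlToImageIndex[url];
});

const remapped = "See the launch [2] and the report [1]."
  .replace(/\[(\d+)\]/g, (_m, n) => (oldToNew[+n] ? `[${oldToNew[+n]}]` : ""))
  .replace(/(\s\s+)/g, " ")
  .trim();
// -> "See the launch [1] and the report ." ([1] had no matching image, so it was dropped)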
package/src/tools/youtube.ts
CHANGED
@@ -9,16 +9,21 @@ const ai = new GoogleGenAI({
 export const analyzeYoutubeVideo = {
   name: "analyzeYoutubeVideo",
   description:
-    "
+    "Perform deep semantic analysis of a YouTube video using Google's Gemini 2.5 Flash model. " +
+    "Use this to summarize long videos, identify specific scenes, extract information, or ask visual questions about the video's content. " +
+    "This tool treats the YouTube URL as a first-class video stream for the model. " +
+    "ONLY USE WHEN WORKING WITH GOOGLE/GEMINI MODELS.",
   parameters: z.object({
     youtube_url: z
       .string()
       .describe(
-        "
+        "The full URL of the YouTube video (e.g., 'https://www.youtube.com/watch?v=dQw4w9WgXcQ')."
       ),
     prompt: z
       .string()
-      .describe(
+      .describe(
+        "Instruction or question about the video content (e.g., 'Summarize the main points' or 'What color was the car?')."
+      ),
   }),
   timeoutMs: 300000,
   execute: async (args: { youtube_url: string; prompt: string }) => {
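A final usage sketch for the YouTube tool, with the same hypothetical callTool stub; the URL is the example given in the description:

await callTool("analyzeYoutubeVideo", {
  youtube_url: "https://www.youtube.com/watch?v=dQw4w9WgXcQ",
  prompt: "Summarize the main points",
});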