@mixio-pro/kalaasetu-mcp 1.2.1 → 1.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "@mixio-pro/kalaasetu-mcp",
- "version": "1.2.1",
+ "version": "1.2.2",
  "description": "A powerful Model Context Protocol server providing AI tools for content generation and analysis",
  "type": "module",
  "module": "src/index.ts",
@@ -58,27 +58,36 @@ function sanitizeParameters(
  );
  }

- /**
- * Unified generation tool using presets defined in configuration.
- */
  export const falGenerate = {
  name: "fal_generate",
  description:
- "Generate content using a named preset and optional parameters. Use fal_list_presets to discover available intents and preset names.",
+ "The primary tool for generating AI content (images, videos, etc.) using fal.ai. " +
+ "This tool follows a 'Preset' pattern: you choose a high-level intent (preset_name) and provide optional parameters. " +
+ "Use 'fal_list_presets' to discover available intents and names. " +
+ "PREREQUISITE: If using local files as parameters, you MUST upload them first using 'fal_upload_file' and use the resulting CDN URL. " +
+ "If a task is expected to take longer than 10-20 seconds, set 'queue: true' to receive status and result URLs instead of a direct result. " +
+ "ONLY USE WHEN WORKING WITH FAL MODELS/PRESETS.",
  parameters: z.object({
  preset_name: z
  .string()
- .describe("The name of the preset to use (e.g., 'cinematic_image')"),
+ .describe(
+ "The unique name of the generation preset (e.g., 'ltx_image_to_video', 'cinematic_image'). Obtain this from 'fal_list_presets'."
+ ),
  parameters: z
  .record(z.string(), z.any())
  .optional()
- .describe("Optional model-specific parameters to override defaults"),
+ .describe(
+ "A dictionary of model-specific parameters (e.g., { 'prompt': '...', 'image_url': '...' }). " +
+ "These override the default values defined in the preset. " +
+ "NOTE: For image-to-video or video-to-video tasks, use 'fal_upload_file' first and pass the resulting CDN URL here."
+ ),
  queue: z
  .boolean()
  .optional()
  .default(false)
  .describe(
- "Whether to use the queuing system for long-running tasks. Default: false"
+ "Set to true for asynchronous execution. Use this for high-resolution video or complex tasks. " +
+ "When true, returns 'status_url' and 'cancel_url' instead of the final result."
  ),
  }),
  timeoutMs: 300000,
@@ -126,9 +135,17 @@ export const falGenerate = {
  */
  export const falGetResult = {
  name: "fal_get_result",
- description: "Get the result of a queued fal.ai request.",
+ description:
+ "Retrieve the final output of a queued/asynchronous fal.ai request. " +
+ "PREREQUISITE: This tool can ONLY be used with request 'response_url's obtained from 'fal_generate' or 'fal_get_status'. " +
+ "Only call this after 'fal_get_status' indicates that the request status is 'COMPLETED'. " +
+ "ONLY USE WHEN WORKING WITH FAL MODELS/PRESETS.",
  parameters: z.object({
- url: z.string().describe("The response_url from a queued request"),
+ url: z
+ .string()
+ .describe(
+ "The 'response_url' provided by a queued 'fal_generate' or 'fal_get_status' call."
+ ),
  }),
  timeoutMs: 300000,
  execute: async (args: { url: string }) => {
@@ -142,9 +159,15 @@ export const falGetResult = {
  */
  export const falGetStatus = {
  name: "fal_get_status",
- description: "Check the status of a queued fal.ai request.",
+ description:
+ "Check the current progress or status of an asynchronous fal.ai request. " +
+ "PREREQUISITE: This tool can ONLY be used with request 'status_url's obtained from a queued 'fal_generate' call. " +
+ "Use this for polling until the status becomes 'COMPLETED', then use 'fal_get_result' for the final output. " +
+ "ONLY USE WHEN WORKING WITH FAL MODELS/PRESETS.",
  parameters: z.object({
- url: z.string().describe("The status_url from a queued request"),
+ url: z
+ .string()
+ .describe("The 'status_url' provided by a queued 'fal_generate' call."),
  }),
  timeoutMs: 300000,
  execute: async (args: { url: string }) => {
@@ -158,9 +181,14 @@ export const falGetStatus = {
  */
  export const falCancelRequest = {
  name: "fal_cancel_request",
- description: "Cancel a queued fal.ai request.",
+ description:
+ "Terminate and cancel an ongoing asynchronous fal.ai request. " +
+ "PREREQUISITE: This tool can ONLY be used with request 'cancel_url's obtained from a queued 'fal_generate' call. " +
+ "ONLY USE WHEN WORKING WITH FAL MODELS/PRESETS.",
  parameters: z.object({
- url: z.string().describe("The cancel_url from a queued request"),
+ url: z
+ .string()
+ .describe("The 'cancel_url' provided by a queued 'fal_generate' call."),
  }),
  timeoutMs: 300000,
  execute: async (args: { url: string }) => {
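
The four tools above form a single queue workflow. A minimal sketch of how an MCP client might drive it, assuming only the schemas shown in this diff; `client.callTool` and the flattened result fields are illustrative stand-ins, not part of the package:

// Illustrative queue workflow for long-running fal.ai tasks (hypothetical client API).
const queued = await client.callTool("fal_generate", {
  preset_name: "cinematic_image",
  parameters: { prompt: "a lighthouse at dusk, volumetric fog" },
  queue: true, // returns status_url, response_url and cancel_url instead of a direct result
});

let status = await client.callTool("fal_get_status", { url: queued.status_url });
while (status.status !== "COMPLETED") {
  await new Promise((resolve) => setTimeout(resolve, 5000)); // simple polling delay
  status = await client.callTool("fal_get_status", { url: queued.status_url });
}

const result = await client.callTool("fal_get_result", { url: queued.response_url });
// To abandon the task instead:
// await client.callTool("fal_cancel_request", { url: queued.cancel_url });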
@@ -13,7 +13,9 @@ import { safeToolExecute } from "../../utils/tool-wrapper";
  export const falListPresets = {
  name: "fal_list_presets",
  description:
- "List all available generation presets, including their intents, input types, and output types. Use this to find the right preset for a task.",
+ "The entry point for discovering fal.ai capabilities on this server. " +
+ "Lists all available generation presets, including their high-level 'intent' (e.g., 'Generate cinematic video'), " +
+ "and the types of input/output they support. Call this first when you need to perform an AI generation task.",
  parameters: z.object({}),
  timeoutMs: 30000,
  execute: async () => {
@@ -37,9 +39,16 @@ export const falListPresets = {
  export const falGetPresetDetails = {
  name: "fal_get_preset_details",
  description:
- "Get full details for a specific generation preset, including its model ID and default parameters.",
+ "Retrieve full details for a specific generation preset. " +
+ "Use this to see the 'modelId' being used and, most importantly, the 'defaultParams'. " +
+ "The default parameters shown here can be overridden in the 'parameters' argument of 'fal_generate'. " +
+ "ONLY USE WHEN WORKING WITH FAL MODELS/PRESETS.",
  parameters: z.object({
- preset_name: z.string().describe("The name of the preset to inspect"),
+ preset_name: z
+ .string()
+ .describe(
+ "The name of the preset to inspect (e.g., 'ltx_image_to_video')."
+ ),
  }),
  timeoutMs: 30000,
  execute: async (args: { preset_name: string }) => {
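
Per the two descriptions above, discovery precedes generation. A sketch of that flow, using the same hypothetical client as the earlier example; the preset name is a placeholder taken from the description text:

// Illustrative discovery flow (hypothetical client API).
const presets = await client.callTool("fal_list_presets", {});
const details = await client.callTool("fal_get_preset_details", {
  preset_name: "ltx_image_to_video",
});
// details.defaultParams can then be selectively overridden via fal_generate's `parameters`.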
@@ -38,9 +38,16 @@ function getMimeType(filePath: string): string {
  */
  export const falUploadFile = {
  name: "fal_upload_file",
- description: "Upload a file to fal.ai CDN storage.",
+ description:
+ "Upload a local file (image, video, audio) to fal.ai CDN storage. " +
+ "CRITICAL: You MUST use this tool to upload local files before passing their URLs to generation tools in FAL. ONLY USE WHEN WORKING WITH FAL MODELS/PRESETS" +
+ "It returns a public 'file_url' which should be used as input for 'fal_generate'.",
  parameters: z.object({
- path: z.string().describe("The absolute path to the file to upload"),
+ path: z
+ .string()
+ .describe(
+ "The absolute local path to the file to upload (e.g., '/Users/name/images/input.jpg')."
+ ),
  }),
  timeoutMs: 300000,
  execute: async (args: { path: string }) => {
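
A sketch of the upload-first prerequisite called out in the descriptions above, with the same hypothetical client; the `file_url` field follows the name mentioned in the description, and the path and parameters are placeholders:

// Illustrative two-step flow: upload a local asset, then reference its CDN URL.
const uploaded = await client.callTool("fal_upload_file", {
  path: "/Users/name/images/input.jpg",
});
await client.callTool("fal_generate", {
  preset_name: "ltx_image_to_video",
  parameters: { image_url: uploaded.file_url, prompt: "slow push-in, soft morning light" },
  queue: true,
});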
@@ -202,23 +202,36 @@ async function processVideoInput(
  export const geminiTextToImage = {
  name: "generateImage",
  description:
- "Generate images from text prompts using Gemini image models with optional reference images. Returns the URL of the generated image.",
+ "Generate high-quality images from text prompts using Google's Imagen 3 model via Gemini. " +
+ "This tool is highly capable of following complex instructions. " +
+ "Best practices: " +
+ "1. Be descriptive: instead of 'a dog', use 'a golden retriever playing in a sunlit meadow, cinematic lighting'. " +
+ "2. Specify style: e.g., '3D render', 'oil painting', 'minimalist vector art'. " +
+ "3. Use reference images: you can provide existing images to guide the style or content. " +
+ "ONLY USE WHEN WORKING WITH GOOGLE/GEMINI MODELS.",
  parameters: z.object({
- prompt: z.string().describe("Text description of the image to generate"),
+ prompt: z
+ .string()
+ .describe("Detailed text description of the image to generate."),
  aspect_ratio: z
  .string()
  .optional()
- .describe("Aspect ratio: 1:1, 3:4, 4:3, 9:16, or 16:9 (default 9:16)"),
+ .describe(
+ "Supported ratios: 1:1, 3:4, 4:3, 9:16, or 16:9. Default is 9:16."
+ ),
  output_path: z
  .string()
  .optional()
  .describe(
- "File path to save the generated image (optional, auto-generated if not provided)"
+ "Optional: specific local path or filename to save the image (e.g., 'outputs/hero.png'). " +
+ "If omitted, a timestamped filename is generated automatically."
  ),
  reference_images: z
  .array(z.string())
  .optional()
- .describe("Optional reference image file paths to guide generation"),
+ .describe(
+ "Optional: local paths or URLs of images to use as visual references for style or composition."
+ ),
  }),
  timeoutMs: 300000,
  execute: async (args: {
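
A call shaped by the prompting guidance in the new description, using the same hypothetical client; paths are placeholders:

// Illustrative generateImage call: descriptive prompt, explicit style, optional references.
await client.callTool("generateImage", {
  prompt: "a golden retriever playing in a sunlit meadow, cinematic lighting, oil painting",
  aspect_ratio: "16:9",
  output_path: "outputs/hero.png",
  reference_images: ["/Users/name/images/style-reference.jpg"],
});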
@@ -297,18 +310,33 @@ export const geminiTextToImage = {
  export const geminiEditImage = {
  name: "editImage",
  description:
- "Edit existing images with text instructions using Gemini 3 Pro Image Preview",
+ "Modify or edit an existing image based on text instructions using Google's Imagen 3 model via Gemini. " +
+ "This can be used for inpainting (changing specific parts), style transfer, or adding/removing elements. " +
+ "Describe the desired changes relative to the source image (e.g., 'Change the white shirt to a blue one' or 'Add a cat sitting on the sofa'). " +
+ "ONLY USE WHEN WORKING WITH GOOGLE/GEMINI MODELS.",
  parameters: z.object({
- image_path: z.string().describe("Path to the source image file"),
- prompt: z.string().describe("Text instructions for editing the image"),
+ image_path: z
+ .string()
+ .describe(
+ "Absolute local path or URL to the source image file to be edited."
+ ),
+ prompt: z
+ .string()
+ .describe(
+ "Instructional text describing the edits or modifications required."
+ ),
  output_path: z
  .string()
  .optional()
- .describe("File path to save the edited image"),
+ .describe(
+ "Optional: specific local path to save the edited result. Defaults to generated timestamp."
+ ),
  reference_images: z
  .array(z.string())
  .optional()
- .describe("Additional image paths for reference"),
+ .describe(
+ "Optional: additional images to guide the edit (e.g., to reference a specific character or object style)."
+ ),
  }),
  timeoutMs: 300000,
  execute: async (args: {
@@ -378,12 +406,19 @@ export const geminiEditImage = {
  export const geminiAnalyzeImages = {
  name: "analyzeImages",
  description:
- "Analyze and describe images using Gemini 2.5 Pro with advanced multimodal understanding",
+ "Perform advanced multimodal analysis on one or more images using Google's Gemini 2.5 Pro model. " +
+ "Use this for complex reasoning, visual question answering, OCR, or describing scenes in detail. " +
+ "You can compare multiple images by providing them in the array. " +
+ "ONLY USE WHEN WORKING WITH GOOGLE/GEMINI MODELS.",
  parameters: z.object({
  image_paths: z
  .array(z.string())
- .describe("Array of image file paths to analyze"),
- prompt: z.string().describe("Text prompt or question about the images"),
+ .describe(
+ "An array of absolute local file paths or publicly accessible URLs to analyze."
+ ),
+ prompt: z
+ .string()
+ .describe("The question, query, or instruction to apply to the images."),
  }),
  timeoutMs: 300000,
  execute: async (args: { image_paths: string[]; prompt: string }) => {
@@ -447,19 +482,22 @@ export const geminiAnalyzeImages = {
  export const geminiSingleSpeakerTts = {
  name: "generateSpeech",
  description:
- "Generate single speaker voice audio from text using Gemini 2.5 Pro Preview TTS model",
+ "Convert text to natural-sounding speech using Google's Gemini 2.5 Pro Preview TTS model. " +
+ "This tool generates a single speaker's voice in a WAV format. " +
+ "Best for long-form narration or simple voiceovers. " +
+ "ONLY USE WHEN WORKING WITH GOOGLE/GEMINI MODELS.",
  parameters: z.object({
- text: z.string().describe("Text to convert to speech"),
+ text: z.string().describe("The text content to be converted into speech."),
  voice_name: z
  .string()
  .describe(
- "Voice name from supported options. Use Kore, Erinome or Despina for the female voices and Enceladus for male."
+ "Supported voices: 'Despina' (Female, versatile), 'Kore' (Female, calm), 'Erinome' (Female, expressive), or 'Enceladus' (Male, neutral)."
  ),
  output_path: z
  .string()
  .optional()
  .describe(
- "Output WAV file path (optional, defaults to timestamp-based filename)"
+ "Optional: Output WAV file path. Defaults to a timestamped filename in the output directory."
  ),
  }),
  timeoutMs: 300000,
@@ -518,37 +556,41 @@ export const geminiSingleSpeakerTts = {
  export const geminiAnalyzeVideos = {
  name: "analyzeVideos",
  description:
- "Analyze and understand video content using Gemini 2.5 Flash model. Intelligently handles YouTube URLs and local videos (files <20MB processed inline, ≥20MB uploaded via File API). Supports timestamp queries, clipping, and custom frame rates with default 5 FPS for local videos to optimize processing.",
+ "Comprehensive video understanding using Google's Gemini 2.5 Pro model. " +
+ "Capable of analyzing both longitudinal content (YouTube) and specific local files. " +
+ "Supports time-aware queries (e.g., 'What color is the car at 02:45?'), clipping, and advanced visual reasoning over video streams. " +
+ "ONLY USE WHEN WORKING WITH GOOGLE/GEMINI MODELS.",
  parameters: z.object({
  video_inputs: z
  .array(z.string())
  .describe(
- "Array of video inputs - mix of local file paths and YouTube URLs (max 10 videos). Local files <20MB processed inline, larger files uploaded via File API automatically."
+ "An array containing absolute paths to local videos or YouTube URLs. Max 10 per request. " +
+ "Note: Local files are automatically optimized for processing."
  ),
  prompt: z
  .string()
  .describe(
- "Text prompt or question about the videos. Use MM:SS format for timestamp references (e.g., 'What happens at 01:30?')."
+ "The question or instruction regarding the video. Use MM:SS or HH:MM:SS for precise time references."
  ),
  fps: z
  .number()
  .optional()
  .describe(
- "Frame rate for video processing (default: 5 FPS for local videos to reduce file size, 1 FPS for YouTube URLs)"
+ "Optional: Target frames per second for processing. Lower FPS (1-5) is recommended for long videos to save tokens."
  ),
  start_offset: z
  .string()
  .optional()
- .describe("Clip start time in seconds with 's' suffix (e.g., '40s')"),
+ .describe("Start time of the segment to analyze (e.g., '10s', '01:30')."),
  end_offset: z
  .string()
  .optional()
- .describe("Clip end time in seconds with 's' suffix (e.g., '80s')"),
+ .describe("End time of the segment to analyze (e.g., '20s', '02:00')."),
  media_resolution: z
  .string()
  .optional()
  .describe(
- "Media resolution: 'default' or 'low' (low resolution uses ~100 tokens/sec vs 300 tokens/sec)"
+ "Processing resolution: 'default' or 'low'. 'low' significantly reduces token usage for simple visual tasks."
  ),
  }),
  timeoutMs: 300000,
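
A time-aware query over a clipped segment, matching the schema above and using the same hypothetical client; the URL and path are placeholders:

// Illustrative analyzeVideos call: mixed inputs, clipped segment, low-resolution processing.
await client.callTool("analyzeVideos", {
  video_inputs: ["https://www.youtube.com/watch?v=VIDEO_ID", "/Users/name/videos/demo.mp4"],
  prompt: "Describe what happens in this segment and list any on-screen text.",
  fps: 1,
  start_offset: "40s",
  end_offset: "80s",
  media_resolution: "low",
});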
@@ -41,73 +41,88 @@ async function fileToBase64(
  export const imageToVideo = {
  name: "generateVideoi2v",
  description:
- "Generate videos from an image as starting first frame using Vertex Veo models (predictLongRunning + fetchPredictOperation).",
+ "Generate professional-quality cinematic videos from a starting image and text prompt using Vertex AI's Veo models. " +
+ "This is a high-latency tool (often takes 5-15 minutes) but produces state-of-the-art results. " +
+ "It supports guided generation with start/end frames and specific durations. " +
+ "ONLY USE WHEN WORKING WITH GOOGLE VERTEX AI MODELS.",
  parameters: z.object({
- prompt: z.string().describe("Text description for the video"),
+ prompt: z
+ .string()
+ .describe(
+ "Descriptive text for the video action and style (e.g., 'A robot walking through a neon city at night')."
+ ),
  image_path: z
  .string()
  .optional()
- .describe("Path to source image for image-to-video generation"),
+ .describe("Absolute local path or URL to the STARTING image frame."),
  last_frame_path: z
  .string()
  .optional()
- .describe("Path to last frame image to guide ending frame (optional)"),
+ .describe(
+ "Optional: Absolute local path or URL to the ENDING image frame to guide the video's conclusion."
+ ),
  aspect_ratio: z
  .string()
  .optional()
  .default("16:9")
- .describe("Video aspect ratio: '16:9' or '9:16'"),
+ .describe(
+ "Target aspect ratio: '16:9' (landscape) or '9:16' (vertical)."
+ ),
  duration_seconds: z
  .string()
  .optional()
  .default("6")
  .describe(
- "Video duration in seconds. MUST be one of: '4', '6', or '8' (default: '6'). Other values will be rejected by Vertex AI."
+ "Target duration. Vertex AI ONLY supports exactly '4', '6', or '8' seconds. Other values will be rounded to the nearest supported step."
  ),
  resolution: z
  .string()
  .optional()
- .describe("Video resolution: '720p' or '1080p' (default: '720p')"),
+ .describe("Target resolution: '720p' or '1080p'. Default is '720p'."),
  negative_prompt: z
  .string()
  .optional()
- .describe("Text describing what not to include in the video"),
+ .describe(
+ "Visual elements or styles to EXCLUDE from the generated video."
+ ),
  person_generation: z
  .string()
  .optional()
  .describe(
- "Controls generation of people: 'allow_adult' (default for image-to-video) or 'allow_all'"
+ "Policy for generating people: 'allow_adult' (standard) or 'allow_all'. Note: Gemini 1.5+ safety filters apply."
  ),
  reference_images: z
  .array(z.string())
  .optional()
- .describe("Additional image paths for reference (max 3)"),
+ .describe(
+ "Optional: Additional images (up to 3) to guide style or character consistency."
+ ),
  output_path: z
  .string()
  .optional()
  .describe(
- "Output MP4 file path (if multiple predictions, index suffix is added)"
+ "Optional: Local path to save the resulting .mp4 file. Defaults to timestamped filename."
  ),
  project_id: z
  .string()
  .optional()
  .default("mixio-pro")
- .describe("GCP Project ID (default: mixio-pro)"),
+ .describe("GCP Project ID for Vertex billing."),
  location_id: z
  .string()
  .optional()
  .default("us-central1")
- .describe("Vertex region (default: us-central1)"),
+ .describe("GCP region for Vertex AI processing (e.g., 'us-central1')."),
  model_id: z
  .string()
  .optional()
  .default("veo-3.1-fast-generate-001")
- .describe("Model ID (default: veo-3.1-fast-generate-001)"),
+ .describe("Specific Vertex Veo model ID to use."),
  generate_audio: z
  .boolean()
  .optional()
  .describe(
- "Boolean flag to enable generation of audio along with the video"
+ "If true, Vertex will attempt to synthesize synchronized audio for the video."
  )
  .default(false),
  }),
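
A call matching the constraints in the schema above, notably the '4'/'6'/'8' second duration values; same hypothetical client, placeholder paths:

// Illustrative generateVideoi2v call guided by first and last frames.
await client.callTool("generateVideoi2v", {
  prompt: "A robot walking through a neon city at night, rain-slicked streets",
  image_path: "/Users/name/frames/first.png",
  last_frame_path: "/Users/name/frames/last.png",
  aspect_ratio: "16:9",
  duration_seconds: "8", // must be '4', '6', or '8'
  resolution: "720p",
  generate_audio: false,
});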
@@ -4,21 +4,26 @@ import { safeToolExecute } from "../utils/tool-wrapper";
  export const perplexityImages = {
  name: "perplexityImages",
  description:
- "Searches for images using the Perplexity API. Returns a formatted text response that includes a summary and a numbered list of image URLs with citations mapped to the text.",
+ "Search for and discover real-world images using the Perplexity Sonar API. " +
+ "This tool provides a summarized text description of findings along with a verified list of image URLs. " +
+ "Citations in the text are mapped [N] to the numbered images in the list. " +
+ "Ideal for finding visual inspiration, reference photos, or stock-like images from the web.",
  parameters: z.object({
- query: z.string().describe("The search query for images."),
+ query: z
+ .string()
+ .describe(
+ "Descriptive search terms (e.g., 'SpaceX Starship launch photos')."
+ ),
  image_domain_filter: z
  .array(z.string())
  .optional()
  .describe(
- "A list of domains to include or exclude. To exclude, prefix with '-'. E.g., ['wikimedia.org', '-gettyimages.com']."
+ "Filter results by domain. Use 'domain.com' to include, or '-domain.com' to exclude. (e.g., ['wikimedia.org', '-pinterest.com'])."
  ),
  image_format_filter: z
  .array(z.string())
  .optional()
- .describe(
- "A list of allowed image formats. E.g., ['jpg', 'png', 'gif']."
- ),
+ .describe("Target specific formats: 'jpg', 'png', 'gif', etc."),
  }),
  timeoutMs: 300000,
  execute: async (args: {
@@ -33,28 +38,26 @@ export const perplexityImages = {
  }

  const url = "https://api.perplexity.ai/chat/completions";
- const headers = {
+ const headers: Record<string, string> = {
  Authorization: `Bearer ${apiKey}`,
  "Content-Type": "application/json",
  accept: "application/json",
  };

- const payload: any = {
+ const payload = {
  model: "sonar",
  messages: [
  { role: "user", content: `Show me images of ${args.query}` },
  ],
  return_images: true,
+ ...(args.image_domain_filter
+ ? { image_domain_filter: args.image_domain_filter }
+ : {}),
+ ...(args.image_format_filter
+ ? { image_format_filter: args.image_format_filter }
+ : {}),
  };

- if (args.image_domain_filter) {
- payload.image_domain_filter = args.image_domain_filter;
- }
-
- if (args.image_format_filter) {
- payload.image_format_filter = args.image_format_filter;
- }
-
  const res = await fetch(url, {
  method: "POST",
  headers: headers,
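
The payload refactor above replaces post-hoc mutation of a loosely typed object with conditional spreads, so the optional filter keys are only present when their values are defined and the payload's type stays inferable. A minimal sketch of the idiom, with an illustrative value:

// Conditional-spread idiom: the key appears only when the value is defined.
const imageDomainFilter: string[] | undefined = ["wikimedia.org", "-pinterest.com"];
const payload = {
  model: "sonar",
  return_images: true,
  ...(imageDomainFilter ? { image_domain_filter: imageDomainFilter } : {}),
};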
@@ -66,51 +69,49 @@ export const perplexityImages = {
  throw new Error(`Perplexity API request failed: ${res.status} ${text}`);
  }

- const data: any = await res.json();
+ const data = (await res.json()) as any;
  let content = data.choices?.[0]?.message?.content;
- const images = data.images;
- const citations = data.citations;
+ const images = (data.images || []) as any[];
+ const citations = (data.citations || []) as string[];

- if (!images || images.length === 0) {
+ if (images.length === 0) {
  return `No direct image URLs found in the API response. The text content was: ${content}`;
  }

  // Create a map of origin_url -> new 1-based index
- const originUrlToImageIndex: { [key: string]: number } = {};
- images.forEach((img: any, index: number) => {
+ const originUrlToImageIndex: Record<string, number> = {};
+ images.forEach((img, index) => {
  if (img.origin_url) {
  originUrlToImageIndex[img.origin_url] = index + 1;
  }
  });

  // Create a map of old citation index -> new image index
- const oldToNewCitationMap: { [key: number]: number } = {};
- if (citations && Array.isArray(citations)) {
- citations.forEach((citationUrl: string, index: number) => {
- if (originUrlToImageIndex[citationUrl]) {
- oldToNewCitationMap[index + 1] = originUrlToImageIndex[citationUrl];
- }
- });
- }
+ const oldToNewCitationMap: Record<number, number> = {};
+ citations.forEach((citationUrl, index) => {
+ if (originUrlToImageIndex[citationUrl]) {
+ oldToNewCitationMap[index + 1] = originUrlToImageIndex[citationUrl];
+ }
+ });

  // Replace citations in the content
  if (content && typeof content === "string") {
  content = content
- .replace(/\[(\d+)\]/g, (match: string, oldIndexStr: string) => {
+ .replace(/\[(\d+)\]/g, (_match, oldIndexStr) => {
  const oldIndex = parseInt(oldIndexStr, 10);
  const newIndex = oldToNewCitationMap[oldIndex];
  if (newIndex) {
  return `[${newIndex}]`;
  }
- return ""; // Remove citation if it doesn't correspond to an image
+ return "";
  })
  .replace(/(\s\s+)/g, " ")
- .trim(); // Clean up extra spaces
+ .trim();
  }

  // Build the final formatted output
  let output = content + "\n\n--- Images ---\n";
- images.forEach((img: any, index: number) => {
+ images.forEach((img, index) => {
  output += `${index + 1}. ${img.image_url}\n (Source: ${
  img.origin_url
  })\n`;
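
To make the citation remapping above concrete, a small worked example in comments; the URLs and text are illustrative:

// citations = ["https://a.com/x", "https://b.com/y"]                  -> original markers [1], [2]
// images    = [{ image_url: "...", origin_url: "https://b.com/y" }]   -> image 1 in the output list
// originUrlToImageIndex = { "https://b.com/y": 1 }
// oldToNewCitationMap   = { 2: 1 }
// "see [1] and [2]" becomes "see and [1]": [2] is renumbered to the image index,
// [1] has no matching image and is dropped, and the leftover double space is collapsed.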
@@ -124,14 +125,16 @@ export const perplexityImages = {
  export const perplexityVideos = {
  name: "perplexityVideos",
  description:
- "Searches for videos using the Perplexity API. Returns a formatted text response that includes a summary and a numbered list of video URLs with citations mapped to the text.",
+ "Search for web videos (e.g., from YouTube, Vimeo) using Perplexity Sonar Pro. " +
+ "Provides a textual summary of the content found and direct links to the videos. " +
+ "Perfect for finding research material or specific clips based on natural language queries.",
  parameters: z.object({
- query: z.string().describe("The search query for videos."),
+ query: z.string().describe("The natural language search query for videos."),
  search_domain_filter: z
  .array(z.string())
  .optional()
  .describe(
- "A list of domains to limit the search to (e.g., ['youtube.com']). Use a '-' prefix to exclude a domain."
+ "Optional: Restrict search to specific domains (e.g., ['youtube.com']) or exclude them with '-' prefix."
  ),
  }),
  timeoutMs: 300000,
@@ -143,24 +146,23 @@ export const perplexityVideos = {
  }

  const url = "https://api.perplexity.ai/chat/completions";
- const headers = {
+ const headers: Record<string, string> = {
  Authorization: `Bearer ${apiKey}`,
  "Content-Type": "application/json",
  accept: "application/json",
  };

- const payload: any = {
+ const payload = {
  model: "sonar-pro",
  messages: [
  { role: "user", content: `Show me videos of ${args.query}` },
  ],
  media_response: { overrides: { return_videos: true } },
+ ...(args.search_domain_filter
+ ? { search_domain_filter: args.search_domain_filter }
+ : {}),
  };

- if (args.search_domain_filter) {
- payload.search_domain_filter = args.search_domain_filter;
- }
-
  const res = await fetch(url, {
  method: "POST",
  headers: headers,
@@ -172,12 +174,12 @@ export const perplexityVideos = {
  throw new Error(`Perplexity API request failed: ${res.status} ${text}`);
  }

- const data: any = await res.json();
+ const data = (await res.json()) as any;
  let content = data.choices?.[0]?.message?.content;
- const videos = data.videos;
- const citations = data.citations;
+ const videos = (data.videos || []) as any[];
+ const citations = (data.citations || []) as string[];

- if (!videos || videos.length === 0) {
+ if (videos.length === 0) {
  return `No direct video URLs found in the API response. Full API Response: ${JSON.stringify(
  data,
  null,
@@ -186,41 +188,39 @@ export const perplexityVideos = {
  }

  // Create a map of video url -> new 1-based index
- const urlToVideoIndex: { [key: string]: number } = {};
- videos.forEach((video: any, index: number) => {
+ const urlToVideoIndex: Record<string, number> = {};
+ videos.forEach((video, index) => {
  if (video.url) {
  urlToVideoIndex[video.url] = index + 1;
  }
  });

  // Create a map of old citation index -> new video index
- const oldToNewCitationMap: { [key: number]: number } = {};
- if (citations && Array.isArray(citations)) {
- citations.forEach((citationUrl: string, index: number) => {
- if (urlToVideoIndex[citationUrl]) {
- oldToNewCitationMap[index + 1] = urlToVideoIndex[citationUrl];
- }
- });
- }
+ const oldToNewCitationMap: Record<number, number> = {};
+ citations.forEach((citationUrl, index) => {
+ if (urlToVideoIndex[citationUrl]) {
+ oldToNewCitationMap[index + 1] = urlToVideoIndex[citationUrl];
+ }
+ });

  // Replace citations in the content
  if (content && typeof content === "string") {
  content = content
- .replace(/\[(\d+)\]/g, (match: string, oldIndexStr: string) => {
+ .replace(/\[(\d+)\]/g, (_match, oldIndexStr) => {
  const oldIndex = parseInt(oldIndexStr, 10);
  const newIndex = oldToNewCitationMap[oldIndex];
  if (newIndex) {
  return `[${newIndex}]`;
  }
- return ""; // Remove citation if it doesn't correspond to a video
+ return "";
  })
  .replace(/(\s\s+)/g, " ")
- .trim(); // Clean up extra spaces
+ .trim();
  }

  // Build the final formatted output
  let output = content + "\n\n--- Videos ---\n";
- videos.forEach((video: any, index: number) => {
+ videos.forEach((video, index) => {
  output += `${index + 1}. ${video.url}\n`;
  });

@@ -9,16 +9,21 @@ const ai = new GoogleGenAI({
  export const analyzeYoutubeVideo = {
  name: "analyzeYoutubeVideo",
  description:
- "Analyze YouTube videos for content using the correct GenAI JS API approach with FileData fileUri. Perfect for extracting stock media content, analyzing video content, or getting descriptions of YouTube videos",
+ "Perform deep semantic analysis of a YouTube video using Google's Gemini 2.5 Flash model. " +
+ "Use this to summarize long videos, identify specific scenes, extract information, or ask visual questions about the video's content. " +
+ "This tool treats the YouTube URL as a first-class video stream for the model. " +
+ "ONLY USE WHEN WORKING WITH GOOGLE/GEMINI MODELS.",
  parameters: z.object({
  youtube_url: z
  .string()
  .describe(
- "YouTube video URL to analyze (format: https://www.youtube.com/watch?v=VIDEO_ID)"
+ "The full URL of the YouTube video (e.g., 'https://www.youtube.com/watch?v=dQw4w9WgXcQ')."
  ),
  prompt: z
  .string()
- .describe("Analysis prompt or question about the YouTube video content"),
+ .describe(
+ "Instruction or question about the video content (e.g., 'Summarize the main points' or 'What color was the car?')."
+ ),
  }),
  timeoutMs: 300000,
  execute: async (args: { youtube_url: string; prompt: string }) => {
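
Finally, a call shaped by the schema above, with the same hypothetical client; the URL is a placeholder:

// Illustrative analyzeYoutubeVideo call.
await client.callTool("analyzeYoutubeVideo", {
  youtube_url: "https://www.youtube.com/watch?v=VIDEO_ID",
  prompt: "Summarize the main points and list any on-screen text with timestamps.",
});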