@mixio-pro/kalaasetu-mcp 1.2.1 → 2.0.1-beta
This diff shows the changes between publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
- package/fal-config.json +106 -0
- package/package.json +2 -1
- package/src/index.ts +0 -9
- package/src/tools/fal/config.ts +120 -23
- package/src/tools/fal/generate.ts +370 -84
- package/src/tools/fal/index.ts +2 -7
- package/src/tools/fal/models.ts +163 -29
- package/src/tools/fal/storage.ts +9 -2
- package/src/tools/gemini.ts +106 -26
- package/src/tools/image-to-video.ts +359 -129
- package/src/tools/perplexity.ts +61 -61
- package/src/tools/youtube.ts +8 -3
- package/src/utils/llm-prompt-enhancer.ts +302 -0
- package/src/utils/prompt-enhancer-presets.ts +303 -0
- package/src/utils/prompt-enhancer.ts +186 -0
package/src/tools/fal/models.ts
CHANGED
@@ -1,45 +1,159 @@
+import { z } from "zod";
+import {
+  loadFalConfig,
+  saveFalConfig,
+  FAL_BASE_URL,
+  DEFAULT_TIMEOUT,
+} from "./config";
+import { safeToolExecute } from "../../utils/tool-wrapper";
+
 /**
- *
- *
+ * Extract simplified input schema from FAL OpenAPI response.
+ * Returns only the properties object from the input schema.
  */
+function extractInputSchema(openApiSchema: any): Record<string, any> | null {
+  try {
+    const schemas = openApiSchema?.components?.schemas;
+    if (!schemas) return null;
 
-
-
-
+    // Find the input schema - usually named like "Ltx2ImageToVideoInput" or similar
+    const inputSchemaKey = Object.keys(schemas).find(
+      (key) =>
+        key.toLowerCase().includes("input") &&
+        !key.toLowerCase().includes("output")
+    );
+
+    if (!inputSchemaKey) return null;
+
+    const inputSchema = schemas[inputSchemaKey];
+    if (!inputSchema?.properties) return null;
+
+    // Extract simplified properties
+    const simplified: Record<string, any> = {};
+    for (const [propName, propDef] of Object.entries(
+      inputSchema.properties as Record<string, any>
+    )) {
+      simplified[propName] = {
+        type: propDef.type,
+        description: propDef.description,
+        ...(propDef.enum && { enum: propDef.enum }),
+        ...(propDef.default !== undefined && { default: propDef.default }),
+        ...(propDef.examples && { example: propDef.examples[0] }),
+      };
+    }
+
+    return simplified;
+  } catch (e) {
+    return null;
+  }
+}
 
 /**
  * Tool to list available generation presets and their intents.
+ * Dynamically fetches and caches input schemas from FAL API.
  */
 export const falListPresets = {
   name: "fal_list_presets",
   description:
-    "
-
-
-
+    "The entry point for discovering fal.ai capabilities on this server. " +
+    "Lists all available generation presets, including their high-level 'intent' (e.g., 'Generate cinematic video'), " +
+    "input/output types, and INPUT SCHEMA with parameter details. Call this first when you need to perform an AI generation task.",
+  parameters: z.object({
+    refresh_schemas: z
+      .boolean()
+      .optional()
+      .describe("If true, re-fetch schemas from FAL API even if cached."),
+  }),
+  timeoutMs: 60000, // Allow more time for schema fetching
+  execute: async (args: { refresh_schemas?: boolean }) => {
     return safeToolExecute(async () => {
       const config = loadFalConfig();
+      let updated = false;
+
+      // Fetch schemas for presets that don't have them (or if refresh requested)
+      for (const preset of config.presets) {
+        const shouldFetch = !preset.input_schema || args.refresh_schemas;
+        console.log(
+          `[fal_list_presets] ${
+            preset.presetName
+          }: shouldFetch=${shouldFetch}, hasSchema=${!!preset.input_schema}, refresh=${
+            args.refresh_schemas
+          }`
+        );
+
+        if (shouldFetch) {
+          try {
+            const url = `https://fal.ai/api/openapi/queue/openapi.json?endpoint_id=${preset.modelId}`;
+            console.log(`[fal_list_presets] Fetching schema from: ${url}`);
+            const response = await fetch(url, {
+              method: "GET",
+              signal: AbortSignal.timeout(10000),
+            });
+
+            if (response.ok) {
+              const openApiSchema = await response.json();
+              const simplified = extractInputSchema(openApiSchema);
+              console.log(
+                `[fal_list_presets] Extracted schema for ${preset.presetName}:`,
+                simplified ? Object.keys(simplified) : null
+              );
+
+              if (simplified) {
+                preset.input_schema = simplified;
+                updated = true;
+              }
+            } else {
+              console.log(
+                `[fal_list_presets] Fetch failed: ${response.status}`
+              );
+            }
+          } catch (e: any) {
+            console.log(
+              `[fal_list_presets] Error fetching schema for ${preset.presetName}:`,
+              e.message
+            );
+          }
+        }
+      }
+
+      // Save updated config if schemas were fetched
+      if (updated) {
+        console.log(`[fal_list_presets] Saving updated config...`);
+        const saved = saveFalConfig(config);
+        console.log(`[fal_list_presets] Config saved: ${saved}`);
+      }
+
+      // Return enriched preset list
       const summary = config.presets.map((p) => ({
         presetName: p.presetName,
         intent: p.intent,
         inputType: p.inputType,
         outputType: p.outputType,
         description: p.description,
+        inputSchema: p.input_schema,
       }));
+
       return JSON.stringify(summary, null, 2);
     }, "fal_list_presets");
   },
 };
 
 /**
- * Tool to get full details for a specific preset, including default parameters
+ * Tool to get full details for a specific preset, including default parameters
+ * and real-time model metadata from fal.ai.
  */
 export const falGetPresetDetails = {
   name: "fal_get_preset_details",
   description:
-    "
+    "Retrieve full details for a specific generation preset. " +
+    "This tool fetches both the local preset configuration (like 'defaultParams') " +
+    "and the live model metadata (schema, benchmarks, etc.) from fal.ai. " +
+    "Use this to understand the full capabilities and constraints of a model. " +
+    "ONLY USE WHEN WORKING WITH FAL MODELS/PRESETS.",
   parameters: z.object({
-    preset_name: z
+    preset_name: z
+      .string()
+      .describe("The name of the preset to inspect (e.g., 'cinematic_image')."),
   }),
   timeoutMs: 30000,
   execute: async (args: { preset_name: string }) => {
@@ -51,7 +165,43 @@ export const falGetPresetDetails = {
       if (!preset) {
         throw new Error(`Preset '${args.preset_name}' not found.`);
       }
-
+
+      // Fetch live model metadata/schema from fal.ai API
+      // Based on: https://fal.ai/api/openapi/queue/openapi.json?endpoint_id={model_id}
+      let modelMetadata: any = null;
+      let schemaSource = "none";
+
+      try {
+        const url = `https://fal.ai/api/openapi/queue/openapi.json?endpoint_id=${preset.modelId}`;
+        const schema = await publicRequest(url);
+
+        // Extract relevant schema parts if possible, or return full schema
+        if (schema) {
+          modelMetadata = schema;
+          schemaSource = "api";
+        }
+      } catch (e) {
+        // console.error(`Failed to fetch schema for ${preset.modelId}:`, e);
+        // Fallback to locally defined schema if available
+      }
+
+      // Fallback: If API failed or returned nothing, use manual schema from config
+      if (!modelMetadata && preset.input_schema) {
+        modelMetadata = preset.input_schema;
+        schemaSource = "local_config";
+      }
+
+      return JSON.stringify(
+        {
+          preset,
+          modelMetadata,
+          _meta: {
+            schemaSource,
+          },
+        },
+        null,
+        2
+      );
     }, "fal_get_preset_details");
   },
 };
@@ -73,22 +223,6 @@ async function publicRequest(url: string): Promise<any> {
   return response.json();
 }
 
-/**
- * Tool to retrieve the current full FAL configuration.
- */
-export const falGetConfig = {
-  name: "fal_get_config",
-  description: "Retrieve the full FAL configuration JSON file content.",
-  parameters: z.object({}),
-  timeoutMs: 30000,
-  execute: async () => {
-    return safeToolExecute(async () => {
-      const config = loadFalConfig();
-      return JSON.stringify(config, null, 2);
-    }, "fal_get_config");
-  },
-};
-
 /*
 // ORIGINAL UNRESTRICTED TOOLS - Commented out for reference
 
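For orientation, the sketch below shows the schema-discovery flow this file now follows: fetch the queue OpenAPI document for a model endpoint, then reduce the input schema to its property map. The standalone helper name and the endpoint id in the usage comment are illustrative; in the package this logic is wired into fal_list_presets via loadFalConfig/saveFalConfig rather than exposed as a separate function.

// Illustrative sketch of the schema-discovery flow added in 2.0.1-beta.
// The endpoint id in the usage comment is a placeholder; presets in
// fal-config.json supply the real modelId.
type SimplifiedSchema = Record<string, { type?: string; description?: string }>;

async function fetchInputSchema(endpointId: string): Promise<SimplifiedSchema | null> {
  const url = `https://fal.ai/api/openapi/queue/openapi.json?endpoint_id=${endpointId}`;
  const response = await fetch(url, { signal: AbortSignal.timeout(10000) });
  if (!response.ok) return null;

  const openApi: any = await response.json();
  const schemas = openApi?.components?.schemas ?? {};

  // Same heuristic as extractInputSchema: pick the "*Input" schema, skip "*Output".
  const key = Object.keys(schemas).find(
    (k) => k.toLowerCase().includes("input") && !k.toLowerCase().includes("output")
  );
  if (!key || !schemas[key]?.properties) return null;

  const simplified: SimplifiedSchema = {};
  for (const [name, def] of Object.entries<any>(schemas[key].properties)) {
    simplified[name] = { type: def.type, description: def.description };
  }
  return simplified;
}

// Usage (endpoint id is hypothetical):
// fetchInputSchema("fal-ai/ltx-video").then((schema) => console.log(schema));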
package/src/tools/fal/storage.ts
CHANGED
@@ -38,9 +38,16 @@ function getMimeType(filePath: string): string {
  */
 export const falUploadFile = {
   name: "fal_upload_file",
-  description:
+  description:
+    "Upload a local file (image, video, audio) to fal.ai CDN storage. " +
+    "CRITICAL: You MUST use this tool to upload local files before passing their URLs to generation tools in FAL. ONLY USE WHEN WORKING WITH FAL MODELS/PRESETS" +
+    "It returns a public 'file_url' which should be used as input for 'fal_generate'.",
   parameters: z.object({
-    path: z
+    path: z
+      .string()
+      .describe(
+        "The absolute local path to the file to upload (e.g., '/Users/name/images/input.jpg')."
+      ),
   }),
   timeoutMs: 300000,
   execute: async (args: { path: string }) => {
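The new description encodes a two-step workflow for local assets: upload first, then pass the returned URL to a generation tool. A minimal sketch of that sequence from a client's perspective follows; the callTool helper, the fal_generate argument shape, and the preset name are assumptions, while the tool names and the 'file_url' field come from the diff above.

// Hypothetical MCP-client helper standing in for whatever tool-call mechanism is in use.
declare function callTool(name: string, args: Record<string, unknown>): Promise<any>;

async function generateFromLocalImage(localPath: string): Promise<any> {
  // Step 1: push the local file to fal.ai CDN storage.
  const upload = await callTool("fal_upload_file", { path: localPath });
  const fileUrl = upload.file_url; // 'file_url' is the field named in the tool description.

  // Step 2: feed the public URL into a generation preset.
  // Preset name and parameter keys below are illustrative, not taken from the package.
  return callTool("fal_generate", {
    preset_name: "cinematic_video",
    parameters: { image_url: fileUrl },
  });
}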
package/src/tools/gemini.ts
CHANGED
@@ -12,6 +12,10 @@ import { PassThrough } from "stream";
 import { getStorage } from "../storage";
 import { generateTimestampedFilename } from "../utils/filename";
 import { safeToolExecute } from "../utils/tool-wrapper";
+import {
+  resolveEnhancer,
+  listImageEnhancerPresets,
+} from "../utils/prompt-enhancer-presets";
 
 const ai = new GoogleGenAI({
   apiKey: process.env.GEMINI_API_KEY || "",
@@ -202,23 +206,43 @@ async function processVideoInput(
 export const geminiTextToImage = {
   name: "generateImage",
   description:
-    "Generate images from text prompts using
+    "Generate high-quality images from text prompts using Google's Imagen 3 model via Gemini. " +
+    "This tool is highly capable of following complex instructions. " +
+    "Best practices: " +
+    "1. Be descriptive: instead of 'a dog', use 'a golden retriever playing in a sunlit meadow, cinematic lighting'. " +
+    "2. Specify style: e.g., '3D render', 'oil painting', 'minimalist vector art'. " +
+    "3. Use reference images: you can provide existing images to guide the style or content. " +
+    "ONLY USE WHEN WORKING WITH GOOGLE/GEMINI MODELS.",
   parameters: z.object({
-    prompt: z
+    prompt: z
+      .string()
+      .describe("Detailed text description of the image to generate."),
     aspect_ratio: z
       .string()
       .optional()
-      .describe(
+      .describe(
+        "Supported ratios: 1:1, 3:4, 4:3, 9:16, or 16:9. Default is 9:16."
+      ),
     output_path: z
       .string()
      .optional()
       .describe(
-        "
+        "Optional: specific local path or filename to save the image (e.g., 'outputs/hero.png'). " +
+        "If omitted, a timestamped filename is generated automatically."
       ),
     reference_images: z
       .array(z.string())
       .optional()
-      .describe(
+      .describe(
+        "Optional: local paths or URLs of images to use as visual references for style or composition."
+      ),
+    enhancer_preset: z
+      .string()
+      .optional()
+      .describe(
+        "Optional: Name of a prompt enhancer preset to apply (e.g., 'cinematic', 'photorealistic', 'anime'). " +
+        "Automatically enhances the prompt with professional style modifiers."
+      ),
   }),
   timeoutMs: 300000,
   execute: async (args: {
@@ -226,10 +250,20 @@ export const geminiTextToImage = {
     aspect_ratio?: string;
     output_path?: string;
     reference_images?: string[];
+    enhancer_preset?: string;
   }) => {
     return safeToolExecute(async () => {
       try {
-
+        // Apply prompt enhancement if preset specified
+        let enhancedPrompt = args.prompt;
+        if (args.enhancer_preset) {
+          const enhancer = resolveEnhancer(args.enhancer_preset);
+          if (enhancer.hasTransformations()) {
+            enhancedPrompt = enhancer.enhance(args.prompt);
+          }
+        }
+
+        const contents: any[] = [enhancedPrompt];
 
         if (args.reference_images && Array.isArray(args.reference_images)) {
           for (const refPath of args.reference_images) {
@@ -297,18 +331,40 @@ export const geminiTextToImage = {
 export const geminiEditImage = {
   name: "editImage",
   description:
-    "
+    "Modify or edit an existing image based on text instructions using Google's Imagen 3 model via Gemini. " +
+    "This can be used for inpainting (changing specific parts), style transfer, or adding/removing elements. " +
+    "Describe the desired changes relative to the source image (e.g., 'Change the white shirt to a blue one' or 'Add a cat sitting on the sofa'). " +
+    "ONLY USE WHEN WORKING WITH GOOGLE/GEMINI MODELS.",
   parameters: z.object({
-    image_path: z
-
+    image_path: z
+      .string()
+      .describe(
+        "Absolute local path or URL to the source image file to be edited."
+      ),
+    prompt: z
+      .string()
+      .describe(
+        "Instructional text describing the edits or modifications required."
+      ),
     output_path: z
       .string()
       .optional()
-      .describe(
+      .describe(
+        "Optional: specific local path to save the edited result. Defaults to generated timestamp."
+      ),
     reference_images: z
       .array(z.string())
       .optional()
-      .describe(
+      .describe(
+        "Optional: additional images to guide the edit (e.g., to reference a specific character or object style)."
+      ),
+    enhancer_preset: z
+      .string()
+      .optional()
+      .describe(
+        "Optional: Name of a prompt enhancer preset to apply (e.g., 'cinematic', 'photorealistic'). " +
+        "Enhances the edit instructions with professional style modifiers."
+      ),
   }),
   timeoutMs: 300000,
   execute: async (args: {
@@ -316,11 +372,21 @@ export const geminiEditImage = {
     prompt: string;
     output_path?: string;
     reference_images?: string[];
+    enhancer_preset?: string;
   }) => {
     return safeToolExecute(async () => {
       try {
+        // Apply prompt enhancement if preset specified
+        let enhancedPrompt = args.prompt;
+        if (args.enhancer_preset) {
+          const enhancer = resolveEnhancer(args.enhancer_preset);
+          if (enhancer.hasTransformations()) {
+            enhancedPrompt = enhancer.enhance(args.prompt);
+          }
+        }
+
         const imagePart = await fileToGenerativePart(args.image_path);
-        const contents: any[] = [
+        const contents: any[] = [enhancedPrompt, imagePart];
 
         if (args.reference_images) {
           for (const refPath of args.reference_images) {
@@ -378,12 +444,19 @@ export const geminiEditImage = {
 export const geminiAnalyzeImages = {
   name: "analyzeImages",
   description:
-    "
+    "Perform advanced multimodal analysis on one or more images using Google's Gemini 2.5 Pro model. " +
+    "Use this for complex reasoning, visual question answering, OCR, or describing scenes in detail. " +
+    "You can compare multiple images by providing them in the array. " +
+    "ONLY USE WHEN WORKING WITH GOOGLE/GEMINI MODELS.",
   parameters: z.object({
     image_paths: z
       .array(z.string())
-      .describe(
-
+      .describe(
+        "An array of absolute local file paths or publicly accessible URLs to analyze."
+      ),
+    prompt: z
+      .string()
+      .describe("The question, query, or instruction to apply to the images."),
   }),
   timeoutMs: 300000,
   execute: async (args: { image_paths: string[]; prompt: string }) => {
@@ -447,19 +520,22 @@ export const geminiAnalyzeImages = {
 export const geminiSingleSpeakerTts = {
   name: "generateSpeech",
   description:
-    "
+    "Convert text to natural-sounding speech using Google's Gemini 2.5 Pro Preview TTS model. " +
+    "This tool generates a single speaker's voice in a WAV format. " +
+    "Best for long-form narration or simple voiceovers. " +
+    "ONLY USE WHEN WORKING WITH GOOGLE/GEMINI MODELS.",
   parameters: z.object({
-    text: z.string().describe("
+    text: z.string().describe("The text content to be converted into speech."),
     voice_name: z
       .string()
       .describe(
-        "
+        "Supported voices: 'Despina' (Female, versatile), 'Kore' (Female, calm), 'Erinome' (Female, expressive), or 'Enceladus' (Male, neutral)."
       ),
     output_path: z
       .string()
       .optional()
       .describe(
-        "Output WAV file path
+        "Optional: Output WAV file path. Defaults to a timestamped filename in the output directory."
       ),
   }),
   timeoutMs: 300000,
@@ -518,37 +594,41 @@ export const geminiSingleSpeakerTts = {
 export const geminiAnalyzeVideos = {
   name: "analyzeVideos",
   description:
-    "
+    "Comprehensive video understanding using Google's Gemini 2.5 Pro model. " +
+    "Capable of analyzing both longitudinal content (YouTube) and specific local files. " +
+    "Supports time-aware queries (e.g., 'What color is the car at 02:45?'), clipping, and advanced visual reasoning over video streams. " +
+    "ONLY USE WHEN WORKING WITH GOOGLE/GEMINI MODELS.",
   parameters: z.object({
     video_inputs: z
       .array(z.string())
       .describe(
-        "
+        "An array containing absolute paths to local videos or YouTube URLs. Max 10 per request. " +
+        "Note: Local files are automatically optimized for processing."
       ),
     prompt: z
       .string()
       .describe(
-        "
+        "The question or instruction regarding the video. Use MM:SS or HH:MM:SS for precise time references."
      ),
     fps: z
       .number()
       .optional()
       .describe(
-        "
+        "Optional: Target frames per second for processing. Lower FPS (1-5) is recommended for long videos to save tokens."
      ),
     start_offset: z
       .string()
       .optional()
-      .describe("
+      .describe("Start time of the segment to analyze (e.g., '10s', '01:30')."),
     end_offset: z
       .string()
       .optional()
-      .describe("
+      .describe("End time of the segment to analyze (e.g., '20s', '02:00')."),
     media_resolution: z
       .string()
       .optional()
       .describe(
-        "
+        "Processing resolution: 'default' or 'low'. 'low' significantly reduces token usage for simple visual tasks."
      ),
   }),
   timeoutMs: 300000,
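Both generateImage and editImage now route their prompt through the optional enhancer before calling the model. Since src/utils/prompt-enhancer-presets.ts is not shown in this section, the sketch below only mirrors the interface the diff depends on (resolveEnhancer returning an object with hasTransformations() and enhance()); the interface name, preset names, and modifiers here are invented for illustration.

// Sketch of the enhancer shape gemini.ts relies on. The real presets live in
// src/utils/prompt-enhancer-presets.ts, which is not included in this section.
interface PromptEnhancer {
  hasTransformations(): boolean;
  enhance(prompt: string): string;
}

// Example preset table (contents are illustrative, not taken from the package).
const EXAMPLE_PRESETS: Record<string, string[]> = {
  cinematic: ["cinematic lighting", "shallow depth of field", "film grain"],
  photorealistic: ["photorealistic", "8k detail", "natural color grading"],
};

function resolveEnhancerSketch(presetName: string): PromptEnhancer {
  const modifiers = EXAMPLE_PRESETS[presetName] ?? [];
  return {
    hasTransformations: () => modifiers.length > 0,
    enhance: (prompt) =>
      modifiers.length ? `${prompt}, ${modifiers.join(", ")}` : prompt,
  };
}

// Mirrors the guard used in generateImage/editImage:
// const enhancer = resolveEnhancer(args.enhancer_preset);
// if (enhancer.hasTransformations()) enhancedPrompt = enhancer.enhance(args.prompt);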