@polpo-ai/tools 0.6.32 → 0.7.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/__tests__/email-tools.test.d.ts +2 -0
- package/dist/__tests__/email-tools.test.d.ts.map +1 -0
- package/dist/__tests__/email-tools.test.js +705 -0
- package/dist/__tests__/email-tools.test.js.map +1 -0
- package/dist/__tests__/extended-tools.test.d.ts +2 -0
- package/dist/__tests__/extended-tools.test.d.ts.map +1 -0
- package/dist/__tests__/extended-tools.test.js +743 -0
- package/dist/__tests__/extended-tools.test.js.map +1 -0
- package/dist/__tests__/external-api-tools.test.d.ts +2 -0
- package/dist/__tests__/external-api-tools.test.d.ts.map +1 -0
- package/dist/__tests__/external-api-tools.test.js +1731 -0
- package/dist/__tests__/external-api-tools.test.js.map +1 -0
- package/dist/__tests__/memory-tools.test.d.ts +2 -0
- package/dist/__tests__/memory-tools.test.d.ts.map +1 -0
- package/dist/__tests__/memory-tools.test.js +0 -0
- package/dist/__tests__/memory-tools.test.js.map +1 -0
- package/dist/audio-tools.d.ts +25 -27
- package/dist/audio-tools.d.ts.map +1 -1
- package/dist/audio-tools.js +156 -438
- package/dist/audio-tools.js.map +1 -1
- package/dist/browser-tools.d.ts.map +1 -1
- package/dist/browser-tools.js +5 -1
- package/dist/browser-tools.js.map +1 -1
- package/dist/email-tools.d.ts.map +1 -1
- package/dist/email-tools.js +11 -3
- package/dist/email-tools.js.map +1 -1
- package/dist/image-tools.d.ts +27 -25
- package/dist/image-tools.d.ts.map +1 -1
- package/dist/image-tools.js +151 -332
- package/dist/image-tools.js.map +1 -1
- package/dist/index.d.ts +1 -2
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +3 -2
- package/dist/index.js.map +1 -1
- package/dist/lib/edge-speech-model.d.ts +61 -0
- package/dist/lib/edge-speech-model.d.ts.map +1 -0
- package/dist/lib/edge-speech-model.js +144 -0
- package/dist/lib/edge-speech-model.js.map +1 -0
- package/dist/lib/exa-search-provider.d.ts +27 -0
- package/dist/lib/exa-search-provider.d.ts.map +1 -0
- package/dist/lib/exa-search-provider.js +109 -0
- package/dist/lib/exa-search-provider.js.map +1 -0
- package/dist/lib/provider-resolver.d.ts +54 -0
- package/dist/lib/provider-resolver.d.ts.map +1 -0
- package/dist/lib/provider-resolver.js +115 -0
- package/dist/lib/provider-resolver.js.map +1 -0
- package/dist/search-tools.d.ts +10 -13
- package/dist/search-tools.d.ts.map +1 -1
- package/dist/search-tools.js +63 -140
- package/dist/search-tools.js.map +1 -1
- package/dist/system-tools.d.ts +19 -5
- package/dist/system-tools.d.ts.map +1 -1
- package/dist/system-tools.js +16 -10
- package/dist/system-tools.js.map +1 -1
- package/package.json +12 -2
- package/dist/phone-tools.d.ts +0 -27
- package/dist/phone-tools.d.ts.map +0 -1
- package/dist/phone-tools.js +0 -577
- package/dist/phone-tools.js.map +0 -1
package/dist/image-tools.js
CHANGED
|
@@ -1,49 +1,50 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Image & video tools for generation and vision/analysis.
|
|
3
3
|
*
|
|
4
|
-
*
|
|
5
|
-
*
|
|
6
|
-
*
|
|
7
|
-
*
|
|
4
|
+
* Architecture: thin wrappers over the Vercel AI SDK v6.
|
|
5
|
+
* - image_generate → `generateImage` against a configurable provider
|
|
6
|
+
* - video_generate → `experimental_generateVideo` against a configurable provider
|
|
7
|
+
* - image_analyze → `generateText` (multimodal) against a configurable provider
|
|
8
8
|
*
|
|
9
|
-
*
|
|
9
|
+
* Model selection: each tool picks its model in this order:
|
|
10
|
+
* 1. per-call `model` input parameter (`<provider>/<model>` string),
|
|
11
|
+
* 2. agent-config default passed to the factory (image/video/vision),
|
|
12
|
+
* 3. hardcoded fallback constant from @polpo-ai/core.
|
|
10
13
|
*
|
|
11
|
-
*
|
|
12
|
-
*
|
|
13
|
-
*
|
|
14
|
-
* Vision/analysis: openai (gpt-4.1-mini), anthropic (Claude)
|
|
15
|
-
*
|
|
16
|
-
* Credential resolution order (same as email tools):
|
|
17
|
-
* 1. Agent vault (per-agent credentials — e.g. service "fal" with key "key")
|
|
18
|
-
* 2. Environment variables (global fallback)
|
|
19
|
-
*
|
|
20
|
-
* Environment variables (fallback):
|
|
21
|
-
* FAL_KEY — fal.ai image/video generation
|
|
22
|
-
* OPENAI_API_KEY — openai vision provider
|
|
23
|
-
* ANTHROPIC_API_KEY — anthropic vision provider
|
|
14
|
+
* Provider names are not in the input schema anymore — they ride along
|
|
15
|
+
* with the model string. Every supported provider has a vault key
|
|
16
|
+
* convention (fal-ai, openai, anthropic) with an env-var fallback.
|
|
24
17
|
*/
|
|
25
18
|
import { resolve, dirname, extname } from "node:path";
|
|
26
19
|
import { Type } from "@sinclair/typebox";
|
|
20
|
+
import { parseModelString, DEFAULT_IMAGE_MODEL, DEFAULT_VIDEO_MODEL, DEFAULT_VISION_MODEL, } from "@polpo-ai/core";
|
|
27
21
|
import { NodeFileSystem } from "./adapters/node-filesystem.js";
|
|
28
22
|
import { resolveAllowedPaths, assertPathAllowed } from "./path-sandbox.js";
|
|
29
|
-
|
|
23
|
+
import { resolveImageProvider, resolveVideoProvider, resolveVisionProvider, } from "./lib/provider-resolver.js";
|
|
30
24
|
const MAX_IMAGE_SIZE = 20 * 1024 * 1024; // 20 MB
|
|
31
|
-
const DEFAULT_TIMEOUT = 120_000; // 2 min for image generation
|
|
32
|
-
const VIDEO_TIMEOUT = 300_000; // 5 min for video generation
|
|
33
|
-
const FAL_QUEUE_POLL_INTERVAL = 3_000; // 3 sec polling for async queue
|
|
34
|
-
// ─── Helpers ───
|
|
35
25
|
function requireEnv(key) {
|
|
36
26
|
const val = process.env[key];
|
|
37
27
|
if (!val)
|
|
38
28
|
throw new Error(`Missing environment variable: ${key}. Set it before using this tool.`);
|
|
39
29
|
return val;
|
|
40
30
|
}
|
|
41
|
-
/** Resolve
|
|
42
|
-
function
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
31
|
+
/** Resolve which model to actually use, in priority order. */
|
|
32
|
+
function resolveEffectiveModel(override, configured, fallback) {
|
|
33
|
+
return parseModelString(override ?? configured ?? fallback);
|
|
34
|
+
}
|
|
35
|
+
/** Vault-key resolution per provider. Throws with a clear message
|
|
36
|
+
* when neither vault nor env var has the credential. */
|
|
37
|
+
function resolveProviderKey(provider, vault) {
|
|
38
|
+
switch (provider) {
|
|
39
|
+
case "fal":
|
|
40
|
+
return vault?.getKey("fal-ai", "key") ?? requireEnv("FAL_KEY");
|
|
41
|
+
case "openai":
|
|
42
|
+
return vault?.getKey("openai", "key") ?? requireEnv("OPENAI_API_KEY");
|
|
43
|
+
case "anthropic":
|
|
44
|
+
return vault?.getKey("anthropic", "key") ?? requireEnv("ANTHROPIC_API_KEY");
|
|
45
|
+
default:
|
|
46
|
+
throw new Error(`Unknown provider '${provider}': no credential lookup defined`);
|
|
47
|
+
}
|
|
47
48
|
}
|
|
48
49
|
function imageMime(ext) {
|
|
49
50
|
const map = {
|
|
@@ -58,76 +59,13 @@ function imageMime(ext) {
|
|
|
58
59
|
};
|
|
59
60
|
return map[ext.toLowerCase()] ?? "image/png";
|
|
60
61
|
}
|
|
61
|
-
/**
|
|
62
|
-
* Submit a request to fal.ai queue and poll until completion.
|
|
63
|
-
* Uses the queue endpoint (POST https://queue.fal.run/<model>) for reliability,
|
|
64
|
-
* then polls the status endpoint until the result is ready.
|
|
65
|
-
*/
|
|
66
|
-
async function falQueueRequest(modelId, input, apiKey, timeout, signal) {
|
|
67
|
-
const controller = new AbortController();
|
|
68
|
-
const timer = setTimeout(() => controller.abort(), timeout);
|
|
69
|
-
if (signal)
|
|
70
|
-
signal.addEventListener("abort", () => controller.abort(), { once: true });
|
|
71
|
-
try {
|
|
72
|
-
// Submit to queue
|
|
73
|
-
const submitResp = await fetch(`https://queue.fal.run/${modelId}`, {
|
|
74
|
-
method: "POST",
|
|
75
|
-
headers: {
|
|
76
|
-
Authorization: `Key ${apiKey}`,
|
|
77
|
-
"Content-Type": "application/json",
|
|
78
|
-
},
|
|
79
|
-
body: JSON.stringify(input),
|
|
80
|
-
signal: controller.signal,
|
|
81
|
-
});
|
|
82
|
-
if (!submitResp.ok) {
|
|
83
|
-
const errText = await submitResp.text();
|
|
84
|
-
throw new Error(`fal.ai queue submit ${submitResp.status}: ${errText}`);
|
|
85
|
-
}
|
|
86
|
-
const queueData = await submitResp.json();
|
|
87
|
-
const requestId = queueData.request_id;
|
|
88
|
-
const statusUrl = queueData.status_url ?? `https://queue.fal.run/${modelId}/requests/${requestId}/status`;
|
|
89
|
-
const responseUrl = queueData.response_url ?? `https://queue.fal.run/${modelId}/requests/${requestId}`;
|
|
90
|
-
// Poll for completion
|
|
91
|
-
while (true) {
|
|
92
|
-
await new Promise(r => setTimeout(r, FAL_QUEUE_POLL_INTERVAL));
|
|
93
|
-
const statusResp = await fetch(statusUrl, {
|
|
94
|
-
headers: { Authorization: `Key ${apiKey}` },
|
|
95
|
-
signal: controller.signal,
|
|
96
|
-
});
|
|
97
|
-
if (!statusResp.ok) {
|
|
98
|
-
throw new Error(`fal.ai status poll ${statusResp.status}`);
|
|
99
|
-
}
|
|
100
|
-
const status = await statusResp.json();
|
|
101
|
-
if (status.status === "COMPLETED") {
|
|
102
|
-
break;
|
|
103
|
-
}
|
|
104
|
-
if (status.status === "FAILED") {
|
|
105
|
-
throw new Error(`fal.ai request failed: ${status.error ?? "unknown error"}`);
|
|
106
|
-
}
|
|
107
|
-
// IN_QUEUE or IN_PROGRESS — keep polling
|
|
108
|
-
}
|
|
109
|
-
// Fetch result
|
|
110
|
-
const resultResp = await fetch(responseUrl, {
|
|
111
|
-
headers: { Authorization: `Key ${apiKey}` },
|
|
112
|
-
signal: controller.signal,
|
|
113
|
-
});
|
|
114
|
-
if (!resultResp.ok) {
|
|
115
|
-
const errText = await resultResp.text();
|
|
116
|
-
throw new Error(`fal.ai result fetch ${resultResp.status}: ${errText}`);
|
|
117
|
-
}
|
|
118
|
-
return await resultResp.json();
|
|
119
|
-
}
|
|
120
|
-
finally {
|
|
121
|
-
clearTimeout(timer);
|
|
122
|
-
}
|
|
123
|
-
}
|
|
124
62
|
// ─── Tool: image_generate ───
|
|
125
63
|
const ImageGenerateSchema = Type.Object({
|
|
126
64
|
prompt: Type.String({ description: "Text prompt describing the image to generate" }),
|
|
127
65
|
path: Type.String({ description: "Output file path (e.g. 'output.png'). Format inferred from extension." }),
|
|
128
66
|
model: Type.Optional(Type.String({
|
|
129
|
-
description: "
|
|
130
|
-
"
|
|
67
|
+
description: "Override the agent's image_model for this call. Format: '<provider>/<model>' " +
|
|
68
|
+
"(e.g. 'fal/fal-ai/flux/dev', 'fal/fal-ai/flux-pro/v1.1'). When omitted, uses the agent's configured image_model.",
|
|
131
69
|
})),
|
|
132
70
|
size: Type.Optional(Type.String({
|
|
133
71
|
description: "Image size as 'WIDTHxHEIGHT' (e.g. '1024x1024', '1024x768', '768x1024'). Default: '1024x1024'.",
|
|
@@ -142,20 +80,21 @@ const ImageGenerateSchema = Type.Object({
|
|
|
142
80
|
description: "Random seed for reproducible results. Omit for random.",
|
|
143
81
|
})),
|
|
144
82
|
});
|
|
145
|
-
function createGenerateTool(cwd, sandbox, fs, vault) {
|
|
83
|
+
function createGenerateTool(cwd, sandbox, fs, configuredModel, vault) {
|
|
146
84
|
return {
|
|
147
85
|
name: "image_generate",
|
|
148
86
|
label: "Generate Image",
|
|
149
|
-
description: "Generate an image from a text prompt
|
|
87
|
+
description: "Generate an image from a text prompt. " +
|
|
150
88
|
"Output format inferred from file extension (png, jpg, webp). " +
|
|
151
|
-
"
|
|
152
|
-
"fal-ai/flux/
|
|
89
|
+
"Model is configured at agent level (image_model) — pass `model` here only to override per-call. " +
|
|
90
|
+
"Default: fal/fal-ai/flux/dev. Currently supports fal as image provider.",
|
|
153
91
|
parameters: ImageGenerateSchema,
|
|
154
92
|
async execute(_id, params, signal) {
|
|
155
93
|
const filePath = resolve(cwd, params.path);
|
|
156
94
|
assertPathAllowed(filePath, sandbox, "image_generate");
|
|
157
95
|
try {
|
|
158
|
-
|
|
96
|
+
const parsed = resolveEffectiveModel(params.model, configuredModel, DEFAULT_IMAGE_MODEL);
|
|
97
|
+
return await generateImageWithSdk(filePath, parsed, params, fs, vault, signal);
|
|
159
98
|
}
|
|
160
99
|
catch (err) {
|
|
161
100
|
return {
|
|
@@ -166,59 +105,51 @@ function createGenerateTool(cwd, sandbox, fs, vault) {
|
|
|
166
105
|
},
|
|
167
106
|
};
|
|
168
107
|
}
|
|
169
|
-
async function
|
|
170
|
-
const
|
|
171
|
-
const
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
if (parts.length === 2 && parts[0] > 0 && parts[1] > 0) {
|
|
177
|
-
width = parts[0];
|
|
178
|
-
height = parts[1];
|
|
179
|
-
}
|
|
180
|
-
}
|
|
181
|
-
const input = {
|
|
182
|
-
prompt: params.prompt,
|
|
183
|
-
image_size: { width, height },
|
|
184
|
-
num_images: 1,
|
|
185
|
-
};
|
|
108
|
+
async function generateImageWithSdk(filePath, parsed, params, fs, vault, signal) {
|
|
109
|
+
const { generateImage } = await import("ai");
|
|
110
|
+
const apiKey = resolveProviderKey(parsed.provider, vault);
|
|
111
|
+
const provider = await resolveImageProvider(parsed.provider, apiKey);
|
|
112
|
+
// fal-specific knobs go through providerOptions; the SDK passes them
|
|
113
|
+
// through to the model's input untouched.
|
|
114
|
+
const falOptions = {};
|
|
186
115
|
if (params.num_inference_steps != null)
|
|
187
|
-
|
|
116
|
+
falOptions.num_inference_steps = params.num_inference_steps;
|
|
188
117
|
if (params.guidance_scale != null)
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
118
|
+
falOptions.guidance_scale = params.guidance_scale;
|
|
119
|
+
const result = await generateImage({
|
|
120
|
+
model: provider.image(parsed.model),
|
|
121
|
+
prompt: params.prompt,
|
|
122
|
+
size: params.size,
|
|
123
|
+
seed: params.seed,
|
|
124
|
+
providerOptions: parsed.provider === "fal" && Object.keys(falOptions).length
|
|
125
|
+
? { fal: falOptions }
|
|
126
|
+
: undefined,
|
|
127
|
+
abortSignal: signal,
|
|
128
|
+
});
|
|
129
|
+
const bytes = result.image.uint8Array;
|
|
130
|
+
if (!bytes || bytes.byteLength === 0) {
|
|
131
|
+
throw new Error("No image bytes in SDK response");
|
|
197
132
|
}
|
|
198
|
-
const imageUrl = images[0].url;
|
|
199
|
-
const imgResp = await fetch(imageUrl);
|
|
200
|
-
if (!imgResp.ok)
|
|
201
|
-
throw new Error(`Failed to download generated image: ${imgResp.status}`);
|
|
202
|
-
const buffer = Buffer.from(await imgResp.arrayBuffer());
|
|
203
133
|
if (!fs.writeFileBuffer) {
|
|
204
134
|
throw new Error("FileSystem implementation does not support writeFileBuffer (required for binary writes).");
|
|
205
135
|
}
|
|
206
136
|
await fs.mkdir(dirname(filePath));
|
|
207
|
-
await fs.writeFileBuffer(filePath,
|
|
137
|
+
await fs.writeFileBuffer(filePath, bytes);
|
|
208
138
|
const info = [
|
|
209
139
|
`Image saved: ${filePath}`,
|
|
210
|
-
`Size: ${(
|
|
211
|
-
`Model: ${model}`,
|
|
212
|
-
`Dimensions: ${images[0].width}x${images[0].height}`,
|
|
140
|
+
`Size: ${(bytes.byteLength / 1024).toFixed(1)} KB`,
|
|
141
|
+
`Model: ${parsed.provider}/${parsed.model}`,
|
|
213
142
|
];
|
|
143
|
+
if (params.size)
|
|
144
|
+
info.push(`Dimensions: ${params.size}`);
|
|
214
145
|
return {
|
|
215
146
|
content: [{ type: "text", text: info.join("\n") }],
|
|
216
147
|
details: {
|
|
217
|
-
provider:
|
|
218
|
-
model,
|
|
219
|
-
size:
|
|
148
|
+
provider: parsed.provider,
|
|
149
|
+
model: parsed.model,
|
|
150
|
+
size: params.size,
|
|
220
151
|
path: filePath,
|
|
221
|
-
bytes:
|
|
152
|
+
bytes: bytes.byteLength,
|
|
222
153
|
},
|
|
223
154
|
};
|
|
224
155
|
}
|
|
@@ -227,40 +158,39 @@ const VideoGenerateSchema = Type.Object({
|
|
|
227
158
|
prompt: Type.String({ description: "Text prompt describing the video to generate" }),
|
|
228
159
|
path: Type.String({ description: "Output file path (e.g. 'output.mp4')." }),
|
|
229
160
|
model: Type.Optional(Type.String({
|
|
230
|
-
description: "
|
|
231
|
-
"
|
|
161
|
+
description: "Override the agent's video_model for this call. Format: '<provider>/<model>' " +
|
|
162
|
+
"(e.g. 'fal/luma-ray-2-flash', 'fal/luma-ray-2', 'fal/hunyuan-video'). When omitted, uses the agent's configured video_model.",
|
|
232
163
|
})),
|
|
233
|
-
|
|
234
|
-
description: "
|
|
164
|
+
aspect_ratio: Type.Optional(Type.String({
|
|
165
|
+
description: "Aspect ratio as 'WIDTH:HEIGHT' (e.g. '16:9', '9:16', '1:1').",
|
|
235
166
|
})),
|
|
236
167
|
resolution: Type.Optional(Type.String({
|
|
237
|
-
description: "
|
|
168
|
+
description: "Resolution as 'WIDTHxHEIGHT' (e.g. '1280x720'). Provider-dependent.",
|
|
238
169
|
})),
|
|
239
|
-
|
|
240
|
-
description: "
|
|
170
|
+
duration: Type.Optional(Type.Number({
|
|
171
|
+
description: "Video duration in seconds. Provider-dependent — typical range 4-10.",
|
|
241
172
|
})),
|
|
242
|
-
|
|
243
|
-
description: "
|
|
173
|
+
fps: Type.Optional(Type.Number({
|
|
174
|
+
description: "Frames per second. Provider-dependent.",
|
|
244
175
|
})),
|
|
245
176
|
seed: Type.Optional(Type.Number({
|
|
246
177
|
description: "Random seed for reproducible results. Omit for random.",
|
|
247
178
|
})),
|
|
248
179
|
});
|
|
249
|
-
function createVideoGenerateTool(cwd, sandbox, fs, vault) {
|
|
180
|
+
function createVideoGenerateTool(cwd, sandbox, fs, configuredModel, vault) {
|
|
250
181
|
return {
|
|
251
182
|
name: "video_generate",
|
|
252
183
|
label: "Generate Video",
|
|
253
|
-
description: "Generate a video from a text prompt
|
|
254
|
-
"Output saved as MP4.
|
|
255
|
-
"
|
|
256
|
-
"Video generation takes 1-5 minutes depending on model and resolution. " +
|
|
257
|
-
"Credentials resolved from: agent vault > FAL_KEY env var.",
|
|
184
|
+
description: "Generate a video from a text prompt. " +
|
|
185
|
+
"Output saved as MP4. Model is configured at agent level (video_model) — pass `model` here only to override " +
|
|
186
|
+
"per-call. Default: fal/luma-ray-2-flash. Currently supports fal as video provider.",
|
|
258
187
|
parameters: VideoGenerateSchema,
|
|
259
188
|
async execute(_id, params, signal) {
|
|
260
189
|
const filePath = resolve(cwd, params.path);
|
|
261
190
|
assertPathAllowed(filePath, sandbox, "video_generate");
|
|
262
191
|
try {
|
|
263
|
-
|
|
192
|
+
const parsed = resolveEffectiveModel(params.model, configuredModel, DEFAULT_VIDEO_MODEL);
|
|
193
|
+
return await generateVideoWithSdk(filePath, parsed, params, fs, vault, signal);
|
|
264
194
|
}
|
|
265
195
|
catch (err) {
|
|
266
196
|
return {
|
|
@@ -271,55 +201,42 @@ function createVideoGenerateTool(cwd, sandbox, fs, vault) {
|
|
|
271
201
|
},
|
|
272
202
|
};
|
|
273
203
|
}
|
|
274
|
-
async function
|
|
275
|
-
const
|
|
276
|
-
const
|
|
277
|
-
const
|
|
204
|
+
async function generateVideoWithSdk(filePath, parsed, params, fs, vault, signal) {
|
|
205
|
+
const { experimental_generateVideo } = await import("ai");
|
|
206
|
+
const apiKey = resolveProviderKey(parsed.provider, vault);
|
|
207
|
+
const provider = await resolveVideoProvider(parsed.provider, apiKey);
|
|
208
|
+
const result = await experimental_generateVideo({
|
|
209
|
+
model: provider.video(parsed.model),
|
|
278
210
|
prompt: params.prompt,
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
if (params.resolution) {
|
|
290
|
-
const parts = params.resolution.split("x").map(Number);
|
|
291
|
-
if (parts.length === 2 && parts[0] > 0 && parts[1] > 0) {
|
|
292
|
-
input.resolution = { width: parts[0], height: parts[1] };
|
|
293
|
-
}
|
|
294
|
-
}
|
|
295
|
-
const result = await falQueueRequest(model, input, apiKey, VIDEO_TIMEOUT, signal);
|
|
296
|
-
// fal.ai video response: { video: { url, content_type, file_name, file_size } }
|
|
297
|
-
const video = result.video;
|
|
298
|
-
if (!video?.url) {
|
|
299
|
-
throw new Error("No video in fal.ai response");
|
|
211
|
+
aspectRatio: params.aspect_ratio,
|
|
212
|
+
resolution: params.resolution,
|
|
213
|
+
duration: params.duration,
|
|
214
|
+
fps: params.fps,
|
|
215
|
+
seed: params.seed,
|
|
216
|
+
abortSignal: signal,
|
|
217
|
+
});
|
|
218
|
+
const bytes = result.video?.uint8Array;
|
|
219
|
+
if (!bytes || bytes.byteLength === 0) {
|
|
220
|
+
throw new Error("No video bytes in SDK response");
|
|
300
221
|
}
|
|
301
|
-
const videoResp = await fetch(video.url);
|
|
302
|
-
if (!videoResp.ok)
|
|
303
|
-
throw new Error(`Failed to download generated video: ${videoResp.status}`);
|
|
304
|
-
const buffer = Buffer.from(await videoResp.arrayBuffer());
|
|
305
222
|
if (!fs.writeFileBuffer) {
|
|
306
223
|
throw new Error("FileSystem implementation does not support writeFileBuffer (required for binary writes).");
|
|
307
224
|
}
|
|
308
225
|
await fs.mkdir(dirname(filePath));
|
|
309
|
-
await fs.writeFileBuffer(filePath,
|
|
310
|
-
const sizeMB = (
|
|
226
|
+
await fs.writeFileBuffer(filePath, bytes);
|
|
227
|
+
const sizeMB = (bytes.byteLength / 1024 / 1024).toFixed(2);
|
|
311
228
|
const info = [
|
|
312
229
|
`Video saved: ${filePath}`,
|
|
313
230
|
`Size: ${sizeMB} MB`,
|
|
314
|
-
`Model: ${model}`,
|
|
231
|
+
`Model: ${parsed.provider}/${parsed.model}`,
|
|
315
232
|
];
|
|
316
233
|
return {
|
|
317
234
|
content: [{ type: "text", text: info.join("\n") }],
|
|
318
235
|
details: {
|
|
319
|
-
provider:
|
|
320
|
-
model,
|
|
236
|
+
provider: parsed.provider,
|
|
237
|
+
model: parsed.model,
|
|
321
238
|
path: filePath,
|
|
322
|
-
bytes:
|
|
239
|
+
bytes: bytes.byteLength,
|
|
323
240
|
},
|
|
324
241
|
};
|
|
325
242
|
}
|
|
@@ -327,21 +244,20 @@ async function generateVideo(filePath, params, fs, vault, signal) {
|
|
|
327
244
|
const ImageAnalyzeSchema = Type.Object({
|
|
328
245
|
path: Type.String({ description: "Path to the image file to analyze" }),
|
|
329
246
|
prompt: Type.Optional(Type.String({ description: "Question or instruction for the vision model (default: 'Describe this image in detail')" })),
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
model: Type.Optional(Type.String({ description: "Model name. OpenAI: 'gpt-4.1-mini' (default). Anthropic: 'claude-sonnet-4-20250514' (default)." })),
|
|
247
|
+
model: Type.Optional(Type.String({
|
|
248
|
+
description: "Override the agent's vision_model for this call. Format: '<provider>/<model>' " +
|
|
249
|
+
"(e.g. 'openai/gpt-4o-mini', 'anthropic/claude-sonnet-4-20250514'). When omitted, uses the agent's configured vision_model.",
|
|
250
|
+
})),
|
|
335
251
|
max_tokens: Type.Optional(Type.Number({ description: "Max tokens in response (default: 1024)" })),
|
|
336
252
|
});
|
|
337
|
-
function createAnalyzeTool(cwd, sandbox, fs, vault) {
|
|
253
|
+
function createAnalyzeTool(cwd, sandbox, fs, configuredModel, vault) {
|
|
338
254
|
return {
|
|
339
255
|
name: "image_analyze",
|
|
340
256
|
label: "Analyze Image",
|
|
341
257
|
description: "Analyze an image using AI vision models. Can describe contents, extract text (OCR), " +
|
|
342
258
|
"answer questions about the image, identify objects, read charts, etc. " +
|
|
343
|
-
"
|
|
344
|
-
"
|
|
259
|
+
"Model is configured at agent level (vision_model) — pass `model` here only to override per-call. " +
|
|
260
|
+
"Default: openai/gpt-4o-mini. Supported providers: openai, anthropic.",
|
|
345
261
|
parameters: ImageAnalyzeSchema,
|
|
346
262
|
async execute(_id, params, signal) {
|
|
347
263
|
const filePath = resolve(cwd, params.path);
|
|
@@ -369,143 +285,47 @@ function createAnalyzeTool(cwd, sandbox, fs, vault) {
|
|
|
369
285
|
details: { error: "file_too_large", size: fileBuffer.byteLength },
|
|
370
286
|
};
|
|
371
287
|
}
|
|
372
|
-
const provider = params.provider ?? "openai";
|
|
373
288
|
try {
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
}
|
|
377
|
-
else {
|
|
378
|
-
return await analyzeAnthropic(filePath, fileBuffer, params, vault, signal);
|
|
379
|
-
}
|
|
289
|
+
const parsed = resolveEffectiveModel(params.model, configuredModel, DEFAULT_VISION_MODEL);
|
|
290
|
+
return await analyzeWithSdk(filePath, fileBuffer, parsed, params, vault, signal);
|
|
380
291
|
}
|
|
381
292
|
catch (err) {
|
|
382
293
|
return {
|
|
383
|
-
content: [{ type: "text", text: `Image analysis error
|
|
384
|
-
details: {
|
|
294
|
+
content: [{ type: "text", text: `Image analysis error: ${err.message}` }],
|
|
295
|
+
details: { error: err.message },
|
|
385
296
|
};
|
|
386
297
|
}
|
|
387
298
|
},
|
|
388
299
|
};
|
|
389
300
|
}
|
|
390
|
-
async function
|
|
391
|
-
const
|
|
392
|
-
const
|
|
393
|
-
const
|
|
394
|
-
const maxTokens = params.max_tokens ?? 1024;
|
|
395
|
-
const ext = extname(filePath).toLowerCase();
|
|
396
|
-
const mime = imageMime(ext);
|
|
397
|
-
const base64 = fileBuffer.toString("base64");
|
|
398
|
-
const dataUrl = `data:${mime};base64,${base64}`;
|
|
399
|
-
const controller = new AbortController();
|
|
400
|
-
const timer = setTimeout(() => controller.abort(), DEFAULT_TIMEOUT);
|
|
401
|
-
if (signal)
|
|
402
|
-
signal.addEventListener("abort", () => controller.abort(), { once: true });
|
|
403
|
-
const response = await fetch("https://api.openai.com/v1/chat/completions", {
|
|
404
|
-
method: "POST",
|
|
405
|
-
headers: {
|
|
406
|
-
Authorization: `Bearer ${apiKey}`,
|
|
407
|
-
"Content-Type": "application/json",
|
|
408
|
-
},
|
|
409
|
-
body: JSON.stringify({
|
|
410
|
-
model,
|
|
411
|
-
max_tokens: maxTokens,
|
|
412
|
-
messages: [
|
|
413
|
-
{
|
|
414
|
-
role: "user",
|
|
415
|
-
content: [
|
|
416
|
-
{ type: "text", text: prompt },
|
|
417
|
-
{ type: "image_url", image_url: { url: dataUrl, detail: "auto" } },
|
|
418
|
-
],
|
|
419
|
-
},
|
|
420
|
-
],
|
|
421
|
-
}),
|
|
422
|
-
signal: controller.signal,
|
|
423
|
-
});
|
|
424
|
-
clearTimeout(timer);
|
|
425
|
-
if (!response.ok) {
|
|
426
|
-
const errText = await response.text();
|
|
427
|
-
throw new Error(`OpenAI Vision API ${response.status}: ${errText}`);
|
|
428
|
-
}
|
|
429
|
-
const data = await response.json();
|
|
430
|
-
const analysis = data.choices[0]?.message?.content ?? "";
|
|
431
|
-
const usage = data.usage;
|
|
432
|
-
return {
|
|
433
|
-
content: [{ type: "text", text: analysis }],
|
|
434
|
-
details: {
|
|
435
|
-
provider: "openai",
|
|
436
|
-
model,
|
|
437
|
-
path: filePath,
|
|
438
|
-
imageSize: fileBuffer.byteLength,
|
|
439
|
-
tokens: usage?.total_tokens,
|
|
440
|
-
promptTokens: usage?.prompt_tokens,
|
|
441
|
-
completionTokens: usage?.completion_tokens,
|
|
442
|
-
},
|
|
443
|
-
};
|
|
444
|
-
}
|
|
445
|
-
async function analyzeAnthropic(filePath, fileBuffer, params, vault, signal) {
|
|
446
|
-
const apiKey = vault?.getKey("anthropic", "key") ?? requireEnv("ANTHROPIC_API_KEY");
|
|
447
|
-
const model = params.model ?? "claude-sonnet-4-20250514";
|
|
448
|
-
const prompt = params.prompt ?? "Describe this image in detail.";
|
|
449
|
-
const maxTokens = params.max_tokens ?? 1024;
|
|
301
|
+
async function analyzeWithSdk(filePath, fileBuffer, parsed, params, vault, signal) {
|
|
302
|
+
const { generateText } = await import("ai");
|
|
303
|
+
const apiKey = resolveProviderKey(parsed.provider, vault);
|
|
304
|
+
const provider = await resolveVisionProvider(parsed.provider, apiKey);
|
|
450
305
|
const ext = extname(filePath).toLowerCase();
|
|
451
|
-
const
|
|
452
|
-
const
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
"x-api-key": apiKey,
|
|
464
|
-
"anthropic-version": "2023-06-01",
|
|
465
|
-
"Content-Type": "application/json",
|
|
466
|
-
},
|
|
467
|
-
body: JSON.stringify({
|
|
468
|
-
model,
|
|
469
|
-
max_tokens: maxTokens,
|
|
470
|
-
messages: [
|
|
471
|
-
{
|
|
472
|
-
role: "user",
|
|
473
|
-
content: [
|
|
474
|
-
{
|
|
475
|
-
type: "image",
|
|
476
|
-
source: {
|
|
477
|
-
type: "base64",
|
|
478
|
-
media_type: mediaType,
|
|
479
|
-
data: base64,
|
|
480
|
-
},
|
|
481
|
-
},
|
|
482
|
-
{ type: "text", text: prompt },
|
|
483
|
-
],
|
|
484
|
-
},
|
|
485
|
-
],
|
|
486
|
-
}),
|
|
487
|
-
signal: controller.signal,
|
|
306
|
+
const mediaType = imageMime(ext);
|
|
307
|
+
const result = await generateText({
|
|
308
|
+
model: provider(parsed.model),
|
|
309
|
+
maxOutputTokens: params.max_tokens ?? 1024,
|
|
310
|
+
messages: [{
|
|
311
|
+
role: "user",
|
|
312
|
+
content: [
|
|
313
|
+
{ type: "text", text: params.prompt ?? "Describe this image in detail." },
|
|
314
|
+
{ type: "image", image: new Uint8Array(fileBuffer), mediaType },
|
|
315
|
+
],
|
|
316
|
+
}],
|
|
317
|
+
abortSignal: signal,
|
|
488
318
|
});
|
|
489
|
-
clearTimeout(timer);
|
|
490
|
-
if (!response.ok) {
|
|
491
|
-
const errText = await response.text();
|
|
492
|
-
throw new Error(`Anthropic Vision API ${response.status}: ${errText}`);
|
|
493
|
-
}
|
|
494
|
-
const data = await response.json();
|
|
495
|
-
const analysis = data.content
|
|
496
|
-
.filter(b => b.type === "text" && b.text)
|
|
497
|
-
.map(b => b.text)
|
|
498
|
-
.join("\n");
|
|
499
|
-
const usage = data.usage;
|
|
500
319
|
return {
|
|
501
|
-
content: [{ type: "text", text:
|
|
320
|
+
content: [{ type: "text", text: result.text }],
|
|
502
321
|
details: {
|
|
503
|
-
provider:
|
|
504
|
-
model,
|
|
322
|
+
provider: parsed.provider,
|
|
323
|
+
model: parsed.model,
|
|
505
324
|
path: filePath,
|
|
506
325
|
imageSize: fileBuffer.byteLength,
|
|
507
|
-
|
|
508
|
-
|
|
326
|
+
tokens: result.usage?.totalTokens,
|
|
327
|
+
promptTokens: result.usage?.inputTokens,
|
|
328
|
+
completionTokens: result.usage?.outputTokens,
|
|
509
329
|
},
|
|
510
330
|
};
|
|
511
331
|
}
|
|
@@ -513,23 +333,22 @@ export const ALL_IMAGE_TOOL_NAMES = ["image_generate", "image_analyze", "video_g
|
|
|
513
333
|
/**
|
|
514
334
|
* Create image & video tools for generation, vision analysis, and video creation.
|
|
515
335
|
*
|
|
516
|
-
*
|
|
517
|
-
*
|
|
518
|
-
* @param allowedTools - Optional filter — only include tools whose names appear here.
|
|
519
|
-
* Supports wildcards expanded upstream (e.g. "image_*", "video_*").
|
|
520
|
-
* @param vault - Resolved vault for credential resolution (fal-ai, openai, anthropic).
|
|
521
|
-
* Credentials are resolved as: vault > environment variable.
|
|
336
|
+
* The 6-arg positional signature is preserved for back-compat. Prefer the
|
|
337
|
+
* options-object form (`{ cwd, vault, imageModel, ... }`) for new callers.
|
|
522
338
|
*/
|
|
523
339
|
export function createImageTools(cwd, allowedPaths, allowedTools, vault, fs) {
|
|
524
|
-
const
|
|
525
|
-
|
|
340
|
+
const opts = typeof cwd === "string"
|
|
341
|
+
? { cwd, allowedPaths, allowedTools, vault, fs }
|
|
342
|
+
: cwd;
|
|
343
|
+
const sandbox = resolveAllowedPaths(opts.cwd, opts.allowedPaths);
|
|
344
|
+
const _fs = opts.fs ?? new NodeFileSystem();
|
|
526
345
|
const factories = {
|
|
527
|
-
image_generate: () => createGenerateTool(cwd, sandbox, _fs, vault),
|
|
528
|
-
image_analyze: () => createAnalyzeTool(cwd, sandbox, _fs, vault),
|
|
529
|
-
video_generate: () => createVideoGenerateTool(cwd, sandbox, _fs, vault),
|
|
346
|
+
image_generate: () => createGenerateTool(opts.cwd, sandbox, _fs, opts.imageModel, opts.vault),
|
|
347
|
+
image_analyze: () => createAnalyzeTool(opts.cwd, sandbox, _fs, opts.visionModel, opts.vault),
|
|
348
|
+
video_generate: () => createVideoGenerateTool(opts.cwd, sandbox, _fs, opts.videoModel, opts.vault),
|
|
530
349
|
};
|
|
531
|
-
const names = allowedTools
|
|
532
|
-
? ALL_IMAGE_TOOL_NAMES.filter(n => allowedTools.some(a => a.toLowerCase() === n))
|
|
350
|
+
const names = opts.allowedTools
|
|
351
|
+
? ALL_IMAGE_TOOL_NAMES.filter(n => opts.allowedTools.some(a => a.toLowerCase() === n))
|
|
533
352
|
: ALL_IMAGE_TOOL_NAMES;
|
|
534
353
|
return names.map(n => factories[n]());
|
|
535
354
|
}
|