@polpo-ai/tools 0.6.32 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59)
  1. package/dist/__tests__/email-tools.test.d.ts +2 -0
  2. package/dist/__tests__/email-tools.test.d.ts.map +1 -0
  3. package/dist/__tests__/email-tools.test.js +705 -0
  4. package/dist/__tests__/email-tools.test.js.map +1 -0
  5. package/dist/__tests__/extended-tools.test.d.ts +2 -0
  6. package/dist/__tests__/extended-tools.test.d.ts.map +1 -0
  7. package/dist/__tests__/extended-tools.test.js +743 -0
  8. package/dist/__tests__/extended-tools.test.js.map +1 -0
  9. package/dist/__tests__/external-api-tools.test.d.ts +2 -0
  10. package/dist/__tests__/external-api-tools.test.d.ts.map +1 -0
  11. package/dist/__tests__/external-api-tools.test.js +1731 -0
  12. package/dist/__tests__/external-api-tools.test.js.map +1 -0
  13. package/dist/__tests__/memory-tools.test.d.ts +2 -0
  14. package/dist/__tests__/memory-tools.test.d.ts.map +1 -0
  15. package/dist/__tests__/memory-tools.test.js +0 -0
  16. package/dist/__tests__/memory-tools.test.js.map +1 -0
  17. package/dist/audio-tools.d.ts +25 -27
  18. package/dist/audio-tools.d.ts.map +1 -1
  19. package/dist/audio-tools.js +156 -438
  20. package/dist/audio-tools.js.map +1 -1
  21. package/dist/browser-tools.d.ts.map +1 -1
  22. package/dist/browser-tools.js +5 -1
  23. package/dist/browser-tools.js.map +1 -1
  24. package/dist/email-tools.d.ts.map +1 -1
  25. package/dist/email-tools.js +11 -3
  26. package/dist/email-tools.js.map +1 -1
  27. package/dist/image-tools.d.ts +27 -25
  28. package/dist/image-tools.d.ts.map +1 -1
  29. package/dist/image-tools.js +151 -332
  30. package/dist/image-tools.js.map +1 -1
  31. package/dist/index.d.ts +1 -2
  32. package/dist/index.d.ts.map +1 -1
  33. package/dist/index.js +3 -2
  34. package/dist/index.js.map +1 -1
  35. package/dist/lib/edge-speech-model.d.ts +61 -0
  36. package/dist/lib/edge-speech-model.d.ts.map +1 -0
  37. package/dist/lib/edge-speech-model.js +144 -0
  38. package/dist/lib/edge-speech-model.js.map +1 -0
  39. package/dist/lib/exa-search-provider.d.ts +27 -0
  40. package/dist/lib/exa-search-provider.d.ts.map +1 -0
  41. package/dist/lib/exa-search-provider.js +109 -0
  42. package/dist/lib/exa-search-provider.js.map +1 -0
  43. package/dist/lib/provider-resolver.d.ts +54 -0
  44. package/dist/lib/provider-resolver.d.ts.map +1 -0
  45. package/dist/lib/provider-resolver.js +115 -0
  46. package/dist/lib/provider-resolver.js.map +1 -0
  47. package/dist/search-tools.d.ts +10 -13
  48. package/dist/search-tools.d.ts.map +1 -1
  49. package/dist/search-tools.js +63 -140
  50. package/dist/search-tools.js.map +1 -1
  51. package/dist/system-tools.d.ts +19 -5
  52. package/dist/system-tools.d.ts.map +1 -1
  53. package/dist/system-tools.js +16 -10
  54. package/dist/system-tools.js.map +1 -1
  55. package/package.json +12 -2
  56. package/dist/phone-tools.d.ts +0 -27
  57. package/dist/phone-tools.d.ts.map +0 -1
  58. package/dist/phone-tools.js +0 -577
  59. package/dist/phone-tools.js.map +0 -1
@@ -0,0 +1,1731 @@
1
+ /**
2
+ * Behavioral tests for the external-API tool wrappers — every tool
3
+ * whose body reduces to a call into a third-party provider:
4
+ *
5
+ * - image_generate / video_generate → AI SDK generateImage / experimental_generateVideo (fal by default)
6
+ * - image_analyze → AI SDK generateText, multimodal (OpenAI by default)
7
+ * - audio_transcribe → AI SDK experimental_transcribe (OpenAI whisper-1 by default)
8
+ * - search_web / search_find_similar → Exa REST API via fetch()
9
+ *
10
+ * The AI SDK entry points and provider resolvers are replaced with
11
+ * vi.mock stubs; the Exa tests stub `globalThis.fetch` with a tiny URL
12
+ * router. Either way we pin the request (what the tool sends) AND the
13
+ * response handling (how it handles success, errors, malformed data,
14
+ * network failures). No real network traffic ever leaves the test process.
15
+ *
16
+ * Adversarial coverage focuses on what production breaks on:
17
+ * 401/429/500, malformed JSON, missing fields, fetch throwing,
18
+ * sandbox escapes for outputs that write a file.
19
+ */
20
+ import { describe, it, expect, beforeEach, afterEach, vi } from "vitest";
21
+ import { mkdtempSync, rmSync, existsSync, writeFileSync, statSync } from "node:fs";
22
+ import { tmpdir } from "node:os";
23
+ import { join } from "node:path";
24
+ import { createImageTools } from "../image-tools.js";
25
+ import { createAudioTools } from "../audio-tools.js";
26
+ import { createSearchTools } from "../search-tools.js";
27
+ import { ExaSearchProvider } from "../lib/exa-search-provider.js";
28
+ // ── AI SDK mocks (image_generate goes through `generateImage`) ──
29
+ //
30
+ // vi.hoisted lets us share these across the test file *and* the
31
+ // vi.mock factories below, which run before module init. Each test
32
+ // resets the mocks in beforeEach.
33
+ const sdkMocks = vi.hoisted(() => ({
34
+ generateImage: vi.fn(),
35
+ experimental_generateVideo: vi.fn(),
36
+ generateText: vi.fn(),
37
+ experimental_transcribe: vi.fn(),
38
+ resolveImageProvider: vi.fn(async (_name, apiKey) => ({
39
+ _calledWithKey: apiKey,
40
+ image: (modelId) => ({ _isMockModel: true, modelId }),
41
+ })),
42
+ resolveVideoProvider: vi.fn(async (_name, apiKey) => ({
43
+ _calledWithKey: apiKey,
44
+ video: (modelId) => ({ _isMockVideoModel: true, modelId }),
45
+ })),
46
+ resolveVisionProvider: vi.fn(async (name, apiKey) => {
47
+ const fn = (modelId) => ({ _isMockVisionModel: true, providerName: name, modelId });
48
+ fn._calledWithKey = apiKey;
49
+ return fn;
50
+ }),
51
+ resolveTranscribeProvider: vi.fn(async (name, apiKey) => ({
52
+ _calledWithKey: apiKey,
53
+ transcription: (modelId) => ({ _isMockTranscribeModel: true, providerName: name, modelId }),
54
+ })),
55
+ experimental_generateSpeech: vi.fn(),
56
+ resolveSpeakProvider: vi.fn(async (name, config) => ({
57
+ _calledWith: { name, apiKey: config.apiKey, hasShell: Boolean(config.shell), hasFs: Boolean(config.fs) },
58
+ speech: (modelId) => ({ _isMockSpeechModel: true, providerName: name, modelId }),
59
+ })),
60
+ }));
61
+ vi.mock("../lib/provider-resolver.js", () => ({
62
+ resolveImageProvider: sdkMocks.resolveImageProvider,
63
+ resolveVideoProvider: sdkMocks.resolveVideoProvider,
64
+ resolveVisionProvider: sdkMocks.resolveVisionProvider,
65
+ resolveTranscribeProvider: sdkMocks.resolveTranscribeProvider,
66
+ resolveSpeakProvider: sdkMocks.resolveSpeakProvider,
67
+ }));
68
+ vi.mock("ai", async () => {
69
+ const actual = await vi.importActual("ai");
70
+ return {
71
+ ...actual,
72
+ generateImage: sdkMocks.generateImage,
73
+ experimental_generateVideo: sdkMocks.experimental_generateVideo,
74
+ generateText: sdkMocks.generateText,
75
+ experimental_transcribe: sdkMocks.experimental_transcribe,
76
+ experimental_generateSpeech: sdkMocks.experimental_generateSpeech,
77
+ };
78
+ });
79
+ let cwd;
80
+ let originalFetch;
81
+ let lastRequests = [];
82
+ function pick(tools, name) {
83
+ const t = tools.find((x) => x.name === name);
84
+ if (!t)
85
+ throw new Error(`Tool '${name}' not registered: ${tools.map(x => x.name).join(", ")}`);
86
+ return t;
87
+ }
88
+ function text(result) {
89
+ const block = result?.content?.[0];
90
+ if (block?.type !== "text")
91
+ throw new Error(`expected text block, got ${block?.type}`);
92
+ return block.text;
93
+ }
94
+ /** Assert that a tool surfaced a failure — either by throwing OR by
95
+ * returning a structured error. The pattern is tested against the
96
+ * rejection message when the call throws; otherwise against the lowercased result text plus JSON-serialized details. */
97
+ async function expectFailure(call, pattern) {
98
+ let resolved;
99
+ let threw;
100
+ try {
101
+ resolved = await call;
102
+ }
103
+ catch (err) {
104
+ threw = err;
105
+ }
106
+ if (threw) {
107
+ if (!pattern.test(threw.message ?? String(threw))) {
108
+ throw new Error(`thrown error didn't match ${pattern}: ${threw.message ?? threw}`);
109
+ }
110
+ return;
111
+ }
112
+ const blob = (text(resolved) + JSON.stringify(resolved.details ?? {})).toLowerCase();
113
+ if (!pattern.test(blob)) {
114
+ throw new Error(`expected failure matching ${pattern}, got: ${blob.slice(0, 300)}`);
115
+ }
116
+ }
117
+ /** Build a fetch router from a list of (matcher, response) pairs.
118
+ * First match wins; unmatched URLs throw to surface unexpected
119
+ * network calls that the tool shouldn't be making. */
120
+ function routeFetch(routes) {
121
+ globalThis.fetch = vi.fn(async (input, init) => {
122
+ const url = typeof input === "string" ? input : (input?.url ?? String(input));
123
+ lastRequests.push({ url, init });
124
+ for (const r of routes) {
125
+ if (r.match(url))
126
+ return r.response();
127
+ }
128
+ throw new Error(`unrouted fetch in test: ${url}`);
129
+ });
130
+ }
131
+ function makeVault() {
132
+ // Service names + key paths must match what the wrappers look up:
133
+ // image_generate / video_generate → vault.getKey("fal-ai", "key")
134
+ // image_analyze (OpenAI vision) → vault.getKey("openai", "key")
135
+ // audio_transcribe → vault.getKey("openai", "key")
136
+ // search_web / search_find_similar → vault.getKey("exa", "key")
137
+ const services = {
138
+ "fal-ai": { key: "fake-fal-key" },
139
+ openai: { key: "fake-openai-key" },
140
+ anthropic: { key: "fake-anthropic-key" },
141
+ deepgram: { key: "fake-deepgram-key" },
142
+ elevenlabs: { key: "fake-elevenlabs-key" },
143
+ exa: { key: "fake-exa-key" },
144
+ };
145
+ return {
146
+ get: (s) => services[s],
147
+ getSmtp: () => undefined,
148
+ getImap: () => undefined,
149
+ getKey: (s, k) => services[s]?.[k],
150
+ has: (s) => s in services,
151
+ list: () => Object.entries(services).map(([service, v]) => ({
152
+ service, type: "api_key", keys: Object.keys(v),
153
+ })),
154
+ };
155
+ }
156
+ // A tiny valid 1×1 PNG used as the generateImage mock payload and as
157
+ // the on-disk input for image_analyze, so no test needs real image
158
+ // data. Layout: PNG signature + IHDR + IDAT + IEND, about 70 bytes.
159
+ const TINY_PNG = Buffer.from("iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8/5+hHgAHggJ/PchI7wAAAABJRU5ErkJggg==", "base64");
160
+ beforeEach(() => {
161
+ cwd = mkdtempSync(join(tmpdir(), "polpo-ext-api-"));
162
+ originalFetch = globalThis.fetch;
163
+ lastRequests = [];
164
+ // Reset SDK mocks; install a sane happy-path default for image_generate.
165
+ sdkMocks.generateImage.mockReset();
166
+ sdkMocks.generateImage.mockResolvedValue({
167
+ image: { uint8Array: new Uint8Array(TINY_PNG), base64: TINY_PNG.toString("base64"), mediaType: "image/png" },
168
+ images: [{ uint8Array: new Uint8Array(TINY_PNG), base64: TINY_PNG.toString("base64"), mediaType: "image/png" }],
169
+ providerMetadata: {},
170
+ warnings: [],
171
+ responses: [{}],
172
+ });
173
+ sdkMocks.resolveImageProvider.mockClear();
174
+ sdkMocks.experimental_generateVideo.mockReset();
175
+ // Use a tiny but recognizable byte payload — 12 bytes is enough to clear
176
+ // the 0-byte "empty" guard; it is below the ">20 bytes" size assertion
177
+ // made on image output, which the video tests here do not make.
178
+ sdkMocks.experimental_generateVideo.mockResolvedValue({
179
+ video: { uint8Array: new Uint8Array([0, 0, 0, 0x18, 0x66, 0x74, 0x79, 0x70, 0x69, 0x73, 0x6f, 0x6d]), base64: "", mediaType: "video/mp4" },
180
+ videos: [],
181
+ providerMetadata: {},
182
+ warnings: [],
183
+ responses: [{}],
184
+ });
185
+ sdkMocks.resolveVideoProvider.mockClear();
186
+ sdkMocks.generateText.mockReset();
187
+ sdkMocks.generateText.mockResolvedValue({
188
+ text: "A small calico cat sits on a wooden floor.",
189
+ usage: { inputTokens: 12, outputTokens: 9, totalTokens: 21 },
190
+ providerMetadata: {},
191
+ warnings: [],
192
+ response: {},
193
+ });
194
+ sdkMocks.resolveVisionProvider.mockClear();
195
+ sdkMocks.experimental_transcribe.mockReset();
196
+ sdkMocks.experimental_transcribe.mockResolvedValue({
197
+ text: "Hello world, this is a test.",
198
+ segments: [{ text: "Hello world, this is a test.", startSecond: 0, endSecond: 3.4 }],
199
+ language: "en",
200
+ durationInSeconds: 3.4,
201
+ warnings: [],
202
+ providerMetadata: {},
203
+ responses: [{}],
204
+ });
205
+ sdkMocks.resolveTranscribeProvider.mockClear();
206
+ sdkMocks.experimental_generateSpeech.mockReset();
207
+ // Tiny but recognizable mp3 prefix bytes — enough to satisfy the
208
+ // ">0 bytes" guard in the tool layer.
209
+ sdkMocks.experimental_generateSpeech.mockResolvedValue({
210
+ audio: { uint8Array: new Uint8Array([0xff, 0xfb, 0x90, 0x00, 0x00, 0x00]), base64: "", mediaType: "audio/mpeg" },
211
+ warnings: [],
212
+ request: {},
213
+ response: { timestamp: new Date(), modelId: "tts-1" },
214
+ providerMetadata: {},
215
+ });
216
+ sdkMocks.resolveSpeakProvider.mockClear();
217
+ });
218
+ afterEach(() => {
219
+ globalThis.fetch = originalFetch;
220
+ rmSync(cwd, { recursive: true, force: true });
221
+ });
222
+ // ────────────────────────────────────────────────────────────
223
+ // image_generate (Vercel AI SDK — generateImage)
224
+ // ────────────────────────────────────────────────────────────
225
+ describe("image_generate", () => {
226
+ function build() {
227
+ return createImageTools(cwd, [cwd], ["image_generate"], makeVault());
228
+ }
229
+ it("calls the SDK with the resolved fal model handle and writes the bytes", async () => {
230
+ const t = pick(build(), "image_generate");
231
+ const result = await t.execute("c", { prompt: "a cat", path: "out.png" });
232
+ expect(existsSync(join(cwd, "out.png"))).toBe(true);
233
+ expect(statSync(join(cwd, "out.png")).size).toBeGreaterThan(20);
234
+ expect(JSON.stringify(result.details)).toContain("out.png");
235
+ // Resolver was invoked with the fal-ai vault key.
236
+ expect(sdkMocks.resolveImageProvider).toHaveBeenCalledWith("fal", "fake-fal-key");
237
+ // The SDK got the prompt and the default fal-ai/flux/dev model.
238
+ const args = sdkMocks.generateImage.mock.calls[0][0];
239
+ expect(args.prompt).toBe("a cat");
240
+ expect(args.model).toEqual({ _isMockModel: true, modelId: "fal-ai/flux/dev" });
241
+ });
242
+ it("forwards size, seed, and provider-specific knobs to the SDK", async () => {
243
+ const t = pick(build(), "image_generate");
244
+ await t.execute("c", {
245
+ prompt: "x", path: "out.png",
246
+ model: "fal/fal-ai/flux-pro/v1.1",
247
+ size: "768x1024",
248
+ seed: 42,
249
+ num_inference_steps: 50,
250
+ guidance_scale: 7.5,
251
+ });
252
+ const args = sdkMocks.generateImage.mock.calls[0][0];
253
+ expect(args.size).toBe("768x1024");
254
+ expect(args.seed).toBe(42);
255
+ expect(args.providerOptions).toEqual({
256
+ fal: { num_inference_steps: 50, guidance_scale: 7.5 },
257
+ });
258
+ expect(args.model.modelId).toBe("fal-ai/flux-pro/v1.1");
259
+ });
260
+ it("omits providerOptions when no fal-specific knobs are passed", async () => {
261
+ const t = pick(build(), "image_generate");
262
+ await t.execute("c", { prompt: "x", path: "out.png" });
263
+ expect(sdkMocks.generateImage.mock.calls[0][0].providerOptions).toBeUndefined();
264
+ });
265
+ it("surfaces an SDK error as a structured tool failure (no file written)", async () => {
266
+ sdkMocks.generateImage.mockRejectedValueOnce(new Error("AI_APICallError: 401 invalid key"));
267
+ const t = pick(build(), "image_generate");
268
+ await expectFailure(t.execute("c", { prompt: "x", path: "out.png" }), /401|invalid key|api/i);
269
+ expect(existsSync(join(cwd, "out.png"))).toBe(false);
270
+ });
271
+ it("rejects when the SDK returns no image bytes", async () => {
272
+ sdkMocks.generateImage.mockResolvedValueOnce({
273
+ image: { uint8Array: new Uint8Array(0), base64: "", mediaType: "image/png" },
274
+ images: [], providerMetadata: {}, warnings: [], responses: [{}],
275
+ });
276
+ const t = pick(build(), "image_generate");
277
+ await expectFailure(t.execute("c", { prompt: "x", path: "out.png" }), /no image bytes|empty|response/i);
278
+ expect(existsSync(join(cwd, "out.png"))).toBe(false);
279
+ });
280
+ it("refuses an output path outside the sandbox before the SDK is called", async () => {
281
+ const t = pick(build(), "image_generate");
282
+ await expect(t.execute("c", { prompt: "x", path: "/etc/escape.png" }))
283
+ .rejects.toThrow(/sandbox|allowed|denied/i);
284
+ expect(sdkMocks.generateImage).not.toHaveBeenCalled();
285
+ });
286
+ it("forwards the abort signal to the SDK", async () => {
287
+ const t = pick(build(), "image_generate");
288
+ const ctrl = new AbortController();
289
+ await t.execute("c", { prompt: "x", path: "out.png" }, ctrl.signal);
290
+ expect(sdkMocks.generateImage.mock.calls[0][0].abortSignal).toBe(ctrl.signal);
291
+ });
292
+ });
293
+ // ────────────────────────────────────────────────────────────
294
+ // video_generate (Vercel AI SDK — experimental_generateVideo)
295
+ // ────────────────────────────────────────────────────────────
296
+ describe("video_generate", () => {
297
+ function build() {
298
+ return createImageTools(cwd, [cwd], ["video_generate"], makeVault());
299
+ }
300
+ it("calls the SDK with the resolved fal video model handle and writes the bytes", async () => {
301
+ const t = pick(build(), "video_generate");
302
+ const result = await t.execute("c", { prompt: "a sunset", path: "out.mp4" });
303
+ expect(existsSync(join(cwd, "out.mp4"))).toBe(true);
304
+ expect(JSON.stringify(result.details)).toContain("out.mp4");
305
+ expect(sdkMocks.resolveVideoProvider).toHaveBeenCalledWith("fal", "fake-fal-key");
306
+ const args = sdkMocks.experimental_generateVideo.mock.calls[0][0];
307
+ expect(args.prompt).toBe("a sunset");
308
+ expect(args.model).toEqual({ _isMockVideoModel: true, modelId: "luma-ray-2-flash" });
309
+ });
310
+ it("forwards aspect_ratio, resolution, duration, fps, seed to the SDK", async () => {
311
+ const t = pick(build(), "video_generate");
312
+ await t.execute("c", {
313
+ prompt: "x", path: "out.mp4",
314
+ model: "fal/luma-ray-2",
315
+ aspect_ratio: "16:9",
316
+ resolution: "1280x720",
317
+ duration: 6,
318
+ fps: 24,
319
+ seed: 7,
320
+ });
321
+ const args = sdkMocks.experimental_generateVideo.mock.calls[0][0];
322
+ expect(args.aspectRatio).toBe("16:9");
323
+ expect(args.resolution).toBe("1280x720");
324
+ expect(args.duration).toBe(6);
325
+ expect(args.fps).toBe(24);
326
+ expect(args.seed).toBe(7);
327
+ expect(args.model.modelId).toBe("luma-ray-2");
328
+ });
329
+ it("surfaces an SDK error as a structured failure (no file written)", async () => {
330
+ sdkMocks.experimental_generateVideo.mockRejectedValueOnce(new Error("AI_APICallError: 503"));
331
+ const t = pick(build(), "video_generate");
332
+ await expectFailure(t.execute("c", { prompt: "x", path: "out.mp4" }), /503|api|error/i);
333
+ expect(existsSync(join(cwd, "out.mp4"))).toBe(false);
334
+ });
335
+ it("rejects when the SDK returns no video bytes", async () => {
336
+ sdkMocks.experimental_generateVideo.mockResolvedValueOnce({
337
+ video: { uint8Array: new Uint8Array(0), base64: "", mediaType: "video/mp4" },
338
+ videos: [], providerMetadata: {}, warnings: [], responses: [{}],
339
+ });
340
+ const t = pick(build(), "video_generate");
341
+ await expectFailure(t.execute("c", { prompt: "x", path: "out.mp4" }), /no video bytes|empty/i);
342
+ });
343
+ it("refuses an output path outside the sandbox before the SDK is called", async () => {
344
+ const t = pick(build(), "video_generate");
345
+ await expect(t.execute("c", { prompt: "x", path: "/etc/escape.mp4" }))
346
+ .rejects.toThrow(/sandbox|allowed|denied/i);
347
+ expect(sdkMocks.experimental_generateVideo).not.toHaveBeenCalled();
348
+ });
349
+ it("forwards the abort signal to the SDK", async () => {
350
+ const t = pick(build(), "video_generate");
351
+ const ctrl = new AbortController();
352
+ await t.execute("c", { prompt: "x", path: "out.mp4" }, ctrl.signal);
353
+ expect(sdkMocks.experimental_generateVideo.mock.calls[0][0].abortSignal).toBe(ctrl.signal);
354
+ });
355
+ it("falls back to FAL_KEY env when no vault key is present", async () => {
356
+ process.env.FAL_KEY = "env-fal-key";
357
+ try {
358
+ const noKeysVault = {
359
+ get: () => undefined, getSmtp: () => undefined, getImap: () => undefined,
360
+ getKey: () => undefined, has: () => false, list: () => [],
361
+ };
362
+ const t = pick(createImageTools(cwd, [cwd], ["video_generate"], noKeysVault), "video_generate");
363
+ await t.execute("c", { prompt: "x", path: "out.mp4" });
364
+ expect(sdkMocks.resolveVideoProvider).toHaveBeenCalledWith("fal", "env-fal-key");
365
+ }
366
+ finally {
367
+ delete process.env.FAL_KEY;
368
+ }
369
+ });
370
+ });
371
+ // ────────────────────────────────────────────────────────────
372
+ // image_analyze (Vercel AI SDK — generateText multimodal)
373
+ // ────────────────────────────────────────────────────────────
374
+ describe("image_analyze", () => {
375
+ function build() {
376
+ return createImageTools(cwd, [cwd], ["image_analyze"], makeVault());
377
+ }
378
+ it("calls the SDK with an OpenAI vision model + multimodal messages by default", async () => {
379
+ writeFileSync(join(cwd, "input.png"), TINY_PNG);
380
+ const t = pick(build(), "image_analyze");
381
+ const result = await t.execute("c", { path: "input.png", prompt: "What is this?" });
382
+ expect(text(result)).toContain("calico cat");
383
+ expect(sdkMocks.resolveVisionProvider).toHaveBeenCalledWith("openai", "fake-openai-key");
384
+ const args = sdkMocks.generateText.mock.calls[0][0];
385
+ expect(args.model.providerName).toBe("openai");
386
+ expect(args.model.modelId).toBe("gpt-4o-mini");
387
+ // Multimodal: text + image content parts in a single user message.
388
+ expect(args.messages).toHaveLength(1);
389
+ expect(args.messages[0].role).toBe("user");
390
+ expect(args.messages[0].content).toEqual([
391
+ { type: "text", text: "What is this?" },
392
+ { type: "image", image: expect.any(Uint8Array), mediaType: "image/png" },
393
+ ]);
394
+ });
395
+ it("routes to anthropic when an anthropic/* model override is passed", async () => {
396
+ writeFileSync(join(cwd, "input.png"), TINY_PNG);
397
+ const t = pick(build(), "image_analyze");
398
+ await t.execute("c", { path: "input.png", model: "anthropic/claude-sonnet-4-20250514" });
399
+ expect(sdkMocks.resolveVisionProvider).toHaveBeenCalledWith("anthropic", expect.any(String));
400
+ const args = sdkMocks.generateText.mock.calls[0][0];
401
+ expect(args.model.modelId).toBe("claude-sonnet-4-20250514");
402
+ });
403
+ it("forwards the user's model override (provider/model string) to the resolver", async () => {
404
+ writeFileSync(join(cwd, "input.png"), TINY_PNG);
405
+ const t = pick(build(), "image_analyze");
406
+ await t.execute("c", { path: "input.png", model: "openai/gpt-4o" });
407
+ expect(sdkMocks.generateText.mock.calls[0][0].model.modelId).toBe("gpt-4o");
408
+ });
409
+ it("forwards max_tokens as the SDK's maxOutputTokens", async () => {
410
+ writeFileSync(join(cwd, "input.png"), TINY_PNG);
411
+ const t = pick(build(), "image_analyze");
412
+ await t.execute("c", { path: "input.png", max_tokens: 256 });
413
+ expect(sdkMocks.generateText.mock.calls[0][0].maxOutputTokens).toBe(256);
414
+ });
415
+ it("returns the SDK's normalized usage on the result details", async () => {
416
+ writeFileSync(join(cwd, "input.png"), TINY_PNG);
417
+ const t = pick(build(), "image_analyze");
418
+ const result = await t.execute("c", { path: "input.png" });
419
+ expect(result.details).toMatchObject({
420
+ provider: "openai",
421
+ tokens: 21,
422
+ promptTokens: 12,
423
+ completionTokens: 9,
424
+ });
425
+ });
426
+ it("surfaces an SDK error as a structured failure", async () => {
427
+ writeFileSync(join(cwd, "input.png"), TINY_PNG);
428
+ sdkMocks.generateText.mockRejectedValueOnce(new Error("AI_APICallError: 401 invalid key"));
429
+ const t = pick(build(), "image_analyze");
430
+ await expectFailure(t.execute("c", { path: "input.png" }), /401|invalid|unauthorized/i);
431
+ });
432
+ it("refuses a path that escapes the sandbox before the SDK is called", async () => {
433
+ const t = pick(build(), "image_analyze");
434
+ await expect(t.execute("c", { path: "/etc/hostname" }))
435
+ .rejects.toThrow(/sandbox|allowed|denied/i);
436
+ expect(sdkMocks.generateText).not.toHaveBeenCalled();
437
+ });
438
+ it("forwards the abort signal to the SDK", async () => {
439
+ writeFileSync(join(cwd, "input.png"), TINY_PNG);
440
+ const t = pick(build(), "image_analyze");
441
+ const ctrl = new AbortController();
442
+ await t.execute("c", { path: "input.png" }, ctrl.signal);
443
+ expect(sdkMocks.generateText.mock.calls[0][0].abortSignal).toBe(ctrl.signal);
444
+ });
445
+ });
446
+ // ────────────────────────────────────────────────────────────
447
+ // audio_transcribe (Vercel AI SDK — experimental_transcribe)
448
+ // ────────────────────────────────────────────────────────────
449
+ describe("audio_transcribe", () => {
450
+ function build() {
451
+ return createAudioTools(cwd, [cwd], ["audio_transcribe"], makeVault());
452
+ }
453
+ it("calls the SDK with the resolved openai whisper model and returns the transcript", async () => {
454
+ writeFileSync(join(cwd, "rec.mp3"), Buffer.from("ID3\x03\x00\x00\x00\x00\x00\x00audio"));
455
+ const t = pick(build(), "audio_transcribe");
456
+ const result = await t.execute("c", { path: "rec.mp3" });
457
+ expect(text(result)).toContain("Hello world");
458
+ expect(text(result)).toMatch(/Language: en/i);
459
+ expect(text(result)).toMatch(/Duration: 3\.4s/);
460
+ expect(sdkMocks.resolveTranscribeProvider).toHaveBeenCalledWith("openai", "fake-openai-key");
461
+ const args = sdkMocks.experimental_transcribe.mock.calls[0][0];
462
+ expect(args.model).toEqual({ _isMockTranscribeModel: true, providerName: "openai", modelId: "whisper-1" });
463
+ expect(args.audio).toBeInstanceOf(Uint8Array);
464
+ });
465
+ it("routes to deepgram with smart_format / punctuate when provider=deepgram", async () => {
466
+ writeFileSync(join(cwd, "rec.mp3"), Buffer.from("data"));
467
+ const t = pick(build(), "audio_transcribe");
468
+ await t.execute("c", { path: "rec.mp3", model: "deepgram/nova-3" });
469
+ expect(sdkMocks.resolveTranscribeProvider).toHaveBeenCalledWith("deepgram", "fake-deepgram-key");
470
+ const args = sdkMocks.experimental_transcribe.mock.calls[0][0];
471
+ expect(args.model.modelId).toBe("nova-3");
472
+ expect(args.providerOptions).toEqual({
473
+ deepgram: { smart_format: true, punctuate: true },
474
+ });
475
+ });
476
+ it("forwards openai-specific knobs (language, prompt) via providerOptions", async () => {
477
+ writeFileSync(join(cwd, "rec.mp3"), Buffer.from("data"));
478
+ const t = pick(build(), "audio_transcribe");
479
+ await t.execute("c", {
480
+ path: "rec.mp3",
481
+ language: "it",
482
+ prompt: "Glossary: Polpo, Lumea, Daytona.",
483
+ });
484
+ const args = sdkMocks.experimental_transcribe.mock.calls[0][0];
485
+ expect(args.providerOptions).toEqual({
486
+ openai: { language: "it", prompt: "Glossary: Polpo, Lumea, Daytona." },
487
+ });
488
+ });
489
+ it("forwards language to deepgram alongside the always-on options", async () => {
490
+ writeFileSync(join(cwd, "rec.mp3"), Buffer.from("data"));
491
+ const t = pick(build(), "audio_transcribe");
492
+ await t.execute("c", { path: "rec.mp3", model: "deepgram/nova-3", language: "es" });
493
+ expect(sdkMocks.experimental_transcribe.mock.calls[0][0].providerOptions).toEqual({
494
+ deepgram: { smart_format: true, punctuate: true, language: "es" },
495
+ });
496
+ });
497
+ it("respects a custom model id (passes through to provider.transcription)", async () => {
498
+ writeFileSync(join(cwd, "rec.mp3"), Buffer.from("data"));
499
+ const t = pick(build(), "audio_transcribe");
500
+ await t.execute("c", { path: "rec.mp3", model: "openai/gpt-4o-transcribe" });
501
+ expect(sdkMocks.experimental_transcribe.mock.calls[0][0].model.modelId).toBe("gpt-4o-transcribe");
502
+ });
503
+ it("surfaces an SDK error as a structured failure", async () => {
504
+ writeFileSync(join(cwd, "rec.mp3"), Buffer.from("data"));
505
+ sdkMocks.experimental_transcribe.mockRejectedValueOnce(new Error("AI_APICallError: 500"));
506
+ const t = pick(build(), "audio_transcribe");
507
+ await expectFailure(t.execute("c", { path: "rec.mp3" }), /500|api|error/i);
508
+ });
509
+ it("rejects an audio path outside the sandbox before the SDK is called", async () => {
510
+ const t = pick(build(), "audio_transcribe");
511
+ await expect(t.execute("c", { path: "/etc/hostname" }))
512
+ .rejects.toThrow(/sandbox|allowed|denied/i);
513
+ expect(sdkMocks.experimental_transcribe).not.toHaveBeenCalled();
514
+ });
515
+ it("surfaces a missing audio file with a structured error (no SDK call)", async () => {
516
+ const t = pick(build(), "audio_transcribe");
517
+ const result = await t.execute("c", { path: "ghost.mp3" });
518
+ expect(JSON.stringify(result.details)).toMatch(/file_read_error|enoent|no such/i);
519
+ expect(sdkMocks.experimental_transcribe).not.toHaveBeenCalled();
520
+ });
521
+ it("forwards the abort signal to the SDK", async () => {
522
+ writeFileSync(join(cwd, "rec.mp3"), Buffer.from("data"));
523
+ const t = pick(build(), "audio_transcribe");
524
+ const ctrl = new AbortController();
525
+ await t.execute("c", { path: "rec.mp3" }, ctrl.signal);
526
+ expect(sdkMocks.experimental_transcribe.mock.calls[0][0].abortSignal).toBe(ctrl.signal);
527
+ });
528
+ it("returns the SDK's normalized duration on result.details (the audio billing signal)", async () => {
529
+ writeFileSync(join(cwd, "rec.mp3"), Buffer.from("data"));
530
+ const t = pick(build(), "audio_transcribe");
531
+ const result = await t.execute("c", { path: "rec.mp3" });
532
+ expect(result.details).toMatchObject({
533
+ provider: "openai",
534
+ model: "whisper-1",
535
+ language: "en",
536
+ duration: 3.4,
537
+ textLength: "Hello world, this is a test.".length,
538
+ });
539
+ });
540
+ });
541
+ // ────────────────────────────────────────────────────────────
542
+ // search_web / search_find_similar (Exa)
543
+ // ────────────────────────────────────────────────────────────
544
/**
 * search_web: happy path, empty result set, HTTP and network failures, and
 * forwarding of the domain filters, all against a stubbed Exa endpoint.
 */
describe("search_web", () => {
  const TOOL_NAME = "search_web";

  // Builds a new provider + tool set on every call and returns the tool under test.
  const makeTool = () =>
    pick(createSearchTools(new ExaSearchProvider({ apiKey: "fake-exa-key" }), [TOOL_NAME]), TOOL_NAME);

  // Route entry that answers with `payload` serialized as JSON and HTTP 200.
  const jsonRoute = (match, payload) => ({
    match,
    response: () => new Response(JSON.stringify(payload), { status: 200 }),
  });

  // Route entry that answers with a plain-text body and the given status.
  const textRoute = (match, body, status) => ({
    match,
    response: () => new Response(body, { status }),
  });

  const isSearchEndpoint = (requestUrl) => requestUrl.includes("api.exa.ai/search");
  const anyRequest = () => true;

  it("posts to Exa /search and formats the results", async () => {
    const exaPayload = {
      results: [
        { title: "Polpo docs", url: "https://docs.polpo.sh", publishedDate: "2026-01-01", text: "Build agents..." },
        { title: "Hacker News", url: "https://news.ycombinator.com", publishedDate: "2025-12-20", text: "..." },
      ],
    };
    routeFetch([jsonRoute(isSearchEndpoint, exaPayload)]);

    const tool = makeTool();
    const rendered = text(await tool.execute("c", { query: "polpo agents framework" }));
    for (const fragment of ["Polpo docs", "docs.polpo.sh", "Hacker News"]) {
      expect(rendered).toContain(fragment);
    }

    // The API key travels in the x-api-key header, the query in the JSON body.
    const firstCall = lastRequests[0];
    const sentHeaders = firstCall.init?.headers ?? {};
    expect(sentHeaders["x-api-key"]).toBe("fake-exa-key");
    expect(JSON.parse(firstCall.init?.body)).toMatchObject({ query: "polpo agents framework" });
  });

  it("returns a clean message when Exa returns 0 results", async () => {
    routeFetch([jsonRoute(isSearchEndpoint, { results: [] })]);

    const tool = makeTool();
    const outcome = await tool.execute("c", { query: "nonsensical query xyzzy123" });

    expect(text(outcome)).toMatch(/0|no.*found|none|empty/i);
  });

  it("returns a structured error on a 401", async () => {
    routeFetch([textRoute(anyRequest, "forbidden", 401)]);

    const tool = makeTool();
    const outcome = await tool.execute("c", { query: "x" });

    expect(text(outcome).toLowerCase()).toMatch(/401|forbidden|error/);
  });

  it("returns a structured error on a network failure (fetch throws)", async () => {
    // Bypass routeFetch entirely: every fetch call rejects.
    globalThis.fetch = vi.fn(async () => { throw new Error("DNS fail"); });

    const tool = makeTool();
    const outcome = await tool.execute("c", { query: "x" });

    expect(text(outcome).toLowerCase()).toMatch(/dns|error|fail/);
  });

  it("propagates includeDomains / excludeDomains into the body", async () => {
    routeFetch([jsonRoute(anyRequest, { results: [] })]);

    const tool = makeTool();
    const args = {
      query: "q",
      includeDomains: ["polpo.sh", "github.com"],
      excludeDomains: ["spammy.io"],
    };
    await tool.execute("c", args);

    const sentBody = JSON.parse(lastRequests[0].init?.body);
    expect(sentBody.includeDomains).toEqual(["polpo.sh", "github.com"]);
    expect(sentBody.excludeDomains).toEqual(["spammy.io"]);
  });
});
608
/**
 * search_find_similar: result formatting and HTTP 500 handling against a
 * stubbed Exa `/findSimilar` endpoint.
 */
describe("search_find_similar", () => {
  const TOOL_NAME = "search_find_similar";

  // Builds a new provider + tool set on every call and returns the tool under test.
  const makeTool = () =>
    pick(createSearchTools(new ExaSearchProvider({ apiKey: "fake-exa-key" }), [TOOL_NAME]), TOOL_NAME);

  it("posts to Exa /findSimilar and returns formatted results", async () => {
    const similarPages = {
      results: [
        { title: "Similar 1", url: "https://example.com/a" },
        { title: "Similar 2", url: "https://example.com/b" },
      ],
    };
    routeFetch([
      {
        match: (requestUrl) => requestUrl.includes("api.exa.ai/findSimilar"),
        response: () => new Response(JSON.stringify(similarPages), { status: 200 }),
      },
    ]);

    const tool = makeTool();
    const rendered = text(await tool.execute("c", { url: "https://docs.polpo.sh" }));
    expect(rendered).toContain("Similar 1");
    expect(rendered).toContain("example.com/b");

    // The seed URL must reach Exa in the JSON request body.
    const sentBody = JSON.parse(lastRequests[0].init?.body);
    expect(sentBody).toMatchObject({
      url: "https://docs.polpo.sh",
    });
  });

  it("returns a structured error on 500", async () => {
    routeFetch([
      {
        match: () => true,
        response: () => new Response("server down", { status: 500 }),
      },
    ]);

    const tool = makeTool();
    const outcome = await tool.execute("c", { url: "https://x" });

    expect(text(outcome).toLowerCase()).toMatch(/500|server|error/);
  });
});
641
+ // ════════════════════════════════════════════════════════════
642
+ // PARANOID — what real production agents actually do wrong
643
+ // ════════════════════════════════════════════════════════════
644
/**
 * image_generate: adversarial inputs and failure modes.
 *
 * Covers prompt pass-through (empty, huge, control characters), credential
 * fallback from vault to environment, error wrapping for SDK rejections, and
 * file-system side effects (no partial file, overwrite of an existing file).
 */
describe("image_generate — paranoid", () => {
  const TOOL_NAME = "image_generate";

  function build() { return createImageTools(cwd, [cwd], [TOOL_NAME], makeVault()); }

  /** The tool under test, backed by the default test vault. */
  function generator() { return pick(build(), TOOL_NAME); }

  /** The tool under test, backed by a vault that holds no credentials at all. */
  function generatorWithoutVaultKeys() {
    const noKeysVault = {
      get: () => undefined, getSmtp: () => undefined, getImap: () => undefined,
      getKey: () => undefined, has: () => false, list: () => [],
    };
    return pick(createImageTools(cwd, [cwd], [TOOL_NAME], noKeysVault), TOOL_NAME);
  }

  /**
   * Runs `fn` with the given environment overrides applied, then restores the
   * previous values even when `fn` throws. An override of `undefined` unsets
   * the variable for the duration of the call.
   *
   * Restoring (instead of unconditionally deleting) keeps a key that happens
   * to be set in the developer's or CI environment from being wiped for the
   * rest of the run.
   */
  async function withEnv(overrides, fn) {
    const previous = {};
    for (const [key, value] of Object.entries(overrides)) {
      previous[key] = process.env[key];
      if (value === undefined) {
        delete process.env[key];
      } else {
        process.env[key] = value;
      }
    }
    try {
      return await fn();
    } finally {
      for (const [key, value] of Object.entries(previous)) {
        if (value === undefined) {
          delete process.env[key];
        } else {
          process.env[key] = value;
        }
      }
    }
  }

  it("forwards a 5KB prompt to the SDK verbatim (no truncation)", async () => {
    const giantPrompt = "Draw " + "tiny ".repeat(1000) + "details.";
    const t = generator();
    await t.execute("c", { prompt: giantPrompt, path: "big.png" });
    expect(sdkMocks.generateImage.mock.calls[0][0].prompt).toBe(giantPrompt);
  });

  it("falls back to FAL_KEY env when no vault key is present", async () => {
    await withEnv({ FAL_KEY: "env-fal-key" }, async () => {
      const t = generatorWithoutVaultKeys();
      await t.execute("c", { prompt: "x", path: "out.png" });
      expect(sdkMocks.resolveImageProvider).toHaveBeenCalledWith("fal", "env-fal-key");
    });
  });

  it("returns a structured error when neither vault nor env has a key", async () => {
    await withEnv({ FAL_KEY: undefined }, async () => {
      const t = generatorWithoutVaultKeys();
      await expectFailure(t.execute("c", { prompt: "x", path: "out.png" }), /missing|fal_key|env/i);
      expect(sdkMocks.resolveImageProvider).not.toHaveBeenCalled();
    });
  });

  it("does not write a partial file when the SDK throws after some progress", async () => {
    sdkMocks.generateImage.mockRejectedValueOnce(new Error("provider went away"));
    const t = generator();
    // Deliberately tolerant: whether execute rejects or resolves with an error
    // result, the only thing pinned here is that no output file exists.
    await t.execute("c", { prompt: "x", path: "out.png" }).catch(() => { });
    expect(existsSync(join(cwd, "out.png"))).toBe(false);
  });

  it("surfaces a non-Error rejection from the SDK without crashing", async () => {
    sdkMocks.generateImage.mockRejectedValueOnce("plain string rejection");
    const t = generator();
    // The tool wraps in try/catch and reads .message — we expect
    // *something* coherent, not an unhandled rejection.
    const result = await t.execute("c", { prompt: "x", path: "out.png" });
    expect(JSON.stringify(result)).toMatch(/error/i);
  });

  it("respects a custom model id (passes through to provider.image)", async () => {
    const t = generator();
    await t.execute("c", { prompt: "x", path: "out.png", model: "fal/fal-ai/flux/schnell" });
    const args = sdkMocks.generateImage.mock.calls[0][0];
    expect(args.model.modelId).toBe("fal-ai/flux/schnell");
  });

  it("forwards an empty-string prompt verbatim (no auto-padding, no crash)", async () => {
    const t = generator();
    await t.execute("c", { prompt: "", path: "out.png" });
    expect(sdkMocks.generateImage.mock.calls[0][0].prompt).toBe("");
  });

  it("forwards a 200KB prompt verbatim (no truncation, no JSON.stringify blow-up)", async () => {
    const huge = "draw " + "a tiny pixel of detail. ".repeat(10000);
    expect(huge.length).toBeGreaterThan(200_000);
    const t = generator();
    await t.execute("c", { prompt: huge, path: "out.png" });
    expect(sdkMocks.generateImage.mock.calls[0][0].prompt.length).toBe(huge.length);
  });

  it("preserves nasty unicode (NUL, RTL override, surrogate pair, ZWJ) in the prompt", async () => {
    // Written with escapes so the control characters are visible in source and
    // cannot be silently dropped or visually reordered by editors and viewers:
    // U+0000 NUL, U+202E right-to-left override, U+200D zero-width joiner,
    // followed by an astral-plane emoji (a surrogate pair in UTF-16).
    const nasty = "before\u0000after\u202Eflip\u200D🚀end";
    const t = generator();
    await t.execute("c", { prompt: nasty, path: "out.png" });
    expect(sdkMocks.generateImage.mock.calls[0][0].prompt).toBe(nasty);
  });

  it("forwards an already-aborted signal to the SDK unchanged", async () => {
    const t = generator();
    const ctrl = new AbortController();
    ctrl.abort();
    await t.execute("c", { prompt: "x", path: "out.png" }, ctrl.signal);
    // The SDK is still called — the SDK is what honors the signal —
    // but the signal we forwarded must be the aborted one. This pins
    // that the tool doesn't strip / replace the signal.
    expect(sdkMocks.generateImage.mock.calls[0][0].abortSignal).toBe(ctrl.signal);
    expect(ctrl.signal.aborted).toBe(true);
  });

  it("survives the SDK returning a partial response shape (image but no images array)", async () => {
    sdkMocks.generateImage.mockResolvedValueOnce({
      image: { uint8Array: new Uint8Array(TINY_PNG), base64: "", mediaType: "image/png" },
      // no `images` array, no providerMetadata, no warnings — minimal shape
    });
    const t = generator();
    const result = await t.execute("c", { prompt: "x", path: "out.png" });
    expect(existsSync(join(cwd, "out.png"))).toBe(true);
    expect(JSON.stringify(result.details)).toContain("out.png");
  });

  it("silently overwrites an existing file at the output path", async () => {
    writeFileSync(join(cwd, "out.png"), Buffer.from("OLD CONTENT"));
    const t = generator();
    await t.execute("c", { prompt: "x", path: "out.png" });
    // Dynamic import instead of require(): works in both ESM and CommonJS.
    const { readFileSync } = await import("node:fs");
    const written = readFileSync(join(cwd, "out.png"));
    // The new bytes overwrote the old marker.
    expect(written.toString()).not.toContain("OLD CONTENT");
  });

  it("returns a structured error (not a crash) when the SDK rejects with an ENOSPC-style error", async () => {
    // The disk is not actually filled and the write path is never reached:
    // the SDK mock rejects with an Error carrying `code: "ENOSPC"`, which
    // proves the tool's catch wraps errors that carry a system error code.
    sdkMocks.generateImage.mockRejectedValueOnce(Object.assign(new Error("ENOSPC: no space left"), { code: "ENOSPC" }));
    const t = generator();
    const result = await t.execute("c", { prompt: "x", path: "out.png" });
    expect(JSON.stringify(result)).toMatch(/ENOSPC|space|error/i);
    expect(existsSync(join(cwd, "out.png"))).toBe(false);
  });

  it("isolates state across consecutive calls (no leaked args between invocations)", async () => {
    const t = generator();
    await t.execute("c", { prompt: "first", path: "a.png", seed: 1 });
    await t.execute("c", { prompt: "second", path: "b.png" });
    expect(sdkMocks.generateImage.mock.calls).toHaveLength(2);
    expect(sdkMocks.generateImage.mock.calls[0][0].seed).toBe(1);
    expect(sdkMocks.generateImage.mock.calls[1][0].seed).toBeUndefined();
    expect(sdkMocks.generateImage.mock.calls[0][0].prompt).toBe("first");
    expect(sdkMocks.generateImage.mock.calls[1][0].prompt).toBe("second");
  });

  it("forwards exotic seeds (negative, zero) as-is — clamping is the provider's job", async () => {
    const t = generator();
    await t.execute("c", { prompt: "x", path: "a.png", seed: -1 });
    await t.execute("c", { prompt: "x", path: "b.png", seed: 0 });
    expect(sdkMocks.generateImage.mock.calls[0][0].seed).toBe(-1);
    expect(sdkMocks.generateImage.mock.calls[1][0].seed).toBe(0);
  });
});
773
/**
 * video_generate: adversarial inputs and failure modes.
 *
 * Covers prompt pass-through, verbatim forwarding of out-of-range numeric
 * options, error mapping for SDK rejections, and per-call isolation of the
 * bytes written to disk.
 */
describe("video_generate — paranoid", () => {
  const TOOL_NAME = "video_generate";

  function build() { return createImageTools(cwd, [cwd], [TOOL_NAME], makeVault()); }

  /** The tool under test, backed by the default test vault. */
  function videoTool() { return pick(build(), TOOL_NAME); }

  /** Minimal resolved SDK result whose single video consists of `bytes`. */
  function videoResultOf(bytes) {
    return {
      video: { uint8Array: new Uint8Array(bytes), base64: "", mediaType: "video/mp4" },
      videos: [], providerMetadata: {}, warnings: [], responses: [{}],
    };
  }

  it("forwards an empty-string prompt verbatim", async () => {
    const t = videoTool();
    await t.execute("c", { prompt: "", path: "out.mp4" });
    expect(sdkMocks.experimental_generateVideo.mock.calls[0][0].prompt).toBe("");
  });

  it("forwards a 200KB prompt without truncation", async () => {
    const huge = "scene: " + "a wave crashes slowly. ".repeat(10000);
    expect(huge.length).toBeGreaterThan(200_000);
    const t = videoTool();
    await t.execute("c", { prompt: huge, path: "out.mp4" });
    expect(sdkMocks.experimental_generateVideo.mock.calls[0][0].prompt.length).toBe(huge.length);
  });

  it("forwards exotic numeric inputs (zero / negative duration / fps) verbatim — provider validates", async () => {
    const t = videoTool();
    await t.execute("c", { prompt: "x", path: "out.mp4", duration: 0, fps: -10 });
    const args = sdkMocks.experimental_generateVideo.mock.calls[0][0];
    expect(args.duration).toBe(0);
    expect(args.fps).toBe(-10);
  });

  it("survives a malformed aspect_ratio string by passing it through (SDK rejects, we map error)", async () => {
    sdkMocks.experimental_generateVideo.mockRejectedValueOnce(new Error("Invalid aspectRatio format"));
    const t = videoTool();
    await expectFailure(t.execute("c", { prompt: "x", path: "out.mp4", aspect_ratio: "not-a-ratio" }), /aspect|invalid|format/i);
    // A rejected generation must not leave an output file behind.
    expect(existsSync(join(cwd, "out.mp4"))).toBe(false);
  });

  it("isolates state across consecutive calls (different bytes each time)", async () => {
    sdkMocks.experimental_generateVideo
      .mockResolvedValueOnce(videoResultOf([1, 2, 3]))
      .mockResolvedValueOnce(videoResultOf([4, 5, 6, 7]));
    const t = videoTool();
    await t.execute("c", { prompt: "a", path: "a.mp4" });
    await t.execute("c", { prompt: "b", path: "b.mp4" });
    // File sizes on disk match the byte counts of the respective SDK results.
    expect(statSync(join(cwd, "a.mp4")).size).toBe(3);
    expect(statSync(join(cwd, "b.mp4")).size).toBe(4);
  });

  it("preserves nasty unicode in the prompt", async () => {
    // Escaped rather than embedded raw: a literal U+202E (right-to-left
    // override) visually reorders the surrounding source text, and U+200D
    // (zero-width joiner) is invisible. The runtime string is unchanged.
    const nasty = "scene\u202E🚀\u200D";
    const t = videoTool();
    await t.execute("c", { prompt: nasty, path: "out.mp4" });
    expect(sdkMocks.experimental_generateVideo.mock.calls[0][0].prompt).toBe(nasty);
  });

  it("returns a structured error (no crash) when the SDK rejects with a non-Error value", async () => {
    sdkMocks.experimental_generateVideo.mockRejectedValueOnce({ code: "weird_object", reason: "no message" });
    const t = videoTool();
    const result = await t.execute("c", { prompt: "x", path: "out.mp4" });
    expect(JSON.stringify(result)).toMatch(/error/i);
  });
});
823
/**
 * image_analyze: adversarial inputs and failure modes.
 *
 * Covers pre-flight file checks (missing file, directory, size cap and its
 * exact boundary), credential fallback from vault to environment, media-type
 * derivation from the extension, prompt / max_tokens pass-through, and error
 * wrapping for SDK rejections.
 */
describe("image_analyze — paranoid", () => {
  const TOOL_NAME = "image_analyze";

  function build() { return createImageTools(cwd, [cwd], [TOOL_NAME], makeVault()); }

  /** The tool under test, backed by the default test vault. */
  function analyzer() { return pick(build(), TOOL_NAME); }

  /**
   * Runs `fn` with the given environment overrides applied, then restores the
   * previous values even when `fn` throws. An override of `undefined` unsets
   * the variable for the duration of the call.
   *
   * Restoring (instead of unconditionally deleting) keeps a key that happens
   * to be set in the developer's or CI environment from being wiped for the
   * rest of the run.
   */
  async function withEnv(overrides, fn) {
    const previous = {};
    for (const [key, value] of Object.entries(overrides)) {
      previous[key] = process.env[key];
      if (value === undefined) {
        delete process.env[key];
      } else {
        process.env[key] = value;
      }
    }
    try {
      return await fn();
    } finally {
      for (const [key, value] of Object.entries(previous)) {
        if (value === undefined) {
          delete process.env[key];
        } else {
          process.env[key] = value;
        }
      }
    }
  }

  it("returns a sane result when the SDK gives back an empty text", async () => {
    writeFileSync(join(cwd, "i.png"), TINY_PNG);
    sdkMocks.generateText.mockResolvedValueOnce({
      text: "", usage: { inputTokens: 5, outputTokens: 0, totalTokens: 5 },
      providerMetadata: {}, warnings: [], response: {},
    });
    const t = analyzer();
    const result = await t.execute("c", { path: "i.png" });
    expect(result).toBeDefined();
    expect(text(result)).toBe("");
    expect(result.details.tokens).toBe(5);
  });

  it("does not call the SDK when the file is missing", async () => {
    const t = analyzer();
    const result = await t.execute("c", { path: "nope.png" });
    expect(JSON.stringify(result.details)).toMatch(/file_read_error|enoent|no such/i);
    expect(sdkMocks.generateText).not.toHaveBeenCalled();
  });

  it("does not call the SDK when the file exceeds the 20 MB cap", async () => {
    const big = Buffer.alloc(21 * 1024 * 1024);
    writeFileSync(join(cwd, "huge.png"), big);
    const t = analyzer();
    const result = await t.execute("c", { path: "huge.png" });
    expect(JSON.stringify(result.details)).toMatch(/file_too_large/);
    expect(sdkMocks.generateText).not.toHaveBeenCalled();
  });

  it("falls back to OPENAI_API_KEY env when the vault has no openai key", async () => {
    await withEnv({ OPENAI_API_KEY: "env-openai-key" }, async () => {
      writeFileSync(join(cwd, "i.png"), TINY_PNG);
      const noKeysVault = {
        get: () => undefined, getSmtp: () => undefined, getImap: () => undefined,
        getKey: () => undefined, has: () => false, list: () => [],
      };
      const t = pick(createImageTools(cwd, [cwd], [TOOL_NAME], noKeysVault), TOOL_NAME);
      await t.execute("c", { path: "i.png" });
      expect(sdkMocks.resolveVisionProvider).toHaveBeenCalledWith("openai", "env-openai-key");
    });
  });

  it("derives the correct mediaType from the file extension (jpeg)", async () => {
    // PNG bytes under a .jpg name: only the extension drives the media type.
    writeFileSync(join(cwd, "photo.jpg"), TINY_PNG);
    const t = analyzer();
    await t.execute("c", { path: "photo.jpg" });
    const args = sdkMocks.generateText.mock.calls[0][0];
    expect(args.messages[0].content[1].mediaType).toBe("image/jpeg");
  });

  it("accepts a file at exactly the 20 MB boundary", async () => {
    const exact = Buffer.alloc(20 * 1024 * 1024); // == MAX_IMAGE_SIZE
    writeFileSync(join(cwd, "edge.png"), exact);
    const t = analyzer();
    const result = await t.execute("c", { path: "edge.png" });
    // No file_too_large error: the cap is inclusive, so a file of exactly
    // the maximum size is still accepted and only larger files are rejected.
    expect(JSON.stringify(result.details)).not.toMatch(/file_too_large/);
    expect(sdkMocks.generateText).toHaveBeenCalledTimes(1);
  });

  it("rejects a file 1 byte over the 20 MB boundary (no SDK call)", async () => {
    const over = Buffer.alloc(20 * 1024 * 1024 + 1);
    writeFileSync(join(cwd, "over.png"), over);
    const t = analyzer();
    const result = await t.execute("c", { path: "over.png" });
    expect(JSON.stringify(result.details)).toMatch(/file_too_large/);
    expect(sdkMocks.generateText).not.toHaveBeenCalled();
  });

  it("forwards a 50KB user prompt to the SDK without truncation", async () => {
    writeFileSync(join(cwd, "i.png"), TINY_PNG);
    const longPrompt = "Analyze: " + "consider every shadow and hue. ".repeat(2000);
    expect(longPrompt.length).toBeGreaterThan(50_000);
    const t = analyzer();
    await t.execute("c", { path: "i.png", prompt: longPrompt });
    expect(sdkMocks.generateText.mock.calls[0][0].messages[0].content[0].text).toBe(longPrompt);
  });

  it("accepts a non-image file (e.g. text disguised as .png) — content-validation is the SDK's job", async () => {
    writeFileSync(join(cwd, "fake.png"), Buffer.from("THIS IS NOT A PNG, JUST TEXT"));
    const t = analyzer();
    const result = await t.execute("c", { path: "fake.png" });
    // Tool doesn't sniff bytes; it sends them and lets the model reject.
    // Pin: no crash, SDK still called with the raw bytes.
    expect(result).toBeDefined();
    expect(sdkMocks.generateText).toHaveBeenCalledTimes(1);
  });

  it("returns a structured error when the path is a directory, not a file", async () => {
    // Dynamic import instead of require(): works in both ESM and CommonJS.
    const { mkdirSync } = await import("node:fs");
    mkdirSync(join(cwd, "imgs"), { recursive: true });
    const t = analyzer();
    const result = await t.execute("c", { path: "imgs" });
    expect(JSON.stringify(result.details)).toMatch(/file_read_error|EISDIR|directory/i);
    expect(sdkMocks.generateText).not.toHaveBeenCalled();
  });

  it("preserves nasty unicode in the user prompt (NUL, RTL override, ZWJ)", async () => {
    writeFileSync(join(cwd, "i.png"), TINY_PNG);
    // Written with escapes so the control characters are visible in source:
    // U+0000 NUL, U+202E right-to-left override, U+200D zero-width joiner.
    const nasty = "describe: \u0000\u202Eflip\u200Dend";
    const t = analyzer();
    await t.execute("c", { path: "i.png", prompt: nasty });
    expect(sdkMocks.generateText.mock.calls[0][0].messages[0].content[0].text).toBe(nasty);
  });

  it("forwards exotic max_tokens (0, very high) verbatim — clamping is the SDK's job", async () => {
    writeFileSync(join(cwd, "i.png"), TINY_PNG);
    const t = analyzer();
    await t.execute("c", { path: "i.png", max_tokens: 0 });
    await t.execute("c", { path: "i.png", max_tokens: 1_000_000 });
    expect(sdkMocks.generateText.mock.calls[0][0].maxOutputTokens).toBe(0);
    expect(sdkMocks.generateText.mock.calls[1][0].maxOutputTokens).toBe(1_000_000);
  });

  it("isolates state across consecutive calls (different files, different prompts)", async () => {
    writeFileSync(join(cwd, "a.png"), TINY_PNG);
    writeFileSync(join(cwd, "b.jpg"), TINY_PNG);
    const t = analyzer();
    await t.execute("c", { path: "a.png", prompt: "first" });
    await t.execute("c", { path: "b.jpg", prompt: "second" });
    expect(sdkMocks.generateText.mock.calls).toHaveLength(2);
    expect(sdkMocks.generateText.mock.calls[0][0].messages[0].content[0].text).toBe("first");
    expect(sdkMocks.generateText.mock.calls[1][0].messages[0].content[0].text).toBe("second");
    // mediaType correctly diverges per file.
    expect(sdkMocks.generateText.mock.calls[0][0].messages[0].content[1].mediaType).toBe("image/png");
    expect(sdkMocks.generateText.mock.calls[1][0].messages[0].content[1].mediaType).toBe("image/jpeg");
  });

  it("uses image/png as a safe default for unknown extensions", async () => {
    writeFileSync(join(cwd, "weird.xyz"), TINY_PNG);
    const t = analyzer();
    await t.execute("c", { path: "weird.xyz" });
    expect(sdkMocks.generateText.mock.calls[0][0].messages[0].content[1].mediaType).toBe("image/png");
  });

  it("returns a structured error (no crash) when the SDK rejects with a non-Error value", async () => {
    writeFileSync(join(cwd, "i.png"), TINY_PNG);
    sdkMocks.generateText.mockRejectedValueOnce("string rejection");
    const t = analyzer();
    const result = await t.execute("c", { path: "i.png" });
    expect(JSON.stringify(result)).toMatch(/error/i);
  });
});
957
/**
 * audio_transcribe: adversarial inputs and failure modes.
 *
 * Covers sparse SDK responses, pre-flight file checks (missing file,
 * directory, size cap and its exact boundary), credential fallback from vault
 * to environment for both the openai and deepgram providers, prompt
 * pass-through, per-call isolation, and error wrapping for SDK rejections.
 */
describe("audio_transcribe — paranoid", () => {
  const TOOL_NAME = "audio_transcribe";

  function build() { return createAudioTools(cwd, [cwd], [TOOL_NAME], makeVault()); }

  /** The tool under test, backed by the default test vault. */
  function transcriber() { return pick(build(), TOOL_NAME); }

  /** The tool under test, backed by a vault that holds no credentials at all. */
  function transcriberWithoutVaultKeys() {
    const noKeysVault = {
      get: () => undefined, getSmtp: () => undefined, getImap: () => undefined,
      getKey: () => undefined, has: () => false, list: () => [],
    };
    return pick(createAudioTools(cwd, [cwd], [TOOL_NAME], noKeysVault), TOOL_NAME);
  }

  /**
   * Runs `fn` with the given environment overrides applied, then restores the
   * previous values even when `fn` throws. An override of `undefined` unsets
   * the variable for the duration of the call.
   *
   * Restoring (instead of unconditionally deleting) keeps a key that happens
   * to be set in the developer's or CI environment from being wiped for the
   * rest of the run.
   */
  async function withEnv(overrides, fn) {
    const previous = {};
    for (const [key, value] of Object.entries(overrides)) {
      previous[key] = process.env[key];
      if (value === undefined) {
        delete process.env[key];
      } else {
        process.env[key] = value;
      }
    }
    try {
      return await fn();
    } finally {
      for (const [key, value] of Object.entries(previous)) {
        if (value === undefined) {
          delete process.env[key];
        } else {
          process.env[key] = value;
        }
      }
    }
  }

  it("formats unknown language / unknown duration gracefully when the SDK omits them", async () => {
    writeFileSync(join(cwd, "r.mp3"), Buffer.from("data"));
    sdkMocks.experimental_transcribe.mockResolvedValueOnce({
      text: "Just transcript text.",
      segments: [],
      // no language, no durationInSeconds
      warnings: [], providerMetadata: {}, responses: [{}],
    });
    const t = transcriber();
    const result = await t.execute("c", { path: "r.mp3" });
    expect(text(result)).toContain("Just transcript text.");
    expect(text(result)).toMatch(/Language: unknown/i);
    expect(text(result)).toMatch(/Duration: unknown/i);
  });

  it("returns a sane result when the SDK gives back an empty transcript", async () => {
    writeFileSync(join(cwd, "r.mp3"), Buffer.from("data"));
    sdkMocks.experimental_transcribe.mockResolvedValueOnce({
      text: "",
      segments: [],
      language: "en",
      durationInSeconds: 0.5,
      warnings: [], providerMetadata: {}, responses: [{}],
    });
    const t = transcriber();
    const result = await t.execute("c", { path: "r.mp3" });
    expect(result).toBeDefined();
    expect(result.details.textLength).toBe(0);
  });

  it("does not call the SDK when the file is missing", async () => {
    const t = transcriber();
    const result = await t.execute("c", { path: "ghost.mp3" });
    expect(JSON.stringify(result.details)).toMatch(/file_read_error|enoent|no such/i);
    expect(sdkMocks.experimental_transcribe).not.toHaveBeenCalled();
  });

  it("does not call the SDK when the file exceeds the 25 MB cap", async () => {
    const big = Buffer.alloc(26 * 1024 * 1024);
    writeFileSync(join(cwd, "huge.wav"), big);
    const t = transcriber();
    const result = await t.execute("c", { path: "huge.wav" });
    expect(JSON.stringify(result.details)).toMatch(/file_too_large/);
    expect(sdkMocks.experimental_transcribe).not.toHaveBeenCalled();
  });

  it("accepts a file at exactly the 25 MB boundary", async () => {
    writeFileSync(join(cwd, "edge.wav"), Buffer.alloc(25 * 1024 * 1024));
    const t = transcriber();
    const result = await t.execute("c", { path: "edge.wav" });
    expect(JSON.stringify(result.details)).not.toMatch(/file_too_large/);
    expect(sdkMocks.experimental_transcribe).toHaveBeenCalledTimes(1);
  });

  it("does not call the SDK when the path is a directory", async () => {
    // Dynamic import instead of require(): works in both ESM and CommonJS.
    const { mkdirSync } = await import("node:fs");
    mkdirSync(join(cwd, "audio_dir"), { recursive: true });
    const t = transcriber();
    const result = await t.execute("c", { path: "audio_dir" });
    expect(JSON.stringify(result.details)).toMatch(/file_read_error|EISDIR|directory/i);
    expect(sdkMocks.experimental_transcribe).not.toHaveBeenCalled();
  });

  it("falls back to OPENAI_API_KEY env when the vault has no openai key", async () => {
    await withEnv({ OPENAI_API_KEY: "env-openai-key" }, async () => {
      writeFileSync(join(cwd, "r.mp3"), Buffer.from("data"));
      const t = transcriberWithoutVaultKeys();
      await t.execute("c", { path: "r.mp3" });
      expect(sdkMocks.resolveTranscribeProvider).toHaveBeenCalledWith("openai", "env-openai-key");
    });
  });

  it("falls back to DEEPGRAM_API_KEY env for the deepgram provider", async () => {
    await withEnv({ DEEPGRAM_API_KEY: "env-dg-key" }, async () => {
      writeFileSync(join(cwd, "r.mp3"), Buffer.from("data"));
      const t = transcriberWithoutVaultKeys();
      await t.execute("c", { path: "r.mp3", model: "deepgram/nova-3" });
      expect(sdkMocks.resolveTranscribeProvider).toHaveBeenCalledWith("deepgram", "env-dg-key");
    });
  });

  it("returns a structured error when neither vault nor env has the right key", async () => {
    await withEnv({ OPENAI_API_KEY: undefined, DEEPGRAM_API_KEY: undefined }, async () => {
      writeFileSync(join(cwd, "r.mp3"), Buffer.from("data"));
      const t = transcriberWithoutVaultKeys();
      await expectFailure(t.execute("c", { path: "r.mp3" }), /missing|openai_api_key|env/i);
      expect(sdkMocks.resolveTranscribeProvider).not.toHaveBeenCalled();
    });
  });

  it("forwards a 5KB whisper prompt verbatim to providerOptions (no truncation)", async () => {
    writeFileSync(join(cwd, "r.mp3"), Buffer.from("data"));
    const giant = "Glossary: " + "Polpo, ".repeat(700);
    expect(giant.length).toBeGreaterThan(4000);
    const t = transcriber();
    await t.execute("c", { path: "r.mp3", prompt: giant });
    expect(sdkMocks.experimental_transcribe.mock.calls[0][0].providerOptions.openai.prompt).toBe(giant);
  });

  it("preserves nasty unicode (NUL, RTL override, ZWJ) in the prompt", async () => {
    writeFileSync(join(cwd, "r.mp3"), Buffer.from("data"));
    // Written with escapes so the control characters are visible in source:
    // U+0000 NUL, U+202E right-to-left override, U+200D zero-width joiner.
    const nasty = "before\u0000after\u202Eflip\u200Dend";
    const t = transcriber();
    await t.execute("c", { path: "r.mp3", prompt: nasty });
    expect(sdkMocks.experimental_transcribe.mock.calls[0][0].providerOptions.openai.prompt).toBe(nasty);
  });

  it("isolates state across consecutive calls (different bytes, different opts)", async () => {
    writeFileSync(join(cwd, "a.mp3"), Buffer.from("AAA"));
    writeFileSync(join(cwd, "b.wav"), Buffer.from("BBBB"));
    const t = transcriber();
    await t.execute("c", { path: "a.mp3", language: "en" });
    await t.execute("c", { path: "b.wav", model: "deepgram/nova-3", language: "it" });
    expect(sdkMocks.experimental_transcribe.mock.calls).toHaveLength(2);
    // Options land under the provider-specific key for each call.
    expect(sdkMocks.experimental_transcribe.mock.calls[0][0].providerOptions.openai.language).toBe("en");
    expect(sdkMocks.experimental_transcribe.mock.calls[1][0].providerOptions.deepgram.language).toBe("it");
    // Each call sees its own bytes.
    const firstBytes = sdkMocks.experimental_transcribe.mock.calls[0][0].audio;
    const secondBytes = sdkMocks.experimental_transcribe.mock.calls[1][0].audio;
    expect(firstBytes.byteLength).toBe(3);
    expect(secondBytes.byteLength).toBe(4);
  });

  it("returns a structured error (no crash) when the SDK rejects with a non-Error value", async () => {
    writeFileSync(join(cwd, "r.mp3"), Buffer.from("data"));
    sdkMocks.experimental_transcribe.mockRejectedValueOnce("plain string rejection");
    const t = transcriber();
    const result = await t.execute("c", { path: "r.mp3" });
    expect(JSON.stringify(result)).toMatch(/error/i);
  });

  it("survives a corrupt-looking input (bytes that look like text, not audio)", async () => {
    writeFileSync(join(cwd, "fake.mp3"), Buffer.from("THIS IS NOT REALLY AN MP3"));
    const t = transcriber();
    // Tool doesn't sniff format — just ships bytes. Pin: no crash.
    const result = await t.execute("c", { path: "fake.mp3" });
    expect(result).toBeDefined();
    expect(sdkMocks.experimental_transcribe).toHaveBeenCalledTimes(1);
  });
});
1105
/**
 * search_web: adversarial inputs and malformed upstream responses against a
 * stubbed Exa endpoint (oversized query, shell metacharacters, unexpected
 * response shape, aborted fetch, partially populated results).
 */
describe("search_web — paranoid", () => {
  const TOOL_NAME = "search_web";

  // Builds a new provider + tool set on every call and returns the tool under test.
  const makeTool = () =>
    pick(createSearchTools(new ExaSearchProvider({ apiKey: "fake-exa-key" }), [TOOL_NAME]), TOOL_NAME);

  // Installs a catch-all route that answers HTTP 200 with `payload` as JSON.
  const answerEverythingWith = (payload) => {
    routeFetch([
      {
        match: () => true,
        response: () => new Response(JSON.stringify(payload), { status: 200 }),
      },
    ]);
  };

  it("survives a 5KB query without errors", async () => {
    answerEverythingWith({ results: [] });
    const giant = "polpo " + "search ".repeat(1000);

    const tool = makeTool();
    await tool.execute("c", { query: giant });

    const sentBody = JSON.parse(lastRequests[0].init?.body);
    expect(sentBody.query).toBe(giant);
  });

  it("survives a query with quotes / shell metachars (no injection)", async () => {
    answerEverythingWith({ results: [] });
    const hostileQuery = `"; rm -rf /; echo "`;

    const tool = makeTool();
    await tool.execute("c", { query: hostileQuery });

    // The query reaches Exa as a string, no shell escaping involved.
    const sentBody = JSON.parse(lastRequests[0].init?.body);
    expect(sentBody.query).toBe(`"; rm -rf /; echo "`);
  });

  it("doesn't crash on a malformed Exa response (results is undefined)", async () => {
    answerEverythingWith({ unexpectedShape: true });

    const tool = makeTool();
    const outcome = await tool.execute("c", { query: "x" });

    // No crash. Either reports 0 results or a parsing error.
    expect(outcome).toBeDefined();
  });

  it("returns a clean error when fetch hits an AbortError (timeout)", async () => {
    // Bypass routeFetch: every fetch call rejects with an Error named AbortError.
    globalThis.fetch = vi.fn(async () => {
      throw Object.assign(new Error("timeout"), { name: "AbortError" });
    });

    const tool = makeTool();
    const outcome = await tool.execute("c", { query: "x" });

    expect(text(outcome).toLowerCase()).toMatch(/timeout|abort|error|fail/);
  });

  it("survives an Exa response with a partially-populated result (title or url missing)", async () => {
    // Real Exa responses are typed; we don't expect raw nulls in
    // the array. But fields *inside* a result can be missing
    // (publishedDate often is, sometimes the title in the case of
    // social media URLs). Pin: the formatter must not throw on a
    // missing title or url.
    const sparseResults = [
      { url: "https://only-url.com" }, // no title
      { title: "Only title" }, // no url
      { title: "Both", url: "https://both.com" },
    ];
    answerEverythingWith({ results: sparseResults });

    const tool = makeTool();
    const outcome = await tool.execute("c", { query: "x" });

    expect(outcome).toBeDefined();
    expect(text(outcome)).toContain("Both");
  });
});
1171
+ describe("search_find_similar — paranoid", () => {
1172
+ function build() { return createSearchTools(new ExaSearchProvider({ apiKey: "fake-exa-key" }), ["search_find_similar"]); }
1173
+ it("doesn't crash on an empty URL string (defers to Exa for validation)", async () => {
1174
+ routeFetch([
1175
+ { match: () => true,
1176
+ response: () => new Response(JSON.stringify({ error: "bad url" }), { status: 400 }) },
1177
+ ]);
1178
+ const t = pick(build(), "search_find_similar");
1179
+ const result = await t.execute("c", { url: "" });
1180
+ expect(result).toBeDefined();
1181
+ // Either tool refuses pre-flight or Exa rejects; both fine.
1182
+ });
1183
+ it("forwards numResults to Exa within the body", async () => {
1184
+ routeFetch([
1185
+ { match: () => true,
1186
+ response: () => new Response(JSON.stringify({ results: [] }), { status: 200 }) },
1187
+ ]);
1188
+ const t = pick(build(), "search_find_similar");
1189
+ await t.execute("c", { url: "https://x", numResults: 7 });
1190
+ const body = JSON.parse(lastRequests[0].init?.body);
1191
+ expect(body.numResults).toBe(7);
1192
+ });
1193
+ it("handles a 503 service unavailable cleanly", async () => {
1194
+ routeFetch([
1195
+ { match: () => true,
1196
+ response: () => new Response("maintenance", { status: 503 }) },
1197
+ ]);
1198
+ const t = pick(build(), "search_find_similar");
1199
+ const result = await t.execute("c", { url: "https://x" });
1200
+ expect(text(result).toLowerCase()).toMatch(/503|maintenance|error|server/);
1201
+ });
1202
+ });
1203
+ // ════════════════════════════════════════════════════════════
1204
+ // audio_speak (Vercel AI SDK — experimental_generateSpeech)
1205
+ // ════════════════════════════════════════════════════════════
1206
+ describe("audio_speak", () => {
1207
+ function build() { return createAudioTools(cwd, [cwd], ["audio_speak"], makeVault()); }
1208
+ it("calls the SDK with the resolved openai tts-1 model and writes the bytes (default provider)", async () => {
1209
+ const t = pick(build(), "audio_speak");
1210
+ const result = await t.execute("c", { text: "Hello world", path: "out.mp3" });
1211
+ expect(existsSync(join(cwd, "out.mp3"))).toBe(true);
1212
+ expect(JSON.stringify(result.details)).toContain("out.mp3");
1213
+ expect(result.details).toMatchObject({
1214
+ provider: "openai",
1215
+ model: "tts-1",
1216
+ voice: "alloy",
1217
+ format: "mp3",
1218
+ textLength: 11,
1219
+ });
1220
+ expect(sdkMocks.resolveSpeakProvider).toHaveBeenCalledTimes(1);
1221
+ expect(sdkMocks.resolveSpeakProvider.mock.calls[0][0]).toBe("openai");
1222
+ expect(sdkMocks.resolveSpeakProvider.mock.calls[0][1]).toMatchObject({ apiKey: "fake-openai-key" });
1223
+ const args = sdkMocks.experimental_generateSpeech.mock.calls[0][0];
1224
+ expect(args.model).toEqual({ _isMockSpeechModel: true, providerName: "openai", modelId: "tts-1" });
1225
+ expect(args.text).toBe("Hello world");
1226
+ expect(args.voice).toBe("alloy");
1227
+ expect(args.outputFormat).toBe("mp3");
1228
+ });
1229
+ it("forwards openai-specific knobs (speed, instructions) via providerOptions", async () => {
1230
+ const t = pick(build(), "audio_speak");
1231
+ await t.execute("c", {
1232
+ text: "x", path: "out.mp3",
1233
+ speed: 1.5,
1234
+ instructions: "Speak in a cheerful tone",
1235
+ });
1236
+ const args = sdkMocks.experimental_generateSpeech.mock.calls[0][0];
1237
+ expect(args.speed).toBe(1.5);
1238
+ expect(args.instructions).toBe("Speak in a cheerful tone");
1239
+ expect(args.providerOptions).toEqual({
1240
+ openai: { speed: 1.5, instructions: "Speak in a cheerful tone" },
1241
+ });
1242
+ });
1243
+ it("routes to deepgram (Aura) when model is deepgram-prefixed", async () => {
1244
+ const t = pick(build(), "audio_speak");
1245
+ await t.execute("c", { text: "hi", path: "out.mp3", model: "deepgram/aura-2-asteria-en" });
1246
+ expect(sdkMocks.resolveSpeakProvider.mock.calls[0][0]).toBe("deepgram");
1247
+ expect(sdkMocks.resolveSpeakProvider.mock.calls[0][1]).toMatchObject({ apiKey: "fake-deepgram-key" });
1248
+ expect(sdkMocks.experimental_generateSpeech.mock.calls[0][0].model.modelId).toBe("aura-2-asteria-en");
1249
+ });
1250
+ it("routes to elevenlabs with the multilingual default + Rachel voice ID", async () => {
1251
+ const t = pick(build(), "audio_speak");
1252
+ await t.execute("c", { text: "hi", path: "out.mp3", model: "elevenlabs/eleven_multilingual_v2" });
1253
+ expect(sdkMocks.resolveSpeakProvider.mock.calls[0][0]).toBe("elevenlabs");
1254
+ expect(sdkMocks.resolveSpeakProvider.mock.calls[0][1]).toMatchObject({ apiKey: "fake-elevenlabs-key" });
1255
+ const args = sdkMocks.experimental_generateSpeech.mock.calls[0][0];
1256
+ expect(args.model.modelId).toBe("eleven_multilingual_v2");
1257
+ expect(args.voice).toBe("21m00Tcm4TlvDq8ikWAM");
1258
+ // ElevenLabs uses a more granular outputFormat string.
1259
+ expect(args.outputFormat).toBe("mp3_44100_128");
1260
+ });
1261
+ it("routes to edge with shell+fs (no apiKey) and forwards gender via providerOptions", async () => {
1262
+ const t = pick(build(), "audio_speak");
1263
+ await t.execute("c", {
1264
+ text: "ciao", path: "out.mp3",
1265
+ model: "edge/edge-tts",
1266
+ language: "it",
1267
+ gender: "male",
1268
+ });
1269
+ expect(sdkMocks.resolveSpeakProvider.mock.calls[0][0]).toBe("edge");
1270
+ const cfg = sdkMocks.resolveSpeakProvider.mock.calls[0][1];
1271
+ expect(cfg.apiKey).toBeUndefined();
1272
+ expect(cfg.shell).toBeDefined();
1273
+ expect(cfg.fs).toBeDefined();
1274
+ const args = sdkMocks.experimental_generateSpeech.mock.calls[0][0];
1275
+ expect(args.language).toBe("it");
1276
+ expect(args.providerOptions).toEqual({ edge: { gender: "male" } });
1277
+ });
1278
+ it("respects an explicit voice override on openai", async () => {
1279
+ const t = pick(build(), "audio_speak");
1280
+ await t.execute("c", { text: "x", path: "out.mp3", voice: "onyx" });
1281
+ expect(sdkMocks.experimental_generateSpeech.mock.calls[0][0].voice).toBe("onyx");
1282
+ });
1283
+ it("respects custom model override on every provider", async () => {
1284
+ const t = pick(build(), "audio_speak");
1285
+ await t.execute("c", { text: "x", path: "out.mp3", model: "openai/tts-1-hd" });
1286
+ expect(sdkMocks.experimental_generateSpeech.mock.calls[0][0].model.modelId).toBe("tts-1-hd");
1287
+ });
1288
+ it("forwards the abort signal to the SDK", async () => {
1289
+ const t = pick(build(), "audio_speak");
1290
+ const ctrl = new AbortController();
1291
+ await t.execute("c", { text: "x", path: "out.mp3" }, ctrl.signal);
1292
+ expect(sdkMocks.experimental_generateSpeech.mock.calls[0][0].abortSignal).toBe(ctrl.signal);
1293
+ });
1294
+ it("refuses an output path outside the sandbox before the SDK is called", async () => {
1295
+ const t = pick(build(), "audio_speak");
1296
+ await expect(t.execute("c", { text: "x", path: "/etc/escape.mp3" }))
1297
+ .rejects.toThrow(/sandbox|allowed|denied/i);
1298
+ expect(sdkMocks.experimental_generateSpeech).not.toHaveBeenCalled();
1299
+ });
1300
+ it("rejects when the SDK returns no audio bytes", async () => {
1301
+ sdkMocks.experimental_generateSpeech.mockResolvedValueOnce({
1302
+ audio: { uint8Array: new Uint8Array(0), base64: "", mediaType: "audio/mpeg" },
1303
+ warnings: [], request: {}, response: { timestamp: new Date(), modelId: "tts-1" }, providerMetadata: {},
1304
+ });
1305
+ // The tool no longer falls back to edge-tts (see the "no auto-fallback"
1305
+ // suite); this second empty result only guards against a reintroduced retry.
1307
+ sdkMocks.experimental_generateSpeech.mockResolvedValueOnce({
1308
+ audio: { uint8Array: new Uint8Array(0), base64: "", mediaType: "audio/mpeg" },
1309
+ warnings: [], request: {}, response: { timestamp: new Date(), modelId: "edge-tts" }, providerMetadata: {},
1310
+ });
1311
+ const t = pick(build(), "audio_speak");
1312
+ const result = await t.execute("c", { text: "x", path: "out.mp3" });
1313
+ expect(JSON.stringify(result)).toMatch(/no audio bytes|error|fallback/i);
1314
+ expect(existsSync(join(cwd, "out.mp3"))).toBe(false);
1315
+ });
1316
+ });
1317
+ describe("audio_speak — no auto-fallback (explicit failure)", () => {
1318
+ // Pre-refactor the tool silently retried on edge-tts when the cloud
1319
+ // provider failed. That was hidden behavior and surprised users who
1320
+ // looked at logs vs. invoices. Now the failure is loud and visible
1321
+ // — agents (or higher-level retry policies) can decide what to do.
1322
+ function build() { return createAudioTools(cwd, [cwd], ["audio_speak"], makeVault()); }
1323
+ it("does NOT silently retry on edge-tts when the cloud provider throws", async () => {
1324
+ sdkMocks.experimental_generateSpeech.mockRejectedValueOnce(new Error("AI_APICallError: 401 invalid key"));
1325
+ const t = pick(build(), "audio_speak");
1326
+ const result = await t.execute("c", { text: "ciao", path: "out.mp3", language: "it" });
1327
+ // The error is returned verbatim — no [Fallback] prefix, no
1328
+ // second SDK call, no edge-tts attempt.
1329
+ expect(text(result)).toMatch(/401|invalid key/i);
1330
+ expect(text(result)).not.toMatch(/\[Fallback\]/);
1331
+ expect(sdkMocks.experimental_generateSpeech).toHaveBeenCalledTimes(1);
1332
+ expect(sdkMocks.resolveSpeakProvider.mock.calls.map(c => c[0])).toEqual(["openai"]);
1333
+ expect(existsSync(join(cwd, "out.mp3"))).toBe(false);
1334
+ });
1335
+ it("returns a structured error with the failing provider name", async () => {
1336
+ sdkMocks.experimental_generateSpeech.mockRejectedValueOnce(new Error("AI_APICallError: 429"));
1337
+ const t = pick(build(), "audio_speak");
1338
+ const result = await t.execute("c", { text: "x", path: "out.mp3", model: "elevenlabs/eleven_multilingual_v2" });
1339
+ expect(text(result)).toMatch(/429/);
1340
+ expect(JSON.stringify(result.details)).toMatch(/error/);
1341
+ expect(sdkMocks.experimental_generateSpeech).toHaveBeenCalledTimes(1);
1342
+ });
1343
+ it("edge provider failure surfaces directly (no recursion to itself)", async () => {
1344
+ sdkMocks.experimental_generateSpeech.mockRejectedValueOnce(new Error("edge-tts CLI is not installed"));
1345
+ const t = pick(build(), "audio_speak");
1346
+ const result = await t.execute("c", { text: "x", path: "out.mp3", model: "edge/edge-tts" });
1347
+ expect(JSON.stringify(result)).toMatch(/edge.*not installed/i);
1348
+ expect(sdkMocks.experimental_generateSpeech).toHaveBeenCalledTimes(1);
1349
+ });
1350
+ });
1351
+ describe("audio_speak — paranoid", () => {
1352
+ function build() { return createAudioTools(cwd, [cwd], ["audio_speak"], makeVault()); }
1353
+ it("forwards an empty-string text verbatim (no auto-pad, no crash)", async () => {
1354
+ const t = pick(build(), "audio_speak");
1355
+ await t.execute("c", { text: "", path: "out.mp3" });
1356
+ expect(sdkMocks.experimental_generateSpeech.mock.calls[0][0].text).toBe("");
1357
+ });
1358
+ it("forwards a 200KB text without truncation", async () => {
1359
+ const huge = "Read aloud: " + "A long sentence. ".repeat(13000);
1360
+ expect(huge.length).toBeGreaterThan(200_000);
1361
+ const t = pick(build(), "audio_speak");
1362
+ await t.execute("c", { text: huge, path: "out.mp3" });
1363
+ expect(sdkMocks.experimental_generateSpeech.mock.calls[0][0].text.length).toBe(huge.length);
1364
+ });
1365
+ it("preserves nasty unicode (NUL, RTL override, ZWJ, emoji) in the text", async () => {
1366
+ const nasty = "before after‮flip‍🚀end";
1367
+ const t = pick(build(), "audio_speak");
1368
+ await t.execute("c", { text: nasty, path: "out.mp3" });
1369
+ expect(sdkMocks.experimental_generateSpeech.mock.calls[0][0].text).toBe(nasty);
1370
+ });
1371
+ it("forwards exotic numeric speed values (0.25, 4.0) verbatim", async () => {
1372
+ const t = pick(build(), "audio_speak");
1373
+ await t.execute("c", { text: "x", path: "a.mp3", speed: 0.25 });
1374
+ await t.execute("c", { text: "x", path: "b.mp3", speed: 4.0 });
1375
+ expect(sdkMocks.experimental_generateSpeech.mock.calls[0][0].speed).toBe(0.25);
1376
+ expect(sdkMocks.experimental_generateSpeech.mock.calls[1][0].speed).toBe(4.0);
1377
+ });
1378
+ it("infers outputFormat from extension for openai (.wav, .opus, .flac)", async () => {
1379
+ const t = pick(build(), "audio_speak");
1380
+ await t.execute("c", { text: "x", path: "a.wav" });
1381
+ await t.execute("c", { text: "x", path: "b.opus" });
1382
+ await t.execute("c", { text: "x", path: "c.flac" });
1383
+ expect(sdkMocks.experimental_generateSpeech.mock.calls[0][0].outputFormat).toBe("wav");
1384
+ expect(sdkMocks.experimental_generateSpeech.mock.calls[1][0].outputFormat).toBe("opus");
1385
+ expect(sdkMocks.experimental_generateSpeech.mock.calls[2][0].outputFormat).toBe("flac");
1386
+ });
1387
+ it("uses elevenlabs-specific format string for known extensions", async () => {
1388
+ const t = pick(build(), "audio_speak");
1389
+ await t.execute("c", { text: "x", path: "a.wav", model: "elevenlabs/eleven_multilingual_v2" });
1390
+ await t.execute("c", { text: "x", path: "b.flac", model: "elevenlabs/eleven_multilingual_v2" });
1391
+ expect(sdkMocks.experimental_generateSpeech.mock.calls[0][0].outputFormat).toBe("pcm_44100");
1392
+ expect(sdkMocks.experimental_generateSpeech.mock.calls[1][0].outputFormat).toBe("flac");
1393
+ });
1394
+ it("falls back to mp3_44100_128 for unknown extensions on elevenlabs", async () => {
1395
+ const t = pick(build(), "audio_speak");
1396
+ await t.execute("c", { text: "x", path: "weird.xyz", model: "elevenlabs/eleven_multilingual_v2" });
1397
+ expect(sdkMocks.experimental_generateSpeech.mock.calls[0][0].outputFormat).toBe("mp3_44100_128");
1398
+ });
1399
+ it("falls back to OPENAI_API_KEY env when the vault has no openai key", async () => {
1400
+ process.env.OPENAI_API_KEY = "env-openai-key";
1401
+ try {
1402
+ const noKeysVault = {
1403
+ get: () => undefined, getSmtp: () => undefined, getImap: () => undefined,
1404
+ getKey: () => undefined, has: () => false, list: () => [],
1405
+ };
1406
+ const t = pick(createAudioTools(cwd, [cwd], ["audio_speak"], noKeysVault), "audio_speak");
1407
+ await t.execute("c", { text: "x", path: "out.mp3" });
1408
+ expect(sdkMocks.resolveSpeakProvider.mock.calls[0][1]).toMatchObject({ apiKey: "env-openai-key" });
1409
+ }
1410
+ finally {
1411
+ delete process.env.OPENAI_API_KEY;
1412
+ }
1413
+ });
1414
+ it("falls back to ELEVENLABS_API_KEY env for the elevenlabs provider", async () => {
1415
+ process.env.ELEVENLABS_API_KEY = "env-el-key";
1416
+ try {
1417
+ const noKeysVault = {
1418
+ get: () => undefined, getSmtp: () => undefined, getImap: () => undefined,
1419
+ getKey: () => undefined, has: () => false, list: () => [],
1420
+ };
1421
+ const t = pick(createAudioTools(cwd, [cwd], ["audio_speak"], noKeysVault), "audio_speak");
1422
+ await t.execute("c", { text: "x", path: "out.mp3", model: "elevenlabs/eleven_multilingual_v2" });
1423
+ expect(sdkMocks.resolveSpeakProvider.mock.calls[0][1]).toMatchObject({ apiKey: "env-el-key" });
1424
+ }
1425
+ finally {
1426
+ delete process.env.ELEVENLABS_API_KEY;
1427
+ }
1428
+ });
1429
+ it("the edge provider doesn't need any apiKey at all (no env, no vault)", async () => {
1430
+ delete process.env.OPENAI_API_KEY;
1431
+ delete process.env.DEEPGRAM_API_KEY;
1432
+ delete process.env.ELEVENLABS_API_KEY;
1433
+ const noKeysVault = {
1434
+ get: () => undefined, getSmtp: () => undefined, getImap: () => undefined,
1435
+ getKey: () => undefined, has: () => false, list: () => [],
1436
+ };
1437
+ const t = pick(createAudioTools(cwd, [cwd], ["audio_speak"], noKeysVault), "audio_speak");
1438
+ const result = await t.execute("c", { text: "ciao", path: "out.mp3", model: "edge/edge-tts", language: "it" });
1439
+ // No requireEnv crash; edge resolver got no key.
1440
+ const cfg = sdkMocks.resolveSpeakProvider.mock.calls[0][1];
1441
+ expect(cfg.apiKey).toBeUndefined();
1442
+ expect(cfg.shell).toBeDefined();
1443
+ expect(cfg.fs).toBeDefined();
1444
+ expect(existsSync(join(cwd, "out.mp3"))).toBe(true);
1445
+ expect(result.details.provider).toBe("edge");
1446
+ });
1447
+ it("isolates state across consecutive calls (different providers, different bytes)", async () => {
1448
+ sdkMocks.experimental_generateSpeech
1449
+ .mockResolvedValueOnce({
1450
+ audio: { uint8Array: new Uint8Array([1, 2, 3]), base64: "", mediaType: "audio/mpeg" },
1451
+ warnings: [], request: {}, response: { timestamp: new Date(), modelId: "tts-1" }, providerMetadata: {},
1452
+ })
1453
+ .mockResolvedValueOnce({
1454
+ audio: { uint8Array: new Uint8Array([4, 5, 6, 7]), base64: "", mediaType: "audio/mpeg" },
1455
+ warnings: [], request: {}, response: { timestamp: new Date(), modelId: "aura-2-asteria-en" }, providerMetadata: {},
1456
+ });
1457
+ const t = pick(build(), "audio_speak");
1458
+ await t.execute("c", { text: "first", path: "a.mp3" });
1459
+ await t.execute("c", { text: "second", path: "b.mp3", model: "deepgram/nova-3" });
1460
+ expect(statSync(join(cwd, "a.mp3")).size).toBe(3);
1461
+ expect(statSync(join(cwd, "b.mp3")).size).toBe(4);
1462
+ expect(sdkMocks.experimental_generateSpeech.mock.calls[0][0].text).toBe("first");
1463
+ expect(sdkMocks.experimental_generateSpeech.mock.calls[1][0].text).toBe("second");
1464
+ });
1465
+ it("silently overwrites an existing file at the output path", async () => {
1466
+ writeFileSync(join(cwd, "out.mp3"), Buffer.from("OLD"));
1467
+ const t = pick(build(), "audio_speak");
1468
+ await t.execute("c", { text: "fresh", path: "out.mp3" });
1469
+ const written = require("node:fs").readFileSync(join(cwd, "out.mp3"));
1470
+ expect(written.toString()).not.toContain("OLD");
1471
+ });
1472
+ it("does not write a partial file when the SDK throws after some progress", async () => {
1473
+ sdkMocks.experimental_generateSpeech
1474
+ .mockRejectedValueOnce(new Error("provider went away"))
1475
+ .mockRejectedValueOnce(new Error("edge-tts not installed"));
1476
+ const t = pick(build(), "audio_speak");
1477
+ await t.execute("c", { text: "x", path: "out.mp3" }).catch(() => { });
1478
+ expect(existsSync(join(cwd, "out.mp3"))).toBe(false);
1479
+ });
1480
+ it("returns a structured error (no crash) when the SDK rejects with a non-Error value", async () => {
1481
+ sdkMocks.experimental_generateSpeech
1482
+ .mockRejectedValueOnce("plain string")
1483
+ .mockRejectedValueOnce("plain string 2");
1484
+ const t = pick(build(), "audio_speak");
1485
+ const result = await t.execute("c", { text: "x", path: "out.mp3" });
1486
+ expect(JSON.stringify(result)).toMatch(/error/i);
1487
+ });
1488
+ it("returns a structured error when neither vault nor env has the right key (cloud path)", async () => {
1489
+ delete process.env.OPENAI_API_KEY;
1490
+ delete process.env.DEEPGRAM_API_KEY;
1491
+ delete process.env.ELEVENLABS_API_KEY;
1492
+ const noKeysVault = {
1493
+ get: () => undefined, getSmtp: () => undefined, getImap: () => undefined,
1494
+ getKey: () => undefined, has: () => false, list: () => [],
1495
+ };
1496
+ const t = pick(createAudioTools(cwd, [cwd], ["audio_speak"], noKeysVault), "audio_speak");
1497
+ // The tool no longer falls back to edge-tts; this rejection is only consumed if an SDK call is still attempted.
1498
+ sdkMocks.experimental_generateSpeech.mockRejectedValueOnce(new Error("edge-tts CLI is not installed"));
1499
+ const result = await t.execute("c", { text: "x", path: "out.mp3" });
1500
+ expect(JSON.stringify(result)).toMatch(/openai_api_key|missing|env|edge-tts/i);
1501
+ expect(existsSync(join(cwd, "out.mp3"))).toBe(false);
1502
+ });
1503
+ it("respects custom voice override on openai (alloy → onyx)", async () => {
1504
+ const t = pick(build(), "audio_speak");
1505
+ await t.execute("c", { text: "x", path: "out.mp3", voice: "onyx" });
1506
+ expect(sdkMocks.experimental_generateSpeech.mock.calls[0][0].voice).toBe("onyx");
1507
+ expect(sdkMocks.experimental_generateSpeech.mock.calls[0][0].model.modelId).toBe("tts-1");
1508
+ });
1509
+ it("forwards an explicit edge voice (it-IT-DiegoNeural) verbatim", async () => {
1510
+ const t = pick(build(), "audio_speak");
1511
+ await t.execute("c", {
1512
+ text: "ciao", path: "out.mp3",
1513
+ model: "edge/edge-tts", voice: "it-IT-DiegoNeural",
1514
+ });
1515
+ expect(sdkMocks.experimental_generateSpeech.mock.calls[0][0].voice).toBe("it-IT-DiegoNeural");
1516
+ });
1517
+ it("isolates abort across consecutive calls (per-call signal forwarded)", async () => {
1518
+ const t = pick(build(), "audio_speak");
1519
+ const a = new AbortController();
1520
+ const b = new AbortController();
1521
+ await t.execute("c", { text: "1", path: "a.mp3" }, a.signal);
1522
+ await t.execute("c", { text: "2", path: "b.mp3" }, b.signal);
1523
+ expect(sdkMocks.experimental_generateSpeech.mock.calls[0][0].abortSignal).toBe(a.signal);
1524
+ expect(sdkMocks.experimental_generateSpeech.mock.calls[1][0].abortSignal).toBe(b.signal);
1525
+ });
1526
+ });
1527
+ // ════════════════════════════════════════════════════════════
1528
+ // Agent-config model precedence — adversarial pinning
1529
+ //
1530
+ // Each tool resolves its effective model in priority:
1531
+ // 1. per-call `model` input override
1532
+ // 2. agent-config default (passed to factory as imageModel/videoModel/...)
1533
+ // 3. DEFAULT_*_MODEL constant from @polpo-ai/core
1534
+ // These tests pin every transition in that chain so a regression
1535
+ // in one layer doesn't get masked by a fallback in another.
1536
+ // ════════════════════════════════════════════════════════════
1537
+ describe("agent-config model precedence — image_generate", () => {
1538
+ it("uses the agent-configured imageModel when no per-call override is passed", async () => {
1539
+ const tools = createImageTools({
1540
+ cwd, allowedPaths: [cwd], allowedTools: ["image_generate"], vault: makeVault(),
1541
+ imageModel: "fal/fal-ai/flux-pro/v1.1",
1542
+ });
1543
+ const t = pick(tools, "image_generate");
1544
+ await t.execute("c", { prompt: "x", path: "out.png" });
1545
+ expect(sdkMocks.generateImage.mock.calls[0][0].model.modelId).toBe("fal-ai/flux-pro/v1.1");
1546
+ });
1547
+ it("per-call override beats the agent-configured imageModel", async () => {
1548
+ const tools = createImageTools({
1549
+ cwd, allowedPaths: [cwd], allowedTools: ["image_generate"], vault: makeVault(),
1550
+ imageModel: "fal/fal-ai/flux-pro/v1.1",
1551
+ });
1552
+ const t = pick(tools, "image_generate");
1553
+ await t.execute("c", { prompt: "x", path: "out.png", model: "fal/fal-ai/flux/schnell" });
1554
+ expect(sdkMocks.generateImage.mock.calls[0][0].model.modelId).toBe("fal-ai/flux/schnell");
1555
+ });
1556
+ it("falls through to DEFAULT_IMAGE_MODEL when neither override nor config is set", async () => {
1557
+ const tools = createImageTools({
1558
+ cwd, allowedPaths: [cwd], allowedTools: ["image_generate"], vault: makeVault(),
1559
+ });
1560
+ const t = pick(tools, "image_generate");
1561
+ await t.execute("c", { prompt: "x", path: "out.png" });
1562
+ expect(sdkMocks.generateImage.mock.calls[0][0].model.modelId).toBe("fal-ai/flux/dev");
1563
+ });
1564
+ it("returns a structured error when the agent-configured imageModel is malformed", async () => {
1565
+ const tools = createImageTools({
1566
+ cwd, allowedPaths: [cwd], allowedTools: ["image_generate"], vault: makeVault(),
1567
+ imageModel: "no-slash-here",
1568
+ });
1569
+ const t = pick(tools, "image_generate");
1570
+ const result = await t.execute("c", { prompt: "x", path: "out.png" });
1571
+ expect(JSON.stringify(result)).toMatch(/invalid|provider|model/i);
1572
+ expect(sdkMocks.generateImage).not.toHaveBeenCalled();
1573
+ });
1574
+ it("returns a structured error when the per-call override is malformed", async () => {
1575
+ const tools = createImageTools({
1576
+ cwd, allowedPaths: [cwd], allowedTools: ["image_generate"], vault: makeVault(),
1577
+ });
1578
+ const t = pick(tools, "image_generate");
1579
+ const result = await t.execute("c", { prompt: "x", path: "out.png", model: "/empty-provider" });
1580
+ expect(JSON.stringify(result)).toMatch(/invalid|provider|model|non-empty/i);
1581
+ expect(sdkMocks.generateImage).not.toHaveBeenCalled();
1582
+ });
1583
+ it("rejects an unknown image provider with a clear error", async () => {
1584
+ const tools = createImageTools({
1585
+ cwd, allowedPaths: [cwd], allowedTools: ["image_generate"], vault: makeVault(),
1586
+ imageModel: "google/imagen-3",
1587
+ });
1588
+ const t = pick(tools, "image_generate");
1589
+ const result = await t.execute("c", { prompt: "x", path: "out.png" });
1590
+ // The provider-resolver mock captures every call name; only "fal"
1591
+ // is in the supported set today. The tool surfaces the resolver's
1592
+ // error untouched.
1593
+ expect(JSON.stringify(result)).toMatch(/error|google|provider/i);
1594
+ });
1595
+ });
1596
+ describe("agent-config model precedence — video_generate", () => {
1597
+ it("uses the agent-configured videoModel by default", async () => {
1598
+ const tools = createImageTools({
1599
+ cwd, allowedPaths: [cwd], allowedTools: ["video_generate"], vault: makeVault(),
1600
+ videoModel: "fal/luma-ray-2",
1601
+ });
1602
+ const t = pick(tools, "video_generate");
1603
+ await t.execute("c", { prompt: "x", path: "out.mp4" });
1604
+ expect(sdkMocks.experimental_generateVideo.mock.calls[0][0].model.modelId).toBe("luma-ray-2");
1605
+ });
1606
+ it("falls through to DEFAULT_VIDEO_MODEL when nothing is configured", async () => {
1607
+ const tools = createImageTools({
1608
+ cwd, allowedPaths: [cwd], allowedTools: ["video_generate"], vault: makeVault(),
1609
+ });
1610
+ const t = pick(tools, "video_generate");
1611
+ await t.execute("c", { prompt: "x", path: "out.mp4" });
1612
+ expect(sdkMocks.experimental_generateVideo.mock.calls[0][0].model.modelId).toBe("luma-ray-2-flash");
1613
+ });
1614
+ });
1615
+ describe("agent-config model precedence — image_analyze", () => {
1616
+ it("uses the agent-configured visionModel (anthropic) when no override", async () => {
1617
+ writeFileSync(join(cwd, "i.png"), TINY_PNG);
1618
+ const tools = createImageTools({
1619
+ cwd, allowedPaths: [cwd], allowedTools: ["image_analyze"], vault: makeVault(),
1620
+ visionModel: "anthropic/claude-sonnet-4-20250514",
1621
+ });
1622
+ const t = pick(tools, "image_analyze");
1623
+ await t.execute("c", { path: "i.png" });
1624
+ expect(sdkMocks.resolveVisionProvider).toHaveBeenCalledWith("anthropic", expect.any(String));
1625
+ expect(sdkMocks.generateText.mock.calls[0][0].model.modelId).toBe("claude-sonnet-4-20250514");
1626
+ });
1627
+ it("falls through to DEFAULT_VISION_MODEL (openai/gpt-4o-mini) when nothing is configured", async () => {
1628
+ writeFileSync(join(cwd, "i.png"), TINY_PNG);
1629
+ const tools = createImageTools({
1630
+ cwd, allowedPaths: [cwd], allowedTools: ["image_analyze"], vault: makeVault(),
1631
+ });
1632
+ const t = pick(tools, "image_analyze");
1633
+ await t.execute("c", { path: "i.png" });
1634
+ expect(sdkMocks.generateText.mock.calls[0][0].model.modelId).toBe("gpt-4o-mini");
1635
+ });
1636
+ });
1637
+ describe("agent-config model precedence — audio_transcribe", () => {
1638
+ it("uses the agent-configured transcribeModel (deepgram) when no override", async () => {
1639
+ writeFileSync(join(cwd, "r.mp3"), Buffer.from("data"));
1640
+ const tools = createAudioTools({
1641
+ cwd, allowedPaths: [cwd], allowedTools: ["audio_transcribe"], vault: makeVault(),
1642
+ transcribeModel: "deepgram/nova-3",
1643
+ });
1644
+ const t = pick(tools, "audio_transcribe");
1645
+ await t.execute("c", { path: "r.mp3" });
1646
+ expect(sdkMocks.resolveTranscribeProvider).toHaveBeenCalledWith("deepgram", expect.any(String));
1647
+ expect(sdkMocks.experimental_transcribe.mock.calls[0][0].model.modelId).toBe("nova-3");
1648
+ });
1649
+ it("per-call override beats the configured transcribeModel", async () => {
1650
+ writeFileSync(join(cwd, "r.mp3"), Buffer.from("data"));
1651
+ const tools = createAudioTools({
1652
+ cwd, allowedPaths: [cwd], allowedTools: ["audio_transcribe"], vault: makeVault(),
1653
+ transcribeModel: "deepgram/nova-3",
1654
+ });
1655
+ const t = pick(tools, "audio_transcribe");
1656
+ await t.execute("c", { path: "r.mp3", model: "openai/whisper-1" });
1657
+ expect(sdkMocks.experimental_transcribe.mock.calls[0][0].model.modelId).toBe("whisper-1");
1658
+ });
1659
+ });
1660
+ describe("agent-config model precedence — audio_speak", () => {
1661
+ it("uses the agent-configured ttsModel (elevenlabs) when no override", async () => {
1662
+ const tools = createAudioTools({
1663
+ cwd, allowedPaths: [cwd], allowedTools: ["audio_speak"], vault: makeVault(),
1664
+ ttsModel: "elevenlabs/eleven_multilingual_v2",
1665
+ });
1666
+ const t = pick(tools, "audio_speak");
1667
+ await t.execute("c", { text: "hi", path: "out.mp3" });
1668
+ expect(sdkMocks.resolveSpeakProvider.mock.calls[0][0]).toBe("elevenlabs");
1669
+ expect(sdkMocks.experimental_generateSpeech.mock.calls[0][0].model.modelId).toBe("eleven_multilingual_v2");
1670
+ });
1671
+ it("uses the agent-configured ttsModel (edge) without requiring a vault key", async () => {
1672
+ delete process.env.OPENAI_API_KEY;
1673
+ const noKeysVault = {
1674
+ get: () => undefined, getSmtp: () => undefined, getImap: () => undefined,
1675
+ getKey: () => undefined, has: () => false, list: () => [],
1676
+ };
1677
+ const tools = createAudioTools({
1678
+ cwd, allowedPaths: [cwd], allowedTools: ["audio_speak"], vault: noKeysVault,
1679
+ ttsModel: "edge/edge-tts",
1680
+ });
1681
+ const t = pick(tools, "audio_speak");
1682
+ await t.execute("c", { text: "ciao", path: "out.mp3", language: "it" });
1683
+ expect(sdkMocks.resolveSpeakProvider.mock.calls[0][0]).toBe("edge");
1684
+ const cfg = sdkMocks.resolveSpeakProvider.mock.calls[0][1];
1685
+ expect(cfg.apiKey).toBeUndefined();
1686
+ expect(cfg.shell).toBeDefined();
1687
+ expect(cfg.fs).toBeDefined();
1688
+ });
1689
+ it("falls through to DEFAULT_TTS_MODEL when nothing is configured", async () => {
1690
+ const tools = createAudioTools({
1691
+ cwd, allowedPaths: [cwd], allowedTools: ["audio_speak"], vault: makeVault(),
1692
+ });
1693
+ const t = pick(tools, "audio_speak");
1694
+ await t.execute("c", { text: "x", path: "out.mp3" });
1695
+ expect(sdkMocks.experimental_generateSpeech.mock.calls[0][0].model.modelId).toBe("tts-1");
1696
+ });
1697
+ it("returns a structured error when ttsModel string is malformed", async () => {
1698
+ const tools = createAudioTools({
1699
+ cwd, allowedPaths: [cwd], allowedTools: ["audio_speak"], vault: makeVault(),
1700
+ ttsModel: "openai/", // empty model
1701
+ });
1702
+ const t = pick(tools, "audio_speak");
1703
+ const result = await t.execute("c", { text: "x", path: "out.mp3" });
1704
+ expect(JSON.stringify(result)).toMatch(/invalid|provider|model|non-empty/i);
1705
+ expect(sdkMocks.experimental_generateSpeech).not.toHaveBeenCalled();
1706
+ });
1707
+ });
1708
+ describe("agent-config — model strings with multi-segment ids round-trip", () => {
1709
+ // fal exposes models like "fal-ai/flux/dev" that themselves contain
1710
+ // slashes. The first slash splits provider/model; everything after
1711
+ // is one opaque id. Pin this end-to-end through the tool layer.
1712
+ it("image_generate preserves a 4-segment fal model id", async () => {
1713
+ const tools = createImageTools({
1714
+ cwd, allowedPaths: [cwd], allowedTools: ["image_generate"], vault: makeVault(),
1715
+ imageModel: "fal/fal-ai/wan/v2.2-1.3b/text-to-video",
1716
+ });
1717
+ const t = pick(tools, "image_generate");
1718
+ await t.execute("c", { prompt: "x", path: "out.png" });
1719
+ expect(sdkMocks.generateImage.mock.calls[0][0].model.modelId).toBe("fal-ai/wan/v2.2-1.3b/text-to-video");
1720
+ });
1721
+ it("video_generate preserves a 3-segment fal video id", async () => {
1722
+ const tools = createImageTools({
1723
+ cwd, allowedPaths: [cwd], allowedTools: ["video_generate"], vault: makeVault(),
1724
+ videoModel: "fal/fal-ai/wan/v2.2-1.3b/text-to-video",
1725
+ });
1726
+ const t = pick(tools, "video_generate");
1727
+ await t.execute("c", { prompt: "x", path: "out.mp4" });
1728
+ expect(sdkMocks.experimental_generateVideo.mock.calls[0][0].model.modelId).toBe("fal-ai/wan/v2.2-1.3b/text-to-video");
1729
+ });
1730
+ });
1731
+ //# sourceMappingURL=external-api-tools.test.js.map