@polpo-ai/tools 0.6.32 → 0.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. package/dist/__tests__/email-tools.test.d.ts +2 -0
  2. package/dist/__tests__/email-tools.test.d.ts.map +1 -0
  3. package/dist/__tests__/email-tools.test.js +705 -0
  4. package/dist/__tests__/email-tools.test.js.map +1 -0
  5. package/dist/__tests__/extended-tools.test.d.ts +2 -0
  6. package/dist/__tests__/extended-tools.test.d.ts.map +1 -0
  7. package/dist/__tests__/extended-tools.test.js +743 -0
  8. package/dist/__tests__/extended-tools.test.js.map +1 -0
  9. package/dist/__tests__/external-api-tools.test.d.ts +2 -0
  10. package/dist/__tests__/external-api-tools.test.d.ts.map +1 -0
  11. package/dist/__tests__/external-api-tools.test.js +1731 -0
  12. package/dist/__tests__/external-api-tools.test.js.map +1 -0
  13. package/dist/__tests__/memory-tools.test.d.ts +2 -0
  14. package/dist/__tests__/memory-tools.test.d.ts.map +1 -0
  15. package/dist/__tests__/memory-tools.test.js +0 -0
  16. package/dist/__tests__/memory-tools.test.js.map +1 -0
  17. package/dist/audio-tools.d.ts +25 -27
  18. package/dist/audio-tools.d.ts.map +1 -1
  19. package/dist/audio-tools.js +156 -438
  20. package/dist/audio-tools.js.map +1 -1
  21. package/dist/browser-tools.d.ts.map +1 -1
  22. package/dist/browser-tools.js +5 -1
  23. package/dist/browser-tools.js.map +1 -1
  24. package/dist/email-tools.d.ts.map +1 -1
  25. package/dist/email-tools.js +11 -3
  26. package/dist/email-tools.js.map +1 -1
  27. package/dist/image-tools.d.ts +27 -25
  28. package/dist/image-tools.d.ts.map +1 -1
  29. package/dist/image-tools.js +151 -332
  30. package/dist/image-tools.js.map +1 -1
  31. package/dist/index.d.ts +1 -2
  32. package/dist/index.d.ts.map +1 -1
  33. package/dist/index.js +3 -2
  34. package/dist/index.js.map +1 -1
  35. package/dist/lib/edge-speech-model.d.ts +61 -0
  36. package/dist/lib/edge-speech-model.d.ts.map +1 -0
  37. package/dist/lib/edge-speech-model.js +144 -0
  38. package/dist/lib/edge-speech-model.js.map +1 -0
  39. package/dist/lib/exa-search-provider.d.ts +27 -0
  40. package/dist/lib/exa-search-provider.d.ts.map +1 -0
  41. package/dist/lib/exa-search-provider.js +109 -0
  42. package/dist/lib/exa-search-provider.js.map +1 -0
  43. package/dist/lib/provider-resolver.d.ts +54 -0
  44. package/dist/lib/provider-resolver.d.ts.map +1 -0
  45. package/dist/lib/provider-resolver.js +115 -0
  46. package/dist/lib/provider-resolver.js.map +1 -0
  47. package/dist/search-tools.d.ts +10 -13
  48. package/dist/search-tools.d.ts.map +1 -1
  49. package/dist/search-tools.js +63 -140
  50. package/dist/search-tools.js.map +1 -1
  51. package/dist/system-tools.d.ts +19 -5
  52. package/dist/system-tools.d.ts.map +1 -1
  53. package/dist/system-tools.js +16 -10
  54. package/dist/system-tools.js.map +1 -1
  55. package/package.json +12 -2
  56. package/dist/phone-tools.d.ts +0 -27
  57. package/dist/phone-tools.d.ts.map +0 -1
  58. package/dist/phone-tools.js +0 -577
  59. package/dist/phone-tools.js.map +0 -1
@@ -0,0 +1,743 @@
1
+ /**
2
+ * Behavioral tests for the next layer of @polpo-ai/tools, run through
3
+ * the same mock-LLM-style entry point as system-tools.test.ts:
4
+ *
5
+ * - register_outcome (task-only, gated by outputDir)
6
+ * - http_fetch / http_download (with global.fetch mocked)
7
+ * - vault_get / vault_list (against a synthetic ResolvedVault)
8
+ * - excel_read / excel_write / excel_info / excel_query
9
+ * - pdf_read / pdf_info / pdf_merge (NOT pdf_create — Chromium)
10
+ * - docx_read / docx_create
11
+ *
12
+ * Each `describe` builds its own tools via the public factory the
13
+ * cloud / OSS callers use, so the wiring layer (path sandbox + fs
14
+ * abstraction + allowedTools filter) is covered alongside the tool
15
+ * body. Sample documents are built in-process from the same
16
+ * libraries the tool uses, so we don't ship any binary fixtures.
17
+ */
18
import { describe, it, expect, beforeEach, afterEach, vi } from "vitest";
import { mkdtempSync, mkdirSync, rmSync, symlinkSync, writeFileSync, readFileSync, existsSync, statSync, } from "node:fs";
import { tmpdir } from "node:os";
import { join } from "node:path";
import { createOutcomeTools } from "../outcome-tools.js";
import { createHttpTools } from "../http-tools.js";
import { createVaultToolsCore } from "../vault-tools.js";
import { createExcelTools } from "../excel-tools.js";
import { createPdfTools } from "../pdf-tools.js";
import { createDocxTools } from "../docx-tools.js";
28
+ let cwd;
29
/**
 * Look up a tool by name in the list a factory returned.
 *
 * @param {Array<{name: string}>} tools - Tools produced by one of the factories.
 * @param {string} name - Name of the tool the test wants to drive.
 * @returns {object} The first tool whose `name` matches.
 * @throws {Error} When no tool with that name exists; the message lists the
 *   names that were registered so a filter mismatch is easy to spot.
 */
function pick(tools, name) {
    for (const candidate of tools) {
        if (candidate.name === name) {
            return candidate;
        }
    }
    const registered = tools.map((entry) => entry.name).join(", ");
    throw new Error(`Tool '${name}' not registered. Got: ${registered}`);
}
35
/**
 * Return the text of the first content block of a tool result.
 *
 * @param {{content: Array<{type: string, text?: string}>}} result - Tool result.
 * @returns {string} The `text` field of the first content block.
 * @throws {Error} When the first block is missing or is not of type "text".
 */
function text(result) {
    const first = result.content[0];
    if (first?.type === "text") {
        return first.text;
    }
    throw new Error(`Expected text content block, got ${first?.type}`);
}
41
// Every test runs against its own scratch directory under the OS temp dir.
beforeEach(() => {
    const prefix = join(tmpdir(), "polpo-ext-tools-");
    cwd = mkdtempSync(prefix);
});
// Remove the scratch directory and everything a test wrote into it.
afterEach(() => {
    const cleanup = { recursive: true, force: true };
    rmSync(cwd, cleanup);
});
47
+ // ────────────────────────────────────────────────────────────
48
+ // register_outcome
49
+ // ────────────────────────────────────────────────────────────
50
/**
 * register_outcome: behavioral coverage for the outcome-registration tool.
 *
 * Each case builds the tool through `createOutcomeTools` against the
 * per-test scratch directory (`cwd`), so the factory wiring is exercised
 * together with the tool body.
 */
describe("register_outcome", () => {
    /** Builds a fresh tool list and returns the `register_outcome` tool. */
    function build() {
        const tools = createOutcomeTools(cwd, [cwd], ["register_outcome"], cwd);
        return pick(tools, "register_outcome");
    }
    it("registers an inline text outcome with details", async () => {
        const t = build();
        const result = await t.execute("c1", { type: "text", label: "Hello note", text: "ciao mondo" });
        // The shell-side engine reads these `outcome*` keys from details
        // when it collates a TaskOutcome; pin the contract.
        expect(result.details).toMatchObject({
            outcomeType: "text",
            outcomeLabel: "Hello note",
            outcomeText: "ciao mondo",
        });
        expect(text(result).toLowerCase()).toContain("registered");
    });
    it("validates that a file outcome's path actually exists on disk", async () => {
        const t = build();
        const result = await t.execute("c1", { type: "file", label: "Missing report", path: "nope.pdf" });
        expect(result.details).toMatchObject({ error: expect.any(String) });
    });
    it("accepts a real file and infers mime from extension", async () => {
        const t = build();
        writeFileSync(join(cwd, "report.pdf"), "%PDF-1.4 fake");
        const result = await t.execute("c1", { type: "file", label: "Q1 Report", path: "report.pdf" });
        expect(result.details).toMatchObject({
            outcomeType: "file",
            outcomeLabel: "Q1 Report",
            outcomeMimeType: "application/pdf",
        });
        expect(text(result).toLowerCase()).toContain("registered");
    });
    it("registers a url outcome verbatim", async () => {
        const t = build();
        const result = await t.execute("c1", { type: "url", label: "Docs", url: "https://example.com" });
        expect(result.details).toMatchObject({
            outcomeType: "url",
            outcomeUrl: "https://example.com",
        });
    });
    it("registers a json outcome carrying structured data", async () => {
        const t = build();
        const payload = { rows: 42, source: "stripe" };
        const result = await t.execute("c1", { type: "json", label: "Stats", data: payload });
        expect(result.details).toMatchObject({
            outcomeType: "json",
            outcomeData: payload,
        });
    });
    it("rejects file outcomes whose path escapes the sandbox", async () => {
        const t = build();
        await expect(t.execute("c1", { type: "file", label: "Escape", path: "/etc/hostname" })).rejects.toThrow(/sandbox|allowed|denied/i);
    });
    // ── Adversarial ────────────────────────────────────────────
    it("type=file without a path → structured error, not a crash", async () => {
        // LLM hallucinates: declares a file outcome but forgets to set
        // path. The tool must NOT register a phantom record; it must
        // either error or refuse cleanly.
        const t = build();
        const result = await t.execute("c1", { type: "file", label: "Phantom" });
        // Either a structured error OR the engine treats path-less file
        // as text-ish. Pin "produces something parseable, not undefined".
        expect(result.details).toBeDefined();
    });
    it("type=text with empty string returns a structured response (no crash)", async () => {
        // The tool may either accept empty text or refuse it as missing.
        // The contract we lock in: never throws unhandled, always
        // returns a parseable result.
        const t = build();
        const result = await t.execute("c1", { type: "text", label: "Empty", text: "" });
        expect(result.details).toBeDefined();
        expect(text(result).length).toBeGreaterThan(0);
    });
    it("registers an outcome whose path has no extension (mime detection fallback)", async () => {
        const t = build();
        writeFileSync(join(cwd, "Makefile"), "all:\n\techo hi\n");
        const result = await t.execute("c1", { type: "file", label: "Build script", path: "Makefile" });
        expect(result.details).toMatchObject({ outcomeType: "file" });
        // No extension → either no mimeType or a sensible default.
        // Either way, the tool must not crash on the missing extension.
    });
    it("file path that is actually a directory is rejected gracefully", async () => {
        const t = build();
        const subdir = join(cwd, "subdir");
        // Statically imported from node:fs: `require` is not defined in a
        // native ES module, so the test must not depend on a runner shim.
        mkdirSync(subdir);
        const result = await t.execute("c1", { type: "file", label: "Dir", path: "subdir" });
        expect(result.details).toBeDefined();
        expect(text(result).length).toBeGreaterThan(0);
    });
    it("very long inline text (50KB) is registered without truncation in details", async () => {
        const t = build();
        const big = "x".repeat(50_000);
        const result = await t.execute("c1", { type: "text", label: "Big", text: big });
        // outcomeText preserves the original (truncation is a UI concern,
        // not the tool's contract).
        expect(result.details).toMatchObject({ outcomeText: big });
    });
    // Title states only what is asserted: the input has no empty tags and
    // `arrayContaining` ignores order, so neither "stripping" nor
    // "ordering" is verified here.
    it("tags array is surfaced in details.outcomeTags", async () => {
        const t = build();
        const result = await t.execute("c1", {
            type: "url",
            label: "Doc",
            url: "https://example.com",
            tags: ["report", "Q4", "internal"],
        });
        const tags = result.details.outcomeTags;
        expect(tags).toEqual(expect.arrayContaining(["report", "Q4", "internal"]));
    });
    it("symlink-escape path is refused (target outside sandbox)", async () => {
        const t = build();
        // The link lives inside the sandbox but points outside it.
        symlinkSync("/etc/hostname", join(cwd, "trick.txt"));
        await expect(t.execute("c1", { type: "file", label: "Tricked", path: "trick.txt" })).rejects.toThrow(/sandbox|allowed|denied/i);
    });
});
165
+ // ────────────────────────────────────────────────────────────
166
+ // http_fetch / http_download
167
+ // ────────────────────────────────────────────────────────────
168
/**
 * http_fetch / http_download: behavioral coverage with the global `fetch`
 * replaced by a vitest mock. The original `fetch` is saved before and
 * restored after every test so mocks never leak between cases.
 */
describe("http_fetch / http_download", () => {
    let originalFetch;
    beforeEach(() => {
        originalFetch = globalThis.fetch;
    });
    afterEach(() => {
        globalThis.fetch = originalFetch;
    });
    /** Replaces `globalThis.fetch` with a vitest mock wrapping `impl`. */
    function mockFetch(impl) {
        globalThis.fetch = vi.fn(impl);
    }
    /** Builds the HTTP tool list against the per-test scratch directory. */
    function build() {
        return createHttpTools(cwd, [cwd], ["http_fetch", "http_download"]);
    }
    // ── http_fetch ───────────────────────────────────────────
    it("http_fetch returns body + status for a 200 JSON response", async () => {
        mockFetch(async () => new Response(JSON.stringify({ ok: true }), {
            status: 200,
            headers: { "content-type": "application/json" },
        }));
        const t = pick(build(), "http_fetch");
        const result = await t.execute("c1", { url: "https://api.example.com/x" });
        expect(text(result)).toContain('"ok"');
        expect(result.details).toMatchObject({ status: 200 });
    });
    it("http_fetch surfaces non-2xx status without throwing", async () => {
        mockFetch(async () => new Response("not found", { status: 404 }));
        const t = pick(build(), "http_fetch");
        const result = await t.execute("c1", { url: "https://api.example.com/missing" });
        expect(result.details).toMatchObject({ status: 404 });
    });
    it("http_fetch sends method/headers/body through to fetch", async () => {
        const seen = {};
        mockFetch(async (url, init) => {
            seen.url = url;
            seen.init = init;
            return new Response("{}", { status: 200, headers: { "content-type": "application/json" } });
        });
        const t = pick(build(), "http_fetch");
        await t.execute("c1", {
            url: "https://api.example.com/post",
            method: "POST",
            headers: { Authorization: "Bearer x" },
            body: JSON.stringify({ a: 1 }),
        });
        expect(seen.url).toBe("https://api.example.com/post");
        expect(seen.init.method).toBe("POST");
        expect(seen.init.headers).toMatchObject({ Authorization: "Bearer x" });
    });
    it("http_fetch blocks SSRF to localhost / internal hosts", async () => {
        // Install a mock so a regression can never reach the real network,
        // and so "no real fetch" is asserted rather than assumed.
        mockFetch(async () => new Response("should not be reached", { status: 200 }));
        const t = pick(build(), "http_fetch");
        const result = await t.execute("c1", { url: "http://127.0.0.1/" });
        // The tool must report an SSRF-style error instead of fetching.
        const looksBlocked = (text(result) + JSON.stringify(result.details)).toLowerCase();
        expect(looksBlocked).toMatch(/ssrf|forbidden|block|denied|not allowed|invalid/);
        expect(globalThis.fetch).not.toHaveBeenCalled();
    });
    // ── http_download ────────────────────────────────────────
    it("http_download writes the response body to the sandbox", async () => {
        const payload = Buffer.from("hello-bin");
        mockFetch(async () => new Response(payload, {
            status: 200,
            headers: { "content-type": "application/octet-stream" },
        }));
        const t = pick(build(), "http_download");
        const result = await t.execute("c1", {
            url: "https://files.example.com/x.bin",
            path: "downloads/x.bin",
        });
        const onDisk = readFileSync(join(cwd, "downloads/x.bin"));
        expect(onDisk.equals(payload)).toBe(true);
        expect(result.details).toMatchObject({ bytes: payload.byteLength });
    });
    it("http_download rejects writes outside the sandbox", async () => {
        mockFetch(async () => new Response("x", { status: 200 }));
        const t = pick(build(), "http_download");
        await expect(t.execute("c1", { url: "https://files.example.com/x", path: "/tmp/outside.bin" })).rejects.toThrow(/sandbox|allowed|denied/i);
    });
    it("http_download surfaces non-2xx without writing the file", async () => {
        mockFetch(async () => new Response("nope", { status: 500 }));
        const t = pick(build(), "http_download");
        const result = await t.execute("c1", { url: "https://files.example.com/x", path: "x.bin" });
        expect(existsSync(join(cwd, "x.bin"))).toBe(false);
        expect(JSON.stringify(result.details)).toMatch(/500|error|fail/i);
    });
    // ── Adversarial ────────────────────────────────────────────
    it("http_fetch refuses dangerous URL schemes (file://, javascript:, data:)", async () => {
        const t = pick(build(), "http_fetch");
        for (const url of [
            "file:///etc/passwd",
            "javascript:alert(1)",
            "data:text/html,<script>alert(1)</script>",
        ]) {
            const result = await t.execute("c1", { url });
            const blob = (text(result) + JSON.stringify(result.details)).toLowerCase();
            expect(blob).toMatch(/invalid|forbidden|block|denied|not allowed|protocol|scheme|ssrf/);
        }
    });
    it("http_fetch survives a network error (fetch throws) without crashing", async () => {
        mockFetch(async () => { throw new Error("ECONNRESET — peer reset connection"); });
        const t = pick(build(), "http_fetch");
        const result = await t.execute("c1", { url: "https://api.example.com/x" });
        // Must come back as a tool result, not a thrown rejection.
        expect(result.details).toBeDefined();
        expect(JSON.stringify(result.details).toLowerCase()).toMatch(/error|reset|connection|fail/);
    });
    it("http_fetch handles a non-UTF8 response body without corrupting the result", async () => {
        // Byte sequence that is not valid UTF-8.
        const bytes = Buffer.from([0xff, 0xfe, 0x00, 0x80, 0x01, 0x02]);
        mockFetch(async () => new Response(bytes, {
            status: 200,
            headers: { "content-type": "application/octet-stream" },
        }));
        const t = pick(build(), "http_fetch");
        const result = await t.execute("c1", { url: "https://api.example.com/bin" });
        // Must complete cleanly. Either decoded best-effort or reported
        // as binary; the assertion is "no throw, status preserved".
        expect(result.details).toMatchObject({ status: 200 });
    });
    it("http_fetch truncates / reports very large response bodies", async () => {
        // 5 MB of 'x' — bigger than any sensible LLM context window.
        const huge = "x".repeat(5 * 1024 * 1024);
        mockFetch(async () => new Response(huge, {
            status: 200,
            headers: { "content-type": "text/plain" },
        }));
        const t = pick(build(), "http_fetch");
        const result = await t.execute("c1", { url: "https://api.example.com/huge" });
        // Body in the visible content must NOT be the full 5 MB —
        // either truncated or summarized. Pin the upper bound generously
        // (1 MB) so the assertion isn't brittle to small impl tweaks.
        expect(text(result).length).toBeLessThan(1_000_000);
    });
    it("http_fetch passes through 5xx body so the LLM can read the server error", async () => {
        mockFetch(async () => new Response("internal: db unavailable\nretry-after: 30s", {
            status: 503,
            headers: { "content-type": "text/plain" },
        }));
        const t = pick(build(), "http_fetch");
        const result = await t.execute("c1", { url: "https://api.example.com/x" });
        expect(text(result)).toContain("db unavailable");
        expect(result.details).toMatchObject({ status: 503 });
    });
    it("http_download creates parent dirs for a deeply nested target path", async () => {
        mockFetch(async () => new Response(Buffer.from("ok"), { status: 200 }));
        const t = pick(build(), "http_download");
        await t.execute("c1", {
            url: "https://files.example.com/x",
            path: "a/b/c/d/leaf.bin",
        });
        expect(existsSync(join(cwd, "a/b/c/d/leaf.bin"))).toBe(true);
    });
    it("http_download handles an empty 200 body without erroring", async () => {
        mockFetch(async () => new Response("", { status: 200 }));
        const t = pick(build(), "http_download");
        const result = await t.execute("c1", { url: "https://files.example.com/0", path: "z.bin" });
        expect(existsSync(join(cwd, "z.bin"))).toBe(true);
        expect(statSync(join(cwd, "z.bin")).size).toBe(0);
        expect(result.details).toMatchObject({ bytes: 0 });
    });
    it("http_download survives fetch throw without leaving a half-written file", async () => {
        mockFetch(async () => { throw new Error("DNS failure"); });
        const t = pick(build(), "http_download");
        const result = await t.execute("c1", { url: "https://files.example.com/x", path: "broken.bin" });
        expect(existsSync(join(cwd, "broken.bin"))).toBe(false);
        expect(JSON.stringify(result.details).toLowerCase()).toMatch(/error|dns|fail/);
    });
});
335
+ // ────────────────────────────────────────────────────────────
336
+ // vault_get / vault_list
337
+ // ────────────────────────────────────────────────────────────
338
/**
 * vault_get / vault_list: behavioral coverage against an in-memory vault
 * stub, so no real credential store is involved.
 */
describe("vault_get / vault_list", () => {
    /**
     * Builds a synthetic vault over a plain `entries` record.
     *
     * @param {Record<string, {type: string, values: Record<string, string>}>} entries
     *   Map of service name to its credential type and key/value pairs.
     * @returns {object} Stub exposing get / getSmtp / getImap / getKey / has / list.
     */
    function makeVault(entries) {
        return {
            get: (service) => entries[service]?.values,
            getSmtp: () => entries["smtp"]?.values,
            getImap: () => entries["imap"]?.values,
            getKey: (service, key) => entries[service]?.values[key],
            // Own-property check: `in` would also report inherited keys such
            // as "constructor" or "toString" as configured services.
            has: (service) => Object.prototype.hasOwnProperty.call(entries, service),
            list: () => Object.entries(entries).map(([service, v]) => ({
                service, type: v.type, keys: Object.keys(v.values),
            })),
        };
    }
    it("vault_list reports configured services without leaking values", async () => {
        const vault = makeVault({
            stripe: { type: "api_key", values: { key: "sk_test_xxx" } },
            smtp: { type: "smtp", values: { host: "smtp.x", user: "u", pass: "p" } },
        });
        const t = pick(createVaultToolsCore(vault), "vault_list");
        const result = await t.execute("c1", {});
        const out = text(result);
        expect(out).toContain("stripe");
        expect(out).toContain("smtp");
        // The actual secret must NOT appear in the output payload.
        expect(out).not.toContain("sk_test_xxx");
        expect(out).not.toContain("smtp.x");
    });
    it("vault_get returns the credential values for a known service", async () => {
        const vault = makeVault({
            stripe: { type: "api_key", values: { key: "sk_test_abcdefg" } },
        });
        const t = pick(createVaultToolsCore(vault), "vault_get");
        const result = await t.execute("c1", { service: "stripe" });
        expect(text(result)).toContain("sk_test_abcdefg");
        expect(result.details).toMatchObject({ found: true });
    });
    it("vault_get returns a clear not-found result for an unknown service", async () => {
        const vault = makeVault({});
        const t = pick(createVaultToolsCore(vault), "vault_get");
        const result = await t.execute("c1", { service: "stripe" });
        expect(result.details).toMatchObject({ found: false });
        expect(text(result).toLowerCase()).toMatch(/no.*entry|not.*found|missing/);
    });
    // ── Adversarial ────────────────────────────────────────────
    it("vault_list with zero entries returns an empty/none-configured result", async () => {
        const vault = makeVault({});
        const t = pick(createVaultToolsCore(vault), "vault_list");
        const result = await t.execute("c1", {});
        // Must not throw; the only thing pinned is that some text comes back.
        expect(text(result).length).toBeGreaterThan(0);
    });
    it("vault_get on a service with multibyte-named keys still works", async () => {
        const vault = makeVault({
            "服务-α": { type: "custom", values: { "ключ": "valore-€42" } },
        });
        const t = pick(createVaultToolsCore(vault), "vault_get");
        const result = await t.execute("c1", { service: "服务-α" });
        expect(text(result)).toContain("valore-€42");
    });
    it("vault_get with empty-string service name doesn't crash", async () => {
        const vault = makeVault({ stripe: { type: "api_key", values: { key: "x" } } });
        const t = pick(createVaultToolsCore(vault), "vault_get");
        // LLM hallucinates and passes ""; result must be structured, not a crash.
        const result = await t.execute("c1", { service: "" });
        expect(result.details).toMatchObject({ found: false });
    });
});
405
+ // ────────────────────────────────────────────────────────────
406
+ // excel_read / excel_write / excel_info / excel_query
407
+ // ────────────────────────────────────────────────────────────
408
/**
 * Spreadsheet tools: write / read / info coverage. Input workbooks are
 * generated in-process with exceljs, so no binary fixtures are needed.
 */
describe("excel_read / write / info / query", () => {
    /** Writes a one-sheet workbook named `sheet` containing `rows` to `file`. */
    async function buildXlsx(file, sheet, rows) {
        const { Workbook } = await import("exceljs");
        const workbook = new Workbook();
        const worksheet = workbook.addWorksheet(sheet);
        for (const row of rows) {
            worksheet.addRow(row);
        }
        await workbook.xlsx.writeFile(file);
    }
    /** Builds the spreadsheet tool list against the per-test scratch dir. */
    function build() {
        const enabled = ["excel_read", "excel_write", "excel_info", "excel_query"];
        return createExcelTools(cwd, [cwd], enabled);
    }
    it("excel_write creates a multi-row xlsx", async () => {
        const writer = pick(build(), "excel_write");
        const args = {
            path: "out.xlsx",
            sheet_name: "Sales",
            headers: ["product", "qty"],
            rows: [
                ["pen", 12],
                ["pad", 30],
            ],
        };
        const outcome = await writer.execute("c1", args);
        const target = join(cwd, "out.xlsx");
        expect(existsSync(target)).toBe(true);
        expect(statSync(target).size).toBeGreaterThan(0);
        expect(JSON.stringify(outcome.details)).toContain("out.xlsx");
    });
    it("excel_read returns the cell content of a sample workbook", async () => {
        const sampleRows = [
            ["name", "score"],
            ["alice", 9],
            ["bob", 7],
        ];
        await buildXlsx(join(cwd, "in.xlsx"), "Data", sampleRows);
        const reader = pick(build(), "excel_read");
        const outcome = await reader.execute("c1", { path: "in.xlsx" });
        const rendered = text(outcome);
        expect(rendered).toContain("alice");
        expect(rendered).toContain("9");
        expect(rendered).toContain("bob");
    });
    it("excel_info returns sheet metadata without dumping data", async () => {
        const inventory = [
            ["sku", "qty"],
            ["A1", 100],
            ["A2", 200],
            ["A3", 300],
        ];
        await buildXlsx(join(cwd, "info.xlsx"), "Inventory", inventory);
        const info = pick(build(), "excel_info");
        const outcome = await info.execute("c1", { path: "info.xlsx" });
        const rendered = text(outcome).toLowerCase();
        expect(rendered).toMatch(/inventory|sheet/);
        const detailsJson = JSON.stringify(outcome.details).toLowerCase();
        expect(detailsJson).toMatch(/sheet|row|column/);
    });
    it("excel_read on a missing file returns a structured error", async () => {
        const reader = pick(build(), "excel_read");
        const outcome = await reader.execute("c1", { path: "no-such.xlsx" });
        const detailsJson = JSON.stringify(outcome.details);
        expect(detailsJson).toMatch(/error|fail/i);
    });
    it("excel_write rejects paths outside the sandbox", async () => {
        const writer = pick(build(), "excel_write");
        const attempt = writer.execute("c1", { path: "/etc/escape.xlsx", headers: ["a"], rows: [[1]] });
        await expect(attempt).rejects.toThrow(/sandbox|allowed|denied/i);
    });
    // ── Adversarial ────────────────────────────────────────────
    it("excel_read on a corrupted xlsx returns a structured error", async () => {
        // 256 filler bytes (0x42) behind an .xlsx extension: not a valid
        // workbook. The tool has to report that through `details`
        // rather than letting the failure escape.
        const junk = Buffer.alloc(256, 0x42);
        writeFileSync(join(cwd, "broken.xlsx"), junk);
        const reader = pick(build(), "excel_read");
        const outcome = await reader.execute("c1", { path: "broken.xlsx" });
        const detailsJson = JSON.stringify(outcome.details);
        expect(detailsJson).toMatch(/error|fail|invalid/i);
    });
    it("excel_write produces a CSV when the path ends in .csv", async () => {
        const writer = pick(build(), "excel_write");
        const args = {
            path: "out.csv",
            headers: ["product", "price"],
            rows: [["pen", 1.5], ["pad", 2.0]],
        };
        await writer.execute("c1", args);
        const csv = readFileSync(join(cwd, "out.csv"), "utf8");
        // A CSV is plain text holding the data as-is; an xlsx would be a
        // zip archive and therefore begin with PK\x03\x04.
        expect(csv).toContain("product");
        expect(csv).toContain("pen");
        expect(csv).not.toMatch(/^PK/);
    });
    it("excel_write handles non-ASCII headers and cell values", async () => {
        const writer = pick(build(), "excel_write");
        const args = {
            path: "i18n.xlsx",
            sheet_name: "Données",
            headers: ["прoдукт", "数量", "prix (€)"],
            rows: [
                ["café ☕", 12, 3.5],
                ["крендель 🥨", 7, 2.25],
            ],
        };
        await writer.execute("c1", args);
        expect(existsSync(join(cwd, "i18n.xlsx"))).toBe(true);
        // Round-trip: load the same file again through excel_read.
        const reader = pick(build(), "excel_read");
        const readBack = await reader.execute("c1", { path: "i18n.xlsx" });
        const rendered = text(readBack);
        expect(rendered).toContain("café ☕");
        expect(rendered).toContain("крендель");
    });
    it("excel_write with empty rows array still writes a file with headers", async () => {
        const writer = pick(build(), "excel_write");
        const args = {
            path: "empty.xlsx",
            headers: ["a", "b", "c"],
            rows: [],
        };
        const outcome = await writer.execute("c1", args);
        expect(existsSync(join(cwd, "empty.xlsx"))).toBe(true);
        expect(JSON.stringify(outcome.details)).toContain("empty.xlsx");
    });
});
523
+ // ────────────────────────────────────────────────────────────
524
+ // pdf_read / pdf_info / pdf_merge
525
+ // (pdf_create is in Layer 2 — it spawns Chromium via Shell.)
526
+ // ────────────────────────────────────────────────────────────
527
+ describe("pdf_read / pdf_info / pdf_merge", () => {
528
+ async function buildSamplePdf(file, pageCount = 1, title = "Sample") {
529
+ const { PDFDocument, StandardFonts } = await import("pdf-lib");
530
+ const doc = await PDFDocument.create();
531
+ doc.setTitle(title);
532
+ const font = await doc.embedFont(StandardFonts.Helvetica);
533
+ for (let i = 0; i < pageCount; i++) {
534
+ const page = doc.addPage([300, 200]);
535
+ page.drawText(`Page ${i + 1} of ${title}`, { x: 20, y: 100, size: 14, font });
536
+ }
537
+ writeFileSync(file, await doc.save());
538
+ }
539
+ function build() {
540
+ return createPdfTools(cwd, [cwd], ["pdf_read", "pdf_info", "pdf_merge"]);
541
+ }
542
+ it("pdf_info reports page count + title", async () => {
543
+ await buildSamplePdf(join(cwd, "doc.pdf"), 3, "Quarterly");
544
+ const t = pick(build(), "pdf_info");
545
+ const result = await t.execute("c1", { path: "doc.pdf" });
546
+ expect(JSON.stringify(result.details)).toMatch(/3/);
547
+ expect(text(result).toLowerCase()).toMatch(/page|pages/);
548
+ });
549
+ it("pdf_read extracts text-ish content (text or metadata)", async () => {
550
+ await buildSamplePdf(join(cwd, "doc.pdf"), 2, "Report");
551
+ const t = pick(build(), "pdf_read");
552
+ const result = await t.execute("c1", { path: "doc.pdf" });
553
+ // Some impls embed text in the PDF stream and others don't extract
554
+ // it without poppler — accept either behavior, but the call must
555
+ // succeed and return *something* about the document.
556
+ expect(result.details).toBeDefined();
557
+ expect(text(result).length).toBeGreaterThan(0);
558
+ });
559
+ it("pdf_merge combines two PDFs into one with the summed page count", async () => {
560
+ await buildSamplePdf(join(cwd, "a.pdf"), 2, "A");
561
+ await buildSamplePdf(join(cwd, "b.pdf"), 3, "B");
562
+ const t = pick(build(), "pdf_merge");
563
+ const result = await t.execute("c1", {
564
+ inputs: ["a.pdf", "b.pdf"],
565
+ output: "merged.pdf",
566
+ });
567
+ expect(existsSync(join(cwd, "merged.pdf"))).toBe(true);
568
+ // pdf-lib check on the merged output for hard truth.
569
+ const { PDFDocument } = await import("pdf-lib");
570
+ const buf = readFileSync(join(cwd, "merged.pdf"));
571
+ const merged = await PDFDocument.load(buf);
572
+ expect(merged.getPageCount()).toBe(5);
573
+ expect(JSON.stringify(result.details)).toMatch(/5|pages/);
574
+ });
575
+ it("pdf_read on a missing file returns a structured error", async () => {
576
+ const t = pick(build(), "pdf_read");
577
+ const result = await t.execute("c1", { path: "ghost.pdf" });
578
+ expect(JSON.stringify(result.details)).toMatch(/error|fail|not.*found/i);
579
+ });
580
+ it("pdf_merge rejects an output that escapes the sandbox", async () => {
581
+ await buildSamplePdf(join(cwd, "a.pdf"));
582
+ await buildSamplePdf(join(cwd, "b.pdf"));
583
+ const t = pick(build(), "pdf_merge");
584
+ await expect(t.execute("c1", { inputs: ["a.pdf", "b.pdf"], output: "/etc/escape.pdf" })).rejects.toThrow(/sandbox|allowed|denied/i);
585
+ });
586
+ // ── Adversarial ────────────────────────────────────────────
587
+ it("pdf_read on random bytes named .pdf returns a structured error", async () => { // NOTE(review): despite the title, the payload is a constant fill, not random data.
588
+ writeFileSync(join(cwd, "garbage.pdf"), Buffer.alloc(512, 0x7e)); // 512 bytes, every byte 0x7e ("~")
589
+ const t = pick(build(), "pdf_read");
590
+ const result = await t.execute("c1", { path: "garbage.pdf" }); // awaited without a catch: a rejection would fail the test
591
+ expect(JSON.stringify(result.details)).toMatch(/error|fail|invalid|parse/i);
592
+ });
593
+ it("pdf_info on a non-PDF file (renamed .txt) is reported, not crashed", async () => { // A plain-text payload under a .pdf name must yield an error in details, not a rejection.
594
+ writeFileSync(join(cwd, "fake.pdf"), "this is just text\n");
595
+ const t = pick(build(), "pdf_info");
596
+ const result = await t.execute("c1", { path: "fake.pdf" }); // awaited without a catch: a rejection would fail the test
597
+ expect(JSON.stringify(result.details)).toMatch(/error|fail|invalid/i);
598
+ });
599
+ it("pdf_merge with one input is a degenerate but legal copy", async () => { // Edge case: a one-element inputs array.
600
+ await buildSamplePdf(join(cwd, "only.pdf"), 4, "Solo");
601
+ const t = pick(build(), "pdf_merge");
602
+ const result = await t.execute("c1", { inputs: ["only.pdf"], output: "copy.pdf" });
603
+ // Either succeeds (copy) or returns a structured "need >= 2"
604
+ // error. Both are acceptable contracts; pin "no crash + result
605
+ // is structured".
606
+ expect(result.details).toBeDefined(); // deliberately weak: neither copy.pdf's existence nor its page count is checked
607
+ });
608
+ it("pdf_merge refuses an input path that escapes the sandbox", async () => { // One valid fixture plus one absolute path outside cwd.
609
+ await buildSamplePdf(join(cwd, "ok.pdf"));
610
+ const t = pick(build(), "pdf_merge");
611
+ // Accepts either a thrown rejection or a structured error in
612
+ // details — both prevent the merge. What we need to lock in is
613
+ // "no merged output ever lands on disk".
614
+ const result = await t.execute("c1", {
615
+ inputs: ["ok.pdf", "/etc/passwd"],
616
+ output: "merged.pdf",
617
+ }).catch((e) => ({ content: [{ type: "text", text: e.message }], details: { error: e.message } })); // fold a rejection into the same { content, details } shape so one set of assertions covers both outcomes
618
+ const blob = JSON.stringify(result.details).toLowerCase(); // lower-cased first, so the regex below needs no /i flag
619
+ expect(blob).toMatch(/sandbox|allowed|denied|access/);
620
+ expect(existsSync(join(cwd, "merged.pdf"))).toBe(false);
621
+ });
622
+ it("pdf_merge with a 50-input fan-in produces a valid 50-page PDF", async () => {
623
+ // Stress test: many small inputs, meant to surface any quadratic /
624
+ // file-handle behavior before it bites in prod.
625
+ const inputs = [];
626
+ for (let i = 0; i < 50; i++) { // fixtures are built one at a time (awaited inside the loop)
627
+ const name = `p${i}.pdf`;
628
+ await buildSamplePdf(join(cwd, name), 1, `P${i}`);
629
+ inputs.push(name); // relative names, in creation order p0..p49
630
+ }
631
+ const t = pick(build(), "pdf_merge");
632
+ await t.execute("c1", { inputs, output: "big.pdf" }); // the return value is not inspected; validity is judged from the file on disk
633
+ const { PDFDocument } = await import("pdf-lib");
634
+ const merged = await PDFDocument.load(readFileSync(join(cwd, "big.pdf"))); // a successful load is the only "valid PDF" check made here
635
+ expect(merged.getPageCount()).toBe(50);
636
+ });
637
+ });
638
+ // ────────────────────────────────────────────────────────────
639
+ // docx_read / docx_create
640
+ // ────────────────────────────────────────────────────────────
641
+ describe("docx_read / docx_create", () => { // Suite covering round-trips, error reporting, sandboxing and size/encoding edge cases for the two docx tools.
642
+ function build() { // returns a new createDocxTools(...) result on every call
643
+ return createDocxTools(cwd, [cwd], ["docx_read", "docx_create"]); // presumably (working dir, allowed roots, enabled tool names) — confirm against createDocxTools
644
+ }
645
+ it("docx_create writes a docx that docx_read can read back", async () => { // Round-trip: create, then read with a separately built tool set.
646
+ const create = pick(build(), "docx_create");
647
+ const r1 = await create.execute("c1", {
648
+ path: "letter.docx",
649
+ title: "Lettera al cliente",
650
+ content: [
651
+ { type: "paragraph", text: "Caro cliente," },
652
+ { type: "paragraph", text: "Grazie per il tuo ordine." },
653
+ { type: "paragraph", text: "— Team" },
654
+ ],
655
+ });
656
+ expect(existsSync(join(cwd, "letter.docx"))).toBe(true);
657
+ expect(JSON.stringify(r1.details)).toContain("letter.docx"); // the create result must mention the written file name
658
+ const read = pick(build(), "docx_read");
659
+ const r2 = await read.execute("c1", { path: "letter.docx" });
660
+ const out = text(r2); // text() is a helper defined outside this excerpt; presumably extracts the textual content of a result
661
+ expect(out).toContain("Caro cliente"); // only the first two paragraphs are asserted; the title and "— Team" are not
662
+ expect(out).toContain("Grazie per il tuo ordine");
663
+ });
664
+ it("docx_read on a missing file returns a structured error", async () => { // ghost.docx is not created by this test; the awaited call must resolve, not reject.
665
+ const t = pick(build(), "docx_read");
666
+ const result = await t.execute("c1", { path: "ghost.docx" });
667
+ expect(JSON.stringify(result.details)).toMatch(/error|fail|not.*found/i);
668
+ });
669
+ it("docx_create rejects paths outside the sandbox", async () => { // Here the promise itself must reject; a resolved error result would fail the test.
670
+ const t = pick(build(), "docx_create");
671
+ await expect(t.execute("c1", { path: "/etc/escape.docx", content: [{ type: "paragraph", text: "x" }] })).rejects.toThrow(/sandbox|allowed|denied/i);
672
+ });
673
+ // ── Adversarial ────────────────────────────────────────────
674
+ it("docx_read on a corrupted docx returns a structured error", async () => {
675
+ writeFileSync(join(cwd, "broken.docx"), Buffer.alloc(256, 0x55)); // 256 bytes, every byte 0x55 ("U")
676
+ const t = pick(build(), "docx_read");
677
+ const result = await t.execute("c1", { path: "broken.docx" }); // awaited without a catch: a rejection would fail the test
678
+ expect(JSON.stringify(result.details)).toMatch(/error|fail|invalid|parse/i);
679
+ });
680
+ it("docx_create handles all four block types in one document", async () => { // The four types used below: heading, paragraph, bullet, numbered.
681
+ const create = pick(build(), "docx_create");
682
+ await create.execute("c1", {
683
+ path: "kitchen-sink.docx",
684
+ title: "Mixed",
685
+ content: [
686
+ { type: "heading", text: "Sezione 1", level: 1 },
687
+ { type: "paragraph", text: "Paragrafo introduttivo." },
688
+ { type: "bullet", text: "Punto uno" },
689
+ { type: "bullet", text: "Punto due" },
690
+ { type: "numbered", text: "Step alpha" },
691
+ { type: "numbered", text: "Step beta" },
692
+ ],
693
+ });
694
+ expect(existsSync(join(cwd, "kitchen-sink.docx"))).toBe(true);
695
+ const read = pick(build(), "docx_read");
696
+ const r2 = await read.execute("c1", { path: "kitchen-sink.docx" });
697
+ const out = text(r2);
698
+ expect(out).toContain("Sezione 1"); // one sample text per block type is asserted: heading,
699
+ expect(out).toContain("Paragrafo introduttivo"); // paragraph,
700
+ expect(out).toContain("Punto uno"); // bullet,
701
+ expect(out).toContain("Step alpha"); // numbered
702
+ });
703
+ it("docx_create with empty content array still writes a (mostly empty) file", async () => { // NOTE(review): the title promises a written file, but the assertion below also accepts a refusal.
704
+ const t = pick(build(), "docx_create");
705
+ const result = await t.execute("c1", { path: "empty.docx", content: [] });
706
+ // Either succeeds (empty doc) or refuses with a clear error.
707
+ // Pin "no unhandled throw, result has details".
708
+ expect(result.details).toBeDefined(); // existence of empty.docx is not checked
709
+ });
710
+ it("docx_create with non-ASCII content (RTL + emoji + ideographs) round-trips", async () => {
711
+ const create = pick(build(), "docx_create");
712
+ await create.execute("c1", {
713
+ path: "i18n.docx",
714
+ content: [
715
+ { type: "paragraph", text: "Latin: café" },
716
+ { type: "paragraph", text: "Hebrew RTL: שלום עולם" },
717
+ { type: "paragraph", text: "CJK: 你好世界" },
718
+ { type: "paragraph", text: "Emoji 🐙🚀 + zero-width ​ end" },
719
+ ],
720
+ });
721
+ const read = pick(build(), "docx_read");
722
+ const r2 = await read.execute("c1", { path: "i18n.docx" });
723
+ const out = text(r2);
724
+ expect(out).toContain("café");
725
+ expect(out).toContain("שלום עולם");
726
+ expect(out).toContain("你好世界");
727
+ expect(out).toContain("🐙"); // only the first emoji is asserted; the second emoji and the zero-width character are written but not checked
728
+ });
729
+ it("docx_create with a 500-paragraph payload writes within a reasonable time", async () => {
730
+ const create = pick(build(), "docx_create");
731
+ const content = Array.from({ length: 500 }, (_, i) => ({ // 500 paragraph blocks, numbered 1..500
732
+ type: "paragraph",
733
+ text: `Paragraph #${i + 1}: lorem ipsum dolor sit amet consectetur adipiscing elit.`,
734
+ }));
735
+ const t0 = Date.now(); // wall-clock timing around the execute() call only; payload construction is excluded
736
+ await create.execute("c1", { path: "big.docx", content });
737
+ const elapsed = Date.now() - t0;
738
+ expect(existsSync(join(cwd, "big.docx"))).toBe(true);
739
+ // Generous bound — flags only true regressions (10x slowdown).
740
+ expect(elapsed).toBeLessThan(15_000); // 15 seconds, in milliseconds
741
+ });
742
+ });
743
+ //# sourceMappingURL=extended-tools.test.js.map