@poncho-ai/harness 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +14 -0
- package/.turbo/turbo-test.log +22 -0
- package/CHANGELOG.md +16 -0
- package/LICENSE +21 -0
- package/dist/index.d.ts +416 -0
- package/dist/index.js +3015 -0
- package/package.json +53 -0
- package/src/agent-parser.ts +127 -0
- package/src/anthropic-client.ts +134 -0
- package/src/config.ts +141 -0
- package/src/default-tools.ts +89 -0
- package/src/harness.ts +522 -0
- package/src/index.ts +17 -0
- package/src/latitude-capture.ts +108 -0
- package/src/local-tools.ts +108 -0
- package/src/mcp.ts +287 -0
- package/src/memory.ts +700 -0
- package/src/model-client.ts +44 -0
- package/src/model-factory.ts +14 -0
- package/src/openai-client.ts +169 -0
- package/src/skill-context.ts +259 -0
- package/src/skill-tools.ts +357 -0
- package/src/state.ts +1017 -0
- package/src/telemetry.ts +108 -0
- package/src/tool-dispatcher.ts +69 -0
- package/test/agent-parser.test.ts +39 -0
- package/test/harness.test.ts +716 -0
- package/test/mcp.test.ts +82 -0
- package/test/memory.test.ts +50 -0
- package/test/model-factory.test.ts +16 -0
- package/test/state.test.ts +43 -0
- package/test/telemetry.test.ts +57 -0
- package/tsconfig.json +8 -0
|
@@ -0,0 +1,716 @@
|
|
|
1
|
+
import { mkdir, mkdtemp, writeFile } from "node:fs/promises";
|
|
2
|
+
import { tmpdir } from "node:os";
|
|
3
|
+
import { join } from "node:path";
|
|
4
|
+
import { describe, expect, it, vi } from "vitest";
|
|
5
|
+
import { defineTool } from "@poncho-ai/sdk";
|
|
6
|
+
import { AgentHarness } from "../src/harness.js";
|
|
7
|
+
import { loadSkillMetadata } from "../src/skill-context.js";
|
|
8
|
+
|
|
9
|
+
describe("agent harness", () => {
|
|
10
|
+
// Asserts that a harness created with only a working directory lists the
// built-in filesystem tools list_directory, read_file and write_file.
it("registers default filesystem tools", async () => {
  const dir = await mkdtemp(join(tmpdir(), "poncho-harness-default-tools-"));
  // `provider` and `name` must be nested under `model:`; left flat, `model`
  // would be empty and the top-level `name` key would be duplicated.
  await writeFile(
    join(dir, "AGENT.md"),
    `---
name: default-tools-agent
model:
  provider: anthropic
  name: claude-opus-4-5
---

# Default Tools Agent
`,
    "utf8",
  );

  const harness = new AgentHarness({ workingDir: dir });
  await harness.initialize();
  const names = harness.listTools().map((tool) => tool.name);

  expect(names).toContain("list_directory");
  expect(names).toContain("read_file");
  expect(names).toContain("write_file");
});
|
|
34
|
+
|
|
35
|
+
// Asserts that a harness constructed with environment "production" still lists
// list_directory and read_file but does not list write_file.
it("disables write_file by default in production environment", async () => {
  const dir = await mkdtemp(join(tmpdir(), "poncho-harness-prod-tools-"));
  // `provider` and `name` must be nested under `model:`; left flat, `model`
  // would be empty and the top-level `name` key would be duplicated.
  await writeFile(
    join(dir, "AGENT.md"),
    `---
name: prod-tools-agent
model:
  provider: anthropic
  name: claude-opus-4-5
---

# Prod Tools Agent
`,
    "utf8",
  );

  const harness = new AgentHarness({ workingDir: dir, environment: "production" });
  await harness.initialize();
  const names = harness.listTools().map((tool) => tool.name);

  expect(names).toContain("list_directory");
  expect(names).toContain("read_file");
  expect(names).not.toContain("write_file");
});
|
|
59
|
+
|
|
60
|
+
// Asserts that setting `tools.defaults.read_file: false` in poncho.config.js
// removes read_file from the listed tools while list_directory remains.
it("allows disabling built-in tools via poncho.config.js", async () => {
  const dir = await mkdtemp(join(tmpdir(), "poncho-harness-disable-default-tools-"));
  // `provider` and `name` must be nested under `model:`; left flat, `model`
  // would be empty and the top-level `name` key would be duplicated.
  await writeFile(
    join(dir, "AGENT.md"),
    `---
name: disable-default-tools-agent
model:
  provider: anthropic
  name: claude-opus-4-5
---

# Disable Default Tools Agent
`,
    "utf8",
  );
  // Project config written next to AGENT.md in the working directory.
  await writeFile(
    join(dir, "poncho.config.js"),
    `export default {
tools: {
defaults: {
read_file: false
}
}
};
`,
    "utf8",
  );

  const harness = new AgentHarness({ workingDir: dir, environment: "production" });
  await harness.initialize();
  const names = harness.listTools().map((tool) => tool.name);
  expect(names).toContain("list_directory");
  expect(names).not.toContain("read_file");
});
|
|
94
|
+
|
|
95
|
+
// Asserts that `tools.byEnvironment` entries are applied on top of
// `tools.defaults`: development re-enables read_file, while production ends up
// with neither read_file nor write_file.
it("supports per-environment tool overrides", async () => {
  const dir = await mkdtemp(join(tmpdir(), "poncho-harness-env-tool-overrides-"));
  // `provider` and `name` must be nested under `model:`; left flat, `model`
  // would be empty and the top-level `name` key would be duplicated.
  await writeFile(
    join(dir, "AGENT.md"),
    `---
name: env-tool-overrides-agent
model:
  provider: anthropic
  name: claude-opus-4-5
---

# Env Tool Overrides Agent
`,
    "utf8",
  );
  // Defaults disable read_file; each environment then overrides one tool.
  await writeFile(
    join(dir, "poncho.config.js"),
    `export default {
tools: {
defaults: {
read_file: false
},
byEnvironment: {
development: {
read_file: true
},
production: {
write_file: false
}
}
}
};
`,
    "utf8",
  );

  // Both harnesses share the same working directory and config file.
  const developmentHarness = new AgentHarness({ workingDir: dir, environment: "development" });
  await developmentHarness.initialize();
  const developmentTools = developmentHarness.listTools().map((tool) => tool.name);
  expect(developmentTools).toContain("read_file");
  expect(developmentTools).toContain("write_file");

  const productionHarness = new AgentHarness({ workingDir: dir, environment: "production" });
  await productionHarness.initialize();
  const productionTools = productionHarness.listTools().map((tool) => tool.name);
  expect(productionTools).not.toContain("read_file");
  expect(productionTools).not.toContain("write_file");
});
|
|
143
|
+
|
|
144
|
+
// Asserts that a tool object default-exported from a skill script
// (summarize_text) is not listed as a harness tool, while run_skill_script is.
it("does not auto-register exported tool objects from skill scripts", async () => {
  const dir = await mkdtemp(join(tmpdir(), "poncho-harness-no-auto-tool-register-"));
  // `provider` and `name` must be nested under `model:`; left flat, `model`
  // would be empty and the top-level `name` key would be duplicated.
  await writeFile(
    join(dir, "AGENT.md"),
    `---
name: no-auto-tool-register-agent
model:
  provider: anthropic
  name: claude-opus-4-5
---

# No Auto Tool Register Agent
`,
    "utf8",
  );
  await mkdir(join(dir, "skills", "summarize", "scripts"), { recursive: true });
  await writeFile(
    join(dir, "skills", "summarize", "SKILL.md"),
    `---
name: summarize
description: Summarize text
---

# Summarize Skill
`,
    "utf8",
  );
  // Skill script whose default export is a defineTool() object.
  await writeFile(
    join(dir, "skills", "summarize", "scripts", "summarize.ts"),
    `import { defineTool } from "@poncho-ai/sdk";

export default defineTool({
name: "summarize_text",
description: "Summarize input text",
inputSchema: {
type: "object",
properties: {
content: { type: "string" }
},
required: ["content"]
},
async handler(input) {
return { summary: String(input.content).slice(0, 20) };
}
});
`,
    "utf8",
  );

  const harness = new AgentHarness({ workingDir: dir });
  await harness.initialize();
  const names = harness.listTools().map((tool) => tool.name);

  expect(names).not.toContain("summarize_text");
  expect(names).toContain("run_skill_script");
});
|
|
200
|
+
|
|
201
|
+
// Runs the harness once against a stubbed model client and asserts that the
// first generate() call receives a system prompt describing the skill and a
// tool list containing the skill-related tools.
it("injects SKILL.md context into system prompt", async () => {
  const dir = await mkdtemp(join(tmpdir(), "poncho-harness-skill-context-"));
  // `provider` and `name` must be nested under `model:`; left flat, `model`
  // would be empty and the top-level `name` key would be duplicated.
  await writeFile(
    join(dir, "AGENT.md"),
    `---
name: skill-context-agent
model:
  provider: anthropic
  name: claude-opus-4-5
---

# Skill Context Agent
`,
    "utf8",
  );
  await mkdir(join(dir, "skills", "summarize"), { recursive: true });
  await writeFile(
    join(dir, "skills", "summarize", "SKILL.md"),
    `---
name: summarize
description: Summarize long text into concise output
allowed-tools: summarize_text
---

# Summarize Skill

When users ask for summarization, prefer calling summarize_text.
`,
    "utf8",
  );

  const harness = new AgentHarness({ workingDir: dir });
  await harness.initialize();

  // Single canned response with no tool calls.
  const mockedGenerate = vi.fn().mockResolvedValueOnce({
    text: "done",
    toolCalls: [],
    usage: { input: 5, output: 5 },
    rawContent: [],
  });

  // Replace the harness's modelClient with the stub; the cast is presumably
  // needed because modelClient is not exposed on the public type.
  (harness as unknown as { modelClient: { generate: unknown } }).modelClient = {
    generate: mockedGenerate,
  };

  for await (const _event of harness.run({ task: "summarize this text" })) {
    // consume events
  }

  const firstCall = mockedGenerate.mock.calls[0]?.[0] as
    | { systemPrompt?: string; tools?: Array<{ name: string }> }
    | undefined;
  // Skill metadata injected as XML <available_skills> block
  expect(firstCall?.systemPrompt).toContain("<available_skills");
  expect(firstCall?.systemPrompt).toContain("<name>summarize</name>");
  expect(firstCall?.systemPrompt).toContain("Summarize long text into concise output");
  // Skill-related tools should be passed to the model
  const toolNames = firstCall?.tools?.map((t) => t.name) ?? [];
  expect(toolNames).toContain("activate_skill");
  expect(toolNames).toContain("read_skill_resource");
  expect(toolNames).toContain("list_skill_scripts");
  expect(toolNames).toContain("run_skill_script");
});
|
|
264
|
+
|
|
265
|
+
// Calls the list_skill_scripts tool handler for the "math" skill and asserts it
// returns the .ts and nested .js scripts but not the README.md in scripts/.
it("lists skill scripts through list_skill_scripts", async () => {
  const dir = await mkdtemp(join(tmpdir(), "poncho-harness-skill-script-list-"));
  // `provider` and `name` must be nested under `model:`; left flat, `model`
  // would be empty and the top-level `name` key would be duplicated.
  await writeFile(
    join(dir, "AGENT.md"),
    `---
name: skill-script-list-agent
model:
  provider: anthropic
  name: claude-opus-4-5
---

# Skill Script List Agent
`,
    "utf8",
  );
  await mkdir(join(dir, "skills", "math", "scripts", "nested"), { recursive: true });
  await writeFile(
    join(dir, "skills", "math", "SKILL.md"),
    `---
name: math
description: Simple math scripts
---

# Math Skill
`,
    "utf8",
  );
  // Two scripts (one nested) plus a non-script file that must not be listed.
  await writeFile(
    join(dir, "skills", "math", "scripts", "add.ts"),
    "export default async function run() { return { ok: true }; }\n",
    "utf8",
  );
  await writeFile(
    join(dir, "skills", "math", "scripts", "nested", "multiply.js"),
    "export async function run() { return { ok: true }; }\n",
    "utf8",
  );
  await writeFile(
    join(dir, "skills", "math", "scripts", "README.md"),
    "# not executable\n",
    "utf8",
  );

  const harness = new AgentHarness({ workingDir: dir });
  await harness.initialize();
  const listScripts = harness.listTools().find((tool) => tool.name === "list_skill_scripts");

  expect(listScripts).toBeDefined();
  const result = await listScripts!.handler({ skill: "math" });
  expect(result).toEqual({
    skill: "math",
    scripts: ["scripts/add.ts", "scripts/nested/multiply.js"],
  });
});
|
|
319
|
+
|
|
320
|
+
// Calls the run_skill_script tool handler on a default-exported script and
// asserts the result wraps the script's return value under `output`.
it("runs JavaScript/TypeScript skill scripts through run_skill_script", async () => {
  const dir = await mkdtemp(join(tmpdir(), "poncho-harness-skill-script-runner-"));
  // `provider` and `name` must be nested under `model:`; left flat, `model`
  // would be empty and the top-level `name` key would be duplicated.
  await writeFile(
    join(dir, "AGENT.md"),
    `---
name: skill-script-runner-agent
model:
  provider: anthropic
  name: claude-opus-4-5
---

# Skill Script Runner Agent
`,
    "utf8",
  );
  await mkdir(join(dir, "skills", "math", "scripts"), { recursive: true });
  await writeFile(
    join(dir, "skills", "math", "SKILL.md"),
    `---
name: math
description: Simple math scripts
---

# Math Skill
`,
    "utf8",
  );
  // Script under test: adds the numeric `a` and `b` fields of its input.
  await writeFile(
    join(dir, "skills", "math", "scripts", "add.ts"),
    `export default async function run(input) {
const a = Number(input?.a ?? 0);
const b = Number(input?.b ?? 0);
return { sum: a + b };
}
`,
    "utf8",
  );

  const harness = new AgentHarness({ workingDir: dir });
  await harness.initialize();
  const runner = harness.listTools().find((tool) => tool.name === "run_skill_script");

  expect(runner).toBeDefined();
  const result = await runner!.handler({
    skill: "math",
    script: "add.ts",
    input: { a: 2, b: 3 },
  });
  expect(result).toEqual({
    skill: "math",
    script: "add.ts",
    output: { sum: 5 },
  });
});
|
|
374
|
+
|
|
375
|
+
// Calls run_skill_script with a script path that climbs out of the skill
// directory and asserts the handler returns an error object, not a result.
it("blocks path traversal in run_skill_script", async () => {
  const dir = await mkdtemp(join(tmpdir(), "poncho-harness-skill-script-path-"));
  // `provider` and `name` must be nested under `model:`; left flat, `model`
  // would be empty and the top-level `name` key would be duplicated.
  await writeFile(
    join(dir, "AGENT.md"),
    `---
name: skill-script-path-agent
model:
  provider: anthropic
  name: claude-opus-4-5
---

# Skill Script Path Agent
`,
    "utf8",
  );
  await mkdir(join(dir, "skills", "safe", "scripts"), { recursive: true });
  await writeFile(
    join(dir, "skills", "safe", "SKILL.md"),
    `---
name: safe
description: Safe skill
---

# Safe Skill
`,
    "utf8",
  );

  const harness = new AgentHarness({ workingDir: dir });
  await harness.initialize();
  const runner = harness.listTools().find((tool) => tool.name === "run_skill_script");
  expect(runner).toBeDefined();
  // "../outside.ts" points above the skill's own directory.
  const result = await runner!.handler({
    skill: "safe",
    script: "../outside.ts",
  });
  expect(result).toMatchObject({
    error: expect.stringContaining("must be relative and within the skill directory"),
  });
});
|
|
415
|
+
|
|
416
|
+
// Runs a development and a production harness over the same working directory
// with stubbed model clients, and asserts that only the development system
// prompt contains the "Development Mode Context" authoring guidance.
it("injects local authoring guidance only in development environment", async () => {
  const dir = await mkdtemp(join(tmpdir(), "poncho-harness-dev-guidance-"));
  // `provider` and `name` must be nested under `model:`; left flat, `model`
  // would be empty and the top-level `name` key would be duplicated.
  await writeFile(
    join(dir, "AGENT.md"),
    `---
name: dev-guidance-agent
model:
  provider: anthropic
  name: claude-opus-4-5
---

# Dev Guidance Agent
`,
    "utf8",
  );

  // --- development: guidance expected in the system prompt ---
  const developmentHarness = new AgentHarness({ workingDir: dir, environment: "development" });
  await developmentHarness.initialize();
  const devGenerate = vi.fn().mockResolvedValueOnce({
    text: "done",
    toolCalls: [],
    usage: { input: 5, output: 5 },
    rawContent: [],
  });
  // Replace the harness's modelClient with the stub (cast presumably needed
  // because modelClient is not exposed on the public type).
  (developmentHarness as unknown as { modelClient: { generate: unknown } }).modelClient = {
    generate: devGenerate,
  };
  for await (const _event of developmentHarness.run({ task: "hello" })) {
    // consume events
  }
  const devCall = devGenerate.mock.calls[0]?.[0] as { systemPrompt?: string } | undefined;
  expect(devCall?.systemPrompt).toContain("## Development Mode Context");
  expect(devCall?.systemPrompt).toContain("poncho.config.js");
  expect(devCall?.systemPrompt).toContain("skills/<skill-name>/SKILL.md");

  // --- production: guidance must be absent ---
  const productionHarness = new AgentHarness({ workingDir: dir, environment: "production" });
  await productionHarness.initialize();
  const prodGenerate = vi.fn().mockResolvedValueOnce({
    text: "done",
    toolCalls: [],
    usage: { input: 5, output: 5 },
    rawContent: [],
  });
  (productionHarness as unknown as { modelClient: { generate: unknown } }).modelClient = {
    generate: prodGenerate,
  };
  for await (const _event of productionHarness.run({ task: "hello" })) {
    // consume events
  }
  const prodCall = prodGenerate.mock.calls[0]?.[0] as { systemPrompt?: string } | undefined;
  expect(prodCall?.systemPrompt).not.toContain("## Development Mode Context");
  expect(prodCall?.systemPrompt).not.toContain("skills/<skill-name>/SKILL.md");
});
|
|
469
|
+
|
|
470
|
+
// Drives the harness through two stubbed model turns (one requesting the
// custom "echo" tool, one final answer) and asserts that both a
// tool:completed and a run:completed event are emitted.
it("runs a tool call loop and completes", async () => {
  const dir = await mkdtemp(join(tmpdir(), "poncho-harness-"));
  // `provider` and `name` must be nested under `model:`; left flat, `model`
  // would be empty and the top-level `name` key would be duplicated.
  await writeFile(
    join(dir, "AGENT.md"),
    `---
name: test-agent
model:
  provider: anthropic
  name: claude-opus-4-5
---

# Test Agent
`,
    "utf8",
  );

  const harness = new AgentHarness({
    workingDir: dir,
    toolDefinitions: [
      defineTool({
        name: "echo",
        description: "Echoes input value",
        inputSchema: {
          type: "object",
          properties: { value: { type: "string" } },
          required: ["value"],
        },
        handler: async (input) => ({ echoed: input.value }),
      }),
    ],
  });
  await harness.initialize();

  // First response asks for the echo tool; second response ends the run.
  const mockedGenerate = vi
    .fn()
    .mockResolvedValueOnce({
      text: "",
      toolCalls: [{ id: "tool_1", name: "echo", input: { value: "hi" } }],
      usage: { input: 10, output: 5 },
      rawContent: [],
    })
    .mockResolvedValueOnce({
      text: "done",
      toolCalls: [],
      usage: { input: 5, output: 5 },
      rawContent: [],
    });

  // Replace the harness's modelClient with the stub (cast presumably needed
  // because modelClient is not exposed on the public type).
  (harness as unknown as { modelClient: { generate: unknown } }).modelClient = {
    generate: mockedGenerate,
  };

  const events = [];
  for await (const event of harness.run({ task: "run echo" })) {
    events.push(event);
  }

  expect(events.some((event) => event.type === "tool:completed")).toBe(true);
  expect(events.some((event) => event.type === "run:completed")).toBe(true);
});
|
|
530
|
+
|
|
531
|
+
// With no approvalHandler configured, asserts that a call to a tool marked
// requiresApproval produces tool:approval:required, tool:approval:denied and
// tool:error events, and that the run still reaches run:completed.
it("emits approval events and denies requiresApproval tools by default", async () => {
  const dir = await mkdtemp(join(tmpdir(), "poncho-harness-approval-"));
  // `provider` and `name` must be nested under `model:`; left flat, `model`
  // would be empty and the top-level `name` key would be duplicated.
  await writeFile(
    join(dir, "AGENT.md"),
    `---
name: approval-agent
model:
  provider: anthropic
  name: claude-opus-4-5
---

# Approval Agent
`,
    "utf8",
  );

  const harness = new AgentHarness({
    workingDir: dir,
    toolDefinitions: [
      defineTool({
        name: "dangerous-delete",
        description: "Requires approval",
        requiresApproval: true,
        inputSchema: {
          type: "object",
          properties: { path: { type: "string" } },
          required: ["path"],
        },
        handler: async () => ({ ok: true }),
      }),
    ],
  });
  await harness.initialize();

  // First response requests the gated tool; second response ends the run.
  const mockedGenerate = vi
    .fn()
    .mockResolvedValueOnce({
      text: "",
      toolCalls: [
        {
          id: "tool_approval",
          name: "dangerous-delete",
          input: { path: "/tmp/foo" },
        },
      ],
      usage: { input: 10, output: 5 },
      rawContent: [],
    })
    .mockResolvedValueOnce({
      text: "I could not run that tool without approval.",
      toolCalls: [],
      usage: { input: 5, output: 5 },
      rawContent: [],
    });

  // Replace the harness's modelClient with the stub (cast presumably needed
  // because modelClient is not exposed on the public type).
  (harness as unknown as { modelClient: { generate: unknown } }).modelClient = {
    generate: mockedGenerate,
  };

  const events = [];
  for await (const event of harness.run({ task: "delete the file" })) {
    events.push(event);
  }

  expect(
    events.some((event) => event.type === "tool:approval:required"),
  ).toBe(true);
  expect(events.some((event) => event.type === "tool:approval:denied")).toBe(true);
  expect(events.some((event) => event.type === "tool:error")).toBe(true);
  expect(events.some((event) => event.type === "run:completed")).toBe(true);
});
|
|
602
|
+
|
|
603
|
+
// With an approvalHandler that always resolves true, asserts that a call to a
// tool marked requiresApproval produces tool:approval:granted and
// tool:completed events.
it("grants requiresApproval tools when approval handler allows it", async () => {
  const dir = await mkdtemp(join(tmpdir(), "poncho-harness-approval-ok-"));
  // `provider` and `name` must be nested under `model:`; left flat, `model`
  // would be empty and the top-level `name` key would be duplicated.
  await writeFile(
    join(dir, "AGENT.md"),
    `---
name: approval-agent-ok
model:
  provider: anthropic
  name: claude-opus-4-5
---

# Approval Agent OK
`,
    "utf8",
  );

  const harness = new AgentHarness({
    workingDir: dir,
    approvalHandler: async () => true,
    toolDefinitions: [
      defineTool({
        name: "dangerous-delete",
        description: "Requires approval",
        requiresApproval: true,
        inputSchema: {
          type: "object",
          properties: { path: { type: "string" } },
          required: ["path"],
        },
        handler: async () => ({ ok: true }),
      }),
    ],
  });
  await harness.initialize();

  // First response requests the gated tool; second response ends the run.
  const mockedGenerate = vi
    .fn()
    .mockResolvedValueOnce({
      text: "",
      toolCalls: [
        {
          id: "tool_approval_ok",
          name: "dangerous-delete",
          input: { path: "/tmp/foo" },
        },
      ],
      usage: { input: 10, output: 5 },
      rawContent: [],
    })
    .mockResolvedValueOnce({
      text: "Done.",
      toolCalls: [],
      usage: { input: 5, output: 5 },
      rawContent: [],
    });

  // Replace the harness's modelClient with the stub (cast presumably needed
  // because modelClient is not exposed on the public type).
  (harness as unknown as { modelClient: { generate: unknown } }).modelClient = {
    generate: mockedGenerate,
  };

  const events = [];
  for await (const event of harness.run({ task: "delete the file" })) {
    events.push(event);
  }

  expect(events.some((event) => event.type === "tool:approval:granted")).toBe(true);
  expect(events.some((event) => event.type === "tool:completed")).toBe(true);
});
|
|
671
|
+
|
|
672
|
+
// loadSkillMetadata should turn a space-separated `allowed-tools` frontmatter
// value into an array of tool names on the skill's metadata entry.
it("parses spec-style allowed-tools from SKILL.md frontmatter", async () => {
  const workspace = await mkdtemp(join(tmpdir(), "poncho-harness-allowed-tools-"));
  const skillDir = join(workspace, "skills", "summarize");
  await mkdir(skillDir, { recursive: true });

  const skillDoc = `---
name: summarize
description: Summarize text
allowed-tools: summarize_text read_file
---

# Summarize
`;
  await writeFile(join(skillDir, "SKILL.md"), skillDoc, "utf8");

  const metadata = await loadSkillMetadata(workspace);
  expect(metadata).toHaveLength(1);

  // Exactly one skill was written, so inspect the first entry only.
  const [skill] = metadata;
  expect(skill?.name).toBe("summarize");
  expect(skill?.tools).toEqual(["summarize_text", "read_file"]);
});
|
|
693
|
+
|
|
694
|
+
// loadSkillMetadata should still accept the older `tools:` YAML list form and
// expose its entries as the skill's tool names.
it("keeps backward compatibility with legacy tools list frontmatter", async () => {
  const workspace = await mkdtemp(join(tmpdir(), "poncho-harness-legacy-tools-"));
  const skillDir = join(workspace, "skills", "legacy");
  await mkdir(skillDir, { recursive: true });

  const skillDoc = `---
name: legacy
description: Legacy skill
tools:
- legacy_tool
---

# Legacy
`;
  await writeFile(join(skillDir, "SKILL.md"), skillDoc, "utf8");

  const metadata = await loadSkillMetadata(workspace);
  expect(metadata).toHaveLength(1);

  // Exactly one skill was written, so inspect the first entry only.
  const [skill] = metadata;
  expect(skill?.name).toBe("legacy");
  expect(skill?.tools).toEqual(["legacy_tool"]);
});
|
|
716
|
+
});
|