@clawpify/skills 1.0.4 → 1.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/clawpify/SKILL.md +20 -118
- package/dist/agent.d.ts +139 -9
- package/dist/agent.d.ts.map +1 -1
- package/dist/agent.js +415 -49
- package/dist/index.d.ts +5 -2
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +431 -78
- package/dist/mcp-server.js +1 -1
- package/dist/memory.d.ts +26 -0
- package/dist/memory.d.ts.map +1 -0
- package/dist/memory.js +17 -0
- package/dist/shopify.d.ts +1 -1
- package/dist/shopify.js +1 -1
- package/dist/skills.d.ts +17 -0
- package/dist/skills.d.ts.map +1 -1
- package/dist/skills.js +20 -1
- package/package.json +8 -2
- package/src/agent.test.ts +927 -0
- package/src/agent.ts +639 -68
- package/src/index.ts +21 -2
- package/src/memory.ts +38 -0
- package/src/shopify.ts +2 -2
- package/src/skills.ts +50 -0
|
@@ -0,0 +1,927 @@
|
|
|
1
|
+
import { describe, test, expect, mock } from "bun:test";
|
|
2
|
+
import { ShopifyAgent, DEFAULT_SYSTEM_INSTRUCTION } from "./agent";
|
|
3
|
+
import type { AgentPlugin, AgentHooks, ModelPricing } from "./agent";
|
|
4
|
+
import { ShopifyClient } from "./shopify";
|
|
5
|
+
import { InMemoryStore } from "./memory";
|
|
6
|
+
import type { MemoryStore } from "./memory";
|
|
7
|
+
|
|
8
|
+
// ---------------------------------------------------------------------------
|
|
9
|
+
// Helpers — build realistic Anthropic response shapes
|
|
10
|
+
// ---------------------------------------------------------------------------
|
|
11
|
+
|
|
12
|
+
/**
 * Builds an Anthropic-style `usage` payload for mocked API responses.
 *
 * Any counter that is missing from `overrides` (or is `undefined`/`null`)
 * falls back to a fixed default: 100 input tokens, 50 output tokens and
 * zero for both cache counters.
 *
 * @param overrides - Counters to pin to specific values.
 * @returns A usage object that always carries all four counters.
 */
function makeUsage(
  overrides: Partial<{
    input_tokens: number;
    output_tokens: number;
    cache_creation_input_tokens: number;
    cache_read_input_tokens: number;
  }> = {}
) {
  const {
    input_tokens: inputTokens,
    output_tokens: outputTokens,
    cache_creation_input_tokens: cacheWriteTokens,
    cache_read_input_tokens: cacheReadTokens,
  } = overrides;

  // `??` rather than `||` so that an explicit 0 override is kept as-is.
  return {
    input_tokens: inputTokens ?? 100,
    output_tokens: outputTokens ?? 50,
    cache_creation_input_tokens: cacheWriteTokens ?? 0,
    cache_read_input_tokens: cacheReadTokens ?? 0,
  };
}
|
|
27
|
+
|
|
28
|
+
/**
 * Builds a mocked assistant message whose only content block is plain text
 * and whose `stop_reason` is `"end_turn"`.
 *
 * @param text - Text placed in the single `text` content block.
 * @param usage - Usage payload to attach; defaults to `makeUsage()`.
 * @returns A message-shaped object literal.
 */
function makeTextResponse(text: string, usage = makeUsage()) {
  const content = [{ type: "text", text }];

  return {
    id: "msg_test",
    type: "message",
    role: "assistant",
    model: "claude-sonnet-4-5",
    stop_reason: "end_turn",
    content,
    usage,
  };
}
|
|
39
|
+
|
|
40
|
+
/**
 * Builds a mocked assistant message that carries a `thinking` block
 * (with the fixed signature `"sig_test"`) followed by a `text` block,
 * ending the turn with `stop_reason: "end_turn"`.
 *
 * @param thinking - Text of the thinking block.
 * @param text - Text of the visible answer block.
 * @param usage - Usage payload to attach; defaults to `makeUsage()`.
 * @returns A message-shaped object literal.
 */
function makeThinkingResponse(
  thinking: string,
  text: string,
  usage = makeUsage()
) {
  const message = {
    id: "msg_test",
    type: "message",
    role: "assistant",
    model: "claude-sonnet-4-5",
    stop_reason: "end_turn",
    // Order matters to the tests: thinking first, then the answer text.
    content: [
      { type: "thinking", thinking, signature: "sig_test" },
      { type: "text", text },
    ],
    usage,
  };

  return message;
}
|
|
58
|
+
|
|
59
|
+
/**
 * Builds a mocked assistant message that requests one or more tool calls
 * (`stop_reason: "tool_use"`).
 *
 * Each entry of `toolCalls` becomes one `tool_use` content block whose id is
 * `tool_<index>`, where the index is the entry's position in the array.
 *
 * @param toolCalls - Tool name and input payload for every requested call.
 *   The input is typed as `Record<string, unknown>` because this helper only
 *   forwards it and never inspects its values.
 * @param usage - Usage payload to attach; defaults to `makeUsage()`.
 * @returns A message-shaped object literal.
 */
function makeToolUseResponse(
  toolCalls: ReadonlyArray<{ name: string; input: Record<string, unknown> }>,
  usage = makeUsage()
) {
  return {
    id: "msg_test",
    type: "message",
    role: "assistant",
    model: "claude-sonnet-4-5",
    stop_reason: "tool_use",
    content: toolCalls.map(({ name, input }, index) => ({
      type: "tool_use",
      id: `tool_${index}`,
      name,
      input,
    })),
    usage,
  };
}
|
|
78
|
+
|
|
79
|
+
/**
 * Create a fake MessageStream-like object for testing chatStream().
 *
 * The returned object exposes two members:
 * - an async iterator that yields one `{ type: "content_block_delta", delta }`
 *   event per entry of `deltas`, in order;
 * - `finalMessage()`, which resolves to `finalMsg` unchanged.
 *
 * @param deltas - Delta payloads to replay. Only forwarded, never inspected,
 *   so extra keys are typed as `unknown` rather than `any`.
 * @param finalMsg - Value that `finalMessage()` resolves to.
 */
function makeFakeStream(
  deltas: ReadonlyArray<{ type: string; [key: string]: unknown }>,
  finalMsg: unknown
) {
  return {
    async *[Symbol.asyncIterator]() {
      for (const delta of deltas) {
        yield { type: "content_block_delta", delta };
      }
    },
    finalMessage: async () => finalMsg,
  };
}
|
|
93
|
+
|
|
94
|
+
// ---------------------------------------------------------------------------
|
|
95
|
+
// Shared setup
|
|
96
|
+
// ---------------------------------------------------------------------------
|
|
97
|
+
|
|
98
|
+
/** Skill reference text handed to every agent built by createAgent(). */
const SKILL_CONTENT = "# Test skill content\nSome reference docs here.";

// The mocks live at module level so each test can program and inspect them.
// createAgent() overwrites all three with fresh instances on every call, so
// a test must call createAgent() before touching any of them.
let mockCreate: ReturnType<typeof mock>;
let mockStream: ReturnType<typeof mock>;
let mockGraphql: ReturnType<typeof mock>;

/**
 * Builds a ShopifyAgent for tests with its collaborators replaced by mocks.
 *
 * - `shopify.graphql` is replaced by `mockGraphql`.
 * - The agent's `anthropic` property is replaced by an object whose
 *   `messages.create` / `messages.stream` are `mockCreate` / `mockStream`.
 *
 * @param overrides - Extra agent config spread after the defaults, so any
 *   key given here is the one the agent constructor receives.
 * @returns The patched agent.
 */
function createAgent(
  overrides: {
    plugins?: AgentPlugin[];
    hooks?: AgentHooks;
    pricing?: ModelPricing;
    systemInstruction?: string;
    thinking?: { budgetTokens: number };
    maxIterations?: number;
    memory?: MemoryStore;
  } = {}
) {
  // Fresh mocks per agent so call history never leaks between tests.
  mockCreate = mock();
  mockStream = mock();
  mockGraphql = mock();

  const shopify = new ShopifyClient({
    storeUrl: "test-store.myshopify.com",
    accessToken: "shpat_test",
  });
  shopify.graphql = mockGraphql as any;

  const agent = new ShopifyAgent({
    shopify,
    skillContent: SKILL_CONTENT,
    ...overrides,
  });

  // `anthropic` is private on the agent, hence the cast when swapping it out.
  const fakeAnthropic = {
    messages: { create: mockCreate, stream: mockStream },
  };
  (agent as any).anthropic = fakeAnthropic;

  return agent;
}
|
|
138
|
+
|
|
139
|
+
// ==========================================================================
|
|
140
|
+
// 1. Basic chat
|
|
141
|
+
// ==========================================================================
|
|
142
|
+
|
|
143
|
+
describe("Basic chat", () => {
  // A plain text reply involves no tool round-trip, so the iteration count is 0.
  test("returns a text response for a simple question", async () => {
    const agent = createAgent();
    const reply = makeTextResponse("Here are your products!");
    mockCreate.mockResolvedValueOnce(reply);

    const outcome = await agent.chat("List my products");

    expect(outcome.response).toBe("Here are your products!");
    expect(outcome.history.length).toBeGreaterThan(0);
    expect(outcome.iterationsUsed).toBe(0);
  });

  // The configured instruction must be the text of the first system block.
  test("uses custom system instruction when provided in config", async () => {
    const instruction = "You are a custom bot. Do custom things.";
    const agent = createAgent({ systemInstruction: instruction });
    mockCreate.mockResolvedValueOnce(makeTextResponse("OK"));

    await agent.chat("Hi");

    const firstRequest = mockCreate.mock.calls[0][0];
    expect(firstRequest.system[0].text).toBe(instruction);
  });

  // Prior turns are forwarded verbatim, with the new user message appended.
  test("passes conversation history through to the API", async () => {
    const agent = createAgent();
    mockCreate.mockResolvedValueOnce(makeTextResponse("Got it!"));

    const priorTurns = [
      { role: "user" as const, content: "Hello" },
      { role: "assistant" as const, content: "Hi there!" },
    ];
    await agent.chat("What's new?", priorTurns);

    const { messages } = mockCreate.mock.calls[0][0];
    const expectedMessages = [
      ...priorTurns,
      { role: "user", content: "What's new?" },
    ];
    expect(messages).toEqual(expectedMessages);
  });
});
|
|
186
|
+
|
|
187
|
+
// ==========================================================================
|
|
188
|
+
// 2. Prompt caching
|
|
189
|
+
// ==========================================================================
|
|
190
|
+
|
|
191
|
+
describe("Prompt caching", () => {
  // Expected layout: [instruction block (uncached), skill block (ephemeral cache)].
  test("sends system prompt as an array with cache_control on the skill content block", async () => {
    const agent = createAgent();
    mockCreate.mockResolvedValueOnce(makeTextResponse("Hello!"));

    await agent.chat("Hi");

    const { system } = mockCreate.mock.calls[0][0];
    expect(Array.isArray(system)).toBe(true);
    expect(system).toHaveLength(2);

    const instructionBlock = system[0];
    expect(instructionBlock.type).toBe("text");
    expect(instructionBlock.text).toBe(DEFAULT_SYSTEM_INSTRUCTION);
    expect(instructionBlock.cache_control).toBeUndefined();

    const skillBlock = system[1];
    expect(skillBlock.type).toBe("text");
    expect(skillBlock.text).toBe(SKILL_CONTENT);
    expect(skillBlock.cache_control).toEqual({ type: "ephemeral" });
  });

  // Only the final entry of the tools array is checked for the cache marker.
  test("adds cache_control to the last tool in the tools array", async () => {
    const agent = createAgent();
    mockCreate.mockResolvedValueOnce(makeTextResponse("Done"));

    await agent.chat("Hi");

    const { tools } = mockCreate.mock.calls[0][0];
    const finalTool = tools[tools.length - 1];
    expect(finalTool.cache_control).toEqual({ type: "ephemeral" });
  });
});
|
|
221
|
+
|
|
222
|
+
// ==========================================================================
|
|
223
|
+
// 3. Token usage tracking
|
|
224
|
+
// ==========================================================================
|
|
225
|
+
|
|
226
|
+
describe("Token usage tracking", () => {
  test("returns token usage from a single API call", async () => {
    const agent = createAgent();
    const usage = makeUsage({ input_tokens: 200, output_tokens: 80 });
    mockCreate.mockResolvedValueOnce(makeTextResponse("Result", usage));

    const { usage: reported } = await agent.chat("Question");

    expect(reported.inputTokens).toBe(200);
    expect(reported.outputTokens).toBe(80);
  });

  // Two API calls (tool request + final answer): totals are 100+150 and 30+40.
  test("accumulates token usage across multiple tool-use round-trips", async () => {
    const agent = createAgent();

    const toolTurn = makeToolUseResponse(
      [{ name: "shopify_graphql", input: { query: "{ shop { name } }" } }],
      makeUsage({ input_tokens: 100, output_tokens: 30 })
    );
    const answerTurn = makeTextResponse(
      "Your shop is called Test",
      makeUsage({ input_tokens: 150, output_tokens: 40 })
    );
    mockCreate.mockResolvedValueOnce(toolTurn);
    mockGraphql.mockResolvedValueOnce({ data: { shop: { name: "Test" } } });
    mockCreate.mockResolvedValueOnce(answerTurn);

    const { usage: reported } = await agent.chat("What is my shop name?");

    expect(reported.inputTokens).toBe(250);
    expect(reported.outputTokens).toBe(70);
  });

  // Per-million rates used below: input 3, output 15, cache write 3.75, cache read 0.3.
  test("computes cost using default Sonnet pricing", async () => {
    const agent = createAgent();

    const usage = makeUsage({
      input_tokens: 1_000_000,
      output_tokens: 1_000_000,
      cache_creation_input_tokens: 500_000,
      cache_read_input_tokens: 500_000,
    });
    mockCreate.mockResolvedValueOnce(makeTextResponse("Done", usage));

    const outcome = await agent.chat("Expensive query");

    const inputCost = (1_000_000 * 3) / 1_000_000;
    const outputCost = (1_000_000 * 15) / 1_000_000;
    const cacheWriteCost = (500_000 * 3.75) / 1_000_000;
    const cacheReadCost = (500_000 * 0.3) / 1_000_000;
    const expectedCost = inputCost + outputCost + cacheWriteCost + cacheReadCost;

    expect(outcome.usage.totalCost).toBeCloseTo(expectedCost, 4);
  });

  // With 1M input and 1M output tokens and no cache traffic: 1 + 5 = 6.
  test("computes cost using custom pricing override", async () => {
    const pricing: ModelPricing = {
      inputPerMillion: 1,
      outputPerMillion: 5,
      cacheWritePerMillion: 1.5,
      cacheReadPerMillion: 0.1,
    };
    const agent = createAgent({ pricing });

    const usage = makeUsage({
      input_tokens: 1_000_000,
      output_tokens: 1_000_000,
    });
    mockCreate.mockResolvedValueOnce(makeTextResponse("Done", usage));

    const outcome = await agent.chat("Custom pricing");

    expect(outcome.usage.totalCost).toBeCloseTo(6.0, 4);
  });
});
|
|
311
|
+
|
|
312
|
+
// ==========================================================================
|
|
313
|
+
// 4. Plugin / tool registration
|
|
314
|
+
// ==========================================================================
|
|
315
|
+
|
|
316
|
+
describe("Plugin / tool registration", () => {
  // Shared plugin fixture; tests that need to inspect handler calls build
  // their own plugin around a fresh mock instead of reusing this handler.
  const emailPlugin: AgentPlugin = {
    tool: {
      name: "send_email",
      description: "Send an email",
      input_schema: {
        type: "object" as const,
        properties: {
          to: { type: "string", description: "Recipient" },
          body: { type: "string", description: "Email body" },
        },
        required: ["to", "body"],
      },
    },
    handler: mock(async () => "Email sent successfully"),
  };

  test("includes plugin tools alongside shopify_graphql in API calls", async () => {
    const agent = createAgent({ plugins: [emailPlugin] });
    mockCreate.mockResolvedValueOnce(makeTextResponse("OK"));

    await agent.chat("Send an email");

    const { tools } = mockCreate.mock.calls[0][0];
    const advertisedNames = tools.map((t: any) => t.name);
    expect(advertisedNames).toContain("shopify_graphql");
    expect(advertisedNames).toContain("send_email");
  });

  test("dispatches tool calls to the correct plugin handler", async () => {
    const handler = mock(async () => "Email sent!");
    const agent = createAgent({
      plugins: [{ tool: emailPlugin.tool, handler }],
    });

    const toolTurn = makeToolUseResponse([
      { name: "send_email", input: { to: "a@b.com", body: "Hi" } },
    ]);
    mockCreate.mockResolvedValueOnce(toolTurn);
    mockCreate.mockResolvedValueOnce(makeTextResponse("Email was sent!"));

    await agent.chat("Email Alice");

    expect(handler).toHaveBeenCalledWith({ to: "a@b.com", body: "Hi" });
  });

  test("throws when registering a plugin with the reserved name shopify_graphql", () => {
    const agent = createAgent();
    const conflictingPlugin = {
      tool: {
        name: "shopify_graphql",
        description: "Conflict",
        input_schema: { type: "object" as const, properties: {} },
      },
      handler: async () => "",
    };

    const register = () => agent.registerPlugin(conflictingPlugin);

    expect(register).toThrow(
      'Cannot register plugin with reserved tool name "shopify_graphql"'
    );
  });

  test("throws when registering a duplicate plugin name", () => {
    const agent = createAgent({ plugins: [emailPlugin] });

    const registerAgain = () => agent.registerPlugin(emailPlugin);

    expect(registerAgain).toThrow(
      'Plugin with tool name "send_email" is already registered'
    );
  });

  // The error is reported back to the model as a tool result, not thrown:
  // it is read from the last message of the second API request.
  test("returns an error result for an unknown tool name", async () => {
    const agent = createAgent();

    const toolTurn = makeToolUseResponse([
      { name: "nonexistent_tool", input: { foo: "bar" } },
    ]);
    mockCreate.mockResolvedValueOnce(toolTurn);
    mockCreate.mockResolvedValueOnce(
      makeTextResponse("Sorry, I couldn't do that.")
    );

    await agent.chat("Do something weird");

    const { messages: followUpMessages } = mockCreate.mock.calls[1][0];
    const lastMessage = followUpMessages[followUpMessages.length - 1];
    const toolResult = lastMessage.content[0];

    expect(toolResult.is_error).toBe(true);
    expect(toolResult.content).toContain('Unknown tool "nonexistent_tool"');
  });
});
|
|
410
|
+
|
|
411
|
+
// ==========================================================================
|
|
412
|
+
// 5. Lifecycle hooks
|
|
413
|
+
// ==========================================================================
|
|
414
|
+
|
|
415
|
+
describe("Lifecycle hooks", () => {
  // onRequest receives (message, history) exactly as passed to chat().
  test("calls onRequest before processing", async () => {
    const onRequest = mock(() => {});
    const agent = createAgent({ hooks: { onRequest } });
    mockCreate.mockResolvedValueOnce(makeTextResponse("Hi"));

    await agent.chat("Hello", []);

    expect(onRequest).toHaveBeenCalledTimes(1);
    const hookArgs = onRequest.mock.calls[0];
    expect(hookArgs[0]).toBe("Hello");
    expect(hookArgs[1]).toEqual([]);
  });

  // One tool round-trip means two API requests, hence two hook invocations.
  test("calls onApiCall before each Anthropic API call", async () => {
    const onApiCall = mock(() => {});
    const agent = createAgent({ hooks: { onApiCall } });

    const toolTurn = makeToolUseResponse([
      { name: "shopify_graphql", input: { query: "{ shop { name } }" } },
    ]);
    mockCreate.mockResolvedValueOnce(toolTurn);
    mockGraphql.mockResolvedValueOnce({ data: { shop: { name: "Test" } } });
    mockCreate.mockResolvedValueOnce(makeTextResponse("Done"));

    await agent.chat("Test");

    expect(onApiCall).toHaveBeenCalledTimes(2);
    const firstPayload = onApiCall.mock.calls[0][0];
    expect(firstPayload).toHaveProperty("model");
    expect(firstPayload).toHaveProperty("messages");
    expect(firstPayload).toHaveProperty("tools");
  });

  // onToolCall's first argument is the tool name; onToolResult's third
  // argument is asserted to be `false` for a successful GraphQL call.
  test("calls onToolCall and onToolResult during tool execution", async () => {
    const onToolCall = mock(() => {});
    const onToolResult = mock(() => {});
    const agent = createAgent({ hooks: { onToolCall, onToolResult } });

    const toolTurn = makeToolUseResponse([
      { name: "shopify_graphql", input: { query: "{ shop { name } }" } },
    ]);
    mockCreate.mockResolvedValueOnce(toolTurn);
    mockGraphql.mockResolvedValueOnce({
      data: { shop: { name: "My Shop" } },
    });
    mockCreate.mockResolvedValueOnce(makeTextResponse("Your shop is My Shop"));

    await agent.chat("What is my shop name?");

    expect(onToolCall).toHaveBeenCalledTimes(1);
    expect(onToolCall.mock.calls[0][0]).toBe("shopify_graphql");
    expect(onToolResult).toHaveBeenCalledTimes(1);
    expect(onToolResult.mock.calls[0][2]).toBe(false);
  });

  test("calls onResponse with the final text and usage", async () => {
    const onResponse = mock(() => {});
    const agent = createAgent({ hooks: { onResponse } });

    const usage = makeUsage({ input_tokens: 300, output_tokens: 120 });
    mockCreate.mockResolvedValueOnce(makeTextResponse("Final answer", usage));

    await agent.chat("Tell me something");

    expect(onResponse).toHaveBeenCalledTimes(1);
    const hookArgs = onResponse.mock.calls[0];
    expect(hookArgs[0]).toBe("Final answer");
    expect(hookArgs[1].inputTokens).toBe(300);
  });

  // The rejection must still reach the caller, and onError sees the same error.
  test("calls onError when the API throws", async () => {
    const onError = mock(() => {});
    const agent = createAgent({ hooks: { onError } });

    mockCreate.mockRejectedValueOnce(new Error("API rate limited"));

    const pending = agent.chat("Trigger error");

    await expect(pending).rejects.toThrow("API rate limited");
    expect(onError).toHaveBeenCalledTimes(1);
    expect(onError.mock.calls[0][0].message).toBe("API rate limited");
  });

  // Every hook on this path throws; chat() must still resolve normally.
  test("a throwing hook does not break the agent loop", async () => {
    const explodingHooks = {
      onRequest: () => {
        throw new Error("Hook exploded!");
      },
      onApiCall: () => {
        throw new Error("Hook exploded again!");
      },
      onResponse: () => {
        throw new Error("Hook exploded a third time!");
      },
    };
    const agent = createAgent({ hooks: explodingHooks });

    mockCreate.mockResolvedValueOnce(makeTextResponse("Still works"));

    const outcome = await agent.chat("Test resilience");

    expect(outcome.response).toBe("Still works");
  });
});
|
|
525
|
+
|
|
526
|
+
// ==========================================================================
|
|
527
|
+
// 6. Extended thinking
|
|
528
|
+
// ==========================================================================
|
|
529
|
+
|
|
530
|
+
describe("Extended thinking", () => {
  // With a 10_000-token budget the request is expected to ask for
  // budget + 4096 max_tokens.
  test("passes thinking config to the API when enabled", async () => {
    const agent = createAgent({ thinking: { budgetTokens: 10_000 } });
    mockCreate.mockResolvedValueOnce(
      makeThinkingResponse("Let me think...", "Here's the answer")
    );

    await agent.chat("Complex question");

    const request = mockCreate.mock.calls[0][0];
    expect(request.thinking).toEqual({
      type: "enabled",
      budget_tokens: 10_000,
    });
    expect(request.max_tokens).toBe(10_000 + 4096);
  });

  // Without a thinking config the request carries no `thinking` key and
  // max_tokens is 4096.
  test("does not pass thinking config when disabled", async () => {
    const agent = createAgent();
    mockCreate.mockResolvedValueOnce(makeTextResponse("Simple answer"));

    await agent.chat("Simple question");

    const request = mockCreate.mock.calls[0][0];
    expect(request.thinking).toBeUndefined();
    expect(request.max_tokens).toBe(4096);
  });

  test("extracts thinking text from the response", async () => {
    const agent = createAgent({ thinking: { budgetTokens: 5_000 } });
    const reply = makeThinkingResponse(
      "First I need to check the store...",
      "Your store has 10 products"
    );
    mockCreate.mockResolvedValueOnce(reply);

    const { thinking, response } = await agent.chat("How many products?");

    expect(thinking).toBe("First I need to check the store...");
    expect(response).toBe("Your store has 10 products");
  });

  test("returns undefined thinking when not enabled", async () => {
    const agent = createAgent();
    mockCreate.mockResolvedValueOnce(makeTextResponse("Answer"));

    const outcome = await agent.chat("Question");

    expect(outcome.thinking).toBeUndefined();
  });

  // Thinking from both API calls must appear in the combined result.
  test("accumulates thinking across multiple loop iterations", async () => {
    const agent = createAgent({ thinking: { budgetTokens: 8_000 } });

    // First call: a thinking block followed by a tool request. Built by hand
    // because no helper produces this mixed content.
    const thinkThenCallTool = {
      id: "msg_1",
      type: "message",
      role: "assistant",
      model: "claude-sonnet-4-5",
      stop_reason: "tool_use",
      content: [
        {
          type: "thinking",
          thinking: "I should check the shop name",
          signature: "sig1",
        },
        {
          type: "tool_use",
          id: "tool_0",
          name: "shopify_graphql",
          input: { query: "{ shop { name } }" },
        },
      ],
      usage: makeUsage(),
    };
    mockCreate.mockResolvedValueOnce(thinkThenCallTool);
    mockGraphql.mockResolvedValueOnce({ data: { shop: { name: "Acme" } } });

    // Second call: more thinking, then the final answer.
    mockCreate.mockResolvedValueOnce(
      makeThinkingResponse("Now I know it's Acme", "Your shop is Acme")
    );

    const { thinking, response } = await agent.chat("What's my shop?");

    expect(thinking).toContain("I should check the shop name");
    expect(thinking).toContain("Now I know it's Acme");
    expect(response).toBe("Your shop is Acme");
  });
});
|
|
621
|
+
|
|
622
|
+
// ==========================================================================
|
|
623
|
+
// 7. Max iterations
|
|
624
|
+
// ==========================================================================
|
|
625
|
+
|
|
626
|
+
describe("Max iterations", () => {
  // NOTE(review): despite its title, this test never drives the loop to the
  // default cap; it only checks that a direct text reply uses 0 iterations.
  test("defaults to 20 iterations", async () => {
    const agent = createAgent();
    mockCreate.mockResolvedValueOnce(makeTextResponse("Quick answer"));

    const outcome = await agent.chat("Hi");

    expect(outcome.iterationsUsed).toBe(0);
  });

  test("stops the loop when maxIterations is exceeded", async () => {
    const onError = mock(() => {});
    const agent = createAgent({ maxIterations: 2, hooks: { onError } });

    // Queue three tool-use turns (one more than the cap of 2), each paired
    // with a GraphQL result, followed by a text turn that should go unused.
    for (const index of [0, 1, 2]) {
      const toolTurn = makeToolUseResponse([
        {
          name: "shopify_graphql",
          input: { query: `{ query_${index} }` },
        },
      ]);
      mockCreate.mockResolvedValueOnce(toolTurn);
      mockGraphql.mockResolvedValueOnce({ data: {} });
    }
    mockCreate.mockResolvedValueOnce(makeTextResponse("Should not reach here"));

    const outcome = await agent.chat("Run many queries");

    // The bound checked here is <= 3, which is looser than the configured
    // cap of 2. NOTE(review): tighten once the exact counting semantics of
    // iterationsUsed are confirmed against the agent implementation.
    expect(outcome.iterationsUsed).toBeLessThanOrEqual(3);
    expect(onError).toHaveBeenCalled();
    expect(onError.mock.calls[0][0].message).toContain("maximum iterations");
  });

  // One tool round-trip followed by a text answer counts as one iteration.
  test("tracks iterationsUsed correctly during normal tool use", async () => {
    const agent = createAgent();

    const toolTurn = makeToolUseResponse([
      { name: "shopify_graphql", input: { query: "{ shop { name } }" } },
    ]);
    mockCreate.mockResolvedValueOnce(toolTurn);
    mockGraphql.mockResolvedValueOnce({ data: { shop: { name: "Test" } } });
    mockCreate.mockResolvedValueOnce(makeTextResponse("Done"));

    const outcome = await agent.chat("Test");

    expect(outcome.iterationsUsed).toBe(1);
  });
});
|
|
680
|
+
|
|
681
|
+
// ==========================================================================
|
|
682
|
+
// 8. Streaming
|
|
683
|
+
// ==========================================================================
|
|
684
|
+
|
|
685
|
+
describe("Streaming (chatStream)", () => {
  /** Drains an async iterable into an array, preserving order. */
  const collect = async <T>(source: AsyncIterable<T>): Promise<T[]> => {
    const items: T[] = [];
    for await (const item of source) {
      items.push(item);
    }
    return items;
  };

  // Each text_delta becomes one `text` event; the `done` event carries the
  // full response text from the final message.
  test("yields text delta events", async () => {
    const agent = createAgent();

    const stream = makeFakeStream(
      [
        { type: "text_delta", text: "Hello " },
        { type: "text_delta", text: "world!" },
      ],
      makeTextResponse("Hello world!")
    );
    mockStream.mockReturnValueOnce(stream);

    const events = await collect(agent.chatStream("Hi"));

    const textEvents = events.filter((e) => e.type === "text");
    expect(textEvents).toHaveLength(2);
    expect(textEvents[0]).toEqual({ type: "text", text: "Hello " });
    expect(textEvents[1]).toEqual({ type: "text", text: "world!" });

    const doneEvent = events.find((e) => e.type === "done");
    expect(doneEvent).toBeDefined();
    expect((doneEvent as any).response).toBe("Hello world!");
  });

  // A thinking_delta surfaces as a `thinking` event whose payload key is `text`.
  test("yields thinking delta events when thinking is enabled", async () => {
    const agent = createAgent({ thinking: { budgetTokens: 5_000 } });

    const stream = makeFakeStream(
      [
        { type: "thinking_delta", thinking: "Let me think..." },
        { type: "text_delta", text: "Answer" },
      ],
      makeThinkingResponse("Let me think...", "Answer")
    );
    mockStream.mockReturnValueOnce(stream);

    const events = await collect(agent.chatStream("Question"));

    const thinkingEvents = events.filter((e) => e.type === "thinking");
    expect(thinkingEvents).toHaveLength(1);
    expect(thinkingEvents[0]).toEqual({
      type: "thinking",
      text: "Let me think...",
    });
  });

  test("yields tool_call and tool_result events during tool use", async () => {
    const agent = createAgent();

    // First stream: no deltas, final message requests a tool call.
    const toolStream = makeFakeStream(
      [],
      makeToolUseResponse([
        {
          name: "shopify_graphql",
          input: { query: "{ shop { name } }" },
        },
      ])
    );
    mockStream.mockReturnValueOnce(toolStream);
    mockGraphql.mockResolvedValueOnce({ data: { shop: { name: "Acme" } } });

    // Second stream: the final text answer.
    const answerStream = makeFakeStream(
      [{ type: "text_delta", text: "Your shop is Acme" }],
      makeTextResponse("Your shop is Acme")
    );
    mockStream.mockReturnValueOnce(answerStream);

    const events = await collect(agent.chatStream("Shop name?"));

    const toolCallEvent = events.find((e) => e.type === "tool_call");
    expect(toolCallEvent).toBeDefined();
    expect((toolCallEvent as any).name).toBe("shopify_graphql");

    const toolResultEvent = events.find((e) => e.type === "tool_result");
    expect(toolResultEvent).toBeDefined();
    expect((toolResultEvent as any).isError).toBe(false);
  });

  test("emits a done event with usage and history", async () => {
    const agent = createAgent();

    const usage = makeUsage({ input_tokens: 500, output_tokens: 100 });
    const stream = makeFakeStream(
      [{ type: "text_delta", text: "Done" }],
      makeTextResponse("Done", usage)
    );
    mockStream.mockReturnValueOnce(stream);

    const events = await collect(agent.chatStream("Hi"));

    const done = events.find((e) => e.type === "done") as any;
    expect(done).toBeDefined();
    expect(done.usage.inputTokens).toBe(500);
    expect(done.usage.outputTokens).toBe(100);
    expect(done.history.length).toBeGreaterThan(0);
    expect(done.iterationsUsed).toBe(0);
  });
});
|
|
805
|
+
|
|
806
|
+
// ==========================================================================
|
|
807
|
+
// 9. Memory (chatWithMemory)
|
|
808
|
+
// ==========================================================================
|
|
809
|
+
|
|
810
|
+
// Exercises ShopifyAgent.chatWithMemory against an InMemoryStore: history is
// persisted per session id, sessions do not leak into each other, a missing
// store is rejected, and clearing a session starts it over.
// NOTE(review): the index-based reads of mockCreate.mock.calls assume the mock's
// call log is reset between tests — confirm in the setup code outside this suite.
describe("Memory (chatWithMemory)", () => {
  test("loads history from the store, chats, and saves updated history", async () => {
    const sessionId = "sess-1";
    const store = new InMemoryStore();
    const agent = createAgent({ memory: store });

    // Turn 1 — nothing stored yet for this session.
    mockCreate.mockResolvedValueOnce(makeTextResponse("You have 5 products"));
    const { response: firstReply } = await agent.chatWithMemory(
      sessionId,
      "How many products?"
    );
    expect(firstReply).toBe("You have 5 products");

    // The store must now hold something for the session.
    const persisted = await store.load(sessionId);
    expect(persisted.length).toBeGreaterThan(0);

    // Turn 2 — same session, so the earlier exchange should be replayed.
    mockCreate.mockResolvedValueOnce(makeTextResponse("Here they are..."));
    const { response: secondReply } = await agent.chatWithMemory(
      sessionId,
      "Show me the first one"
    );
    expect(secondReply).toBe("Here they are...");

    // First argument of the second recorded create call carries the messages.
    const { messages: replayed } = mockCreate.mock.calls[1][0];
    expect(replayed.length).toBeGreaterThan(1);
  });

  test("different sessions are independent", async () => {
    const store = new InMemoryStore();
    const agent = createAgent({ memory: store });

    mockCreate.mockResolvedValueOnce(makeTextResponse("Session A"));
    await agent.chatWithMemory("a", "Hello from A");

    mockCreate.mockResolvedValueOnce(makeTextResponse("Session B"));
    await agent.chatWithMemory("b", "Hello from B");

    // The request made for session "b" must contain only its own user message,
    // none of session "a"'s history.
    const { messages: sentForB } = mockCreate.mock.calls[1][0];
    expect(sentForB).toHaveLength(1);
    expect(sentForB[0].content).toBe("Hello from B");
  });

  test("throws when no memory store is configured", async () => {
    // Agent built without a `memory` option.
    const agent = createAgent();

    const attempt = agent.chatWithMemory("sess-1", "Hello");
    await expect(attempt).rejects.toThrow(
      "chatWithMemory requires a MemoryStore"
    );
  });

  test("clearing a session removes its history", async () => {
    const sessionId = "sess-1";
    const store = new InMemoryStore();
    const agent = createAgent({ memory: store });

    mockCreate.mockResolvedValueOnce(makeTextResponse("First"));
    await agent.chatWithMemory(sessionId, "Hello");

    await store.clear(sessionId);

    // With the session wiped, the next request should carry only the new message.
    mockCreate.mockResolvedValueOnce(makeTextResponse("Fresh start"));
    await agent.chatWithMemory(sessionId, "Hi again");

    const { messages: sentAfterClear } = mockCreate.mock.calls[1][0];
    expect(sentAfterClear).toHaveLength(1);
  });
});
|
|
882
|
+
|
|
883
|
+
// ==========================================================================
|
|
884
|
+
// 10. InMemoryStore
|
|
885
|
+
// ==========================================================================
|
|
886
|
+
|
|
887
|
+
// Direct checks of the InMemoryStore contract used by the agent: save/load
// round-trip, empty result for unknown ids, clear, and isolation of stored
// data from later mutation of the caller's objects.
describe("InMemoryStore", () => {
  test("save and load round-trip correctly", async () => {
    const store = new InMemoryStore();
    const conversation = [
      { role: "user", content: "Hello" },
      { role: "assistant", content: "Hi!" },
    ];

    await store.save("test", conversation);

    // Structural equality — what comes back must match what went in.
    expect(await store.load("test")).toEqual(conversation);
  });

  test("returns empty array for unknown session", async () => {
    const store = new InMemoryStore();

    // Nothing was ever saved under this id.
    expect(await store.load("nonexistent")).toEqual([]);
  });

  test("clear removes the session data", async () => {
    const store = new InMemoryStore();
    await store.save("test", [{ role: "user", content: "Hi" }]);

    await store.clear("test");

    // A cleared session reads back the same as one that never existed.
    expect(await store.load("test")).toEqual([]);
  });

  test("stores a deep copy so mutations don't affect stored data", async () => {
    const store = new InMemoryStore();
    const message = { role: "user", content: "Original" };

    await store.save("test", [message]);
    // Mutate the caller-side object after saving; the stored entry must not change.
    message.content = "Mutated";

    const reloaded = await store.load("test");
    expect(reloaded[0].content).toBe("Original");
  });
});
|