zeitlich 0.2.14 → 0.2.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +62 -12
- package/dist/adapters/sandbox/daytona/index.cjs +52 -23
- package/dist/adapters/sandbox/daytona/index.cjs.map +1 -1
- package/dist/adapters/sandbox/daytona/index.d.cts +10 -2
- package/dist/adapters/sandbox/daytona/index.d.ts +10 -2
- package/dist/adapters/sandbox/daytona/index.js +52 -23
- package/dist/adapters/sandbox/daytona/index.js.map +1 -1
- package/dist/adapters/sandbox/inmemory/index.cjs +21 -16
- package/dist/adapters/sandbox/inmemory/index.cjs.map +1 -1
- package/dist/adapters/sandbox/inmemory/index.d.cts +1 -1
- package/dist/adapters/sandbox/inmemory/index.d.ts +1 -1
- package/dist/adapters/sandbox/inmemory/index.js +21 -16
- package/dist/adapters/sandbox/inmemory/index.js.map +1 -1
- package/dist/adapters/sandbox/virtual/index.cjs +38 -38
- package/dist/adapters/sandbox/virtual/index.cjs.map +1 -1
- package/dist/adapters/sandbox/virtual/index.d.cts +6 -6
- package/dist/adapters/sandbox/virtual/index.d.ts +6 -6
- package/dist/adapters/sandbox/virtual/index.js +37 -37
- package/dist/adapters/sandbox/virtual/index.js.map +1 -1
- package/dist/adapters/thread/google-genai/index.cjs +22 -0
- package/dist/adapters/thread/google-genai/index.cjs.map +1 -1
- package/dist/adapters/thread/google-genai/index.d.cts +3 -3
- package/dist/adapters/thread/google-genai/index.d.ts +3 -3
- package/dist/adapters/thread/google-genai/index.js +22 -0
- package/dist/adapters/thread/google-genai/index.js.map +1 -1
- package/dist/adapters/thread/langchain/index.cjs +22 -0
- package/dist/adapters/thread/langchain/index.cjs.map +1 -1
- package/dist/adapters/thread/langchain/index.d.cts +3 -3
- package/dist/adapters/thread/langchain/index.d.ts +3 -3
- package/dist/adapters/thread/langchain/index.js +22 -0
- package/dist/adapters/thread/langchain/index.js.map +1 -1
- package/dist/index.cjs +38 -11
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +6 -6
- package/dist/index.d.ts +6 -6
- package/dist/index.js +38 -11
- package/dist/index.js.map +1 -1
- package/dist/{types-B9ljZewB.d.cts → types-35POpVfa.d.cts} +6 -0
- package/dist/{types-B9ljZewB.d.ts → types-35POpVfa.d.ts} +6 -0
- package/dist/{types-CDubRtad.d.cts → types-BMRzfELQ.d.cts} +2 -0
- package/dist/{types-CDubRtad.d.ts → types-BMRzfELQ.d.ts} +2 -0
- package/dist/{types-CwwgQ_9H.d.ts → types-BSOte_8s.d.ts} +6 -2
- package/dist/{types-BVP87m_W.d.cts → types-DCi2qXjN.d.cts} +6 -2
- package/dist/{types-GpMU4b0w.d.cts → types-Drli9aCK.d.cts} +3 -1
- package/dist/{types-B4C9txdq.d.ts → types-XPtivmSJ.d.ts} +3 -1
- package/dist/workflow.cjs +23 -11
- package/dist/workflow.cjs.map +1 -1
- package/dist/workflow.d.cts +6 -6
- package/dist/workflow.d.ts +6 -6
- package/dist/workflow.js +23 -11
- package/dist/workflow.js.map +1 -1
- package/package.json +7 -3
- package/src/adapters/sandbox/daytona/filesystem.ts +43 -19
- package/src/adapters/sandbox/daytona/index.ts +16 -3
- package/src/adapters/sandbox/daytona/types.ts +4 -0
- package/src/adapters/sandbox/inmemory/index.ts +22 -16
- package/src/adapters/sandbox/virtual/filesystem.ts +29 -31
- package/src/adapters/sandbox/virtual/index.ts +5 -3
- package/src/adapters/sandbox/virtual/provider.ts +5 -2
- package/src/adapters/sandbox/virtual/types.ts +3 -0
- package/src/adapters/sandbox/virtual/with-virtual-sandbox.ts +4 -3
- package/src/adapters/thread/google-genai/activities.ts +11 -0
- package/src/adapters/thread/langchain/activities.ts +11 -0
- package/src/lib/sandbox/tree.integration.test.ts +153 -0
- package/src/lib/sandbox/types.ts +2 -0
- package/src/lib/session/session-edge-cases.integration.test.ts +962 -0
- package/src/lib/session/session.integration.test.ts +852 -0
- package/src/lib/session/session.ts +11 -5
- package/src/lib/session/types.ts +2 -0
- package/src/lib/skills/skills.integration.test.ts +308 -0
- package/src/lib/state/manager.integration.test.ts +342 -0
- package/src/lib/subagent/register.ts +22 -7
- package/src/lib/subagent/subagent.integration.test.ts +467 -0
- package/src/lib/thread/id.test.ts +50 -0
- package/src/lib/thread/manager.ts +20 -1
- package/src/lib/thread/types.ts +6 -0
- package/src/lib/tool-router/auto-append-sandbox.integration.test.ts +344 -0
- package/src/lib/tool-router/router-edge-cases.integration.test.ts +623 -0
- package/src/lib/tool-router/router.integration.test.ts +699 -0
- package/src/lib/types.test.ts +29 -0
|
@@ -0,0 +1,852 @@
|
|
|
1
|
+
import { describe, expect, it, vi, beforeEach } from "vitest";
|
|
2
|
+
import { z } from "zod";
|
|
3
|
+
import type { ToolResultConfig, TokenUsage } from "../types";
|
|
4
|
+
import type { ThreadOps } from "./types";
|
|
5
|
+
import type { RunAgentActivity } from "../model/types";
|
|
6
|
+
import type { RawToolCall } from "../tool-router/types";
|
|
7
|
+
import type { SandboxOps } from "../sandbox/types";
|
|
8
|
+
|
|
9
|
+
// ---------------------------------------------------------------------------
|
|
10
|
+
// Mock @temporalio/workflow
|
|
11
|
+
// ---------------------------------------------------------------------------
|
|
12
|
+
|
|
13
|
+
// Counter behind the mocked `uuid4`; the suite resets it in `beforeEach`.
let idCounter = 0;

vi.mock("@temporalio/workflow", () => {
  /**
   * Lightweight replacement for `ApplicationFailure`: a plain `Error`
   * subclass that carries an optional `nonRetryable` flag.
   */
  class MockApplicationFailure extends Error {
    nonRetryable?: boolean;

    /** Builds a failure from a message and an optional `nonRetryable` flag. */
    static create(options: { message: string; nonRetryable?: boolean }) {
      const failure = new MockApplicationFailure(options.message);
      failure.nonRetryable = options.nonRetryable;
      return failure;
    }

    /** Wraps any thrown value, keeping only its message (or its string form). */
    static fromError(error: unknown) {
      const message = error instanceof Error ? error.message : String(error);
      return new MockApplicationFailure(message);
    }
  }

  // Definitions are plain tagged records: `{ __type, name }`.
  const define = (__type: string) => (name: string) => ({ __type, name });

  return {
    // Activities proxy is an empty object cast to the requested shape.
    proxyActivities<T>() {
      return {} as T;
    },
    // Evaluates the predicate once and resolves with its result.
    async condition(fn: () => boolean) {
      return fn();
    },
    defineUpdate: define("update"),
    defineQuery: define("query"),
    // Handlers are accepted and discarded.
    setHandler(_def: unknown, _handler: unknown): void {},
    // Sequential ids: the counter is zero-padded into the last UUID group.
    uuid4: () => {
      idCounter += 1;
      return `00000000-0000-0000-0000-${String(idCounter).padStart(12, "0")}`;
    },
    ApplicationFailure: MockApplicationFailure,
  };
});
|
|
45
|
+
|
|
46
|
+
import { createSession } from "./session";
|
|
47
|
+
import { createAgentStateManager } from "../state/manager";
|
|
48
|
+
import { defineTool } from "../tool-router/router";
|
|
49
|
+
import type { ToolHandlerResponse, RouterContext } from "../tool-router/types";
|
|
50
|
+
|
|
51
|
+
// ---------------------------------------------------------------------------
|
|
52
|
+
// Helpers
|
|
53
|
+
// ---------------------------------------------------------------------------
|
|
54
|
+
|
|
55
|
+
/**
 * Returns `arr[index]`, throwing instead of silently yielding `undefined`
 * when `index` does not address an existing slot.
 *
 * The check is made on the slot, not on the stored value, so an element that
 * is legitimately `undefined` is returned rather than reported as missing.
 *
 * @param arr - Array to read from (never mutated).
 * @param index - Zero-based position of the wanted element.
 * @returns The element stored at `index`.
 * @throws Error when `index` is negative, fractional, past the end, or
 *   points at a hole in a sparse array.
 */
function at<T>(arr: readonly T[], index: number): T {
  // An own-property check rejects negative, fractional and out-of-range
  // indices as well as sparse holes, while accepting stored `undefined`.
  if (!Object.prototype.hasOwnProperty.call(arr, index)) {
    throw new Error(`Index ${index} out of bounds`);
  }
  // The slot exists, so the value is a genuine `T`.
  return arr[index] as T;
}
|
|
60
|
+
|
|
61
|
+
/**
 * Builds a `ThreadOps` test double that records every call.
 *
 * @returns `ops`, the recording implementation, and `log`, the ordered list
 *   of `{ op, args }` entries appended as each operation is invoked.
 */
function createMockThreadOps() {
  const log: { op: string; args: unknown[] }[] = [];

  // Single funnel for all operations: store the op name with its arguments.
  const record = (op: string, ...args: unknown[]): void => {
    log.push({ op, args });
  };

  const ops: ThreadOps = {
    async initializeThread(threadId) {
      record("initializeThread", threadId);
    },
    async appendHumanMessage(threadId, content) {
      record("appendHumanMessage", threadId, content);
    },
    async appendToolResult(config) {
      record("appendToolResult", config);
    },
    async appendSystemMessage(threadId, content) {
      record("appendSystemMessage", threadId, content);
    },
    async forkThread(source, target) {
      record("forkThread", source, target);
    },
  };

  return { ops, log };
}
|
|
84
|
+
|
|
85
|
+
/** One scripted model turn, as replayed by `createScriptedRunAgent`. */
interface TurnScript {
  /** Value returned as the turn's `message`. */
  message: unknown;
  /** Tool calls returned for the turn (surfaced as `rawToolCalls`). */
  toolCalls: RawToolCall[];
  /** Optional token usage returned for the turn. */
  usage?: TokenUsage;
}
|
|
90
|
+
|
|
91
|
+
/**
 * Creates a run-agent activity that replays `turns` in order, one per call.
 *
 * @param turns - Scripted responses, consumed front to back.
 * @returns An activity resolving to the next scripted turn; once the script
 *   is exhausted, every further call resolves to a `"done"` message with no
 *   tool calls and no usage.
 */
function createScriptedRunAgent(turns: TurnScript[]): RunAgentActivity<unknown> {
  let cursor = 0;

  return async () => {
    const scripted = turns[cursor];
    cursor += 1;

    if (scripted) {
      const { message, toolCalls, usage } = scripted;
      return { message, rawToolCalls: toolCalls, usage };
    }

    // Script exhausted: answer with a terminal turn that has no tool calls.
    return { message: "done", rawToolCalls: [], usage: undefined };
  };
}
|
|
105
|
+
|
|
106
|
+
/**
 * Defines the `Echo` tool used throughout the suite: it takes `{ text }` and
 * answers with `Echo: <text>`, returning the text as `data.echoed`.
 */
function createEchoTool() {
  const schema = z.object({ text: z.string() });

  const handler = async (
    args: { text: string },
    _ctx: RouterContext,
  ): Promise<ToolHandlerResponse<{ echoed: string }>> => {
    const { text } = args;
    return {
      toolResponse: `Echo: ${text}`,
      data: { echoed: text },
    };
  };

  return defineTool({
    name: "Echo" as const,
    description: "echoes input",
    schema,
    handler,
  });
}
|
|
120
|
+
|
|
121
|
+
// ---------------------------------------------------------------------------
|
|
122
|
+
// Tests
|
|
123
|
+
// ---------------------------------------------------------------------------
|
|
124
|
+
|
|
125
|
+
describe("createSession integration", () => {
|
|
126
|
+
beforeEach(() => {
|
|
127
|
+
idCounter = 0;
|
|
128
|
+
});
|
|
129
|
+
|
|
130
|
+
// --- Basic completion ---
|
|
131
|
+
|
|
132
|
+
it("completes immediately when runAgent returns no tool calls", async () => {
|
|
133
|
+
const { ops, log } = createMockThreadOps();
|
|
134
|
+
|
|
135
|
+
const session = await createSession({
|
|
136
|
+
agentName: "TestAgent",
|
|
137
|
+
threadId: "thread-1",
|
|
138
|
+
runAgent: createScriptedRunAgent([
|
|
139
|
+
{ message: "Hello!", toolCalls: [] },
|
|
140
|
+
]),
|
|
141
|
+
threadOps: ops,
|
|
142
|
+
buildContextMessage: () => "What is 2+2?",
|
|
143
|
+
});
|
|
144
|
+
|
|
145
|
+
const stateManager = createAgentStateManager({
|
|
146
|
+
initialState: { systemPrompt: "You are a test assistant." },
|
|
147
|
+
});
|
|
148
|
+
|
|
149
|
+
const result = await session.runSession({ stateManager });
|
|
150
|
+
|
|
151
|
+
expect(result.exitReason).toBe("completed");
|
|
152
|
+
expect(result.finalMessage).toBe("Hello!");
|
|
153
|
+
expect(result.threadId).toBe("thread-1");
|
|
154
|
+
|
|
155
|
+
const systemOps = log.filter((l) => l.op === "appendSystemMessage");
|
|
156
|
+
expect(systemOps).toHaveLength(1);
|
|
157
|
+
expect(at(systemOps, 0).args[1]).toBe("You are a test assistant.");
|
|
158
|
+
|
|
159
|
+
const humanOps = log.filter((l) => l.op === "appendHumanMessage");
|
|
160
|
+
expect(humanOps).toHaveLength(1);
|
|
161
|
+
expect(at(humanOps, 0).args[1]).toBe("What is 2+2?");
|
|
162
|
+
});
|
|
163
|
+
|
|
164
|
+
// --- Tool execution ---
|
|
165
|
+
|
|
166
|
+
it("executes tool calls and completes on next turn", async () => {
|
|
167
|
+
const { ops, log } = createMockThreadOps();
|
|
168
|
+
|
|
169
|
+
const session = await createSession({
|
|
170
|
+
agentName: "TestAgent",
|
|
171
|
+
threadId: "thread-1",
|
|
172
|
+
runAgent: createScriptedRunAgent([
|
|
173
|
+
{
|
|
174
|
+
message: "Let me echo that.",
|
|
175
|
+
toolCalls: [{ id: "tc-1", name: "Echo", args: { text: "hello" } }],
|
|
176
|
+
},
|
|
177
|
+
{
|
|
178
|
+
message: "Done echoing.",
|
|
179
|
+
toolCalls: [],
|
|
180
|
+
},
|
|
181
|
+
]),
|
|
182
|
+
threadOps: ops,
|
|
183
|
+
tools: { Echo: createEchoTool() },
|
|
184
|
+
buildContextMessage: () => "Echo hello for me.",
|
|
185
|
+
});
|
|
186
|
+
|
|
187
|
+
const stateManager = createAgentStateManager({
|
|
188
|
+
initialState: { systemPrompt: "You are a test assistant." },
|
|
189
|
+
});
|
|
190
|
+
|
|
191
|
+
const result = await session.runSession({ stateManager });
|
|
192
|
+
|
|
193
|
+
expect(result.exitReason).toBe("completed");
|
|
194
|
+
expect(result.finalMessage).toBe("Done echoing.");
|
|
195
|
+
|
|
196
|
+
const toolResults = log.filter((l) => l.op === "appendToolResult");
|
|
197
|
+
expect(toolResults).toHaveLength(1);
|
|
198
|
+
const resultConfig = at(toolResults, 0).args[0] as ToolResultConfig;
|
|
199
|
+
expect(resultConfig.toolName).toBe("Echo");
|
|
200
|
+
expect(resultConfig.content).toBe("Echo: hello");
|
|
201
|
+
});
|
|
202
|
+
|
|
203
|
+
// --- Multi-turn loop ---
|
|
204
|
+
|
|
205
|
+
it("runs multiple turns with tool calls before completing", async () => {
|
|
206
|
+
const { ops } = createMockThreadOps();
|
|
207
|
+
|
|
208
|
+
const session = await createSession({
|
|
209
|
+
agentName: "TestAgent",
|
|
210
|
+
threadId: "thread-1",
|
|
211
|
+
runAgent: createScriptedRunAgent([
|
|
212
|
+
{
|
|
213
|
+
message: "turn 1",
|
|
214
|
+
toolCalls: [{ id: "tc-1", name: "Echo", args: { text: "one" } }],
|
|
215
|
+
},
|
|
216
|
+
{
|
|
217
|
+
message: "turn 2",
|
|
218
|
+
toolCalls: [{ id: "tc-2", name: "Echo", args: { text: "two" } }],
|
|
219
|
+
},
|
|
220
|
+
{
|
|
221
|
+
message: "turn 3",
|
|
222
|
+
toolCalls: [{ id: "tc-3", name: "Echo", args: { text: "three" } }],
|
|
223
|
+
},
|
|
224
|
+
{
|
|
225
|
+
message: "final",
|
|
226
|
+
toolCalls: [],
|
|
227
|
+
},
|
|
228
|
+
]),
|
|
229
|
+
threadOps: ops,
|
|
230
|
+
tools: { Echo: createEchoTool() },
|
|
231
|
+
buildContextMessage: () => "Count to three.",
|
|
232
|
+
});
|
|
233
|
+
|
|
234
|
+
const stateManager = createAgentStateManager({
|
|
235
|
+
initialState: { systemPrompt: "You are a test assistant." },
|
|
236
|
+
});
|
|
237
|
+
|
|
238
|
+
const result = await session.runSession({ stateManager });
|
|
239
|
+
|
|
240
|
+
expect(result.exitReason).toBe("completed");
|
|
241
|
+
expect(result.finalMessage).toBe("final");
|
|
242
|
+
expect(result.usage.turns).toBe(4);
|
|
243
|
+
});
|
|
244
|
+
|
|
245
|
+
// --- MaxTurns limit ---
|
|
246
|
+
|
|
247
|
+
it("stops at maxTurns and returns null finalMessage", async () => {
|
|
248
|
+
const { ops } = createMockThreadOps();
|
|
249
|
+
|
|
250
|
+
const infiniteAgent = createScriptedRunAgent(
|
|
251
|
+
Array.from({ length: 10 }, (_, i) => ({
|
|
252
|
+
message: `turn ${i + 1}`,
|
|
253
|
+
toolCalls: [{ id: `tc-${i}`, name: "Echo", args: { text: `${i}` } }],
|
|
254
|
+
})),
|
|
255
|
+
);
|
|
256
|
+
|
|
257
|
+
const session = await createSession({
|
|
258
|
+
agentName: "TestAgent",
|
|
259
|
+
threadId: "thread-1",
|
|
260
|
+
maxTurns: 3,
|
|
261
|
+
runAgent: infiniteAgent,
|
|
262
|
+
threadOps: ops,
|
|
263
|
+
tools: { Echo: createEchoTool() },
|
|
264
|
+
buildContextMessage: () => "go",
|
|
265
|
+
});
|
|
266
|
+
|
|
267
|
+
const stateManager = createAgentStateManager({
|
|
268
|
+
initialState: { systemPrompt: "You are a test assistant." },
|
|
269
|
+
});
|
|
270
|
+
|
|
271
|
+
const result = await session.runSession({ stateManager });
|
|
272
|
+
|
|
273
|
+
expect(result.exitReason).toBe("max_turns");
|
|
274
|
+
expect(result.finalMessage).toBeNull();
|
|
275
|
+
expect(result.usage.turns).toBe(3);
|
|
276
|
+
});
|
|
277
|
+
|
|
278
|
+
// --- Session hooks ---
|
|
279
|
+
|
|
280
|
+
it("calls onSessionStart and onSessionEnd hooks", async () => {
|
|
281
|
+
const { ops } = createMockThreadOps();
|
|
282
|
+
const hookLog: string[] = [];
|
|
283
|
+
|
|
284
|
+
const session = await createSession({
|
|
285
|
+
agentName: "TestAgent",
|
|
286
|
+
threadId: "thread-1",
|
|
287
|
+
runAgent: createScriptedRunAgent([
|
|
288
|
+
{ message: "done", toolCalls: [] },
|
|
289
|
+
]),
|
|
290
|
+
threadOps: ops,
|
|
291
|
+
buildContextMessage: () => "hi",
|
|
292
|
+
hooks: {
|
|
293
|
+
onSessionStart: async ({ agentName, threadId }) => {
|
|
294
|
+
hookLog.push(`start:${agentName}:${threadId}`);
|
|
295
|
+
},
|
|
296
|
+
onSessionEnd: async ({ agentName, exitReason, turns }) => {
|
|
297
|
+
hookLog.push(`end:${agentName}:${exitReason}:${turns}`);
|
|
298
|
+
},
|
|
299
|
+
},
|
|
300
|
+
});
|
|
301
|
+
|
|
302
|
+
const stateManager = createAgentStateManager({
|
|
303
|
+
initialState: { systemPrompt: "test" },
|
|
304
|
+
});
|
|
305
|
+
|
|
306
|
+
await session.runSession({ stateManager });
|
|
307
|
+
|
|
308
|
+
expect(hookLog).toEqual([
|
|
309
|
+
"start:TestAgent:thread-1",
|
|
310
|
+
"end:TestAgent:completed:1",
|
|
311
|
+
]);
|
|
312
|
+
});
|
|
313
|
+
|
|
314
|
+
// --- System prompt ---
|
|
315
|
+
|
|
316
|
+
it("throws when system prompt is missing", async () => {
|
|
317
|
+
const { ops } = createMockThreadOps();
|
|
318
|
+
|
|
319
|
+
const session = await createSession({
|
|
320
|
+
agentName: "TestAgent",
|
|
321
|
+
threadId: "thread-1",
|
|
322
|
+
runAgent: createScriptedRunAgent([]),
|
|
323
|
+
threadOps: ops,
|
|
324
|
+
buildContextMessage: () => "hi",
|
|
325
|
+
});
|
|
326
|
+
|
|
327
|
+
const stateManager = createAgentStateManager({
|
|
328
|
+
initialState: {},
|
|
329
|
+
});
|
|
330
|
+
|
|
331
|
+
await expect(session.runSession({ stateManager })).rejects.toThrow(
|
|
332
|
+
"No system prompt in state",
|
|
333
|
+
);
|
|
334
|
+
});
|
|
335
|
+
|
|
336
|
+
it("skips system prompt when appendSystemPrompt is false", async () => {
|
|
337
|
+
const { ops, log } = createMockThreadOps();
|
|
338
|
+
|
|
339
|
+
const session = await createSession({
|
|
340
|
+
agentName: "TestAgent",
|
|
341
|
+
threadId: "thread-1",
|
|
342
|
+
appendSystemPrompt: false,
|
|
343
|
+
runAgent: createScriptedRunAgent([
|
|
344
|
+
{ message: "ok", toolCalls: [] },
|
|
345
|
+
]),
|
|
346
|
+
threadOps: ops,
|
|
347
|
+
buildContextMessage: () => "hi",
|
|
348
|
+
});
|
|
349
|
+
|
|
350
|
+
const stateManager = createAgentStateManager({
|
|
351
|
+
initialState: {},
|
|
352
|
+
});
|
|
353
|
+
|
|
354
|
+
const result = await session.runSession({ stateManager });
|
|
355
|
+
|
|
356
|
+
expect(result.exitReason).toBe("completed");
|
|
357
|
+
const systemOps = log.filter((l) => l.op === "appendSystemMessage");
|
|
358
|
+
expect(systemOps).toHaveLength(0);
|
|
359
|
+
const initOps = log.filter((l) => l.op === "initializeThread");
|
|
360
|
+
expect(initOps).toHaveLength(1);
|
|
361
|
+
});
|
|
362
|
+
|
|
363
|
+
// --- Token usage ---
|
|
364
|
+
|
|
365
|
+
it("accumulates token usage across turns", async () => {
|
|
366
|
+
const { ops } = createMockThreadOps();
|
|
367
|
+
|
|
368
|
+
const session = await createSession({
|
|
369
|
+
agentName: "TestAgent",
|
|
370
|
+
threadId: "thread-1",
|
|
371
|
+
runAgent: createScriptedRunAgent([
|
|
372
|
+
{
|
|
373
|
+
message: "turn 1",
|
|
374
|
+
toolCalls: [{ id: "tc-1", name: "Echo", args: { text: "a" } }],
|
|
375
|
+
usage: { inputTokens: 100, outputTokens: 50 },
|
|
376
|
+
},
|
|
377
|
+
{
|
|
378
|
+
message: "turn 2",
|
|
379
|
+
toolCalls: [],
|
|
380
|
+
usage: { inputTokens: 150, outputTokens: 75 },
|
|
381
|
+
},
|
|
382
|
+
]),
|
|
383
|
+
threadOps: ops,
|
|
384
|
+
tools: { Echo: createEchoTool() },
|
|
385
|
+
buildContextMessage: () => "go",
|
|
386
|
+
});
|
|
387
|
+
|
|
388
|
+
const stateManager = createAgentStateManager({
|
|
389
|
+
initialState: { systemPrompt: "test" },
|
|
390
|
+
});
|
|
391
|
+
|
|
392
|
+
const result = await session.runSession({ stateManager });
|
|
393
|
+
|
|
394
|
+
expect(result.usage.totalInputTokens).toBe(250);
|
|
395
|
+
expect(result.usage.totalOutputTokens).toBe(125);
|
|
396
|
+
expect(result.usage.turns).toBe(2);
|
|
397
|
+
});
|
|
398
|
+
|
|
399
|
+
// --- Invalid tool calls ---
|
|
400
|
+
|
|
401
|
+
it("appends error for invalid tool call args and continues", async () => {
|
|
402
|
+
const { ops, log } = createMockThreadOps();
|
|
403
|
+
|
|
404
|
+
const session = await createSession({
|
|
405
|
+
agentName: "TestAgent",
|
|
406
|
+
threadId: "thread-1",
|
|
407
|
+
runAgent: createScriptedRunAgent([
|
|
408
|
+
{
|
|
409
|
+
message: "bad call",
|
|
410
|
+
toolCalls: [
|
|
411
|
+
{ id: "tc-bad", name: "Echo", args: { text: 999 } },
|
|
412
|
+
{ id: "tc-good", name: "Echo", args: { text: "valid" } },
|
|
413
|
+
],
|
|
414
|
+
},
|
|
415
|
+
{
|
|
416
|
+
message: "done",
|
|
417
|
+
toolCalls: [],
|
|
418
|
+
},
|
|
419
|
+
]),
|
|
420
|
+
threadOps: ops,
|
|
421
|
+
tools: { Echo: createEchoTool() },
|
|
422
|
+
buildContextMessage: () => "go",
|
|
423
|
+
});
|
|
424
|
+
|
|
425
|
+
const stateManager = createAgentStateManager({
|
|
426
|
+
initialState: { systemPrompt: "test" },
|
|
427
|
+
});
|
|
428
|
+
|
|
429
|
+
const result = await session.runSession({ stateManager });
|
|
430
|
+
expect(result.exitReason).toBe("completed");
|
|
431
|
+
|
|
432
|
+
const toolResults = log.filter((l) => l.op === "appendToolResult");
|
|
433
|
+
// One error result for bad call + one success result for good call
|
|
434
|
+
expect(toolResults.length).toBeGreaterThanOrEqual(2);
|
|
435
|
+
const errorResult = toolResults.find((l) => {
|
|
436
|
+
const config = l.args[0] as ToolResultConfig;
|
|
437
|
+
return config.toolCallId === "tc-bad";
|
|
438
|
+
});
|
|
439
|
+
expect(errorResult).toBeDefined();
|
|
440
|
+
const errorConfig = errorResult?.args[0] as ToolResultConfig | undefined;
|
|
441
|
+
expect(errorConfig?.content).toContain("Invalid tool call");
|
|
442
|
+
});
|
|
443
|
+
|
|
444
|
+
// --- continueThread ---
|
|
445
|
+
|
|
446
|
+
it("forks thread when continueThread is set", async () => {
|
|
447
|
+
const { ops, log } = createMockThreadOps();
|
|
448
|
+
|
|
449
|
+
const session = await createSession({
|
|
450
|
+
agentName: "TestAgent",
|
|
451
|
+
threadId: "source-thread",
|
|
452
|
+
continueThread: true,
|
|
453
|
+
runAgent: createScriptedRunAgent([
|
|
454
|
+
{ message: "continued", toolCalls: [] },
|
|
455
|
+
]),
|
|
456
|
+
threadOps: ops,
|
|
457
|
+
buildContextMessage: () => "continue",
|
|
458
|
+
});
|
|
459
|
+
|
|
460
|
+
const stateManager = createAgentStateManager({
|
|
461
|
+
initialState: { systemPrompt: "test" },
|
|
462
|
+
});
|
|
463
|
+
|
|
464
|
+
const result = await session.runSession({ stateManager });
|
|
465
|
+
|
|
466
|
+
expect(result.exitReason).toBe("completed");
|
|
467
|
+
|
|
468
|
+
const forkOps = log.filter((l) => l.op === "forkThread");
|
|
469
|
+
expect(forkOps).toHaveLength(1);
|
|
470
|
+
expect(at(forkOps, 0).args[0]).toBe("source-thread");
|
|
471
|
+
|
|
472
|
+
const systemOps = log.filter((l) => l.op === "appendSystemMessage");
|
|
473
|
+
expect(systemOps).toHaveLength(0);
|
|
474
|
+
});
|
|
475
|
+
|
|
476
|
+
// --- Sandbox lifecycle ---
|
|
477
|
+
|
|
478
|
+
it("creates and destroys sandbox when sandboxOps are provided", async () => {
|
|
479
|
+
const { ops } = createMockThreadOps();
|
|
480
|
+
const sandboxLog: string[] = [];
|
|
481
|
+
|
|
482
|
+
const sandboxOps: SandboxOps = {
|
|
483
|
+
createSandbox: async (options) => {
|
|
484
|
+
sandboxLog.push(`create:${options?.id ?? "unknown"}`);
|
|
485
|
+
return { sandboxId: `sb-${options?.id ?? "unknown"}` };
|
|
486
|
+
},
|
|
487
|
+
destroySandbox: async (sandboxId: string) => {
|
|
488
|
+
sandboxLog.push(`destroy:${sandboxId}`);
|
|
489
|
+
},
|
|
490
|
+
snapshotSandbox: async () => ({
|
|
491
|
+
sandboxId: "sb-1",
|
|
492
|
+
providerId: "test",
|
|
493
|
+
data: null,
|
|
494
|
+
createdAt: new Date().toISOString(),
|
|
495
|
+
}),
|
|
496
|
+
};
|
|
497
|
+
|
|
498
|
+
const session = await createSession({
|
|
499
|
+
agentName: "TestAgent",
|
|
500
|
+
threadId: "thread-1",
|
|
501
|
+
runAgent: createScriptedRunAgent([
|
|
502
|
+
{ message: "done", toolCalls: [] },
|
|
503
|
+
]),
|
|
504
|
+
threadOps: ops,
|
|
505
|
+
buildContextMessage: () => "go",
|
|
506
|
+
sandbox: sandboxOps,
|
|
507
|
+
});
|
|
508
|
+
|
|
509
|
+
const stateManager = createAgentStateManager({
|
|
510
|
+
initialState: { systemPrompt: "test" },
|
|
511
|
+
});
|
|
512
|
+
|
|
513
|
+
await session.runSession({ stateManager });
|
|
514
|
+
|
|
515
|
+
expect(sandboxLog).toContain("create:thread-1");
|
|
516
|
+
expect(sandboxLog).toContain("destroy:sb-thread-1");
|
|
517
|
+
});
|
|
518
|
+
|
|
519
|
+
it("does not create or destroy sandbox when sandboxId is inherited", async () => {
|
|
520
|
+
const { ops } = createMockThreadOps();
|
|
521
|
+
const sandboxLog: string[] = [];
|
|
522
|
+
|
|
523
|
+
const sandboxOps: SandboxOps = {
|
|
524
|
+
createSandbox: async () => {
|
|
525
|
+
sandboxLog.push("create");
|
|
526
|
+
return { sandboxId: "sb-new" };
|
|
527
|
+
},
|
|
528
|
+
destroySandbox: async () => {
|
|
529
|
+
sandboxLog.push("destroy");
|
|
530
|
+
},
|
|
531
|
+
snapshotSandbox: async () => ({
|
|
532
|
+
sandboxId: "sb-1",
|
|
533
|
+
providerId: "test",
|
|
534
|
+
data: null,
|
|
535
|
+
createdAt: new Date().toISOString(),
|
|
536
|
+
}),
|
|
537
|
+
};
|
|
538
|
+
|
|
539
|
+
const session = await createSession({
|
|
540
|
+
agentName: "TestAgent",
|
|
541
|
+
threadId: "thread-1",
|
|
542
|
+
runAgent: createScriptedRunAgent([
|
|
543
|
+
{ message: "done", toolCalls: [] },
|
|
544
|
+
]),
|
|
545
|
+
threadOps: ops,
|
|
546
|
+
buildContextMessage: () => "go",
|
|
547
|
+
sandbox: sandboxOps,
|
|
548
|
+
sandboxId: "inherited-sb",
|
|
549
|
+
});
|
|
550
|
+
|
|
551
|
+
const stateManager = createAgentStateManager({
|
|
552
|
+
initialState: { systemPrompt: "test" },
|
|
553
|
+
});
|
|
554
|
+
|
|
555
|
+
await session.runSession({ stateManager });
|
|
556
|
+
|
|
557
|
+
expect(sandboxLog).toHaveLength(0);
|
|
558
|
+
});
|
|
559
|
+
|
|
560
|
+
// --- Sandbox ID passed to tool handlers ---
|
|
561
|
+
|
|
562
|
+
it("passes sandbox ID to tool handlers via processToolCalls context", async () => {
|
|
563
|
+
const { ops } = createMockThreadOps();
|
|
564
|
+
let capturedSandboxId: string | undefined;
|
|
565
|
+
|
|
566
|
+
const spyTool = defineTool({
|
|
567
|
+
name: "Spy" as const,
|
|
568
|
+
description: "captures context",
|
|
569
|
+
schema: z.object({}),
|
|
570
|
+
handler: async (_args: Record<string, never>, ctx: RouterContext) => {
|
|
571
|
+
capturedSandboxId = ctx.sandboxId;
|
|
572
|
+
return { toolResponse: "ok", data: null };
|
|
573
|
+
},
|
|
574
|
+
});
|
|
575
|
+
|
|
576
|
+
const session = await createSession({
|
|
577
|
+
agentName: "TestAgent",
|
|
578
|
+
threadId: "thread-1",
|
|
579
|
+
runAgent: createScriptedRunAgent([
|
|
580
|
+
{
|
|
581
|
+
message: "spy",
|
|
582
|
+
toolCalls: [{ id: "tc-1", name: "Spy", args: {} }],
|
|
583
|
+
},
|
|
584
|
+
{ message: "done", toolCalls: [] },
|
|
585
|
+
]),
|
|
586
|
+
threadOps: ops,
|
|
587
|
+
tools: { Spy: spyTool },
|
|
588
|
+
buildContextMessage: () => "go",
|
|
589
|
+
sandboxId: "my-sandbox",
|
|
590
|
+
});
|
|
591
|
+
|
|
592
|
+
const stateManager = createAgentStateManager({
|
|
593
|
+
initialState: { systemPrompt: "test" },
|
|
594
|
+
});
|
|
595
|
+
|
|
596
|
+
await session.runSession({ stateManager });
|
|
597
|
+
|
|
598
|
+
expect(capturedSandboxId).toBe("my-sandbox");
|
|
599
|
+
});
|
|
600
|
+
|
|
601
|
+
// --- Error propagation ---
|
|
602
|
+
|
|
603
|
+
it("propagates runAgent errors and calls onSessionEnd with failed reason", async () => {
|
|
604
|
+
const { ops } = createMockThreadOps();
|
|
605
|
+
let endReason: string | undefined;
|
|
606
|
+
|
|
607
|
+
const session = await createSession({
|
|
608
|
+
agentName: "TestAgent",
|
|
609
|
+
threadId: "thread-1",
|
|
610
|
+
runAgent: async () => {
|
|
611
|
+
throw new Error("LLM went down");
|
|
612
|
+
},
|
|
613
|
+
threadOps: ops,
|
|
614
|
+
buildContextMessage: () => "go",
|
|
615
|
+
hooks: {
|
|
616
|
+
onSessionEnd: async ({ exitReason }) => {
|
|
617
|
+
endReason = exitReason;
|
|
618
|
+
},
|
|
619
|
+
},
|
|
620
|
+
});
|
|
621
|
+
|
|
622
|
+
const stateManager = createAgentStateManager({
|
|
623
|
+
initialState: { systemPrompt: "test" },
|
|
624
|
+
});
|
|
625
|
+
|
|
626
|
+
await expect(session.runSession({ stateManager })).rejects.toThrow(
|
|
627
|
+
"LLM went down",
|
|
628
|
+
);
|
|
629
|
+
|
|
630
|
+
expect(endReason).toBe("failed");
|
|
631
|
+
});
|
|
632
|
+
|
|
633
|
+
// --- Tool execution hooks within session ---
|
|
634
|
+
|
|
635
|
+
it("fires global tool hooks during session tool processing", async () => {
|
|
636
|
+
const { ops } = createMockThreadOps();
|
|
637
|
+
const hookLog: string[] = [];
|
|
638
|
+
|
|
639
|
+
const session = await createSession({
|
|
640
|
+
agentName: "TestAgent",
|
|
641
|
+
threadId: "thread-1",
|
|
642
|
+
runAgent: createScriptedRunAgent([
|
|
643
|
+
{
|
|
644
|
+
message: "call echo",
|
|
645
|
+
toolCalls: [{ id: "tc-1", name: "Echo", args: { text: "hi" } }],
|
|
646
|
+
},
|
|
647
|
+
{ message: "done", toolCalls: [] },
|
|
648
|
+
]),
|
|
649
|
+
threadOps: ops,
|
|
650
|
+
tools: { Echo: createEchoTool() },
|
|
651
|
+
buildContextMessage: () => "go",
|
|
652
|
+
hooks: {
|
|
653
|
+
onPreToolUse: async ({ toolCall }) => {
|
|
654
|
+
hookLog.push(`pre:${toolCall.name}`);
|
|
655
|
+
return {};
|
|
656
|
+
},
|
|
657
|
+
onPostToolUse: async ({ toolCall }) => {
|
|
658
|
+
hookLog.push(`post:${toolCall.name}`);
|
|
659
|
+
},
|
|
660
|
+
},
|
|
661
|
+
});
|
|
662
|
+
|
|
663
|
+
const stateManager = createAgentStateManager({
|
|
664
|
+
initialState: { systemPrompt: "test" },
|
|
665
|
+
});
|
|
666
|
+
|
|
667
|
+
await session.runSession({ stateManager });
|
|
668
|
+
|
|
669
|
+
expect(hookLog).toEqual(["pre:Echo", "post:Echo"]);
|
|
670
|
+
});
|
|
671
|
+
|
|
672
|
+
// --- Generated thread IDs ---
|
|
673
|
+
|
|
674
|
+
it("generates a thread ID when none is provided", async () => {
|
|
675
|
+
const { ops } = createMockThreadOps();
|
|
676
|
+
|
|
677
|
+
const session = await createSession({
|
|
678
|
+
agentName: "TestAgent",
|
|
679
|
+
runAgent: createScriptedRunAgent([
|
|
680
|
+
{ message: "done", toolCalls: [] },
|
|
681
|
+
]),
|
|
682
|
+
threadOps: ops,
|
|
683
|
+
buildContextMessage: () => "go",
|
|
684
|
+
});
|
|
685
|
+
|
|
686
|
+
const stateManager = createAgentStateManager({
|
|
687
|
+
initialState: { systemPrompt: "test" },
|
|
688
|
+
});
|
|
689
|
+
|
|
690
|
+
const result = await session.runSession({ stateManager });
|
|
691
|
+
|
|
692
|
+
expect(result.threadId).toBeTruthy();
|
|
693
|
+
expect(result.threadId.length).toBeGreaterThan(0);
|
|
694
|
+
});
|
|
695
|
+
|
|
696
|
+
// --- Multiple tools in a single turn ---
|
|
697
|
+
|
|
698
|
+
it("handles multiple tool calls in a single turn", async () => {
  const { ops: threadOps, log: opLog } = createMockThreadOps();

  // Arithmetic tool: answers with the sum both as text and as structured data.
  const addTool = defineTool({
    name: "Add" as const,
    description: "adds numbers",
    schema: z.object({ a: z.number(), b: z.number() }),
    handler: async (args: { a: number; b: number }) => {
      const sum = args.a + args.b;
      return { toolResponse: `${sum}`, data: { sum } };
    },
  });

  // Turn 1 requests Echo and Add together; turn 2 finishes with no tool calls.
  const agentSession = await createSession({
    agentName: "TestAgent",
    threadId: "thread-1",
    runAgent: createScriptedRunAgent([
      {
        message: "computing",
        toolCalls: [
          { id: "tc-1", name: "Echo", args: { text: "hello" } },
          { id: "tc-2", name: "Add", args: { a: 3, b: 4 } },
        ],
      },
      { message: "all done", toolCalls: [] },
    ]),
    threadOps,
    tools: { Echo: createEchoTool(), Add: addTool },
    buildContextMessage: () => "go",
  });

  const manager = createAgentStateManager({
    initialState: { systemPrompt: "test" },
  });

  const outcome = await agentSession.runSession({ stateManager: manager });

  expect(outcome.exitReason).toBe("completed");
  expect(outcome.finalMessage).toBe("all done");

  // One appendToolResult entry is expected per tool call issued in turn 1.
  const recordedToolResults = opLog.filter(
    (entry) => entry.op === "appendToolResult",
  );
  expect(recordedToolResults).toHaveLength(2);
});
|
|
741
|
+
|
|
742
|
+
// --- buildContextMessage async ---
|
|
743
|
+
|
|
744
|
+
it("supports async buildContextMessage", async () => {
  const { ops: threadOps, log: opLog } = createMockThreadOps();

  const agentSession = await createSession({
    agentName: "TestAgent",
    threadId: "thread-1",
    runAgent: createScriptedRunAgent([
      { message: "done", toolCalls: [] },
    ]),
    threadOps,
    // Resolves after a short timer, so the value is only available asynchronously.
    buildContextMessage: async () => {
      await new Promise((resolve) => {
        setTimeout(resolve, 5);
      });
      return "async context";
    },
  });

  const manager = createAgentStateManager({
    initialState: { systemPrompt: "test" },
  });

  await agentSession.runSession({ stateManager: manager });

  // The first logged human message should carry the awaited context string.
  const humanMessageOps = opLog.filter(
    (entry) => entry.op === "appendHumanMessage",
  );
  const firstHumanMessage = at(humanMessageOps, 0);
  expect(firstHumanMessage.args[1]).toBe("async context");
});
|
|
769
|
+
|
|
770
|
+
// --- Sandbox stateUpdate merge ---
|
|
771
|
+
|
|
772
|
+
it("merges sandbox stateUpdate into state manager", async () => {
  const { ops: threadOps } = createMockThreadOps();

  // Stub sandbox provider whose createSandbox result includes a stateUpdate payload.
  const sandboxOps: SandboxOps = {
    createSandbox: async () => {
      return {
        sandboxId: "sb-1",
        stateUpdate: { customField: "from-sandbox" },
      };
    },
    destroySandbox: async () => {},
    snapshotSandbox: async () => {
      return {
        sandboxId: "sb-1",
        providerId: "test",
        data: null,
        createdAt: new Date().toISOString(),
      };
    },
  };

  const agentSession = await createSession({
    agentName: "TestAgent",
    threadId: "thread-1",
    runAgent: createScriptedRunAgent([
      { message: "done", toolCalls: [] },
    ]),
    threadOps,
    buildContextMessage: () => "go",
    sandbox: sandboxOps,
  });

  // customField starts empty so the assertion below can only pass after a merge.
  const manager = createAgentStateManager<{ customField: string }>({
    initialState: { systemPrompt: "test", customField: "" },
  });

  await agentSession.runSession({ stateManager: manager });

  const mergedField = manager.get("customField");
  expect(mergedField).toBe("from-sandbox");
});
|
|
808
|
+
|
|
809
|
+
// --- Tool usage tracking from tool results ---
|
|
810
|
+
|
|
811
|
+
it("accumulates usage from tool call results", async () => {
  const { ops: threadOps } = createMockThreadOps();

  // Tool whose handler reports its own token usage alongside the response.
  const usageTool = defineTool({
    name: "UsageTool" as const,
    description: "returns usage",
    schema: z.object({}),
    handler: async () => {
      return {
        toolResponse: "ok",
        data: null,
        usage: { inputTokens: 50, outputTokens: 25 },
      };
    },
  });

  const agentSession = await createSession({
    agentName: "TestAgent",
    threadId: "thread-1",
    runAgent: createScriptedRunAgent([
      {
        message: "t1",
        toolCalls: [{ id: "tc-1", name: "UsageTool", args: {} }],
        usage: { inputTokens: 100, outputTokens: 50 },
      },
      {
        message: "done",
        toolCalls: [],
        usage: { inputTokens: 80, outputTokens: 40 },
      },
    ]),
    threadOps,
    tools: { UsageTool: usageTool },
    buildContextMessage: () => "go",
  });

  const manager = createAgentStateManager({
    initialState: { systemPrompt: "test" },
  });

  const outcome = await agentSession.runSession({ stateManager: manager });

  // Expected totals come from the two scripted turns only:
  //   input  = 100 + 80 = 180
  //   output =  50 + 40 =  90
  // Note: handler-level usage is not forwarded through router results,
  // so the tool's 50/25 is not part of these totals.
  const totals = outcome.usage;
  expect(totals.totalInputTokens).toBe(180);
  expect(totals.totalOutputTokens).toBe(90);
});
|
|
852
|
+
});
|