@alexkroman1/aai 0.9.3 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/_internal-types.d.ts +49 -22
- package/dist/_internal-types.js +43 -1
- package/dist/_mock-ws.d.ts +1 -2
- package/dist/_run-code.d.ts +31 -0
- package/dist/_session-ctx.d.ts +73 -0
- package/dist/_session-otel.d.ts +43 -0
- package/dist/_session-persist.d.ts +30 -0
- package/dist/_ssrf.d.ts +30 -0
- package/dist/_ssrf.js +123 -0
- package/dist/_utils.d.ts +25 -0
- package/dist/_utils.js +54 -1
- package/dist/builtin-tools.d.ts +5 -34
- package/dist/direct-executor-Ca0wt5H0.js +572 -0
- package/dist/direct-executor.d.ts +34 -5
- package/dist/index.d.ts +2 -1
- package/dist/index.js +2 -2
- package/dist/kv.d.ts +30 -38
- package/dist/kv.js +19 -86
- package/dist/matchers.d.ts +20 -0
- package/dist/matchers.js +41 -0
- package/dist/memory-tools.d.ts +39 -0
- package/dist/middleware-core.d.ts +47 -0
- package/dist/middleware-core.js +107 -0
- package/dist/middleware.d.ts +37 -0
- package/dist/protocol.d.ts +44 -24
- package/dist/protocol.js +34 -14
- package/dist/runtime.d.ts +26 -2
- package/dist/runtime.js +44 -7
- package/dist/s2s.d.ts +19 -29
- package/dist/s2s.js +117 -87
- package/dist/server.d.ts +31 -3
- package/dist/server.js +102 -28
- package/dist/session-BkN9u0ni.js +683 -0
- package/dist/session.d.ts +55 -28
- package/dist/session.js +2 -312
- package/dist/sqlite-kv.d.ts +34 -0
- package/dist/sqlite-kv.js +133 -0
- package/dist/sqlite-vector.d.ts +58 -0
- package/dist/sqlite-vector.js +149 -0
- package/dist/system-prompt.d.ts +21 -0
- package/dist/telemetry.d.ts +49 -0
- package/dist/telemetry.js +95 -0
- package/dist/testing-MRl3SXsI.js +519 -0
- package/dist/testing.d.ts +299 -0
- package/dist/testing.js +2 -0
- package/dist/types.d.ts +324 -39
- package/dist/types.js +62 -9
- package/dist/vector.d.ts +18 -22
- package/dist/vector.js +41 -48
- package/dist/worker-entry.d.ts +11 -3
- package/dist/worker-entry.js +19 -8
- package/dist/ws-handler.d.ts +7 -3
- package/dist/ws-handler.js +64 -12
- package/package.json +55 -8
- package/dist/_mock-ws.js +0 -158
- package/dist/builtin-tools.js +0 -270
- package/dist/direct-executor.js +0 -125
|
@@ -0,0 +1,299 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Testing utilities for AAI agents.
|
|
3
|
+
*
|
|
4
|
+
* Provides a test harness for unit-testing agents without audio, network,
|
|
5
|
+
* or an LLM. Use {@link createTestHarness} to create a harness from a
|
|
6
|
+
* `defineAgent()` result, then drive tool calls and multi-turn conversations.
|
|
7
|
+
*
|
|
8
|
+
* @example
|
|
9
|
+
* ```ts
|
|
10
|
+
* import { describe, expect, test } from "vitest";
|
|
11
|
+
* import { createTestHarness } from "@alexkroman1/aai/testing";
|
|
12
|
+
* import agent from "./agent.ts";
|
|
13
|
+
*
|
|
14
|
+
* describe("my agent", () => {
|
|
15
|
+
* test("greet tool returns greeting", async () => {
|
|
16
|
+
* const t = createTestHarness(agent);
|
|
17
|
+
* const result = await t.executeTool("greet", { name: "Alice" });
|
|
18
|
+
* expect(result).toBe("Hello, Alice!");
|
|
19
|
+
* });
|
|
20
|
+
*
|
|
21
|
+
* test("multi-turn conversation", async () => {
|
|
22
|
+
* const t = createTestHarness(agent);
|
|
23
|
+
* const turn1 = await t.turn("Add a pizza", [
|
|
24
|
+
* { tool: "add_pizza", args: { size: "large", crust: "regular", toppings: ["pepperoni"], quantity: 1 } },
|
|
25
|
+
* ]);
|
|
26
|
+
* expect(turn1).toHaveCalledTool("add_pizza");
|
|
27
|
+
*
|
|
28
|
+
* const turn2 = await t.turn("View my order", [
|
|
29
|
+
* { tool: "view_order", args: {} },
|
|
30
|
+
* ]);
|
|
31
|
+
* expect(turn2).toHaveCalledTool("view_order");
|
|
32
|
+
* });
|
|
33
|
+
* });
|
|
34
|
+
* ```
|
|
35
|
+
*
|
|
36
|
+
* @packageDocumentation
|
|
37
|
+
*/
|
|
38
|
+
import { type DirectExecutor } from "./direct-executor.ts";
|
|
39
|
+
import type { Kv } from "./kv.ts";
|
|
40
|
+
import type { AgentDef, Message, StepInfo } from "./types.ts";
|
|
41
|
+
import type { VectorStore } from "./vector.ts";
|
|
42
|
+
export { installMockWebSocket, MockWebSocket } from "./_mock-ws.ts";
|
|
43
|
+
/**
|
|
44
|
+
* A single tool call recorded during a turn.
|
|
45
|
+
*
|
|
46
|
+
* @public
|
|
47
|
+
*/
|
|
48
|
+
export type RecordedToolCall = {
|
|
49
|
+
/** The name of the tool that was called. */
|
|
50
|
+
toolName: string;
|
|
51
|
+
/** The arguments passed to the tool. */
|
|
52
|
+
args: Readonly<Record<string, unknown>>;
|
|
53
|
+
/** The string result returned by the tool. */
|
|
54
|
+
result: string;
|
|
55
|
+
};
|
|
56
|
+
/**
|
|
57
|
+
* Result of a simulated turn via {@link TestHarness.turn}.
|
|
58
|
+
*
|
|
59
|
+
* Contains all tool calls that were executed and provides assertion helpers
|
|
60
|
+
* for verifying agent behavior in tests.
|
|
61
|
+
*
|
|
62
|
+
* @example
|
|
63
|
+
* ```ts
|
|
64
|
+
* const result = await t.turn("search for flights", [
|
|
65
|
+
* { tool: "search_flights", args: { destination: "NYC" } },
|
|
66
|
+
* ]);
|
|
67
|
+
*
|
|
68
|
+
* // Check if a tool was called
|
|
69
|
+
* expect(result).toHaveCalledTool("search_flights");
|
|
70
|
+
*
|
|
71
|
+
* // Check tool was called with specific args
|
|
72
|
+
* expect(result).toHaveCalledTool("search_flights", { destination: "NYC" });
|
|
73
|
+
*
|
|
74
|
+
* // Access raw tool call data
|
|
75
|
+
* expect(result.toolCalls[0].result).toContain("JFK");
|
|
76
|
+
* ```
|
|
77
|
+
*
|
|
78
|
+
* @public
|
|
79
|
+
*/
|
|
80
|
+
export declare class TurnResult {
|
|
81
|
+
/** The user text that initiated this turn. */
|
|
82
|
+
readonly text: string;
|
|
83
|
+
/** All tool calls executed during this turn, in order. */
|
|
84
|
+
readonly toolCalls: readonly RecordedToolCall[];
|
|
85
|
+
/** Convenience accessor: just the result strings from each tool call. */
|
|
86
|
+
readonly toolResults: readonly string[];
|
|
87
|
+
/** @internal */
|
|
88
|
+
constructor(text: string, toolCalls: RecordedToolCall[]);
|
|
89
|
+
/**
|
|
90
|
+
* Check whether a tool was called during this turn.
|
|
91
|
+
*
|
|
92
|
+
* When `args` is provided, checks that at least one call to the named tool
|
|
93
|
+
* contains all specified key-value pairs (partial match).
|
|
94
|
+
*
|
|
95
|
+
* @param toolName - The tool name to look for.
|
|
96
|
+
* @param args - Optional partial args to match against.
|
|
97
|
+
* @returns `true` if a matching tool call was found.
|
|
98
|
+
*
|
|
99
|
+
* @example
|
|
100
|
+
* ```ts
|
|
101
|
+
* result.toHaveCalledTool("add_pizza"); // any call
|
|
102
|
+
* result.toHaveCalledTool("add_pizza", { size: "large" }); // partial match
|
|
103
|
+
* ```
|
|
104
|
+
*/
|
|
105
|
+
toHaveCalledTool(toolName: string, args?: Record<string, unknown>): boolean;
|
|
106
|
+
/**
|
|
107
|
+
* Get all calls to a specific tool during this turn.
|
|
108
|
+
*
|
|
109
|
+
* @param toolName - The tool name to filter by.
|
|
110
|
+
* @returns Array of matching tool calls (may be empty).
|
|
111
|
+
*/
|
|
112
|
+
getToolCalls(toolName: string): readonly RecordedToolCall[];
|
|
113
|
+
/**
|
|
114
|
+
* Get the parsed JSON result of the first call to a specific tool.
|
|
115
|
+
*
|
|
116
|
+
* Throws if the tool was not called during this turn.
|
|
117
|
+
*
|
|
118
|
+
* @typeParam T - The expected shape of the parsed result.
|
|
119
|
+
* @param toolName - The tool name to look up.
|
|
120
|
+
* @returns The parsed result, cast to `T`.
|
|
121
|
+
*
|
|
122
|
+
* @example
|
|
123
|
+
* ```ts
|
|
124
|
+
* const order = turn.toolResult<{ pizzas: Pizza[]; total: string }>("view_order");
|
|
125
|
+
* expect(order.pizzas).toHaveLength(2);
|
|
126
|
+
* ```
|
|
127
|
+
*/
|
|
128
|
+
toolResult<T = unknown>(toolName: string): T;
|
|
129
|
+
}
|
|
130
|
+
/**
|
|
131
|
+
* Options for creating a {@link TestHarness}.
|
|
132
|
+
*
|
|
133
|
+
* @public
|
|
134
|
+
*/
|
|
135
|
+
export type TestHarnessOptions = {
|
|
136
|
+
/** Environment variables available to tools via `ctx.env`. */
|
|
137
|
+
env?: Record<string, string>;
|
|
138
|
+
/** KV store instance. Defaults to an in-memory SQLite store. */
|
|
139
|
+
kv?: Kv;
|
|
140
|
+
/** Vector store instance. Defaults to an in-memory SQLite store. */
|
|
141
|
+
vector?: VectorStore;
|
|
142
|
+
};
|
|
143
|
+
/**
|
|
144
|
+
* A tool call to execute during a simulated turn.
|
|
145
|
+
*
|
|
146
|
+
* @public
|
|
147
|
+
*/
|
|
148
|
+
export type TurnToolCall = {
|
|
149
|
+
/** The tool name to invoke. */
|
|
150
|
+
tool: string;
|
|
151
|
+
/** Arguments to pass to the tool. */
|
|
152
|
+
args: Record<string, unknown>;
|
|
153
|
+
};
|
|
154
|
+
/**
|
|
155
|
+
* Test harness for unit-testing AAI agents without audio, network, or an LLM.
|
|
156
|
+
*
|
|
157
|
+
* Created via {@link createTestHarness}. Maintains conversation state across
|
|
158
|
+
* turns, executes tools against the real agent code, and records all tool
|
|
159
|
+
* calls for assertions.
|
|
160
|
+
*
|
|
161
|
+
* @example
|
|
162
|
+
* ```ts
|
|
163
|
+
* import { createTestHarness } from "@alexkroman1/aai/testing";
|
|
164
|
+
* import agent from "./agent.ts";
|
|
165
|
+
*
|
|
166
|
+
* const t = createTestHarness(agent);
|
|
167
|
+
*
|
|
168
|
+
* // Execute a single tool
|
|
169
|
+
* const result = await t.executeTool("greet", { name: "Alice" });
|
|
170
|
+
*
|
|
171
|
+
* // Simulate a full turn with tool calls
|
|
172
|
+
* const turn = await t.turn("hello", [
|
|
173
|
+
* { tool: "greet", args: { name: "Alice" } },
|
|
174
|
+
* ]);
|
|
175
|
+
* expect(turn).toHaveCalledTool("greet");
|
|
176
|
+
* ```
|
|
177
|
+
*
|
|
178
|
+
* @public
|
|
179
|
+
*/
|
|
180
|
+
export declare class TestHarness {
|
|
181
|
+
/** @internal */
|
|
182
|
+
readonly _executor: DirectExecutor;
|
|
183
|
+
/** @internal */
|
|
184
|
+
readonly _sessionId: string;
|
|
185
|
+
private _messages;
|
|
186
|
+
private _onStepCalls;
|
|
187
|
+
private _onTurnCalls;
|
|
188
|
+
private _connected;
|
|
189
|
+
/** @internal */
|
|
190
|
+
constructor(executor: DirectExecutor, sessionId: string);
|
|
191
|
+
/** Conversation messages accumulated across turns. */
|
|
192
|
+
get messages(): readonly Message[];
|
|
193
|
+
/** All `onStep` hook invocations recorded so far. */
|
|
194
|
+
get steps(): readonly StepInfo[];
|
|
195
|
+
/** The text argument of each `onTurn` hook invocation recorded so far. */
|
|
196
|
+
get turns(): readonly string[];
|
|
197
|
+
/**
|
|
198
|
+
* Fire the `onConnect` lifecycle hook.
|
|
199
|
+
*
|
|
200
|
+
* Invoked automatically by the first {@link turn} call if it has not already been called manually.
|
|
201
|
+
*/
|
|
202
|
+
connect(): Promise<void>;
|
|
203
|
+
/**
|
|
204
|
+
* Fire the `onDisconnect` lifecycle hook and clean up session state.
|
|
205
|
+
*/
|
|
206
|
+
disconnect(): Promise<void>;
|
|
207
|
+
/**
|
|
208
|
+
* Execute a single tool by name with the given arguments.
|
|
209
|
+
*
|
|
210
|
+
* The tool runs with full agent context (env, state, kv, vector, messages).
|
|
211
|
+
* The call is **not** recorded in conversation history — use {@link turn}
|
|
212
|
+
* for that.
|
|
213
|
+
*
|
|
214
|
+
* @param toolName - The tool to execute.
|
|
215
|
+
* @param args - Arguments to pass to the tool.
|
|
216
|
+
* @returns The tool's string result.
|
|
217
|
+
*
|
|
218
|
+
* @example
|
|
219
|
+
* ```ts
|
|
220
|
+
* const result = await t.executeTool("get_weather", { city: "London" });
|
|
221
|
+
* const data = JSON.parse(result);
|
|
222
|
+
* expect(data.temp).toBeDefined();
|
|
223
|
+
* ```
|
|
224
|
+
*/
|
|
225
|
+
executeTool(toolName: string, args?: Record<string, unknown>): Promise<string>;
|
|
226
|
+
/**
|
|
227
|
+
* Simulate a user turn: add the user message, execute the given tool calls
|
|
228
|
+
* in sequence, and record everything.
|
|
229
|
+
*
|
|
230
|
+
* This is the primary method for testing agent behavior. It:
|
|
231
|
+
* 1. Fires `onConnect` if this is the first turn
|
|
232
|
+
* 2. Adds the user message to conversation history
|
|
233
|
+
* 3. Fires the `onTurn` hook
|
|
234
|
+
* 4. Executes each tool call in order, firing `onStep` for each
|
|
235
|
+
* 5. Returns a {@link TurnResult} with assertion helpers
|
|
236
|
+
*
|
|
237
|
+
* @param text - The user's spoken/typed input.
|
|
238
|
+
* @param toolCalls - Tool calls to execute (simulating what the LLM would invoke).
|
|
239
|
+
* @returns A {@link TurnResult} with recorded tool calls and assertion methods.
|
|
240
|
+
*
|
|
241
|
+
* @example
|
|
242
|
+
* ```ts
|
|
243
|
+
* const turn = await t.turn("Add pepperoni pizza", [
|
|
244
|
+
* { tool: "add_pizza", args: { size: "large", crust: "regular", toppings: ["pepperoni"], quantity: 1 } },
|
|
245
|
+
* ]);
|
|
246
|
+
* expect(turn).toHaveCalledTool("add_pizza", { size: "large" });
|
|
247
|
+
* expect(turn.toolCalls[0].result).toContain("$14.99");
|
|
248
|
+
* ```
|
|
249
|
+
*/
|
|
250
|
+
turn(text: string, toolCalls?: TurnToolCall[]): Promise<TurnResult>;
|
|
251
|
+
/**
|
|
252
|
+
* Add a user message to conversation history without executing tools.
|
|
253
|
+
*
|
|
254
|
+
* Useful for setting up conversation context before a turn.
|
|
255
|
+
*/
|
|
256
|
+
addUserMessage(text: string): void;
|
|
257
|
+
/**
|
|
258
|
+
* Add an assistant message to conversation history.
|
|
259
|
+
*
|
|
260
|
+
* Useful for simulating prior assistant responses in multi-turn tests.
|
|
261
|
+
*/
|
|
262
|
+
addAssistantMessage(text: string): void;
|
|
263
|
+
/**
|
|
264
|
+
* Reset conversation state: clears messages and step/turn history.
|
|
265
|
+
*
|
|
266
|
+
* Does **not** reset the KV or vector store — create a new harness for that.
|
|
267
|
+
*/
|
|
268
|
+
reset(): void;
|
|
269
|
+
}
|
|
270
|
+
/**
|
|
271
|
+
* Create a test harness for unit-testing an agent.
|
|
272
|
+
*
|
|
273
|
+
* The harness wraps the agent's tool definitions and lifecycle hooks,
|
|
274
|
+
* providing a simple API for executing tools and simulating multi-turn
|
|
275
|
+
* conversations — all without audio, network, or an LLM.
|
|
276
|
+
*
|
|
277
|
+
* @param agent - The agent definition returned by `defineAgent()`.
|
|
278
|
+
* @param options - Optional environment, KV, and vector store overrides.
|
|
279
|
+
* @returns A {@link TestHarness} instance.
|
|
280
|
+
*
|
|
281
|
+
* @example
|
|
282
|
+
* ```ts
|
|
283
|
+
* import { createTestHarness } from "@alexkroman1/aai/testing";
|
|
284
|
+
* import agent from "./agent.ts";
|
|
285
|
+
*
|
|
286
|
+
* const t = createTestHarness(agent);
|
|
287
|
+
* const result = await t.executeTool("my_tool", { key: "value" });
|
|
288
|
+
* ```
|
|
289
|
+
*
|
|
290
|
+
* @example With environment variables
|
|
291
|
+
* ```ts
|
|
292
|
+
* const t = createTestHarness(agent, {
|
|
293
|
+
* env: { API_KEY: "test-key" },
|
|
294
|
+
* });
|
|
295
|
+
* ```
|
|
296
|
+
*
|
|
297
|
+
* @public
|
|
298
|
+
*/
|
|
299
|
+
export declare function createTestHarness(agent: AgentDef<any>, options?: TestHarnessOptions): TestHarness;
|
package/dist/testing.js
ADDED