@alexkroman1/aai 0.12.3 → 1.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +20 -0
- package/CHANGELOG.md +176 -0
- package/dist/constants-VTFoymJ-.js +47 -0
- package/dist/host/_run-code.d.ts +1 -1
- package/dist/host/_runtime-conformance.d.ts +4 -5
- package/dist/host/builtin-tools.d.ts +11 -9
- package/dist/host/runtime-barrel.d.ts +15 -0
- package/dist/{direct-executor-DRRrZUp0.js → host/runtime-barrel.js} +453 -348
- package/dist/host/runtime-config.d.ts +42 -0
- package/dist/host/runtime.d.ts +119 -35
- package/dist/host/s2s.d.ts +14 -38
- package/dist/host/server.d.ts +16 -8
- package/dist/host/session-ctx.d.ts +55 -0
- package/dist/host/session.d.ts +20 -70
- package/dist/host/tool-executor.d.ts +20 -0
- package/dist/host/unstorage-kv.d.ts +1 -1
- package/dist/host/ws-handler.d.ts +4 -2
- package/dist/index.d.ts +9 -20
- package/dist/index.js +63 -2
- package/dist/{isolate → sdk}/_internal-types.d.ts +5 -9
- package/dist/{isolate → sdk}/constants.d.ts +6 -4
- package/dist/sdk/define.d.ts +66 -0
- package/dist/{isolate → sdk}/kv.d.ts +1 -49
- package/dist/sdk/manifest-barrel.d.ts +8 -0
- package/dist/sdk/manifest-barrel.js +52 -0
- package/dist/sdk/manifest.d.ts +50 -0
- package/dist/{isolate → sdk}/protocol.d.ts +59 -36
- package/dist/sdk/protocol.js +163 -0
- package/dist/{isolate → sdk}/system-prompt.d.ts +2 -2
- package/dist/sdk/types.d.ts +201 -0
- package/dist/sdk/ws-upgrade.d.ts +5 -0
- package/dist/{system-prompt-DYAYFW99.js → system-prompt-nik_iavo.js} +10 -10
- package/dist/types-Cfx_4QDK.js +39 -0
- package/dist/ws-upgrade-BeOQ7fXL.js +30 -0
- package/exports-no-dev-deps.test.ts +62 -0
- package/host/_mock-ws.ts +185 -0
- package/host/_run-code.ts +217 -0
- package/host/_runtime-conformance.ts +143 -0
- package/host/_test-utils.ts +276 -0
- package/host/builtin-tools.test.ts +774 -0
- package/host/builtin-tools.ts +255 -0
- package/host/cleanup.test.ts +422 -0
- package/host/fixture-replay.test.ts +463 -0
- package/host/fixtures/README.md +40 -0
- package/host/fixtures/greeting-session-sequence.json +40 -0
- package/host/fixtures/reply-audio-samples.json +42 -0
- package/host/fixtures/reply-lifecycle.json +21 -0
- package/host/fixtures/session-ready.json +48 -0
- package/host/fixtures/session-updated.json +45 -0
- package/host/fixtures/simple-question-sequence.json +73 -0
- package/host/fixtures/tool-call-sequence.json +114 -0
- package/host/fixtures/tool-calls.json +11 -0
- package/host/fixtures/tool-config-session-sequence.json +51 -0
- package/host/fixtures/user-speech-recognition.json +30 -0
- package/host/fixtures/web-search-sequence.json +122 -0
- package/host/integration.test.ts +222 -0
- package/host/runtime-barrel.ts +25 -0
- package/host/runtime-config.test.ts +71 -0
- package/host/runtime-config.ts +99 -0
- package/host/runtime.test.ts +641 -0
- package/host/runtime.ts +308 -0
- package/host/s2s-fixtures.test.ts +237 -0
- package/host/s2s.test.ts +562 -0
- package/host/s2s.ts +310 -0
- package/host/server-shutdown.test.ts +76 -0
- package/host/server.test.ts +116 -0
- package/host/server.ts +223 -0
- package/host/session-ctx.ts +107 -0
- package/host/session-fixture-replay.test.ts +136 -0
- package/host/session-prompt.test.ts +77 -0
- package/host/session.test.ts +590 -0
- package/host/session.ts +370 -0
- package/host/tool-executor.test.ts +124 -0
- package/host/tool-executor.ts +80 -0
- package/host/unstorage-kv.test.ts +99 -0
- package/host/unstorage-kv.ts +69 -0
- package/host/ws-handler.test.ts +739 -0
- package/host/ws-handler.ts +255 -0
- package/index.ts +16 -0
- package/package.json +24 -72
- package/sdk/_internal-types.test.ts +34 -0
- package/sdk/_internal-types.ts +115 -0
- package/sdk/compat-fixtures/README.md +26 -0
- package/sdk/compat-fixtures/v1.json +68 -0
- package/sdk/constants.ts +77 -0
- package/sdk/define.test.ts +57 -0
- package/sdk/define.ts +88 -0
- package/sdk/kv.ts +60 -0
- package/sdk/manifest-barrel.ts +12 -0
- package/sdk/manifest.test.ts +56 -0
- package/sdk/manifest.ts +89 -0
- package/sdk/protocol-compat.test.ts +187 -0
- package/sdk/protocol-snapshot.test.ts +199 -0
- package/sdk/protocol.test.ts +170 -0
- package/sdk/protocol.ts +223 -0
- package/sdk/schema-alignment.test.ts +191 -0
- package/sdk/system-prompt.test.ts +111 -0
- package/sdk/system-prompt.ts +74 -0
- package/sdk/tsconfig.json +12 -0
- package/sdk/types-inference.test.ts +122 -0
- package/sdk/types.test.ts +14 -0
- package/sdk/types.ts +226 -0
- package/sdk/utils.test.ts +52 -0
- package/sdk/utils.ts +20 -0
- package/sdk/ws-upgrade.test.ts +48 -0
- package/sdk/ws-upgrade.ts +13 -0
- package/tsconfig.build.json +14 -0
- package/tsconfig.json +10 -0
- package/tsdown.config.ts +26 -0
- package/vitest.config.ts +17 -0
- package/dist/host/_test-utils.d.ts +0 -73
- package/dist/host/direct-executor.d.ts +0 -130
- package/dist/host/index.d.ts +0 -19
- package/dist/host/index.js +0 -165
- package/dist/host/matchers.d.ts +0 -20
- package/dist/host/matchers.js +0 -41
- package/dist/host/server.js +0 -164
- package/dist/host/testing.d.ts +0 -294
- package/dist/host/testing.js +0 -2
- package/dist/host/vite-plugin.d.ts +0 -15
- package/dist/host/vite-plugin.js +0 -83
- package/dist/isolate/_kv-utils.d.ts +0 -10
- package/dist/isolate/_utils.js +0 -17
- package/dist/isolate/hooks.d.ts +0 -44
- package/dist/isolate/hooks.js +0 -58
- package/dist/isolate/index.d.ts +0 -18
- package/dist/isolate/index.js +0 -6
- package/dist/isolate/kv.js +0 -1
- package/dist/isolate/protocol.js +0 -2
- package/dist/isolate/types.d.ts +0 -418
- package/dist/isolate/types.js +0 -175
- package/dist/protocol-rcOrz7T3.js +0 -183
- package/dist/testing-BreLdpq-.js +0 -513
- package/dist/types.test-d.d.ts +0 -7
- /package/dist/{isolate/_utils.d.ts → sdk/utils.d.ts} +0 -0
|
@@ -0,0 +1,217 @@
|
|
|
1
|
+
// Copyright 2025 the AAI authors. MIT license.
|
|
2
|
+
/**
|
|
3
|
+
* run_code built-in tool — executes user JavaScript in a fresh `node:vm`
|
|
4
|
+
* context with no network, filesystem, or process access.
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
import vm from "node:vm";
|
|
8
|
+
import { z } from "zod";
|
|
9
|
+
import { RUN_CODE_TIMEOUT_MS } from "../sdk/constants.ts";
|
|
10
|
+
import type { ToolDef } from "../sdk/types.ts";
|
|
11
|
+
import { errorMessage } from "../sdk/utils.ts";
|
|
12
|
+
|
|
13
|
+
/** Own property names of a constructor that are never forwarded to a wrapper. */
const SKIPPED_CLASS_KEYS = new Set(["constructor", "prototype", "length", "name"]);

/**
 * Copy static members from a class constructor to a wrapper function,
 * skipping the built-in keys listed in {@link SKIPPED_CLASS_KEYS}.
 *
 * @param src - Constructor whose own properties are read.
 * @param dst - Function that receives a copy of each property descriptor.
 */
// biome-ignore lint/complexity/noBannedTypes: copying descriptors from arbitrary class constructors
function copyStaticMembers(src: Function, dst: Function): void {
  for (const key of Object.getOwnPropertyNames(src)) {
    if (SKIPPED_CLASS_KEYS.has(key)) continue;
    try {
      const desc = Object.getOwnPropertyDescriptor(src, key);
      if (desc) Object.defineProperty(dst, key, desc);
    } catch {
      // Deliberate best effort: a property that cannot be (re)defined on the
      // wrapper is skipped rather than aborting the whole copy.
    }
  }
}

/**
 * Neuter the `.constructor` chain on a host function or class constructor.
 *
 * The returned wrapper forwards to `fn`, but its own `constructor` property is
 * `undefined`, so `wrapper.constructor.constructor(...)` no longer resolves to
 * the host `Function`.
 *
 * Dispatch rules:
 * - `class` constructors are always constructed (they throw when called).
 * - Any other function is constructed only when the wrapper itself is invoked
 *   with `new`; otherwise it is called normally. Ordinary `function`
 *   declarations own a `prototype` object too, so "has a prototype" alone must
 *   not be taken to mean "is a class" — doing so would run plain helpers via
 *   `new` and discard their primitive return value.
 *
 * For anything constructible, static members are copied onto the wrapper and
 * instances are created with the wrapper (or the subclass that invoked it) as
 * `new.target`. Instances therefore inherit from `Wrapper.prototype`, whose
 * `constructor` is `undefined`, while methods still resolve through
 * `fn.prototype` further up the chain.
 *
 * NOTE(review): this only hardens direct `.constructor` access. Walking the
 * prototype chain explicitly (e.g. `Object.getPrototypeOf`) can still reach
 * host objects, so treat this as defense in depth, not a security boundary.
 *
 * @param fn - Host function or class constructor to wrap.
 * @returns A wrapper typed as `fn`.
 */
// biome-ignore lint/complexity/noBannedTypes: wrapping arbitrary functions and class constructors
function neutralizeConstructor<T extends Function>(fn: T): T {
  const hasPrototype = typeof fn.prototype === "object" && fn.prototype !== null;
  // Source text of a class always starts with the `class` keyword.
  const isClass = hasPrototype && /^class\b/.test(Function.prototype.toString.call(fn));

  function Wrapper(this: unknown, ...args: unknown[]): unknown {
    if (hasPrototype && (isClass || new.target !== undefined)) {
      // Honour a subclass's `new.target` so `class X extends Wrapped` works.
      const target = new.target === undefined ? Wrapper : new.target;
      return Reflect.construct(fn, args, target);
    }
    return (fn as unknown as (...a: unknown[]) => unknown)(...args);
  }

  if (hasPrototype) {
    copyStaticMembers(fn, Wrapper);
    // Keep instance methods reachable while shadowing `constructor` below.
    Object.setPrototypeOf(Wrapper.prototype, fn.prototype);
    Object.defineProperty(Wrapper.prototype, "constructor", {
      value: undefined,
      writable: false,
      configurable: false,
    });
  }

  Object.defineProperty(Wrapper, "constructor", {
    value: undefined,
    writable: false,
    configurable: false,
  });

  return Wrapper as unknown as T;
}
|
|
75
|
+
|
|
76
|
+
/** Argument schema for `run_code`: a single `code` string. */
const runCodeParams = z.object({
  code: z.string().describe("JavaScript code to execute. Use console.log() for output."),
});

/**
 * Build the `run_code` tool definition.
 *
 * The tool's `execute` handler forwards the `code` argument to
 * `executeInIsolate` and returns whatever that call resolves to. `guidance`
 * and `description` are the model-facing texts shipped with the tool.
 *
 * @returns The tool definition plus its `guidance` string.
 */
export function createRunCode(): ToolDef<typeof runCodeParams> & { guidance: string } {
  const guidanceSentences = [
    "You MUST use the run_code tool for ANY question involving math, counting, calculations,",
    "data processing, or code. NEVER do mental math or recite code verbally.",
    "run_code executes JavaScript (not Python). Always write JavaScript.",
  ];

  return {
    guidance: guidanceSentences.join(" "),
    description:
      "Execute JavaScript code in a sandbox and return the output. Use this for calculations, data transformations, string manipulation, or any task that benefits from running code. Output is captured from console.log(). No network or filesystem access.",
    parameters: runCodeParams,
    execute: async (args) => executeInIsolate(args.code),
  };
}
|
|
107
|
+
|
|
108
|
+
/**
 * Execute user code in a fresh `node:vm` context.
 *
 * @remarks
 * The context exposes a frozen `console` whose methods append to a captured
 * output buffer, timer functions, and a handful of web-compat globals; every
 * host function placed in the context goes through `neutralizeConstructor`.
 * String-based code generation and WebAssembly compilation are disabled for
 * the context.
 *
 * Hardening beyond the plain `runInContext` timeout:
 * - The promise returned by the wrapped async IIFE is raced against a host
 *   timer, so code awaiting something that never settles cannot hang the
 *   caller. The whole run shares one `RUN_CODE_TIMEOUT_MS` budget.
 * - Timer callbacks run on the host event loop; anything they throw is caught
 *   and appended to the output as `Uncaught …` instead of surfacing as an
 *   uncaught host exception.
 * - Timer functions hand out plain integer ids rather than host `Timeout`
 *   objects, so no host object is returned into the sandbox.
 *
 * NOTE(review): per the Node.js `vm` documentation, a CPU-bound loop that
 * runs in a continuation after an `await` is outside `runInContext`'s
 * timeout and blocks the event loop; the host timer cannot interrupt it.
 *
 * @param code - JavaScript source; it is wrapped in an async IIFE so
 *   top-level `await` works.
 * @returns The trimmed captured output, a fixed placeholder string when
 *   nothing was logged, or `{ error }` when compilation, execution, or the
 *   timeout fails.
 */
export async function executeInIsolate(code: string): Promise<string | { error: string }> {
  const output: string[] = [];
  const capture = (...args: unknown[]): void => {
    output.push(args.map(String).join(" "));
  };

  // Sandbox-visible timer id → host timer handle. Everything still registered
  // here is cancelled in `finally`, so no callback outlives the run.
  const activeTimers = new Map<number, ReturnType<typeof setTimeout>>();
  let nextTimerId = 1;

  const assertCallable = (fn: unknown): void => {
    if (typeof fn !== "function") {
      throw new TypeError('The "callback" argument must be of type function.');
    }
  };

  // Run a sandbox callback without letting its exception reach the host loop.
  const runTimerCallback = (fn: unknown, args: unknown[]): void => {
    try {
      (fn as (...a: unknown[]) => void)(...args);
    } catch (err: unknown) {
      output.push(`Uncaught ${errorMessage(err)}`);
    }
  };

  const sandboxSetTimeout = (fn: unknown, delay?: number, ...args: unknown[]): number => {
    assertCallable(fn);
    const id = nextTimerId++;
    const handle = setTimeout(() => {
      activeTimers.delete(id);
      runTimerCallback(fn, args);
    }, delay);
    activeTimers.set(id, handle);
    return id;
  };

  const sandboxSetInterval = (fn: unknown, delay?: number, ...args: unknown[]): number => {
    assertCallable(fn);
    const id = nextTimerId++;
    activeTimers.set(
      id,
      setInterval(() => runTimerCallback(fn, args), delay),
    );
    return id;
  };

  // Shared by clearTimeout and clearInterval; unknown or stale ids are ignored.
  const cancelTimer = (id?: unknown): void => {
    if (typeof id !== "number") return;
    const handle = activeTimers.get(id);
    if (handle === undefined) return;
    activeTimers.delete(id);
    clearTimeout(handle);
    clearInterval(handle);
  };

  const context = vm.createContext(
    {
      // Console methods wrapped to prevent .constructor escape to host Function.
      console: Object.freeze({
        log: neutralizeConstructor(capture),
        info: neutralizeConstructor(capture),
        warn: neutralizeConstructor(capture),
        error: neutralizeConstructor(capture),
        debug: neutralizeConstructor(capture),
      }),
      // Wrapped timers — neutralized to prevent .constructor escape.
      setTimeout: neutralizeConstructor(sandboxSetTimeout),
      clearTimeout: neutralizeConstructor(cancelTimer),
      setInterval: neutralizeConstructor(sandboxSetInterval),
      clearInterval: neutralizeConstructor(cancelTimer),
      // Standard web-compat globals — constructor chain neutered.
      URL: neutralizeConstructor(URL),
      URLSearchParams: neutralizeConstructor(URLSearchParams),
      TextEncoder: neutralizeConstructor(TextEncoder),
      TextDecoder: neutralizeConstructor(TextDecoder),
      atob: neutralizeConstructor(atob),
      btoa: neutralizeConstructor(btoa),
      structuredClone: neutralizeConstructor(structuredClone),
    },
    {
      // Block string-based code generation within the sandbox realm.
      codeGeneration: { strings: false, wasm: false },
    },
  );

  let deadline: ReturnType<typeof setTimeout> | undefined;
  try {
    // Wrap user code in an async IIFE so top-level `await` works.
    const wrapped = `(async () => {\n${code}\n})()`;
    const script = new vm.Script(wrapped, { filename: "run_code.js" });

    // `timeout` bounds the synchronous part of the run only.
    const startedAt = Date.now();
    const completion: unknown = script.runInContext(context, { timeout: RUN_CODE_TIMEOUT_MS });

    // Bound the asynchronous remainder with whatever is left of the budget.
    const remainingMs = Math.max(0, RUN_CODE_TIMEOUT_MS - (Date.now() - startedAt));
    const expired = new Promise<never>((_resolve, reject) => {
      deadline = setTimeout(() => {
        reject(new Error(`Script execution timed out after ${RUN_CODE_TIMEOUT_MS}ms`));
      }, remainingMs);
    });
    await Promise.race([completion, expired]);

    const text = output.join("\n").trim();
    return text || "Code ran successfully (no output)";
  } catch (err: unknown) {
    return { error: errorMessage(err) };
  } finally {
    clearTimeout(deadline);
    // Cancel all sandbox timers that are still pending. This prevents
    // setInterval/setTimeout callbacks from running in the host event loop
    // after the sandbox execution has completed or timed out.
    for (const handle of activeTimers.values()) {
      clearTimeout(handle);
      clearInterval(handle);
    }
    activeTimers.clear();
  }
}
|
|
@@ -0,0 +1,143 @@
|
|
|
1
|
+
// Copyright 2025 the AAI authors. MIT license.
|
|
2
|
+
/**
|
|
3
|
+
* Shared runtime conformance tests.
|
|
4
|
+
*
|
|
5
|
+
* Both the self-hosted direct executor and the platform sandbox must satisfy
|
|
6
|
+
* the same behavioral contract. This module defines that contract as a
|
|
7
|
+
* reusable test suite that can be wired to either runtime.
|
|
8
|
+
*
|
|
9
|
+
* Inspired by Nitro's `testNitro()` pattern: one test fixture, many runtimes.
|
|
10
|
+
*
|
|
11
|
+
* @example Direct executor (unit test)
|
|
12
|
+
* ```ts
|
|
13
|
+
* import { testRuntime } from "./_runtime-conformance.ts";
|
|
14
|
+
*
|
|
15
|
+
* testRuntime("direct", () => {
|
|
16
|
+
* const exec = createRuntime({ agent: CONFORMANCE_AGENT, env: { MY_VAR: "test-value" } });
|
|
17
|
+
* return { executeTool: exec.executeTool, hooks: exec.hooks };
|
|
18
|
+
* });
|
|
19
|
+
* ```
|
|
20
|
+
*
|
|
21
|
+
* @example Sandbox (integration test in aai-server)
|
|
22
|
+
* ```ts
|
|
23
|
+
* // Internal module — import the .ts source directly from this package.
|
|
24
|
+
* import { testRuntime } from "../../aai/host/_runtime-conformance.ts";
|
|
25
|
+
*
|
|
26
|
+
* testRuntime("sandbox", async () => {
|
|
27
|
+
* // ... start isolate with a bundled agent
|
|
28
|
+
* return { executeTool: buildExecuteTool(...), hooks: buildHookInvoker(...) };
|
|
29
|
+
* });
|
|
30
|
+
* ```
|
|
31
|
+
*/
|
|
32
|
+
|
|
33
|
+
import { describe, expect, test } from "vitest";
|
|
34
|
+
import { z } from "zod";
|
|
35
|
+
import type { ExecuteTool } from "../sdk/_internal-types.ts";
|
|
36
|
+
import type { AgentDef } from "../sdk/types.ts";
|
|
37
|
+
|
|
38
|
+
// ── Shared context type ────────────────────────────────────────────────────
|
|
39
|
+
|
|
40
|
+
/**
 * The slice of a runtime that the conformance suite drives.
 *
 * Any runtime that can supply an {@link ExecuteTool} function can be plugged
 * into the shared tests.
 */
export type RuntimeTestContext = { executeTool: ExecuteTool };
|
|
49
|
+
|
|
50
|
+
// ── Conformance agent ──────────────────────────────────────────────────────
|
|
51
|
+
|
|
52
|
+
/**
 * Agent fixture exercised by the conformance suite (direct executor path).
 *
 * Each tool surfaces one part of the tool context so a test can observe it:
 * arguments (`echo`), `env` (`get_env`), `state` (`get_state`), `messages`
 * (`echo_messages`) and `kv` (`kv_roundtrip`).
 */
export const CONFORMANCE_AGENT: AgentDef = {
  name: "conformance-test",
  systemPrompt: "Conformance test agent.",
  greeting: "Hello!",
  maxSteps: 5,
  // Initial session state; the `get_state` tool serializes it back.
  state: () => {
    return { count: 0, lastTurn: "" };
  },
  tools: {
    echo: {
      description: "Echo input",
      parameters: z.object({ text: z.string() }),
      execute: (args: { text: string }) => `echo:${args.text}`,
    },
    get_env: {
      description: "Get MY_VAR from env",
      execute: (_args: unknown, ctx) => {
        return ctx.env.MY_VAR ?? "missing";
      },
    },
    get_state: {
      description: "Get session state",
      execute: (_args: unknown, ctx) => {
        return JSON.stringify(ctx.state);
      },
    },
    echo_messages: {
      description: "Return messages as JSON",
      execute: (_args: unknown, ctx) => {
        return JSON.stringify(ctx.messages);
      },
    },
    kv_roundtrip: {
      description: "KV set then get",
      parameters: z.object({ value: z.string() }),
      // Write the value, read it straight back, and report what was read.
      execute: async (args: { value: string }, ctx) => {
        await ctx.kv.set("test-key", args.value);
        const readBack = await ctx.kv.get<string>("test-key");
        return `stored:${JSON.stringify(readBack)}`;
      },
    },
  },
};
|
|
88
|
+
|
|
89
|
+
// ── Shared conformance suite ───────────────────────────────────────────────
|
|
90
|
+
|
|
91
|
+
/**
 * Run the runtime conformance test suite against a given runtime context.
 *
 * The `getContext` callback is invoked once per test to retrieve the
 * current {@link RuntimeTestContext}. It may return the context directly
 * (e.g. one prepared in a `beforeAll`) or a promise for it, so runtimes that
 * need asynchronous setup can pass an `async` callback.
 *
 * All tests assume the runtime was created with {@link CONFORMANCE_AGENT}
 * (or its bundle equivalent) and `env: { MY_VAR: "test-value" }`.
 *
 * @param label - Suffix for the `describe` title, identifying the runtime.
 * @param getContext - Supplies the runtime under test, sync or async.
 */
export function testRuntime(
  label: string,
  getContext: () => RuntimeTestContext | Promise<RuntimeTestContext>,
): void {
  describe(`runtime conformance: ${label}`, () => {
    // ── Tool execution ───────────────────────────────────────────────

    test("executes tool and returns result", async () => {
      // `await` is a no-op for a synchronous context and unwraps an async one.
      const { executeTool } = await getContext();
      const result = await executeTool("echo", { text: "hello" }, "s1", []);
      expect(result).toBe("echo:hello");
    });

    test("tool receives env variables", async () => {
      const { executeTool } = await getContext();
      const result = await executeTool("get_env", {}, "s1", []);
      expect(result).toBe("test-value");
    });

    test("tool receives conversation messages", async () => {
      const { executeTool } = await getContext();
      const msgs = [
        { role: "user" as const, content: "hi" },
        { role: "assistant" as const, content: "hello" },
      ];
      const result = await executeTool("echo_messages", {}, "s1", msgs);
      expect(JSON.parse(result)).toEqual(msgs);
    });

    test("KV round-trip through tool context", async () => {
      const { executeTool } = await getContext();
      const result = await executeTool("kv_roundtrip", { value: "abc" }, "s1", []);
      expect(result).toBe('stored:"abc"');
    });

    // ── Session state ────────────────────────────────────────────────

    test("session state is initialized from factory", async () => {
      const { executeTool } = await getContext();
      const result = await executeTool("get_state", {}, "state-init", []);
      const state = JSON.parse(result);
      expect(state).toHaveProperty("count", 0);
      expect(state).toHaveProperty("lastTurn", "");
    });
  });
}
|
|
@@ -0,0 +1,276 @@
|
|
|
1
|
+
// Copyright 2025 the AAI authors. MIT license.
|
|
2
|
+
|
|
3
|
+
import { readFileSync } from "node:fs";
|
|
4
|
+
import { resolve } from "node:path";
|
|
5
|
+
import { createNanoEvents } from "nanoevents";
|
|
6
|
+
import { vi } from "vitest";
|
|
7
|
+
import type { AgentConfig } from "../sdk/_internal-types.ts";
|
|
8
|
+
import type { ClientSink } from "../sdk/protocol.ts";
|
|
9
|
+
import type { AgentDef, ToolContext, ToolDef } from "../sdk/types.ts";
|
|
10
|
+
import { DEFAULT_SYSTEM_PROMPT } from "../sdk/types.ts";
|
|
11
|
+
import { createRuntime } from "./runtime.ts";
|
|
12
|
+
import type { S2sEvents, S2sHandle } from "./s2s.ts";
|
|
13
|
+
import type { Session } from "./session.ts";
|
|
14
|
+
import { _internals, type S2sSessionOptions } from "./session.ts";
|
|
15
|
+
|
|
16
|
+
/**
 * Return a promise that is resolved from a queued microtask, letting
 * microtasks that were already queued run first.
 */
export function flush(): Promise<void> {
  return new Promise<void>((settle) => {
    queueMicrotask(settle);
  });
}
|
|
20
|
+
|
|
21
|
+
/**
 * Build a `ToolContext` for tests: empty `env`, `state` and `messages`, a
 * placeholder `kv`, and a fixed session id. Fields in `overrides` win.
 */
export function createMockToolContext(overrides?: Partial<ToolContext>): ToolContext {
  const defaults: ToolContext = {
    env: {},
    state: {},
    // Placeholder only — the empty object has no KV methods.
    kv: {} as never,
    messages: [],
    sessionId: "test-session",
  };
  return { ...defaults, ...overrides };
}
|
|
31
|
+
|
|
32
|
+
/** Build a minimal tool whose `execute` returns `"ok"`; `overrides` win. */
export function makeTool(overrides?: Partial<ToolDef>): ToolDef {
  const defaults: ToolDef = {
    description: "test tool",
    execute: () => "ok",
  };
  return { ...defaults, ...overrides };
}
|
|
35
|
+
|
|
36
|
+
/** Build a tool-less test agent definition; fields in `overrides` win. */
export function makeAgent(overrides?: Partial<AgentDef>): AgentDef {
  const defaults: AgentDef = {
    name: "test-agent",
    systemPrompt: "Be helpful.",
    greeting: "Hello!",
    maxSteps: 5,
    tools: {},
  };
  return { ...defaults, ...overrides };
}
|
|
46
|
+
|
|
47
|
+
/**
 * Build an `AgentConfig` for tests using `DEFAULT_SYSTEM_PROMPT`; fields in
 * `overrides` win.
 */
export function makeConfig(overrides: Partial<AgentConfig> = {}): AgentConfig {
  const defaults: AgentConfig = {
    name: "test-agent",
    systemPrompt: DEFAULT_SYSTEM_PROMPT,
    greeting: "Hello",
  };
  return { ...defaults, ...overrides };
}
|
|
55
|
+
|
|
56
|
+
/**
 * Build a `Session` whose every method is a `vi.fn()` spy. `start`, `stop`
 * and `waitForTurn` resolve immediately; the `on*` handlers do nothing.
 * Members supplied in `overrides` replace the defaults.
 */
export function makeStubSession(overrides?: Partial<Session>): Session {
  // Each call produces an independent spy returning a resolved promise.
  const resolvedSpy = () => vi.fn(() => Promise.resolve());

  const stub: Session = {
    start: resolvedSpy(),
    stop: resolvedSpy(),
    onAudio: vi.fn(),
    onAudioReady: vi.fn(),
    onCancel: vi.fn(),
    onReset: vi.fn(),
    onHistory: vi.fn(),
    waitForTurn: resolvedSpy(),
  };
  return { ...stub, ...overrides };
}
|
|
70
|
+
|
|
71
|
+
// ─── Session test helpers ───────────────────────────────────────────────────
|
|
72
|
+
|
|
73
|
+
/** An `S2sHandle` plus a `_fire` hook that lets a test emit any S2S event. */
export type MockS2sHandle = S2sHandle & {
  _fire: <K extends keyof S2sEvents>(type: K, ...args: Parameters<S2sEvents[K]>) => void;
};

/**
 * Create a mock S2sHandle backed by a nanoevents emitter.
 *
 * `on` subscribes to that emitter and `_fire` emits on it; every other
 * method is a bare `vi.fn()` spy.
 */
export function makeMockHandle(): MockS2sHandle {
  const bus = createNanoEvents<S2sEvents>();

  function fire<K extends keyof S2sEvents>(type: K, ...args: Parameters<S2sEvents[K]>): void {
    bus.emit(type, ...args);
  }

  return {
    on: bus.on.bind(bus),
    sendAudio: vi.fn(),
    sendToolResult: vi.fn(),
    updateSession: vi.fn(),
    resumeSession: vi.fn(),
    close: vi.fn(),
    _fire: fire,
  };
}
|
|
92
|
+
|
|
93
|
+
/**
 * Build a recording `ClientSink` for tests.
 *
 * Every sink method is a `vi.fn()` spy that also records what it received:
 * events go to `events`, audio chunks to `audioChunks`, and each
 * `playAudioDone` call bumps the counter read through `audioDoneCount`.
 */
export function makeClient(): ClientSink & {
  events: unknown[];
  audioChunks: Uint8Array[];
  audioDoneCount: number;
} {
  const recordedEvents: unknown[] = [];
  const recordedChunks: Uint8Array[] = [];
  let doneCalls = 0;

  return {
    open: true,
    events: recordedEvents,
    audioChunks: recordedChunks,
    // Getter so callers always observe the live count.
    get audioDoneCount() {
      return doneCalls;
    },
    event: vi.fn((e: unknown) => {
      recordedEvents.push(e);
    }),
    playAudioChunk: vi.fn((chunk: Uint8Array) => {
      recordedChunks.push(chunk);
    }),
    playAudioDone: vi.fn(() => {
      doneCalls += 1;
    }),
  };
}
|
|
120
|
+
|
|
121
|
+
/** Logger whose four levels are `vi.fn()` spies, so nothing is printed. */
export const silentLogger: Record<
  "info" | "warn" | "error" | "debug",
  (...args: unknown[]) => void
> = {
  info: vi.fn(),
  warn: vi.fn(),
  error: vi.fn(),
  debug: vi.fn(),
};
|
|
132
|
+
|
|
133
|
+
/**
 * Build `S2sSessionOptions` for tests: a fresh recording client, a spy tool
 * executor resolving to `"tool-result"`, a spy WebSocket factory, and the
 * silent logger. Fields in `overrides` win.
 */
export function makeSessionOpts(overrides?: Partial<S2sSessionOptions>): S2sSessionOptions {
  const defaults: S2sSessionOptions = {
    id: "session-1",
    agent: "test-agent",
    client: makeClient(),
    agentConfig: {
      name: "test-agent",
      systemPrompt: DEFAULT_SYSTEM_PROMPT,
      greeting: "Hello!",
    },
    toolSchemas: [],
    apiKey: "test-key",
    s2sConfig: {
      wssUrl: "wss://fake",
      inputSampleRate: 16_000,
      outputSampleRate: 24_000,
    },
    executeTool: vi.fn(async () => "tool-result"),
    createWebSocket: vi.fn(),
    logger: silentLogger,
  };
  return { ...defaults, ...overrides };
}
|
|
156
|
+
|
|
157
|
+
// ─── Fixture replay helpers ──────────────────────────────────────────────────
|
|
158
|
+
|
|
159
|
+
/** Absolute path of the `fixtures` directory next to this module. */
const FIXTURE_DIR = resolve(import.meta.dirname, "fixtures");

/**
 * Read and parse a JSON fixture from the fixtures directory.
 *
 * @param name - File name resolved against the fixtures directory.
 * @returns The parsed JSON, typed as `T` without any validation.
 */
export function loadFixture<T = Record<string, unknown>[]>(name: string): T {
  const fixturePath = resolve(FIXTURE_DIR, name);
  const raw = readFileSync(fixturePath, "utf-8");
  return JSON.parse(raw);
}
|
|
165
|
+
|
|
166
|
+
/**
 * Wire-format → event translator: receives the mock handle and one raw S2S
 * API message, and is expected to turn it into `_fire()` call(s) on the
 * handle. It returns nothing.
 */
type FireFn = (handle: MockS2sHandle, msg: Record<string, unknown>) => void;
|
|
172
|
+
|
|
173
|
+
/**
 * Translators keyed by wire-format message `type`. Each one converts the
 * snake_case wire fields into the payload passed to `_fire()`.
 */
const FIXTURE_DISPATCH: Record<string, FireFn> = {
  "session.ready": (handle, msg) => {
    handle._fire("ready", { sessionId: msg.session_id as string });
  },
  "session.updated": () => {
    /* dropped — no longer dispatched */
  },
  "session.error": (handle, msg) => {
    const code = msg.code as string;
    // These two codes are reported as session expiry; all others as errors.
    const expired = code === "session_not_found" || code === "session_forbidden";
    if (expired) {
      handle._fire("sessionExpired");
      return;
    }
    handle._fire("error", new Error(msg.message as string));
  },
  error: (handle, msg) => {
    handle._fire("error", new Error(msg.message as string));
  },
  "input.speech.started": (handle) => {
    handle._fire("event", { type: "speech_started" });
  },
  "input.speech.stopped": (handle) => {
    handle._fire("event", { type: "speech_stopped" });
  },
  "transcript.user": (handle, msg) => {
    handle._fire("event", { type: "user_transcript", text: msg.text as string });
  },
  "reply.started": (handle, msg) => {
    handle._fire("replyStarted", { replyId: (msg.reply_id as string) ?? "" });
  },
  "transcript.agent": (handle, msg) => {
    handle._fire("event", {
      type: "agent_transcript",
      text: (msg.text as string) ?? "",
      _interrupted: msg.interrupted === true,
    });
  },
  "tool.call": (handle, msg) => {
    handle._fire("event", {
      type: "tool_call",
      toolCallId: msg.call_id as string,
      toolName: msg.name as string,
      args: (msg.args as Record<string, unknown>) ?? {},
    });
  },
  "reply.done": (handle, msg) => {
    // An interrupted reply is reported as a cancellation.
    if (msg.status === "interrupted") {
      handle._fire("event", { type: "cancelled" });
      return;
    }
    handle._fire("event", { type: "reply_done" });
  },
};
|
|
207
|
+
|
|
208
|
+
/**
 * Replay recorded S2S API messages through a MockS2sHandle.
 *
 * Each message is looked up by its `type` in `FIXTURE_DISPATCH` and, when a
 * translator is registered for it, converted from raw wire-format JSON into
 * typed `_fire()` calls on the handle.
 *
 * Messages without a registered translator (audio, `reply.content_part.*`,
 * anything unknown) are skipped. Only the table's own keys count: a `type`
 * such as `"toString"` or `"__proto__"` must not resolve to a member
 * inherited from `Object.prototype`.
 *
 * @param handle - Mock handle that receives the `_fire()` calls.
 * @param messages - Raw wire-format messages, replayed in order.
 */
export function replayFixtureMessages(
  handle: MockS2sHandle,
  messages: Record<string, unknown>[],
): void {
  for (const msg of messages) {
    const type = msg.type;
    if (typeof type !== "string") continue;
    if (!Object.prototype.hasOwnProperty.call(FIXTURE_DISPATCH, type)) continue;
    FIXTURE_DISPATCH[type]?.(handle, msg);
  }
}
|
|
225
|
+
|
|
226
|
+
// ─── Real-executor fixture replay ────────────────────────────────────────────
|
|
227
|
+
|
|
228
|
+
/**
 * Create a real Runtime-backed session for fixture replay testing.
 *
 * The runtime comes from `createRuntime`, while `_internals.connectS2s` is
 * spied on and resolves to a mock handle, so fixture messages can be replayed
 * into the session without a WebSocket.
 *
 * If building the runtime or the session throws, the spy is restored before
 * the error propagates, so a failed setup cannot leak the mock into later
 * tests. On success the caller owns the spy: call `cleanup()` when done.
 *
 * @param agent - Agent definition handed to `createRuntime`.
 * @param opts - Optional `env` for the runtime (defaults to `{}`).
 * @returns The session, its recording client, the mock handle, the runtime
 *   (`executor`), plus `replay()` and `cleanup()` helpers.
 */
export function createFixtureSession(
  // biome-ignore lint/suspicious/noExplicitAny: test helper accepts any agent state type
  agent: AgentDef<any>,
  opts?: { env?: Record<string, string> },
) {
  const mockHandle = makeMockHandle();
  const connectSpy = vi.spyOn(_internals, "connectS2s").mockResolvedValue(mockHandle);
  const client = makeClient();

  // Kept in a closure so the inferred types flow through the try/catch below.
  const build = () => {
    const executor = createRuntime({
      agent,
      env: opts?.env ?? {},
      logger: silentLogger,
    });
    const session = executor.createSession({
      id: "fixture-session",
      agent: agent.name,
      client,
    });
    return { executor, session };
  };

  let built: ReturnType<typeof build>;
  try {
    built = build();
  } catch (err: unknown) {
    // The caller never receives `cleanup()` on failure, so undo the spy here.
    connectSpy.mockRestore();
    throw err;
  }
  const { executor, session } = built;

  return {
    session,
    client,
    mockHandle,
    executor,
    /** Replay a fixture file through the session's S2S handle. */
    replay(fixtureName: string) {
      replayFixtureMessages(mockHandle, loadFixture(fixtureName));
    },
    /** Restore the connectS2s spy. Call in afterEach. */
    cleanup() {
      connectSpy.mockRestore();
    },
  };
}
|