@virtualkitchenco/multiverse-sdk 0.0.26 → 0.0.27
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +34 -22
- package/package.json +2 -2
package/README.md
CHANGED
|
@@ -5,7 +5,13 @@ Simulation testing for AI agents. Test your agent against realistic scenarios wi
|
|
|
5
5
|
## Install
|
|
6
6
|
|
|
7
7
|
```bash
|
|
8
|
-
npm install @virtualkitchenco/multiverse-sdk
|
|
8
|
+
npm install @virtualkitchenco/multiverse-sdk zod
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
For LangChain agents:
|
|
12
|
+
|
|
13
|
+
```bash
|
|
14
|
+
npm install @virtualkitchenco/multiverse-sdk zod @langchain/core @langchain/anthropic
|
|
9
15
|
```
|
|
10
16
|
|
|
11
17
|
## Quick Start
|
|
@@ -17,7 +23,7 @@ import { multiverse } from '@virtualkitchenco/multiverse-sdk';
|
|
|
17
23
|
import { z } from 'zod';
|
|
18
24
|
|
|
19
25
|
multiverse.configure({
|
|
20
|
-
baseUrl:
|
|
26
|
+
baseUrl: process.env.MULTIVERSE_URL,
|
|
21
27
|
apiKey: process.env.MULTIVERSE_API_KEY,
|
|
22
28
|
});
|
|
23
29
|
|
|
@@ -32,7 +38,9 @@ const test = multiverse.describe({
|
|
|
32
38
|
}),
|
|
33
39
|
});
|
|
34
40
|
|
|
41
|
+
const scenarios = await test.generateScenarios({ count: 5 });
|
|
35
42
|
const results = await test.run({
|
|
43
|
+
scenarios,
|
|
36
44
|
success: (world) => world.getCollection('intake_summaries').size > 0,
|
|
37
45
|
});
|
|
38
46
|
|
|
@@ -71,7 +79,7 @@ Initialize the SDK. Call once at startup.
|
|
|
71
79
|
|
|
72
80
|
```typescript
|
|
73
81
|
multiverse.configure({
|
|
74
|
-
baseUrl:
|
|
82
|
+
baseUrl: process.env.MULTIVERSE_URL,
|
|
75
83
|
apiKey: process.env.MULTIVERSE_API_KEY,
|
|
76
84
|
});
|
|
77
85
|
```
|
|
@@ -91,7 +99,7 @@ const searchFlights = multiverse.tool({
|
|
|
91
99
|
}),
|
|
92
100
|
output: SearchResultSchema,
|
|
93
101
|
execute: async (input) => realSearchFlights(input),
|
|
94
|
-
effects: (output) =>
|
|
102
|
+
effects: (output, world) =>
|
|
95
103
|
output.flights.map((f) => ({
|
|
96
104
|
operation: 'create' as const,
|
|
97
105
|
collection: 'flights',
|
|
@@ -164,15 +172,24 @@ const test = multiverse.describe({
|
|
|
164
172
|
});
|
|
165
173
|
```
|
|
166
174
|
|
|
175
|
+
| Option | Type | Description |
|
|
176
|
+
|--------|------|-------------|
|
|
177
|
+
| `name` | `string` | Agent name for grouping in the dashboard |
|
|
178
|
+
| `task` | `string` | What the agent is being tested on |
|
|
179
|
+
| `agent` | `AgentFn` | Agent function to test |
|
|
180
|
+
| `conversational` | `boolean` | Enable simulated user (chatbots, assistants). Mutually exclusive with `triggerSchema` |
|
|
181
|
+
| `triggerSchema` | `ZodSchema` | Constrains the generated event payload (autonomous agents only) |
|
|
182
|
+
| `variables` | `ZodSchema` | Typed scenario variables accessible in `success()` via `scenario.variables` |
|
|
183
|
+
|
|
167
184
|
`conversational` and `triggerSchema` are mutually exclusive at the TypeScript level.
|
|
168
185
|
|
|
169
186
|
**Agent function signature:**
|
|
170
187
|
|
|
171
188
|
```typescript
|
|
172
|
-
async function runAgent(
|
|
173
|
-
userMessage: string; //
|
|
174
|
-
runId: string; // Stable across turns
|
|
175
|
-
}): Promise<
|
|
189
|
+
async function runAgent(ctx: {
|
|
190
|
+
userMessage: string; // Generated event payload (autonomous) or latest user message (conversational)
|
|
191
|
+
runId: string; // Stable across turns, use for memory/thread scoping
|
|
192
|
+
}): Promise<unknown>
|
|
176
193
|
```
|
|
177
194
|
|
|
178
195
|
### `test.generateScenarios(options)`
|
|
@@ -181,18 +198,10 @@ Generate test scenarios upfront for inspection or reuse.
|
|
|
181
198
|
|
|
182
199
|
```typescript
|
|
183
200
|
const scenarios = await test.generateScenarios({ count: 10 });
|
|
184
|
-
|
|
185
|
-
// With typed variables for programmatic assertions
|
|
186
|
-
const scenarios = await test.generateScenarios({
|
|
187
|
-
count: 5,
|
|
188
|
-
variables: z.object({
|
|
189
|
-
expectedBookings: z.number().describe(
|
|
190
|
-
'Total bookings that should be created (e.g. 2 passengers round-trip = 4)'
|
|
191
|
-
),
|
|
192
|
-
}),
|
|
193
|
-
});
|
|
194
201
|
```
|
|
195
202
|
|
|
203
|
+
Variables are typed on `multiverse.describe()` via the `variables` option, not here.
|
|
204
|
+
|
|
196
205
|
### `test.saveScenarios(scenarios)`
|
|
197
206
|
|
|
198
207
|
Save generated scenarios for reuse across runs.
|
|
@@ -225,14 +234,17 @@ Run tests against the agent.
|
|
|
225
234
|
|
|
226
235
|
```typescript
|
|
227
236
|
const results = await test.run({
|
|
237
|
+
scenarios, // From generateScenarios()
|
|
228
238
|
success: (world, trace, scenario) => {
|
|
229
239
|
return world.getCollection('bookings').size === scenario.variables.expectedBookings;
|
|
230
240
|
},
|
|
231
|
-
scenarios, // Pre-generated or auto-generated
|
|
232
|
-
scenarioCount: 5, // Auto-generate count (ignored if scenarios provided)
|
|
233
241
|
trialsPerScenario: 4,
|
|
234
242
|
maxTurns: 20, // Max turns per run (conversational agents)
|
|
235
|
-
qualityThreshold: 70,
|
|
243
|
+
qualityThreshold: 70, // Default: 70
|
|
244
|
+
criteria: [ // Custom quality criteria (default: communication, error_handling, efficiency, accuracy)
|
|
245
|
+
{ name: 'politeness', description: 'Responds politely at all times' },
|
|
246
|
+
],
|
|
247
|
+
skipReport: true, // Skip LLM report generation
|
|
236
248
|
concurrency: 8,
|
|
237
249
|
onProgress: (p) => console.log(`${p.completed}/${p.total}`),
|
|
238
250
|
ci: {
|
|
@@ -281,7 +293,7 @@ const searchFlightsTool = tool(
|
|
|
281
293
|
// Wrap for simulation
|
|
282
294
|
const searchFlights = wrap(searchFlightsTool, {
|
|
283
295
|
output: SearchResultSchema,
|
|
284
|
-
effects: (output) =>
|
|
296
|
+
effects: (output, world) =>
|
|
285
297
|
output.flights.map((f) => ({
|
|
286
298
|
operation: 'create' as const,
|
|
287
299
|
collection: 'flights',
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@virtualkitchenco/multiverse-sdk",
|
|
3
|
-
"version": "0.0.
|
|
3
|
+
"version": "0.0.27",
|
|
4
4
|
"description": "Simulation testing SDK for AI agents",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./dist/index.js",
|
|
@@ -26,7 +26,7 @@
|
|
|
26
26
|
"dependencies": {
|
|
27
27
|
"nanoid": "^5.0.4",
|
|
28
28
|
"zod-to-json-schema": "^3.25.1",
|
|
29
|
-
"@virtualkitchenco/multiverse-types": "0.0.
|
|
29
|
+
"@virtualkitchenco/multiverse-types": "0.0.27"
|
|
30
30
|
},
|
|
31
31
|
"devDependencies": {
|
|
32
32
|
"@types/node": "^20.11.0",
|