agent-regression-lab 0.1.1 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +186 -123
- package/dist/agent/factory.js +20 -6
- package/dist/agent/httpAdapter.js +79 -0
- package/dist/agent/mockAdapter.js +210 -13
- package/dist/config.js +223 -4
- package/dist/conversationEvaluators.js +167 -0
- package/dist/conversationRunner.js +199 -0
- package/dist/evaluators.js +56 -1
- package/dist/index.js +428 -111
- package/dist/lib/id.js +6 -0
- package/dist/runOutput.js +46 -0
- package/dist/runner.js +31 -9
- package/dist/scenarios.js +211 -11
- package/dist/scoring.js +2 -2
- package/dist/storage.js +305 -31
- package/dist/tools.js +284 -0
- package/dist/trace.js +4 -2
- package/dist/ui/App.js +67 -5
- package/dist/ui/server.js +18 -0
- package/dist/ui-assets/client.js +165 -3
- package/docs/agents.md +287 -0
- package/docs/golden-suites.md +74 -0
- package/docs/integrations-and-live-services.md +58 -0
- package/docs/memory-and-stateful-agents.md +51 -0
- package/docs/release-checklist.md +94 -0
- package/docs/runtime-profiles.md +67 -0
- package/docs/scenarios.md +419 -0
- package/docs/tools.md +102 -0
- package/docs/troubleshooting.md +296 -0
- package/docs/variant-sets.md +63 -0
- package/package.json +4 -3
|
@@ -1,8 +1,9 @@
|
|
|
1
1
|
class MockAgentSession {
|
|
2
2
|
input;
|
|
3
|
-
state
|
|
3
|
+
state;
|
|
4
4
|
constructor(input) {
|
|
5
5
|
this.input = input;
|
|
6
|
+
this.state = { domain: detectDomain(input), step: "start" };
|
|
6
7
|
}
|
|
7
8
|
hasTool(toolName) {
|
|
8
9
|
return this.input.availableTools.some((tool) => tool.name === toolName);
|
|
@@ -11,9 +12,23 @@ class MockAgentSession {
|
|
|
11
12
|
if (event.type === "runner_error") {
|
|
12
13
|
return { type: "error", message: event.message };
|
|
13
14
|
}
|
|
15
|
+
switch (this.state.domain) {
|
|
16
|
+
case "support":
|
|
17
|
+
return this.nextSupport(event);
|
|
18
|
+
case "coding":
|
|
19
|
+
return this.nextCoding(event);
|
|
20
|
+
case "research":
|
|
21
|
+
return this.nextResearch(event);
|
|
22
|
+
case "ops":
|
|
23
|
+
return this.nextOps(event);
|
|
24
|
+
default:
|
|
25
|
+
return { type: "error", message: "Unsupported mock domain." };
|
|
26
|
+
}
|
|
27
|
+
}
|
|
28
|
+
async nextSupport(event) {
|
|
14
29
|
if (this.state.step === "start") {
|
|
15
30
|
const email = String(this.input.context.customer_email ?? "");
|
|
16
|
-
this.state =
|
|
31
|
+
this.state.step = "listed_customer";
|
|
17
32
|
return {
|
|
18
33
|
type: "tool_call",
|
|
19
34
|
toolName: "crm.search_customer",
|
|
@@ -22,36 +37,70 @@ class MockAgentSession {
|
|
|
22
37
|
};
|
|
23
38
|
}
|
|
24
39
|
if (this.state.step === "listed_customer") {
|
|
25
|
-
if (event.type !== "tool_result") {
|
|
40
|
+
if (event.type !== "tool_result" || typeof event.result !== "object" || event.result === null) {
|
|
26
41
|
return { type: "error", message: "Expected customer lookup result." };
|
|
27
42
|
}
|
|
28
|
-
const
|
|
43
|
+
const customerId = String(event.result.id ?? "");
|
|
44
|
+
if (this.hasTool("accounts.get_profile") && this.hasTool("accounts.update_newsletter")) {
|
|
45
|
+
this.state.step = "newsletter_lookup";
|
|
46
|
+
return {
|
|
47
|
+
type: "tool_call",
|
|
48
|
+
toolName: "accounts.get_profile",
|
|
49
|
+
input: { customer_id: customerId },
|
|
50
|
+
metadata: { message: "Checking newsletter settings." },
|
|
51
|
+
};
|
|
52
|
+
}
|
|
53
|
+
if (this.hasTool("subscriptions.cancel")) {
|
|
54
|
+
this.state.step = "cancel_done";
|
|
55
|
+
return {
|
|
56
|
+
type: "tool_call",
|
|
57
|
+
toolName: "subscriptions.cancel",
|
|
58
|
+
input: { customer_id: customerId },
|
|
59
|
+
metadata: { message: "Cancelling active subscription." },
|
|
60
|
+
};
|
|
61
|
+
}
|
|
29
62
|
if (this.hasTool("support.find_duplicate_charge")) {
|
|
30
|
-
this.state =
|
|
63
|
+
this.state.step = "found_duplicate";
|
|
31
64
|
return {
|
|
32
65
|
type: "tool_call",
|
|
33
66
|
toolName: "support.find_duplicate_charge",
|
|
34
|
-
input: { customer_id:
|
|
67
|
+
input: { customer_id: customerId },
|
|
35
68
|
metadata: { message: "Looking up the duplicated order directly." },
|
|
36
69
|
};
|
|
37
70
|
}
|
|
38
|
-
this.state =
|
|
71
|
+
this.state.step = "listed_orders";
|
|
39
72
|
return {
|
|
40
73
|
type: "tool_call",
|
|
41
74
|
toolName: "orders.list",
|
|
42
|
-
input: { customer_id:
|
|
75
|
+
input: { customer_id: customerId },
|
|
43
76
|
metadata: { message: "Listing customer orders." },
|
|
44
77
|
};
|
|
45
78
|
}
|
|
79
|
+
if (this.state.step === "newsletter_lookup") {
|
|
80
|
+
if (event.type !== "tool_result" || typeof event.result !== "object" || event.result === null) {
|
|
81
|
+
return { type: "error", message: "Expected account lookup result." };
|
|
82
|
+
}
|
|
83
|
+
this.state.step = "done";
|
|
84
|
+
return {
|
|
85
|
+
type: "tool_call",
|
|
86
|
+
toolName: "accounts.update_newsletter",
|
|
87
|
+
input: {
|
|
88
|
+
customer_id: String(this.input.context.customer_id ?? event.result.customer_id ?? ""),
|
|
89
|
+
subscribed: false,
|
|
90
|
+
},
|
|
91
|
+
metadata: { message: "Turning off newsletter subscription." },
|
|
92
|
+
};
|
|
93
|
+
}
|
|
46
94
|
if (this.state.step === "listed_orders") {
|
|
47
95
|
if (event.type !== "tool_result" || !Array.isArray(event.result)) {
|
|
48
96
|
return { type: "error", message: "Expected order list result." };
|
|
49
97
|
}
|
|
50
|
-
const
|
|
98
|
+
const targetOrderId = String(this.input.context.target_order_id ?? "ord_1024");
|
|
99
|
+
const duplicate = event.result.find((order) => typeof order === "object" && order !== null && order.id === targetOrderId);
|
|
51
100
|
if (!duplicate?.id) {
|
|
52
101
|
return { type: "error", message: "Could not identify duplicate order." };
|
|
53
102
|
}
|
|
54
|
-
this.state =
|
|
103
|
+
this.state.step = "done";
|
|
55
104
|
return {
|
|
56
105
|
type: "tool_call",
|
|
57
106
|
toolName: "orders.refund",
|
|
@@ -67,7 +116,7 @@ class MockAgentSession {
|
|
|
67
116
|
if (!result.order_id) {
|
|
68
117
|
return { type: "error", message: "Duplicate lookup did not return an order id." };
|
|
69
118
|
}
|
|
70
|
-
this.state =
|
|
119
|
+
this.state.step = "done";
|
|
71
120
|
return {
|
|
72
121
|
type: "tool_call",
|
|
73
122
|
toolName: "orders.refund",
|
|
@@ -75,9 +124,26 @@ class MockAgentSession {
|
|
|
75
124
|
metadata: { message: "Refunding the duplicated charge." },
|
|
76
125
|
};
|
|
77
126
|
}
|
|
127
|
+
if (this.state.step === "cancel_done") {
|
|
128
|
+
if (event.type !== "tool_result" || typeof event.result !== "object" || event.result === null) {
|
|
129
|
+
return { type: "error", message: "Expected cancellation result." };
|
|
130
|
+
}
|
|
131
|
+
return {
|
|
132
|
+
type: "final",
|
|
133
|
+
output: `Cancelled subscription ${String(event.result.subscription_id ?? "unknown")}.`,
|
|
134
|
+
metadata: { completed: true },
|
|
135
|
+
};
|
|
136
|
+
}
|
|
78
137
|
if (this.state.step === "done") {
|
|
79
138
|
if (event.type !== "tool_result" || typeof event.result !== "object" || event.result === null) {
|
|
80
|
-
return { type: "error", message: "Expected
|
|
139
|
+
return { type: "error", message: "Expected final support tool result." };
|
|
140
|
+
}
|
|
141
|
+
if ("newsletter_subscribed" in event.result) {
|
|
142
|
+
return {
|
|
143
|
+
type: "final",
|
|
144
|
+
output: "Disabled the newsletter subscription for the customer.",
|
|
145
|
+
metadata: { completed: true },
|
|
146
|
+
};
|
|
81
147
|
}
|
|
82
148
|
const refund = event.result;
|
|
83
149
|
return {
|
|
@@ -86,8 +152,139 @@ class MockAgentSession {
|
|
|
86
152
|
metadata: { completed: true },
|
|
87
153
|
};
|
|
88
154
|
}
|
|
89
|
-
return { type: "error", message: "Unexpected session state." };
|
|
155
|
+
return { type: "error", message: "Unexpected support session state." };
|
|
156
|
+
}
|
|
157
|
+
async nextCoding(event) {
|
|
158
|
+
const targetPath = String(this.input.context.target_path ?? "");
|
|
159
|
+
const replacement = String(this.input.context.replacement ?? "");
|
|
160
|
+
if (this.state.step === "start") {
|
|
161
|
+
this.state.step = "listed_files";
|
|
162
|
+
return {
|
|
163
|
+
type: "tool_call",
|
|
164
|
+
toolName: "repo.list_files",
|
|
165
|
+
input: {},
|
|
166
|
+
metadata: { message: "Listing repository files." },
|
|
167
|
+
};
|
|
168
|
+
}
|
|
169
|
+
if (this.state.step === "listed_files") {
|
|
170
|
+
this.state.step = "read_file";
|
|
171
|
+
return {
|
|
172
|
+
type: "tool_call",
|
|
173
|
+
toolName: "repo.read_file",
|
|
174
|
+
input: { path: targetPath },
|
|
175
|
+
metadata: { message: "Reading target file." },
|
|
176
|
+
};
|
|
177
|
+
}
|
|
178
|
+
if (this.state.step === "read_file") {
|
|
179
|
+
this.state.step = "patched";
|
|
180
|
+
return {
|
|
181
|
+
type: "tool_call",
|
|
182
|
+
toolName: "repo.apply_patch",
|
|
183
|
+
input: { path: targetPath, replacement },
|
|
184
|
+
metadata: { message: "Applying deterministic patch." },
|
|
185
|
+
};
|
|
186
|
+
}
|
|
187
|
+
if (this.state.step === "patched") {
|
|
188
|
+
return {
|
|
189
|
+
type: "final",
|
|
190
|
+
output: `Updated ${targetPath} with replacement '${replacement}'.`,
|
|
191
|
+
metadata: { completed: true },
|
|
192
|
+
};
|
|
193
|
+
}
|
|
194
|
+
return { type: "error", message: "Unexpected coding session state." };
|
|
195
|
+
}
|
|
196
|
+
async nextResearch(event) {
|
|
197
|
+
const query = String(this.input.context.query ?? "");
|
|
198
|
+
if (this.state.step === "start") {
|
|
199
|
+
this.state.step = "searched";
|
|
200
|
+
return {
|
|
201
|
+
type: "tool_call",
|
|
202
|
+
toolName: "docs.search",
|
|
203
|
+
input: { query },
|
|
204
|
+
metadata: { message: "Searching documents." },
|
|
205
|
+
};
|
|
206
|
+
}
|
|
207
|
+
if (this.state.step === "searched") {
|
|
208
|
+
if (event.type !== "tool_result" || !Array.isArray(event.result) || event.result.length === 0) {
|
|
209
|
+
return { type: "error", message: "Expected document search results." };
|
|
210
|
+
}
|
|
211
|
+
const first = event.result[0];
|
|
212
|
+
this.state.step = "read_doc";
|
|
213
|
+
return {
|
|
214
|
+
type: "tool_call",
|
|
215
|
+
toolName: "docs.read",
|
|
216
|
+
input: { doc_id: String(first.id ?? "") },
|
|
217
|
+
metadata: { message: "Reading top matching document." },
|
|
218
|
+
};
|
|
219
|
+
}
|
|
220
|
+
if (this.state.step === "read_doc") {
|
|
221
|
+
if (event.type !== "tool_result" || typeof event.result !== "object" || event.result === null) {
|
|
222
|
+
return { type: "error", message: "Expected document read result." };
|
|
223
|
+
}
|
|
224
|
+
const doc = event.result;
|
|
225
|
+
return {
|
|
226
|
+
type: "final",
|
|
227
|
+
output: `${String(this.input.context.answer_prefix ?? "Answer")}: ${String(this.input.context.expected_answer ?? "")} (source: ${String(doc.id ?? "")})`,
|
|
228
|
+
metadata: { completed: true },
|
|
229
|
+
};
|
|
230
|
+
}
|
|
231
|
+
return { type: "error", message: "Unexpected research session state." };
|
|
232
|
+
}
|
|
233
|
+
async nextOps(event) {
|
|
234
|
+
const service = String(this.input.context.service ?? "");
|
|
235
|
+
if (this.state.step === "start") {
|
|
236
|
+
this.state.step = "alerts_loaded";
|
|
237
|
+
return {
|
|
238
|
+
type: "tool_call",
|
|
239
|
+
toolName: "alerts.list_active",
|
|
240
|
+
input: {},
|
|
241
|
+
metadata: { message: "Loading active alerts." },
|
|
242
|
+
};
|
|
243
|
+
}
|
|
244
|
+
if (this.state.step === "alerts_loaded") {
|
|
245
|
+
this.state.step = "logs_loaded";
|
|
246
|
+
return {
|
|
247
|
+
type: "tool_call",
|
|
248
|
+
toolName: "logs.query_service",
|
|
249
|
+
input: { service },
|
|
250
|
+
metadata: { message: "Querying service logs." },
|
|
251
|
+
};
|
|
252
|
+
}
|
|
253
|
+
if (this.state.step === "logs_loaded") {
|
|
254
|
+
this.state.step = "status_loaded";
|
|
255
|
+
return {
|
|
256
|
+
type: "tool_call",
|
|
257
|
+
toolName: "status.get_service",
|
|
258
|
+
input: { service },
|
|
259
|
+
metadata: { message: "Loading service ownership metadata." },
|
|
260
|
+
};
|
|
261
|
+
}
|
|
262
|
+
if (this.state.step === "status_loaded") {
|
|
263
|
+
if (event.type !== "tool_result" || typeof event.result !== "object" || event.result === null) {
|
|
264
|
+
return { type: "error", message: "Expected service status result." };
|
|
265
|
+
}
|
|
266
|
+
const owner = String(event.result.owner ?? "");
|
|
267
|
+
return {
|
|
268
|
+
type: "final",
|
|
269
|
+
output: `${String(this.input.context.expected_summary ?? "")} Escalate to ${owner}.`,
|
|
270
|
+
metadata: { completed: true },
|
|
271
|
+
};
|
|
272
|
+
}
|
|
273
|
+
return { type: "error", message: "Unexpected ops session state." };
|
|
274
|
+
}
|
|
275
|
+
}
|
|
276
|
+
function detectDomain(input) {
|
|
277
|
+
const toolNames = new Set(input.availableTools.map((tool) => tool.name));
|
|
278
|
+
if (toolNames.has("repo.list_files")) {
|
|
279
|
+
return "coding";
|
|
280
|
+
}
|
|
281
|
+
if (toolNames.has("docs.search")) {
|
|
282
|
+
return "research";
|
|
283
|
+
}
|
|
284
|
+
if (toolNames.has("alerts.list_active")) {
|
|
285
|
+
return "ops";
|
|
90
286
|
}
|
|
287
|
+
return "support";
|
|
91
288
|
}
|
|
92
289
|
export class MockAgentAdapter {
|
|
93
290
|
async startRun(input) {
|
package/dist/config.js
CHANGED
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
import { statSync, readFileSync } from "node:fs";
|
|
2
2
|
import { resolve, relative, sep } from "node:path";
|
|
3
3
|
import { parse } from "yaml";
|
|
4
|
-
const CONFIG_PATH = resolve("agentlab.config.yaml");
|
|
5
4
|
export function loadAgentLabConfig() {
|
|
6
|
-
|
|
5
|
+
const configPath = resolve("agentlab.config.yaml");
|
|
6
|
+
if (!exists(configPath)) {
|
|
7
7
|
return {};
|
|
8
8
|
}
|
|
9
|
-
const raw = readFileSync(
|
|
9
|
+
const raw = readFileSync(configPath, "utf8");
|
|
10
10
|
const parsed = parse(raw);
|
|
11
11
|
validateConfig(parsed);
|
|
12
12
|
return parsed;
|
|
@@ -41,6 +41,47 @@ function validateConfig(value) {
|
|
|
41
41
|
names.add(agent.name);
|
|
42
42
|
}
|
|
43
43
|
}
|
|
44
|
+
const agents = (value.agents ?? []);
|
|
45
|
+
const agentNames = new Set(agents.map((agent) => agent.name));
|
|
46
|
+
if (value.variant_sets !== undefined) {
|
|
47
|
+
if (!Array.isArray(value.variant_sets)) {
|
|
48
|
+
throw new Error("agentlab.config.yaml field 'variant_sets' must be an array.");
|
|
49
|
+
}
|
|
50
|
+
const names = new Set();
|
|
51
|
+
for (const variantSet of value.variant_sets) {
|
|
52
|
+
validateVariantSetDefinition(variantSet, agentNames);
|
|
53
|
+
if (names.has(variantSet.name)) {
|
|
54
|
+
throw new Error(`agentlab.config.yaml defines duplicate variant set '${variantSet.name}'.`);
|
|
55
|
+
}
|
|
56
|
+
names.add(variantSet.name);
|
|
57
|
+
}
|
|
58
|
+
}
|
|
59
|
+
if (value.runtime_profiles !== undefined) {
|
|
60
|
+
if (!Array.isArray(value.runtime_profiles)) {
|
|
61
|
+
throw new Error("agentlab.config.yaml field 'runtime_profiles' must be an array.");
|
|
62
|
+
}
|
|
63
|
+
const names = new Set();
|
|
64
|
+
for (const runtimeProfile of value.runtime_profiles) {
|
|
65
|
+
validateRuntimeProfileDefinition(runtimeProfile);
|
|
66
|
+
if (names.has(runtimeProfile.name)) {
|
|
67
|
+
throw new Error(`agentlab.config.yaml defines duplicate runtime profile '${runtimeProfile.name}'.`);
|
|
68
|
+
}
|
|
69
|
+
names.add(runtimeProfile.name);
|
|
70
|
+
}
|
|
71
|
+
}
|
|
72
|
+
if (value.suite_definitions !== undefined) {
|
|
73
|
+
if (!Array.isArray(value.suite_definitions)) {
|
|
74
|
+
throw new Error("agentlab.config.yaml field 'suite_definitions' must be an array.");
|
|
75
|
+
}
|
|
76
|
+
const names = new Set();
|
|
77
|
+
for (const suiteDefinition of value.suite_definitions) {
|
|
78
|
+
validateSuiteDefinition(suiteDefinition);
|
|
79
|
+
if (names.has(suiteDefinition.name)) {
|
|
80
|
+
throw new Error(`agentlab.config.yaml defines duplicate suite definition '${suiteDefinition.name}'.`);
|
|
81
|
+
}
|
|
82
|
+
names.add(suiteDefinition.name);
|
|
83
|
+
}
|
|
84
|
+
}
|
|
44
85
|
}
|
|
45
86
|
function validateToolRegistration(value) {
|
|
46
87
|
if (!isObject(value)) {
|
|
@@ -77,12 +118,16 @@ function validateAgentRegistration(value) {
|
|
|
77
118
|
if (typeof value.name !== "string" || value.name.length === 0) {
|
|
78
119
|
throw new Error("Each agent registration must define a non-empty 'name'.");
|
|
79
120
|
}
|
|
80
|
-
if (value.provider !== "mock" && value.provider !== "openai" && value.provider !== "external_process") {
|
|
121
|
+
if (value.provider !== "mock" && value.provider !== "openai" && value.provider !== "external_process" && value.provider !== "http") {
|
|
81
122
|
throw new Error(`Agent '${value.name}' uses unsupported provider '${String(value.provider)}'.`);
|
|
82
123
|
}
|
|
83
124
|
if (value.label !== undefined && (typeof value.label !== "string" || value.label.length === 0)) {
|
|
84
125
|
throw new Error(`Agent '${value.name}' must define a non-empty 'label' when provided.`);
|
|
85
126
|
}
|
|
127
|
+
if (value.provider === "http") {
|
|
128
|
+
validateHttpAgentConfig(value);
|
|
129
|
+
return;
|
|
130
|
+
}
|
|
86
131
|
if (value.provider === "openai" && value.model !== undefined && (typeof value.model !== "string" || value.model.length === 0)) {
|
|
87
132
|
throw new Error(`Agent '${value.name}' must define a non-empty 'model' when provided.`);
|
|
88
133
|
}
|
|
@@ -102,6 +147,38 @@ function validateAgentRegistration(value) {
|
|
|
102
147
|
}
|
|
103
148
|
}
|
|
104
149
|
}
|
|
150
|
+
export function validateHttpAgentConfig(value) {
|
|
151
|
+
const name = String(value.name ?? "");
|
|
152
|
+
if (typeof value.url !== "string" || value.url.length === 0) {
|
|
153
|
+
throw new Error(`Agent '${name}' with provider 'http' must define a non-empty 'url'.`);
|
|
154
|
+
}
|
|
155
|
+
if (value.timeout_ms !== undefined && (typeof value.timeout_ms !== "number" || value.timeout_ms <= 0)) {
|
|
156
|
+
throw new Error(`Agent '${name}' field 'timeout_ms' must be a positive number.`);
|
|
157
|
+
}
|
|
158
|
+
if (value.request_template !== undefined) {
|
|
159
|
+
if (!isObject(value.request_template)) {
|
|
160
|
+
throw new Error(`Agent '${name}' field 'request_template' must be an object.`);
|
|
161
|
+
}
|
|
162
|
+
for (const [k, v] of Object.entries(value.request_template)) {
|
|
163
|
+
if (typeof v !== "string") {
|
|
164
|
+
throw new Error(`Agent '${name}' request_template field '${k}' must be a string value.`);
|
|
165
|
+
}
|
|
166
|
+
}
|
|
167
|
+
}
|
|
168
|
+
if (value.response_field !== undefined && (typeof value.response_field !== "string" || value.response_field.length === 0)) {
|
|
169
|
+
throw new Error(`Agent '${name}' field 'response_field' must be a non-empty string.`);
|
|
170
|
+
}
|
|
171
|
+
if (value.headers !== undefined) {
|
|
172
|
+
if (!isObject(value.headers)) {
|
|
173
|
+
throw new Error(`Agent '${name}' field 'headers' must be an object.`);
|
|
174
|
+
}
|
|
175
|
+
for (const [k, v] of Object.entries(value.headers)) {
|
|
176
|
+
if (typeof v !== "string") {
|
|
177
|
+
throw new Error(`Agent '${name}' headers field '${k}' must be a string value.`);
|
|
178
|
+
}
|
|
179
|
+
}
|
|
180
|
+
}
|
|
181
|
+
}
|
|
105
182
|
export function getAgentRegistration(name) {
|
|
106
183
|
const match = loadAgentLabConfig().agents?.find((agent) => agent.name === name);
|
|
107
184
|
if (!match) {
|
|
@@ -109,6 +186,148 @@ export function getAgentRegistration(name) {
|
|
|
109
186
|
}
|
|
110
187
|
return match;
|
|
111
188
|
}
|
|
189
|
+
export function getVariantSet(name) {
|
|
190
|
+
const match = loadAgentLabConfig().variant_sets?.find((variantSet) => variantSet.name === name);
|
|
191
|
+
if (!match) {
|
|
192
|
+
throw new Error(`agentlab.config.yaml does not define variant set '${name}'.`);
|
|
193
|
+
}
|
|
194
|
+
return match;
|
|
195
|
+
}
|
|
196
|
+
export function getRuntimeProfile(name) {
|
|
197
|
+
const match = loadAgentLabConfig().runtime_profiles?.find((runtimeProfile) => runtimeProfile.name === name);
|
|
198
|
+
if (!match) {
|
|
199
|
+
throw new Error(`agentlab.config.yaml does not define runtime profile '${name}'.`);
|
|
200
|
+
}
|
|
201
|
+
return match;
|
|
202
|
+
}
|
|
203
|
+
export function getSuiteDefinition(name) {
|
|
204
|
+
const match = loadAgentLabConfig().suite_definitions?.find((suiteDefinition) => suiteDefinition.name === name);
|
|
205
|
+
if (!match) {
|
|
206
|
+
throw new Error(`agentlab.config.yaml does not define suite definition '${name}'.`);
|
|
207
|
+
}
|
|
208
|
+
return match;
|
|
209
|
+
}
|
|
210
|
+
function validateVariantSetDefinition(value, agentNames) {
|
|
211
|
+
if (!isObject(value)) {
|
|
212
|
+
throw new Error("Each variant set definition in agentlab.config.yaml must be an object.");
|
|
213
|
+
}
|
|
214
|
+
if (typeof value.name !== "string" || value.name.length === 0) {
|
|
215
|
+
throw new Error("Each variant set definition must define a non-empty 'name'.");
|
|
216
|
+
}
|
|
217
|
+
if (!Array.isArray(value.variants)) {
|
|
218
|
+
throw new Error(`Variant set '${value.name}' must define a 'variants' array.`);
|
|
219
|
+
}
|
|
220
|
+
const labels = new Set();
|
|
221
|
+
for (const variant of value.variants) {
|
|
222
|
+
if (!isObject(variant)) {
|
|
223
|
+
throw new Error(`Variant set '${value.name}' contains a non-object variant definition.`);
|
|
224
|
+
}
|
|
225
|
+
if (typeof variant.agent !== "string" || variant.agent.length === 0) {
|
|
226
|
+
throw new Error(`Variant set '${value.name}' contains a variant with a non-empty 'agent' required.`);
|
|
227
|
+
}
|
|
228
|
+
if (!agentNames.has(variant.agent)) {
|
|
229
|
+
throw new Error(`Variant set '${value.name}' references unknown agent '${variant.agent}'.`);
|
|
230
|
+
}
|
|
231
|
+
if (typeof variant.label !== "string" || variant.label.length === 0) {
|
|
232
|
+
throw new Error(`Variant set '${value.name}' contains a variant with a non-empty 'label' required.`);
|
|
233
|
+
}
|
|
234
|
+
if (labels.has(variant.label)) {
|
|
235
|
+
throw new Error(`Variant set '${value.name}' defines duplicate variant label '${variant.label}'.`);
|
|
236
|
+
}
|
|
237
|
+
labels.add(variant.label);
|
|
238
|
+
}
|
|
239
|
+
}
|
|
240
|
+
function validateRuntimeProfileDefinition(value) {
|
|
241
|
+
if (!isObject(value)) {
|
|
242
|
+
throw new Error("Each runtime profile definition in agentlab.config.yaml must be an object.");
|
|
243
|
+
}
|
|
244
|
+
if (typeof value.name !== "string" || value.name.length === 0) {
|
|
245
|
+
throw new Error("Each runtime profile definition must define a non-empty 'name'.");
|
|
246
|
+
}
|
|
247
|
+
if (value.tool_faults !== undefined) {
|
|
248
|
+
if (!Array.isArray(value.tool_faults)) {
|
|
249
|
+
throw new Error(`Runtime profile '${value.name}' field 'tool_faults' must be an array.`);
|
|
250
|
+
}
|
|
251
|
+
for (const fault of value.tool_faults) {
|
|
252
|
+
if (!isObject(fault)) {
|
|
253
|
+
throw new Error(`Runtime profile '${value.name}' contains a non-object tool fault definition.`);
|
|
254
|
+
}
|
|
255
|
+
if (typeof fault.tool !== "string" || fault.tool.length === 0) {
|
|
256
|
+
throw new Error(`Runtime profile '${value.name}' contains a tool fault with a non-empty 'tool' required.`);
|
|
257
|
+
}
|
|
258
|
+
if (fault.mode !== "timeout" && fault.mode !== "error" && fault.mode !== "malformed_output" && fault.mode !== "partial_output") {
|
|
259
|
+
throw new Error(`Runtime profile '${value.name}' uses invalid tool fault mode '${String(fault.mode)}'.`);
|
|
260
|
+
}
|
|
261
|
+
if (fault.error_message !== undefined && (typeof fault.error_message !== "string" || fault.error_message.length === 0)) {
|
|
262
|
+
throw new Error(`Runtime profile '${value.name}' tool fault for '${fault.tool}' field 'error_message' must be a non-empty string.`);
|
|
263
|
+
}
|
|
264
|
+
if (fault.timeout_ms !== undefined && (typeof fault.timeout_ms !== "number" || fault.timeout_ms <= 0)) {
|
|
265
|
+
throw new Error(`Runtime profile '${value.name}' tool fault for '${fault.tool}' field 'timeout_ms' must be a positive number.`);
|
|
266
|
+
}
|
|
267
|
+
if (fault.partial_output !== undefined && !isObject(fault.partial_output)) {
|
|
268
|
+
throw new Error(`Runtime profile '${value.name}' tool fault for '${fault.tool}' field 'partial_output' must be an object.`);
|
|
269
|
+
}
|
|
270
|
+
}
|
|
271
|
+
}
|
|
272
|
+
if (value.state !== undefined) {
|
|
273
|
+
if (!isObject(value.state)) {
|
|
274
|
+
throw new Error(`Runtime profile '${value.name}' field 'state' must be an object.`);
|
|
275
|
+
}
|
|
276
|
+
if (value.state.reset !== "per_run" && value.state.reset !== "per_variant_run" && value.state.reset !== "manual") {
|
|
277
|
+
throw new Error(`Runtime profile '${value.name}' field 'state.reset' must be one of 'per_run', 'per_variant_run', or 'manual'.`);
|
|
278
|
+
}
|
|
279
|
+
if (value.state.seeded_messages !== undefined) {
|
|
280
|
+
if (!Array.isArray(value.state.seeded_messages)) {
|
|
281
|
+
throw new Error(`Runtime profile '${value.name}' field 'state.seeded_messages' must be an array.`);
|
|
282
|
+
}
|
|
283
|
+
for (const message of value.state.seeded_messages) {
|
|
284
|
+
if (!isObject(message)) {
|
|
285
|
+
throw new Error(`Runtime profile '${value.name}' contains a non-object seeded message.`);
|
|
286
|
+
}
|
|
287
|
+
if (message.role !== "user" && message.role !== "assistant") {
|
|
288
|
+
throw new Error(`Runtime profile '${value.name}' seeded message role must be 'user' or 'assistant'.`);
|
|
289
|
+
}
|
|
290
|
+
if (typeof message.message !== "string" || message.message.length === 0) {
|
|
291
|
+
throw new Error(`Runtime profile '${value.name}' seeded message must define a non-empty 'message'.`);
|
|
292
|
+
}
|
|
293
|
+
}
|
|
294
|
+
}
|
|
295
|
+
if (value.state.memory_blob !== undefined && !isObject(value.state.memory_blob)) {
|
|
296
|
+
throw new Error(`Runtime profile '${value.name}' field 'state.memory_blob' must be an object.`);
|
|
297
|
+
}
|
|
298
|
+
}
|
|
299
|
+
}
|
|
300
|
+
function validateSuiteDefinition(value) {
|
|
301
|
+
if (!isObject(value)) {
|
|
302
|
+
throw new Error("Each suite definition in agentlab.config.yaml must be an object.");
|
|
303
|
+
}
|
|
304
|
+
if (typeof value.name !== "string" || value.name.length === 0) {
|
|
305
|
+
throw new Error("Each suite definition must define a non-empty 'name'.");
|
|
306
|
+
}
|
|
307
|
+
if (!isObject(value.include)) {
|
|
308
|
+
throw new Error(`Suite definition '${value.name}' must define an object 'include'.`);
|
|
309
|
+
}
|
|
310
|
+
validateSuiteSelectorArray(value.include, value.name, "include.scenarios");
|
|
311
|
+
validateSuiteSelectorArray(value.include, value.name, "include.tags");
|
|
312
|
+
validateSuiteSelectorArray(value.include, value.name, "include.suites");
|
|
313
|
+
if (value.exclude !== undefined) {
|
|
314
|
+
if (!isObject(value.exclude)) {
|
|
315
|
+
throw new Error(`Suite definition '${value.name}' field 'exclude' must be an object.`);
|
|
316
|
+
}
|
|
317
|
+
validateSuiteSelectorArray(value.exclude, value.name, "exclude.scenarios");
|
|
318
|
+
validateSuiteSelectorArray(value.exclude, value.name, "exclude.tags");
|
|
319
|
+
validateSuiteSelectorArray(value.exclude, value.name, "exclude.suites");
|
|
320
|
+
}
|
|
321
|
+
}
|
|
322
|
+
function validateSuiteSelectorArray(value, suiteName, key) {
|
|
323
|
+
const fieldName = key.split(".")[1];
|
|
324
|
+
const selector = value[fieldName];
|
|
325
|
+
if (selector !== undefined) {
|
|
326
|
+
if (!Array.isArray(selector) || selector.some((item) => typeof item !== "string")) {
|
|
327
|
+
throw new Error(`Suite definition '${suiteName}' field '${key}' must be an array of strings.`);
|
|
328
|
+
}
|
|
329
|
+
}
|
|
330
|
+
}
|
|
112
331
|
function exists(path) {
|
|
113
332
|
try {
|
|
114
333
|
statSync(path);
|