@alis-build/harness-eval 0.1.1 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +104 -10
- package/dist/adapters/claude-code/index.d.ts +2 -2
- package/dist/adapters/claude-code/index.js +2 -1
- package/dist/adapters/codex/index.d.ts +68 -0
- package/dist/adapters/codex/index.js +3 -0
- package/dist/{claude-code-ycT0JQZF.js → claude-code-C_7hxC8z.js} +37 -250
- package/dist/claude-code-C_7hxC8z.js.map +1 -0
- package/dist/cli/bin.js +204 -127
- package/dist/cli/bin.js.map +1 -1
- package/dist/codex-0cHO2te9.js +496 -0
- package/dist/codex-0cHO2te9.js.map +1 -0
- package/dist/config/loader.d.ts +2 -2
- package/dist/config/loader.js +2 -2
- package/dist/{index-6Z17eKZx.d.ts → index-DnvP1UBl.d.ts} +3 -2
- package/dist/index.d.ts +397 -153
- package/dist/index.js +125 -5
- package/dist/index.js.map +1 -0
- package/dist/loader-B1WmGGzf.d.ts +107 -0
- package/dist/{loader-BCnFJ8rm.js → loader-DnQ6Jt0i.js} +707 -157
- package/dist/loader-DnQ6Jt0i.js.map +1 -0
- package/dist/reporter-Biy-5-9M.js +2216 -0
- package/dist/reporter-Biy-5-9M.js.map +1 -0
- package/dist/runner/suite.d.ts +1 -1
- package/dist/runner/suite.js +1 -1
- package/dist/{suite-BoOvK_lq.d.ts → suite-BEShV0by.d.ts} +7 -2
- package/dist/{suite-chj0j22j.js → suite-BcP64nlb.js} +72 -4
- package/dist/suite-BcP64nlb.js.map +1 -0
- package/dist/{types-BQol062t.d.ts → types-0QkNVyp9.d.ts} +152 -11
- package/dist/types-Bac8_Ixb.js +246 -0
- package/dist/types-Bac8_Ixb.js.map +1 -0
- package/dist/types-Bu8uOZZN.d.ts +77 -0
- package/dist/{types-B9H4IZtA.d.ts → types-C0gBkl0-.d.ts} +3 -2
- package/package.json +7 -2
- package/schemas/eval-interchange-instances.schema.json +196 -0
- package/schemas/eval-interchange.schema.json +65 -52
- package/schemas/eval-run-envelope.schema.json +182 -425
- package/dist/build-DsVJ_UeU.js +0 -1396
- package/dist/build-DsVJ_UeU.js.map +0 -1
- package/dist/claude-code-ycT0JQZF.js.map +0 -1
- package/dist/loader-BCnFJ8rm.js.map +0 -1
- package/dist/loader-DTvoVfN0.d.ts +0 -33
- package/dist/suite-chj0j22j.js.map +0 -1
- package/schemas/eval-interchange-agent-trace.schema.json +0 -322
- package/schemas/eval-interchange-proto-instance.schema.json +0 -106
|
@@ -3,72 +3,13 @@ import { readFile, readdir, stat } from "node:fs/promises";
|
|
|
3
3
|
import { isAbsolute, join, relative, resolve } from "node:path";
|
|
4
4
|
import { parse } from "yaml";
|
|
5
5
|
import { z } from "zod";
|
|
6
|
-
//#region src/config/paths.ts
|
|
7
|
-
/**
|
|
8
|
-
* Resolve relative paths in suite config against the suite file directory.
|
|
9
|
-
*/
|
|
10
|
-
function resolvePath(value, suiteDir) {
|
|
11
|
-
if (isAbsolute(value) || value.startsWith("~/")) return value;
|
|
12
|
-
return join(suiteDir, value);
|
|
13
|
-
}
|
|
14
|
-
function resolveClaudeCodePaths(block, suiteDir) {
|
|
15
|
-
const resolved = { ...block };
|
|
16
|
-
if (typeof resolved.mcpConfig === "string") resolved.mcpConfig = resolvePath(resolved.mcpConfig, suiteDir);
|
|
17
|
-
if (Array.isArray(resolved.pluginDirs)) resolved.pluginDirs = resolved.pluginDirs.map((p) => typeof p === "string" ? resolvePath(p, suiteDir) : p);
|
|
18
|
-
if (Array.isArray(resolved.addDirs)) resolved.addDirs = resolved.addDirs.map((p) => typeof p === "string" ? resolvePath(p, suiteDir) : p);
|
|
19
|
-
for (const field of [
|
|
20
|
-
"systemPromptFile",
|
|
21
|
-
"appendSystemPromptFile",
|
|
22
|
-
"debugFile"
|
|
23
|
-
]) {
|
|
24
|
-
const value = resolved[field];
|
|
25
|
-
if (typeof value === "string" && !value.trim().startsWith("{")) resolved[field] = resolvePath(value, suiteDir);
|
|
26
|
-
}
|
|
27
|
-
if (typeof resolved.settings === "string" && !resolved.settings.trim().startsWith("{")) resolved.settings = resolvePath(resolved.settings, suiteDir);
|
|
28
|
-
return resolved;
|
|
29
|
-
}
|
|
30
|
-
/** Resolve relative paths in a config layer relative to `suiteDir`. */
|
|
31
|
-
function resolveConfigPaths(config, suiteDir) {
|
|
32
|
-
if (!config) return void 0;
|
|
33
|
-
const resolved = { ...config };
|
|
34
|
-
if (typeof resolved.cwd === "string") resolved.cwd = resolvePath(resolved.cwd, suiteDir);
|
|
35
|
-
if (resolved.claudeCode && typeof resolved.claudeCode === "object" && !Array.isArray(resolved.claudeCode)) resolved.claudeCode = resolveClaudeCodePaths(resolved.claudeCode, suiteDir);
|
|
36
|
-
return resolved;
|
|
37
|
-
}
|
|
38
|
-
/** Resolve paths on an entire suite after load. */
|
|
39
|
-
function resolveSuitePaths(suite, suiteFilePath) {
|
|
40
|
-
const suiteDir = configFileDir(suiteFilePath);
|
|
41
|
-
suite.defaultConfig = resolveConfigPaths(suite.defaultConfig, suiteDir);
|
|
42
|
-
for (const cell of suite.matrix) cell.config = resolveConfigPaths(cell.config, suiteDir) ?? cell.config;
|
|
43
|
-
for (const testCase of suite.cases) testCase.config = resolveConfigPaths(testCase.config, suiteDir);
|
|
44
|
-
}
|
|
45
|
-
function configFileDir(filePath) {
|
|
46
|
-
return filePath.includes("/") || filePath.includes("\\") ? filePath.replace(/[/\\][^/\\]+$/, "") : ".";
|
|
47
|
-
}
|
|
48
|
-
function resolveEnvPaths(env, baseDir) {
|
|
49
|
-
const resolved = {};
|
|
50
|
-
for (const [key, value] of Object.entries(env)) if (value.startsWith("./") || value.startsWith("../") || value.includes("/") && !value.startsWith("http")) resolved[key] = resolvePath(value, baseDir);
|
|
51
|
-
else resolved[key] = value;
|
|
52
|
-
return resolved;
|
|
53
|
-
}
|
|
54
|
-
/** Resolve relative paths in a standalone grading config file. */
|
|
55
|
-
function resolveGradingConfigPaths(config, configFilePath) {
|
|
56
|
-
const baseDir = configFileDir(configFilePath);
|
|
57
|
-
const { adapter, maxConcurrent, ...rest } = config.judge;
|
|
58
|
-
config.judge = {
|
|
59
|
-
...resolveConfigPaths(rest, baseDir) ?? rest,
|
|
60
|
-
adapter,
|
|
61
|
-
maxConcurrent
|
|
62
|
-
};
|
|
63
|
-
if (config.judge.env) config.judge.env = resolveEnvPaths(config.judge.env, baseDir);
|
|
64
|
-
}
|
|
65
|
-
//#endregion
|
|
66
6
|
//#region src/config/schema.ts
|
|
67
7
|
/**
|
|
68
8
|
* zod schemas for the YAML on-disk shape.
|
|
69
9
|
*
|
|
70
10
|
* Config uses a nested layout: generic harness fields at the top level,
|
|
71
|
-
* adapter-specific options under a named key (e.g. `claudeCode`).
|
|
11
|
+
* adapter-specific options under a named key (e.g. `claudeCode`). Validated
|
|
12
|
+
* raw shapes are transformed into runtime types by `src/config/transform.ts`.
|
|
72
13
|
*/
|
|
73
14
|
/** Claude Code adapter-specific options (nested under `claudeCode`). */
|
|
74
15
|
const ClaudeCodeConfigSchema = z.object({
|
|
@@ -117,13 +58,40 @@ const ClaudeCodeConfigSchema = z.object({
|
|
|
117
58
|
maxTurns: z.number().int().positive(),
|
|
118
59
|
isolateConfig: z.boolean()
|
|
119
60
|
}).partial();
|
|
61
|
+
/** Codex CLI adapter-specific options (nested under `codex`). */
|
|
62
|
+
const CodexConfigSchema = z.object({
|
|
63
|
+
binary: z.string(),
|
|
64
|
+
profile: z.string(),
|
|
65
|
+
sandbox: z.enum([
|
|
66
|
+
"read-only",
|
|
67
|
+
"workspace-write",
|
|
68
|
+
"danger-full-access"
|
|
69
|
+
]),
|
|
70
|
+
addDirs: z.array(z.string()),
|
|
71
|
+
configOverrides: z.array(z.string()),
|
|
72
|
+
askForApproval: z.enum([
|
|
73
|
+
"untrusted",
|
|
74
|
+
"on-request",
|
|
75
|
+
"never"
|
|
76
|
+
]),
|
|
77
|
+
dangerouslyBypassApprovalsAndSandbox: z.boolean(),
|
|
78
|
+
dangerouslyBypassHookTrust: z.boolean(),
|
|
79
|
+
ephemeral: z.boolean(),
|
|
80
|
+
ignoreUserConfig: z.boolean(),
|
|
81
|
+
skipGitRepoCheck: z.boolean(),
|
|
82
|
+
outputSchema: z.string(),
|
|
83
|
+
outputLastMessage: z.string(),
|
|
84
|
+
captureLastMessage: z.boolean(),
|
|
85
|
+
isolateConfig: z.boolean()
|
|
86
|
+
}).partial();
|
|
120
87
|
/** Generic + nested adapter config for one layer (defaultConfig, case, cell). */
|
|
121
88
|
const ConfigPartialSchema = z.object({
|
|
122
89
|
model: z.string(),
|
|
123
90
|
cwd: z.string(),
|
|
124
91
|
timeoutMs: z.number().int().positive(),
|
|
125
92
|
env: z.record(z.string(), z.string()),
|
|
126
|
-
claudeCode: ClaudeCodeConfigSchema
|
|
93
|
+
claudeCode: ClaudeCodeConfigSchema,
|
|
94
|
+
codex: CodexConfigSchema
|
|
127
95
|
}).partial();
|
|
128
96
|
/** A matrix cell — one point in the configuration matrix. */
|
|
129
97
|
const MatrixCellSchema = z.object({
|
|
@@ -136,6 +104,11 @@ const ReferenceToolCallSchema = z.object({
|
|
|
136
104
|
tool_name: z.string().min(1),
|
|
137
105
|
tool_input: z.unknown()
|
|
138
106
|
});
|
|
107
|
+
/** Reference trajectory in suite YAML — array of steps or object with mode + steps. */
|
|
108
|
+
const ReferenceTrajectorySchema = z.union([z.array(ReferenceToolCallSchema), z.object({
|
|
109
|
+
tool_name_mode: z.enum(["harness", "bare"]).optional(),
|
|
110
|
+
steps: z.array(ReferenceToolCallSchema).min(1)
|
|
111
|
+
})]);
|
|
139
112
|
/** A test case. */
|
|
140
113
|
const TestCaseSchema = z.object({
|
|
141
114
|
id: z.string().min(1),
|
|
@@ -143,7 +116,7 @@ const TestCaseSchema = z.object({
|
|
|
143
116
|
category: z.string().optional(),
|
|
144
117
|
notes: z.string().optional(),
|
|
145
118
|
expectations: z.array(z.string().min(1)).optional(),
|
|
146
|
-
reference_trajectory:
|
|
119
|
+
reference_trajectory: ReferenceTrajectorySchema.optional(),
|
|
147
120
|
human_ratings: z.record(z.string(), z.number()).optional(),
|
|
148
121
|
assertions: z.array(z.unknown()).min(1),
|
|
149
122
|
repetitions: z.number().int().positive().optional(),
|
|
@@ -192,6 +165,7 @@ function transformSuiteDirectory(raw) {
|
|
|
192
165
|
function transformTestCases(raw, pathPrefix) {
|
|
193
166
|
return raw.map((c, i) => transformTestCase(c, `${pathPrefix}[${i}]`));
|
|
194
167
|
}
|
|
168
|
+
/** Merge suite-level parts shared by single-file and directory transforms. */
|
|
195
169
|
function transformSuiteParts(raw) {
|
|
196
170
|
return {
|
|
197
171
|
adapter: raw.adapter,
|
|
@@ -200,6 +174,21 @@ function transformSuiteParts(raw) {
|
|
|
200
174
|
cases: raw.cases.map((c, i) => transformTestCase(c, `cases[${i}]`))
|
|
201
175
|
};
|
|
202
176
|
}
|
|
177
|
+
/**
|
|
178
|
+
* Normalize reference trajectory YAML into {@link ReferenceTrajectoryConfig}.
|
|
179
|
+
*
|
|
180
|
+
* Accepts a bare step array or `{ tool_name_mode?, steps }` object form.
|
|
181
|
+
*/
|
|
182
|
+
function normalizeReferenceTrajectory(raw, path) {
|
|
183
|
+
if (raw === void 0) return void 0;
|
|
184
|
+
if (Array.isArray(raw)) return { steps: raw };
|
|
185
|
+
if (!isPlainObject(raw) || !Array.isArray(raw.steps)) throw new ConfigError("reference_trajectory must be an array of tool calls or { tool_name_mode?, steps: [...] }", path);
|
|
186
|
+
return {
|
|
187
|
+
tool_name_mode: raw.tool_name_mode,
|
|
188
|
+
steps: raw.steps
|
|
189
|
+
};
|
|
190
|
+
}
|
|
191
|
+
/** Map raw matrix cell YAML to runtime {@link MatrixCell}. */
|
|
203
192
|
function transformMatrixCell(raw) {
|
|
204
193
|
return {
|
|
205
194
|
label: raw.label,
|
|
@@ -207,6 +196,7 @@ function transformMatrixCell(raw) {
|
|
|
207
196
|
axes: raw.axes
|
|
208
197
|
};
|
|
209
198
|
}
|
|
199
|
+
/** Map one raw test case to runtime {@link TestCase}, transforming assertions. */
|
|
210
200
|
function transformTestCase(raw, path) {
|
|
211
201
|
return {
|
|
212
202
|
id: raw.id,
|
|
@@ -214,7 +204,7 @@ function transformTestCase(raw, path) {
|
|
|
214
204
|
category: raw.category,
|
|
215
205
|
notes: raw.notes,
|
|
216
206
|
expectations: raw.expectations,
|
|
217
|
-
reference_trajectory: raw.reference_trajectory,
|
|
207
|
+
reference_trajectory: normalizeReferenceTrajectory(raw.reference_trajectory, `${path}.reference_trajectory`),
|
|
218
208
|
human_ratings: raw.human_ratings,
|
|
219
209
|
repetitions: raw.repetitions,
|
|
220
210
|
config: raw.config,
|
|
@@ -223,6 +213,17 @@ function transformTestCase(raw, path) {
|
|
|
223
213
|
}
|
|
224
214
|
/** Keys that may appear alongside an assertion-type key. Not assertion types themselves. */
|
|
225
215
|
const SIBLING_KEYS = /* @__PURE__ */ new Set(["threshold"]);
|
|
216
|
+
/**
|
|
217
|
+
* Parse optional `threshold` sibling and delegate the assertion body to
|
|
218
|
+
* {@link transformAssertion}.
|
|
219
|
+
*
|
|
220
|
+
* @throws {ConfigError} When the wrapper is not an object, threshold is out of
|
|
221
|
+
* `[0, 1]`, or the nested assertion fails validation.
|
|
222
|
+
*
|
|
223
|
+
* @example
|
|
224
|
+
* transformThresholdedAssertion({ called: "Read", threshold: 0.9 }, "path")
|
|
225
|
+
* // → { assertion: { type: "called", tool: "Read" }, threshold: 0.9 }
|
|
226
|
+
*/
|
|
226
227
|
function transformThresholdedAssertion(raw, path) {
|
|
227
228
|
if (!isPlainObject(raw)) throw new ConfigError(`expected object, got ${typeOf(raw)}`, path);
|
|
228
229
|
const threshold = raw.threshold;
|
|
@@ -240,6 +241,19 @@ function transformThresholdedAssertion(raw, path) {
|
|
|
240
241
|
* Finds the single non-sibling key, dispatches to the per-type transformer.
|
|
241
242
|
* Per-type transformers handle both verbose-object and shortcut-scalar input
|
|
242
243
|
* shapes where applicable.
|
|
244
|
+
*
|
|
245
|
+
* @param raw - Single assertion object from parsed YAML (may include `threshold` sibling).
|
|
246
|
+
* @param path - JSON-path-like location for error messages (e.g. `cases[0].assertions[1]`).
|
|
247
|
+
* @returns Runtime {@link Assertion} tagged union.
|
|
248
|
+
* @throws {ConfigError} When the object has no assertion key, multiple type keys, or an unknown type.
|
|
249
|
+
*
|
|
250
|
+
* @example
|
|
251
|
+
* transformAssertion({ called: "Read" }, "cases[0].assertions[0]")
|
|
252
|
+
* // → { type: "called", tool: "Read" }
|
|
253
|
+
*
|
|
254
|
+
* @example
|
|
255
|
+
* transformAssertion({ called: { tool: "Read", times: ">= 2" } }, "path")
|
|
256
|
+
* // → { type: "called", tool: "Read", times: ">= 2" }
|
|
243
257
|
*/
|
|
244
258
|
function transformAssertion(raw, path) {
|
|
245
259
|
if (!isPlainObject(raw)) throw new ConfigError(`expected object, got ${typeOf(raw)}`, path);
|
|
@@ -271,6 +285,22 @@ function transformAssertion(raw, path) {
|
|
|
271
285
|
default: throw new ConfigError(`unknown assertion type: ${typeKey}`, path);
|
|
272
286
|
}
|
|
273
287
|
}
|
|
288
|
+
/**
|
|
289
|
+
* Transform `called` YAML (scalar or `{tool, times?}`) to runtime assertion.
|
|
290
|
+
*
|
|
291
|
+
* @throws {ConfigError} When value is neither string nor object, tool is invalid,
|
|
292
|
+
* or `times` is not a valid cardinality string.
|
|
293
|
+
*
|
|
294
|
+
* @example
|
|
295
|
+
* // Scalar shortcut
|
|
296
|
+
* transformCalled("mcp__api__search_skills", "path")
|
|
297
|
+
* // → { type: "called", tool: "mcp__api__search_skills" }
|
|
298
|
+
*
|
|
299
|
+
* @example
|
|
300
|
+
* // Verbose form with cardinality
|
|
301
|
+
* transformCalled({ tool: "Read", times: ">= 1" }, "path")
|
|
302
|
+
* // → { type: "called", tool: "Read", times: ">= 1" }
|
|
303
|
+
*/
|
|
274
304
|
function transformCalled(value, path) {
|
|
275
305
|
if (typeof value === "string") return {
|
|
276
306
|
type: "called",
|
|
@@ -293,6 +323,14 @@ function transformCalled(value, path) {
|
|
|
293
323
|
times
|
|
294
324
|
};
|
|
295
325
|
}
|
|
326
|
+
/**
|
|
327
|
+
* Transform `not_called` YAML (scalar or `{tool}`).
|
|
328
|
+
*
|
|
329
|
+
* @throws {ConfigError} When value is neither string nor object with a valid `tool`.
|
|
330
|
+
*
|
|
331
|
+
* @example
|
|
332
|
+
* transformNotCalled("Bash", "path") // → { type: "not_called", tool: "Bash" }
|
|
333
|
+
*/
|
|
296
334
|
function transformNotCalled(value, path) {
|
|
297
335
|
if (typeof value === "string") return {
|
|
298
336
|
type: "not_called",
|
|
@@ -304,18 +342,45 @@ function transformNotCalled(value, path) {
|
|
|
304
342
|
tool: requireToolPattern(value.tool, `${path}.tool`)
|
|
305
343
|
};
|
|
306
344
|
}
|
|
345
|
+
/**
|
|
346
|
+
* Transform `called_any_of` — bare tool list or `{tools: [...]}`.
|
|
347
|
+
*
|
|
348
|
+
* @throws {ConfigError} When the value is not an array or `{tools: [...]}` object.
|
|
349
|
+
*
|
|
350
|
+
* @example
|
|
351
|
+
* transformCalledAnyOf(["Read", "Glob"], "path")
|
|
352
|
+
* // → { type: "called_any_of", tools: ["Read", "Glob"] }
|
|
353
|
+
*/
|
|
307
354
|
function transformCalledAnyOf(value, path) {
|
|
308
355
|
return {
|
|
309
356
|
type: "called_any_of",
|
|
310
357
|
tools: requireToolPatternList(value, path)
|
|
311
358
|
};
|
|
312
359
|
}
|
|
360
|
+
/**
|
|
361
|
+
* Transform `called_all_of` — bare tool list or `{tools: [...]}`.
|
|
362
|
+
*
|
|
363
|
+
* @throws {ConfigError} When the value is not an array or `{tools: [...]}` object.
|
|
364
|
+
*
|
|
365
|
+
* @example
|
|
366
|
+
* transformCalledAllOf({ tools: ["Read", "Grep"] }, "path")
|
|
367
|
+
* // → { type: "called_all_of", tools: ["Read", "Grep"] }
|
|
368
|
+
*/
|
|
313
369
|
function transformCalledAllOf(value, path) {
|
|
314
370
|
return {
|
|
315
371
|
type: "called_all_of",
|
|
316
372
|
tools: requireToolPatternList(value, path)
|
|
317
373
|
};
|
|
318
374
|
}
|
|
375
|
+
/**
|
|
376
|
+
* Transform `called_before: {first, then}` ordering assertion.
|
|
377
|
+
*
|
|
378
|
+
* @throws {ConfigError} When value is not an object or `first`/`then` are invalid patterns.
|
|
379
|
+
*
|
|
380
|
+
* @example
|
|
381
|
+
* transformCalledBefore({ first: "SearchSkills", then: "LoadSkill" }, "path")
|
|
382
|
+
* // → { type: "called_before", first: "SearchSkills", then: "LoadSkill" }
|
|
383
|
+
*/
|
|
319
384
|
function transformCalledBefore(value, path) {
|
|
320
385
|
if (!isPlainObject(value)) throw new ConfigError(`expected object with {first, then}, got ${typeOf(value)}`, path);
|
|
321
386
|
return {
|
|
@@ -324,6 +389,19 @@ function transformCalledBefore(value, path) {
|
|
|
324
389
|
then: requireToolPattern(value.then, `${path}.then`)
|
|
325
390
|
};
|
|
326
391
|
}
|
|
392
|
+
/**
|
|
393
|
+
* Transform `sequence` — tool list with optional `strict` flag.
|
|
394
|
+
*
|
|
395
|
+
* @throws {ConfigError} When value is neither a pattern array nor `{tools, strict?}` object.
|
|
396
|
+
*
|
|
397
|
+
* @example
|
|
398
|
+
* // Bare array (non-strict by default)
|
|
399
|
+
* transformSequence(["Read", "Edit"], "path")
|
|
400
|
+
*
|
|
401
|
+
* @example
|
|
402
|
+
* // Explicit strict ordering
|
|
403
|
+
* transformSequence({ tools: ["Read", "Edit"], strict: true }, "path")
|
|
404
|
+
*/
|
|
327
405
|
function transformSequence(value, path) {
|
|
328
406
|
if (Array.isArray(value)) return {
|
|
329
407
|
type: "sequence",
|
|
@@ -336,6 +414,19 @@ function transformSequence(value, path) {
|
|
|
336
414
|
strict: value.strict === void 0 ? void 0 : requireBool(value.strict, `${path}.strict`)
|
|
337
415
|
};
|
|
338
416
|
}
|
|
417
|
+
/**
|
|
418
|
+
* Transform `called_with: {tool, args}` with predicate validation on args.
|
|
419
|
+
*
|
|
420
|
+
* @throws {ConfigError} When `tool` or `args` is missing/invalid, or `args` fails
|
|
421
|
+
* {@link validatePredicate}.
|
|
422
|
+
*
|
|
423
|
+
* @example
|
|
424
|
+
* transformCalledWith(
|
|
425
|
+
* { tool: "Read", args: { path: { contains: "README" } } },
|
|
426
|
+
* "path",
|
|
427
|
+
* )
|
|
428
|
+
* // → { type: "called_with", tool: "Read", args: { path: { contains: "README" } } }
|
|
429
|
+
*/
|
|
339
430
|
function transformCalledWith(value, path) {
|
|
340
431
|
if (!isPlainObject(value)) throw new ConfigError(`expected object with {tool, args}, got ${typeOf(value)}`, path);
|
|
341
432
|
const tool = requireToolPattern(value.tool, `${path}.tool`);
|
|
@@ -347,10 +438,32 @@ function transformCalledWith(value, path) {
|
|
|
347
438
|
args: value.args
|
|
348
439
|
};
|
|
349
440
|
}
|
|
441
|
+
/**
|
|
442
|
+
* Transform `responded_without_tool_calls` — accepts true or empty object.
|
|
443
|
+
*
|
|
444
|
+
* @throws {ConfigError} When value is neither `true`, null, nor an empty object.
|
|
445
|
+
*
|
|
446
|
+
* @example
|
|
447
|
+
* transformRespondedWithoutToolCalls(true, "path")
|
|
448
|
+
* // → { type: "responded_without_tool_calls" }
|
|
449
|
+
*/
|
|
350
450
|
function transformRespondedWithoutToolCalls(value, path) {
|
|
351
451
|
if (value === true || value === null || isPlainObject(value) && Object.keys(value).length === 0) return { type: "responded_without_tool_calls" };
|
|
352
452
|
throw new ConfigError(`expected true or empty object, got ${JSON.stringify(value)}`, path);
|
|
353
453
|
}
|
|
454
|
+
/**
|
|
455
|
+
* Transform budget assertions (`iterations_within`, `cost_within_usd`, `duration_within_ms`).
|
|
456
|
+
*
|
|
457
|
+
* @throws {ConfigError} When `max` is missing, non-positive, or not a number.
|
|
458
|
+
*
|
|
459
|
+
* @example
|
|
460
|
+
* transformScalarMax(5, "path", "iterations_within")
|
|
461
|
+
* // → { type: "iterations_within", max: 5 }
|
|
462
|
+
*
|
|
463
|
+
* @example
|
|
464
|
+
* transformScalarMax({ max: 2.5 }, "path", "cost_within_usd")
|
|
465
|
+
* // → { type: "cost_within_usd", max: 2.5 }
|
|
466
|
+
*/
|
|
354
467
|
function transformScalarMax(value, path, type) {
|
|
355
468
|
let max;
|
|
356
469
|
if (typeof value === "number") max = value;
|
|
@@ -362,6 +475,15 @@ function transformScalarMax(value, path, type) {
|
|
|
362
475
|
max
|
|
363
476
|
};
|
|
364
477
|
}
|
|
478
|
+
/**
|
|
479
|
+
* Transform `finished_with` — stop reason string, list, or `{reasons}`.
|
|
480
|
+
*
|
|
481
|
+
* @throws {ConfigError} When value is not a string, string array, or `{reasons}` object.
|
|
482
|
+
*
|
|
483
|
+
* @example
|
|
484
|
+
* transformFinishedWith("end_turn", "path")
|
|
485
|
+
* // → { type: "finished_with", reasons: "end_turn" }
|
|
486
|
+
*/
|
|
365
487
|
function transformFinishedWith(value, path) {
|
|
366
488
|
if (typeof value === "string") return {
|
|
367
489
|
type: "finished_with",
|
|
@@ -384,6 +506,15 @@ function transformFinishedWith(value, path) {
|
|
|
384
506
|
}
|
|
385
507
|
throw new ConfigError(`expected string, string[], or {reasons: ...}, got ${JSON.stringify(value)}`, path);
|
|
386
508
|
}
|
|
509
|
+
/**
|
|
510
|
+
* Transform `response_contains` / `response_not_contains` scalar or `{text}`.
|
|
511
|
+
*
|
|
512
|
+
* @throws {ConfigError} When value is neither a string nor `{text: string}`.
|
|
513
|
+
*
|
|
514
|
+
* @example
|
|
515
|
+
* transformResponseText("done", "path", "response_contains")
|
|
516
|
+
* // → { type: "response_contains", text: "done" }
|
|
517
|
+
*/
|
|
387
518
|
function transformResponseText(value, path, type) {
|
|
388
519
|
if (typeof value === "string") return {
|
|
389
520
|
type,
|
|
@@ -395,6 +526,15 @@ function transformResponseText(value, path, type) {
|
|
|
395
526
|
};
|
|
396
527
|
throw new ConfigError(`expected string or {text: string}, got ${JSON.stringify(value)}`, path);
|
|
397
528
|
}
|
|
529
|
+
/**
|
|
530
|
+
* Transform `response_matches: {pattern, flags?}`.
|
|
531
|
+
*
|
|
532
|
+
* @throws {ConfigError} When `pattern` is missing or not a string.
|
|
533
|
+
*
|
|
534
|
+
* @example
|
|
535
|
+
* transformResponseMatches({ pattern: "error\\d+", flags: "i" }, "path")
|
|
536
|
+
* // → { type: "response_matches", pattern: "error\\d+", flags: "i" }
|
|
537
|
+
*/
|
|
398
538
|
function transformResponseMatches(value, path) {
|
|
399
539
|
if (!isPlainObject(value)) throw new ConfigError(`expected object with {pattern, flags?}, got ${typeOf(value)}`, path);
|
|
400
540
|
return {
|
|
@@ -403,24 +543,57 @@ function transformResponseMatches(value, path) {
|
|
|
403
543
|
flags: value.flags === void 0 ? void 0 : requireString(value.flags, `${path}.flags`)
|
|
404
544
|
};
|
|
405
545
|
}
|
|
546
|
+
/**
|
|
547
|
+
* Transform compound `all_of` assertion list.
|
|
548
|
+
*
|
|
549
|
+
* @throws {ConfigError} When value is not an array or `{assertions: [...]}`.
|
|
550
|
+
*
|
|
551
|
+
* @example
|
|
552
|
+
* transformAllOf([{ called: "Read" }, { not_called: "Bash" }], "path")
|
|
553
|
+
*/
|
|
406
554
|
function transformAllOf(value, path) {
|
|
407
555
|
return {
|
|
408
556
|
type: "all_of",
|
|
409
557
|
assertions: transformCompoundList(value, path)
|
|
410
558
|
};
|
|
411
559
|
}
|
|
560
|
+
/**
|
|
561
|
+
* Transform compound `any_of` assertion list.
|
|
562
|
+
*
|
|
563
|
+
* @throws {ConfigError} When value is not an array or `{assertions: [...]}`.
|
|
564
|
+
*
|
|
565
|
+
* @example
|
|
566
|
+
* transformAnyOf({ assertions: [{ called: "Read" }, { called: "Glob" }] }, "path")
|
|
567
|
+
*/
|
|
412
568
|
function transformAnyOf(value, path) {
|
|
413
569
|
return {
|
|
414
570
|
type: "any_of",
|
|
415
571
|
assertions: transformCompoundList(value, path)
|
|
416
572
|
};
|
|
417
573
|
}
|
|
574
|
+
/**
|
|
575
|
+
* Transform compound `not` — single nested assertion, no threshold.
|
|
576
|
+
*
|
|
577
|
+
* The inner assertion uses the same single-key YAML shape as top-level
|
|
578
|
+
* assertions; thresholds apply only at the outer {@link transformThresholdedAssertion} level.
|
|
579
|
+
*
|
|
580
|
+
* @throws {ConfigError} Propagates from nested {@link transformAssertion}.
|
|
581
|
+
*
|
|
582
|
+
* @example
|
|
583
|
+
* transformNot({ called: "Bash" }, "path")
|
|
584
|
+
* // → { type: "not", assertion: { type: "called", tool: "Bash" } }
|
|
585
|
+
*/
|
|
418
586
|
function transformNot(value, path) {
|
|
419
587
|
return {
|
|
420
588
|
type: "not",
|
|
421
589
|
assertion: transformAssertion(value, path)
|
|
422
590
|
};
|
|
423
591
|
}
|
|
592
|
+
/**
|
|
593
|
+
* Parse compound assertion list from array or `{assertions: [...]}`.
|
|
594
|
+
*
|
|
595
|
+
* @throws {ConfigError} When value is neither form.
|
|
596
|
+
*/
|
|
424
597
|
function transformCompoundList(value, path) {
|
|
425
598
|
const list = Array.isArray(value) ? value : isPlainObject(value) && Array.isArray(value.assertions) ? value.assertions : null;
|
|
426
599
|
if (list === null) throw new ConfigError(`expected array or {assertions: [...]}, got ${JSON.stringify(value)}`, path);
|
|
@@ -452,6 +625,9 @@ const COMPOUND_OPS = /* @__PURE__ */ new Set([
|
|
|
452
625
|
* - single-key object whose key is a leaf op (e.g. `{contains: "x"}`)
|
|
453
626
|
* - single-key compound (`{any_of: [...]}`, `{all_of: [...]}`, `{not: ...}`)
|
|
454
627
|
* - multi-key object (descend into fields; each value is a sub-predicate)
|
|
628
|
+
*
|
|
629
|
+
* @throws {ConfigError} When a compound op has a non-array value or a leaf op
|
|
630
|
+
* has the wrong value type (e.g. non-string `contains`).
|
|
455
631
|
*/
|
|
456
632
|
function validatePredicate(raw, path) {
|
|
457
633
|
if (!isPlainObject(raw)) return;
|
|
@@ -474,6 +650,12 @@ function validatePredicate(raw, path) {
|
|
|
474
650
|
}
|
|
475
651
|
for (const [field, sub] of Object.entries(raw)) validatePredicate(sub, `${path}.${field}`);
|
|
476
652
|
}
|
|
653
|
+
/**
|
|
654
|
+
* Validate a leaf predicate operator's value shape at config load time.
|
|
655
|
+
*
|
|
656
|
+
* @throws {ConfigError} When the operator's value has the wrong type or `regex`
|
|
657
|
+
* is not a valid JavaScript regular expression.
|
|
658
|
+
*/
|
|
477
659
|
function validateLeafOperator(op, value, path) {
|
|
478
660
|
switch (op) {
|
|
479
661
|
case "equals": return;
|
|
@@ -501,85 +683,293 @@ function validateLeafOperator(op, value, path) {
|
|
|
501
683
|
default: return;
|
|
502
684
|
}
|
|
503
685
|
}
|
|
686
|
+
/** Require a tool pattern string or `{ pattern }` object. */
|
|
504
687
|
function requireToolPattern(value, path) {
|
|
505
688
|
if (typeof value === "string") return value;
|
|
506
689
|
if (isPlainObject(value) && typeof value.pattern === "string") return { pattern: value.pattern };
|
|
507
690
|
throw new ConfigError(`expected string or {pattern: string}, got ${JSON.stringify(value)}`, path);
|
|
508
691
|
}
|
|
692
|
+
/** Require a bare tool pattern array or `{ tools: [...] }` wrapper. */
|
|
509
693
|
function requireToolPatternList(value, path) {
|
|
510
694
|
const list = Array.isArray(value) ? value : isPlainObject(value) && Array.isArray(value.tools) ? value.tools : null;
|
|
511
695
|
if (list === null) throw new ConfigError(`expected array of tool patterns or {tools: [...]}, got ${JSON.stringify(value)}`, path);
|
|
512
696
|
return list.map((v, i) => requireToolPattern(v, `${path}[${i}]`));
|
|
513
697
|
}
|
|
698
|
+
/** Require a string value at `path` or throw {@link ConfigError}. */
|
|
514
699
|
function requireString(value, path) {
|
|
515
700
|
if (typeof value === "string") return value;
|
|
516
701
|
throw new ConfigError(`expected string, got ${typeOf(value)}`, path);
|
|
517
702
|
}
|
|
703
|
+
/** Require a boolean value at `path` or throw {@link ConfigError}. */
|
|
518
704
|
function requireBool(value, path) {
|
|
519
705
|
if (typeof value === "boolean") return value;
|
|
520
706
|
throw new ConfigError(`expected boolean, got ${typeOf(value)}`, path);
|
|
521
707
|
}
|
|
708
|
+
/** True for non-null, non-array objects (YAML mapping nodes). */
|
|
522
709
|
function isPlainObject(x) {
|
|
523
710
|
return typeof x === "object" && x !== null && !Array.isArray(x);
|
|
524
711
|
}
|
|
712
|
+
/** Human-readable type name for config error messages. */
|
|
525
713
|
function typeOf(x) {
|
|
526
714
|
if (x === null) return "null";
|
|
527
715
|
if (Array.isArray(x)) return "array";
|
|
528
716
|
return typeof x;
|
|
529
717
|
}
|
|
530
718
|
//#endregion
|
|
531
|
-
//#region src/config/
|
|
532
|
-
/**
|
|
533
|
-
* Zod schema for standalone grading YAML (`grading.yaml`).
|
|
534
|
-
*/
|
|
535
|
-
/** Top-level `judge` block — mirrors harness config fields plus grader concurrency. */
|
|
536
|
-
const JudgeConfigSchema = ConfigPartialSchema.extend({
|
|
537
|
-
adapter: z.string().optional(),
|
|
538
|
-
maxConcurrent: z.number().int().positive().optional(),
|
|
539
|
-
/** Optional judge prompt prefix (maps to upstream system_instruction). */
|
|
540
|
-
system_instruction: z.string().optional()
|
|
541
|
-
});
|
|
542
|
-
const GradingConfigSchema = z.object({ judge: JudgeConfigSchema });
|
|
543
|
-
//#endregion
|
|
544
|
-
//#region src/config/grading-loader.ts
|
|
719
|
+
//#region src/config/loader-internals.ts
|
|
545
720
|
/**
|
|
546
|
-
*
|
|
721
|
+
* Shared suite loader helpers (case file collection and parsing).
|
|
547
722
|
*/
|
|
548
|
-
|
|
549
|
-
|
|
550
|
-
let content;
|
|
551
|
-
try {
|
|
552
|
-
content = await readFile(absolutePath, "utf8");
|
|
553
|
-
} catch (err) {
|
|
554
|
-
throw new ConfigError(`failed to read grading config: ${err instanceof Error ? err.message : String(err)}`, filePath);
|
|
555
|
-
}
|
|
556
|
-
return parseGradingConfig(content, absolutePath);
|
|
557
|
-
}
|
|
558
|
-
function parseGradingConfig(yamlContent, sourcePath) {
|
|
723
|
+
/** Parse one case file: single case, array, or `{ cases: [...] }`. */
|
|
724
|
+
function parseCasesFile(yamlContent, sourcePath) {
|
|
559
725
|
let raw;
|
|
560
726
|
try {
|
|
561
727
|
raw = parse(yamlContent);
|
|
562
728
|
} catch (err) {
|
|
563
729
|
throw new ConfigError(`YAML parse error: ${err instanceof Error ? err.message : String(err)}`, sourcePath);
|
|
564
730
|
}
|
|
565
|
-
|
|
566
|
-
if (!validated.success) throw new ConfigError(`validation failed:\n${formatZodError$1(validated.error, sourcePath)}`, sourcePath);
|
|
567
|
-
const config = { judge: { ...validated.data.judge } };
|
|
568
|
-
if (sourcePath) resolveGradingConfigPaths(config, sourcePath);
|
|
569
|
-
return config;
|
|
731
|
+
return transformTestCases(extractRawCases(raw, sourcePath), sourcePath ?? "cases");
|
|
570
732
|
}
|
|
571
|
-
function
|
|
733
|
+
function extractRawCases(raw, sourcePath) {
|
|
734
|
+
if (Array.isArray(raw)) return raw.map((item, index) => validateRawCase(item, sourcePath, index));
|
|
735
|
+
if (raw && typeof raw === "object") {
|
|
736
|
+
const obj = raw;
|
|
737
|
+
if (Array.isArray(obj.cases)) return obj.cases.map((item, index) => validateRawCase(item, sourcePath, index));
|
|
738
|
+
if ("id" in obj && "prompt" in obj && "assertions" in obj) return [validateRawCase(raw, sourcePath, 0)];
|
|
739
|
+
}
|
|
740
|
+
throw new ConfigError("expected a case object, array of cases, or { cases: [...] }", sourcePath);
|
|
741
|
+
}
|
|
742
|
+
function validateRawCase(raw, sourcePath, index) {
|
|
743
|
+
const validated = TestCaseSchema.safeParse(raw);
|
|
744
|
+
if (!validated.success) throw new ConfigError(`validation failed:\n${formatZodError$3(validated.error, sourcePath)}`, sourcePath);
|
|
745
|
+
return validated.data;
|
|
746
|
+
}
|
|
747
|
+
/** Recursively collect `.yaml` / `.yml` files under `casesDir`. */
|
|
748
|
+
async function collectCaseYamlFiles(casesDir) {
|
|
749
|
+
const files = [];
|
|
750
|
+
async function walk(dir) {
|
|
751
|
+
let entries;
|
|
752
|
+
try {
|
|
753
|
+
entries = await readdir(dir, { withFileTypes: true });
|
|
754
|
+
} catch (err) {
|
|
755
|
+
if (err instanceof Error && "code" in err && err.code === "ENOENT") return;
|
|
756
|
+
throw err;
|
|
757
|
+
}
|
|
758
|
+
for (const entry of entries) {
|
|
759
|
+
const fullPath = join(dir, entry.name);
|
|
760
|
+
if (entry.isDirectory()) await walk(fullPath);
|
|
761
|
+
else if (entry.isFile() && (entry.name.endsWith(".yaml") || entry.name.endsWith(".yml"))) files.push(fullPath);
|
|
762
|
+
}
|
|
763
|
+
}
|
|
764
|
+
await walk(casesDir);
|
|
765
|
+
return files.sort();
|
|
766
|
+
}
|
|
767
|
+
function formatZodError$3(err, sourcePath) {
|
|
572
768
|
return err.issues.map((issue) => {
|
|
573
769
|
const path = issue.path.length > 0 ? issue.path.join(".") : "(root)";
|
|
574
770
|
return ` ${sourcePath ? `${sourcePath} → ${path}` : path}: ${issue.message}`;
|
|
575
771
|
}).join("\n");
|
|
576
772
|
}
|
|
577
773
|
//#endregion
|
|
578
|
-
//#region src/config/
|
|
774
|
+
//#region src/config/pipeline-schema.ts
|
|
579
775
|
/**
|
|
580
|
-
*
|
|
776
|
+
* Zod schemas for optional `pipeline:` block in suite.yaml.
|
|
777
|
+
*
|
|
778
|
+
* Step presence under `pipeline` enables orchestration via `harness-eval pipeline`.
|
|
581
779
|
*/
|
|
582
|
-
|
|
780
|
+
/** `pipeline.run` step — harness eval run. */
|
|
781
|
+
const PipelineRunStepSchema = z.object({
|
|
782
|
+
output: z.string().min(1).optional(),
|
|
783
|
+
maxConcurrent: z.number().int().positive().optional()
|
|
784
|
+
}).optional();
|
|
785
|
+
/** `pipeline.grade` step — LLM outcome grading. */
|
|
786
|
+
const PipelineGradeStepSchema = z.object({
|
|
787
|
+
input: z.string().min(1).optional(),
|
|
788
|
+
output: z.string().min(1).optional(),
|
|
789
|
+
maxConcurrent: z.number().int().positive().optional()
|
|
790
|
+
}).optional();
|
|
791
|
+
/** `pipeline.envelope` step — EvalRunEnvelope export. */
|
|
792
|
+
const PipelineEnvelopeStepSchema = z.object({
|
|
793
|
+
report: z.string().min(1).optional(),
|
|
794
|
+
grading: z.string().min(1).optional(),
|
|
795
|
+
output: z.string().min(1).optional(),
|
|
796
|
+
projection: z.enum([
|
|
797
|
+
"envelope",
|
|
798
|
+
"trajectory",
|
|
799
|
+
"instances"
|
|
800
|
+
]).optional(),
|
|
801
|
+
includeRawStreamEvents: z.boolean().optional(),
|
|
802
|
+
noTranscript: z.boolean().optional()
|
|
803
|
+
}).optional();
|
|
804
|
+
/** Top-level optional pipeline block in suite.yaml. */
|
|
805
|
+
const PipelineConfigSchema = z.object({
|
|
806
|
+
run: PipelineRunStepSchema,
|
|
807
|
+
grade: PipelineGradeStepSchema,
|
|
808
|
+
envelope: PipelineEnvelopeStepSchema
|
|
809
|
+
}).partial();
|
|
810
|
+
/** Default artifact filenames relative to the suite.yaml directory. */
|
|
811
|
+
const DEFAULT_PIPELINE_OUTPUTS = {
|
|
812
|
+
run: "report.json",
|
|
813
|
+
grade: "grading.json",
|
|
814
|
+
envelope: "envelope.json"
|
|
815
|
+
};
|
|
816
|
+
//#endregion
|
|
817
|
+
//#region src/config/paths.ts
|
|
818
|
+
/**
|
|
819
|
+
* Resolve relative paths in suite config against the suite file directory.
|
|
820
|
+
*
|
|
821
|
+
* YAML authors write paths relative to the suite file; this module absolutizes
|
|
822
|
+
* them at load time so the runner and adapters receive filesystem-ready values.
|
|
823
|
+
* Tilde-prefixed paths and inline JSON blobs (settings starting with `{`) are
|
|
824
|
+
* left unchanged.
|
|
825
|
+
*/
|
|
826
|
+
/** Resolve a single path relative to `suiteDir` unless already absolute or `~/`. */
|
|
827
|
+
function resolvePath(value, suiteDir) {
|
|
828
|
+
if (isAbsolute(value) || value.startsWith("~/")) return value;
|
|
829
|
+
return join(suiteDir, value);
|
|
830
|
+
}
|
|
831
|
+
/** Resolve Claude Code-specific path fields within a config block. */
|
|
832
|
+
function resolveClaudeCodePaths(block, suiteDir) {
|
|
833
|
+
const resolved = { ...block };
|
|
834
|
+
if (typeof resolved.mcpConfig === "string") resolved.mcpConfig = resolvePath(resolved.mcpConfig, suiteDir);
|
|
835
|
+
if (Array.isArray(resolved.pluginDirs)) resolved.pluginDirs = resolved.pluginDirs.map((p) => typeof p === "string" ? resolvePath(p, suiteDir) : p);
|
|
836
|
+
if (Array.isArray(resolved.addDirs)) resolved.addDirs = resolved.addDirs.map((p) => typeof p === "string" ? resolvePath(p, suiteDir) : p);
|
|
837
|
+
for (const field of [
|
|
838
|
+
"systemPromptFile",
|
|
839
|
+
"appendSystemPromptFile",
|
|
840
|
+
"debugFile"
|
|
841
|
+
]) {
|
|
842
|
+
const value = resolved[field];
|
|
843
|
+
if (typeof value === "string" && !value.trim().startsWith("{")) resolved[field] = resolvePath(value, suiteDir);
|
|
844
|
+
}
|
|
845
|
+
if (typeof resolved.settings === "string" && !resolved.settings.trim().startsWith("{")) resolved.settings = resolvePath(resolved.settings, suiteDir);
|
|
846
|
+
return resolved;
|
|
847
|
+
}
|
|
848
|
+
/** Resolve Codex-specific path fields within a config block. */
|
|
849
|
+
function resolveCodexPaths(block, suiteDir) {
|
|
850
|
+
const resolved = { ...block };
|
|
851
|
+
if (Array.isArray(resolved.addDirs)) resolved.addDirs = resolved.addDirs.map((p) => typeof p === "string" ? resolvePath(p, suiteDir) : p);
|
|
852
|
+
for (const field of ["outputSchema", "outputLastMessage"]) {
|
|
853
|
+
const value = resolved[field];
|
|
854
|
+
if (typeof value === "string") resolved[field] = resolvePath(value, suiteDir);
|
|
855
|
+
}
|
|
856
|
+
return resolved;
|
|
857
|
+
}
|
|
858
|
+
/** Resolve relative paths in a config layer relative to `suiteDir`. */
|
|
859
|
+
function resolveConfigPaths(config, suiteDir) {
|
|
860
|
+
if (!config) return void 0;
|
|
861
|
+
const resolved = { ...config };
|
|
862
|
+
if (typeof resolved.cwd === "string") resolved.cwd = resolvePath(resolved.cwd, suiteDir);
|
|
863
|
+
if (resolved.claudeCode && typeof resolved.claudeCode === "object" && !Array.isArray(resolved.claudeCode)) resolved.claudeCode = resolveClaudeCodePaths(resolved.claudeCode, suiteDir);
|
|
864
|
+
if (resolved.codex && typeof resolved.codex === "object" && !Array.isArray(resolved.codex)) resolved.codex = resolveCodexPaths(resolved.codex, suiteDir);
|
|
865
|
+
return resolved;
|
|
866
|
+
}
|
|
867
|
+
/** Resolve paths on an entire suite after load. */
|
|
868
|
+
function resolveSuitePaths(suite, suiteFilePath) {
|
|
869
|
+
const suiteDir = configFileDir(suiteFilePath);
|
|
870
|
+
suite.defaultConfig = resolveConfigPaths(suite.defaultConfig, suiteDir);
|
|
871
|
+
for (const cell of suite.matrix) cell.config = resolveConfigPaths(cell.config, suiteDir) ?? cell.config;
|
|
872
|
+
for (const testCase of suite.cases) testCase.config = resolveConfigPaths(testCase.config, suiteDir);
|
|
873
|
+
}
|
|
874
|
+
/** Parent directory of a suite or grading config file path. */
|
|
875
|
+
function configFileDir(filePath) {
|
|
876
|
+
return filePath.includes("/") || filePath.includes("\\") ? filePath.replace(/[/\\][^/\\]+$/, "") : ".";
|
|
877
|
+
}
|
|
878
|
+
/**
|
|
879
|
+
* Heuristically resolve env var values that look like relative file paths.
|
|
880
|
+
*
|
|
881
|
+
* Used for grading config where credential or config paths may be expressed
|
|
882
|
+
* relative to the grading YAML location.
|
|
883
|
+
*/
|
|
884
|
+
function resolveEnvPaths(env, baseDir) {
|
|
885
|
+
const resolved = {};
|
|
886
|
+
for (const [key, value] of Object.entries(env)) if (value.startsWith("./") || value.startsWith("../")) resolved[key] = resolvePath(value, baseDir);
|
|
887
|
+
else resolved[key] = value;
|
|
888
|
+
return resolved;
|
|
889
|
+
}
|
|
890
|
+
/** Resolve relative paths in a standalone grading config file. */
|
|
891
|
+
function resolveGradingConfigPaths(config, configFilePath) {
|
|
892
|
+
const baseDir = configFileDir(configFilePath);
|
|
893
|
+
const { adapter, maxConcurrent, ...rest } = config.judge;
|
|
894
|
+
config.judge = {
|
|
895
|
+
...resolveConfigPaths(rest, baseDir) ?? rest,
|
|
896
|
+
adapter,
|
|
897
|
+
maxConcurrent
|
|
898
|
+
};
|
|
899
|
+
if (config.judge.env) config.judge.env = resolveEnvPaths(config.judge.env, baseDir);
|
|
900
|
+
}
|
|
901
|
+
/** Resolve a pipeline artifact path relative to the suite.yaml directory. */
|
|
902
|
+
function resolvePipelinePath(value, defaultRelative, suiteDir) {
|
|
903
|
+
return resolvePath(value ?? defaultRelative, suiteDir);
|
|
904
|
+
}
|
|
905
|
+
/** Resolve relative paths in a parsed pipeline config. */
|
|
906
|
+
function resolvePipelineConfigPaths(pipeline, suiteFilePath) {
|
|
907
|
+
const suiteDir = configFileDir(suiteFilePath);
|
|
908
|
+
const resolved = {};
|
|
909
|
+
if (pipeline.run) resolved.run = resolvePipelineRunStep(pipeline.run, suiteDir);
|
|
910
|
+
if (pipeline.grade) resolved.grade = resolvePipelineGradeStep(pipeline.grade, suiteDir);
|
|
911
|
+
if (pipeline.envelope) resolved.envelope = resolvePipelineEnvelopeStep(pipeline.envelope, suiteDir);
|
|
912
|
+
return resolved;
|
|
913
|
+
}
|
|
914
|
+
/** Resolve one pipeline step's run output path. */
|
|
915
|
+
function resolvePipelineRunStep(step, suiteDir) {
|
|
916
|
+
return {
|
|
917
|
+
...step,
|
|
918
|
+
output: resolvePipelinePath(step.output, DEFAULT_PIPELINE_OUTPUTS.run, suiteDir)
|
|
919
|
+
};
|
|
920
|
+
}
|
|
921
|
+
/** Resolve grade step input (optional) and output paths. */
|
|
922
|
+
function resolvePipelineGradeStep(step, suiteDir) {
|
|
923
|
+
return {
|
|
924
|
+
...step,
|
|
925
|
+
input: step.input ? resolvePipelinePath(step.input, DEFAULT_PIPELINE_OUTPUTS.run, suiteDir) : void 0,
|
|
926
|
+
output: resolvePipelinePath(step.output, DEFAULT_PIPELINE_OUTPUTS.grade, suiteDir)
|
|
927
|
+
};
|
|
928
|
+
}
|
|
929
|
+
/** Resolve envelope step report, grading, and output paths. */
|
|
930
|
+
function resolvePipelineEnvelopeStep(step, suiteDir) {
|
|
931
|
+
return {
|
|
932
|
+
...step,
|
|
933
|
+
report: step.report ? resolvePipelinePath(step.report, DEFAULT_PIPELINE_OUTPUTS.run, suiteDir) : void 0,
|
|
934
|
+
grading: step.grading ? resolvePipelinePath(step.grading, DEFAULT_PIPELINE_OUTPUTS.grade, suiteDir) : void 0,
|
|
935
|
+
output: resolvePipelinePath(step.output, DEFAULT_PIPELINE_OUTPUTS.envelope, suiteDir)
|
|
936
|
+
};
|
|
937
|
+
}
|
|
938
|
+
//#endregion
|
|
939
|
+
//#region src/config/grading-schema.ts
|
|
940
|
+
/**
|
|
941
|
+
* Zod schema for standalone grading YAML (`grading.yaml`).
|
|
942
|
+
*
|
|
943
|
+
* The top-level `judge` block reuses {@link ConfigPartialSchema} fields plus
|
|
944
|
+
* grader-specific concurrency and system-instruction overrides.
|
|
945
|
+
*/
|
|
946
|
+
/** Top-level `judge` block — mirrors harness config fields plus grader concurrency. */
|
|
947
|
+
const JudgeConfigSchema = ConfigPartialSchema.extend({
|
|
948
|
+
adapter: z.string().optional(),
|
|
949
|
+
maxConcurrent: z.number().int().positive().optional(),
|
|
950
|
+
/** Optional judge prompt prefix (maps to upstream system_instruction). */
|
|
951
|
+
system_instruction: z.string().optional()
|
|
952
|
+
});
|
|
953
|
+
const GradingConfigSchema = z.object({ judge: JudgeConfigSchema });
|
|
954
|
+
//#endregion
|
|
955
|
+
//#region src/config/suite-file-schema.ts
|
|
956
|
+
/** Single-file suite with optional inline judge and pipeline orchestration. */
|
|
957
|
+
const SuiteFileSingleSchema = TestSuiteSchema.extend({
|
|
958
|
+
judge: JudgeConfigSchema.optional(),
|
|
959
|
+
pipeline: PipelineConfigSchema.optional()
|
|
960
|
+
});
|
|
961
|
+
/** Directory suite root with optional inline judge and pipeline orchestration. */
|
|
962
|
+
const SuiteFileDirectorySchema = SuiteDirectorySchema.extend({
|
|
963
|
+
judge: JudgeConfigSchema.optional(),
|
|
964
|
+
pipeline: PipelineConfigSchema.optional()
|
|
965
|
+
});
|
|
966
|
+
//#endregion
|
|
967
|
+
//#region src/config/suite-document-loader.ts
|
|
968
|
+
/**
|
|
969
|
+
* Load a unified suite.yaml document (suite + optional judge + pipeline).
|
|
970
|
+
*/
|
|
971
|
+
/** Load suite.yaml (or directory) including optional judge and pipeline blocks. */
|
|
972
|
+
async function loadSuiteDocument(filePath, options = {}) {
|
|
583
973
|
const absolutePath = resolve(filePath);
|
|
584
974
|
let info;
|
|
585
975
|
try {
|
|
@@ -587,19 +977,12 @@ async function loadSuite(filePath) {
|
|
|
587
977
|
} catch (err) {
|
|
588
978
|
throw new ConfigError(`failed to read suite path: ${err instanceof Error ? err.message : String(err)}`, filePath);
|
|
589
979
|
}
|
|
590
|
-
|
|
591
|
-
return
|
|
592
|
-
|
|
593
|
-
async function loadSuiteFile(absolutePath) {
|
|
594
|
-
let content;
|
|
595
|
-
try {
|
|
596
|
-
content = await readFile(absolutePath, "utf8");
|
|
597
|
-
} catch (err) {
|
|
598
|
-
throw new ConfigError(`failed to read suite file: ${err instanceof Error ? err.message : String(err)}`, absolutePath);
|
|
599
|
-
}
|
|
600
|
-
return parseSuite(content, absolutePath);
|
|
980
|
+
const strict = options.validateOrchestration !== false;
|
|
981
|
+
if (info.isDirectory()) return loadSuiteDocumentDirectory(absolutePath, strict);
|
|
982
|
+
return loadSuiteDocumentFile(absolutePath, strict);
|
|
601
983
|
}
|
|
602
|
-
|
|
984
|
+
/** Load suite.yaml from a directory layout (cases under `cases/`). */
|
|
985
|
+
async function loadSuiteDocumentDirectory(dir, strict) {
|
|
603
986
|
const suiteYamlPath = join(dir, "suite.yaml");
|
|
604
987
|
let content;
|
|
605
988
|
try {
|
|
@@ -607,7 +990,7 @@ async function loadSuiteDirectory(dir) {
|
|
|
607
990
|
} catch (err) {
|
|
608
991
|
throw new ConfigError(`missing suite.yaml in suite directory: ${err instanceof Error ? err.message : String(err)}`, dir);
|
|
609
992
|
}
|
|
610
|
-
const base =
|
|
993
|
+
const { judge, pipeline, suite: base } = parseSuiteFileRoot(content, suiteYamlPath, "directory", strict);
|
|
611
994
|
const casesDir = join(dir, "cases");
|
|
612
995
|
const caseFiles = await collectCaseYamlFiles(casesDir);
|
|
613
996
|
const tagged = base.cases.map((testCase, index) => ({
|
|
@@ -636,74 +1019,241 @@ async function loadSuiteDirectory(dir) {
|
|
|
636
1019
|
cases
|
|
637
1020
|
};
|
|
638
1021
|
resolveSuitePaths(suite, suiteYamlPath);
|
|
639
|
-
return suite;
|
|
1022
|
+
return buildSuiteDocument(suiteYamlPath, suite, judge, pipeline);
|
|
640
1023
|
}
|
|
641
|
-
|
|
1024
|
+
/** Load a single suite.yaml file (inline cases). */
|
|
1025
|
+
async function loadSuiteDocumentFile(absolutePath, strict) {
|
|
1026
|
+
let content;
|
|
1027
|
+
try {
|
|
1028
|
+
content = await readFile(absolutePath, "utf8");
|
|
1029
|
+
} catch (err) {
|
|
1030
|
+
throw new ConfigError(`failed to read suite file: ${err instanceof Error ? err.message : String(err)}`, absolutePath);
|
|
1031
|
+
}
|
|
1032
|
+
const { judge, pipeline, suite } = parseSuiteFileRoot(content, absolutePath, "single", strict);
|
|
1033
|
+
resolveSuitePaths(suite, absolutePath);
|
|
1034
|
+
return buildSuiteDocument(absolutePath, suite, judge, pipeline);
|
|
1035
|
+
}
|
|
1036
|
+
/**
|
|
1037
|
+
* Parse suite.yaml root and validate against the appropriate schema.
|
|
1038
|
+
*
|
|
1039
|
+
* When `strict` is true, uses extended schemas that validate `judge:` and
|
|
1040
|
+
* `pipeline:` blocks (for `loadSuiteDocument`). When false, uses base schemas
|
|
1041
|
+
* that silently strip unknown keys (for `loadSuite`).
|
|
1042
|
+
*/
|
|
1043
|
+
function parseSuiteFileRoot(yamlContent, sourcePath, layout, strict) {
|
|
642
1044
|
let raw;
|
|
643
1045
|
try {
|
|
644
1046
|
raw = parse(yamlContent);
|
|
645
1047
|
} catch (err) {
|
|
646
1048
|
throw new ConfigError(`YAML parse error: ${err instanceof Error ? err.message : String(err)}`, sourcePath);
|
|
647
1049
|
}
|
|
648
|
-
|
|
649
|
-
|
|
650
|
-
|
|
651
|
-
|
|
652
|
-
|
|
1050
|
+
if (!strict) {
|
|
1051
|
+
const validated = (layout === "directory" ? SuiteDirectorySchema : TestSuiteSchema).safeParse(raw);
|
|
1052
|
+
if (!validated.success) throw new ConfigError(`validation failed:\n${formatZodError$2(validated.error, sourcePath)}`, sourcePath);
|
|
1053
|
+
return { suite: (layout === "directory" ? transformSuiteDirectory : transformSuite)(validated.data) };
|
|
1054
|
+
}
|
|
1055
|
+
if (layout === "directory") {
|
|
1056
|
+
const validated = SuiteFileDirectorySchema.safeParse(raw);
|
|
1057
|
+
if (!validated.success) throw new ConfigError(`validation failed:\n${formatZodError$2(validated.error, sourcePath)}`, sourcePath);
|
|
1058
|
+
return extractSuiteFileParts(validated.data, sourcePath, transformSuiteDirectory);
|
|
1059
|
+
}
|
|
1060
|
+
const validated = SuiteFileSingleSchema.safeParse(raw);
|
|
1061
|
+
if (!validated.success) throw new ConfigError(`validation failed:\n${formatZodError$2(validated.error, sourcePath)}`, sourcePath);
|
|
1062
|
+
return extractSuiteFileParts(validated.data, sourcePath, transformSuite);
|
|
653
1063
|
}
|
|
654
|
-
|
|
1064
|
+
/** Split validated YAML into suite, judge, and pipeline with path resolution. */
|
|
1065
|
+
function extractSuiteFileParts(data, sourcePath, transform) {
|
|
1066
|
+
const { judge: rawJudge, pipeline: rawPipeline, ...suiteRaw } = data;
|
|
1067
|
+
const suite = transform(suiteRaw);
|
|
1068
|
+
let judge;
|
|
1069
|
+
if (rawJudge) {
|
|
1070
|
+
judge = { ...rawJudge };
|
|
1071
|
+
resolveGradingConfigPaths({ judge }, sourcePath);
|
|
1072
|
+
}
|
|
1073
|
+
let pipeline;
|
|
1074
|
+
if (rawPipeline) {
|
|
1075
|
+
pipeline = transformPipelineConfig(rawPipeline);
|
|
1076
|
+
pipeline = resolvePipelineConfigPaths(pipeline, sourcePath);
|
|
1077
|
+
}
|
|
1078
|
+
return {
|
|
1079
|
+
suite,
|
|
1080
|
+
judge,
|
|
1081
|
+
pipeline
|
|
1082
|
+
};
|
|
1083
|
+
}
|
|
1084
|
+
/** Apply default artifact filenames when a pipeline step key is present but paths are omitted. */
|
|
1085
|
+
function transformPipelineConfig(raw) {
|
|
1086
|
+
const pipeline = {};
|
|
1087
|
+
if (raw.run !== void 0) pipeline.run = {
|
|
1088
|
+
output: raw.run?.output ?? DEFAULT_PIPELINE_OUTPUTS.run,
|
|
1089
|
+
maxConcurrent: raw.run?.maxConcurrent
|
|
1090
|
+
};
|
|
1091
|
+
if (raw.grade !== void 0) pipeline.grade = {
|
|
1092
|
+
input: raw.grade?.input,
|
|
1093
|
+
output: raw.grade?.output ?? DEFAULT_PIPELINE_OUTPUTS.grade,
|
|
1094
|
+
maxConcurrent: raw.grade?.maxConcurrent
|
|
1095
|
+
};
|
|
1096
|
+
if (raw.envelope !== void 0) pipeline.envelope = {
|
|
1097
|
+
report: raw.envelope?.report,
|
|
1098
|
+
grading: raw.envelope?.grading,
|
|
1099
|
+
output: raw.envelope?.output ?? DEFAULT_PIPELINE_OUTPUTS.envelope,
|
|
1100
|
+
projection: raw.envelope?.projection ?? "envelope",
|
|
1101
|
+
includeRawStreamEvents: raw.envelope?.includeRawStreamEvents,
|
|
1102
|
+
noTranscript: raw.envelope?.noTranscript
|
|
1103
|
+
};
|
|
1104
|
+
return pipeline;
|
|
1105
|
+
}
|
|
1106
|
+
/** Assemble the runtime {@link SuiteDocument} from parsed parts. */
|
|
1107
|
+
function buildSuiteDocument(suitePath, suite, judge, pipeline) {
|
|
1108
|
+
return {
|
|
1109
|
+
suitePath: resolve(suitePath),
|
|
1110
|
+
suite,
|
|
1111
|
+
judge,
|
|
1112
|
+
pipeline
|
|
1113
|
+
};
|
|
1114
|
+
}
|
|
1115
|
+
function formatZodError$2(err, sourcePath) {
|
|
1116
|
+
return err.issues.map((issue) => {
|
|
1117
|
+
const path = issue.path.length > 0 ? issue.path.join(".") : "(root)";
|
|
1118
|
+
return ` ${sourcePath ? `${sourcePath} → ${path}` : path}: ${issue.message}`;
|
|
1119
|
+
}).join("\n");
|
|
1120
|
+
}
|
|
1121
|
+
//#endregion
|
|
1122
|
+
//#region src/config/grading-loader.ts
|
|
1123
|
+
/**
|
|
1124
|
+
* Load standalone grading YAML for `harness-eval grade`.
|
|
1125
|
+
*
|
|
1126
|
+
* Also accepts unified suite.yaml files with an inline `judge:` block.
|
|
1127
|
+
*/
|
|
1128
|
+
/** Load grading YAML from disk and resolve relative paths. */
|
|
1129
|
+
async function loadGradingConfig(filePath) {
|
|
1130
|
+
const absolutePath = resolve(filePath);
|
|
1131
|
+
let info;
|
|
1132
|
+
try {
|
|
1133
|
+
info = await stat(absolutePath);
|
|
1134
|
+
} catch (err) {
|
|
1135
|
+
throw new ConfigError(`failed to read grading config: ${err instanceof Error ? err.message : String(err)}`, filePath);
|
|
1136
|
+
}
|
|
1137
|
+
if (info.isDirectory()) return loadGradingFromSuiteYaml(join(absolutePath, "suite.yaml"));
|
|
1138
|
+
let content;
|
|
1139
|
+
try {
|
|
1140
|
+
content = await readFile(absolutePath, "utf8");
|
|
1141
|
+
} catch (err) {
|
|
1142
|
+
throw new ConfigError(`failed to read grading config: ${err instanceof Error ? err.message : String(err)}`, filePath);
|
|
1143
|
+
}
|
|
1144
|
+
if (isSuiteRoot(parse(content))) return parseGradingFromSuiteRaw(parse(content), absolutePath);
|
|
1145
|
+
return parseGradingConfig(content, absolutePath);
|
|
1146
|
+
}
|
|
1147
|
+
/**
|
|
1148
|
+
* Parse grading YAML from a string.
|
|
1149
|
+
*
|
|
1150
|
+
* @param sourcePath Optional path for error messages and path resolution.
|
|
1151
|
+
*/
|
|
1152
|
+
function parseGradingConfig(yamlContent, sourcePath) {
|
|
655
1153
|
let raw;
|
|
656
1154
|
try {
|
|
657
1155
|
raw = parse(yamlContent);
|
|
658
1156
|
} catch (err) {
|
|
659
1157
|
throw new ConfigError(`YAML parse error: ${err instanceof Error ? err.message : String(err)}`, sourcePath);
|
|
660
1158
|
}
|
|
661
|
-
|
|
662
|
-
|
|
663
|
-
|
|
1159
|
+
if (isSuiteRoot(raw)) return parseGradingFromSuiteRaw(raw, sourcePath ?? "suite.yaml");
|
|
1160
|
+
const validated = GradingConfigSchema.safeParse(raw);
|
|
1161
|
+
if (!validated.success) throw new ConfigError(`validation failed:\n${formatZodError$1(validated.error, sourcePath)}`, sourcePath);
|
|
1162
|
+
const config = { judge: { ...validated.data.judge } };
|
|
1163
|
+
if (sourcePath) resolveGradingConfigPaths(config, sourcePath);
|
|
1164
|
+
return config;
|
|
664
1165
|
}
|
|
665
|
-
/**
|
|
666
|
-
function
|
|
1166
|
+
/** Detect unified suite.yaml by presence of suite-specific keys (vs standalone grading YAML). */
|
|
1167
|
+
function isSuiteRoot(raw) {
|
|
1168
|
+
if (raw === null || typeof raw !== "object") return false;
|
|
1169
|
+
return "cases" in raw || "matrix" in raw && "adapter" in raw;
|
|
1170
|
+
}
|
|
1171
|
+
async function loadGradingFromSuiteYaml(suiteYamlPath) {
|
|
1172
|
+
let content;
|
|
1173
|
+
try {
|
|
1174
|
+
content = await readFile(suiteYamlPath, "utf8");
|
|
1175
|
+
} catch (err) {
|
|
1176
|
+
throw new ConfigError(`failed to read suite file: ${err instanceof Error ? err.message : String(err)}`, suiteYamlPath);
|
|
1177
|
+
}
|
|
667
1178
|
let raw;
|
|
668
1179
|
try {
|
|
669
|
-
raw = parse(
|
|
1180
|
+
raw = parse(content);
|
|
670
1181
|
} catch (err) {
|
|
671
|
-
throw new ConfigError(`YAML parse error: ${err instanceof Error ? err.message : String(err)}`,
|
|
1182
|
+
throw new ConfigError(`YAML parse error: ${err instanceof Error ? err.message : String(err)}`, suiteYamlPath);
|
|
672
1183
|
}
|
|
673
|
-
return
|
|
1184
|
+
return parseGradingFromSuiteRaw(raw, suiteYamlPath);
|
|
674
1185
|
}
|
|
675
|
-
function
|
|
676
|
-
|
|
677
|
-
if (
|
|
678
|
-
|
|
679
|
-
|
|
680
|
-
|
|
1186
|
+
function parseGradingFromSuiteRaw(raw, sourcePath) {
|
|
1187
|
+
const single = SuiteFileSingleSchema.safeParse(raw);
|
|
1188
|
+
if (single.success) {
|
|
1189
|
+
if (!single.data.judge) throw new ConfigError("suite file has no judge block", sourcePath);
|
|
1190
|
+
const config = { judge: { ...single.data.judge } };
|
|
1191
|
+
resolveGradingConfigPaths(config, sourcePath);
|
|
1192
|
+
return config;
|
|
681
1193
|
}
|
|
682
|
-
|
|
1194
|
+
const directory = SuiteFileDirectorySchema.safeParse(raw);
|
|
1195
|
+
if (directory.success) {
|
|
1196
|
+
if (!directory.data.judge) throw new ConfigError("suite file has no judge block", sourcePath);
|
|
1197
|
+
const config = { judge: { ...directory.data.judge } };
|
|
1198
|
+
resolveGradingConfigPaths(config, sourcePath);
|
|
1199
|
+
return config;
|
|
1200
|
+
}
|
|
1201
|
+
throw new ConfigError(`validation failed:\n${formatZodError$1(directory.error ?? single.error, sourcePath)}`, sourcePath);
|
|
683
1202
|
}
|
|
684
|
-
|
|
685
|
-
|
|
1203
|
+
/** Format a zod validation error with optional source file prefix. */
|
|
1204
|
+
function formatZodError$1(err, sourcePath) {
|
|
1205
|
+
return err.issues.map((issue) => {
|
|
1206
|
+
const path = issue.path.length > 0 ? issue.path.join(".") : "(root)";
|
|
1207
|
+
return ` ${sourcePath ? `${sourcePath} → ${path}` : path}: ${issue.message}`;
|
|
1208
|
+
}).join("\n");
|
|
1209
|
+
}
|
|
1210
|
+
//#endregion
|
|
1211
|
+
//#region src/config/loader.ts
|
|
1212
|
+
/**
|
|
1213
|
+
* Load a `TestSuite` from a YAML file, directory, or string.
|
|
1214
|
+
*
|
|
1215
|
+
* For unified suite.yaml with optional `judge:` and `pipeline:` blocks,
|
|
1216
|
+
* use {@link loadSuiteDocument}.
|
|
1217
|
+
*/
|
|
1218
|
+
/**
|
|
1219
|
+
* Load a suite from a file path or directory path (suite portion only).
|
|
1220
|
+
*
|
|
1221
|
+
* Orchestration blocks (`judge:`, `pipeline:`) are silently stripped — callers
|
|
1222
|
+
* that only need the `TestSuite` are not broken by malformed orchestration YAML.
|
|
1223
|
+
* Use {@link loadSuiteDocument} when you need validated orchestration metadata.
|
|
1224
|
+
*/
|
|
1225
|
+
async function loadSuite(filePath) {
|
|
1226
|
+
return (await loadSuiteDocument(filePath, { validateOrchestration: false })).suite;
|
|
1227
|
+
}
|
|
1228
|
+
/**
|
|
1229
|
+
* Parse suite YAML from a string (single-file layout with inline cases).
|
|
1230
|
+
*
|
|
1231
|
+
* Unknown top-level keys such as `judge` and `pipeline` are stripped.
|
|
1232
|
+
*/
|
|
1233
|
+
function parseSuite(yamlContent, sourcePath) {
|
|
1234
|
+
let raw;
|
|
1235
|
+
try {
|
|
1236
|
+
raw = parse(yamlContent);
|
|
1237
|
+
} catch (err) {
|
|
1238
|
+
throw new ConfigError(`YAML parse error: ${err instanceof Error ? err.message : String(err)}`, sourcePath);
|
|
1239
|
+
}
|
|
1240
|
+
const validated = TestSuiteSchema.safeParse(raw);
|
|
686
1241
|
if (!validated.success) throw new ConfigError(`validation failed:\n${formatZodError(validated.error, sourcePath)}`, sourcePath);
|
|
687
|
-
|
|
1242
|
+
const suite = transformSuite(validated.data);
|
|
1243
|
+
if (sourcePath) resolveSuitePaths(suite, resolve(sourcePath));
|
|
1244
|
+
return suite;
|
|
688
1245
|
}
|
|
689
|
-
|
|
690
|
-
|
|
691
|
-
|
|
692
|
-
|
|
693
|
-
|
|
694
|
-
|
|
695
|
-
|
|
696
|
-
if (err instanceof Error && "code" in err && err.code === "ENOENT") return;
|
|
697
|
-
throw err;
|
|
698
|
-
}
|
|
699
|
-
for (const entry of entries) {
|
|
700
|
-
const fullPath = join(dir, entry.name);
|
|
701
|
-
if (entry.isDirectory()) await walk(fullPath);
|
|
702
|
-
else if (entry.isFile() && (entry.name.endsWith(".yaml") || entry.name.endsWith(".yml"))) files.push(fullPath);
|
|
703
|
-
}
|
|
1246
|
+
/** Parse `suite.yaml` for directory layout (cases may be omitted). @internal */
|
|
1247
|
+
function parseSuiteDirectory(yamlContent, sourcePath) {
|
|
1248
|
+
let raw;
|
|
1249
|
+
try {
|
|
1250
|
+
raw = parse(yamlContent);
|
|
1251
|
+
} catch (err) {
|
|
1252
|
+
throw new ConfigError(`YAML parse error: ${err instanceof Error ? err.message : String(err)}`, sourcePath);
|
|
704
1253
|
}
|
|
705
|
-
|
|
706
|
-
|
|
1254
|
+
const validated = SuiteDirectorySchema.safeParse(raw);
|
|
1255
|
+
if (!validated.success) throw new ConfigError(`validation failed:\n${formatZodError(validated.error, sourcePath)}`, sourcePath);
|
|
1256
|
+
return transformSuiteDirectory(validated.data);
|
|
707
1257
|
}
|
|
708
1258
|
function formatZodError(err, sourcePath) {
|
|
709
1259
|
return err.issues.map((issue) => {
|
|
@@ -712,6 +1262,6 @@ function formatZodError(err, sourcePath) {
|
|
|
712
1262
|
}).join("\n");
|
|
713
1263
|
}
|
|
714
1264
|
//#endregion
|
|
715
|
-
export { parseGradingConfig as a, loadGradingConfig as i,
|
|
1265
|
+
export { parseGradingConfig as a, parseCasesFile as c, loadGradingConfig as i, ConfigError as l, parseSuite as n, loadSuiteDocument as o, parseSuiteDirectory as r, DEFAULT_PIPELINE_OUTPUTS as s, loadSuite as t };
|
|
716
1266
|
|
|
717
|
-
//# sourceMappingURL=loader-
|
|
1267
|
+
//# sourceMappingURL=loader-DnQ6Jt0i.js.map
|