npm - @alis-build/harness-eval - Versions diffs - 0.1.1 → 0.1.3 - Mend

@alis-build/harness-eval 0.1.1 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (44)

package/README.md +104 -10
package/dist/adapters/claude-code/index.d.ts +2 -2
package/dist/adapters/claude-code/index.js +2 -1
package/dist/adapters/codex/index.d.ts +68 -0
package/dist/adapters/codex/index.js +3 -0
package/dist/{claude-code-ycT0JQZF.js → claude-code-C_7hxC8z.js} +37 -250
package/dist/claude-code-C_7hxC8z.js.map +1 -0
package/dist/cli/bin.js +204 -127
package/dist/cli/bin.js.map +1 -1
package/dist/codex-0cHO2te9.js +496 -0
package/dist/codex-0cHO2te9.js.map +1 -0
package/dist/config/loader.d.ts +2 -2
package/dist/config/loader.js +2 -2
package/dist/{index-6Z17eKZx.d.ts → index-DnvP1UBl.d.ts} +3 -2
package/dist/index.d.ts +397 -153
package/dist/index.js +125 -5
package/dist/index.js.map +1 -0
package/dist/loader-B1WmGGzf.d.ts +107 -0
package/dist/{loader-BCnFJ8rm.js → loader-DnQ6Jt0i.js} +707 -157
package/dist/loader-DnQ6Jt0i.js.map +1 -0
package/dist/reporter-Biy-5-9M.js +2216 -0
package/dist/reporter-Biy-5-9M.js.map +1 -0
package/dist/runner/suite.d.ts +1 -1
package/dist/runner/suite.js +1 -1
package/dist/{suite-BoOvK_lq.d.ts → suite-BEShV0by.d.ts} +7 -2
package/dist/{suite-chj0j22j.js → suite-BcP64nlb.js} +72 -4
package/dist/suite-BcP64nlb.js.map +1 -0
package/dist/{types-BQol062t.d.ts → types-0QkNVyp9.d.ts} +152 -11
package/dist/types-Bac8_Ixb.js +246 -0
package/dist/types-Bac8_Ixb.js.map +1 -0
package/dist/types-Bu8uOZZN.d.ts +77 -0
package/dist/{types-B9H4IZtA.d.ts → types-C0gBkl0-.d.ts} +3 -2
package/package.json +7 -2
package/schemas/eval-interchange-instances.schema.json +196 -0
package/schemas/eval-interchange.schema.json +65 -52
package/schemas/eval-run-envelope.schema.json +182 -425
package/dist/build-DsVJ_UeU.js +0 -1396
package/dist/build-DsVJ_UeU.js.map +0 -1
package/dist/claude-code-ycT0JQZF.js.map +0 -1
package/dist/loader-BCnFJ8rm.js.map +0 -1
package/dist/loader-DTvoVfN0.d.ts +0 -33
package/dist/suite-chj0j22j.js.map +0 -1
package/schemas/eval-interchange-agent-trace.schema.json +0 -322
package/schemas/eval-interchange-proto-instance.schema.json +0 -106

package/dist/{loader-BCnFJ8rm.js → loader-DnQ6Jt0i.js} RENAMED

@@ -3,72 +3,13 @@ import { readFile, readdir, stat } from "node:fs/promises";
 import { isAbsolute, join, relative, resolve } from "node:path";
 import { parse } from "yaml";
 import { z } from "zod";
-//#region src/config/paths.ts
-/**
-* Resolve relative paths in suite config against the suite file directory.
-*/
-function resolvePath(value, suiteDir) {
-	if (isAbsolute(value) || value.startsWith("~/")) return value;
-	return join(suiteDir, value);
-}
-function resolveClaudeCodePaths(block, suiteDir) {
-	const resolved = { ...block };
-	if (typeof resolved.mcpConfig === "string") resolved.mcpConfig = resolvePath(resolved.mcpConfig, suiteDir);
-	if (Array.isArray(resolved.pluginDirs)) resolved.pluginDirs = resolved.pluginDirs.map((p) => typeof p === "string" ? resolvePath(p, suiteDir) : p);
-	if (Array.isArray(resolved.addDirs)) resolved.addDirs = resolved.addDirs.map((p) => typeof p === "string" ? resolvePath(p, suiteDir) : p);
-	for (const field of [
-		"systemPromptFile",
-		"appendSystemPromptFile",
-		"debugFile"
-	]) {
-		const value = resolved[field];
-		if (typeof value === "string" && !value.trim().startsWith("{")) resolved[field] = resolvePath(value, suiteDir);
-	}
-	if (typeof resolved.settings === "string" && !resolved.settings.trim().startsWith("{")) resolved.settings = resolvePath(resolved.settings, suiteDir);
-	return resolved;
-}
-/** Resolve relative paths in a config layer relative to `suiteDir`. */
-function resolveConfigPaths(config, suiteDir) {
-	if (!config) return void 0;
-	const resolved = { ...config };
-	if (typeof resolved.cwd === "string") resolved.cwd = resolvePath(resolved.cwd, suiteDir);
-	if (resolved.claudeCode && typeof resolved.claudeCode === "object" && !Array.isArray(resolved.claudeCode)) resolved.claudeCode = resolveClaudeCodePaths(resolved.claudeCode, suiteDir);
-	return resolved;
-}
-/** Resolve paths on an entire suite after load. */
-function resolveSuitePaths(suite, suiteFilePath) {
-	const suiteDir = configFileDir(suiteFilePath);
-	suite.defaultConfig = resolveConfigPaths(suite.defaultConfig, suiteDir);
-	for (const cell of suite.matrix) cell.config = resolveConfigPaths(cell.config, suiteDir) ?? cell.config;
-	for (const testCase of suite.cases) testCase.config = resolveConfigPaths(testCase.config, suiteDir);
-}
-function configFileDir(filePath) {
-	return filePath.includes("/") || filePath.includes("\\") ? filePath.replace(/[/\\][^/\\]+$/, "") : ".";
-}
-function resolveEnvPaths(env, baseDir) {
-	const resolved = {};
-	for (const [key, value] of Object.entries(env)) if (value.startsWith("./") || value.startsWith("../") || value.includes("/") && !value.startsWith("http")) resolved[key] = resolvePath(value, baseDir);
-	else resolved[key] = value;
-	return resolved;
-}
-/** Resolve relative paths in a standalone grading config file. */
-function resolveGradingConfigPaths(config, configFilePath) {
-	const baseDir = configFileDir(configFilePath);
-	const { adapter, maxConcurrent, ...rest } = config.judge;
-	config.judge = {
-		...resolveConfigPaths(rest, baseDir) ?? rest,
-		adapter,
-		maxConcurrent
-	};
-	if (config.judge.env) config.judge.env = resolveEnvPaths(config.judge.env, baseDir);
-}
-//#endregion
 //#region src/config/schema.ts
 /**
 * zod schemas for the YAML on-disk shape.
 *
 * Config uses a nested layout: generic harness fields at the top level,
-* adapter-specific options under a named key (e.g. `claudeCode`).
+* adapter-specific options under a named key (e.g. `claudeCode`). Validated
+* raw shapes are transformed into runtime types by `src/config/transform.ts`.
 */
 /** Claude Code adapter-specific options (nested under `claudeCode`). */
 const ClaudeCodeConfigSchema = z.object({
@@ -117,13 +58,40 @@ const ClaudeCodeConfigSchema = z.object({
 	maxTurns: z.number().int().positive(),
 	isolateConfig: z.boolean()
 }).partial();
+/** Codex CLI adapter-specific options (nested under `codex`). */
+const CodexConfigSchema = z.object({
+	binary: z.string(),
+	profile: z.string(),
+	sandbox: z.enum([
+		"read-only",
+		"workspace-write",
+		"danger-full-access"
+	]),
+	addDirs: z.array(z.string()),
+	configOverrides: z.array(z.string()),
+	askForApproval: z.enum([
+		"untrusted",
+		"on-request",
+		"never"
+	]),
+	dangerouslyBypassApprovalsAndSandbox: z.boolean(),
+	dangerouslyBypassHookTrust: z.boolean(),
+	ephemeral: z.boolean(),
+	ignoreUserConfig: z.boolean(),
+	skipGitRepoCheck: z.boolean(),
+	outputSchema: z.string(),
+	outputLastMessage: z.string(),
+	captureLastMessage: z.boolean(),
+	isolateConfig: z.boolean()
+}).partial();
 /** Generic + nested adapter config for one layer (defaultConfig, case, cell). */
 const ConfigPartialSchema = z.object({
 	model: z.string(),
 	cwd: z.string(),
 	timeoutMs: z.number().int().positive(),
 	env: z.record(z.string(), z.string()),
-	claudeCode: ClaudeCodeConfigSchema
+	claudeCode: ClaudeCodeConfigSchema,
+	codex: CodexConfigSchema
 }).partial();
 /** A matrix cell — one point in the configuration matrix. */
 const MatrixCellSchema = z.object({
@@ -136,6 +104,11 @@ const ReferenceToolCallSchema = z.object({
 	tool_name: z.string().min(1),
 	tool_input: z.unknown()
 });
+/** Reference trajectory in suite YAML — array of steps or object with mode + steps. */
+const ReferenceTrajectorySchema = z.union([z.array(ReferenceToolCallSchema), z.object({
+	tool_name_mode: z.enum(["harness", "bare"]).optional(),
+	steps: z.array(ReferenceToolCallSchema).min(1)
+})]);
 /** A test case. */
 const TestCaseSchema = z.object({
 	id: z.string().min(1),
@@ -143,7 +116,7 @@ const TestCaseSchema = z.object({
 	category: z.string().optional(),
 	notes: z.string().optional(),
 	expectations: z.array(z.string().min(1)).optional(),
-	reference_trajectory: z.array(ReferenceToolCallSchema).optional(),
+	reference_trajectory: ReferenceTrajectorySchema.optional(),
 	human_ratings: z.record(z.string(), z.number()).optional(),
 	assertions: z.array(z.unknown()).min(1),
 	repetitions: z.number().int().positive().optional(),
@@ -192,6 +165,7 @@ function transformSuiteDirectory(raw) {
 function transformTestCases(raw, pathPrefix) {
 	return raw.map((c, i) => transformTestCase(c, `${pathPrefix}[${i}]`));
 }
+/** Merge suite-level parts shared by single-file and directory transforms. */
 function transformSuiteParts(raw) {
 	return {
 		adapter: raw.adapter,
@@ -200,6 +174,21 @@ function transformSuiteParts(raw) {
 		cases: raw.cases.map((c, i) => transformTestCase(c, `cases[${i}]`))
 	};
 }
+/**
+* Normalize reference trajectory YAML into {@link ReferenceTrajectoryConfig}.
+*
+* Accepts a bare step array or `{ tool_name_mode?, steps }` object form.
+*/
+function normalizeReferenceTrajectory(raw, path) {
+	if (raw === void 0) return void 0;
+	if (Array.isArray(raw)) return { steps: raw };
+	if (!isPlainObject(raw) || !Array.isArray(raw.steps)) throw new ConfigError("reference_trajectory must be an array of tool calls or { tool_name_mode?, steps: [...] }", path);
+	return {
+		tool_name_mode: raw.tool_name_mode,
+		steps: raw.steps
+	};
+}
+/** Map raw matrix cell YAML to runtime {@link MatrixCell}. */
 function transformMatrixCell(raw) {
 	return {
 		label: raw.label,
@@ -207,6 +196,7 @@ function transformMatrixCell(raw) {
 		axes: raw.axes
 	};
 }
+/** Map one raw test case to runtime {@link TestCase}, transforming assertions. */
 function transformTestCase(raw, path) {
 	return {
 		id: raw.id,
@@ -214,7 +204,7 @@ function transformTestCase(raw, path) {
 		category: raw.category,
 		notes: raw.notes,
 		expectations: raw.expectations,
-		reference_trajectory: raw.reference_trajectory,
+		reference_trajectory: normalizeReferenceTrajectory(raw.reference_trajectory, `${path}.reference_trajectory`),
 		human_ratings: raw.human_ratings,
 		repetitions: raw.repetitions,
 		config: raw.config,
@@ -223,6 +213,17 @@ function transformTestCase(raw, path) {
 }
 /** Keys that may appear alongside an assertion-type key. Not assertion types themselves. */
 const SIBLING_KEYS = /* @__PURE__ */ new Set(["threshold"]);
+/**
+* Parse optional `threshold` sibling and delegate the assertion body to
+* {@link transformAssertion}.
+*
+* @throws {ConfigError} When the wrapper is not an object, threshold is out of
+*   `[0, 1]`, or the nested assertion fails validation.
+*
+* @example
+* transformThresholdedAssertion({ called: "Read", threshold: 0.9 }, "path")
+* // → { assertion: { type: "called", tool: "Read" }, threshold: 0.9 }
+*/
 function transformThresholdedAssertion(raw, path) {
 	if (!isPlainObject(raw)) throw new ConfigError(`expected object, got ${typeOf(raw)}`, path);
 	const threshold = raw.threshold;
@@ -240,6 +241,19 @@ function transformThresholdedAssertion(raw, path) {
 * Finds the single non-sibling key, dispatches to the per-type transformer.
 * Per-type transformers handle both verbose-object and shortcut-scalar input
 * shapes where applicable.
+*
+* @param raw - Single assertion object from parsed YAML (may include `threshold` sibling).
+* @param path - JSON-path-like location for error messages (e.g. `cases[0].assertions[1]`).
+* @returns Runtime {@link Assertion} tagged union.
+* @throws {ConfigError} When the object has no assertion key, multiple type keys, or an unknown type.
+*
+* @example
+* transformAssertion({ called: "Read" }, "cases[0].assertions[0]")
+* // → { type: "called", tool: "Read" }
+*
+* @example
+* transformAssertion({ called: { tool: "Read", times: ">= 2" } }, "path")
+* // → { type: "called", tool: "Read", times: ">= 2" }
 */
 function transformAssertion(raw, path) {
 	if (!isPlainObject(raw)) throw new ConfigError(`expected object, got ${typeOf(raw)}`, path);
@@ -271,6 +285,22 @@ function transformAssertion(raw, path) {
 		default: throw new ConfigError(`unknown assertion type: ${typeKey}`, path);
 	}
 }
+/**
+* Transform `called` YAML (scalar or `{tool, times?}`) to runtime assertion.
+*
+* @throws {ConfigError} When value is neither string nor object, tool is invalid,
+*   or `times` is not a valid cardinality string.
+*
+* @example
+* // Scalar shortcut
+* transformCalled("mcp__api__search_skills", "path")
+* // → { type: "called", tool: "mcp__api__search_skills" }
+*
+* @example
+* // Verbose form with cardinality
+* transformCalled({ tool: "Read", times: ">= 1" }, "path")
+* // → { type: "called", tool: "Read", times: ">= 1" }
+*/
 function transformCalled(value, path) {
 	if (typeof value === "string") return {
 		type: "called",
@@ -293,6 +323,14 @@ function transformCalled(value, path) {
 		times
 	};
 }
+/**
+* Transform `not_called` YAML (scalar or `{tool}`).
+*
+* @throws {ConfigError} When value is neither string nor object with a valid `tool`.
+*
+* @example
+* transformNotCalled("Bash", "path") // → { type: "not_called", tool: "Bash" }
+*/
 function transformNotCalled(value, path) {
 	if (typeof value === "string") return {
 		type: "not_called",
@@ -304,18 +342,45 @@ function transformNotCalled(value, path) {
 		tool: requireToolPattern(value.tool, `${path}.tool`)
 	};
 }
+/**
+* Transform `called_any_of` — bare tool list or `{tools: [...]}`.
+*
+* @throws {ConfigError} When the value is not an array or `{tools: [...]}` object.
+*
+* @example
+* transformCalledAnyOf(["Read", "Glob"], "path")
+* // → { type: "called_any_of", tools: ["Read", "Glob"] }
+*/
 function transformCalledAnyOf(value, path) {
 	return {
 		type: "called_any_of",
 		tools: requireToolPatternList(value, path)
 	};
 }
+/**
+* Transform `called_all_of` — bare tool list or `{tools: [...]}`.
+*
+* @throws {ConfigError} When the value is not an array or `{tools: [...]}` object.
+*
+* @example
+* transformCalledAllOf({ tools: ["Read", "Grep"] }, "path")
+* // → { type: "called_all_of", tools: ["Read", "Grep"] }
+*/
 function transformCalledAllOf(value, path) {
 	return {
 		type: "called_all_of",
 		tools: requireToolPatternList(value, path)
 	};
 }
+/**
+* Transform `called_before: {first, then}` ordering assertion.
+*
+* @throws {ConfigError} When value is not an object or `first`/`then` are invalid patterns.
+*
+* @example
+* transformCalledBefore({ first: "SearchSkills", then: "LoadSkill" }, "path")
+* // → { type: "called_before", first: "SearchSkills", then: "LoadSkill" }
+*/
 function transformCalledBefore(value, path) {
 	if (!isPlainObject(value)) throw new ConfigError(`expected object with {first, then}, got ${typeOf(value)}`, path);
 	return {
@@ -324,6 +389,19 @@ function transformCalledBefore(value, path) {
 		then: requireToolPattern(value.then, `${path}.then`)
 	};
 }
+/**
+* Transform `sequence` — tool list with optional `strict` flag.
+*
+* @throws {ConfigError} When value is neither a pattern array nor `{tools, strict?}` object.
+*
+* @example
+* // Bare array (non-strict by default)
+* transformSequence(["Read", "Edit"], "path")
+*
+* @example
+* // Explicit strict ordering
+* transformSequence({ tools: ["Read", "Edit"], strict: true }, "path")
+*/
 function transformSequence(value, path) {
 	if (Array.isArray(value)) return {
 		type: "sequence",
@@ -336,6 +414,19 @@ function transformSequence(value, path) {
 		strict: value.strict === void 0 ? void 0 : requireBool(value.strict, `${path}.strict`)
 	};
 }
+/**
+* Transform `called_with: {tool, args}` with predicate validation on args.
+*
+* @throws {ConfigError} When `tool` or `args` is missing/invalid, or `args` fails
+*   {@link validatePredicate}.
+*
+* @example
+* transformCalledWith(
+*   { tool: "Read", args: { path: { contains: "README" } } },
+*   "path",
+* )
+* // → { type: "called_with", tool: "Read", args: { path: { contains: "README" } } }
+*/
 function transformCalledWith(value, path) {
 	if (!isPlainObject(value)) throw new ConfigError(`expected object with {tool, args}, got ${typeOf(value)}`, path);
 	const tool = requireToolPattern(value.tool, `${path}.tool`);
@@ -347,10 +438,32 @@ function transformCalledWith(value, path) {
 		args: value.args
 	};
 }
+/**
+* Transform `responded_without_tool_calls` — accepts true or empty object.
+*
+* @throws {ConfigError} When value is neither `true`, null, nor an empty object.
+*
+* @example
+* transformRespondedWithoutToolCalls(true, "path")
+* // → { type: "responded_without_tool_calls" }
+*/
 function transformRespondedWithoutToolCalls(value, path) {
 	if (value === true || value === null || isPlainObject(value) && Object.keys(value).length === 0) return { type: "responded_without_tool_calls" };
 	throw new ConfigError(`expected true or empty object, got ${JSON.stringify(value)}`, path);
 }
+/**
+* Transform budget assertions (`iterations_within`, `cost_within_usd`, `duration_within_ms`).
+*
+* @throws {ConfigError} When `max` is missing, non-positive, or not a number.
+*
+* @example
+* transformScalarMax(5, "path", "iterations_within")
+* // → { type: "iterations_within", max: 5 }
+*
+* @example
+* transformScalarMax({ max: 2.5 }, "path", "cost_within_usd")
+* // → { type: "cost_within_usd", max: 2.5 }
+*/
 function transformScalarMax(value, path, type) {
 	let max;
 	if (typeof value === "number") max = value;
@@ -362,6 +475,15 @@ function transformScalarMax(value, path, type) {
 		max
 	};
 }
+/**
+* Transform `finished_with` — stop reason string, list, or `{reasons}`.
+*
+* @throws {ConfigError} When value is not a string, string array, or `{reasons}` object.
+*
+* @example
+* transformFinishedWith("end_turn", "path")
+* // → { type: "finished_with", reasons: "end_turn" }
+*/
 function transformFinishedWith(value, path) {
 	if (typeof value === "string") return {
 		type: "finished_with",
@@ -384,6 +506,15 @@ function transformFinishedWith(value, path) {
 	}
 	throw new ConfigError(`expected string, string[], or {reasons: ...}, got ${JSON.stringify(value)}`, path);
 }
+/**
+* Transform `response_contains` / `response_not_contains` scalar or `{text}`.
+*
+* @throws {ConfigError} When value is neither a string nor `{text: string}`.
+*
+* @example
+* transformResponseText("done", "path", "response_contains")
+* // → { type: "response_contains", text: "done" }
+*/
 function transformResponseText(value, path, type) {
 	if (typeof value === "string") return {
 		type,
@@ -395,6 +526,15 @@ function transformResponseText(value, path, type) {
 	};
 	throw new ConfigError(`expected string or {text: string}, got ${JSON.stringify(value)}`, path);
 }
+/**
+* Transform `response_matches: {pattern, flags?}`.
+*
+* @throws {ConfigError} When `pattern` is missing or not a string.
+*
+* @example
+* transformResponseMatches({ pattern: "error\\d+", flags: "i" }, "path")
+* // → { type: "response_matches", pattern: "error\\d+", flags: "i" }
+*/
 function transformResponseMatches(value, path) {
 	if (!isPlainObject(value)) throw new ConfigError(`expected object with {pattern, flags?}, got ${typeOf(value)}`, path);
 	return {
@@ -403,24 +543,57 @@ function transformResponseMatches(value, path) {
 		flags: value.flags === void 0 ? void 0 : requireString(value.flags, `${path}.flags`)
 	};
 }
+/**
+* Transform compound `all_of` assertion list.
+*
+* @throws {ConfigError} When value is not an array or `{assertions: [...]}`.
+*
+* @example
+* transformAllOf([{ called: "Read" }, { not_called: "Bash" }], "path")
+*/
 function transformAllOf(value, path) {
 	return {
 		type: "all_of",
 		assertions: transformCompoundList(value, path)
 	};
 }
+/**
+* Transform compound `any_of` assertion list.
+*
+* @throws {ConfigError} When value is not an array or `{assertions: [...]}`.
+*
+* @example
+* transformAnyOf({ assertions: [{ called: "Read" }, { called: "Glob" }] }, "path")
+*/
 function transformAnyOf(value, path) {
 	return {
 		type: "any_of",
 		assertions: transformCompoundList(value, path)
 	};
 }
+/**
+* Transform compound `not` — single nested assertion, no threshold.
+*
+* The inner assertion uses the same single-key YAML shape as top-level
+* assertions; thresholds apply only at the outer {@link transformThresholdedAssertion} level.
+*
+* @throws {ConfigError} Propagates from nested {@link transformAssertion}.
+*
+* @example
+* transformNot({ called: "Bash" }, "path")
+* // → { type: "not", assertion: { type: "called", tool: "Bash" } }
+*/
 function transformNot(value, path) {
 	return {
 		type: "not",
 		assertion: transformAssertion(value, path)
 	};
 }
+/**
+* Parse compound assertion list from array or `{assertions: [...]}`.
+*
+* @throws {ConfigError} When value is neither form.
+*/
 function transformCompoundList(value, path) {
 	const list = Array.isArray(value) ? value : isPlainObject(value) && Array.isArray(value.assertions) ? value.assertions : null;
 	if (list === null) throw new ConfigError(`expected array or {assertions: [...]}, got ${JSON.stringify(value)}`, path);
@@ -452,6 +625,9 @@ const COMPOUND_OPS = /* @__PURE__ */ new Set([
 *   - single-key object whose key is a leaf op (e.g. `{contains: "x"}`)
 *   - single-key compound (`{any_of: [...]}`, `{all_of: [...]}`, `{not: ...}`)
 *   - multi-key object (descend into fields; each value is a sub-predicate)
+*
+* @throws {ConfigError} When a compound op has a non-array value or a leaf op
+*   has the wrong value type (e.g. non-string `contains`).
 */
 function validatePredicate(raw, path) {
 	if (!isPlainObject(raw)) return;
@@ -474,6 +650,12 @@ function validatePredicate(raw, path) {
 	}
 	for (const [field, sub] of Object.entries(raw)) validatePredicate(sub, `${path}.${field}`);
 }
+/**
+* Validate a leaf predicate operator's value shape at config load time.
+*
+* @throws {ConfigError} When the operator's value has the wrong type or `regex`
+*   is not a valid JavaScript regular expression.
+*/
 function validateLeafOperator(op, value, path) {
 	switch (op) {
 		case "equals": return;
@@ -501,85 +683,293 @@ function validateLeafOperator(op, value, path) {
 		default: return;
 	}
 }
+/** Require a tool pattern string or `{ pattern }` object. */
 function requireToolPattern(value, path) {
 	if (typeof value === "string") return value;
 	if (isPlainObject(value) && typeof value.pattern === "string") return { pattern: value.pattern };
 	throw new ConfigError(`expected string or {pattern: string}, got ${JSON.stringify(value)}`, path);
 }
+/** Require a bare tool pattern array or `{ tools: [...] }` wrapper. */
 function requireToolPatternList(value, path) {
 	const list = Array.isArray(value) ? value : isPlainObject(value) && Array.isArray(value.tools) ? value.tools : null;
 	if (list === null) throw new ConfigError(`expected array of tool patterns or {tools: [...]}, got ${JSON.stringify(value)}`, path);
 	return list.map((v, i) => requireToolPattern(v, `${path}[${i}]`));
 }
+/** Require a string value at `path` or throw {@link ConfigError}. */
 function requireString(value, path) {
 	if (typeof value === "string") return value;
 	throw new ConfigError(`expected string, got ${typeOf(value)}`, path);
 }
+/** Require a boolean value at `path` or throw {@link ConfigError}. */
 function requireBool(value, path) {
 	if (typeof value === "boolean") return value;
 	throw new ConfigError(`expected boolean, got ${typeOf(value)}`, path);
 }
+/** True for non-null, non-array objects (YAML mapping nodes). */
 function isPlainObject(x) {
 	return typeof x === "object" && x !== null && !Array.isArray(x);
 }
+/** Human-readable type name for config error messages. */
 function typeOf(x) {
 	if (x === null) return "null";
 	if (Array.isArray(x)) return "array";
 	return typeof x;
 }
 //#endregion
-//#region src/config/grading-schema.ts
-/**
-* Zod schema for standalone grading YAML (`grading.yaml`).
-*/
-/** Top-level `judge` block — mirrors harness config fields plus grader concurrency. */
-const JudgeConfigSchema = ConfigPartialSchema.extend({
-	adapter: z.string().optional(),
-	maxConcurrent: z.number().int().positive().optional(),
-	/** Optional judge prompt prefix (maps to upstream system_instruction). */
-	system_instruction: z.string().optional()
-});
-const GradingConfigSchema = z.object({ judge: JudgeConfigSchema });
-//#endregion
-//#region src/config/grading-loader.ts
+//#region src/config/loader-internals.ts
 /**
-* Load standalone grading YAML for `harness-eval grade`.
+* Shared suite loader helpers (case file collection and parsing).
 */
-async function loadGradingConfig(filePath) {
-	const absolutePath = resolve(filePath);
-	let content;
-	try {
-		content = await readFile(absolutePath, "utf8");
-	} catch (err) {
-		throw new ConfigError(`failed to read grading config: ${err instanceof Error ? err.message : String(err)}`, filePath);
-	}
-	return parseGradingConfig(content, absolutePath);
-}
-function parseGradingConfig(yamlContent, sourcePath) {
+/** Parse one case file: single case, array, or `{ cases: [...] }`. */
+function parseCasesFile(yamlContent, sourcePath) {
 	let raw;
 	try {
 		raw = parse(yamlContent);
 	} catch (err) {
 		throw new ConfigError(`YAML parse error: ${err instanceof Error ? err.message : String(err)}`, sourcePath);
 	}
-	const validated = GradingConfigSchema.safeParse(raw);
-	if (!validated.success) throw new ConfigError(`validation failed:\n${formatZodError$1(validated.error, sourcePath)}`, sourcePath);
-	const config = { judge: { ...validated.data.judge } };
-	if (sourcePath) resolveGradingConfigPaths(config, sourcePath);
-	return config;
+	return transformTestCases(extractRawCases(raw, sourcePath), sourcePath ?? "cases");
 }
-function formatZodError$1(err, sourcePath) {
+function extractRawCases(raw, sourcePath) {
+	if (Array.isArray(raw)) return raw.map((item, index) => validateRawCase(item, sourcePath, index));
+	if (raw && typeof raw === "object") {
+		const obj = raw;
+		if (Array.isArray(obj.cases)) return obj.cases.map((item, index) => validateRawCase(item, sourcePath, index));
+		if ("id" in obj && "prompt" in obj && "assertions" in obj) return [validateRawCase(raw, sourcePath, 0)];
+	}
+	throw new ConfigError("expected a case object, array of cases, or { cases: [...] }", sourcePath);
+}
+function validateRawCase(raw, sourcePath, index) {
+	const validated = TestCaseSchema.safeParse(raw);
+	if (!validated.success) throw new ConfigError(`validation failed:\n${formatZodError$3(validated.error, sourcePath)}`, sourcePath);
+	return validated.data;
+}
+/** Recursively collect `.yaml` / `.yml` files under `casesDir`. */
+async function collectCaseYamlFiles(casesDir) {
+	const files = [];
+	async function walk(dir) {
+		let entries;
+		try {
+			entries = await readdir(dir, { withFileTypes: true });
+		} catch (err) {
+			if (err instanceof Error && "code" in err && err.code === "ENOENT") return;
+			throw err;
+		}
+		for (const entry of entries) {
+			const fullPath = join(dir, entry.name);
+			if (entry.isDirectory()) await walk(fullPath);
+			else if (entry.isFile() && (entry.name.endsWith(".yaml") || entry.name.endsWith(".yml"))) files.push(fullPath);
+		}
+	}
+	await walk(casesDir);
+	return files.sort();
+}
+function formatZodError$3(err, sourcePath) {
 	return err.issues.map((issue) => {
 		const path = issue.path.length > 0 ? issue.path.join(".") : "(root)";
 		return `  ${sourcePath ? `${sourcePath} → ${path}` : path}: ${issue.message}`;
 	}).join("\n");
 }
 //#endregion
-//#region src/config/loader.ts
+//#region src/config/pipeline-schema.ts
 /**
-* Load a `TestSuite` from a YAML file, directory, or string.
+* Zod schemas for optional `pipeline:` block in suite.yaml.
+*
+* Step presence under `pipeline` enables orchestration via `harness-eval pipeline`.
 */
-async function loadSuite(filePath) {
+/** `pipeline.run` step — harness eval run. */
+const PipelineRunStepSchema = z.object({
+	output: z.string().min(1).optional(),
+	maxConcurrent: z.number().int().positive().optional()
+}).optional();
+/** `pipeline.grade` step — LLM outcome grading. */
+const PipelineGradeStepSchema = z.object({
+	input: z.string().min(1).optional(),
+	output: z.string().min(1).optional(),
+	maxConcurrent: z.number().int().positive().optional()
+}).optional();
+/** `pipeline.envelope` step — EvalRunEnvelope export. */
+const PipelineEnvelopeStepSchema = z.object({
+	report: z.string().min(1).optional(),
+	grading: z.string().min(1).optional(),
+	output: z.string().min(1).optional(),
+	projection: z.enum([
+		"envelope",
+		"trajectory",
+		"instances"
+	]).optional(),
+	includeRawStreamEvents: z.boolean().optional(),
+	noTranscript: z.boolean().optional()
+}).optional();
+/** Top-level optional pipeline block in suite.yaml. */
+const PipelineConfigSchema = z.object({
+	run: PipelineRunStepSchema,
+	grade: PipelineGradeStepSchema,
+	envelope: PipelineEnvelopeStepSchema
+}).partial();
+/** Default artifact filenames relative to the suite.yaml directory. */
+const DEFAULT_PIPELINE_OUTPUTS = {
+	run: "report.json",
+	grade: "grading.json",
+	envelope: "envelope.json"
+};
+//#endregion
+//#region src/config/paths.ts
+/**
+* Resolve relative paths in suite config against the suite file directory.
+*
+* YAML authors write paths relative to the suite file; this module absolutizes
+* them at load time so the runner and adapters receive filesystem-ready values.
+* Tilde-prefixed paths and inline JSON blobs (settings starting with `{`) are
+* left unchanged.
+*/
+/** Resolve a single path relative to `suiteDir` unless already absolute or `~/`. */
+function resolvePath(value, suiteDir) {
+	if (isAbsolute(value) || value.startsWith("~/")) return value;
+	return join(suiteDir, value);
+}
+/** Resolve Claude Code-specific path fields within a config block. */
+function resolveClaudeCodePaths(block, suiteDir) {
+	const resolved = { ...block };
+	if (typeof resolved.mcpConfig === "string") resolved.mcpConfig = resolvePath(resolved.mcpConfig, suiteDir);
+	if (Array.isArray(resolved.pluginDirs)) resolved.pluginDirs = resolved.pluginDirs.map((p) => typeof p === "string" ? resolvePath(p, suiteDir) : p);
+	if (Array.isArray(resolved.addDirs)) resolved.addDirs = resolved.addDirs.map((p) => typeof p === "string" ? resolvePath(p, suiteDir) : p);
+	for (const field of [
+		"systemPromptFile",
+		"appendSystemPromptFile",
+		"debugFile"
+	]) {
+		const value = resolved[field];
+		if (typeof value === "string" && !value.trim().startsWith("{")) resolved[field] = resolvePath(value, suiteDir);
+	}
+	if (typeof resolved.settings === "string" && !resolved.settings.trim().startsWith("{")) resolved.settings = resolvePath(resolved.settings, suiteDir);
+	return resolved;
+}
+/** Resolve Codex-specific path fields within a config block. */
+function resolveCodexPaths(block, suiteDir) {
+	const resolved = { ...block };
+	if (Array.isArray(resolved.addDirs)) resolved.addDirs = resolved.addDirs.map((p) => typeof p === "string" ? resolvePath(p, suiteDir) : p);
+	for (const field of ["outputSchema", "outputLastMessage"]) {
+		const value = resolved[field];
+		if (typeof value === "string") resolved[field] = resolvePath(value, suiteDir);
+	}
+	return resolved;
+}
+/** Resolve relative paths in a config layer relative to `suiteDir`. */
+function resolveConfigPaths(config, suiteDir) {
+	if (!config) return void 0;
+	const resolved = { ...config };
+	if (typeof resolved.cwd === "string") resolved.cwd = resolvePath(resolved.cwd, suiteDir);
+	if (resolved.claudeCode && typeof resolved.claudeCode === "object" && !Array.isArray(resolved.claudeCode)) resolved.claudeCode = resolveClaudeCodePaths(resolved.claudeCode, suiteDir);
+	if (resolved.codex && typeof resolved.codex === "object" && !Array.isArray(resolved.codex)) resolved.codex = resolveCodexPaths(resolved.codex, suiteDir);
+	return resolved;
+}
+/**
+* Resolve config paths across a loaded suite, mutating it in place.
+*
+* The base directory is the parent of `suiteFilePath`. `defaultConfig` and each
+* case's `config` are replaced by the resolved layer (possibly `undefined`);
+* a matrix cell keeps its existing `config` when resolution yields nothing.
+*/
+function resolveSuitePaths(suite, suiteFilePath) {
+	const baseDir = configFileDir(suiteFilePath);
+	suite.defaultConfig = resolveConfigPaths(suite.defaultConfig, baseDir);
+	for (const cell of suite.matrix) {
+		const resolvedCellConfig = resolveConfigPaths(cell.config, baseDir);
+		cell.config = resolvedCellConfig ?? cell.config;
+	}
+	for (const testCase of suite.cases) {
+		testCase.config = resolveConfigPaths(testCase.config, baseDir);
+	}
+}
+/**
+* Parent directory of a suite or grading config file path.
+*
+* Pure string manipulation that accepts both `/` and `\` separators.
+* A bare file name (no separator) yields ".". A file that sits directly under
+* the root (e.g. "/suite.yaml") yields the root separator rather than an empty
+* string, so callers never resolve relative paths against an empty base.
+*
+* @param filePath Path to a config file.
+* @returns The directory portion of `filePath`.
+*/
+function configFileDir(filePath) {
+	if (!filePath.includes("/") && !filePath.includes("\\")) return ".";
+	const parent = filePath.replace(/[/\\][^/\\]+$/, "");
+	// An empty result means the only separator was the leading one: keep the root.
+	return parent === "" ? filePath[0] : parent;
+}
+/**
+* Heuristically resolve env var values that look like relative file paths.
+*
+* Only values beginning with "./" or "../" are passed to `resolvePath` with
+* `baseDir`; all other values are copied as-is. Used for grading config, where
+* credential or config paths may be written relative to the grading YAML.
+*
+* @returns A new object; `env` itself is not modified.
+*/
+function resolveEnvPaths(env, baseDir) {
+	const output = {};
+	for (const [name, value] of Object.entries(env)) {
+		const looksRelative = value.startsWith("./") || value.startsWith("../");
+		output[name] = looksRelative ? resolvePath(value, baseDir) : value;
+	}
+	return output;
+}
+/**
+* Resolve relative paths in a standalone grading config, in place.
+*
+* `config.judge` is REPLACED by a new object: the harness fields go through
+* `resolveConfigPaths`, while `adapter` and `maxConcurrent` are carried over
+* untouched. A truthy `env` is then run through `resolveEnvPaths`. Callers
+* must read the result back from `config.judge`.
+*/
+function resolveGradingConfigPaths(config, configFilePath) {
+	const baseDir = configFileDir(configFilePath);
+	const { adapter, maxConcurrent, ...harnessFields } = config.judge;
+	const resolvedFields = resolveConfigPaths(harnessFields, baseDir) ?? harnessFields;
+	const judge = {
+		...resolvedFields,
+		adapter,
+		maxConcurrent
+	};
+	config.judge = judge;
+	if (judge.env) judge.env = resolveEnvPaths(judge.env, baseDir);
+}
+/**
+* Resolve a pipeline artifact path relative to the suite.yaml directory.
+*
+* `defaultRelative` is used only when `value` is null or undefined.
+*/
+function resolvePipelinePath(value, defaultRelative, suiteDir) {
+	const target = value ?? defaultRelative;
+	return resolvePath(target, suiteDir);
+}
+/**
+* Resolve relative paths in a parsed pipeline config.
+*
+* Builds a new object containing only the steps (`run`, `grade`, `envelope`)
+* that are truthy on the input, each resolved against the suite file's directory.
+*/
+function resolvePipelineConfigPaths(pipeline, suiteFilePath) {
+	const suiteDir = configFileDir(suiteFilePath);
+	const { run, grade, envelope } = pipeline;
+	const output = {};
+	if (run) output.run = resolvePipelineRunStep(run, suiteDir);
+	if (grade) output.grade = resolvePipelineGradeStep(grade, suiteDir);
+	if (envelope) output.envelope = resolvePipelineEnvelopeStep(envelope, suiteDir);
+	return output;
+}
+/**
+* Resolve the run step's output path.
+*
+* Returns a copy of `step` whose `output` is resolved against `suiteDir`,
+* falling back to the default run artifact name when `output` is absent.
+*/
+function resolvePipelineRunStep(step, suiteDir) {
+	const output = resolvePipelinePath(step.output, DEFAULT_PIPELINE_OUTPUTS.run, suiteDir);
+	return { ...step, output };
+}
+/**
+* Resolve the grade step's input (optional) and output paths.
+*
+* A falsy `input` becomes `undefined`; `output` falls back to the default
+* grade artifact name. All other step fields are copied unchanged.
+*/
+function resolvePipelineGradeStep(step, suiteDir) {
+	let input;
+	if (step.input) input = resolvePipelinePath(step.input, DEFAULT_PIPELINE_OUTPUTS.run, suiteDir);
+	const output = resolvePipelinePath(step.output, DEFAULT_PIPELINE_OUTPUTS.grade, suiteDir);
+	return { ...step, input, output };
+}
+/**
+* Resolve the envelope step's report, grading, and output paths.
+*
+* Falsy `report` / `grading` become `undefined`; `output` falls back to the
+* default envelope artifact name. All other step fields are copied unchanged.
+*/
+function resolvePipelineEnvelopeStep(step, suiteDir) {
+	let report;
+	if (step.report) report = resolvePipelinePath(step.report, DEFAULT_PIPELINE_OUTPUTS.run, suiteDir);
+	let grading;
+	if (step.grading) grading = resolvePipelinePath(step.grading, DEFAULT_PIPELINE_OUTPUTS.grade, suiteDir);
+	const output = resolvePipelinePath(step.output, DEFAULT_PIPELINE_OUTPUTS.envelope, suiteDir);
+	return { ...step, report, grading, output };
+}
+//#endregion
+//#region src/config/grading-schema.ts
+/**
+* Zod schema for standalone grading YAML (`grading.yaml`).
+*
+* The top-level `judge` block reuses {@link ConfigPartialSchema} fields plus
+* grader-specific concurrency and system-instruction overrides.
+*/
+/**
+* Top-level `judge` block — mirrors harness config fields plus grader concurrency.
+*
+* Built by extending `ConfigPartialSchema` with the three optional keys below.
+*/
+const JudgeConfigSchema = ConfigPartialSchema.extend({
+	/** Optional string. */
+	adapter: z.string().optional(),
+	/** Optional positive integer. */
+	maxConcurrent: z.number().int().positive().optional(),
+	/** Optional judge prompt prefix (maps to upstream system_instruction). */
+	system_instruction: z.string().optional()
+});
+/** Root of a standalone grading file: an object whose `judge` key is validated by `JudgeConfigSchema`. */
+const GradingConfigSchema = z.object({ judge: JudgeConfigSchema });
+//#endregion
+//#region src/config/suite-file-schema.ts
+/**
+* Single-file suite with optional inline judge and pipeline orchestration.
+*
+* Extends `TestSuiteSchema` with optional `judge` and `pipeline` keys.
+*/
+const SuiteFileSingleSchema = TestSuiteSchema.extend({
+	judge: JudgeConfigSchema.optional(),
+	pipeline: PipelineConfigSchema.optional()
+});
+/**
+* Directory suite root with optional inline judge and pipeline orchestration.
+*
+* Extends `SuiteDirectorySchema` with the same optional `judge` and `pipeline` keys.
+*/
+const SuiteFileDirectorySchema = SuiteDirectorySchema.extend({
+	judge: JudgeConfigSchema.optional(),
+	pipeline: PipelineConfigSchema.optional()
+});
+//#endregion
+//#region src/config/suite-document-loader.ts
+/**
+* Load a unified suite.yaml document (suite + optional judge + pipeline).
+*/
+/** Load suite.yaml (or directory) including optional judge and pipeline blocks. */
+async function loadSuiteDocument(filePath, options = {}) {
 	const absolutePath = resolve(filePath);
 	let info;
 	try {
@@ -587,19 +977,12 @@ async function loadSuite(filePath) {
 	} catch (err) {
 		throw new ConfigError(`failed to read suite path: ${err instanceof Error ? err.message : String(err)}`, filePath);
 	}
-	if (info.isDirectory()) return loadSuiteDirectory(absolutePath);
-	return loadSuiteFile(absolutePath);
-}
-async function loadSuiteFile(absolutePath) {
-	let content;
-	try {
-		content = await readFile(absolutePath, "utf8");
-	} catch (err) {
-		throw new ConfigError(`failed to read suite file: ${err instanceof Error ? err.message : String(err)}`, absolutePath);
-	}
-	return parseSuite(content, absolutePath);
+	const strict = options.validateOrchestration !== false;
+	if (info.isDirectory()) return loadSuiteDocumentDirectory(absolutePath, strict);
+	return loadSuiteDocumentFile(absolutePath, strict);
 }
-async function loadSuiteDirectory(dir) {
+/** Load suite.yaml from a directory layout (cases under `cases/`). */
+async function loadSuiteDocumentDirectory(dir, strict) {
 	const suiteYamlPath = join(dir, "suite.yaml");
 	let content;
 	try {
@@ -607,7 +990,7 @@ async function loadSuiteDirectory(dir) {
 	} catch (err) {
 		throw new ConfigError(`missing suite.yaml in suite directory: ${err instanceof Error ? err.message : String(err)}`, dir);
 	}
-	const base = parseSuiteDirectory(content, suiteYamlPath);
+	const { judge, pipeline, suite: base } = parseSuiteFileRoot(content, suiteYamlPath, "directory", strict);
 	const casesDir = join(dir, "cases");
 	const caseFiles = await collectCaseYamlFiles(casesDir);
 	const tagged = base.cases.map((testCase, index) => ({
@@ -636,74 +1019,241 @@ async function loadSuiteDirectory(dir) {
 		cases
 	};
 	resolveSuitePaths(suite, suiteYamlPath);
-	return suite;
+	return buildSuiteDocument(suiteYamlPath, suite, judge, pipeline);
 }
-function parseSuite(yamlContent, sourcePath) {
+/**
+* Load a single suite.yaml file (inline cases).
+*
+* Reads the file as UTF-8, parses it with the "single" layout, resolves the
+* suite's paths against the file location, and assembles the suite document.
+* A read failure is rethrown as a `ConfigError`.
+*/
+async function loadSuiteDocumentFile(absolutePath, strict) {
+	let yamlText;
+	try {
+		yamlText = await readFile(absolutePath, "utf8");
+	} catch (readErr) {
+		const reason = readErr instanceof Error ? readErr.message : String(readErr);
+		throw new ConfigError(`failed to read suite file: ${reason}`, absolutePath);
+	}
+	const parts = parseSuiteFileRoot(yamlText, absolutePath, "single", strict);
+	resolveSuitePaths(parts.suite, absolutePath);
+	return buildSuiteDocument(absolutePath, parts.suite, parts.judge, parts.pipeline);
+}
+/**
+* Parse suite.yaml root and validate against the appropriate schema.
+*
+* When `strict` is true, uses extended schemas that validate `judge:` and
+* `pipeline:` blocks (for `loadSuiteDocument`). When false, uses base schemas
+* that silently strip unknown keys (for `loadSuite`).
+*
+* @param yamlContent Raw YAML text.
+* @param sourcePath Path used in error messages and for path resolution.
+* @param layout "directory" selects the directory schemas/transform; any other
+*   value selects the single-file ones.
+* @param strict Whether to validate orchestration blocks.
+* @returns `{ suite }` only when not strict; otherwise the suite/judge/pipeline
+*   parts produced by `extractSuiteFileParts`.
+* @throws ConfigError on a YAML parse error or a schema validation failure.
+*/
+function parseSuiteFileRoot(yamlContent, sourcePath, layout, strict) {
 	let raw;
 	try {
 		raw = parse(yamlContent);
 	} catch (err) {
 		throw new ConfigError(`YAML parse error: ${err instanceof Error ? err.message : String(err)}`, sourcePath);
 	}
-	const validated = TestSuiteSchema.safeParse(raw);
-	if (!validated.success) throw new ConfigError(`validation failed:\n${formatZodError(validated.error, sourcePath)}`, sourcePath);
-	const suite = transformSuite(validated.data);
-	if (sourcePath) resolveSuitePaths(suite, resolve(sourcePath));
-	return suite;
+	if (!strict) {
+		const validated = (layout === "directory" ? SuiteDirectorySchema : TestSuiteSchema).safeParse(raw);
+		if (!validated.success) throw new ConfigError(`validation failed:\n${formatZodError$2(validated.error, sourcePath)}`, sourcePath);
+		return { suite: (layout === "directory" ? transformSuiteDirectory : transformSuite)(validated.data) };
+	}
+	if (layout === "directory") {
+		const validated = SuiteFileDirectorySchema.safeParse(raw);
+		if (!validated.success) throw new ConfigError(`validation failed:\n${formatZodError$2(validated.error, sourcePath)}`, sourcePath);
+		return extractSuiteFileParts(validated.data, sourcePath, transformSuiteDirectory);
+	}
+	const validated = SuiteFileSingleSchema.safeParse(raw);
+	if (!validated.success) throw new ConfigError(`validation failed:\n${formatZodError$2(validated.error, sourcePath)}`, sourcePath);
+	return extractSuiteFileParts(validated.data, sourcePath, transformSuite);
 }
-function parseSuiteDirectory(yamlContent, sourcePath) {
+/**
+* Split validated YAML into suite, judge, and pipeline with path resolution.
+*
+* @param data Validated suite-file root (suite fields plus optional `judge` / `pipeline`).
+* @param sourcePath Path of the suite file; relative paths are resolved against its directory.
+* @param transform Converts the remaining suite fields into the runtime suite.
+* @returns `{ suite, judge, pipeline }`; `judge` and `pipeline` are `undefined` when absent.
+*/
+function extractSuiteFileParts(data, sourcePath, transform) {
+	const { judge: rawJudge, pipeline: rawPipeline, ...suiteRaw } = data;
+	const suite = transform(suiteRaw);
+	let judge;
+	if (rawJudge) {
+		// resolveGradingConfigPaths REPLACES `config.judge` with a new object, so the
+		// resolved judge has to be read back from the wrapper; holding on to the
+		// object passed in would silently keep the unresolved paths.
+		const gradingConfig = { judge: { ...rawJudge } };
+		resolveGradingConfigPaths(gradingConfig, sourcePath);
+		judge = gradingConfig.judge;
+	}
+	let pipeline;
+	if (rawPipeline) {
+		pipeline = transformPipelineConfig(rawPipeline);
+		pipeline = resolvePipelineConfigPaths(pipeline, sourcePath);
+	}
+	return {
+		suite,
+		judge,
+		pipeline
+	};
+}
+/**
+* Apply default artifact filenames when a pipeline step key is present but paths are omitted.
+*
+* A step is emitted whenever its key is not `undefined` (a `null` value still
+* counts as present). Missing `output` paths fall back to the defaults, and the
+* envelope `projection` falls back to "envelope".
+*/
+function transformPipelineConfig(raw) {
+	const { run, grade, envelope } = raw;
+	const pipeline = {};
+	if (run !== undefined) {
+		pipeline.run = {
+			output: run?.output ?? DEFAULT_PIPELINE_OUTPUTS.run,
+			maxConcurrent: run?.maxConcurrent
+		};
+	}
+	if (grade !== undefined) {
+		pipeline.grade = {
+			input: grade?.input,
+			output: grade?.output ?? DEFAULT_PIPELINE_OUTPUTS.grade,
+			maxConcurrent: grade?.maxConcurrent
+		};
+	}
+	if (envelope !== undefined) {
+		pipeline.envelope = {
+			report: envelope?.report,
+			grading: envelope?.grading,
+			output: envelope?.output ?? DEFAULT_PIPELINE_OUTPUTS.envelope,
+			projection: envelope?.projection ?? "envelope",
+			includeRawStreamEvents: envelope?.includeRawStreamEvents,
+			noTranscript: envelope?.noTranscript
+		};
+	}
+	return pipeline;
+}
+/**
+* Assemble the runtime {@link SuiteDocument} from parsed parts.
+*
+* `suitePath` is normalized with `resolve`; the other three values are stored as given.
+*/
+function buildSuiteDocument(suitePath, suite, judge, pipeline) {
+	const absoluteSuitePath = resolve(suitePath);
+	return { suitePath: absoluteSuitePath, suite, judge, pipeline };
+}
+/**
+* Format a zod validation error as one indented line per issue.
+*
+* Each line reads `<sourcePath> → <dotted.path>: <message>`; the source prefix
+* is omitted when `sourcePath` is falsy and an empty issue path prints "(root)".
+*/
+function formatZodError$2(err, sourcePath) {
+	const lines = [];
+	for (const issue of err.issues) {
+		const location = issue.path.length > 0 ? issue.path.join(".") : "(root)";
+		const label = sourcePath ? `${sourcePath} → ${location}` : location;
+		lines.push(`  ${label}: ${issue.message}`);
+	}
+	return lines.join("\n");
+}
+//#endregion
+//#region src/config/grading-loader.ts
+/**
+* Load standalone grading YAML for `harness-eval grade`.
+*
+* Also accepts unified suite.yaml files with an inline `judge:` block.
+*/
+/**
+* Load grading YAML from disk and resolve relative paths.
+*
+* `filePath` may point at a standalone grading file, a unified suite.yaml with
+* an inline `judge:` block, or a directory (its `suite.yaml` is read instead).
+*
+* File contents are handed straight to `parseGradingConfig`, which parses the
+* YAML once, detects suite roots itself, and reports malformed YAML as a
+* `ConfigError` rather than letting the raw parser error escape.
+*
+* @param filePath Path to the grading file, suite file, or suite directory.
+* @returns The parsed grading config.
+* @throws ConfigError when the path cannot be stat'ed or read, or when parsing or validation fails.
+*/
+async function loadGradingConfig(filePath) {
+	const absolutePath = resolve(filePath);
+	let info;
+	try {
+		info = await stat(absolutePath);
+	} catch (err) {
+		throw new ConfigError(`failed to read grading config: ${err instanceof Error ? err.message : String(err)}`, filePath);
+	}
+	if (info.isDirectory()) return loadGradingFromSuiteYaml(join(absolutePath, "suite.yaml"));
+	let content;
+	try {
+		content = await readFile(absolutePath, "utf8");
+	} catch (err) {
+		throw new ConfigError(`failed to read grading config: ${err instanceof Error ? err.message : String(err)}`, filePath);
+	}
+	return parseGradingConfig(content, absolutePath);
+}
+/**
+* Parse grading YAML from a string.
+*
+* A document detected as a unified suite root is delegated to
+* `parseGradingFromSuiteRaw` (using "suite.yaml" as the path when none is
+* given). Otherwise the document is validated against `GradingConfigSchema`,
+* and relative paths are resolved only when `sourcePath` is provided.
+*
+* @param sourcePath Optional path for error messages and path resolution.
+* @returns An object with a `judge` block.
+* @throws ConfigError on a YAML parse error or a validation failure.
+*/
+function parseGradingConfig(yamlContent, sourcePath) {
 	let raw;
 	try {
 		raw = parse(yamlContent);
 	} catch (err) {
 		throw new ConfigError(`YAML parse error: ${err instanceof Error ? err.message : String(err)}`, sourcePath);
 	}
-	const validated = SuiteDirectorySchema.safeParse(raw);
-	if (!validated.success) throw new ConfigError(`validation failed:\n${formatZodError(validated.error, sourcePath)}`, sourcePath);
-	return transformSuiteDirectory(validated.data);
+	if (isSuiteRoot(raw)) return parseGradingFromSuiteRaw(raw, sourcePath ?? "suite.yaml");
+	const validated = GradingConfigSchema.safeParse(raw);
+	if (!validated.success) throw new ConfigError(`validation failed:\n${formatZodError$1(validated.error, sourcePath)}`, sourcePath);
+	const config = { judge: { ...validated.data.judge } };
+	if (sourcePath) resolveGradingConfigPaths(config, sourcePath);
+	return config;
 }
-/** Parse one case file: single case, array, or `{ cases: [...] }`. */
-function parseCasesFile(yamlContent, sourcePath) {
+/**
+* Detect unified suite.yaml by presence of suite-specific keys (vs standalone grading YAML).
+*
+* True when the parsed root is a non-null object that has a `cases` key, or
+* has both `matrix` and `adapter` keys.
+*/
+function isSuiteRoot(raw) {
+	const isObject = raw !== null && typeof raw === "object";
+	if (!isObject) return false;
+	if ("cases" in raw) return true;
+	return "matrix" in raw && "adapter" in raw;
+}
+/**
+* Read a suite.yaml file and extract its grading config.
+*
+* Read failures and YAML parse errors are rethrown as `ConfigError` tagged with
+* `suiteYamlPath`; the parsed root is then handed to `parseGradingFromSuiteRaw`.
+*/
+async function loadGradingFromSuiteYaml(suiteYamlPath) {
+	let content;
+	try {
+		content = await readFile(suiteYamlPath, "utf8");
+	} catch (err) {
+		throw new ConfigError(`failed to read suite file: ${err instanceof Error ? err.message : String(err)}`, suiteYamlPath);
+	}
 	let raw;
 	try {
-		raw = parse(yamlContent);
+		raw = parse(content);
 	} catch (err) {
-		throw new ConfigError(`YAML parse error: ${err instanceof Error ? err.message : String(err)}`, sourcePath);
+		throw new ConfigError(`YAML parse error: ${err instanceof Error ? err.message : String(err)}`, suiteYamlPath);
 	}
-	return transformTestCases(extractRawCases(raw, sourcePath), sourcePath ?? "cases");
+	return parseGradingFromSuiteRaw(raw, suiteYamlPath);
 }
-function extractRawCases(raw, sourcePath) {
-	if (Array.isArray(raw)) return raw.map((item, index) => validateRawCase(item, sourcePath, index));
-	if (raw && typeof raw === "object") {
-		const obj = raw;
-		if (Array.isArray(obj.cases)) return obj.cases.map((item, index) => validateRawCase(item, sourcePath, index));
-		if ("id" in obj && "prompt" in obj && "assertions" in obj) return [validateRawCase(raw, sourcePath, 0)];
+/**
+* Extract the grading config from an already-parsed unified suite root.
+*
+* The single-file suite schema is tried first, then the directory schema. The
+* first schema that validates must carry a `judge` block, otherwise a
+* `ConfigError` ("suite file has no judge block") is thrown. The judge is
+* copied and its relative paths are resolved against `sourcePath`.
+*
+* @returns An object with the resolved `judge` block.
+* @throws ConfigError when neither schema validates or no judge block is present.
+*/
+function parseGradingFromSuiteRaw(raw, sourcePath) {
+	const single = SuiteFileSingleSchema.safeParse(raw);
+	if (single.success) {
+		if (!single.data.judge) throw new ConfigError("suite file has no judge block", sourcePath);
+		const config = { judge: { ...single.data.judge } };
+		resolveGradingConfigPaths(config, sourcePath);
+		return config;
 	}
-	throw new ConfigError("expected a case object, array of cases, or { cases: [...] }", sourcePath);
+	const directory = SuiteFileDirectorySchema.safeParse(raw);
+	if (directory.success) {
+		if (!directory.data.judge) throw new ConfigError("suite file has no judge block", sourcePath);
+		const config = { judge: { ...directory.data.judge } };
+		resolveGradingConfigPaths(config, sourcePath);
+		return config;
+	}
+	/* NOTE(review): both parses failed here, so `directory.error` is presumably always set and the `?? single.error` fallback looks unreachable — confirm. */
+	throw new ConfigError(`validation failed:\n${formatZodError$1(directory.error ?? single.error, sourcePath)}`, sourcePath);
 }
-function validateRawCase(raw, sourcePath, index) {
-	const validated = TestCaseSchema.safeParse(raw);
+/**
+* Format a zod validation error with optional source file prefix.
+*
+* Produces one two-space-indented line per issue, joined with newlines; an
+* empty issue path is shown as "(root)".
+*/
+function formatZodError$1(err, sourcePath) {
+	const describeIssue = ({ path: issuePath, message }) => {
+		const dotted = issuePath.length > 0 ? issuePath.join(".") : "(root)";
+		const prefix = sourcePath ? `${sourcePath} → ${dotted}` : dotted;
+		return `  ${prefix}: ${message}`;
+	};
+	return err.issues.map((issue) => describeIssue(issue)).join("\n");
+}
+//#endregion
+//#region src/config/loader.ts
+/**
+* Load a `TestSuite` from a YAML file, directory, or string.
+*
+* For unified suite.yaml with optional `judge:` and `pipeline:` blocks,
+* use {@link loadSuiteDocument}.
+*/
+/**
+* Load only the `TestSuite` portion from a suite file or suite directory.
+*
+* Delegates to {@link loadSuiteDocument} with orchestration validation turned
+* off, so `judge:` / `pipeline:` blocks are not validated and malformed
+* orchestration YAML does not break callers that only need the suite.
+* Use {@link loadSuiteDocument} when you need validated orchestration metadata.
+*/
+async function loadSuite(filePath) {
+	const { suite } = await loadSuiteDocument(filePath, { validateOrchestration: false });
+	return suite;
+}
+/**
+* Parse suite YAML from a string (single-file layout with inline cases).
+*
+* Unknown top-level keys such as `judge` and `pipeline` are stripped.
+*
+* Validates against `TestSuiteSchema`; suite paths are resolved only when
+* `sourcePath` is provided.
+*
+* @param sourcePath Optional path for error messages and path resolution.
+* @throws ConfigError on a YAML parse error or a validation failure.
+*/
+function parseSuite(yamlContent, sourcePath) {
+	let raw;
+	try {
+		raw = parse(yamlContent);
+	} catch (err) {
+		throw new ConfigError(`YAML parse error: ${err instanceof Error ? err.message : String(err)}`, sourcePath);
+	}
+	const validated = TestSuiteSchema.safeParse(raw);
 	if (!validated.success) throw new ConfigError(`validation failed:\n${formatZodError(validated.error, sourcePath)}`, sourcePath);
-	return validated.data;
+	const suite = transformSuite(validated.data);
+	if (sourcePath) resolveSuitePaths(suite, resolve(sourcePath));
+	return suite;
 }
-async function collectCaseYamlFiles(casesDir) {
-	const files = [];
-	async function walk(dir) {
-		let entries;
-		try {
-			entries = await readdir(dir, { withFileTypes: true });
-		} catch (err) {
-			if (err instanceof Error && "code" in err && err.code === "ENOENT") return;
-			throw err;
-		}
-		for (const entry of entries) {
-			const fullPath = join(dir, entry.name);
-			if (entry.isDirectory()) await walk(fullPath);
-			else if (entry.isFile() && (entry.name.endsWith(".yaml") || entry.name.endsWith(".yml"))) files.push(fullPath);
-		}
+/**
+* Parse `suite.yaml` for directory layout (cases may be omitted). @internal
+*
+* Validates against `SuiteDirectorySchema` and returns the transformed result;
+* no path resolution is performed here.
+*
+* @throws ConfigError on a YAML parse error or a validation failure.
+*/
+function parseSuiteDirectory(yamlContent, sourcePath) {
+	let raw;
+	try {
+		raw = parse(yamlContent);
+	} catch (err) {
+		throw new ConfigError(`YAML parse error: ${err instanceof Error ? err.message : String(err)}`, sourcePath);
 	}
-	await walk(casesDir);
-	return files.sort();
+	const validated = SuiteDirectorySchema.safeParse(raw);
+	if (!validated.success) throw new ConfigError(`validation failed:\n${formatZodError(validated.error, sourcePath)}`, sourcePath);
+	return transformSuiteDirectory(validated.data);
 }
 function formatZodError(err, sourcePath) {
 	return err.issues.map((issue) => {
@@ -712,6 +1262,6 @@ function formatZodError(err, sourcePath) {
 	}).join("\n");
 }
 //#endregion
-export { parseGradingConfig as a, loadGradingConfig as i, parseCasesFile as n, ConfigError as o, parseSuite as r, loadSuite as t };
+export { parseGradingConfig as a, parseCasesFile as c, loadGradingConfig as i, ConfigError as l, parseSuite as n, loadSuiteDocument as o, parseSuiteDirectory as r, DEFAULT_PIPELINE_OUTPUTS as s, loadSuite as t };
-//# sourceMappingURL=loader-BCnFJ8rm.js.map
+//# sourceMappingURL=loader-DnQ6Jt0i.js.map