@alis-build/harness-eval 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +92 -8
- package/dist/adapters/claude-code/index.d.ts +2 -2
- package/dist/adapters/claude-code/index.js +2 -1
- package/dist/adapters/codex/index.d.ts +68 -0
- package/dist/adapters/codex/index.js +3 -0
- package/dist/{claude-code-DZ4Vkgp6.js → claude-code-C_7hxC8z.js} +3 -245
- package/dist/claude-code-C_7hxC8z.js.map +1 -0
- package/dist/cli/bin.js +131 -151
- package/dist/cli/bin.js.map +1 -1
- package/dist/codex-0cHO2te9.js +496 -0
- package/dist/codex-0cHO2te9.js.map +1 -0
- package/dist/config/loader.d.ts +2 -2
- package/dist/config/loader.js +2 -2
- package/dist/{index-V22PrR0p.d.ts → index-DnvP1UBl.d.ts} +2 -2
- package/dist/index.d.ts +132 -6
- package/dist/index.js +6 -5
- package/dist/index.js.map +1 -1
- package/dist/loader-B1WmGGzf.d.ts +107 -0
- package/dist/{loader-DcI0KfRX.js → loader-DnQ6Jt0i.js} +472 -209
- package/dist/loader-DnQ6Jt0i.js.map +1 -0
- package/dist/{projections-BcX7w-f6.js → reporter-Biy-5-9M.js} +1335 -758
- package/dist/reporter-Biy-5-9M.js.map +1 -0
- package/dist/runner/suite.d.ts +1 -1
- package/dist/runner/suite.js +1 -1
- package/dist/{suite-DPJMIEbu.d.ts → suite-BEShV0by.d.ts} +2 -2
- package/dist/{suite-Dlzl-HI0.js → suite-BcP64nlb.js} +16 -2
- package/dist/{suite-Dlzl-HI0.js.map → suite-BcP64nlb.js.map} +1 -1
- package/dist/{types-CD3TwOtZ.d.ts → types-0QkNVyp9.d.ts} +2 -2
- package/dist/types-Bac8_Ixb.js +246 -0
- package/dist/types-Bac8_Ixb.js.map +1 -0
- package/dist/types-Bu8uOZZN.d.ts +77 -0
- package/dist/{types-B9H4IZtA.d.ts → types-C0gBkl0-.d.ts} +3 -2
- package/package.json +6 -2
- package/dist/claude-code-DZ4Vkgp6.js.map +0 -1
- package/dist/loader-C9yQHUPC.d.ts +0 -50
- package/dist/loader-DcI0KfRX.js.map +0 -1
- package/dist/projections-BcX7w-f6.js.map +0 -1
|
@@ -3,80 +3,6 @@ import { readFile, readdir, stat } from "node:fs/promises";
|
|
|
3
3
|
import { isAbsolute, join, relative, resolve } from "node:path";
|
|
4
4
|
import { parse } from "yaml";
|
|
5
5
|
import { z } from "zod";
|
|
6
|
-
//#region src/config/paths.ts
|
|
7
|
-
/**
|
|
8
|
-
* Resolve relative paths in suite config against the suite file directory.
|
|
9
|
-
*
|
|
10
|
-
* YAML authors write paths relative to the suite file; this module absolutizes
|
|
11
|
-
* them at load time so the runner and adapters receive filesystem-ready values.
|
|
12
|
-
* Tilde-prefixed paths and inline JSON blobs (settings starting with `{`) are
|
|
13
|
-
* left unchanged.
|
|
14
|
-
*/
|
|
15
|
-
/** Resolve a single path relative to `suiteDir` unless already absolute or `~/`. */
|
|
16
|
-
function resolvePath(value, suiteDir) {
|
|
17
|
-
if (isAbsolute(value) || value.startsWith("~/")) return value;
|
|
18
|
-
return join(suiteDir, value);
|
|
19
|
-
}
|
|
20
|
-
/** Resolve Claude Code-specific path fields within a config block. */
|
|
21
|
-
function resolveClaudeCodePaths(block, suiteDir) {
|
|
22
|
-
const resolved = { ...block };
|
|
23
|
-
if (typeof resolved.mcpConfig === "string") resolved.mcpConfig = resolvePath(resolved.mcpConfig, suiteDir);
|
|
24
|
-
if (Array.isArray(resolved.pluginDirs)) resolved.pluginDirs = resolved.pluginDirs.map((p) => typeof p === "string" ? resolvePath(p, suiteDir) : p);
|
|
25
|
-
if (Array.isArray(resolved.addDirs)) resolved.addDirs = resolved.addDirs.map((p) => typeof p === "string" ? resolvePath(p, suiteDir) : p);
|
|
26
|
-
for (const field of [
|
|
27
|
-
"systemPromptFile",
|
|
28
|
-
"appendSystemPromptFile",
|
|
29
|
-
"debugFile"
|
|
30
|
-
]) {
|
|
31
|
-
const value = resolved[field];
|
|
32
|
-
if (typeof value === "string" && !value.trim().startsWith("{")) resolved[field] = resolvePath(value, suiteDir);
|
|
33
|
-
}
|
|
34
|
-
if (typeof resolved.settings === "string" && !resolved.settings.trim().startsWith("{")) resolved.settings = resolvePath(resolved.settings, suiteDir);
|
|
35
|
-
return resolved;
|
|
36
|
-
}
|
|
37
|
-
/** Resolve relative paths in a config layer relative to `suiteDir`. */
|
|
38
|
-
function resolveConfigPaths(config, suiteDir) {
|
|
39
|
-
if (!config) return void 0;
|
|
40
|
-
const resolved = { ...config };
|
|
41
|
-
if (typeof resolved.cwd === "string") resolved.cwd = resolvePath(resolved.cwd, suiteDir);
|
|
42
|
-
if (resolved.claudeCode && typeof resolved.claudeCode === "object" && !Array.isArray(resolved.claudeCode)) resolved.claudeCode = resolveClaudeCodePaths(resolved.claudeCode, suiteDir);
|
|
43
|
-
return resolved;
|
|
44
|
-
}
|
|
45
|
-
/** Resolve paths on an entire suite after load. */
|
|
46
|
-
function resolveSuitePaths(suite, suiteFilePath) {
|
|
47
|
-
const suiteDir = configFileDir(suiteFilePath);
|
|
48
|
-
suite.defaultConfig = resolveConfigPaths(suite.defaultConfig, suiteDir);
|
|
49
|
-
for (const cell of suite.matrix) cell.config = resolveConfigPaths(cell.config, suiteDir) ?? cell.config;
|
|
50
|
-
for (const testCase of suite.cases) testCase.config = resolveConfigPaths(testCase.config, suiteDir);
|
|
51
|
-
}
|
|
52
|
-
/** Parent directory of a suite or grading config file path. */
|
|
53
|
-
function configFileDir(filePath) {
|
|
54
|
-
return filePath.includes("/") || filePath.includes("\\") ? filePath.replace(/[/\\][^/\\]+$/, "") : ".";
|
|
55
|
-
}
|
|
56
|
-
/**
|
|
57
|
-
* Heuristically resolve env var values that look like relative file paths.
|
|
58
|
-
*
|
|
59
|
-
* Used for grading config where credential or config paths may be expressed
|
|
60
|
-
* relative to the grading YAML location.
|
|
61
|
-
*/
|
|
62
|
-
function resolveEnvPaths(env, baseDir) {
|
|
63
|
-
const resolved = {};
|
|
64
|
-
for (const [key, value] of Object.entries(env)) if (value.startsWith("./") || value.startsWith("../") || value.includes("/") && !value.startsWith("http")) resolved[key] = resolvePath(value, baseDir);
|
|
65
|
-
else resolved[key] = value;
|
|
66
|
-
return resolved;
|
|
67
|
-
}
|
|
68
|
-
/** Resolve relative paths in a standalone grading config file. */
|
|
69
|
-
function resolveGradingConfigPaths(config, configFilePath) {
|
|
70
|
-
const baseDir = configFileDir(configFilePath);
|
|
71
|
-
const { adapter, maxConcurrent, ...rest } = config.judge;
|
|
72
|
-
config.judge = {
|
|
73
|
-
...resolveConfigPaths(rest, baseDir) ?? rest,
|
|
74
|
-
adapter,
|
|
75
|
-
maxConcurrent
|
|
76
|
-
};
|
|
77
|
-
if (config.judge.env) config.judge.env = resolveEnvPaths(config.judge.env, baseDir);
|
|
78
|
-
}
|
|
79
|
-
//#endregion
|
|
80
6
|
//#region src/config/schema.ts
|
|
81
7
|
/**
|
|
82
8
|
* zod schemas for the YAML on-disk shape.
|
|
@@ -132,13 +58,40 @@ const ClaudeCodeConfigSchema = z.object({
|
|
|
132
58
|
maxTurns: z.number().int().positive(),
|
|
133
59
|
isolateConfig: z.boolean()
|
|
134
60
|
}).partial();
|
|
61
|
+
/** Codex CLI adapter-specific options (nested under `codex`). */
|
|
62
|
+
const CodexConfigSchema = z.object({
|
|
63
|
+
binary: z.string(),
|
|
64
|
+
profile: z.string(),
|
|
65
|
+
sandbox: z.enum([
|
|
66
|
+
"read-only",
|
|
67
|
+
"workspace-write",
|
|
68
|
+
"danger-full-access"
|
|
69
|
+
]),
|
|
70
|
+
addDirs: z.array(z.string()),
|
|
71
|
+
configOverrides: z.array(z.string()),
|
|
72
|
+
askForApproval: z.enum([
|
|
73
|
+
"untrusted",
|
|
74
|
+
"on-request",
|
|
75
|
+
"never"
|
|
76
|
+
]),
|
|
77
|
+
dangerouslyBypassApprovalsAndSandbox: z.boolean(),
|
|
78
|
+
dangerouslyBypassHookTrust: z.boolean(),
|
|
79
|
+
ephemeral: z.boolean(),
|
|
80
|
+
ignoreUserConfig: z.boolean(),
|
|
81
|
+
skipGitRepoCheck: z.boolean(),
|
|
82
|
+
outputSchema: z.string(),
|
|
83
|
+
outputLastMessage: z.string(),
|
|
84
|
+
captureLastMessage: z.boolean(),
|
|
85
|
+
isolateConfig: z.boolean()
|
|
86
|
+
}).partial();
|
|
135
87
|
/** Generic + nested adapter config for one layer (defaultConfig, case, cell). */
|
|
136
88
|
const ConfigPartialSchema = z.object({
|
|
137
89
|
model: z.string(),
|
|
138
90
|
cwd: z.string(),
|
|
139
91
|
timeoutMs: z.number().int().positive(),
|
|
140
92
|
env: z.record(z.string(), z.string()),
|
|
141
|
-
claudeCode: ClaudeCodeConfigSchema
|
|
93
|
+
claudeCode: ClaudeCodeConfigSchema,
|
|
94
|
+
codex: CodexConfigSchema
|
|
142
95
|
}).partial();
|
|
143
96
|
/** A matrix cell — one point in the configuration matrix. */
|
|
144
97
|
const MatrixCellSchema = z.object({
|
|
@@ -763,84 +716,260 @@ function typeOf(x) {
|
|
|
763
716
|
return typeof x;
|
|
764
717
|
}
|
|
765
718
|
//#endregion
|
|
766
|
-
//#region src/config/
|
|
719
|
+
//#region src/config/loader-internals.ts
|
|
767
720
|
/**
|
|
768
|
-
*
|
|
769
|
-
*
|
|
770
|
-
* The top-level `judge` block reuses {@link ConfigPartialSchema} fields plus
|
|
771
|
-
* grader-specific concurrency and system-instruction overrides.
|
|
721
|
+
* Shared suite loader helpers (case file collection and parsing).
|
|
772
722
|
*/
|
|
773
|
-
/**
|
|
774
|
-
|
|
775
|
-
adapter: z.string().optional(),
|
|
776
|
-
maxConcurrent: z.number().int().positive().optional(),
|
|
777
|
-
/** Optional judge prompt prefix (maps to upstream system_instruction). */
|
|
778
|
-
system_instruction: z.string().optional()
|
|
779
|
-
});
|
|
780
|
-
const GradingConfigSchema = z.object({ judge: JudgeConfigSchema });
|
|
781
|
-
//#endregion
|
|
782
|
-
//#region src/config/grading-loader.ts
|
|
783
|
-
/**
|
|
784
|
-
* Load standalone grading YAML for `harness-eval grade`.
|
|
785
|
-
*
|
|
786
|
-
* Grading config defines the judge subprocess (model, concurrency, Claude Code
|
|
787
|
-
* flags) separately from the suite under test.
|
|
788
|
-
*/
|
|
789
|
-
/** Load grading YAML from disk and resolve relative paths. */
|
|
790
|
-
async function loadGradingConfig(filePath) {
|
|
791
|
-
const absolutePath = resolve(filePath);
|
|
792
|
-
let content;
|
|
793
|
-
try {
|
|
794
|
-
content = await readFile(absolutePath, "utf8");
|
|
795
|
-
} catch (err) {
|
|
796
|
-
throw new ConfigError(`failed to read grading config: ${err instanceof Error ? err.message : String(err)}`, filePath);
|
|
797
|
-
}
|
|
798
|
-
return parseGradingConfig(content, absolutePath);
|
|
799
|
-
}
|
|
800
|
-
/**
|
|
801
|
-
* Parse grading YAML from a string.
|
|
802
|
-
*
|
|
803
|
-
* @param sourcePath Optional path for error messages and path resolution.
|
|
804
|
-
*/
|
|
805
|
-
function parseGradingConfig(yamlContent, sourcePath) {
|
|
723
|
+
/** Parse one case file: single case, array, or `{ cases: [...] }`. */
|
|
724
|
+
function parseCasesFile(yamlContent, sourcePath) {
|
|
806
725
|
let raw;
|
|
807
726
|
try {
|
|
808
727
|
raw = parse(yamlContent);
|
|
809
728
|
} catch (err) {
|
|
810
729
|
throw new ConfigError(`YAML parse error: ${err instanceof Error ? err.message : String(err)}`, sourcePath);
|
|
811
730
|
}
|
|
812
|
-
|
|
813
|
-
if (!validated.success) throw new ConfigError(`validation failed:\n${formatZodError$1(validated.error, sourcePath)}`, sourcePath);
|
|
814
|
-
const config = { judge: { ...validated.data.judge } };
|
|
815
|
-
if (sourcePath) resolveGradingConfigPaths(config, sourcePath);
|
|
816
|
-
return config;
|
|
731
|
+
return transformTestCases(extractRawCases(raw, sourcePath), sourcePath ?? "cases");
|
|
817
732
|
}
|
|
818
|
-
|
|
819
|
-
|
|
733
|
+
function extractRawCases(raw, sourcePath) {
|
|
734
|
+
if (Array.isArray(raw)) return raw.map((item, index) => validateRawCase(item, sourcePath, index));
|
|
735
|
+
if (raw && typeof raw === "object") {
|
|
736
|
+
const obj = raw;
|
|
737
|
+
if (Array.isArray(obj.cases)) return obj.cases.map((item, index) => validateRawCase(item, sourcePath, index));
|
|
738
|
+
if ("id" in obj && "prompt" in obj && "assertions" in obj) return [validateRawCase(raw, sourcePath, 0)];
|
|
739
|
+
}
|
|
740
|
+
throw new ConfigError("expected a case object, array of cases, or { cases: [...] }", sourcePath);
|
|
741
|
+
}
|
|
742
|
+
function validateRawCase(raw, sourcePath, index) {
|
|
743
|
+
const validated = TestCaseSchema.safeParse(raw);
|
|
744
|
+
if (!validated.success) throw new ConfigError(`validation failed:\n${formatZodError$3(validated.error, sourcePath)}`, sourcePath);
|
|
745
|
+
return validated.data;
|
|
746
|
+
}
|
|
747
|
+
/** Recursively collect `.yaml` / `.yml` files under `casesDir`. */
|
|
748
|
+
async function collectCaseYamlFiles(casesDir) {
|
|
749
|
+
const files = [];
|
|
750
|
+
async function walk(dir) {
|
|
751
|
+
let entries;
|
|
752
|
+
try {
|
|
753
|
+
entries = await readdir(dir, { withFileTypes: true });
|
|
754
|
+
} catch (err) {
|
|
755
|
+
if (err instanceof Error && "code" in err && err.code === "ENOENT") return;
|
|
756
|
+
throw err;
|
|
757
|
+
}
|
|
758
|
+
for (const entry of entries) {
|
|
759
|
+
const fullPath = join(dir, entry.name);
|
|
760
|
+
if (entry.isDirectory()) await walk(fullPath);
|
|
761
|
+
else if (entry.isFile() && (entry.name.endsWith(".yaml") || entry.name.endsWith(".yml"))) files.push(fullPath);
|
|
762
|
+
}
|
|
763
|
+
}
|
|
764
|
+
await walk(casesDir);
|
|
765
|
+
return files.sort();
|
|
766
|
+
}
|
|
767
|
+
function formatZodError$3(err, sourcePath) {
|
|
820
768
|
return err.issues.map((issue) => {
|
|
821
769
|
const path = issue.path.length > 0 ? issue.path.join(".") : "(root)";
|
|
822
770
|
return ` ${sourcePath ? `${sourcePath} → ${path}` : path}: ${issue.message}`;
|
|
823
771
|
}).join("\n");
|
|
824
772
|
}
|
|
825
773
|
//#endregion
|
|
826
|
-
//#region src/config/
|
|
774
|
+
//#region src/config/pipeline-schema.ts
|
|
827
775
|
/**
|
|
828
|
-
*
|
|
776
|
+
* Zod schemas for optional `pipeline:` block in suite.yaml.
|
|
829
777
|
*
|
|
830
|
-
*
|
|
831
|
-
|
|
832
|
-
|
|
833
|
-
|
|
778
|
+
* Step presence under `pipeline` enables orchestration via `harness-eval pipeline`.
|
|
779
|
+
*/
|
|
780
|
+
/** `pipeline.run` step — harness eval run. */
|
|
781
|
+
const PipelineRunStepSchema = z.object({
|
|
782
|
+
output: z.string().min(1).optional(),
|
|
783
|
+
maxConcurrent: z.number().int().positive().optional()
|
|
784
|
+
}).optional();
|
|
785
|
+
/** `pipeline.grade` step — LLM outcome grading. */
|
|
786
|
+
const PipelineGradeStepSchema = z.object({
|
|
787
|
+
input: z.string().min(1).optional(),
|
|
788
|
+
output: z.string().min(1).optional(),
|
|
789
|
+
maxConcurrent: z.number().int().positive().optional()
|
|
790
|
+
}).optional();
|
|
791
|
+
/** `pipeline.envelope` step — EvalRunEnvelope export. */
|
|
792
|
+
const PipelineEnvelopeStepSchema = z.object({
|
|
793
|
+
report: z.string().min(1).optional(),
|
|
794
|
+
grading: z.string().min(1).optional(),
|
|
795
|
+
output: z.string().min(1).optional(),
|
|
796
|
+
projection: z.enum([
|
|
797
|
+
"envelope",
|
|
798
|
+
"trajectory",
|
|
799
|
+
"instances"
|
|
800
|
+
]).optional(),
|
|
801
|
+
includeRawStreamEvents: z.boolean().optional(),
|
|
802
|
+
noTranscript: z.boolean().optional()
|
|
803
|
+
}).optional();
|
|
804
|
+
/** Top-level optional pipeline block in suite.yaml. */
|
|
805
|
+
const PipelineConfigSchema = z.object({
|
|
806
|
+
run: PipelineRunStepSchema,
|
|
807
|
+
grade: PipelineGradeStepSchema,
|
|
808
|
+
envelope: PipelineEnvelopeStepSchema
|
|
809
|
+
}).partial();
|
|
810
|
+
/** Default artifact filenames relative to the suite.yaml directory. */
|
|
811
|
+
const DEFAULT_PIPELINE_OUTPUTS = {
|
|
812
|
+
run: "report.json",
|
|
813
|
+
grade: "grading.json",
|
|
814
|
+
envelope: "envelope.json"
|
|
815
|
+
};
|
|
816
|
+
//#endregion
|
|
817
|
+
//#region src/config/paths.ts
|
|
818
|
+
/**
|
|
819
|
+
* Resolve relative paths in suite config against the suite file directory.
|
|
834
820
|
*
|
|
835
|
-
*
|
|
836
|
-
*
|
|
821
|
+
* YAML authors write paths relative to the suite file; this module absolutizes
|
|
822
|
+
* them at load time so the runner and adapters receive filesystem-ready values.
|
|
823
|
+
* Tilde-prefixed paths and inline JSON blobs (settings starting with `{`) are
|
|
824
|
+
* left unchanged.
|
|
837
825
|
*/
|
|
826
|
+
/** Resolve a single path relative to `suiteDir` unless already absolute or `~/`. */
|
|
827
|
+
function resolvePath(value, suiteDir) {
|
|
828
|
+
if (isAbsolute(value) || value.startsWith("~/")) return value;
|
|
829
|
+
return join(suiteDir, value);
|
|
830
|
+
}
|
|
831
|
+
/** Resolve Claude Code-specific path fields within a config block. */
|
|
832
|
+
function resolveClaudeCodePaths(block, suiteDir) {
|
|
833
|
+
const resolved = { ...block };
|
|
834
|
+
if (typeof resolved.mcpConfig === "string") resolved.mcpConfig = resolvePath(resolved.mcpConfig, suiteDir);
|
|
835
|
+
if (Array.isArray(resolved.pluginDirs)) resolved.pluginDirs = resolved.pluginDirs.map((p) => typeof p === "string" ? resolvePath(p, suiteDir) : p);
|
|
836
|
+
if (Array.isArray(resolved.addDirs)) resolved.addDirs = resolved.addDirs.map((p) => typeof p === "string" ? resolvePath(p, suiteDir) : p);
|
|
837
|
+
for (const field of [
|
|
838
|
+
"systemPromptFile",
|
|
839
|
+
"appendSystemPromptFile",
|
|
840
|
+
"debugFile"
|
|
841
|
+
]) {
|
|
842
|
+
const value = resolved[field];
|
|
843
|
+
if (typeof value === "string" && !value.trim().startsWith("{")) resolved[field] = resolvePath(value, suiteDir);
|
|
844
|
+
}
|
|
845
|
+
if (typeof resolved.settings === "string" && !resolved.settings.trim().startsWith("{")) resolved.settings = resolvePath(resolved.settings, suiteDir);
|
|
846
|
+
return resolved;
|
|
847
|
+
}
|
|
848
|
+
/** Resolve Codex-specific path fields within a config block. */
|
|
849
|
+
function resolveCodexPaths(block, suiteDir) {
|
|
850
|
+
const resolved = { ...block };
|
|
851
|
+
if (Array.isArray(resolved.addDirs)) resolved.addDirs = resolved.addDirs.map((p) => typeof p === "string" ? resolvePath(p, suiteDir) : p);
|
|
852
|
+
for (const field of ["outputSchema", "outputLastMessage"]) {
|
|
853
|
+
const value = resolved[field];
|
|
854
|
+
if (typeof value === "string") resolved[field] = resolvePath(value, suiteDir);
|
|
855
|
+
}
|
|
856
|
+
return resolved;
|
|
857
|
+
}
|
|
858
|
+
/** Resolve relative paths in a config layer relative to `suiteDir`. */
|
|
859
|
+
function resolveConfigPaths(config, suiteDir) {
|
|
860
|
+
if (!config) return void 0;
|
|
861
|
+
const resolved = { ...config };
|
|
862
|
+
if (typeof resolved.cwd === "string") resolved.cwd = resolvePath(resolved.cwd, suiteDir);
|
|
863
|
+
if (resolved.claudeCode && typeof resolved.claudeCode === "object" && !Array.isArray(resolved.claudeCode)) resolved.claudeCode = resolveClaudeCodePaths(resolved.claudeCode, suiteDir);
|
|
864
|
+
if (resolved.codex && typeof resolved.codex === "object" && !Array.isArray(resolved.codex)) resolved.codex = resolveCodexPaths(resolved.codex, suiteDir);
|
|
865
|
+
return resolved;
|
|
866
|
+
}
|
|
867
|
+
/** Resolve paths on an entire suite after load. */
|
|
868
|
+
function resolveSuitePaths(suite, suiteFilePath) {
|
|
869
|
+
const suiteDir = configFileDir(suiteFilePath);
|
|
870
|
+
suite.defaultConfig = resolveConfigPaths(suite.defaultConfig, suiteDir);
|
|
871
|
+
for (const cell of suite.matrix) cell.config = resolveConfigPaths(cell.config, suiteDir) ?? cell.config;
|
|
872
|
+
for (const testCase of suite.cases) testCase.config = resolveConfigPaths(testCase.config, suiteDir);
|
|
873
|
+
}
|
|
874
|
+
/** Parent directory of a suite or grading config file path. */
|
|
875
|
+
function configFileDir(filePath) {
|
|
876
|
+
return filePath.includes("/") || filePath.includes("\\") ? filePath.replace(/[/\\][^/\\]+$/, "") : ".";
|
|
877
|
+
}
|
|
838
878
|
/**
|
|
839
|
-
*
|
|
879
|
+
* Heuristically resolve env var values that look like relative file paths.
|
|
840
880
|
*
|
|
841
|
-
*
|
|
881
|
+
* Used for grading config where credential or config paths may be expressed
|
|
882
|
+
* relative to the grading YAML location.
|
|
842
883
|
*/
|
|
843
|
-
|
|
884
|
+
function resolveEnvPaths(env, baseDir) {
|
|
885
|
+
const resolved = {};
|
|
886
|
+
for (const [key, value] of Object.entries(env)) if (value.startsWith("./") || value.startsWith("../")) resolved[key] = resolvePath(value, baseDir);
|
|
887
|
+
else resolved[key] = value;
|
|
888
|
+
return resolved;
|
|
889
|
+
}
|
|
890
|
+
/** Resolve relative paths in a standalone grading config file. */
|
|
891
|
+
function resolveGradingConfigPaths(config, configFilePath) {
|
|
892
|
+
const baseDir = configFileDir(configFilePath);
|
|
893
|
+
const { adapter, maxConcurrent, ...rest } = config.judge;
|
|
894
|
+
config.judge = {
|
|
895
|
+
...resolveConfigPaths(rest, baseDir) ?? rest,
|
|
896
|
+
adapter,
|
|
897
|
+
maxConcurrent
|
|
898
|
+
};
|
|
899
|
+
if (config.judge.env) config.judge.env = resolveEnvPaths(config.judge.env, baseDir);
|
|
900
|
+
}
|
|
901
|
+
/** Resolve a pipeline artifact path relative to the suite.yaml directory. */
|
|
902
|
+
function resolvePipelinePath(value, defaultRelative, suiteDir) {
|
|
903
|
+
return resolvePath(value ?? defaultRelative, suiteDir);
|
|
904
|
+
}
|
|
905
|
+
/** Resolve relative paths in a parsed pipeline config. */
|
|
906
|
+
function resolvePipelineConfigPaths(pipeline, suiteFilePath) {
|
|
907
|
+
const suiteDir = configFileDir(suiteFilePath);
|
|
908
|
+
const resolved = {};
|
|
909
|
+
if (pipeline.run) resolved.run = resolvePipelineRunStep(pipeline.run, suiteDir);
|
|
910
|
+
if (pipeline.grade) resolved.grade = resolvePipelineGradeStep(pipeline.grade, suiteDir);
|
|
911
|
+
if (pipeline.envelope) resolved.envelope = resolvePipelineEnvelopeStep(pipeline.envelope, suiteDir);
|
|
912
|
+
return resolved;
|
|
913
|
+
}
|
|
914
|
+
/** Resolve one pipeline step's run output path. */
|
|
915
|
+
function resolvePipelineRunStep(step, suiteDir) {
|
|
916
|
+
return {
|
|
917
|
+
...step,
|
|
918
|
+
output: resolvePipelinePath(step.output, DEFAULT_PIPELINE_OUTPUTS.run, suiteDir)
|
|
919
|
+
};
|
|
920
|
+
}
|
|
921
|
+
/** Resolve grade step input (optional) and output paths. */
|
|
922
|
+
function resolvePipelineGradeStep(step, suiteDir) {
|
|
923
|
+
return {
|
|
924
|
+
...step,
|
|
925
|
+
input: step.input ? resolvePipelinePath(step.input, DEFAULT_PIPELINE_OUTPUTS.run, suiteDir) : void 0,
|
|
926
|
+
output: resolvePipelinePath(step.output, DEFAULT_PIPELINE_OUTPUTS.grade, suiteDir)
|
|
927
|
+
};
|
|
928
|
+
}
|
|
929
|
+
/** Resolve envelope step report, grading, and output paths. */
|
|
930
|
+
function resolvePipelineEnvelopeStep(step, suiteDir) {
|
|
931
|
+
return {
|
|
932
|
+
...step,
|
|
933
|
+
report: step.report ? resolvePipelinePath(step.report, DEFAULT_PIPELINE_OUTPUTS.run, suiteDir) : void 0,
|
|
934
|
+
grading: step.grading ? resolvePipelinePath(step.grading, DEFAULT_PIPELINE_OUTPUTS.grade, suiteDir) : void 0,
|
|
935
|
+
output: resolvePipelinePath(step.output, DEFAULT_PIPELINE_OUTPUTS.envelope, suiteDir)
|
|
936
|
+
};
|
|
937
|
+
}
|
|
938
|
+
//#endregion
|
|
939
|
+
//#region src/config/grading-schema.ts
|
|
940
|
+
/**
|
|
941
|
+
* Zod schema for standalone grading YAML (`grading.yaml`).
|
|
942
|
+
*
|
|
943
|
+
* The top-level `judge` block reuses {@link ConfigPartialSchema} fields plus
|
|
944
|
+
* grader-specific concurrency and system-instruction overrides.
|
|
945
|
+
*/
|
|
946
|
+
/** Top-level `judge` block — mirrors harness config fields plus grader concurrency. */
|
|
947
|
+
const JudgeConfigSchema = ConfigPartialSchema.extend({
|
|
948
|
+
adapter: z.string().optional(),
|
|
949
|
+
maxConcurrent: z.number().int().positive().optional(),
|
|
950
|
+
/** Optional judge prompt prefix (maps to upstream system_instruction). */
|
|
951
|
+
system_instruction: z.string().optional()
|
|
952
|
+
});
|
|
953
|
+
const GradingConfigSchema = z.object({ judge: JudgeConfigSchema });
|
|
954
|
+
//#endregion
|
|
955
|
+
//#region src/config/suite-file-schema.ts
|
|
956
|
+
/** Single-file suite with optional inline judge and pipeline orchestration. */
|
|
957
|
+
const SuiteFileSingleSchema = TestSuiteSchema.extend({
|
|
958
|
+
judge: JudgeConfigSchema.optional(),
|
|
959
|
+
pipeline: PipelineConfigSchema.optional()
|
|
960
|
+
});
|
|
961
|
+
/** Directory suite root with optional inline judge and pipeline orchestration. */
|
|
962
|
+
const SuiteFileDirectorySchema = SuiteDirectorySchema.extend({
|
|
963
|
+
judge: JudgeConfigSchema.optional(),
|
|
964
|
+
pipeline: PipelineConfigSchema.optional()
|
|
965
|
+
});
|
|
966
|
+
//#endregion
|
|
967
|
+
//#region src/config/suite-document-loader.ts
|
|
968
|
+
/**
|
|
969
|
+
* Load a unified suite.yaml document (suite + optional judge + pipeline).
|
|
970
|
+
*/
|
|
971
|
+
/** Load suite.yaml (or directory) including optional judge and pipeline blocks. */
|
|
972
|
+
async function loadSuiteDocument(filePath, options = {}) {
|
|
844
973
|
const absolutePath = resolve(filePath);
|
|
845
974
|
let info;
|
|
846
975
|
try {
|
|
@@ -848,26 +977,12 @@ async function loadSuite(filePath) {
|
|
|
848
977
|
} catch (err) {
|
|
849
978
|
throw new ConfigError(`failed to read suite path: ${err instanceof Error ? err.message : String(err)}`, filePath);
|
|
850
979
|
}
|
|
851
|
-
|
|
852
|
-
return
|
|
980
|
+
const strict = options.validateOrchestration !== false;
|
|
981
|
+
if (info.isDirectory()) return loadSuiteDocumentDirectory(absolutePath, strict);
|
|
982
|
+
return loadSuiteDocumentFile(absolutePath, strict);
|
|
853
983
|
}
|
|
854
|
-
/** Load
|
|
855
|
-
async function
|
|
856
|
-
let content;
|
|
857
|
-
try {
|
|
858
|
-
content = await readFile(absolutePath, "utf8");
|
|
859
|
-
} catch (err) {
|
|
860
|
-
throw new ConfigError(`failed to read suite file: ${err instanceof Error ? err.message : String(err)}`, absolutePath);
|
|
861
|
-
}
|
|
862
|
-
return parseSuite(content, absolutePath);
|
|
863
|
-
}
|
|
864
|
-
/**
|
|
865
|
-
* Load a directory suite: `suite.yaml` plus optional `cases/` YAML files.
|
|
866
|
-
*
|
|
867
|
-
* Cases from `suite.yaml` sort before external case files; within each file,
|
|
868
|
-
* array order is preserved.
|
|
869
|
-
*/
|
|
870
|
-
async function loadSuiteDirectory(dir) {
|
|
984
|
+
/** Load suite.yaml from a directory layout (cases under `cases/`). */
|
|
985
|
+
async function loadSuiteDocumentDirectory(dir, strict) {
|
|
871
986
|
const suiteYamlPath = join(dir, "suite.yaml");
|
|
872
987
|
let content;
|
|
873
988
|
try {
|
|
@@ -875,7 +990,7 @@ async function loadSuiteDirectory(dir) {
|
|
|
875
990
|
} catch (err) {
|
|
876
991
|
throw new ConfigError(`missing suite.yaml in suite directory: ${err instanceof Error ? err.message : String(err)}`, dir);
|
|
877
992
|
}
|
|
878
|
-
const base =
|
|
993
|
+
const { judge, pipeline, suite: base } = parseSuiteFileRoot(content, suiteYamlPath, "directory", strict);
|
|
879
994
|
const casesDir = join(dir, "cases");
|
|
880
995
|
const caseFiles = await collectCaseYamlFiles(casesDir);
|
|
881
996
|
const tagged = base.cases.map((testCase, index) => ({
|
|
@@ -904,94 +1019,242 @@ async function loadSuiteDirectory(dir) {
|
|
|
904
1019
|
cases
|
|
905
1020
|
};
|
|
906
1021
|
resolveSuitePaths(suite, suiteYamlPath);
|
|
907
|
-
return suite;
|
|
1022
|
+
return buildSuiteDocument(suiteYamlPath, suite, judge, pipeline);
|
|
1023
|
+
}
|
|
1024
|
+
/** Load a single suite.yaml file (inline cases). */
|
|
1025
|
+
async function loadSuiteDocumentFile(absolutePath, strict) {
|
|
1026
|
+
let content;
|
|
1027
|
+
try {
|
|
1028
|
+
content = await readFile(absolutePath, "utf8");
|
|
1029
|
+
} catch (err) {
|
|
1030
|
+
throw new ConfigError(`failed to read suite file: ${err instanceof Error ? err.message : String(err)}`, absolutePath);
|
|
1031
|
+
}
|
|
1032
|
+
const { judge, pipeline, suite } = parseSuiteFileRoot(content, absolutePath, "single", strict);
|
|
1033
|
+
resolveSuitePaths(suite, absolutePath);
|
|
1034
|
+
return buildSuiteDocument(absolutePath, suite, judge, pipeline);
|
|
908
1035
|
}
|
|
909
1036
|
/**
|
|
910
|
-
* Parse suite
|
|
1037
|
+
* Parse suite.yaml root and validate against the appropriate schema.
|
|
911
1038
|
*
|
|
912
|
-
*
|
|
1039
|
+
* When `strict` is true, uses extended schemas that validate `judge:` and
|
|
1040
|
+
* `pipeline:` blocks (for `loadSuiteDocument`). When false, uses base schemas
|
|
1041
|
+
* that silently strip unknown keys (for `loadSuite`).
|
|
913
1042
|
*/
|
|
914
|
-
function
|
|
1043
|
+
function parseSuiteFileRoot(yamlContent, sourcePath, layout, strict) {
|
|
915
1044
|
let raw;
|
|
916
1045
|
try {
|
|
917
1046
|
raw = parse(yamlContent);
|
|
918
1047
|
} catch (err) {
|
|
919
1048
|
throw new ConfigError(`YAML parse error: ${err instanceof Error ? err.message : String(err)}`, sourcePath);
|
|
920
1049
|
}
|
|
921
|
-
|
|
922
|
-
|
|
923
|
-
|
|
924
|
-
|
|
925
|
-
|
|
1050
|
+
if (!strict) {
|
|
1051
|
+
const validated = (layout === "directory" ? SuiteDirectorySchema : TestSuiteSchema).safeParse(raw);
|
|
1052
|
+
if (!validated.success) throw new ConfigError(`validation failed:\n${formatZodError$2(validated.error, sourcePath)}`, sourcePath);
|
|
1053
|
+
return { suite: (layout === "directory" ? transformSuiteDirectory : transformSuite)(validated.data) };
|
|
1054
|
+
}
|
|
1055
|
+
if (layout === "directory") {
|
|
1056
|
+
const validated = SuiteFileDirectorySchema.safeParse(raw);
|
|
1057
|
+
if (!validated.success) throw new ConfigError(`validation failed:\n${formatZodError$2(validated.error, sourcePath)}`, sourcePath);
|
|
1058
|
+
return extractSuiteFileParts(validated.data, sourcePath, transformSuiteDirectory);
|
|
1059
|
+
}
|
|
1060
|
+
const validated = SuiteFileSingleSchema.safeParse(raw);
|
|
1061
|
+
if (!validated.success) throw new ConfigError(`validation failed:\n${formatZodError$2(validated.error, sourcePath)}`, sourcePath);
|
|
1062
|
+
return extractSuiteFileParts(validated.data, sourcePath, transformSuite);
|
|
926
1063
|
}
|
|
927
|
-
/**
|
|
928
|
-
function
|
|
1064
|
+
/** Split validated YAML into suite, judge, and pipeline with path resolution. */
|
|
1065
|
+
function extractSuiteFileParts(data, sourcePath, transform) {
|
|
1066
|
+
const { judge: rawJudge, pipeline: rawPipeline, ...suiteRaw } = data;
|
|
1067
|
+
const suite = transform(suiteRaw);
|
|
1068
|
+
let judge;
|
|
1069
|
+
if (rawJudge) {
|
|
1070
|
+
judge = { ...rawJudge };
|
|
1071
|
+
resolveGradingConfigPaths({ judge }, sourcePath);
|
|
1072
|
+
}
|
|
1073
|
+
let pipeline;
|
|
1074
|
+
if (rawPipeline) {
|
|
1075
|
+
pipeline = transformPipelineConfig(rawPipeline);
|
|
1076
|
+
pipeline = resolvePipelineConfigPaths(pipeline, sourcePath);
|
|
1077
|
+
}
|
|
1078
|
+
return {
|
|
1079
|
+
suite,
|
|
1080
|
+
judge,
|
|
1081
|
+
pipeline
|
|
1082
|
+
};
|
|
1083
|
+
}
|
|
1084
|
+
/** Apply default artifact filenames when a pipeline step key is present but paths are omitted. */
|
|
1085
|
+
function transformPipelineConfig(raw) {
|
|
1086
|
+
const pipeline = {};
|
|
1087
|
+
if (raw.run !== void 0) pipeline.run = {
|
|
1088
|
+
output: raw.run?.output ?? DEFAULT_PIPELINE_OUTPUTS.run,
|
|
1089
|
+
maxConcurrent: raw.run?.maxConcurrent
|
|
1090
|
+
};
|
|
1091
|
+
if (raw.grade !== void 0) pipeline.grade = {
|
|
1092
|
+
input: raw.grade?.input,
|
|
1093
|
+
output: raw.grade?.output ?? DEFAULT_PIPELINE_OUTPUTS.grade,
|
|
1094
|
+
maxConcurrent: raw.grade?.maxConcurrent
|
|
1095
|
+
};
|
|
1096
|
+
if (raw.envelope !== void 0) pipeline.envelope = {
|
|
1097
|
+
report: raw.envelope?.report,
|
|
1098
|
+
grading: raw.envelope?.grading,
|
|
1099
|
+
output: raw.envelope?.output ?? DEFAULT_PIPELINE_OUTPUTS.envelope,
|
|
1100
|
+
projection: raw.envelope?.projection ?? "envelope",
|
|
1101
|
+
includeRawStreamEvents: raw.envelope?.includeRawStreamEvents,
|
|
1102
|
+
noTranscript: raw.envelope?.noTranscript
|
|
1103
|
+
};
|
|
1104
|
+
return pipeline;
|
|
1105
|
+
}
|
|
1106
|
+
/**
 * Assemble the runtime {@link SuiteDocument} from parsed parts.
 *
 * @param suitePath Suite location; stored after being passed through `resolve`.
 * @param suite Parsed suite value, stored as-is.
 * @param judge Optional judge block, stored as-is.
 * @param pipeline Optional pipeline block, stored as-is.
 */
function buildSuiteDocument(suitePath, suite, judge, pipeline) {
	const absoluteSuitePath = resolve(suitePath);
	const document = {
		suitePath: absoluteSuitePath,
		suite,
		judge,
		pipeline
	};
	return document;
}
|
|
1115
|
+
/**
 * Render each issue of a validation error as one line, joined by newlines.
 *
 * @param err Error object exposing an `issues` array of `{ path, message }`.
 * @param sourcePath Optional source file prefix; omitted from the line when falsy.
 * @returns One line per issue; an empty path is shown as `(root)`.
 */
function formatZodError$2(err, sourcePath) {
	const lines = [];
	for (const issue of err.issues) {
		const location = issue.path.length > 0 ? issue.path.join(".") : "(root)";
		const prefix = sourcePath ? `${sourcePath} → ${location}` : location;
		lines.push(` ${prefix}: ${issue.message}`);
	}
	return lines.join("\n");
}
|
|
1121
|
+
//#endregion
|
|
1122
|
+
//#region src/config/grading-loader.ts
|
|
1123
|
+
/**
|
|
1124
|
+
* Load standalone grading YAML for `harness-eval grade`.
|
|
1125
|
+
*
|
|
1126
|
+
* Also accepts unified suite.yaml files with an inline `judge:` block.
|
|
1127
|
+
*/
|
|
1128
|
+
/**
 * Load grading YAML from disk and resolve relative paths.
 *
 * When `filePath` is a directory, the `suite.yaml` inside it is loaded instead.
 * Otherwise the file is read and handed to `parseGradingConfig`, which itself
 * recognises unified suite files and extracts their `judge:` block.
 *
 * @param filePath File or directory path; resolved to an absolute path before use.
 * @returns The grading config produced by the parser for the file contents.
 * @throws ConfigError when the path cannot be stat'ed or read, and for any
 *   parse or validation failure reported by the delegated parser.
 */
async function loadGradingConfig(filePath) {
	const absolutePath = resolve(filePath);
	let info;
	try {
		info = await stat(absolutePath);
	} catch (err) {
		throw new ConfigError(`failed to read grading config: ${err instanceof Error ? err.message : String(err)}`, filePath);
	}
	if (info.isDirectory()) return loadGradingFromSuiteYaml(join(absolutePath, "suite.yaml"));
	let content;
	try {
		content = await readFile(absolutePath, "utf8");
	} catch (err) {
		throw new ConfigError(`failed to read grading config: ${err instanceof Error ? err.message : String(err)}`, filePath);
	}
	// Do not pre-parse here: parseGradingConfig performs the suite-root detection
	// itself and wraps YAML syntax errors in ConfigError. Parsing up front would
	// parse the document twice and let a raw parser error escape unwrapped.
	return parseGradingConfig(content, absolutePath);
}
|
|
1147
|
+
/**
 * Parse grading YAML from a string.
 *
 * Input recognised as a unified suite root is routed to the suite-based
 * extractor; anything else is validated against the standalone grading schema.
 *
 * @param yamlContent YAML text to parse.
 * @param sourcePath Optional path for error messages and path resolution.
 * @returns A config object holding a shallow copy of the validated `judge` block.
 * @throws ConfigError on YAML syntax errors or schema validation failure.
 */
function parseGradingConfig(yamlContent, sourcePath) {
	let document;
	try {
		document = parse(yamlContent);
	} catch (err) {
		const reason = err instanceof Error ? err.message : String(err);
		throw new ConfigError(`YAML parse error: ${reason}`, sourcePath);
	}
	if (isSuiteRoot(document)) {
		return parseGradingFromSuiteRaw(document, sourcePath ?? "suite.yaml");
	}
	const result = GradingConfigSchema.safeParse(document);
	if (!result.success) {
		const details = formatZodError$1(result.error, sourcePath);
		throw new ConfigError(`validation failed:\n${details}`, sourcePath);
	}
	const config = { judge: { ...result.data.judge } };
	// Paths are only resolved when the caller supplied a source location.
	if (sourcePath) resolveGradingConfigPaths(config, sourcePath);
	return config;
}
|
|
939
|
-
/**
|
|
940
|
-
function
|
|
1166
|
+
/**
 * Detect unified suite.yaml by presence of suite-specific keys (vs standalone grading YAML).
 *
 * @param raw Parsed YAML value of any type.
 * @returns `true` when `raw` is a non-null object that has a `cases` key, or
 *   has both `matrix` and `adapter` keys; `false` otherwise.
 */
function isSuiteRoot(raw) {
	if (typeof raw !== "object" || raw === null) {
		return false;
	}
	if ("cases" in raw) {
		return true;
	}
	return "matrix" in raw && "adapter" in raw;
}
|
|
1171
|
+
/**
 * Read a suite YAML file and extract its grading config.
 *
 * @param suiteYamlPath Path of the suite file to read as UTF-8.
 * @returns Whatever `parseGradingFromSuiteRaw` returns for the parsed document.
 * @throws ConfigError when the file cannot be read or is not valid YAML.
 */
async function loadGradingFromSuiteYaml(suiteYamlPath) {
	const describe = (err) => err instanceof Error ? err.message : String(err);
	let text;
	try {
		text = await readFile(suiteYamlPath, "utf8");
	} catch (err) {
		throw new ConfigError(`failed to read suite file: ${describe(err)}`, suiteYamlPath);
	}
	let document;
	try {
		document = parse(text);
	} catch (err) {
		throw new ConfigError(`YAML parse error: ${describe(err)}`, suiteYamlPath);
	}
	return parseGradingFromSuiteRaw(document, suiteYamlPath);
}
|
|
1186
|
+
/**
 * Extract the grading config from an already-parsed unified suite document.
 *
 * The single-file schema is tried first and the directory schema second. The
 * first schema that accepts the document decides the outcome: its `judge`
 * block is copied and path-resolved, or a ConfigError is raised if it has none.
 *
 * @param raw Parsed YAML value.
 * @param sourcePath Path used for error messages and path resolution.
 * @returns A config object holding a shallow copy of the `judge` block.
 * @throws ConfigError when the matching schema has no `judge` block, or when
 *   neither schema accepts the document.
 */
function parseGradingFromSuiteRaw(raw, sourcePath) {
	let failure;
	for (const schema of [SuiteFileSingleSchema, SuiteFileDirectorySchema]) {
		const attempt = schema.safeParse(raw);
		if (!attempt.success) {
			// Keep the most recent error, falling back to an earlier one if it is missing.
			failure = attempt.error ?? failure;
			continue;
		}
		const { judge } = attempt.data;
		if (!judge) throw new ConfigError("suite file has no judge block", sourcePath);
		const config = { judge: { ...judge } };
		resolveGradingConfigPaths(config, sourcePath);
		return config;
	}
	throw new ConfigError(`validation failed:\n${formatZodError$1(failure, sourcePath)}`, sourcePath);
}
|
|
1203
|
+
/**
 * Format a zod validation error with optional source file prefix.
 *
 * @param err Error object exposing an `issues` array of `{ path, message }`.
 * @param sourcePath Optional source file shown before each issue path; skipped when falsy.
 * @returns Newline-joined lines, one per issue; an empty path is shown as `(root)`.
 */
function formatZodError$1(err, sourcePath) {
	const describeIssue = ({ path: segments, message }) => {
		const where = segments.length > 0 ? segments.join(".") : "(root)";
		const label = sourcePath ? `${sourcePath} → ${where}` : where;
		return ` ${label}: ${message}`;
	};
	return err.issues.map((issue) => describeIssue(issue)).join("\n");
}
|
|
1210
|
+
//#endregion
|
|
1211
|
+
//#region src/config/loader.ts
|
|
949
1212
|
/**
|
|
950
|
-
*
|
|
1213
|
+
* Load a `TestSuite` from a YAML file, directory, or string.
|
|
951
1214
|
*
|
|
952
|
-
*
|
|
1215
|
+
* For unified suite.yaml with optional `judge:` and `pipeline:` blocks,
|
|
1216
|
+
* use {@link loadSuiteDocument}.
|
|
953
1217
|
*/
|
|
954
|
-
|
|
955
|
-
|
|
956
|
-
|
|
957
|
-
|
|
958
|
-
|
|
959
|
-
|
|
960
|
-
|
|
961
|
-
|
|
962
|
-
}
|
|
963
|
-
/** Validate one raw case object against {@link TestCaseSchema}. */
|
|
964
|
-
function validateRawCase(raw, sourcePath, index) {
|
|
965
|
-
const validated = TestCaseSchema.safeParse(raw);
|
|
966
|
-
if (!validated.success) throw new ConfigError(`validation failed:\n${formatZodError(validated.error, sourcePath)}`, sourcePath);
|
|
967
|
-
return validated.data;
|
|
1218
|
+
/**
 * Load a suite from a file path or directory path (suite portion only).
 *
 * Delegates to {@link loadSuiteDocument} with `validateOrchestration: false`
 * and returns only the `suite` field of the result, so the `judge:` and
 * `pipeline:` orchestration blocks are not part of what the caller receives.
 * Use {@link loadSuiteDocument} when you need validated orchestration metadata.
 *
 * @param filePath File or directory path forwarded unchanged to the document loader.
 */
async function loadSuite(filePath) {
	const options = { validateOrchestration: false };
	const { suite } = await loadSuiteDocument(filePath, options);
	return suite;
}
|
|
969
1228
|
/**
|
|
970
|
-
*
|
|
1229
|
+
* Parse suite YAML from a string (single-file layout with inline cases).
|
|
971
1230
|
*
|
|
972
|
-
*
|
|
973
|
-
* are optional in directory layout.
|
|
1231
|
+
* Unknown top-level keys such as `judge` and `pipeline` are stripped.
|
|
974
1232
|
*/
|
|
975
|
-
|
|
976
|
-
|
|
977
|
-
|
|
978
|
-
|
|
979
|
-
|
|
980
|
-
|
|
981
|
-
} catch (err) {
|
|
982
|
-
if (err instanceof Error && "code" in err && err.code === "ENOENT") return;
|
|
983
|
-
throw err;
|
|
984
|
-
}
|
|
985
|
-
for (const entry of entries) {
|
|
986
|
-
const fullPath = join(dir, entry.name);
|
|
987
|
-
if (entry.isDirectory()) await walk(fullPath);
|
|
988
|
-
else if (entry.isFile() && (entry.name.endsWith(".yaml") || entry.name.endsWith(".yml"))) files.push(fullPath);
|
|
989
|
-
}
|
|
1233
|
+
/**
 * Parse suite YAML from a string (single-file layout with inline cases).
 *
 * The parsed document is validated against `TestSuiteSchema` and then passed
 * through `transformSuite`. When `sourcePath` is given, suite paths are
 * resolved against its absolute form.
 *
 * @param yamlContent YAML text to parse.
 * @param sourcePath Optional path for error messages and path resolution.
 * @throws ConfigError on YAML syntax errors or schema validation failure.
 */
function parseSuite(yamlContent, sourcePath) {
	let document;
	try {
		document = parse(yamlContent);
	} catch (err) {
		const reason = err instanceof Error ? err.message : String(err);
		throw new ConfigError(`YAML parse error: ${reason}`, sourcePath);
	}
	const result = TestSuiteSchema.safeParse(document);
	if (!result.success) {
		const details = formatZodError(result.error, sourcePath);
		throw new ConfigError(`validation failed:\n${details}`, sourcePath);
	}
	const suite = transformSuite(result.data);
	if (sourcePath) {
		resolveSuitePaths(suite, resolve(sourcePath));
	}
	return suite;
}
|
|
1246
|
+
/**
 * Parse `suite.yaml` for directory layout (cases may be omitted).
 *
 * The parsed document is validated against `SuiteDirectorySchema` and the
 * validated data is returned through `transformSuiteDirectory`.
 *
 * @param yamlContent YAML text to parse.
 * @param sourcePath Optional path used in error messages.
 * @throws ConfigError on YAML syntax errors or schema validation failure.
 * @internal
 */
function parseSuiteDirectory(yamlContent, sourcePath) {
	let document;
	try {
		document = parse(yamlContent);
	} catch (err) {
		const reason = err instanceof Error ? err.message : String(err);
		throw new ConfigError(`YAML parse error: ${reason}`, sourcePath);
	}
	const result = SuiteDirectorySchema.safeParse(document);
	if (result.success) {
		return transformSuiteDirectory(result.data);
	}
	const details = formatZodError(result.error, sourcePath);
	throw new ConfigError(`validation failed:\n${details}`, sourcePath);
}
|
|
994
|
-
/** Format a zod validation error with optional source file prefix. */
|
|
995
1258
|
function formatZodError(err, sourcePath) {
|
|
996
1259
|
return err.issues.map((issue) => {
|
|
997
1260
|
const path = issue.path.length > 0 ? issue.path.join(".") : "(root)";
|
|
@@ -999,6 +1262,6 @@ function formatZodError(err, sourcePath) {
|
|
|
999
1262
|
}).join("\n");
|
|
1000
1263
|
}
|
|
1001
1264
|
//#endregion
|
|
1002
|
-
export { parseGradingConfig as a, loadGradingConfig as i,
|
|
1265
|
+
export { parseGradingConfig as a, parseCasesFile as c, loadGradingConfig as i, ConfigError as l, parseSuite as n, loadSuiteDocument as o, parseSuiteDirectory as r, DEFAULT_PIPELINE_OUTPUTS as s, loadSuite as t };
|
|
1003
1266
|
|
|
1004
|
-
//# sourceMappingURL=loader-
|
|
1267
|
+
//# sourceMappingURL=loader-DnQ6Jt0i.js.map
|