@alis-build/harness-eval 0.1.2 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +187 -30
- package/dist/adapters/claude-code/index.d.ts +2 -2
- package/dist/adapters/claude-code/index.js +2 -1
- package/dist/adapters/codex/index.d.ts +68 -0
- package/dist/adapters/codex/index.js +3 -0
- package/dist/{claude-code-DZ4Vkgp6.js → claude-code-C_7hxC8z.js} +3 -245
- package/dist/claude-code-C_7hxC8z.js.map +1 -0
- package/dist/cli/bin.js +131 -151
- package/dist/cli/bin.js.map +1 -1
- package/dist/codex-0cHO2te9.js +496 -0
- package/dist/codex-0cHO2te9.js.map +1 -0
- package/dist/config/loader.d.ts +2 -2
- package/dist/config/loader.js +2 -2
- package/dist/{index-V22PrR0p.d.ts → index-C56AEDUr.d.ts} +2 -2
- package/dist/index.d.ts +134 -6
- package/dist/index.js +6 -5
- package/dist/index.js.map +1 -1
- package/dist/{loader-DcI0KfRX.js → loader-CiBm4Kf6.js} +491 -209
- package/dist/loader-CiBm4Kf6.js.map +1 -0
- package/dist/loader-CrmzNwkq.d.ts +107 -0
- package/dist/{projections-BcX7w-f6.js → reporter-BKCJZRYr.js} +1475 -729
- package/dist/reporter-BKCJZRYr.js.map +1 -0
- package/dist/runner/suite.d.ts +1 -1
- package/dist/runner/suite.js +1 -1
- package/dist/{suite-Dlzl-HI0.js → suite-C3-8EjUW.js} +558 -4
- package/dist/suite-C3-8EjUW.js.map +1 -0
- package/dist/{suite-DPJMIEbu.d.ts → suite-qyOGre2g.d.ts} +2 -2
- package/dist/types-Bac8_Ixb.js +246 -0
- package/dist/types-Bac8_Ixb.js.map +1 -0
- package/dist/{types-CD3TwOtZ.d.ts → types-CLt4Yygc.d.ts} +2 -2
- package/dist/{types-B9H4IZtA.d.ts → types-D0HR2WnP.d.ts} +9 -2
- package/dist/types-DFMpv_HJ.d.ts +77 -0
- package/package.json +11 -2
- package/schemas/eval-run-envelope.schema.json +193 -183
- package/dist/claude-code-DZ4Vkgp6.js.map +0 -1
- package/dist/loader-C9yQHUPC.d.ts +0 -50
- package/dist/loader-DcI0KfRX.js.map +0 -1
- package/dist/projections-BcX7w-f6.js.map +0 -1
- package/dist/suite-Dlzl-HI0.js.map +0 -1
|
@@ -3,80 +3,6 @@ import { readFile, readdir, stat } from "node:fs/promises";
|
|
|
3
3
|
import { isAbsolute, join, relative, resolve } from "node:path";
|
|
4
4
|
import { parse } from "yaml";
|
|
5
5
|
import { z } from "zod";
|
|
6
|
-
//#region src/config/paths.ts
|
|
7
|
-
/**
|
|
8
|
-
* Resolve relative paths in suite config against the suite file directory.
|
|
9
|
-
*
|
|
10
|
-
* YAML authors write paths relative to the suite file; this module absolutizes
|
|
11
|
-
* them at load time so the runner and adapters receive filesystem-ready values.
|
|
12
|
-
* Tilde-prefixed paths and inline JSON blobs (settings starting with `{`) are
|
|
13
|
-
* left unchanged.
|
|
14
|
-
*/
|
|
15
|
-
/** Resolve a single path relative to `suiteDir` unless already absolute or `~/`. */
|
|
16
|
-
function resolvePath(value, suiteDir) {
|
|
17
|
-
if (isAbsolute(value) || value.startsWith("~/")) return value;
|
|
18
|
-
return join(suiteDir, value);
|
|
19
|
-
}
|
|
20
|
-
/** Resolve Claude Code-specific path fields within a config block. */
|
|
21
|
-
function resolveClaudeCodePaths(block, suiteDir) {
|
|
22
|
-
const resolved = { ...block };
|
|
23
|
-
if (typeof resolved.mcpConfig === "string") resolved.mcpConfig = resolvePath(resolved.mcpConfig, suiteDir);
|
|
24
|
-
if (Array.isArray(resolved.pluginDirs)) resolved.pluginDirs = resolved.pluginDirs.map((p) => typeof p === "string" ? resolvePath(p, suiteDir) : p);
|
|
25
|
-
if (Array.isArray(resolved.addDirs)) resolved.addDirs = resolved.addDirs.map((p) => typeof p === "string" ? resolvePath(p, suiteDir) : p);
|
|
26
|
-
for (const field of [
|
|
27
|
-
"systemPromptFile",
|
|
28
|
-
"appendSystemPromptFile",
|
|
29
|
-
"debugFile"
|
|
30
|
-
]) {
|
|
31
|
-
const value = resolved[field];
|
|
32
|
-
if (typeof value === "string" && !value.trim().startsWith("{")) resolved[field] = resolvePath(value, suiteDir);
|
|
33
|
-
}
|
|
34
|
-
if (typeof resolved.settings === "string" && !resolved.settings.trim().startsWith("{")) resolved.settings = resolvePath(resolved.settings, suiteDir);
|
|
35
|
-
return resolved;
|
|
36
|
-
}
|
|
37
|
-
/** Resolve relative paths in a config layer relative to `suiteDir`. */
|
|
38
|
-
function resolveConfigPaths(config, suiteDir) {
|
|
39
|
-
if (!config) return void 0;
|
|
40
|
-
const resolved = { ...config };
|
|
41
|
-
if (typeof resolved.cwd === "string") resolved.cwd = resolvePath(resolved.cwd, suiteDir);
|
|
42
|
-
if (resolved.claudeCode && typeof resolved.claudeCode === "object" && !Array.isArray(resolved.claudeCode)) resolved.claudeCode = resolveClaudeCodePaths(resolved.claudeCode, suiteDir);
|
|
43
|
-
return resolved;
|
|
44
|
-
}
|
|
45
|
-
/** Resolve paths on an entire suite after load. */
|
|
46
|
-
function resolveSuitePaths(suite, suiteFilePath) {
|
|
47
|
-
const suiteDir = configFileDir(suiteFilePath);
|
|
48
|
-
suite.defaultConfig = resolveConfigPaths(suite.defaultConfig, suiteDir);
|
|
49
|
-
for (const cell of suite.matrix) cell.config = resolveConfigPaths(cell.config, suiteDir) ?? cell.config;
|
|
50
|
-
for (const testCase of suite.cases) testCase.config = resolveConfigPaths(testCase.config, suiteDir);
|
|
51
|
-
}
|
|
52
|
-
/** Parent directory of a suite or grading config file path. */
|
|
53
|
-
function configFileDir(filePath) {
|
|
54
|
-
return filePath.includes("/") || filePath.includes("\\") ? filePath.replace(/[/\\][^/\\]+$/, "") : ".";
|
|
55
|
-
}
|
|
56
|
-
/**
|
|
57
|
-
* Heuristically resolve env var values that look like relative file paths.
|
|
58
|
-
*
|
|
59
|
-
* Used for grading config where credential or config paths may be expressed
|
|
60
|
-
* relative to the grading YAML location.
|
|
61
|
-
*/
|
|
62
|
-
function resolveEnvPaths(env, baseDir) {
|
|
63
|
-
const resolved = {};
|
|
64
|
-
for (const [key, value] of Object.entries(env)) if (value.startsWith("./") || value.startsWith("../") || value.includes("/") && !value.startsWith("http")) resolved[key] = resolvePath(value, baseDir);
|
|
65
|
-
else resolved[key] = value;
|
|
66
|
-
return resolved;
|
|
67
|
-
}
|
|
68
|
-
/** Resolve relative paths in a standalone grading config file. */
|
|
69
|
-
function resolveGradingConfigPaths(config, configFilePath) {
|
|
70
|
-
const baseDir = configFileDir(configFilePath);
|
|
71
|
-
const { adapter, maxConcurrent, ...rest } = config.judge;
|
|
72
|
-
config.judge = {
|
|
73
|
-
...resolveConfigPaths(rest, baseDir) ?? rest,
|
|
74
|
-
adapter,
|
|
75
|
-
maxConcurrent
|
|
76
|
-
};
|
|
77
|
-
if (config.judge.env) config.judge.env = resolveEnvPaths(config.judge.env, baseDir);
|
|
78
|
-
}
|
|
79
|
-
//#endregion
|
|
80
6
|
//#region src/config/schema.ts
|
|
81
7
|
/**
|
|
82
8
|
* zod schemas for the YAML on-disk shape.
|
|
@@ -132,13 +58,59 @@ const ClaudeCodeConfigSchema = z.object({
|
|
|
132
58
|
maxTurns: z.number().int().positive(),
|
|
133
59
|
isolateConfig: z.boolean()
|
|
134
60
|
}).partial();
|
|
61
|
+
/** Gemini CLI adapter-specific options (nested under `geminiCli` in suite YAML). */
|
|
62
|
+
const GeminiCliConfigSchema = z.object({
|
|
63
|
+
binary: z.string(),
|
|
64
|
+
approvalMode: z.enum([
|
|
65
|
+
"default",
|
|
66
|
+
"auto_edit",
|
|
67
|
+
"yolo",
|
|
68
|
+
"plan"
|
|
69
|
+
]),
|
|
70
|
+
sandbox: z.string(),
|
|
71
|
+
skipTrust: z.boolean(),
|
|
72
|
+
includeDirectories: z.array(z.string()),
|
|
73
|
+
allowedMcpServerNames: z.array(z.string()),
|
|
74
|
+
extensions: z.array(z.string()),
|
|
75
|
+
debug: z.boolean(),
|
|
76
|
+
/** Fresh temp `GEMINI_CONFIG_DIR` per run when true. */
|
|
77
|
+
isolateConfig: z.boolean()
|
|
78
|
+
}).partial();
|
|
79
|
+
/** Codex CLI adapter-specific options (nested under `codex`). */
|
|
80
|
+
const CodexConfigSchema = z.object({
|
|
81
|
+
binary: z.string(),
|
|
82
|
+
profile: z.string(),
|
|
83
|
+
sandbox: z.enum([
|
|
84
|
+
"read-only",
|
|
85
|
+
"workspace-write",
|
|
86
|
+
"danger-full-access"
|
|
87
|
+
]),
|
|
88
|
+
addDirs: z.array(z.string()),
|
|
89
|
+
configOverrides: z.array(z.string()),
|
|
90
|
+
askForApproval: z.enum([
|
|
91
|
+
"untrusted",
|
|
92
|
+
"on-request",
|
|
93
|
+
"never"
|
|
94
|
+
]),
|
|
95
|
+
dangerouslyBypassApprovalsAndSandbox: z.boolean(),
|
|
96
|
+
dangerouslyBypassHookTrust: z.boolean(),
|
|
97
|
+
ephemeral: z.boolean(),
|
|
98
|
+
ignoreUserConfig: z.boolean(),
|
|
99
|
+
skipGitRepoCheck: z.boolean(),
|
|
100
|
+
outputSchema: z.string(),
|
|
101
|
+
outputLastMessage: z.string(),
|
|
102
|
+
captureLastMessage: z.boolean(),
|
|
103
|
+
isolateConfig: z.boolean()
|
|
104
|
+
}).partial();
|
|
135
105
|
/** Generic + nested adapter config for one layer (defaultConfig, case, cell). */
|
|
136
106
|
const ConfigPartialSchema = z.object({
|
|
137
107
|
model: z.string(),
|
|
138
108
|
cwd: z.string(),
|
|
139
109
|
timeoutMs: z.number().int().positive(),
|
|
140
110
|
env: z.record(z.string(), z.string()),
|
|
141
|
-
claudeCode: ClaudeCodeConfigSchema
|
|
111
|
+
claudeCode: ClaudeCodeConfigSchema,
|
|
112
|
+
codex: CodexConfigSchema,
|
|
113
|
+
geminiCli: GeminiCliConfigSchema
|
|
142
114
|
}).partial();
|
|
143
115
|
/** A matrix cell — one point in the configuration matrix. */
|
|
144
116
|
const MatrixCellSchema = z.object({
|
|
@@ -763,84 +735,260 @@ function typeOf(x) {
|
|
|
763
735
|
return typeof x;
|
|
764
736
|
}
|
|
765
737
|
//#endregion
|
|
766
|
-
//#region src/config/
|
|
738
|
+
//#region src/config/loader-internals.ts
|
|
767
739
|
/**
|
|
768
|
-
*
|
|
769
|
-
*
|
|
770
|
-
* The top-level `judge` block reuses {@link ConfigPartialSchema} fields plus
|
|
771
|
-
* grader-specific concurrency and system-instruction overrides.
|
|
740
|
+
* Shared suite loader helpers (case file collection and parsing).
|
|
772
741
|
*/
|
|
773
|
-
/**
|
|
774
|
-
|
|
775
|
-
adapter: z.string().optional(),
|
|
776
|
-
maxConcurrent: z.number().int().positive().optional(),
|
|
777
|
-
/** Optional judge prompt prefix (maps to upstream system_instruction). */
|
|
778
|
-
system_instruction: z.string().optional()
|
|
779
|
-
});
|
|
780
|
-
const GradingConfigSchema = z.object({ judge: JudgeConfigSchema });
|
|
781
|
-
//#endregion
|
|
782
|
-
//#region src/config/grading-loader.ts
|
|
783
|
-
/**
|
|
784
|
-
* Load standalone grading YAML for `harness-eval grade`.
|
|
785
|
-
*
|
|
786
|
-
* Grading config defines the judge subprocess (model, concurrency, Claude Code
|
|
787
|
-
* flags) separately from the suite under test.
|
|
788
|
-
*/
|
|
789
|
-
/** Load grading YAML from disk and resolve relative paths. */
|
|
790
|
-
async function loadGradingConfig(filePath) {
|
|
791
|
-
const absolutePath = resolve(filePath);
|
|
792
|
-
let content;
|
|
793
|
-
try {
|
|
794
|
-
content = await readFile(absolutePath, "utf8");
|
|
795
|
-
} catch (err) {
|
|
796
|
-
throw new ConfigError(`failed to read grading config: ${err instanceof Error ? err.message : String(err)}`, filePath);
|
|
797
|
-
}
|
|
798
|
-
return parseGradingConfig(content, absolutePath);
|
|
799
|
-
}
|
|
800
|
-
/**
|
|
801
|
-
* Parse grading YAML from a string.
|
|
802
|
-
*
|
|
803
|
-
* @param sourcePath Optional path for error messages and path resolution.
|
|
804
|
-
*/
|
|
805
|
-
function parseGradingConfig(yamlContent, sourcePath) {
|
|
742
|
+
/** Parse one case file: single case, array, or `{ cases: [...] }`. */
|
|
743
|
+
function parseCasesFile(yamlContent, sourcePath) {
|
|
806
744
|
let raw;
|
|
807
745
|
try {
|
|
808
746
|
raw = parse(yamlContent);
|
|
809
747
|
} catch (err) {
|
|
810
748
|
throw new ConfigError(`YAML parse error: ${err instanceof Error ? err.message : String(err)}`, sourcePath);
|
|
811
749
|
}
|
|
812
|
-
|
|
813
|
-
if (!validated.success) throw new ConfigError(`validation failed:\n${formatZodError$1(validated.error, sourcePath)}`, sourcePath);
|
|
814
|
-
const config = { judge: { ...validated.data.judge } };
|
|
815
|
-
if (sourcePath) resolveGradingConfigPaths(config, sourcePath);
|
|
816
|
-
return config;
|
|
750
|
+
return transformTestCases(extractRawCases(raw, sourcePath), sourcePath ?? "cases");
|
|
817
751
|
}
|
|
818
|
-
|
|
819
|
-
|
|
752
|
+
/**
 * Normalize the parsed YAML root of a case file into a list of validated cases.
 *
 * Accepted shapes: a bare array of cases, an object with a `cases` array, or a
 * single case object (recognized by having `id`, `prompt` and `assertions` keys).
 *
 * @param raw Parsed YAML value.
 * @param sourcePath Path used for error reporting (may be undefined).
 * @returns Array of validated cases, in file order.
 * @throws {ConfigError} When the root matches none of the accepted shapes.
 */
function extractRawCases(raw, sourcePath) {
	const validateEach = (items) => items.map((item, index) => validateRawCase(item, sourcePath, index));
	if (Array.isArray(raw)) return validateEach(raw);
	const isObjectRoot = Boolean(raw) && typeof raw === "object";
	if (isObjectRoot && Array.isArray(raw.cases)) return validateEach(raw.cases);
	const looksLikeSingleCase = isObjectRoot && "id" in raw && "prompt" in raw && "assertions" in raw;
	if (looksLikeSingleCase) return [validateRawCase(raw, sourcePath, 0)];
	throw new ConfigError("expected a case object, array of cases, or { cases: [...] }", sourcePath);
}
|
|
761
|
+
/**
 * Validate one raw case against `TestCaseSchema`.
 *
 * The position of the case inside its file is included in the error location
 * (`<source>[<index>]`) so a failure in a multi-case file identifies the
 * offending entry; zod issue paths alone are relative to the individual case.
 *
 * @param raw Parsed YAML value for a single case.
 * @param sourcePath Path of the case file, used for error reporting (may be undefined).
 * @param index Zero-based position of the case within its file.
 * @returns The validated case data.
 * @throws {ConfigError} When the case does not satisfy the schema.
 */
function validateRawCase(raw, sourcePath, index) {
	const validated = TestCaseSchema.safeParse(raw);
	if (validated.success) return validated.data;
	const location = `${sourcePath ?? "cases"}[${index}]`;
	throw new ConfigError(`validation failed:\n${formatZodError$3(validated.error, location)}`, sourcePath);
}
|
|
766
|
+
/**
 * Recursively gather every `.yaml` / `.yml` file below `casesDir`.
 *
 * A directory that does not exist (ENOENT) contributes nothing; any other
 * read error is rethrown. The result is sorted with the default string sort.
 *
 * @param casesDir Directory to scan.
 * @returns Sorted list of matching file paths.
 */
async function collectCaseYamlFiles(casesDir) {
	const yamlSuffixes = [".yaml", ".yml"];
	const collected = [];
	const visit = async (current) => {
		let listing;
		try {
			listing = await readdir(current, { withFileTypes: true });
		} catch (error) {
			const isMissing = error instanceof Error && "code" in error && error.code === "ENOENT";
			if (!isMissing) throw error;
			return;
		}
		for (const dirent of listing) {
			const target = join(current, dirent.name);
			if (dirent.isDirectory()) {
				await visit(target);
				continue;
			}
			if (dirent.isFile() && yamlSuffixes.some((suffix) => dirent.name.endsWith(suffix))) collected.push(target);
		}
	};
	await visit(casesDir);
	return collected.sort();
}
|
|
786
|
+
function formatZodError$3(err, sourcePath) {
|
|
820
787
|
return err.issues.map((issue) => {
|
|
821
788
|
const path = issue.path.length > 0 ? issue.path.join(".") : "(root)";
|
|
822
789
|
return ` ${sourcePath ? `${sourcePath} → ${path}` : path}: ${issue.message}`;
|
|
823
790
|
}).join("\n");
|
|
824
791
|
}
|
|
825
792
|
//#endregion
|
|
826
|
-
//#region src/config/
|
|
793
|
+
//#region src/config/pipeline-schema.ts
|
|
827
794
|
/**
|
|
828
|
-
*
|
|
795
|
+
* Zod schemas for optional `pipeline:` block in suite.yaml.
|
|
796
|
+
*
|
|
797
|
+
* Step presence under `pipeline` enables orchestration via `harness-eval pipeline`.
|
|
798
|
+
*/
|
|
799
|
+
/** `pipeline.run` step — harness eval run. */
|
|
800
|
+
const PipelineRunStepSchema = z.object({
|
|
801
|
+
output: z.string().min(1).optional(),
|
|
802
|
+
maxConcurrent: z.number().int().positive().optional()
|
|
803
|
+
}).optional();
|
|
804
|
+
/** `pipeline.grade` step — LLM outcome grading. */
|
|
805
|
+
const PipelineGradeStepSchema = z.object({
|
|
806
|
+
input: z.string().min(1).optional(),
|
|
807
|
+
output: z.string().min(1).optional(),
|
|
808
|
+
maxConcurrent: z.number().int().positive().optional()
|
|
809
|
+
}).optional();
|
|
810
|
+
/** `pipeline.envelope` step — EvalRunEnvelope export. */
|
|
811
|
+
const PipelineEnvelopeStepSchema = z.object({
|
|
812
|
+
report: z.string().min(1).optional(),
|
|
813
|
+
grading: z.string().min(1).optional(),
|
|
814
|
+
output: z.string().min(1).optional(),
|
|
815
|
+
projection: z.enum([
|
|
816
|
+
"envelope",
|
|
817
|
+
"trajectory",
|
|
818
|
+
"instances"
|
|
819
|
+
]).optional(),
|
|
820
|
+
includeRawStreamEvents: z.boolean().optional(),
|
|
821
|
+
noTranscript: z.boolean().optional()
|
|
822
|
+
}).optional();
|
|
823
|
+
/** Top-level optional pipeline block in suite.yaml. */
|
|
824
|
+
const PipelineConfigSchema = z.object({
|
|
825
|
+
run: PipelineRunStepSchema,
|
|
826
|
+
grade: PipelineGradeStepSchema,
|
|
827
|
+
envelope: PipelineEnvelopeStepSchema
|
|
828
|
+
}).partial();
|
|
829
|
+
/** Default artifact filenames relative to the suite.yaml directory. */
|
|
830
|
+
const DEFAULT_PIPELINE_OUTPUTS = {
|
|
831
|
+
run: "report.json",
|
|
832
|
+
grade: "grading.json",
|
|
833
|
+
envelope: "envelope.json"
|
|
834
|
+
};
|
|
835
|
+
//#endregion
|
|
836
|
+
//#region src/config/paths.ts
|
|
837
|
+
/**
|
|
838
|
+
* Resolve relative paths in suite config against the suite file directory.
|
|
829
839
|
*
|
|
830
|
-
*
|
|
831
|
-
*
|
|
832
|
-
*
|
|
833
|
-
*
|
|
840
|
+
* YAML authors write paths relative to the suite file; this module absolutizes
|
|
841
|
+
* them at load time so the runner and adapters receive filesystem-ready values.
|
|
842
|
+
* Tilde-prefixed paths and inline JSON blobs (settings starting with `{`) are
|
|
843
|
+
* left unchanged.
|
|
844
|
+
*/
|
|
845
|
+
/**
 * Resolve a single path relative to `suiteDir`.
 *
 * Absolute paths and home-relative paths (`~` or `~/...`) are returned
 * unchanged. A bare `~` is treated like `~/`; joining it onto the suite
 * directory would produce a literal `<suiteDir>/~` entry.
 *
 * @param value Path as written in the config file.
 * @param suiteDir Directory that relative paths are resolved against.
 * @returns The unchanged or joined path.
 */
function resolvePath(value, suiteDir) {
	const isHomeRelative = value === "~" || value.startsWith("~/");
	if (isHomeRelative || isAbsolute(value)) return value;
	return join(suiteDir, value);
}
|
|
850
|
+
/**
 * Return a shallow copy of a `claudeCode` block with its path fields resolved
 * against `suiteDir`.
 *
 * - `mcpConfig`: resolved when it is a string.
 * - `pluginDirs`, `addDirs`: each string entry is resolved; other entries pass through.
 * - `systemPromptFile`, `appendSystemPromptFile`, `debugFile`, `settings`:
 *   resolved when they are strings that do not start with `{` after trimming.
 *
 * @param block Claude Code config block (not mutated).
 * @param suiteDir Base directory for relative paths.
 * @returns The copied block with resolved paths.
 */
function resolveClaudeCodePaths(block, suiteDir) {
	const absolutize = (entry) => (typeof entry === "string" ? resolvePath(entry, suiteDir) : entry);
	const isPathString = (candidate) => typeof candidate === "string" && !candidate.trim().startsWith("{");
	const out = { ...block };
	if (typeof out.mcpConfig === "string") out.mcpConfig = resolvePath(out.mcpConfig, suiteDir);
	for (const listField of ["pluginDirs", "addDirs"]) {
		if (Array.isArray(out[listField])) out[listField] = out[listField].map((entry) => absolutize(entry));
	}
	for (const fileField of ["systemPromptFile", "appendSystemPromptFile", "debugFile", "settings"]) {
		if (isPathString(out[fileField])) out[fileField] = resolvePath(out[fileField], suiteDir);
	}
	return out;
}
|
|
867
|
+
/**
 * Return a shallow copy of a `codex` block with its path fields resolved
 * against `suiteDir`.
 *
 * String entries of `addDirs` and string values of `outputSchema` /
 * `outputLastMessage` are resolved; everything else is copied as-is.
 *
 * @param block Codex config block (not mutated).
 * @param suiteDir Base directory for relative paths.
 * @returns The copied block with resolved paths.
 */
function resolveCodexPaths(block, suiteDir) {
	const out = { ...block };
	if (Array.isArray(out.addDirs)) {
		out.addDirs = out.addDirs.map((dir) => {
			if (typeof dir !== "string") return dir;
			return resolvePath(dir, suiteDir);
		});
	}
	["outputSchema", "outputLastMessage"].forEach((key) => {
		if (typeof out[key] === "string") out[key] = resolvePath(out[key], suiteDir);
	});
	return out;
}
|
|
877
|
+
/**
 * Resolve relative paths in one config layer against `suiteDir`.
 *
 * Returns `undefined` for a missing layer; otherwise a shallow copy in which
 * `cwd` (when a string) and the nested `claudeCode` / `codex` blocks (when
 * plain objects) have been resolved.
 *
 * NOTE(review): the `geminiCli` block is passed through untouched, so
 * `geminiCli.includeDirectories` stays relative — confirm whether that is intended.
 *
 * @param config Config layer, or a falsy value when absent.
 * @param suiteDir Base directory for relative paths.
 * @returns The resolved copy, or `undefined`.
 */
function resolveConfigPaths(config, suiteDir) {
	if (!config) return void 0;
	const isPlainBlock = (candidate) => Boolean(candidate) && typeof candidate === "object" && !Array.isArray(candidate);
	const out = { ...config };
	if (typeof out.cwd === "string") {
		out.cwd = resolvePath(out.cwd, suiteDir);
	}
	if (isPlainBlock(out.claudeCode)) {
		out.claudeCode = resolveClaudeCodePaths(out.claudeCode, suiteDir);
	}
	if (isPlainBlock(out.codex)) {
		out.codex = resolveCodexPaths(out.codex, suiteDir);
	}
	return out;
}
|
|
886
|
+
/**
 * Resolve config paths across a loaded suite, in place.
 *
 * Rewrites `suite.defaultConfig`, every matrix cell's `config` (kept as-is
 * when resolution yields nothing) and every case's `config`, using the
 * directory of `suiteFilePath` as the base.
 *
 * @param suite Loaded suite; mutated.
 * @param suiteFilePath Path of the suite file.
 */
function resolveSuitePaths(suite, suiteFilePath) {
	const baseDir = configFileDir(suiteFilePath);
	suite.defaultConfig = resolveConfigPaths(suite.defaultConfig, baseDir);
	for (const cell of suite.matrix) {
		const resolvedCellConfig = resolveConfigPaths(cell.config, baseDir);
		cell.config = resolvedCellConfig ?? cell.config;
	}
	for (const testCase of suite.cases) {
		testCase.config = resolveConfigPaths(testCase.config, baseDir);
	}
}
|
|
893
|
+
/**
 * Parent directory of a suite or grading config file path.
 *
 * Both `/` and `\` are treated as separators. A path without any separator
 * yields `"."`. Files directly under a root keep that root: `/suite.yaml`
 * yields `/` and `C:\suite.yaml` yields `C:\`. Stripping the last segment
 * would otherwise give `""` or the drive-relative `C:`, which turn later
 * joins into relative paths.
 *
 * @param filePath Path of the config file.
 * @returns The containing directory.
 */
function configFileDir(filePath) {
	if (!/[/\\]/.test(filePath)) return ".";
	const parent = filePath.replace(/[/\\][^/\\]+$/, "");
	// The whole string was "<sep><name>": the parent is the root separator itself.
	if (parent === "") return filePath[0];
	// "<drive>:<sep><name>": keep the separator so the result names the drive root.
	if (/^[A-Za-z]:$/.test(parent)) return `${parent}${filePath[2]}`;
	return parent;
}
|
|
897
|
+
/**
|
|
898
|
+
* Heuristically resolve env var values that look like relative file paths.
|
|
834
899
|
*
|
|
835
|
-
*
|
|
836
|
-
*
|
|
900
|
+
* Used for grading config where credential or config paths may be expressed
|
|
901
|
+
* relative to the grading YAML location.
|
|
837
902
|
*/
|
|
903
|
+
function resolveEnvPaths(env, baseDir) {
|
|
904
|
+
const resolved = {};
|
|
905
|
+
for (const [key, value] of Object.entries(env)) if (value.startsWith("./") || value.startsWith("../")) resolved[key] = resolvePath(value, baseDir);
|
|
906
|
+
else resolved[key] = value;
|
|
907
|
+
return resolved;
|
|
908
|
+
}
|
|
909
|
+
/**
 * Resolve relative paths in a grading config, in place.
 *
 * `config.judge` is REPLACED with a new object (callers must read the result
 * back from `config.judge`): the harness fields are resolved via
 * `resolveConfigPaths`, `adapter` and `maxConcurrent` are carried over
 * unchanged, and a truthy `env` map is passed through `resolveEnvPaths`.
 *
 * @param config Object holding a `judge` block; mutated.
 * @param configFilePath Path of the file the config was read from.
 */
function resolveGradingConfigPaths(config, configFilePath) {
	const baseDir = configFileDir(configFilePath);
	const { adapter, maxConcurrent, ...harnessFields } = config.judge;
	const resolvedFields = resolveConfigPaths(harnessFields, baseDir) ?? harnessFields;
	config.judge = { ...resolvedFields, adapter, maxConcurrent };
	const { env } = config.judge;
	if (env) config.judge.env = resolveEnvPaths(env, baseDir);
}
|
|
920
|
+
/**
 * Resolve a pipeline artifact path against the suite directory, substituting
 * `defaultRelative` when `value` is null or undefined.
 *
 * @param value Path from the pipeline config, possibly absent.
 * @param defaultRelative Fallback relative filename.
 * @param suiteDir Base directory for relative paths.
 * @returns The resolved path.
 */
function resolvePipelinePath(value, defaultRelative, suiteDir) {
	const candidate = value ?? defaultRelative;
	return resolvePath(candidate, suiteDir);
}
|
|
924
|
+
/**
 * Resolve artifact paths for each pipeline step that is present.
 *
 * Steps with a truthy value (`run`, `grade`, `envelope`) are resolved with
 * their dedicated resolver; absent steps are omitted from the result.
 *
 * @param pipeline Parsed pipeline config (not mutated).
 * @param suiteFilePath Path of the suite file; its directory is the base.
 * @returns A new pipeline object containing only the resolved steps.
 */
function resolvePipelineConfigPaths(pipeline, suiteFilePath) {
	const baseDir = configFileDir(suiteFilePath);
	const stepResolvers = [
		["run", resolvePipelineRunStep],
		["grade", resolvePipelineGradeStep],
		["envelope", resolvePipelineEnvelopeStep],
	];
	const out = {};
	for (const [stepName, resolveStep] of stepResolvers) {
		if (pipeline[stepName]) out[stepName] = resolveStep(pipeline[stepName], baseDir);
	}
	return out;
}
|
|
933
|
+
/**
 * Resolve the `run` step's output path (default `DEFAULT_PIPELINE_OUTPUTS.run`).
 *
 * @param step Run step config (not mutated).
 * @param suiteDir Base directory for relative paths.
 * @returns A copy of the step with `output` resolved.
 */
function resolvePipelineRunStep(step, suiteDir) {
	const resolved = { ...step };
	resolved.output = resolvePipelinePath(step.output, DEFAULT_PIPELINE_OUTPUTS.run, suiteDir);
	return resolved;
}
|
|
940
|
+
/**
 * Resolve the `grade` step's paths.
 *
 * `input` is resolved only when truthy (otherwise set to `undefined`);
 * `output` falls back to `DEFAULT_PIPELINE_OUTPUTS.grade`.
 *
 * @param step Grade step config (not mutated).
 * @param suiteDir Base directory for relative paths.
 * @returns A copy of the step with `input` and `output` resolved.
 */
function resolvePipelineGradeStep(step, suiteDir) {
	const resolved = { ...step };
	if (step.input) {
		resolved.input = resolvePipelinePath(step.input, DEFAULT_PIPELINE_OUTPUTS.run, suiteDir);
	} else {
		resolved.input = void 0;
	}
	resolved.output = resolvePipelinePath(step.output, DEFAULT_PIPELINE_OUTPUTS.grade, suiteDir);
	return resolved;
}
|
|
948
|
+
/**
 * Resolve the `envelope` step's paths.
 *
 * `report` and `grading` are resolved only when truthy (otherwise set to
 * `undefined`); `output` falls back to `DEFAULT_PIPELINE_OUTPUTS.envelope`.
 *
 * @param step Envelope step config (not mutated).
 * @param suiteDir Base directory for relative paths.
 * @returns A copy of the step with `report`, `grading` and `output` resolved.
 */
function resolvePipelineEnvelopeStep(step, suiteDir) {
	const resolveIfSet = (value, fallback) => (value ? resolvePipelinePath(value, fallback, suiteDir) : void 0);
	const resolved = { ...step };
	resolved.report = resolveIfSet(step.report, DEFAULT_PIPELINE_OUTPUTS.run);
	resolved.grading = resolveIfSet(step.grading, DEFAULT_PIPELINE_OUTPUTS.grade);
	resolved.output = resolvePipelinePath(step.output, DEFAULT_PIPELINE_OUTPUTS.envelope, suiteDir);
	return resolved;
}
|
|
957
|
+
//#endregion
|
|
958
|
+
//#region src/config/grading-schema.ts
|
|
838
959
|
/**
|
|
839
|
-
*
|
|
960
|
+
* Zod schema for standalone grading YAML (`grading.yaml`).
|
|
840
961
|
*
|
|
841
|
-
*
|
|
962
|
+
* The top-level `judge` block reuses {@link ConfigPartialSchema} fields plus
|
|
963
|
+
* grader-specific concurrency and system-instruction overrides.
|
|
842
964
|
*/
|
|
843
|
-
|
|
965
|
+
/** Top-level `judge` block — mirrors harness config fields plus grader concurrency. */
|
|
966
|
+
const JudgeConfigSchema = ConfigPartialSchema.extend({
|
|
967
|
+
adapter: z.string().optional(),
|
|
968
|
+
maxConcurrent: z.number().int().positive().optional(),
|
|
969
|
+
/** Optional judge prompt prefix (maps to upstream system_instruction). */
|
|
970
|
+
system_instruction: z.string().optional()
|
|
971
|
+
});
|
|
972
|
+
const GradingConfigSchema = z.object({ judge: JudgeConfigSchema });
|
|
973
|
+
//#endregion
|
|
974
|
+
//#region src/config/suite-file-schema.ts
|
|
975
|
+
/** Single-file suite with optional inline judge and pipeline orchestration. */
|
|
976
|
+
const SuiteFileSingleSchema = TestSuiteSchema.extend({
|
|
977
|
+
judge: JudgeConfigSchema.optional(),
|
|
978
|
+
pipeline: PipelineConfigSchema.optional()
|
|
979
|
+
});
|
|
980
|
+
/** Directory suite root with optional inline judge and pipeline orchestration. */
|
|
981
|
+
const SuiteFileDirectorySchema = SuiteDirectorySchema.extend({
|
|
982
|
+
judge: JudgeConfigSchema.optional(),
|
|
983
|
+
pipeline: PipelineConfigSchema.optional()
|
|
984
|
+
});
|
|
985
|
+
//#endregion
|
|
986
|
+
//#region src/config/suite-document-loader.ts
|
|
987
|
+
/**
|
|
988
|
+
* Load a unified suite.yaml document (suite + optional judge + pipeline).
|
|
989
|
+
*/
|
|
990
|
+
/** Load suite.yaml (or directory) including optional judge and pipeline blocks. */
|
|
991
|
+
async function loadSuiteDocument(filePath, options = {}) {
|
|
844
992
|
const absolutePath = resolve(filePath);
|
|
845
993
|
let info;
|
|
846
994
|
try {
|
|
@@ -848,26 +996,12 @@ async function loadSuite(filePath) {
|
|
|
848
996
|
} catch (err) {
|
|
849
997
|
throw new ConfigError(`failed to read suite path: ${err instanceof Error ? err.message : String(err)}`, filePath);
|
|
850
998
|
}
|
|
851
|
-
|
|
852
|
-
return
|
|
853
|
-
|
|
854
|
-
/** Load and parse a single-file suite (not a directory layout). */
|
|
855
|
-
async function loadSuiteFile(absolutePath) {
|
|
856
|
-
let content;
|
|
857
|
-
try {
|
|
858
|
-
content = await readFile(absolutePath, "utf8");
|
|
859
|
-
} catch (err) {
|
|
860
|
-
throw new ConfigError(`failed to read suite file: ${err instanceof Error ? err.message : String(err)}`, absolutePath);
|
|
861
|
-
}
|
|
862
|
-
return parseSuite(content, absolutePath);
|
|
999
|
+
const strict = options.validateOrchestration !== false;
|
|
1000
|
+
if (info.isDirectory()) return loadSuiteDocumentDirectory(absolutePath, strict);
|
|
1001
|
+
return loadSuiteDocumentFile(absolutePath, strict);
|
|
863
1002
|
}
|
|
864
|
-
/**
|
|
865
|
-
|
|
866
|
-
*
|
|
867
|
-
* Cases from `suite.yaml` sort before external case files; within each file,
|
|
868
|
-
* array order is preserved.
|
|
869
|
-
*/
|
|
870
|
-
async function loadSuiteDirectory(dir) {
|
|
1003
|
+
/** Load suite.yaml from a directory layout (cases under `cases/`). */
|
|
1004
|
+
async function loadSuiteDocumentDirectory(dir, strict) {
|
|
871
1005
|
const suiteYamlPath = join(dir, "suite.yaml");
|
|
872
1006
|
let content;
|
|
873
1007
|
try {
|
|
@@ -875,7 +1009,7 @@ async function loadSuiteDirectory(dir) {
|
|
|
875
1009
|
} catch (err) {
|
|
876
1010
|
throw new ConfigError(`missing suite.yaml in suite directory: ${err instanceof Error ? err.message : String(err)}`, dir);
|
|
877
1011
|
}
|
|
878
|
-
const base =
|
|
1012
|
+
const { judge, pipeline, suite: base } = parseSuiteFileRoot(content, suiteYamlPath, "directory", strict);
|
|
879
1013
|
const casesDir = join(dir, "cases");
|
|
880
1014
|
const caseFiles = await collectCaseYamlFiles(casesDir);
|
|
881
1015
|
const tagged = base.cases.map((testCase, index) => ({
|
|
@@ -904,94 +1038,242 @@ async function loadSuiteDirectory(dir) {
|
|
|
904
1038
|
cases
|
|
905
1039
|
};
|
|
906
1040
|
resolveSuitePaths(suite, suiteYamlPath);
|
|
907
|
-
return suite;
|
|
1041
|
+
return buildSuiteDocument(suiteYamlPath, suite, judge, pipeline);
|
|
1042
|
+
}
|
|
1043
|
+
/** Load a single suite.yaml file (inline cases). */
|
|
1044
|
+
async function loadSuiteDocumentFile(absolutePath, strict) {
|
|
1045
|
+
let content;
|
|
1046
|
+
try {
|
|
1047
|
+
content = await readFile(absolutePath, "utf8");
|
|
1048
|
+
} catch (err) {
|
|
1049
|
+
throw new ConfigError(`failed to read suite file: ${err instanceof Error ? err.message : String(err)}`, absolutePath);
|
|
1050
|
+
}
|
|
1051
|
+
const { judge, pipeline, suite } = parseSuiteFileRoot(content, absolutePath, "single", strict);
|
|
1052
|
+
resolveSuitePaths(suite, absolutePath);
|
|
1053
|
+
return buildSuiteDocument(absolutePath, suite, judge, pipeline);
|
|
908
1054
|
}
|
|
909
1055
|
/**
|
|
910
|
-
* Parse suite
|
|
1056
|
+
* Parse suite.yaml root and validate against the appropriate schema.
|
|
911
1057
|
*
|
|
912
|
-
*
|
|
1058
|
+
* When `strict` is true, uses extended schemas that validate `judge:` and
|
|
1059
|
+
* `pipeline:` blocks (for `loadSuiteDocument`). When false, uses base schemas
|
|
1060
|
+
* that silently strip unknown keys (for `loadSuite`).
|
|
913
1061
|
*/
|
|
914
|
-
function
|
|
1062
|
+
function parseSuiteFileRoot(yamlContent, sourcePath, layout, strict) {
|
|
915
1063
|
let raw;
|
|
916
1064
|
try {
|
|
917
1065
|
raw = parse(yamlContent);
|
|
918
1066
|
} catch (err) {
|
|
919
1067
|
throw new ConfigError(`YAML parse error: ${err instanceof Error ? err.message : String(err)}`, sourcePath);
|
|
920
1068
|
}
|
|
921
|
-
|
|
922
|
-
|
|
923
|
-
|
|
924
|
-
|
|
925
|
-
|
|
1069
|
+
if (!strict) {
|
|
1070
|
+
const validated = (layout === "directory" ? SuiteDirectorySchema : TestSuiteSchema).safeParse(raw);
|
|
1071
|
+
if (!validated.success) throw new ConfigError(`validation failed:\n${formatZodError$2(validated.error, sourcePath)}`, sourcePath);
|
|
1072
|
+
return { suite: (layout === "directory" ? transformSuiteDirectory : transformSuite)(validated.data) };
|
|
1073
|
+
}
|
|
1074
|
+
if (layout === "directory") {
|
|
1075
|
+
const validated = SuiteFileDirectorySchema.safeParse(raw);
|
|
1076
|
+
if (!validated.success) throw new ConfigError(`validation failed:\n${formatZodError$2(validated.error, sourcePath)}`, sourcePath);
|
|
1077
|
+
return extractSuiteFileParts(validated.data, sourcePath, transformSuiteDirectory);
|
|
1078
|
+
}
|
|
1079
|
+
const validated = SuiteFileSingleSchema.safeParse(raw);
|
|
1080
|
+
if (!validated.success) throw new ConfigError(`validation failed:\n${formatZodError$2(validated.error, sourcePath)}`, sourcePath);
|
|
1081
|
+
return extractSuiteFileParts(validated.data, sourcePath, transformSuite);
|
|
926
1082
|
}
|
|
927
|
-
/**
|
|
928
|
-
function
|
|
1083
|
+
/**
 * Split validated suite YAML into suite, judge, and pipeline parts, resolving
 * relative paths in the judge and pipeline blocks.
 *
 * `resolveGradingConfigPaths` replaces the `judge` property on the object it
 * is given rather than mutating the judge block itself, so the resolved block
 * must be read back from that wrapper. Passing a throwaway `{ judge }` literal
 * and keeping the local copy would return an unresolved judge.
 *
 * @param data Validated root object; may carry `judge` and `pipeline` keys.
 * @param sourcePath Path of the suite file, used as the base for relative paths.
 * @param transform Converts the remaining suite fields into the runtime suite.
 * @returns `{ suite, judge, pipeline }`; `judge` / `pipeline` are `undefined` when absent.
 */
function extractSuiteFileParts(data, sourcePath, transform) {
	const { judge: rawJudge, pipeline: rawPipeline, ...suiteRaw } = data;
	const suite = transform(suiteRaw);
	let judge;
	if (rawJudge) {
		// Keep a handle on the wrapper: the resolver reassigns `gradingConfig.judge`.
		const gradingConfig = { judge: { ...rawJudge } };
		resolveGradingConfigPaths(gradingConfig, sourcePath);
		judge = gradingConfig.judge;
	}
	let pipeline;
	if (rawPipeline) {
		// Apply default artifact names first, then absolutize against the suite file.
		pipeline = resolvePipelineConfigPaths(transformPipelineConfig(rawPipeline), sourcePath);
	}
	return {
		suite,
		judge,
		pipeline
	};
}
|
|
1103
|
+
/**
 * Build the runtime pipeline config from the validated `pipeline:` block.
 *
 * A step is emitted whenever its key is not `undefined`. Omitted output paths
 * fall back to `DEFAULT_PIPELINE_OUTPUTS`, and the envelope `projection`
 * defaults to `"envelope"`. Only the known fields of each step are copied.
 *
 * @param raw Validated pipeline block.
 * @returns Pipeline config with defaults applied.
 */
function transformPipelineConfig(raw) {
	const pipeline = {};
	if (raw.run !== void 0) {
		const runStep = raw.run ?? {};
		pipeline.run = {
			output: runStep.output ?? DEFAULT_PIPELINE_OUTPUTS.run,
			maxConcurrent: runStep.maxConcurrent
		};
	}
	if (raw.grade !== void 0) {
		const gradeStep = raw.grade ?? {};
		pipeline.grade = {
			input: gradeStep.input,
			output: gradeStep.output ?? DEFAULT_PIPELINE_OUTPUTS.grade,
			maxConcurrent: gradeStep.maxConcurrent
		};
	}
	if (raw.envelope !== void 0) {
		const envelopeStep = raw.envelope ?? {};
		pipeline.envelope = {
			report: envelopeStep.report,
			grading: envelopeStep.grading,
			output: envelopeStep.output ?? DEFAULT_PIPELINE_OUTPUTS.envelope,
			projection: envelopeStep.projection ?? "envelope",
			includeRawStreamEvents: envelopeStep.includeRawStreamEvents,
			noTranscript: envelopeStep.noTranscript
		};
	}
	return pipeline;
}
|
|
1125
|
+
/**
 * Bundle the parsed parts into the runtime {@link SuiteDocument}.
 *
 * @param suitePath Path of the suite file or directory; stored in resolved (absolute) form.
 * @param suite Suite value, stored as given.
 * @param judge Optional judge config, stored as given.
 * @param pipeline Optional pipeline config, stored as given.
 * @returns Object with `suitePath`, `suite`, `judge`, and `pipeline`.
 */
function buildSuiteDocument(suitePath, suite, judge, pipeline) {
	const absoluteSuitePath = resolve(suitePath);
	return {
		suitePath: absoluteSuitePath,
		suite,
		judge,
		pipeline
	};
}
|
|
1134
|
+
/**
 * Render a zod validation error as one line per issue.
 *
 * @param err Error object exposing an `issues` array of `{ path, message }`.
 * @param sourcePath Optional source file; when truthy it prefixes each issue location.
 * @returns Newline-joined issue lines; an empty issue path is shown as `(root)`.
 */
function formatZodError$2(err, sourcePath) {
	const lines = [];
	for (const { path: segments, message } of err.issues) {
		const location = segments.length > 0 ? segments.join(".") : "(root)";
		const prefix = sourcePath ? `${sourcePath} → ${location}` : location;
		lines.push(` ${prefix}: ${message}`);
	}
	return lines.join("\n");
}
|
|
1140
|
+
//#endregion
|
|
1141
|
+
//#region src/config/grading-loader.ts
|
|
1142
|
+
/**
|
|
1143
|
+
* Load standalone grading YAML for `harness-eval grade`.
|
|
1144
|
+
*
|
|
1145
|
+
* Also accepts unified suite.yaml files with an inline `judge:` block.
|
|
1146
|
+
*/
|
|
1147
|
+
/**
 * Load a grading config from disk and resolve its relative paths.
 *
 * `filePath` may point at a standalone grading YAML file, a unified suite YAML
 * file with an inline `judge:` block, or a directory, in which case the
 * `suite.yaml` inside it is read.
 *
 * @param filePath Path to the grading YAML file or to a suite directory.
 * @returns The grading config produced by the parser for the detected layout.
 * @throws {ConfigError} When the path cannot be stat'ed or read, the YAML is
 *   malformed, or validation fails.
 */
async function loadGradingConfig(filePath) {
	const absolutePath = resolve(filePath);
	let info;
	try {
		info = await stat(absolutePath);
	} catch (err) {
		throw new ConfigError(`failed to read grading config: ${err instanceof Error ? err.message : String(err)}`, filePath);
	}
	if (info.isDirectory()) return loadGradingFromSuiteYaml(join(absolutePath, "suite.yaml"));
	let content;
	try {
		content = await readFile(absolutePath, "utf8");
	} catch (err) {
		throw new ConfigError(`failed to read grading config: ${err instanceof Error ? err.message : String(err)}`, filePath);
	}
	// parseGradingConfig parses the YAML once inside its own try/catch and
	// already routes unified suite files to the suite parser. Pre-parsing here
	// would repeat that work and let malformed YAML escape as a raw parser
	// error instead of a ConfigError.
	return parseGradingConfig(content, absolutePath);
}
|
|
1166
|
+
/**
 * Parse grading YAML supplied as a string.
 *
 * Documents recognised by `isSuiteRoot` are handed to the suite parser;
 * anything else is validated against the standalone grading schema.
 *
 * @param yamlContent YAML text to parse.
 * @param sourcePath Optional path for error messages and path resolution.
 * @returns Config object holding a shallow copy of the validated `judge` block.
 * @throws {ConfigError} On YAML parse errors or schema validation failure.
 */
function parseGradingConfig(yamlContent, sourcePath) {
	let document;
	try {
		document = parse(yamlContent);
	} catch (cause) {
		const reason = cause instanceof Error ? cause.message : String(cause);
		throw new ConfigError(`YAML parse error: ${reason}`, sourcePath);
	}
	if (isSuiteRoot(document)) return parseGradingFromSuiteRaw(document, sourcePath ?? "suite.yaml");
	const result = GradingConfigSchema.safeParse(document);
	if (!result.success) {
		const details = formatZodError$1(result.error, sourcePath);
		throw new ConfigError(`validation failed:\n${details}`, sourcePath);
	}
	const config = { judge: { ...result.data.judge } };
	// Path resolution is skipped when no source path was given.
	if (sourcePath) resolveGradingConfigPaths(config, sourcePath);
	return config;
}
|
|
939
|
-
/**
|
|
940
|
-
function
|
|
1185
|
+
/**
 * Tell a unified suite document apart from standalone grading YAML.
 *
 * @param raw Parsed YAML value of any type.
 * @returns `true` when `raw` is a non-null object that has a `cases` key, or
 *   both a `matrix` key and an `adapter` key; `false` otherwise.
 */
function isSuiteRoot(raw) {
	if (typeof raw !== "object" || raw === null) return false;
	if ("cases" in raw) return true;
	// Without `cases`, both `matrix` and `adapter` must be present.
	return "matrix" in raw && "adapter" in raw;
}
|
|
1190
|
+
/**
 * Read a suite YAML file and extract its grading config.
 *
 * @param suiteYamlPath Path of the suite YAML file to read.
 * @returns The grading config built from the file's `judge` block.
 * @throws {ConfigError} When the file cannot be read or its YAML cannot be parsed;
 *   the suite parser it delegates to may throw as well.
 */
async function loadGradingFromSuiteYaml(suiteYamlPath) {
	const describe = (failure) => failure instanceof Error ? failure.message : String(failure);
	let yamlText;
	try {
		yamlText = await readFile(suiteYamlPath, "utf8");
	} catch (readFailure) {
		throw new ConfigError(`failed to read suite file: ${describe(readFailure)}`, suiteYamlPath);
	}
	let document;
	try {
		document = parse(yamlText);
	} catch (parseFailure) {
		throw new ConfigError(`YAML parse error: ${describe(parseFailure)}`, suiteYamlPath);
	}
	return parseGradingFromSuiteRaw(document, suiteYamlPath);
}
|
|
1205
|
+
/**
 * Extract the grading config from an already-parsed unified suite document.
 *
 * The single-file schema is tried first, then the directory schema; the first
 * one that validates supplies the `judge` block.
 *
 * @param raw Parsed YAML value of the suite file.
 * @param sourcePath Path used in error messages and passed to the path resolver.
 * @returns Config object holding a shallow copy of the `judge` block.
 * @throws {ConfigError} When the matching schema yields no `judge` block, or
 *   when neither schema validates.
 */
function parseGradingFromSuiteRaw(raw, sourcePath) {
	// Shared by both layouts: require a judge block, copy it, resolve its paths.
	const judgeConfigFrom = (data) => {
		if (!data.judge) throw new ConfigError("suite file has no judge block", sourcePath);
		const config = { judge: { ...data.judge } };
		resolveGradingConfigPaths(config, sourcePath);
		return config;
	};
	const asSingle = SuiteFileSingleSchema.safeParse(raw);
	if (asSingle.success) return judgeConfigFrom(asSingle.data);
	const asDirectory = SuiteFileDirectorySchema.safeParse(raw);
	if (asDirectory.success) return judgeConfigFrom(asDirectory.data);
	// The directory schema's error is reported in preference to the single-file one.
	const failure = asDirectory.error ?? asSingle.error;
	throw new ConfigError(`validation failed:\n${formatZodError$1(failure, sourcePath)}`, sourcePath);
}
|
|
1222
|
+
/**
 * Turn a zod validation error into printable text, one line per issue.
 *
 * @param err Error object exposing an `issues` array of `{ path, message }`.
 * @param sourcePath Optional source file; when truthy it is prepended to each issue location.
 * @returns Newline-joined issue lines; issues with an empty path are labelled `(root)`.
 */
function formatZodError$1(err, sourcePath) {
	const describeIssue = ({ path: segments, message }) => {
		const where = segments.length > 0 ? segments.join(".") : "(root)";
		const label = sourcePath ? `${sourcePath} → ${where}` : where;
		return ` ${label}: ${message}`;
	};
	return err.issues.map(describeIssue).join("\n");
}
|
|
1229
|
+
//#endregion
|
|
1230
|
+
//#region src/config/loader.ts
|
|
949
1231
|
/**
|
|
950
|
-
*
|
|
1232
|
+
* Load a `TestSuite` from a YAML file, directory, or string.
|
|
951
1233
|
*
|
|
952
|
-
*
|
|
1234
|
+
* For unified suite.yaml with optional `judge:` and `pipeline:` blocks,
|
|
1235
|
+
* use {@link loadSuiteDocument}.
|
|
953
1236
|
*/
|
|
954
|
-
|
|
955
|
-
|
|
956
|
-
|
|
957
|
-
|
|
958
|
-
|
|
959
|
-
|
|
960
|
-
|
|
961
|
-
|
|
962
|
-
}
|
|
963
|
-
/** Validate one raw case object against {@link TestCaseSchema}. */
|
|
964
|
-
function validateRawCase(raw, sourcePath, index) {
|
|
965
|
-
const validated = TestCaseSchema.safeParse(raw);
|
|
966
|
-
if (!validated.success) throw new ConfigError(`validation failed:\n${formatZodError(validated.error, sourcePath)}`, sourcePath);
|
|
967
|
-
return validated.data;
|
|
1237
|
+
/**
 * Load only the suite portion from a file path or directory path.
 *
 * Delegates to {@link loadSuiteDocument} with `validateOrchestration: false`
 * and discards everything but the `suite` field, so callers that only need the
 * `TestSuite` do not receive `judge:` / `pipeline:` metadata. Use
 * {@link loadSuiteDocument} directly when that metadata is needed.
 *
 * @param filePath Path to a suite YAML file or suite directory.
 * @returns The loaded suite.
 */
async function loadSuite(filePath) {
	const document = await loadSuiteDocument(filePath, { validateOrchestration: false });
	return document.suite;
}
|
|
969
1247
|
/**
|
|
970
|
-
*
|
|
1248
|
+
* Parse suite YAML from a string (single-file layout with inline cases).
|
|
971
1249
|
*
|
|
972
|
-
*
|
|
973
|
-
* are optional in directory layout.
|
|
1250
|
+
* Unknown top-level keys such as `judge` and `pipeline` are stripped.
|
|
974
1251
|
*/
|
|
975
|
-
|
|
976
|
-
|
|
977
|
-
|
|
978
|
-
|
|
979
|
-
|
|
980
|
-
|
|
981
|
-
} catch (err) {
|
|
982
|
-
if (err instanceof Error && "code" in err && err.code === "ENOENT") return;
|
|
983
|
-
throw err;
|
|
984
|
-
}
|
|
985
|
-
for (const entry of entries) {
|
|
986
|
-
const fullPath = join(dir, entry.name);
|
|
987
|
-
if (entry.isDirectory()) await walk(fullPath);
|
|
988
|
-
else if (entry.isFile() && (entry.name.endsWith(".yaml") || entry.name.endsWith(".yml"))) files.push(fullPath);
|
|
989
|
-
}
|
|
1252
|
+
function parseSuite(yamlContent, sourcePath) {
	let document;
	try {
		document = parse(yamlContent);
	} catch (cause) {
		const reason = cause instanceof Error ? cause.message : String(cause);
		throw new ConfigError(`YAML parse error: ${reason}`, sourcePath);
	}
	const result = TestSuiteSchema.safeParse(document);
	if (!result.success) {
		const details = formatZodError(result.error, sourcePath);
		throw new ConfigError(`validation failed:\n${details}`, sourcePath);
	}
	const suite = transformSuite(result.data);
	// Suite paths are only resolved when a source path was supplied.
	if (sourcePath) resolveSuitePaths(suite, resolve(sourcePath));
	return suite;
}
|
|
1265
|
+
/**
 * Parse the `suite.yaml` of a directory-layout suite (cases may be omitted).
 *
 * @param yamlContent YAML text of the `suite.yaml` file.
 * @param sourcePath Optional path used in error messages.
 * @returns The transformed directory-layout suite.
 * @throws {ConfigError} On YAML parse errors or schema validation failure.
 * @internal
 */
function parseSuiteDirectory(yamlContent, sourcePath) {
	let document;
	try {
		document = parse(yamlContent);
	} catch (cause) {
		const reason = cause instanceof Error ? cause.message : String(cause);
		throw new ConfigError(`YAML parse error: ${reason}`, sourcePath);
	}
	const result = SuiteDirectorySchema.safeParse(document);
	if (result.success) return transformSuiteDirectory(result.data);
	const details = formatZodError(result.error, sourcePath);
	throw new ConfigError(`validation failed:\n${details}`, sourcePath);
}
|
|
994
|
-
/** Format a zod validation error with optional source file prefix. */
|
|
995
1277
|
function formatZodError(err, sourcePath) {
|
|
996
1278
|
return err.issues.map((issue) => {
|
|
997
1279
|
const path = issue.path.length > 0 ? issue.path.join(".") : "(root)";
|
|
@@ -999,6 +1281,6 @@ function formatZodError(err, sourcePath) {
|
|
|
999
1281
|
}).join("\n");
|
|
1000
1282
|
}
|
|
1001
1283
|
//#endregion
|
|
1002
|
-
export { parseGradingConfig as a, loadGradingConfig as i,
|
|
1284
|
+
export { parseGradingConfig as a, parseCasesFile as c, loadGradingConfig as i, ConfigError as l, parseSuite as n, loadSuiteDocument as o, parseSuiteDirectory as r, DEFAULT_PIPELINE_OUTPUTS as s, loadSuite as t };
|
|
1003
1285
|
|
|
1004
|
-
//# sourceMappingURL=loader-
|
|
1286
|
+
//# sourceMappingURL=loader-CiBm4Kf6.js.map
|