@alis-build/harness-eval 0.1.0 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33) hide show
  1. package/README.md +17 -4
  2. package/dist/adapters/claude-code/index.d.ts +1 -1
  3. package/dist/adapters/claude-code/index.js +1 -1
  4. package/dist/{claude-code-ycT0JQZF.js → claude-code-DZ4Vkgp6.js} +35 -6
  5. package/dist/{claude-code-ycT0JQZF.js.map → claude-code-DZ4Vkgp6.js.map} +1 -1
  6. package/dist/cli/bin.js +109 -12
  7. package/dist/cli/bin.js.map +1 -1
  8. package/dist/config/loader.d.ts +1 -1
  9. package/dist/config/loader.js +1 -1
  10. package/dist/{index-6Z17eKZx.d.ts → index-V22PrR0p.d.ts} +2 -1
  11. package/dist/index.d.ts +270 -152
  12. package/dist/index.js +124 -5
  13. package/dist/index.js.map +1 -0
  14. package/dist/{loader-DTvoVfN0.d.ts → loader-C9yQHUPC.d.ts} +19 -2
  15. package/dist/{loader-BCnFJ8rm.js → loader-DcI0KfRX.js} +291 -4
  16. package/dist/loader-DcI0KfRX.js.map +1 -0
  17. package/dist/{build-DsVJ_UeU.js → projections-BcX7w-f6.js} +486 -243
  18. package/dist/projections-BcX7w-f6.js.map +1 -0
  19. package/dist/runner/suite.d.ts +1 -1
  20. package/dist/runner/suite.js +1 -1
  21. package/dist/{suite-BoOvK_lq.d.ts → suite-DPJMIEbu.d.ts} +7 -2
  22. package/dist/{suite-chj0j22j.js → suite-Dlzl-HI0.js} +58 -4
  23. package/dist/suite-Dlzl-HI0.js.map +1 -0
  24. package/dist/{types-BQol062t.d.ts → types-CD3TwOtZ.d.ts} +151 -10
  25. package/package.json +4 -2
  26. package/schemas/eval-interchange-instances.schema.json +196 -0
  27. package/schemas/eval-interchange.schema.json +65 -52
  28. package/schemas/eval-run-envelope.schema.json +182 -425
  29. package/dist/build-DsVJ_UeU.js.map +0 -1
  30. package/dist/loader-BCnFJ8rm.js.map +0 -1
  31. package/dist/suite-chj0j22j.js.map +0 -1
  32. package/schemas/eval-interchange-agent-trace.schema.json +0 -322
  33. package/schemas/eval-interchange-proto-instance.schema.json +0 -106
package/dist/index.js CHANGED
@@ -1,5 +1,124 @@
1
- import { _ as isUserMessage, a as AdapterError, c as buildTrajectory, d as isResult, f as isSystemInit, g as isToolUseBlock, h as isToolResultBlock, l as namespaceOf, m as isTextBlock, n as claude_code_exports, o as parseStreamJson, p as isSystemRetry, s as TrajectoryBuilder, u as isAssistantMessage } from "./claude-code-ycT0JQZF.js";
2
- import { A as TRAJECTORY_SCHEMA_VERSION, C as gradeReport, D as emitOtel, E as createClaudeGrader, O as trajectoryToOtlp, S as resolveGradeOptions, T as trajectoryToTranscript, _ as trajectoryRecall, a as toProtoInstances, b as formatGradingConsole, c as toolCallValid, d as toolParameterKvMatch, f as computeTrajectoryMetrics, g as trajectoryPrecision, h as trajectoryInOrderMatch, i as toAgentTrace, k as EVAL_RUN_SCHEMA_VERSION, l as toolNameMatch, m as trajectoryExactMatch, n as buildEvalRunEnvelopeFromFiles, o as toTrajectory, p as trajectoryAnyOrderMatch, r as enrichRepetitionWithInterchange, s as computeToolCallMetrics, t as buildEvalRunEnvelope, u as toolParameterKeyMatch, v as trajectorySingleToolUse, x as gradingReportPassed, y as formatReport } from "./build-DsVJ_UeU.js";
3
- import { a as aggregateCell, c as runRepetition, d as getDefaultAdapter, f as listAdapters, h as evaluateAll, i as DEFAULT_THRESHOLD, l as DEFAULT_ADAPTER_ID, m as evaluate, n as createLimit, o as getRepetitions, p as registerAdapter, r as DEFAULT_REPETITIONS, s as mergeConfig, t as runSuite, u as getAdapter } from "./suite-chj0j22j.js";
4
- import { o as ConfigError, r as parseSuite, t as loadSuite } from "./loader-BCnFJ8rm.js";
5
- export { AdapterError, ConfigError, DEFAULT_ADAPTER_ID, DEFAULT_REPETITIONS, DEFAULT_THRESHOLD, EVAL_RUN_SCHEMA_VERSION, TRAJECTORY_SCHEMA_VERSION, TrajectoryBuilder, aggregateCell, buildEvalRunEnvelope, buildEvalRunEnvelopeFromFiles, buildTrajectory, claude_code_exports as claudeCode, computeToolCallMetrics, computeTrajectoryMetrics, createClaudeGrader, createLimit, emitOtel, enrichRepetitionWithInterchange, evaluate, evaluateAll, formatGradingConsole, formatReport, getAdapter, getDefaultAdapter, getRepetitions, gradeReport, gradingReportPassed, isAssistantMessage, isResult, isSystemInit, isSystemRetry, isTextBlock, isToolResultBlock, isToolUseBlock, isUserMessage, listAdapters, loadSuite, mergeConfig, namespaceOf, parseStreamJson, parseSuite, registerAdapter, resolveGradeOptions, runRepetition, runSuite, toAgentTrace, toProtoInstances, toTrajectory, toolCallValid, toolNameMatch, toolParameterKeyMatch, toolParameterKvMatch, trajectoryAnyOrderMatch, trajectoryExactMatch, trajectoryInOrderMatch, trajectoryPrecision, trajectoryRecall, trajectorySingleToolUse, trajectoryToOtlp, trajectoryToTranscript };
1
+ import { _ as isUserMessage, a as AdapterError, c as buildTrajectory, d as isResult, f as isSystemInit, g as isToolUseBlock, h as isToolResultBlock, l as namespaceOf, m as isTextBlock, n as claude_code_exports, o as parseStreamJson, p as isSystemRetry, s as TrajectoryBuilder, u as isAssistantMessage } from "./claude-code-DZ4Vkgp6.js";
2
+ import { D as trajectoryToOtlp, E as emitOtel, O as EVAL_RUN_SCHEMA_VERSION, S as gradeReport, T as createClaudeGrader, _ as serializeToolInput, a as enrichRepetitionWithProtojson, b as gradingReportPassed, c as parseToolInput, d as trajectoryInOrderMatch, f as trajectoryPrecision, g as toTrajectoryInstances, h as toEvaluationInstance, i as buildEvalRunEnvelopeFromFiles, k as TRAJECTORY_SCHEMA_VERSION, l as trajectoryAnyOrderMatch, m as trajectorySingleToolUse, n as toTrajectory, o as toHarnessMetrics, p as trajectoryRecall, r as buildEvalRunEnvelope, s as computeTrajectoryMetrics, t as toInstancesJsonl, u as trajectoryExactMatch, v as formatReport, w as trajectoryToTranscript, x as resolveGradeOptions, y as formatGradingConsole } from "./projections-BcX7w-f6.js";
3
+ import { a as aggregateCell, c as runRepetition, d as getDefaultAdapter, f as listAdapters, h as evaluateAll, i as DEFAULT_THRESHOLD, l as DEFAULT_ADAPTER_ID, m as evaluate, n as createLimit, o as getRepetitions, p as registerAdapter, r as DEFAULT_REPETITIONS, s as mergeConfig, t as runSuite, u as getAdapter } from "./suite-Dlzl-HI0.js";
4
+ import { o as ConfigError, r as parseSuite, t as loadSuite } from "./loader-DcI0KfRX.js";
5
+ //#region src/metrics/tool-calls.ts
6
+ /**
7
+ * Tool-call-level metrics operating on prediction/reference tool-call pairs.
8
+ *
9
+ * Implements Vertex-aligned per-call checks: validity, name match, parameter
10
+ * key match, and full key-value match. Used by trajectory metrics and
11
+ * available for custom eval pipelines.
12
+ *
13
+ * Scores are 0 or 1 per call; {@link computeToolCallMetrics} averages across
14
+ * aligned index pairs (max length of predicted vs reference).
15
+ */
16
+ /** Normalize harness or wire tool call to canonical wire shape for comparison: a string tool_input is passed through unchanged, any other tool_input goes through serializeToolInput. */
17
+ function normalizeToolCall(toolCall) {
18
+ if (typeof toolCall.tool_input === "string") return {
19
+ tool_name: toolCall.tool_name,
20
+ tool_input: toolCall.tool_input
21
+ };
22
+ return {
23
+ tool_name: toolCall.tool_name,
24
+ tool_input: serializeToolInput(toolCall.tool_input)
25
+ };
26
+ }
27
+ /** Parse tool_input via parseToolInput; returns the parsed value, or null when it is null, not an object, or an array. */
28
+ function parsedArgs(toolCall) {
29
+ const parsed = parseToolInput(toolCall.tool_input);
30
+ if (parsed === null || typeof parsed !== "object" || Array.isArray(parsed)) return null;
31
+ return parsed;
32
+ }
33
+ /**
34
+ * Whether a predicted tool call is well-formed (non-empty name, parseable JSON input).
35
+ * The name is trimmed first, so a whitespace-only name is invalid; any JSON value (not only an object) counts as parseable.
36
+ * @returns 1 when valid, 0 otherwise.
37
+ */
38
+ function toolCallValid(toolCall) {
39
+ const normalized = normalizeToolCall(toolCall);
40
+ if (!normalized.tool_name.trim()) return 0;
41
+ try {
42
+ JSON.parse(normalized.tool_input);
43
+ return 1;
44
+ } catch {
45
+ return 0;
46
+ }
47
+ }
48
+ /**
49
+ * Whether predicted and reference tool names match exactly.
50
+ * Names are compared with strict equality: case-sensitive, with no trimming.
51
+ * @returns 1 on match, 0 otherwise.
52
+ */
53
+ function toolNameMatch(predicted, reference) {
54
+ const predictedNorm = normalizeToolCall(predicted);
55
+ const referenceNorm = normalizeToolCall(reference);
56
+ return predictedNorm.tool_name === referenceNorm.tool_name ? 1 : 0;
57
+ }
58
+ /**
59
+ * Whether parameter key sets match (same keys, same order after sort).
60
+ * Only top-level key names are compared; parameter values are ignored.
61
+ * Requires matching tool names first. Returns 0 when either side's args are not a plain object, 1 when the sorted key lists are identical.
62
+ */
63
+ function toolParameterKeyMatch(predicted, reference) {
64
+ if (toolNameMatch(predicted, reference) === 0) return 0;
65
+ const predictedArgs = parsedArgs(normalizeToolCall(predicted));
66
+ const referenceArgs = parsedArgs(normalizeToolCall(reference));
67
+ if (predictedArgs === null || referenceArgs === null) return 0;
68
+ const predictedKeys = Object.keys(predictedArgs).sort();
69
+ const referenceKeys = Object.keys(referenceArgs).sort();
70
+ if (predictedKeys.length !== referenceKeys.length) return 0;
71
+ return predictedKeys.every((key, index) => key === referenceKeys[index]) ? 1 : 0;
72
+ }
73
+ /** Deep equality via JSON serialization (handles nested objects in args). NOTE: both branches run the same comparison, so useStrictStringMatch currently has no effect; nested key order matters because JSON.stringify keeps property order. */
74
+ function valuesEqual(left, right, useStrictStringMatch) {
75
+ if (useStrictStringMatch) return JSON.stringify(left) === JSON.stringify(right);
76
+ return JSON.stringify(left) === JSON.stringify(right);
77
+ }
78
+ /**
79
+ * Whether all reference parameter key-value pairs match in the predicted call.
80
+ *
81
+ * Requires {@link toolParameterKeyMatch} to pass first, so both calls already share the
82
+ * same tool name and identical key sets (a predicted call with extra keys scores 0).
83
+ */
84
+ function toolParameterKvMatch(predicted, reference, options = {}) {
85
+ if (toolParameterKeyMatch(predicted, reference) === 0) return 0;
86
+ const predictedArgs = parsedArgs(normalizeToolCall(predicted));
87
+ const referenceArgs = parsedArgs(normalizeToolCall(reference));
88
+ for (const key of Object.keys(referenceArgs)) if (!valuesEqual(predictedArgs[key], referenceArgs[key], options.useStrictStringMatch ?? false)) return 0;
89
+ return 1;
90
+ }
91
+ /**
92
+ * Average tool-call metrics across index-aligned predicted/reference pairs.
93
+ *
94
+ * Denominator is `max(predicted.length, reference.length, 1)` for all four scores. An
95
+ * index missing its predicted or reference call adds 0 to the pair metrics but still
96
+ * counts in the denominator; validity is scored whenever a predicted call exists.
97
+ */
98
+ function computeToolCallMetrics(predicted, reference, options = {}) {
99
+ const pairCount = Math.max(predicted.length, reference.length, 1);
100
+ let valid = 0;
101
+ let nameMatch = 0;
102
+ let keyMatch = 0;
103
+ let kvMatch = 0;
104
+ for (let index = 0; index < pairCount; index += 1) {
105
+ const predictedCall = predicted[index];
106
+ const referenceCall = reference[index];
107
+ if (!predictedCall) continue;
108
+ valid += toolCallValid(predictedCall);
109
+ if (!referenceCall) continue;
110
+ nameMatch += toolNameMatch(predictedCall, referenceCall);
111
+ keyMatch += toolParameterKeyMatch(predictedCall, referenceCall);
112
+ kvMatch += toolParameterKvMatch(predictedCall, referenceCall, options);
113
+ }
114
+ return {
115
+ tool_call_valid: valid / pairCount,
116
+ tool_name_match: nameMatch / pairCount,
117
+ tool_parameter_key_match: keyMatch / pairCount,
118
+ tool_parameter_kv_match: kvMatch / pairCount
119
+ };
120
+ }
121
+ //#endregion
122
+ export { AdapterError, ConfigError, DEFAULT_ADAPTER_ID, DEFAULT_REPETITIONS, DEFAULT_THRESHOLD, EVAL_RUN_SCHEMA_VERSION, TRAJECTORY_SCHEMA_VERSION, TrajectoryBuilder, aggregateCell, buildEvalRunEnvelope, buildEvalRunEnvelopeFromFiles, buildTrajectory, claude_code_exports as claudeCode, computeToolCallMetrics, computeTrajectoryMetrics, createClaudeGrader, createLimit, emitOtel, enrichRepetitionWithProtojson, evaluate, evaluateAll, formatGradingConsole, formatReport, getAdapter, getDefaultAdapter, getRepetitions, gradeReport, gradingReportPassed, isAssistantMessage, isResult, isSystemInit, isSystemRetry, isTextBlock, isToolResultBlock, isToolUseBlock, isUserMessage, listAdapters, loadSuite, mergeConfig, namespaceOf, parseStreamJson, parseSuite, registerAdapter, resolveGradeOptions, runRepetition, runSuite, toEvaluationInstance, toHarnessMetrics, toInstancesJsonl, toTrajectory, toTrajectoryInstances, toolCallValid, toolNameMatch, toolParameterKeyMatch, toolParameterKvMatch, trajectoryAnyOrderMatch, trajectoryExactMatch, trajectoryInOrderMatch, trajectoryPrecision, trajectoryRecall, trajectorySingleToolUse, trajectoryToOtlp, trajectoryToTranscript };
123
+
124
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","names":[],"sources":["../src/metrics/tool-calls.ts"],"sourcesContent":["/**\n * Tool-call-level metrics operating on prediction/reference tool-call pairs.\n *\n * Implements Vertex-aligned per-call checks: validity, name match, parameter\n * key match, and full key-value match. Used by trajectory metrics and\n * available for custom eval pipelines.\n *\n * Scores are 0 or 1 per call; {@link computeToolCallMetrics} averages across\n * aligned index pairs (max length of predicted vs reference).\n */\n\nimport { parseToolInput, type TrajectoryInput, type WireToolCall } from \"./trajectory\";\nimport { serializeToolInput } from \"../eval-interchange/normalize\";\n\n/** Options for parameter value comparison. */\nexport interface ToolCallMetricOptions {\n /** When true, compare serialized JSON strictly (reserved for future semantics). */\n useStrictStringMatch?: boolean;\n}\n\n/** Aggregated tool-call metric scores (each 0..1). */\nexport interface ToolCallMetrics {\n tool_call_valid: number;\n tool_name_match: number;\n tool_parameter_key_match: number;\n tool_parameter_kv_match: number;\n}\n\ntype ToolCallInput = TrajectoryInput[number];\n\n/** Normalize harness or wire tool call to canonical wire shape for comparison. */\nfunction normalizeToolCall(toolCall: ToolCallInput): WireToolCall {\n if (typeof toolCall.tool_input === \"string\") {\n return {\n tool_name: toolCall.tool_name,\n tool_input: toolCall.tool_input,\n };\n }\n\n return {\n tool_name: toolCall.tool_name,\n tool_input: serializeToolInput(toolCall.tool_input),\n };\n}\n\n/** Parse tool_input JSON to an object map, or null when not a plain object. 
*/\nfunction parsedArgs(toolCall: WireToolCall): Record<string, unknown> | null {\n const parsed = parseToolInput(toolCall.tool_input);\n if (parsed === null || typeof parsed !== \"object\" || Array.isArray(parsed)) {\n return null;\n }\n return parsed as Record<string, unknown>;\n}\n\n/**\n * Whether a predicted tool call is well-formed (non-empty name, parseable JSON input).\n *\n * @returns 1 when valid, 0 otherwise.\n */\nexport function toolCallValid(toolCall: ToolCallInput): number {\n const normalized = normalizeToolCall(toolCall);\n if (!normalized.tool_name.trim()) return 0;\n\n try {\n JSON.parse(normalized.tool_input);\n return 1;\n } catch {\n return 0;\n }\n}\n\n/**\n * Whether predicted and reference tool names match exactly.\n *\n * @returns 1 on match, 0 otherwise.\n */\nexport function toolNameMatch(\n predicted: ToolCallInput,\n reference: ToolCallInput,\n): number {\n const predictedNorm = normalizeToolCall(predicted);\n const referenceNorm = normalizeToolCall(reference);\n return predictedNorm.tool_name === referenceNorm.tool_name ? 1 : 0;\n}\n\n/**\n * Whether parameter key sets match (same keys, same order after sort).\n *\n * Requires matching tool names first. Returns 0 when args are not objects.\n */\nexport function toolParameterKeyMatch(\n predicted: ToolCallInput,\n reference: ToolCallInput,\n): number {\n if (toolNameMatch(predicted, reference) === 0) return 0;\n\n const predictedArgs = parsedArgs(normalizeToolCall(predicted));\n const referenceArgs = parsedArgs(normalizeToolCall(reference));\n if (predictedArgs === null || referenceArgs === null) return 0;\n\n const predictedKeys = Object.keys(predictedArgs).sort();\n const referenceKeys = Object.keys(referenceArgs).sort();\n if (predictedKeys.length !== referenceKeys.length) return 0;\n\n return predictedKeys.every((key, index) => key === referenceKeys[index])\n ? 1\n : 0;\n}\n\n/** Deep equality via JSON serialization (handles nested objects in args). 
*/\nfunction valuesEqual(\n left: unknown,\n right: unknown,\n useStrictStringMatch: boolean,\n): boolean {\n if (useStrictStringMatch) {\n return JSON.stringify(left) === JSON.stringify(right);\n }\n return JSON.stringify(left) === JSON.stringify(right);\n}\n\n/**\n * Whether all reference parameter key-value pairs match in the predicted call.\n *\n * Requires {@link toolParameterKeyMatch} first. Only keys present in reference\n * are checked (predicted may have extra keys).\n */\nexport function toolParameterKvMatch(\n predicted: ToolCallInput,\n reference: ToolCallInput,\n options: ToolCallMetricOptions = {},\n): number {\n if (toolParameterKeyMatch(predicted, reference) === 0) return 0;\n\n const predictedArgs = parsedArgs(normalizeToolCall(predicted))!;\n const referenceArgs = parsedArgs(normalizeToolCall(reference))!;\n\n for (const key of Object.keys(referenceArgs)) {\n if (\n !valuesEqual(\n predictedArgs[key],\n referenceArgs[key],\n options.useStrictStringMatch ?? false,\n )\n ) {\n return 0;\n }\n }\n\n return 1;\n}\n\n/**\n * Average tool-call metrics across index-aligned predicted/reference pairs.\n *\n * Denominator is `max(predicted.length, reference.length, 1)`. 
Missing\n * predicted calls at an index are skipped for pair metrics; validity still\n * counts when a predicted call exists.\n */\nexport function computeToolCallMetrics(\n predicted: ToolCallInput[],\n reference: ToolCallInput[],\n options: ToolCallMetricOptions = {},\n): ToolCallMetrics {\n const pairCount = Math.max(predicted.length, reference.length, 1);\n let valid = 0;\n let nameMatch = 0;\n let keyMatch = 0;\n let kvMatch = 0;\n\n for (let index = 0; index < pairCount; index += 1) {\n const predictedCall = predicted[index];\n const referenceCall = reference[index];\n if (!predictedCall) continue;\n\n valid += toolCallValid(predictedCall);\n if (!referenceCall) continue;\n\n nameMatch += toolNameMatch(predictedCall, referenceCall);\n keyMatch += toolParameterKeyMatch(predictedCall, referenceCall);\n kvMatch += toolParameterKvMatch(predictedCall, referenceCall, options);\n }\n\n return {\n tool_call_valid: valid / pairCount,\n tool_name_match: nameMatch / pairCount,\n tool_parameter_key_match: keyMatch / pairCount,\n tool_parameter_kv_match: kvMatch / pairCount,\n 
};\n}\n"],"mappings":";;;;;;;;;;;;;;;;AA+BA,SAAS,kBAAkB,UAAuC;CAChE,IAAI,OAAO,SAAS,eAAe,UACjC,OAAO;EACL,WAAW,SAAS;EACpB,YAAY,SAAS;CACvB;CAGF,OAAO;EACL,WAAW,SAAS;EACpB,YAAY,mBAAmB,SAAS,UAAU;CACpD;AACF;;AAGA,SAAS,WAAW,UAAwD;CAC1E,MAAM,SAAS,eAAe,SAAS,UAAU;CACjD,IAAI,WAAW,QAAQ,OAAO,WAAW,YAAY,MAAM,QAAQ,MAAM,GACvE,OAAO;CAET,OAAO;AACT;;;;;;AAOA,SAAgB,cAAc,UAAiC;CAC7D,MAAM,aAAa,kBAAkB,QAAQ;CAC7C,IAAI,CAAC,WAAW,UAAU,KAAK,GAAG,OAAO;CAEzC,IAAI;EACF,KAAK,MAAM,WAAW,UAAU;EAChC,OAAO;CACT,QAAQ;EACN,OAAO;CACT;AACF;;;;;;AAOA,SAAgB,cACd,WACA,WACQ;CACR,MAAM,gBAAgB,kBAAkB,SAAS;CACjD,MAAM,gBAAgB,kBAAkB,SAAS;CACjD,OAAO,cAAc,cAAc,cAAc,YAAY,IAAI;AACnE;;;;;;AAOA,SAAgB,sBACd,WACA,WACQ;CACR,IAAI,cAAc,WAAW,SAAS,MAAM,GAAG,OAAO;CAEtD,MAAM,gBAAgB,WAAW,kBAAkB,SAAS,CAAC;CAC7D,MAAM,gBAAgB,WAAW,kBAAkB,SAAS,CAAC;CAC7D,IAAI,kBAAkB,QAAQ,kBAAkB,MAAM,OAAO;CAE7D,MAAM,gBAAgB,OAAO,KAAK,aAAa,CAAC,CAAC,KAAK;CACtD,MAAM,gBAAgB,OAAO,KAAK,aAAa,CAAC,CAAC,KAAK;CACtD,IAAI,cAAc,WAAW,cAAc,QAAQ,OAAO;CAE1D,OAAO,cAAc,OAAO,KAAK,UAAU,QAAQ,cAAc,MAAM,IACnE,IACA;AACN;;AAGA,SAAS,YACP,MACA,OACA,sBACS;CACT,IAAI,sBACF,OAAO,KAAK,UAAU,IAAI,MAAM,KAAK,UAAU,KAAK;CAEtD,OAAO,KAAK,UAAU,IAAI,MAAM,KAAK,UAAU,KAAK;AACtD;;;;;;;AAQA,SAAgB,qBACd,WACA,WACA,UAAiC,CAAC,GAC1B;CACR,IAAI,sBAAsB,WAAW,SAAS,MAAM,GAAG,OAAO;CAE9D,MAAM,gBAAgB,WAAW,kBAAkB,SAAS,CAAC;CAC7D,MAAM,gBAAgB,WAAW,kBAAkB,SAAS,CAAC;CAE7D,KAAK,MAAM,OAAO,OAAO,KAAK,aAAa,GACzC,IACE,CAAC,YACC,cAAc,MACd,cAAc,MACd,QAAQ,wBAAwB,KAClC,GAEA,OAAO;CAIX,OAAO;AACT;;;;;;;;AASA,SAAgB,uBACd,WACA,WACA,UAAiC,CAAC,GACjB;CACjB,MAAM,YAAY,KAAK,IAAI,UAAU,QAAQ,UAAU,QAAQ,CAAC;CAChE,IAAI,QAAQ;CACZ,IAAI,YAAY;CAChB,IAAI,WAAW;CACf,IAAI,UAAU;CAEd,KAAK,IAAI,QAAQ,GAAG,QAAQ,WAAW,SAAS,GAAG;EACjD,MAAM,gBAAgB,UAAU;EAChC,MAAM,gBAAgB,UAAU;EAChC,IAAI,CAAC,eAAe;EAEpB,SAAS,cAAc,aAAa;EACpC,IAAI,CAAC,eAAe;EAEpB,aAAa,cAAc,eAAe,aAAa;EACvD,YAAY,sBAAsB,eAAe,aAAa;EAC9D,WAAW,qBAAqB,eAAe,eAAe,OAAO;CACvE;CAEA,OAAO;EACL,iBAAiB,QAAQ;EACzB,iBAAiB,YAAY;EAC7B,0BAA0B,WAAW;EACrC,yBAAyB,UAAU;CACrC;AACF"}
@@ -1,5 +1,5 @@
1
1
  import { s as SuiteConfig } from "./types-B9H4IZtA.js";
2
- import { d as TestSuite, u as TestCase } from "./types-BQol062t.js";
2
+ import { d as TestSuite, u as TestCase } from "./types-CD3TwOtZ.js";
3
3
  import { z } from "zod";
4
4
 
5
5
  //#region src/config/transform.d.ts
@@ -13,6 +13,7 @@ declare class ConfigError extends Error {
13
13
  }
14
14
  //#endregion
15
15
  //#region src/config/grading-loader.d.ts
16
+ /** Runtime shape of a parsed grading config file. */
16
17
  interface GradingConfig {
17
18
  judge: SuiteConfig & {
18
19
  adapter?: string;
@@ -20,14 +21,30 @@ interface GradingConfig {
20
21
  system_instruction?: string;
21
22
  };
22
23
  }
24
+ /** Load grading YAML from disk and resolve relative paths. */
23
25
  declare function loadGradingConfig(filePath: string): Promise<GradingConfig>;
26
+ /**
27
+ * Parse grading YAML from a string.
28
+ *
29
+ * @param sourcePath Optional path for error messages and path resolution.
30
+ */
24
31
  declare function parseGradingConfig(yamlContent: string, sourcePath?: string): GradingConfig;
25
32
  //#endregion
26
33
  //#region src/config/loader.d.ts
34
+ /**
35
+ * Load a suite from a file path or directory path.
36
+ *
37
+ * @throws {@link ConfigError} when the path is unreadable or validation fails.
38
+ */
27
39
  declare function loadSuite(filePath: string): Promise<TestSuite>;
40
+ /**
41
+ * Parse suite YAML from a string (single-file layout with inline cases).
42
+ *
43
+ * @param sourcePath Optional path for error messages and relative path resolution.
44
+ */
28
45
  declare function parseSuite(yamlContent: string, sourcePath?: string): TestSuite;
29
46
  /** Parse one case file: single case, array, or `{ cases: [...] }`. */
30
47
  declare function parseCasesFile(yamlContent: string, sourcePath?: string): TestCase[];
31
48
  //#endregion
32
49
  export { loadGradingConfig as a, GradingConfig as i, parseCasesFile as n, parseGradingConfig as o, parseSuite as r, ConfigError as s, loadSuite as t };
33
- //# sourceMappingURL=loader-DTvoVfN0.d.ts.map
50
+ //# sourceMappingURL=loader-C9yQHUPC.d.ts.map