@agentv/core 2.17.0 → 2.17.1-next.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-CPPYERD2.js → chunk-PSYFRPNT.js} +1 -1
- package/dist/chunk-PSYFRPNT.js.map +1 -0
- package/dist/evaluation/validation/index.cjs.map +1 -1
- package/dist/evaluation/validation/index.js +1 -1
- package/dist/index.cjs +50 -17
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +6 -0
- package/dist/index.d.ts +6 -0
- package/dist/index.js +51 -18
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
- package/dist/chunk-CPPYERD2.js.map +0 -1
package/dist/index.cjs
CHANGED
|
@@ -1777,6 +1777,25 @@ var import_node_path8 = __toESM(require("path"), 1);
|
|
|
1777
1777
|
var import_micromatch3 = __toESM(require("micromatch"), 1);
|
|
1778
1778
|
var import_yaml4 = require("yaml");
|
|
1779
1779
|
|
|
1780
|
+
// src/evaluation/interpolation.ts
|
|
1781
|
+
var ENV_VAR_PATTERN = /\$\{\{\s*([A-Za-z_][A-Za-z0-9_]*)\s*\}\}/g;
|
|
1782
|
+
function interpolateEnv(value, env) {
|
|
1783
|
+
if (typeof value === "string") {
|
|
1784
|
+
return value.replace(ENV_VAR_PATTERN, (_, varName) => env[varName] ?? "");
|
|
1785
|
+
}
|
|
1786
|
+
if (Array.isArray(value)) {
|
|
1787
|
+
return value.map((item) => interpolateEnv(item, env));
|
|
1788
|
+
}
|
|
1789
|
+
if (value !== null && typeof value === "object") {
|
|
1790
|
+
const result = {};
|
|
1791
|
+
for (const [key, val] of Object.entries(value)) {
|
|
1792
|
+
result[key] = interpolateEnv(val, env);
|
|
1793
|
+
}
|
|
1794
|
+
return result;
|
|
1795
|
+
}
|
|
1796
|
+
return value;
|
|
1797
|
+
}
|
|
1798
|
+
|
|
1780
1799
|
// src/evaluation/loaders/case-file-loader.ts
|
|
1781
1800
|
var import_promises = require("fs/promises");
|
|
1782
1801
|
var import_node_path = __toESM(require("path"), 1);
|
|
@@ -1795,7 +1814,8 @@ function isGlobPattern(filePath) {
|
|
|
1795
1814
|
return filePath.includes("*") || filePath.includes("?") || filePath.includes("{");
|
|
1796
1815
|
}
|
|
1797
1816
|
function parseYamlCases(content, filePath) {
|
|
1798
|
-
const parsed = (0, import_yaml.parse)(content);
|
|
1817
|
+
const raw = (0, import_yaml.parse)(content);
|
|
1818
|
+
const parsed = interpolateEnv(raw, process.env);
|
|
1799
1819
|
if (!Array.isArray(parsed)) {
|
|
1800
1820
|
throw new Error(
|
|
1801
1821
|
`External test file must contain a YAML array, got ${typeof parsed}: ${filePath}`
|
|
@@ -1817,7 +1837,8 @@ function parseJsonlCases(content, filePath) {
|
|
|
1817
1837
|
const line = lines[i].trim();
|
|
1818
1838
|
if (line === "") continue;
|
|
1819
1839
|
try {
|
|
1820
|
-
const parsed = JSON.parse(line);
|
|
1840
|
+
const raw = JSON.parse(line);
|
|
1841
|
+
const parsed = interpolateEnv(raw, process.env);
|
|
1821
1842
|
if (!isJsonObject(parsed)) {
|
|
1822
1843
|
throw new Error("Expected JSON object");
|
|
1823
1844
|
}
|
|
@@ -3966,7 +3987,7 @@ async function loadSidecarMetadata(jsonlPath, verbose) {
|
|
|
3966
3987
|
}
|
|
3967
3988
|
try {
|
|
3968
3989
|
const content = await (0, import_promises6.readFile)(sidecarPath, "utf8");
|
|
3969
|
-
const parsed = (0, import_yaml3.parse)(content);
|
|
3990
|
+
const parsed = interpolateEnv((0, import_yaml3.parse)(content), process.env);
|
|
3970
3991
|
if (!isJsonObject(parsed)) {
|
|
3971
3992
|
logWarning4(`Invalid sidecar metadata format in ${sidecarPath}`);
|
|
3972
3993
|
return {};
|
|
@@ -3989,7 +4010,8 @@ function parseJsonlContent(content, filePath) {
|
|
|
3989
4010
|
const line = lines[i].trim();
|
|
3990
4011
|
if (line === "") continue;
|
|
3991
4012
|
try {
|
|
3992
|
-
const parsed = JSON.parse(line);
|
|
4013
|
+
const raw = JSON.parse(line);
|
|
4014
|
+
const parsed = interpolateEnv(raw, process.env);
|
|
3993
4015
|
if (!isJsonObject(parsed)) {
|
|
3994
4016
|
throw new Error("Expected JSON object");
|
|
3995
4017
|
}
|
|
@@ -4046,9 +4068,10 @@ async function loadTestsFromJsonl(evalFilePath, repoRoot, options) {
|
|
|
4046
4068
|
}
|
|
4047
4069
|
const inputMessages = resolveInputMessages(evalcase);
|
|
4048
4070
|
const expectedMessages = resolveExpectedMessages(evalcase) ?? [];
|
|
4049
|
-
|
|
4071
|
+
const hasEvaluationSpec = !!outcome || expectedMessages.length > 0 || evalcase.assert !== void 0;
|
|
4072
|
+
if (!id || !hasEvaluationSpec || !inputMessages || inputMessages.length === 0) {
|
|
4050
4073
|
logError(
|
|
4051
|
-
`Skipping incomplete test at line ${lineNumber}: ${id ?? "unknown"}. Missing required fields: id,
|
|
4074
|
+
`Skipping incomplete test at line ${lineNumber}: ${id ?? "unknown"}. Missing required fields: id, input, and at least one of criteria/expected_output/assert`
|
|
4052
4075
|
);
|
|
4053
4076
|
continue;
|
|
4054
4077
|
}
|
|
@@ -4126,7 +4149,7 @@ async function loadTestsFromJsonl(evalFilePath, repoRoot, options) {
|
|
|
4126
4149
|
guideline_paths: guidelinePaths.map((guidelinePath) => import_node_path6.default.resolve(guidelinePath)),
|
|
4127
4150
|
guideline_patterns: guidelinePatterns,
|
|
4128
4151
|
file_paths: allFilePaths,
|
|
4129
|
-
criteria: outcome,
|
|
4152
|
+
criteria: outcome ?? "",
|
|
4130
4153
|
evaluator: evalCaseEvaluatorKind,
|
|
4131
4154
|
evaluators
|
|
4132
4155
|
};
|
|
@@ -4439,7 +4462,7 @@ async function readTestSuiteMetadata(testFilePath) {
|
|
|
4439
4462
|
try {
|
|
4440
4463
|
const absolutePath = import_node_path8.default.resolve(testFilePath);
|
|
4441
4464
|
const content = await (0, import_promises8.readFile)(absolutePath, "utf8");
|
|
4442
|
-
const parsed = (0, import_yaml4.parse)(content);
|
|
4465
|
+
const parsed = interpolateEnv((0, import_yaml4.parse)(content), process.env);
|
|
4443
4466
|
if (!isJsonObject(parsed)) {
|
|
4444
4467
|
return {};
|
|
4445
4468
|
}
|
|
@@ -4489,11 +4512,11 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
4489
4512
|
const config = await loadConfig(absoluteTestPath, repoRootPath);
|
|
4490
4513
|
const guidelinePatterns = config?.guideline_patterns;
|
|
4491
4514
|
const rawFile = await (0, import_promises8.readFile)(absoluteTestPath, "utf8");
|
|
4492
|
-
const parsed = (0, import_yaml4.parse)(rawFile);
|
|
4493
|
-
if (!isJsonObject(parsed)) {
|
|
4515
|
+
const interpolated = interpolateEnv((0, import_yaml4.parse)(rawFile), process.env);
|
|
4516
|
+
if (!isJsonObject(interpolated)) {
|
|
4494
4517
|
throw new Error(`Invalid test file format: ${evalFilePath}`);
|
|
4495
4518
|
}
|
|
4496
|
-
const suite = parsed;
|
|
4519
|
+
const suite = interpolated;
|
|
4497
4520
|
const datasetNameFromSuite = asString6(suite.dataset)?.trim();
|
|
4498
4521
|
const fallbackDataset = import_node_path8.default.basename(absoluteTestPath).replace(/\.ya?ml$/i, "") || "eval";
|
|
4499
4522
|
const datasetName = datasetNameFromSuite && datasetNameFromSuite.length > 0 ? datasetNameFromSuite : fallbackDataset;
|
|
@@ -4537,9 +4560,10 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
4537
4560
|
}
|
|
4538
4561
|
const testInputMessages = resolveInputMessages(evalcase);
|
|
4539
4562
|
const expectedMessages = resolveExpectedMessages(evalcase) ?? [];
|
|
4540
|
-
|
|
4563
|
+
const hasEvaluationSpec = !!outcome || expectedMessages.length > 0 || evalcase.assert !== void 0;
|
|
4564
|
+
if (!id || !hasEvaluationSpec || !testInputMessages || testInputMessages.length === 0) {
|
|
4541
4565
|
logError2(
|
|
4542
|
-
`Skipping incomplete test: ${id ?? "unknown"}. Missing required fields: id,
|
|
4566
|
+
`Skipping incomplete test: ${id ?? "unknown"}. Missing required fields: id, input, and at least one of criteria/expected_output/assert`
|
|
4543
4567
|
);
|
|
4544
4568
|
continue;
|
|
4545
4569
|
}
|
|
@@ -4635,7 +4659,7 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
4635
4659
|
guideline_paths: guidelinePaths.map((guidelinePath) => import_node_path8.default.resolve(guidelinePath)),
|
|
4636
4660
|
guideline_patterns: guidelinePatterns,
|
|
4637
4661
|
file_paths: allFilePaths,
|
|
4638
|
-
criteria: outcome,
|
|
4662
|
+
criteria: outcome ?? "",
|
|
4639
4663
|
evaluator: evalCaseEvaluatorKind,
|
|
4640
4664
|
evaluators,
|
|
4641
4665
|
workspace: mergedWorkspace,
|
|
@@ -4775,7 +4799,7 @@ async function resolveWorkspaceConfig(raw, evalFileDir) {
|
|
|
4775
4799
|
} catch {
|
|
4776
4800
|
throw new Error(`Workspace file not found: ${raw} (resolved to ${workspaceFilePath})`);
|
|
4777
4801
|
}
|
|
4778
|
-
const parsed = (0, import_yaml4.parse)(content);
|
|
4802
|
+
const parsed = interpolateEnv((0, import_yaml4.parse)(content), process.env);
|
|
4779
4803
|
if (!isJsonObject(parsed)) {
|
|
4780
4804
|
throw new Error(
|
|
4781
4805
|
`Invalid workspace file format: ${workspaceFilePath} (expected a YAML object)`
|
|
@@ -17714,9 +17738,11 @@ async function runEvaluatorList(options) {
|
|
|
17714
17738
|
registry: typeRegistry
|
|
17715
17739
|
};
|
|
17716
17740
|
for (const evaluatorConfig of evaluators ?? []) {
|
|
17741
|
+
const startedAt = /* @__PURE__ */ new Date();
|
|
17717
17742
|
try {
|
|
17718
17743
|
const evaluatorInstance = await typeRegistry.create(evaluatorConfig, dispatchContext);
|
|
17719
17744
|
const score2 = await evaluatorInstance.evaluate(evalContext);
|
|
17745
|
+
const endedAt = /* @__PURE__ */ new Date();
|
|
17720
17746
|
const weight = evaluatorConfig.weight ?? 1;
|
|
17721
17747
|
scored.push({
|
|
17722
17748
|
score: score2,
|
|
@@ -17737,9 +17763,13 @@ async function runEvaluatorList(options) {
|
|
|
17737
17763
|
evaluatorProviderRequest: score2.evaluatorRawRequest,
|
|
17738
17764
|
details: score2.details,
|
|
17739
17765
|
scores: mapChildResults(score2.scores),
|
|
17740
|
-
tokenUsage: score2.tokenUsage
|
|
17766
|
+
tokenUsage: score2.tokenUsage,
|
|
17767
|
+
durationMs: endedAt.getTime() - startedAt.getTime(),
|
|
17768
|
+
startedAt: startedAt.toISOString(),
|
|
17769
|
+
endedAt: endedAt.toISOString()
|
|
17741
17770
|
});
|
|
17742
17771
|
} catch (error) {
|
|
17772
|
+
const endedAt = /* @__PURE__ */ new Date();
|
|
17743
17773
|
const message = error instanceof Error ? error.message : String(error);
|
|
17744
17774
|
const fallbackScore = {
|
|
17745
17775
|
score: 0,
|
|
@@ -17765,7 +17795,10 @@ async function runEvaluatorList(options) {
|
|
|
17765
17795
|
verdict: "fail",
|
|
17766
17796
|
hits: [],
|
|
17767
17797
|
misses: [`Evaluator '${evaluatorConfig.name ?? "unknown"}' failed: ${message}`],
|
|
17768
|
-
reasoning: message
|
|
17798
|
+
reasoning: message,
|
|
17799
|
+
durationMs: endedAt.getTime() - startedAt.getTime(),
|
|
17800
|
+
startedAt: startedAt.toISOString(),
|
|
17801
|
+
endedAt: endedAt.toISOString()
|
|
17769
17802
|
});
|
|
17770
17803
|
}
|
|
17771
17804
|
if (evaluatorConfig.negate === true && scored.length > 0) {
|