agentv 2.17.0 → 2.17.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-FIWNPMZ4.js → chunk-MH2ZEUAO.js} +4 -4
- package/dist/{chunk-H6WRFW2C.js → chunk-QAZU7YAH.js} +4 -4
- package/dist/{chunk-H6WRFW2C.js.map → chunk-QAZU7YAH.js.map} +1 -1
- package/dist/{chunk-UJMO2T4J.js → chunk-SO4O4O2B.js} +51 -20
- package/dist/chunk-SO4O4O2B.js.map +1 -0
- package/dist/cli.js +3 -3
- package/dist/{dist-4SYTSJN2.js → dist-AQVAKXMK.js} +2 -2
- package/dist/index.js +3 -3
- package/dist/{interactive-7K5546RV.js → interactive-T2FAFLD2.js} +3 -3
- package/package.json +1 -1
- package/dist/chunk-UJMO2T4J.js.map +0 -1
- /package/dist/{chunk-FIWNPMZ4.js.map → chunk-MH2ZEUAO.js.map} +0 -0
- /package/dist/{dist-4SYTSJN2.js.map → dist-AQVAKXMK.js.map} +0 -0
- /package/dist/{interactive-7K5546RV.js.map → interactive-T2FAFLD2.js.map} +0 -0
|
@@ -148,7 +148,7 @@ var require_dist = __commonJS({
|
|
|
148
148
|
}
|
|
149
149
|
});
|
|
150
150
|
|
|
151
|
-
// ../../packages/core/dist/chunk-
|
|
151
|
+
// ../../packages/core/dist/chunk-PSYFRPNT.js
|
|
152
152
|
import { constants } from "node:fs";
|
|
153
153
|
import { access, readFile } from "node:fs/promises";
|
|
154
154
|
import path from "node:path";
|
|
@@ -4195,7 +4195,7 @@ var coerce = {
|
|
|
4195
4195
|
};
|
|
4196
4196
|
var NEVER = INVALID;
|
|
4197
4197
|
|
|
4198
|
-
// ../../packages/core/dist/chunk-
|
|
4198
|
+
// ../../packages/core/dist/chunk-PSYFRPNT.js
|
|
4199
4199
|
var TEST_MESSAGE_ROLE_VALUES = ["system", "user", "assistant", "tool"];
|
|
4200
4200
|
var TEST_MESSAGE_ROLES = TEST_MESSAGE_ROLE_VALUES;
|
|
4201
4201
|
var TEST_MESSAGE_ROLE_SET = new Set(TEST_MESSAGE_ROLE_VALUES);
|
|
@@ -34107,6 +34107,23 @@ function mergeExecutionMetrics(computed, metrics) {
|
|
|
34107
34107
|
endTime: metrics.endTime ?? computed.endTime
|
|
34108
34108
|
};
|
|
34109
34109
|
}
|
|
34110
|
+
// Matches `${{ NAME }}` placeholders, where NAME is an identifier-style
// variable name; whitespace inside the double braces is optional.
var ENV_VAR_PATTERN = /\$\{\{\s*([A-Za-z_][A-Za-z0-9_]*)\s*\}\}/g;
/**
 * Recursively substitutes `${{ NAME }}` placeholders in a parsed value.
 *
 * - Strings: every placeholder is replaced by `env[NAME]`; a variable that is
 *   not an own property of `env`, or whose value is null/undefined, becomes "".
 * - Arrays: a new array is returned with each element interpolated.
 * - Objects: a new object is returned with each own enumerable value
 *   interpolated. Keys are copied verbatim and are never interpolated.
 * - Anything else (numbers, booleans, null, undefined) is returned unchanged.
 *
 * @param {unknown} value - Parsed data to interpolate.
 * @param {Record<string, string | undefined>} env - Variable lookup table (callers pass `process.env`).
 * @returns {unknown} A copy of `value` with placeholders expanded in its strings.
 */
function interpolateEnv(value, env) {
  if (typeof value === "string") {
    return value.replace(ENV_VAR_PATTERN, (_, varName) => {
      // Only own properties count as variables: an inherited member such as
      // `constructor` or `toString` must not be expanded into the output.
      const isDefined = Object.prototype.hasOwnProperty.call(env, varName);
      return isDefined ? env[varName] ?? "" : "";
    });
  }
  if (Array.isArray(value)) {
    return value.map((item) => interpolateEnv(item, env));
  }
  if (value !== null && typeof value === "object") {
    // Object.fromEntries defines own data properties, so an own `__proto__`
    // key (which JSON.parse can produce) is copied as data. Plain assignment
    // onto `{}` would instead replace the copy's prototype and drop the key.
    return Object.fromEntries(
      Object.entries(value).map(([key, val]) => [key, interpolateEnv(val, env)])
    );
  }
  return value;
}
|
|
34110
34127
|
var ANSI_YELLOW = "\x1B[33m";
|
|
34111
34128
|
var ANSI_RESET = "\x1B[0m";
|
|
34112
34129
|
var FILE_PROTOCOL = "file://";
|
|
@@ -34120,7 +34137,8 @@ function isGlobPattern(filePath) {
|
|
|
34120
34137
|
return filePath.includes("*") || filePath.includes("?") || filePath.includes("{");
|
|
34121
34138
|
}
|
|
34122
34139
|
function parseYamlCases(content, filePath) {
|
|
34123
|
-
const
|
|
34140
|
+
const raw = parseYaml(content);
|
|
34141
|
+
const parsed = interpolateEnv(raw, process.env);
|
|
34124
34142
|
if (!Array.isArray(parsed)) {
|
|
34125
34143
|
throw new Error(
|
|
34126
34144
|
`External test file must contain a YAML array, got ${typeof parsed}: ${filePath}`
|
|
@@ -34142,7 +34160,8 @@ function parseJsonlCases(content, filePath) {
|
|
|
34142
34160
|
const line = lines[i].trim();
|
|
34143
34161
|
if (line === "") continue;
|
|
34144
34162
|
try {
|
|
34145
|
-
const
|
|
34163
|
+
const raw = JSON.parse(line);
|
|
34164
|
+
const parsed = interpolateEnv(raw, process.env);
|
|
34146
34165
|
if (!isJsonObject(parsed)) {
|
|
34147
34166
|
throw new Error("Expected JSON object");
|
|
34148
34167
|
}
|
|
@@ -36247,7 +36266,7 @@ async function loadSidecarMetadata(jsonlPath, verbose) {
|
|
|
36247
36266
|
}
|
|
36248
36267
|
try {
|
|
36249
36268
|
const content = await readFile5(sidecarPath, "utf8");
|
|
36250
|
-
const parsed = parseYaml2(content);
|
|
36269
|
+
const parsed = interpolateEnv(parseYaml2(content), process.env);
|
|
36251
36270
|
if (!isJsonObject(parsed)) {
|
|
36252
36271
|
logWarning4(`Invalid sidecar metadata format in ${sidecarPath}`);
|
|
36253
36272
|
return {};
|
|
@@ -36270,7 +36289,8 @@ function parseJsonlContent(content, filePath) {
|
|
|
36270
36289
|
const line = lines[i].trim();
|
|
36271
36290
|
if (line === "") continue;
|
|
36272
36291
|
try {
|
|
36273
|
-
const
|
|
36292
|
+
const raw = JSON.parse(line);
|
|
36293
|
+
const parsed = interpolateEnv(raw, process.env);
|
|
36274
36294
|
if (!isJsonObject(parsed)) {
|
|
36275
36295
|
throw new Error("Expected JSON object");
|
|
36276
36296
|
}
|
|
@@ -36327,9 +36347,10 @@ async function loadTestsFromJsonl(evalFilePath, repoRoot, options) {
|
|
|
36327
36347
|
}
|
|
36328
36348
|
const inputMessages = resolveInputMessages(evalcase);
|
|
36329
36349
|
const expectedMessages = resolveExpectedMessages(evalcase) ?? [];
|
|
36330
|
-
|
|
36350
|
+
const hasEvaluationSpec = !!outcome || expectedMessages.length > 0 || evalcase.assert !== void 0;
|
|
36351
|
+
if (!id || !hasEvaluationSpec || !inputMessages || inputMessages.length === 0) {
|
|
36331
36352
|
logError(
|
|
36332
|
-
`Skipping incomplete test at line ${lineNumber}: ${id ?? "unknown"}. Missing required fields: id,
|
|
36353
|
+
`Skipping incomplete test at line ${lineNumber}: ${id ?? "unknown"}. Missing required fields: id, input, and at least one of criteria/expected_output/assert`
|
|
36333
36354
|
);
|
|
36334
36355
|
continue;
|
|
36335
36356
|
}
|
|
@@ -36407,7 +36428,7 @@ async function loadTestsFromJsonl(evalFilePath, repoRoot, options) {
|
|
|
36407
36428
|
guideline_paths: guidelinePaths.map((guidelinePath) => path6.resolve(guidelinePath)),
|
|
36408
36429
|
guideline_patterns: guidelinePatterns,
|
|
36409
36430
|
file_paths: allFilePaths,
|
|
36410
|
-
criteria: outcome,
|
|
36431
|
+
criteria: outcome ?? "",
|
|
36411
36432
|
evaluator: evalCaseEvaluatorKind,
|
|
36412
36433
|
evaluators
|
|
36413
36434
|
};
|
|
@@ -36711,7 +36732,7 @@ async function readTestSuiteMetadata(testFilePath) {
|
|
|
36711
36732
|
try {
|
|
36712
36733
|
const absolutePath = path8.resolve(testFilePath);
|
|
36713
36734
|
const content = await readFile7(absolutePath, "utf8");
|
|
36714
|
-
const parsed = parse22(content);
|
|
36735
|
+
const parsed = interpolateEnv(parse22(content), process.env);
|
|
36715
36736
|
if (!isJsonObject(parsed)) {
|
|
36716
36737
|
return {};
|
|
36717
36738
|
}
|
|
@@ -36761,11 +36782,11 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
36761
36782
|
const config2 = await loadConfig(absoluteTestPath, repoRootPath);
|
|
36762
36783
|
const guidelinePatterns = config2?.guideline_patterns;
|
|
36763
36784
|
const rawFile = await readFile7(absoluteTestPath, "utf8");
|
|
36764
|
-
const
|
|
36765
|
-
if (!isJsonObject(
|
|
36785
|
+
const interpolated = interpolateEnv(parse22(rawFile), process.env);
|
|
36786
|
+
if (!isJsonObject(interpolated)) {
|
|
36766
36787
|
throw new Error(`Invalid test file format: ${evalFilePath}`);
|
|
36767
36788
|
}
|
|
36768
|
-
const suite =
|
|
36789
|
+
const suite = interpolated;
|
|
36769
36790
|
const datasetNameFromSuite = asString6(suite.dataset)?.trim();
|
|
36770
36791
|
const fallbackDataset = path8.basename(absoluteTestPath).replace(/\.ya?ml$/i, "") || "eval";
|
|
36771
36792
|
const datasetName = datasetNameFromSuite && datasetNameFromSuite.length > 0 ? datasetNameFromSuite : fallbackDataset;
|
|
@@ -36809,9 +36830,10 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
36809
36830
|
}
|
|
36810
36831
|
const testInputMessages = resolveInputMessages(evalcase);
|
|
36811
36832
|
const expectedMessages = resolveExpectedMessages(evalcase) ?? [];
|
|
36812
|
-
|
|
36833
|
+
const hasEvaluationSpec = !!outcome || expectedMessages.length > 0 || evalcase.assert !== void 0;
|
|
36834
|
+
if (!id || !hasEvaluationSpec || !testInputMessages || testInputMessages.length === 0) {
|
|
36813
36835
|
logError2(
|
|
36814
|
-
`Skipping incomplete test: ${id ?? "unknown"}. Missing required fields: id,
|
|
36836
|
+
`Skipping incomplete test: ${id ?? "unknown"}. Missing required fields: id, input, and at least one of criteria/expected_output/assert`
|
|
36815
36837
|
);
|
|
36816
36838
|
continue;
|
|
36817
36839
|
}
|
|
@@ -36907,7 +36929,7 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
36907
36929
|
guideline_paths: guidelinePaths.map((guidelinePath) => path8.resolve(guidelinePath)),
|
|
36908
36930
|
guideline_patterns: guidelinePatterns,
|
|
36909
36931
|
file_paths: allFilePaths,
|
|
36910
|
-
criteria: outcome,
|
|
36932
|
+
criteria: outcome ?? "",
|
|
36911
36933
|
evaluator: evalCaseEvaluatorKind,
|
|
36912
36934
|
evaluators,
|
|
36913
36935
|
workspace: mergedWorkspace,
|
|
@@ -37047,7 +37069,7 @@ async function resolveWorkspaceConfig(raw, evalFileDir) {
|
|
|
37047
37069
|
} catch {
|
|
37048
37070
|
throw new Error(`Workspace file not found: ${raw} (resolved to ${workspaceFilePath})`);
|
|
37049
37071
|
}
|
|
37050
|
-
const parsed = parse22(content);
|
|
37072
|
+
const parsed = interpolateEnv(parse22(content), process.env);
|
|
37051
37073
|
if (!isJsonObject(parsed)) {
|
|
37052
37074
|
throw new Error(
|
|
37053
37075
|
`Invalid workspace file format: ${workspaceFilePath} (expected a YAML object)`
|
|
@@ -48473,9 +48495,11 @@ async function runEvaluatorList(options) {
|
|
|
48473
48495
|
registry: typeRegistry
|
|
48474
48496
|
};
|
|
48475
48497
|
for (const evaluatorConfig of evaluators ?? []) {
|
|
48498
|
+
const startedAt = /* @__PURE__ */ new Date();
|
|
48476
48499
|
try {
|
|
48477
48500
|
const evaluatorInstance = await typeRegistry.create(evaluatorConfig, dispatchContext);
|
|
48478
48501
|
const score2 = await evaluatorInstance.evaluate(evalContext);
|
|
48502
|
+
const endedAt = /* @__PURE__ */ new Date();
|
|
48479
48503
|
const weight = evaluatorConfig.weight ?? 1;
|
|
48480
48504
|
scored.push({
|
|
48481
48505
|
score: score2,
|
|
@@ -48496,9 +48520,13 @@ async function runEvaluatorList(options) {
|
|
|
48496
48520
|
evaluatorProviderRequest: score2.evaluatorRawRequest,
|
|
48497
48521
|
details: score2.details,
|
|
48498
48522
|
scores: mapChildResults(score2.scores),
|
|
48499
|
-
tokenUsage: score2.tokenUsage
|
|
48523
|
+
tokenUsage: score2.tokenUsage,
|
|
48524
|
+
durationMs: endedAt.getTime() - startedAt.getTime(),
|
|
48525
|
+
startedAt: startedAt.toISOString(),
|
|
48526
|
+
endedAt: endedAt.toISOString()
|
|
48500
48527
|
});
|
|
48501
48528
|
} catch (error40) {
|
|
48529
|
+
const endedAt = /* @__PURE__ */ new Date();
|
|
48502
48530
|
const message = error40 instanceof Error ? error40.message : String(error40);
|
|
48503
48531
|
const fallbackScore = {
|
|
48504
48532
|
score: 0,
|
|
@@ -48524,7 +48552,10 @@ async function runEvaluatorList(options) {
|
|
|
48524
48552
|
verdict: "fail",
|
|
48525
48553
|
hits: [],
|
|
48526
48554
|
misses: [`Evaluator '${evaluatorConfig.name ?? "unknown"}' failed: ${message}`],
|
|
48527
|
-
reasoning: message
|
|
48555
|
+
reasoning: message,
|
|
48556
|
+
durationMs: endedAt.getTime() - startedAt.getTime(),
|
|
48557
|
+
startedAt: startedAt.toISOString(),
|
|
48558
|
+
endedAt: endedAt.toISOString()
|
|
48528
48559
|
});
|
|
48529
48560
|
}
|
|
48530
48561
|
if (evaluatorConfig.negate === true && scored.length > 0) {
|
|
@@ -49681,4 +49712,4 @@ export {
|
|
|
49681
49712
|
OtelStreamingObserver,
|
|
49682
49713
|
createAgentKernel
|
|
49683
49714
|
};
|
|
49684
|
-
//# sourceMappingURL=chunk-
|
|
49715
|
+
//# sourceMappingURL=chunk-SO4O4O2B.js.map
|