@agentv/core 0.10.0 → 0.10.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +40 -13
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +8 -3
- package/dist/index.d.ts +8 -3
- package/dist/index.js +39 -13
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/index.cjs
CHANGED
|
@@ -54,6 +54,7 @@ __export(index_exports, {
|
|
|
54
54
|
loadEvalCases: () => loadEvalCases,
|
|
55
55
|
normalizeLineEndings: () => normalizeLineEndings,
|
|
56
56
|
readTargetDefinitions: () => readTargetDefinitions,
|
|
57
|
+
readTestSuiteMetadata: () => readTestSuiteMetadata,
|
|
57
58
|
readTextFile: () => readTextFile,
|
|
58
59
|
resolveAndCreateProvider: () => resolveAndCreateProvider,
|
|
59
60
|
resolveFileReference: () => resolveFileReference,
|
|
@@ -239,6 +240,33 @@ var ANSI_YELLOW = "\x1B[33m";
|
|
|
239
240
|
var ANSI_RESET = "\x1B[0m";
|
|
240
241
|
var SCHEMA_EVAL_V2 = "agentv-eval-v2";
|
|
241
242
|
var SCHEMA_CONFIG_V2 = "agentv-config-v2";
|
|
243
|
+
async function readTestSuiteMetadata(testFilePath) {
|
|
244
|
+
try {
|
|
245
|
+
const absolutePath = import_node_path2.default.resolve(testFilePath);
|
|
246
|
+
const content = await (0, import_promises2.readFile)(absolutePath, "utf8");
|
|
247
|
+
const parsed = (0, import_yaml.parse)(content);
|
|
248
|
+
if (!isJsonObject(parsed)) {
|
|
249
|
+
return {};
|
|
250
|
+
}
|
|
251
|
+
return { target: extractTargetFromSuite(parsed) };
|
|
252
|
+
} catch {
|
|
253
|
+
return {};
|
|
254
|
+
}
|
|
255
|
+
}
|
|
256
|
+
function extractTargetFromSuite(suite) {
|
|
257
|
+
const execution = suite.execution;
|
|
258
|
+
if (execution && typeof execution === "object" && !Array.isArray(execution)) {
|
|
259
|
+
const executionTarget = execution.target;
|
|
260
|
+
if (typeof executionTarget === "string" && executionTarget.trim().length > 0) {
|
|
261
|
+
return executionTarget.trim();
|
|
262
|
+
}
|
|
263
|
+
}
|
|
264
|
+
const targetValue = suite.target;
|
|
265
|
+
if (typeof targetValue === "string" && targetValue.trim().length > 0) {
|
|
266
|
+
return targetValue.trim();
|
|
267
|
+
}
|
|
268
|
+
return void 0;
|
|
269
|
+
}
|
|
242
270
|
async function loadConfig(evalFilePath, repoRoot) {
|
|
243
271
|
const directories = buildDirectoryChain(evalFilePath, repoRoot);
|
|
244
272
|
for (const directory of directories) {
|
|
@@ -415,6 +443,8 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
|
|
|
415
443
|
throw new Error(`Invalid test file format: ${evalFilePath} - missing 'evalcases' field`);
|
|
416
444
|
}
|
|
417
445
|
const globalEvaluator = coerceEvaluator(suite.evaluator, "global") ?? "llm_judge";
|
|
446
|
+
const globalExecution = isJsonObject(suite.execution) ? suite.execution : void 0;
|
|
447
|
+
const globalTarget = asString(globalExecution?.target) ?? asString(suite.target);
|
|
418
448
|
const results = [];
|
|
419
449
|
for (const rawEvalcase of rawTestcases) {
|
|
420
450
|
if (!isJsonObject(rawEvalcase)) {
|
|
@@ -469,7 +499,7 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
|
|
|
469
499
|
const referenceAnswer = expectedContent ? await resolveAssistantContent(expectedContent, searchRoots, verbose) : "";
|
|
470
500
|
const question = inputTextParts.map((part) => part.trim()).filter((part) => part.length > 0).join(" ");
|
|
471
501
|
const evalCaseEvaluatorKind = coerceEvaluator(evalcase.evaluator, id) ?? globalEvaluator;
|
|
472
|
-
const evaluators = await parseEvaluators(evalcase, searchRoots, id ?? "unknown");
|
|
502
|
+
const evaluators = await parseEvaluators(evalcase, globalExecution, searchRoots, id ?? "unknown");
|
|
473
503
|
const userFilePaths = [];
|
|
474
504
|
for (const segment of inputSegments) {
|
|
475
505
|
if (segment.type === "file" && typeof segment.resolvedPath === "string") {
|
|
@@ -836,9 +866,9 @@ async function resolveAssistantContent(content, searchRoots, verbose) {
|
|
|
836
866
|
}
|
|
837
867
|
return parts.join(" ");
|
|
838
868
|
}
|
|
839
|
-
async function parseEvaluators(rawEvalCase, searchRoots, evalId) {
|
|
869
|
+
async function parseEvaluators(rawEvalCase, globalExecution, searchRoots, evalId) {
|
|
840
870
|
const execution = rawEvalCase.execution;
|
|
841
|
-
const candidateEvaluators = isJsonObject(execution) ? execution.evaluators ?? rawEvalCase.evaluators : rawEvalCase.evaluators;
|
|
871
|
+
const candidateEvaluators = isJsonObject(execution) ? execution.evaluators ?? rawEvalCase.evaluators : rawEvalCase.evaluators ?? globalExecution?.evaluators;
|
|
842
872
|
if (candidateEvaluators === void 0) {
|
|
843
873
|
return void 0;
|
|
844
874
|
}
|
|
@@ -876,6 +906,8 @@ async function parseEvaluators(rawEvalCase, searchRoots, evalId) {
|
|
|
876
906
|
resolved.attempted.length > 0 ? resolved.attempted.map((attempt) => ` Tried: ${attempt}`) : void 0
|
|
877
907
|
);
|
|
878
908
|
}
|
|
909
|
+
} else {
|
|
910
|
+
resolvedCwd = searchRoots[0];
|
|
879
911
|
}
|
|
880
912
|
evaluators.push({
|
|
881
913
|
name,
|
|
@@ -904,8 +936,7 @@ async function parseEvaluators(rawEvalCase, searchRoots, evalId) {
|
|
|
904
936
|
name,
|
|
905
937
|
type: "llm_judge",
|
|
906
938
|
prompt,
|
|
907
|
-
promptPath
|
|
908
|
-
model
|
|
939
|
+
promptPath
|
|
909
940
|
});
|
|
910
941
|
}
|
|
911
942
|
return evaluators.length > 0 ? evaluators : void 0;
|
|
@@ -3222,10 +3253,7 @@ var LlmJudgeEvaluator = class {
|
|
|
3222
3253
|
prompt = substituteVariables(systemPrompt, variables);
|
|
3223
3254
|
systemPrompt = buildSystemPrompt(hasReferenceAnswer);
|
|
3224
3255
|
}
|
|
3225
|
-
const metadata = {
|
|
3226
|
-
...systemPrompt !== void 0 ? { systemPrompt } : {},
|
|
3227
|
-
...context.judgeModel !== void 0 ? { model: context.judgeModel } : {}
|
|
3228
|
-
};
|
|
3256
|
+
const metadata = systemPrompt !== void 0 ? { systemPrompt } : {};
|
|
3229
3257
|
const response = await judgeProvider.invoke({
|
|
3230
3258
|
question: prompt,
|
|
3231
3259
|
metadata,
|
|
@@ -3245,8 +3273,7 @@ var LlmJudgeEvaluator = class {
|
|
|
3245
3273
|
provider: judgeProvider.id,
|
|
3246
3274
|
prompt,
|
|
3247
3275
|
target: context.target.name,
|
|
3248
|
-
...systemPrompt !== void 0
|
|
3249
|
-
...context.judgeModel !== void 0 ? { model: context.judgeModel } : {}
|
|
3276
|
+
...systemPrompt !== void 0 && { systemPrompt }
|
|
3250
3277
|
};
|
|
3251
3278
|
return {
|
|
3252
3279
|
score,
|
|
@@ -4240,8 +4267,7 @@ async function runLlmJudgeEvaluator(options) {
|
|
|
4240
4267
|
now,
|
|
4241
4268
|
judgeProvider,
|
|
4242
4269
|
systemPrompt: customPrompt,
|
|
4243
|
-
evaluator: config
|
|
4244
|
-
judgeModel: config.model
|
|
4270
|
+
evaluator: config
|
|
4245
4271
|
});
|
|
4246
4272
|
}
|
|
4247
4273
|
async function resolveCustomPrompt(config) {
|
|
@@ -4427,6 +4453,7 @@ function createAgentKernel() {
|
|
|
4427
4453
|
loadEvalCases,
|
|
4428
4454
|
normalizeLineEndings,
|
|
4429
4455
|
readTargetDefinitions,
|
|
4456
|
+
readTestSuiteMetadata,
|
|
4430
4457
|
readTextFile,
|
|
4431
4458
|
resolveAndCreateProvider,
|
|
4432
4459
|
resolveFileReference,
|