agentv 0.10.0 → 0.10.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -11752,6 +11752,33 @@ var ANSI_YELLOW = "\x1B[33m";
|
|
|
11752
11752
|
var ANSI_RESET = "\x1B[0m";
|
|
11753
11753
|
var SCHEMA_EVAL_V2 = "agentv-eval-v2";
|
|
11754
11754
|
var SCHEMA_CONFIG_V2 = "agentv-config-v2";
|
|
11755
|
+
async function readTestSuiteMetadata(testFilePath) {
|
|
11756
|
+
try {
|
|
11757
|
+
const absolutePath = path8.resolve(testFilePath);
|
|
11758
|
+
const content = await readFile3(absolutePath, "utf8");
|
|
11759
|
+
const parsed = parse3(content);
|
|
11760
|
+
if (!isJsonObject(parsed)) {
|
|
11761
|
+
return {};
|
|
11762
|
+
}
|
|
11763
|
+
return { target: extractTargetFromSuite(parsed) };
|
|
11764
|
+
} catch {
|
|
11765
|
+
return {};
|
|
11766
|
+
}
|
|
11767
|
+
}
|
|
11768
|
+
function extractTargetFromSuite(suite) {
|
|
11769
|
+
const execution = suite.execution;
|
|
11770
|
+
if (execution && typeof execution === "object" && !Array.isArray(execution)) {
|
|
11771
|
+
const executionTarget = execution.target;
|
|
11772
|
+
if (typeof executionTarget === "string" && executionTarget.trim().length > 0) {
|
|
11773
|
+
return executionTarget.trim();
|
|
11774
|
+
}
|
|
11775
|
+
}
|
|
11776
|
+
const targetValue = suite.target;
|
|
11777
|
+
if (typeof targetValue === "string" && targetValue.trim().length > 0) {
|
|
11778
|
+
return targetValue.trim();
|
|
11779
|
+
}
|
|
11780
|
+
return void 0;
|
|
11781
|
+
}
|
|
11755
11782
|
async function loadConfig(evalFilePath, repoRoot) {
|
|
11756
11783
|
const directories = buildDirectoryChain(evalFilePath, repoRoot);
|
|
11757
11784
|
for (const directory of directories) {
|
|
@@ -11928,6 +11955,8 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
|
|
|
11928
11955
|
throw new Error(`Invalid test file format: ${evalFilePath} - missing 'evalcases' field`);
|
|
11929
11956
|
}
|
|
11930
11957
|
const globalEvaluator = coerceEvaluator(suite.evaluator, "global") ?? "llm_judge";
|
|
11958
|
+
const globalExecution = isJsonObject(suite.execution) ? suite.execution : void 0;
|
|
11959
|
+
const globalTarget = asString(globalExecution?.target) ?? asString(suite.target);
|
|
11931
11960
|
const results = [];
|
|
11932
11961
|
for (const rawEvalcase of rawTestcases) {
|
|
11933
11962
|
if (!isJsonObject(rawEvalcase)) {
|
|
@@ -11982,7 +12011,7 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
|
|
|
11982
12011
|
const referenceAnswer = expectedContent ? await resolveAssistantContent(expectedContent, searchRoots, verbose) : "";
|
|
11983
12012
|
const question = inputTextParts.map((part) => part.trim()).filter((part) => part.length > 0).join(" ");
|
|
11984
12013
|
const evalCaseEvaluatorKind = coerceEvaluator(evalcase.evaluator, id) ?? globalEvaluator;
|
|
11985
|
-
const evaluators = await parseEvaluators(evalcase, searchRoots, id ?? "unknown");
|
|
12014
|
+
const evaluators = await parseEvaluators(evalcase, globalExecution, searchRoots, id ?? "unknown");
|
|
11986
12015
|
const userFilePaths = [];
|
|
11987
12016
|
for (const segment of inputSegments) {
|
|
11988
12017
|
if (segment.type === "file" && typeof segment.resolvedPath === "string") {
|
|
@@ -12349,9 +12378,9 @@ async function resolveAssistantContent(content, searchRoots, verbose) {
|
|
|
12349
12378
|
}
|
|
12350
12379
|
return parts.join(" ");
|
|
12351
12380
|
}
|
|
12352
|
-
async function parseEvaluators(rawEvalCase, searchRoots, evalId) {
|
|
12381
|
+
async function parseEvaluators(rawEvalCase, globalExecution, searchRoots, evalId) {
|
|
12353
12382
|
const execution = rawEvalCase.execution;
|
|
12354
|
-
const candidateEvaluators = isJsonObject(execution) ? execution.evaluators ?? rawEvalCase.evaluators : rawEvalCase.evaluators;
|
|
12383
|
+
const candidateEvaluators = isJsonObject(execution) ? execution.evaluators ?? rawEvalCase.evaluators : rawEvalCase.evaluators ?? globalExecution?.evaluators;
|
|
12355
12384
|
if (candidateEvaluators === void 0) {
|
|
12356
12385
|
return void 0;
|
|
12357
12386
|
}
|
|
@@ -12389,6 +12418,8 @@ async function parseEvaluators(rawEvalCase, searchRoots, evalId) {
|
|
|
12389
12418
|
resolved.attempted.length > 0 ? resolved.attempted.map((attempt) => ` Tried: ${attempt}`) : void 0
|
|
12390
12419
|
);
|
|
12391
12420
|
}
|
|
12421
|
+
} else {
|
|
12422
|
+
resolvedCwd = searchRoots[0];
|
|
12392
12423
|
}
|
|
12393
12424
|
evaluators.push({
|
|
12394
12425
|
name,
|
|
@@ -12417,8 +12448,7 @@ async function parseEvaluators(rawEvalCase, searchRoots, evalId) {
|
|
|
12417
12448
|
name,
|
|
12418
12449
|
type: "llm_judge",
|
|
12419
12450
|
prompt,
|
|
12420
|
-
promptPath
|
|
12421
|
-
model
|
|
12451
|
+
promptPath
|
|
12422
12452
|
});
|
|
12423
12453
|
}
|
|
12424
12454
|
return evaluators.length > 0 ? evaluators : void 0;
|
|
@@ -14157,10 +14187,7 @@ var LlmJudgeEvaluator = class {
|
|
|
14157
14187
|
prompt = substituteVariables(systemPrompt, variables);
|
|
14158
14188
|
systemPrompt = buildSystemPrompt(hasReferenceAnswer);
|
|
14159
14189
|
}
|
|
14160
|
-
const metadata = {
|
|
14161
|
-
...systemPrompt !== void 0 ? { systemPrompt } : {},
|
|
14162
|
-
...context2.judgeModel !== void 0 ? { model: context2.judgeModel } : {}
|
|
14163
|
-
};
|
|
14190
|
+
const metadata = systemPrompt !== void 0 ? { systemPrompt } : {};
|
|
14164
14191
|
const response = await judgeProvider.invoke({
|
|
14165
14192
|
question: prompt,
|
|
14166
14193
|
metadata,
|
|
@@ -14180,8 +14207,7 @@ var LlmJudgeEvaluator = class {
|
|
|
14180
14207
|
provider: judgeProvider.id,
|
|
14181
14208
|
prompt,
|
|
14182
14209
|
target: context2.target.name,
|
|
14183
|
-
...systemPrompt !== void 0
|
|
14184
|
-
...context2.judgeModel !== void 0 ? { model: context2.judgeModel } : {}
|
|
14210
|
+
...systemPrompt !== void 0 && { systemPrompt }
|
|
14185
14211
|
};
|
|
14186
14212
|
return {
|
|
14187
14213
|
score,
|
|
@@ -15164,8 +15190,7 @@ async function runLlmJudgeEvaluator(options) {
|
|
|
15164
15190
|
now,
|
|
15165
15191
|
judgeProvider,
|
|
15166
15192
|
systemPrompt: customPrompt,
|
|
15167
|
-
evaluator: config
|
|
15168
|
-
judgeModel: config.model
|
|
15193
|
+
evaluator: config
|
|
15169
15194
|
});
|
|
15170
15195
|
}
|
|
15171
15196
|
async function resolveCustomPrompt(config) {
|
|
@@ -16918,9 +16943,8 @@ async function validateMessagesFileRefs(messages, location, searchRoots, filePat
|
|
|
16918
16943
|
|
|
16919
16944
|
// src/commands/eval/targets.ts
|
|
16920
16945
|
import { constants as constants5 } from "node:fs";
|
|
16921
|
-
import { access as access5
|
|
16946
|
+
import { access as access5 } from "node:fs/promises";
|
|
16922
16947
|
import path13 from "node:path";
|
|
16923
|
-
import { parse as parse6 } from "yaml";
|
|
16924
16948
|
var TARGET_FILE_CANDIDATES = [
|
|
16925
16949
|
"targets.yaml",
|
|
16926
16950
|
"targets.yml",
|
|
@@ -16942,18 +16966,8 @@ async function fileExists5(filePath) {
|
|
|
16942
16966
|
}
|
|
16943
16967
|
}
|
|
16944
16968
|
async function readTestSuiteTarget(testFilePath) {
|
|
16945
|
-
|
|
16946
|
-
|
|
16947
|
-
const parsed = parse6(raw);
|
|
16948
|
-
if (parsed && typeof parsed === "object" && !Array.isArray(parsed)) {
|
|
16949
|
-
const targetValue = parsed.target;
|
|
16950
|
-
if (typeof targetValue === "string" && targetValue.trim().length > 0) {
|
|
16951
|
-
return targetValue.trim();
|
|
16952
|
-
}
|
|
16953
|
-
}
|
|
16954
|
-
} catch {
|
|
16955
|
-
}
|
|
16956
|
-
return void 0;
|
|
16969
|
+
const metadata = await readTestSuiteMetadata(testFilePath);
|
|
16970
|
+
return metadata.target;
|
|
16957
16971
|
}
|
|
16958
16972
|
async function discoverTargetsFile(options) {
|
|
16959
16973
|
const { explicitPath, testFilePath, repoRoot, cwd } = options;
|
|
@@ -17924,4 +17938,4 @@ export {
|
|
|
17924
17938
|
createProgram,
|
|
17925
17939
|
runCli
|
|
17926
17940
|
};
|
|
17927
|
-
//# sourceMappingURL=chunk-
|
|
17941
|
+
//# sourceMappingURL=chunk-72BHGHIT.js.map
|