agentv 0.26.0 → 1.0.0
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-6ZM7WVSC.js → chunk-RIJO5WBF.js} +13 -13
- package/dist/chunk-RIJO5WBF.js.map +1 -0
- package/dist/cli.js +1 -1
- package/dist/cli.js.map +1 -1
- package/dist/index.js +1 -1
- package/dist/templates/.claude/skills/agentv-eval-builder/SKILL.md +36 -19
- package/dist/templates/.claude/skills/agentv-eval-builder/references/eval-schema.json +217 -217
- package/dist/templates/.claude/skills/agentv-eval-builder/references/example-evals.md +94 -2
- package/dist/templates/.claude/skills/agentv-eval-builder/references/tool-trajectory-evaluator.md +8 -8
- package/package.json +1 -1
- package/dist/chunk-6ZM7WVSC.js.map +0 -1
- package/dist/templates/agentv/.env.template +0 -23
|
@@ -164,7 +164,7 @@ import { access as access6, mkdir as mkdir7 } from "node:fs/promises";
|
|
|
164
164
|
import path19 from "node:path";
|
|
165
165
|
import { pathToFileURL } from "node:url";
|
|
166
166
|
|
|
167
|
-
// ../../packages/core/dist/chunk-
|
|
167
|
+
// ../../packages/core/dist/chunk-V3JCB3HI.js
|
|
168
168
|
import { constants } from "node:fs";
|
|
169
169
|
import { access, readFile } from "node:fs/promises";
|
|
170
170
|
import path from "node:path";
|
|
@@ -4211,7 +4211,7 @@ var coerce = {
|
|
|
4211
4211
|
};
|
|
4212
4212
|
var NEVER = INVALID;
|
|
4213
4213
|
|
|
4214
|
-
// ../../packages/core/dist/chunk-
|
|
4214
|
+
// ../../packages/core/dist/chunk-V3JCB3HI.js
|
|
4215
4215
|
async function fileExists(filePath) {
|
|
4216
4216
|
try {
|
|
4217
4217
|
await access(filePath, constants.F_OK);
|
|
@@ -34578,7 +34578,7 @@ var EVALUATOR_KIND_VALUES = [
|
|
|
34578
34578
|
"rubric",
|
|
34579
34579
|
"composite",
|
|
34580
34580
|
"tool_trajectory",
|
|
34581
|
-
"
|
|
34581
|
+
"expected_tool_calls"
|
|
34582
34582
|
];
|
|
34583
34583
|
var EVALUATOR_KIND_SET = new Set(EVALUATOR_KIND_VALUES);
|
|
34584
34584
|
function isEvaluatorKind(value) {
|
|
@@ -35058,11 +35058,11 @@ async function parseEvaluators(rawEvalCase, globalExecution, searchRoots, evalId
|
|
|
35058
35058
|
});
|
|
35059
35059
|
continue;
|
|
35060
35060
|
}
|
|
35061
|
-
if (typeValue === "
|
|
35061
|
+
if (typeValue === "expected_tool_calls") {
|
|
35062
35062
|
const weight2 = validateWeight(rawEvaluator.weight, name16, evalId);
|
|
35063
35063
|
evaluators.push({
|
|
35064
35064
|
name: name16,
|
|
35065
|
-
type: "
|
|
35065
|
+
type: "expected_tool_calls",
|
|
35066
35066
|
...weight2 !== void 0 ? { weight: weight2 } : {}
|
|
35067
35067
|
});
|
|
35068
35068
|
continue;
|
|
@@ -38212,8 +38212,8 @@ var ToolTrajectoryEvaluator = class {
|
|
|
38212
38212
|
};
|
|
38213
38213
|
}
|
|
38214
38214
|
};
|
|
38215
|
-
var
|
|
38216
|
-
kind = "
|
|
38215
|
+
var ExpectedToolCallsEvaluator = class {
|
|
38216
|
+
kind = "expected_tool_calls";
|
|
38217
38217
|
evaluate(context) {
|
|
38218
38218
|
const { candidateTrace, evalCase } = context;
|
|
38219
38219
|
const expectedSegments = evalCase.expected_segments;
|
|
@@ -39285,8 +39285,8 @@ async function runEvaluatorList(options) {
|
|
|
39285
39285
|
return new ToolTrajectoryEvaluator({
|
|
39286
39286
|
config: memberConfig
|
|
39287
39287
|
});
|
|
39288
|
-
case "
|
|
39289
|
-
return new
|
|
39288
|
+
case "expected_tool_calls":
|
|
39289
|
+
return new ExpectedToolCallsEvaluator();
|
|
39290
39290
|
default: {
|
|
39291
39291
|
const unknownConfig = memberConfig;
|
|
39292
39292
|
throw new Error(`Unsupported evaluator type in composite: ${unknownConfig.type}`);
|
|
@@ -39351,9 +39351,9 @@ async function runEvaluatorList(options) {
|
|
|
39351
39351
|
reasoning: score2.reasoning
|
|
39352
39352
|
});
|
|
39353
39353
|
}
|
|
39354
|
-
if (evaluator.type === "
|
|
39355
|
-
const
|
|
39356
|
-
const score2 =
|
|
39354
|
+
if (evaluator.type === "expected_tool_calls") {
|
|
39355
|
+
const expectedToolCallsEvaluator = new ExpectedToolCallsEvaluator();
|
|
39356
|
+
const score2 = expectedToolCallsEvaluator.evaluate({
|
|
39357
39357
|
evalCase,
|
|
39358
39358
|
candidate,
|
|
39359
39359
|
target,
|
|
@@ -42708,4 +42708,4 @@ export {
|
|
|
42708
42708
|
app,
|
|
42709
42709
|
runCli
|
|
42710
42710
|
};
|
|
42711
|
-
//# sourceMappingURL=chunk-6ZM7WVSC.js.map
|
|
42711
|
+
//# sourceMappingURL=chunk-RIJO5WBF.js.map
|