agentv 3.10.0 → 3.10.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-DJ6FJ6J4.js → chunk-6UE665XI.js} +4 -4
- package/dist/{chunk-DDMAQT5P.js → chunk-F7LAJMTO.js} +28 -29
- package/dist/chunk-F7LAJMTO.js.map +1 -0
- package/dist/{chunk-X3KJVUAB.js → chunk-KGK5NUFG.js} +11 -6
- package/dist/chunk-KGK5NUFG.js.map +1 -0
- package/dist/cli.js +3 -3
- package/dist/{dist-DJFWBJNJ.js → dist-3QUJEJUT.js} +2 -2
- package/dist/index.js +3 -3
- package/dist/{interactive-ST4ZSRK4.js → interactive-EO6AR2R3.js} +3 -3
- package/dist/templates/.agents/skills/agentv-chat-to-eval/README.md +84 -0
- package/dist/templates/.agents/skills/agentv-chat-to-eval/SKILL.md +144 -0
- package/dist/templates/.agents/skills/agentv-chat-to-eval/examples/transcript-json.md +67 -0
- package/dist/templates/.agents/skills/agentv-chat-to-eval/examples/transcript-markdown.md +101 -0
- package/dist/templates/.agents/skills/agentv-eval-builder/SKILL.md +458 -0
- package/dist/templates/.agents/skills/agentv-eval-builder/references/config-schema.json +36 -0
- package/dist/templates/.agents/skills/agentv-eval-builder/references/custom-evaluators.md +118 -0
- package/dist/templates/.agents/skills/agentv-eval-builder/references/eval-schema.json +12753 -0
- package/dist/templates/.agents/skills/agentv-eval-builder/references/rubric-evaluator.md +77 -0
- package/dist/templates/.agents/skills/agentv-eval-orchestrator/SKILL.md +50 -0
- package/dist/templates/.agents/skills/agentv-prompt-optimizer/SKILL.md +78 -0
- package/dist/templates/.agentv/.env.example +25 -0
- package/dist/templates/.claude/skills/agentv-eval-builder/SKILL.md +177 -0
- package/dist/templates/.claude/skills/agentv-eval-builder/references/batch-cli-evaluator.md +316 -0
- package/dist/templates/.claude/skills/agentv-eval-builder/references/compare-command.md +137 -0
- package/dist/templates/.claude/skills/agentv-eval-builder/references/composite-evaluator.md +215 -0
- package/dist/templates/.claude/skills/agentv-eval-builder/references/config-schema.json +27 -0
- package/dist/templates/.claude/skills/agentv-eval-builder/references/custom-evaluators.md +115 -0
- package/dist/templates/.claude/skills/agentv-eval-builder/references/eval-schema.json +278 -0
- package/dist/templates/.claude/skills/agentv-eval-builder/references/example-evals.md +333 -0
- package/dist/templates/.claude/skills/agentv-eval-builder/references/rubric-evaluator.md +79 -0
- package/dist/templates/.claude/skills/agentv-eval-builder/references/structured-data-evaluators.md +121 -0
- package/dist/templates/.claude/skills/agentv-eval-builder/references/tool-trajectory-evaluator.md +298 -0
- package/dist/templates/.claude/skills/agentv-prompt-optimizer/SKILL.md +78 -0
- package/dist/templates/.github/prompts/agentv-eval-build.prompt.md +5 -0
- package/dist/templates/.github/prompts/agentv-optimize.prompt.md +4 -0
- package/package.json +3 -3
- package/dist/chunk-DDMAQT5P.js.map +0 -1
- package/dist/chunk-X3KJVUAB.js.map +0 -1
- package/dist/{chunk-DJ6FJ6J4.js.map → chunk-6UE665XI.js.map} +0 -0
- package/dist/{dist-DJFWBJNJ.js.map → dist-3QUJEJUT.js.map} +0 -0
- package/dist/{interactive-ST4ZSRK4.js.map → interactive-EO6AR2R3.js.map} +0 -0
|
@@ -17,7 +17,7 @@ import {
|
|
|
17
17
|
validateEvalFile,
|
|
18
18
|
validateFileReferences,
|
|
19
19
|
validateTargetsFile
|
|
20
|
-
} from "./chunk-DDMAQT5P.js";
|
|
20
|
+
} from "./chunk-F7LAJMTO.js";
|
|
21
21
|
import {
|
|
22
22
|
createBuiltinRegistry,
|
|
23
23
|
createProvider,
|
|
@@ -35,7 +35,7 @@ import {
|
|
|
35
35
|
toSnakeCaseDeep as toSnakeCaseDeep2,
|
|
36
36
|
transpileEvalYamlFile,
|
|
37
37
|
trimBaselineResult
|
|
38
|
-
} from "./chunk-X3KJVUAB.js";
|
|
38
|
+
} from "./chunk-KGK5NUFG.js";
|
|
39
39
|
import {
|
|
40
40
|
__commonJS,
|
|
41
41
|
__esm,
|
|
@@ -4187,7 +4187,7 @@ var evalRunCommand = command({
|
|
|
4187
4187
|
},
|
|
4188
4188
|
handler: async (args) => {
|
|
4189
4189
|
if (args.evalPaths.length === 0 && process.stdin.isTTY) {
|
|
4190
|
-
const { launchInteractiveWizard } = await import("./interactive-ST4ZSRK4.js");
|
|
4190
|
+
const { launchInteractiveWizard } = await import("./interactive-EO6AR2R3.js");
|
|
4191
4191
|
await launchInteractiveWizard();
|
|
4192
4192
|
return;
|
|
4193
4193
|
}
|
|
@@ -6287,4 +6287,4 @@ export {
|
|
|
6287
6287
|
preprocessArgv,
|
|
6288
6288
|
runCli
|
|
6289
6289
|
};
|
|
6290
|
-
//# sourceMappingURL=chunk-DJ6FJ6J4.js.map
|
|
6290
|
+
//# sourceMappingURL=chunk-6UE665XI.js.map
|
|
@@ -27,12 +27,12 @@ import {
|
|
|
27
27
|
subscribeToCopilotCliLogEntries,
|
|
28
28
|
subscribeToCopilotSdkLogEntries,
|
|
29
29
|
subscribeToPiLogEntries
|
|
30
|
-
} from "./chunk-X3KJVUAB.js";
|
|
30
|
+
} from "./chunk-KGK5NUFG.js";
|
|
31
31
|
|
|
32
32
|
// package.json
|
|
33
33
|
var package_default = {
|
|
34
34
|
name: "agentv",
|
|
35
|
-
version: "3.10.0",
|
|
35
|
+
version: "3.10.2",
|
|
36
36
|
description: "CLI entry point for AgentV",
|
|
37
37
|
type: "module",
|
|
38
38
|
repository: {
|
|
@@ -51,7 +51,7 @@ var package_default = {
|
|
|
51
51
|
dev: "bun src/cli.ts",
|
|
52
52
|
build: "tsup && bun run copy-readme",
|
|
53
53
|
"copy-readme": `bun -e "import { cpSync } from 'fs'; cpSync('../../README.md', 'README.md')"`,
|
|
54
|
-
prepublishOnly: "bun run
|
|
54
|
+
prepublishOnly: `node -e "if(process.env.ALLOW_PUBLISH!=='1'){console.error('ERROR: Use bun run publish:next, then bun run promote:latest');process.exit(1)}"`,
|
|
55
55
|
typecheck: "tsc --noEmit",
|
|
56
56
|
lint: "biome check .",
|
|
57
57
|
format: "biome format --write .",
|
|
@@ -2259,25 +2259,7 @@ async function validateEvalFile(filePath) {
|
|
|
2259
2259
|
});
|
|
2260
2260
|
}
|
|
2261
2261
|
}
|
|
2262
|
-
|
|
2263
|
-
if (cases === void 0 && "eval_cases" in parsed) {
|
|
2264
|
-
cases = parsed.eval_cases;
|
|
2265
|
-
errors.push({
|
|
2266
|
-
severity: "warning",
|
|
2267
|
-
filePath: absolutePath,
|
|
2268
|
-
location: "eval_cases",
|
|
2269
|
-
message: "'eval_cases' is deprecated. Use 'tests' instead."
|
|
2270
|
-
});
|
|
2271
|
-
}
|
|
2272
|
-
if (cases === void 0 && "evalcases" in parsed) {
|
|
2273
|
-
cases = parsed.evalcases;
|
|
2274
|
-
errors.push({
|
|
2275
|
-
severity: "warning",
|
|
2276
|
-
filePath: absolutePath,
|
|
2277
|
-
location: "evalcases",
|
|
2278
|
-
message: "'evalcases' is deprecated. Use 'tests' instead."
|
|
2279
|
-
});
|
|
2280
|
-
}
|
|
2262
|
+
const cases = parsed.tests;
|
|
2281
2263
|
if (typeof cases === "string") {
|
|
2282
2264
|
validateTestsStringPath(cases, absolutePath, errors);
|
|
2283
2265
|
await validateWorkspaceConfig(parsed.workspace, absolutePath, errors, "workspace");
|
|
@@ -2329,6 +2311,19 @@ async function validateEvalFile(filePath) {
|
|
|
2329
2311
|
for (let i = 0; i < cases.length; i++) {
|
|
2330
2312
|
const evalCase = cases[i];
|
|
2331
2313
|
const location = `tests[${i}]`;
|
|
2314
|
+
if (typeof evalCase === "string") {
|
|
2315
|
+
if (evalCase.startsWith("file://")) {
|
|
2316
|
+
validateTestsStringPath(evalCase, absolutePath, errors);
|
|
2317
|
+
} else {
|
|
2318
|
+
errors.push({
|
|
2319
|
+
severity: "error",
|
|
2320
|
+
filePath: absolutePath,
|
|
2321
|
+
location,
|
|
2322
|
+
message: "Test case string must be a file reference (file://...)"
|
|
2323
|
+
});
|
|
2324
|
+
}
|
|
2325
|
+
continue;
|
|
2326
|
+
}
|
|
2332
2327
|
if (!isObject(evalCase)) {
|
|
2333
2328
|
errors.push({
|
|
2334
2329
|
severity: "error",
|
|
@@ -2542,7 +2537,9 @@ function validateMessages(messages, location, filePath, errors) {
|
|
|
2542
2537
|
});
|
|
2543
2538
|
}
|
|
2544
2539
|
const content = message.content;
|
|
2545
|
-
|
|
2540
|
+
const hasToolCalls = "tool_calls" in message;
|
|
2541
|
+
if (content === void 0 && hasToolCalls) {
|
|
2542
|
+
} else if (typeof content === "string") {
|
|
2546
2543
|
validateContentForRoleMarkers(content, `${msgLocation}.content`, filePath, errors);
|
|
2547
2544
|
} else if (Array.isArray(content)) {
|
|
2548
2545
|
for (let j = 0; j < content.length; j++) {
|
|
@@ -2582,12 +2579,13 @@ function validateMessages(messages, location, filePath, errors) {
|
|
|
2582
2579
|
});
|
|
2583
2580
|
}
|
|
2584
2581
|
}
|
|
2582
|
+
} else if (isObject(content)) {
|
|
2585
2583
|
} else {
|
|
2586
2584
|
errors.push({
|
|
2587
2585
|
severity: "error",
|
|
2588
2586
|
filePath,
|
|
2589
2587
|
location: `${msgLocation}.content`,
|
|
2590
|
-
message: "Missing or invalid 'content' field (must be a string or
|
|
2588
|
+
message: "Missing or invalid 'content' field (must be a string, array, or object)"
|
|
2591
2589
|
});
|
|
2592
2590
|
}
|
|
2593
2591
|
}
|
|
@@ -3038,7 +3036,7 @@ async function validateTargetsFile(filePath) {
|
|
|
3038
3036
|
let parsed;
|
|
3039
3037
|
try {
|
|
3040
3038
|
const content = await readFile32(absolutePath, "utf8");
|
|
3041
|
-
parsed =
|
|
3039
|
+
parsed = parse3(content);
|
|
3042
3040
|
} catch (error) {
|
|
3043
3041
|
errors.push({
|
|
3044
3042
|
severity: "error",
|
|
@@ -3197,6 +3195,7 @@ async function validateTargetsFile(filePath) {
|
|
|
3197
3195
|
}
|
|
3198
3196
|
const provider = target.provider;
|
|
3199
3197
|
const providerValue = typeof provider === "string" ? provider.trim().toLowerCase() : void 0;
|
|
3198
|
+
const isTemplated = typeof provider === "string" && /^\$\{\{.+\}\}$/.test(provider.trim());
|
|
3200
3199
|
if (typeof provider !== "string" || provider.trim().length === 0) {
|
|
3201
3200
|
errors.push({
|
|
3202
3201
|
severity: "error",
|
|
@@ -3204,7 +3203,7 @@ async function validateTargetsFile(filePath) {
|
|
|
3204
3203
|
location: `${location}.provider`,
|
|
3205
3204
|
message: "Missing or invalid 'provider' field (must be a non-empty string)"
|
|
3206
3205
|
});
|
|
3207
|
-
} else if (!knownProviders.includes(provider)) {
|
|
3206
|
+
} else if (!isTemplated && !knownProviders.includes(provider)) {
|
|
3208
3207
|
errors.push({
|
|
3209
3208
|
severity: "warning",
|
|
3210
3209
|
filePath: absolutePath,
|
|
@@ -3215,7 +3214,7 @@ async function validateTargetsFile(filePath) {
|
|
|
3215
3214
|
if (providerValue === "cli") {
|
|
3216
3215
|
validateCliSettings(target, absolutePath, location, errors);
|
|
3217
3216
|
}
|
|
3218
|
-
if (typeof provider === "string") {
|
|
3217
|
+
if (typeof provider === "string" && !isTemplated) {
|
|
3219
3218
|
validateUnknownSettings(target, provider, absolutePath, location, errors);
|
|
3220
3219
|
}
|
|
3221
3220
|
const graderTarget = target.grader_target ?? target.judge_target;
|
|
@@ -4156,7 +4155,7 @@ async function runEvalCommand(input) {
|
|
|
4156
4155
|
const useFileExport = !!(options.otelFile || options.traceFile);
|
|
4157
4156
|
if (options.exportOtel || useFileExport) {
|
|
4158
4157
|
try {
|
|
4159
|
-
const { OtelTraceExporter, OTEL_BACKEND_PRESETS } = await import("./dist-DJFWBJNJ.js");
|
|
4158
|
+
const { OtelTraceExporter, OTEL_BACKEND_PRESETS } = await import("./dist-3QUJEJUT.js");
|
|
4160
4159
|
let endpoint = process.env.OTEL_EXPORTER_OTLP_ENDPOINT;
|
|
4161
4160
|
let headers = {};
|
|
4162
4161
|
if (options.otelBackend) {
|
|
@@ -4501,4 +4500,4 @@ export {
|
|
|
4501
4500
|
selectTarget,
|
|
4502
4501
|
runEvalCommand
|
|
4503
4502
|
};
|
|
4504
|
-
//# sourceMappingURL=chunk-DDMAQT5P.js.map
|
|
4503
|
+
//# sourceMappingURL=chunk-F7LAJMTO.js.map
|