agentv 3.10.0 → 3.10.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. package/dist/{chunk-DJ6FJ6J4.js → chunk-6UE665XI.js} +4 -4
  2. package/dist/{chunk-DDMAQT5P.js → chunk-F7LAJMTO.js} +28 -29
  3. package/dist/chunk-F7LAJMTO.js.map +1 -0
  4. package/dist/{chunk-X3KJVUAB.js → chunk-KGK5NUFG.js} +11 -6
  5. package/dist/chunk-KGK5NUFG.js.map +1 -0
  6. package/dist/cli.js +3 -3
  7. package/dist/{dist-DJFWBJNJ.js → dist-3QUJEJUT.js} +2 -2
  8. package/dist/index.js +3 -3
  9. package/dist/{interactive-ST4ZSRK4.js → interactive-EO6AR2R3.js} +3 -3
  10. package/dist/templates/.agents/skills/agentv-chat-to-eval/README.md +84 -0
  11. package/dist/templates/.agents/skills/agentv-chat-to-eval/SKILL.md +144 -0
  12. package/dist/templates/.agents/skills/agentv-chat-to-eval/examples/transcript-json.md +67 -0
  13. package/dist/templates/.agents/skills/agentv-chat-to-eval/examples/transcript-markdown.md +101 -0
  14. package/dist/templates/.agents/skills/agentv-eval-builder/SKILL.md +458 -0
  15. package/dist/templates/.agents/skills/agentv-eval-builder/references/config-schema.json +36 -0
  16. package/dist/templates/.agents/skills/agentv-eval-builder/references/custom-evaluators.md +118 -0
  17. package/dist/templates/.agents/skills/agentv-eval-builder/references/eval-schema.json +12753 -0
  18. package/dist/templates/.agents/skills/agentv-eval-builder/references/rubric-evaluator.md +77 -0
  19. package/dist/templates/.agents/skills/agentv-eval-orchestrator/SKILL.md +50 -0
  20. package/dist/templates/.agents/skills/agentv-prompt-optimizer/SKILL.md +78 -0
  21. package/dist/templates/.agentv/.env.example +25 -0
  22. package/dist/templates/.claude/skills/agentv-eval-builder/SKILL.md +177 -0
  23. package/dist/templates/.claude/skills/agentv-eval-builder/references/batch-cli-evaluator.md +316 -0
  24. package/dist/templates/.claude/skills/agentv-eval-builder/references/compare-command.md +137 -0
  25. package/dist/templates/.claude/skills/agentv-eval-builder/references/composite-evaluator.md +215 -0
  26. package/dist/templates/.claude/skills/agentv-eval-builder/references/config-schema.json +27 -0
  27. package/dist/templates/.claude/skills/agentv-eval-builder/references/custom-evaluators.md +115 -0
  28. package/dist/templates/.claude/skills/agentv-eval-builder/references/eval-schema.json +278 -0
  29. package/dist/templates/.claude/skills/agentv-eval-builder/references/example-evals.md +333 -0
  30. package/dist/templates/.claude/skills/agentv-eval-builder/references/rubric-evaluator.md +79 -0
  31. package/dist/templates/.claude/skills/agentv-eval-builder/references/structured-data-evaluators.md +121 -0
  32. package/dist/templates/.claude/skills/agentv-eval-builder/references/tool-trajectory-evaluator.md +298 -0
  33. package/dist/templates/.claude/skills/agentv-prompt-optimizer/SKILL.md +78 -0
  34. package/dist/templates/.github/prompts/agentv-eval-build.prompt.md +5 -0
  35. package/dist/templates/.github/prompts/agentv-optimize.prompt.md +4 -0
  36. package/package.json +3 -3
  37. package/dist/chunk-DDMAQT5P.js.map +0 -1
  38. package/dist/chunk-X3KJVUAB.js.map +0 -1
  39. package/dist/{chunk-DJ6FJ6J4.js.map → chunk-6UE665XI.js.map} +0 -0
  40. package/dist/{dist-DJFWBJNJ.js.map → dist-3QUJEJUT.js.map} +0 -0
  41. package/dist/{interactive-ST4ZSRK4.js.map → interactive-EO6AR2R3.js.map} +0 -0
@@ -17,7 +17,7 @@ import {
17
17
  validateEvalFile,
18
18
  validateFileReferences,
19
19
  validateTargetsFile
20
- } from "./chunk-DDMAQT5P.js";
20
+ } from "./chunk-F7LAJMTO.js";
21
21
  import {
22
22
  createBuiltinRegistry,
23
23
  createProvider,
@@ -35,7 +35,7 @@ import {
35
35
  toSnakeCaseDeep as toSnakeCaseDeep2,
36
36
  transpileEvalYamlFile,
37
37
  trimBaselineResult
38
- } from "./chunk-X3KJVUAB.js";
38
+ } from "./chunk-KGK5NUFG.js";
39
39
  import {
40
40
  __commonJS,
41
41
  __esm,
@@ -4187,7 +4187,7 @@ var evalRunCommand = command({
4187
4187
  },
4188
4188
  handler: async (args) => {
4189
4189
  if (args.evalPaths.length === 0 && process.stdin.isTTY) {
4190
- const { launchInteractiveWizard } = await import("./interactive-ST4ZSRK4.js");
4190
+ const { launchInteractiveWizard } = await import("./interactive-EO6AR2R3.js");
4191
4191
  await launchInteractiveWizard();
4192
4192
  return;
4193
4193
  }
@@ -6287,4 +6287,4 @@ export {
6287
6287
  preprocessArgv,
6288
6288
  runCli
6289
6289
  };
6290
- //# sourceMappingURL=chunk-DJ6FJ6J4.js.map
6290
+ //# sourceMappingURL=chunk-6UE665XI.js.map
@@ -27,12 +27,12 @@ import {
27
27
  subscribeToCopilotCliLogEntries,
28
28
  subscribeToCopilotSdkLogEntries,
29
29
  subscribeToPiLogEntries
30
- } from "./chunk-X3KJVUAB.js";
30
+ } from "./chunk-KGK5NUFG.js";
31
31
 
32
32
  // package.json
33
33
  var package_default = {
34
34
  name: "agentv",
35
- version: "3.10.0",
35
+ version: "3.10.2",
36
36
  description: "CLI entry point for AgentV",
37
37
  type: "module",
38
38
  repository: {
@@ -51,7 +51,7 @@ var package_default = {
51
51
  dev: "bun src/cli.ts",
52
52
  build: "tsup && bun run copy-readme",
53
53
  "copy-readme": `bun -e "import { cpSync } from 'fs'; cpSync('../../README.md', 'README.md')"`,
54
- prepublishOnly: "bun run copy-readme",
54
+ prepublishOnly: `node -e "if(process.env.ALLOW_PUBLISH!=='1'){console.error('ERROR: Use bun run publish:next, then bun run promote:latest');process.exit(1)}"`,
55
55
  typecheck: "tsc --noEmit",
56
56
  lint: "biome check .",
57
57
  format: "biome format --write .",
@@ -2259,25 +2259,7 @@ async function validateEvalFile(filePath) {
2259
2259
  });
2260
2260
  }
2261
2261
  }
2262
- let cases = parsed.tests;
2263
- if (cases === void 0 && "eval_cases" in parsed) {
2264
- cases = parsed.eval_cases;
2265
- errors.push({
2266
- severity: "warning",
2267
- filePath: absolutePath,
2268
- location: "eval_cases",
2269
- message: "'eval_cases' is deprecated. Use 'tests' instead."
2270
- });
2271
- }
2272
- if (cases === void 0 && "evalcases" in parsed) {
2273
- cases = parsed.evalcases;
2274
- errors.push({
2275
- severity: "warning",
2276
- filePath: absolutePath,
2277
- location: "evalcases",
2278
- message: "'evalcases' is deprecated. Use 'tests' instead."
2279
- });
2280
- }
2262
+ const cases = parsed.tests;
2281
2263
  if (typeof cases === "string") {
2282
2264
  validateTestsStringPath(cases, absolutePath, errors);
2283
2265
  await validateWorkspaceConfig(parsed.workspace, absolutePath, errors, "workspace");
@@ -2329,6 +2311,19 @@ async function validateEvalFile(filePath) {
2329
2311
  for (let i = 0; i < cases.length; i++) {
2330
2312
  const evalCase = cases[i];
2331
2313
  const location = `tests[${i}]`;
2314
+ if (typeof evalCase === "string") {
2315
+ if (evalCase.startsWith("file://")) {
2316
+ validateTestsStringPath(evalCase, absolutePath, errors);
2317
+ } else {
2318
+ errors.push({
2319
+ severity: "error",
2320
+ filePath: absolutePath,
2321
+ location,
2322
+ message: "Test case string must be a file reference (file://...)"
2323
+ });
2324
+ }
2325
+ continue;
2326
+ }
2332
2327
  if (!isObject(evalCase)) {
2333
2328
  errors.push({
2334
2329
  severity: "error",
@@ -2542,7 +2537,9 @@ function validateMessages(messages, location, filePath, errors) {
2542
2537
  });
2543
2538
  }
2544
2539
  const content = message.content;
2545
- if (typeof content === "string") {
2540
+ const hasToolCalls = "tool_calls" in message;
2541
+ if (content === void 0 && hasToolCalls) {
2542
+ } else if (typeof content === "string") {
2546
2543
  validateContentForRoleMarkers(content, `${msgLocation}.content`, filePath, errors);
2547
2544
  } else if (Array.isArray(content)) {
2548
2545
  for (let j = 0; j < content.length; j++) {
@@ -2582,12 +2579,13 @@ function validateMessages(messages, location, filePath, errors) {
2582
2579
  });
2583
2580
  }
2584
2581
  }
2582
+ } else if (isObject(content)) {
2585
2583
  } else {
2586
2584
  errors.push({
2587
2585
  severity: "error",
2588
2586
  filePath,
2589
2587
  location: `${msgLocation}.content`,
2590
- message: "Missing or invalid 'content' field (must be a string or array)"
2588
+ message: "Missing or invalid 'content' field (must be a string, array, or object)"
2591
2589
  });
2592
2590
  }
2593
2591
  }
@@ -3038,7 +3036,7 @@ async function validateTargetsFile(filePath) {
3038
3036
  let parsed;
3039
3037
  try {
3040
3038
  const content = await readFile32(absolutePath, "utf8");
3041
- parsed = interpolateEnv(parse3(content), process.env);
3039
+ parsed = parse3(content);
3042
3040
  } catch (error) {
3043
3041
  errors.push({
3044
3042
  severity: "error",
@@ -3197,6 +3195,7 @@ async function validateTargetsFile(filePath) {
3197
3195
  }
3198
3196
  const provider = target.provider;
3199
3197
  const providerValue = typeof provider === "string" ? provider.trim().toLowerCase() : void 0;
3198
+ const isTemplated = typeof provider === "string" && /^\$\{\{.+\}\}$/.test(provider.trim());
3200
3199
  if (typeof provider !== "string" || provider.trim().length === 0) {
3201
3200
  errors.push({
3202
3201
  severity: "error",
@@ -3204,7 +3203,7 @@ async function validateTargetsFile(filePath) {
3204
3203
  location: `${location}.provider`,
3205
3204
  message: "Missing or invalid 'provider' field (must be a non-empty string)"
3206
3205
  });
3207
- } else if (!knownProviders.includes(provider)) {
3206
+ } else if (!isTemplated && !knownProviders.includes(provider)) {
3208
3207
  errors.push({
3209
3208
  severity: "warning",
3210
3209
  filePath: absolutePath,
@@ -3215,7 +3214,7 @@ async function validateTargetsFile(filePath) {
3215
3214
  if (providerValue === "cli") {
3216
3215
  validateCliSettings(target, absolutePath, location, errors);
3217
3216
  }
3218
- if (typeof provider === "string") {
3217
+ if (typeof provider === "string" && !isTemplated) {
3219
3218
  validateUnknownSettings(target, provider, absolutePath, location, errors);
3220
3219
  }
3221
3220
  const graderTarget = target.grader_target ?? target.judge_target;
@@ -4156,7 +4155,7 @@ async function runEvalCommand(input) {
4156
4155
  const useFileExport = !!(options.otelFile || options.traceFile);
4157
4156
  if (options.exportOtel || useFileExport) {
4158
4157
  try {
4159
- const { OtelTraceExporter, OTEL_BACKEND_PRESETS } = await import("./dist-DJFWBJNJ.js");
4158
+ const { OtelTraceExporter, OTEL_BACKEND_PRESETS } = await import("./dist-3QUJEJUT.js");
4160
4159
  let endpoint = process.env.OTEL_EXPORTER_OTLP_ENDPOINT;
4161
4160
  let headers = {};
4162
4161
  if (options.otelBackend) {
@@ -4501,4 +4500,4 @@ export {
4501
4500
  selectTarget,
4502
4501
  runEvalCommand
4503
4502
  };
4504
- //# sourceMappingURL=chunk-DDMAQT5P.js.map
4503
+ //# sourceMappingURL=chunk-F7LAJMTO.js.map