agentv 3.13.0 → 3.13.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,6 +2,7 @@ import { createRequire } from 'node:module'; const require = createRequire(impor
2
2
  import {
3
3
  HtmlWriter,
4
4
  RESULT_INDEX_FILENAME,
5
+ RESULT_RUNS_DIRNAME,
5
6
  detectFileType,
6
7
  findRepoRoot,
7
8
  loadLightweightResults,
@@ -21,7 +22,7 @@ import {
21
22
  validateFileReferences,
22
23
  validateTargetsFile,
23
24
  writeArtifactsFromResults
24
- } from "./chunk-6H4IAXQH.js";
25
+ } from "./chunk-4Z5E5CYT.js";
25
26
  import {
26
27
  createBuiltinRegistry,
27
28
  executeScript,
@@ -38,7 +39,7 @@ import {
38
39
  toSnakeCaseDeep as toSnakeCaseDeep2,
39
40
  transpileEvalYamlFile,
40
41
  trimBaselineResult
41
- } from "./chunk-7OHZAFND.js";
42
+ } from "./chunk-D3LNJUUB.js";
42
43
  import {
43
44
  __commonJS,
44
45
  __esm,
@@ -3388,7 +3389,7 @@ function convertEvalsJsonToYaml(inputPath) {
3388
3389
  for (const assertion of test.assertions) {
3389
3390
  lines.push(` - name: ${assertion.name}`);
3390
3391
  lines.push(` type: ${assertion.type}`);
3391
- if ((assertion.type === "llm-grader" || assertion.type === "llm-judge") && "prompt" in assertion) {
3392
+ if (assertion.type === "llm-grader" && "prompt" in assertion) {
3392
3393
  const prompt = assertion.prompt;
3393
3394
  lines.push(` prompt: "${prompt.replace(/"/g, '\\"')}"`);
3394
3395
  }
@@ -3745,10 +3746,10 @@ async function getPromptEvalGradingBrief(evalPath, testId) {
3745
3746
  if (item.outcome) criteria.push(item.outcome);
3746
3747
  }
3747
3748
  }
3748
- } else if (type === "llm-grader" || type === "llm_grader" || type === "llm-judge" || type === "llm_judge") {
3749
+ } else if (type === "llm-grader" || type === "llm_grader") {
3749
3750
  const prompt = entry.prompt ?? bag.prompt ?? bag.criteria;
3750
3751
  criteria.push(`[llm-grader] ${typeof prompt === "string" ? prompt : ""}`);
3751
- } else if (type === "code-grader" || type === "code_grader" || type === "code-judge" || type === "code_judge") {
3752
+ } else if (type === "code-grader" || type === "code_grader") {
3752
3753
  const name = entry.name ?? type;
3753
3754
  const desc = bag.description ?? entry.description;
3754
3755
  criteria.push(`[code-grader] ${name}${desc ? `: ${desc}` : ""}`);
@@ -4175,11 +4176,16 @@ var evalRunCommand = command({
4175
4176
  type: optional(string),
4176
4177
  long: "output-messages",
4177
4178
  description: 'Number of trailing messages to include in results output (default: 1, or "all")'
4179
+ }),
4180
+ threshold: option({
4181
+ type: optional(number),
4182
+ long: "threshold",
4183
+ description: "Suite-level quality gate: exit 1 if mean score falls below this value (0-1)"
4178
4184
  })
4179
4185
  },
4180
4186
  handler: async (args) => {
4181
4187
  if (args.evalPaths.length === 0 && process.stdin.isTTY) {
4182
- const { launchInteractiveWizard } = await import("./interactive-RV664PCR.js");
4188
+ const { launchInteractiveWizard } = await import("./interactive-HVKLYGRX.js");
4183
4189
  await launchInteractiveWizard();
4184
4190
  return;
4185
4191
  }
@@ -4215,9 +4221,13 @@ var evalRunCommand = command({
4215
4221
  artifacts: args.artifacts,
4216
4222
  graderTarget: args.graderTarget,
4217
4223
  model: args.model,
4218
- outputMessages: args.outputMessages
4224
+ outputMessages: args.outputMessages,
4225
+ threshold: args.threshold
4219
4226
  };
4220
- await runEvalCommand({ testFiles: resolvedPaths, rawOptions });
4227
+ const result = await runEvalCommand({ testFiles: resolvedPaths, rawOptions });
4228
+ if (result?.thresholdFailed) {
4229
+ process.exit(1);
4230
+ }
4221
4231
  }
4222
4232
  });
4223
4233
 
@@ -4760,7 +4770,7 @@ async function writeGraderConfigs(testDir, assertions, evalDir) {
4760
4770
  let hasCodeGraders = false;
4761
4771
  let hasLlmGraders = false;
4762
4772
  for (const assertion of assertions) {
4763
- if (assertion.type === "code-grader" || assertion.type === "code-judge") {
4773
+ if (assertion.type === "code-grader") {
4764
4774
  if (!hasCodeGraders) {
4765
4775
  await mkdir3(codeGradersDir, { recursive: true });
4766
4776
  hasCodeGraders = true;
@@ -4773,7 +4783,7 @@ async function writeGraderConfigs(testDir, assertions, evalDir) {
4773
4783
  weight: config.weight ?? 1,
4774
4784
  config: config.config ?? {}
4775
4785
  });
4776
- } else if (assertion.type === "llm-grader" || assertion.type === "llm-judge") {
4786
+ } else if (assertion.type === "llm-grader") {
4777
4787
  if (!hasLlmGraders) {
4778
4788
  await mkdir3(llmGradersDir, { recursive: true });
4779
4789
  hasLlmGraders = true;
@@ -5021,13 +5031,15 @@ function loadOtlpTraceFile(filePath) {
5021
5031
  } : void 0,
5022
5032
  spans: traceSummary?.spans,
5023
5033
  output: stringAttr(rootAttrs.agentv_output_text),
5024
- scores: root.events?.filter((event) => event.name?.startsWith("agentv.evaluator.")).map((event) => {
5034
+ scores: root.events?.filter(
5035
+ (event) => event.name?.startsWith("agentv.grader.") || event.name?.startsWith("agentv.evaluator.")
5036
+ ).map((event) => {
5025
5037
  const attrs = parseOtlpAttributes(event.attributes);
5026
- const name = event.name?.replace(/^agentv\.evaluator\./, "") ?? "unknown";
5038
+ const name = event.name?.replace(/^agentv\.grader\./, "").replace(/^agentv\.evaluator\./, "") ?? "unknown";
5027
5039
  return {
5028
5040
  name,
5029
- type: stringAttr(attrs.agentv_evaluator_type) ?? "unknown",
5030
- score: numberAttr(attrs.agentv_evaluator_score) ?? 0
5041
+ type: stringAttr(attrs.agentv_grader_type) ?? stringAttr(attrs.agentv_evaluator_type) ?? "unknown",
5042
+ score: numberAttr(attrs.agentv_grader_score) ?? numberAttr(attrs.agentv_evaluator_score) ?? 0
5031
5043
  };
5032
5044
  })
5033
5045
  };
@@ -5131,13 +5143,13 @@ function toTraceSummary(result) {
5131
5143
  }
5132
5144
  function listResultFiles(cwd, limit) {
5133
5145
  const baseDir = path6.join(cwd, ".agentv", "results");
5134
- const rawDir = path6.join(baseDir, "raw");
5146
+ const runsDir = path6.join(baseDir, RESULT_RUNS_DIRNAME);
5135
5147
  const files = [];
5136
5148
  try {
5137
- const entries2 = readdirSync2(rawDir, { withFileTypes: true });
5149
+ const entries2 = readdirSync2(runsDir, { withFileTypes: true });
5138
5150
  for (const entry of entries2) {
5139
5151
  if (entry.isDirectory()) {
5140
- const primaryPath = resolveExistingRunPrimaryPath(path6.join(rawDir, entry.name));
5152
+ const primaryPath = resolveExistingRunPrimaryPath(path6.join(runsDir, entry.name));
5141
5153
  if (primaryPath) {
5142
5154
  files.push({ filePath: primaryPath, displayName: entry.name });
5143
5155
  }
@@ -5145,7 +5157,7 @@ function listResultFiles(cwd, limit) {
5145
5157
  }
5146
5158
  for (const entry of entries2) {
5147
5159
  if (!entry.isDirectory() && entry.name.endsWith(".jsonl")) {
5148
- files.push({ filePath: path6.join(rawDir, entry.name), displayName: entry.name });
5160
+ files.push({ filePath: path6.join(runsDir, entry.name), displayName: entry.name });
5149
5161
  }
5150
5162
  }
5151
5163
  } catch {
@@ -7753,4 +7765,4 @@ export {
7753
7765
  preprocessArgv,
7754
7766
  runCli
7755
7767
  };
7756
- //# sourceMappingURL=chunk-DJU4C6NS.js.map
7768
+ //# sourceMappingURL=chunk-X2343WOK.js.map