agentv 3.12.0 → 3.13.1

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
@@ -1,8 +1,8 @@
1
1
  import { createRequire } from 'node:module'; const require = createRequire(import.meta.url);
2
2
  import {
3
3
  HtmlWriter,
4
- LEGACY_RESULTS_FILENAME,
5
4
  RESULT_INDEX_FILENAME,
5
+ RESULT_RUNS_DIRNAME,
6
6
  detectFileType,
7
7
  findRepoRoot,
8
8
  loadLightweightResults,
@@ -22,7 +22,7 @@ import {
22
22
  validateFileReferences,
23
23
  validateTargetsFile,
24
24
  writeArtifactsFromResults
25
- } from "./chunk-VLOFRXH4.js";
25
+ } from "./chunk-LSXO22CF.js";
26
26
  import {
27
27
  createBuiltinRegistry,
28
28
  executeScript,
@@ -39,7 +39,7 @@ import {
39
39
  toSnakeCaseDeep as toSnakeCaseDeep2,
40
40
  transpileEvalYamlFile,
41
41
  trimBaselineResult
42
- } from "./chunk-UYBLUYHN.js";
42
+ } from "./chunk-K747KGDP.js";
43
43
  import {
44
44
  __commonJS,
45
45
  __esm,
@@ -3389,7 +3389,7 @@ function convertEvalsJsonToYaml(inputPath) {
3389
3389
  for (const assertion of test.assertions) {
3390
3390
  lines.push(` - name: ${assertion.name}`);
3391
3391
  lines.push(` type: ${assertion.type}`);
3392
- if ((assertion.type === "llm-grader" || assertion.type === "llm-judge") && "prompt" in assertion) {
3392
+ if (assertion.type === "llm-grader" && "prompt" in assertion) {
3393
3393
  const prompt = assertion.prompt;
3394
3394
  lines.push(` prompt: "${prompt.replace(/"/g, '\\"')}"`);
3395
3395
  }
@@ -3746,10 +3746,10 @@ async function getPromptEvalGradingBrief(evalPath, testId) {
3746
3746
  if (item.outcome) criteria.push(item.outcome);
3747
3747
  }
3748
3748
  }
3749
- } else if (type === "llm-grader" || type === "llm_grader" || type === "llm-judge" || type === "llm_judge") {
3749
+ } else if (type === "llm-grader" || type === "llm_grader") {
3750
3750
  const prompt = entry.prompt ?? bag.prompt ?? bag.criteria;
3751
3751
  criteria.push(`[llm-grader] ${typeof prompt === "string" ? prompt : ""}`);
3752
- } else if (type === "code-grader" || type === "code_grader" || type === "code-judge" || type === "code_judge") {
3752
+ } else if (type === "code-grader" || type === "code_grader") {
3753
3753
  const name = entry.name ?? type;
3754
3754
  const desc = bag.description ?? entry.description;
3755
3755
  criteria.push(`[code-grader] ${name}${desc ? `: ${desc}` : ""}`);
@@ -4126,11 +4126,6 @@ var evalRunCommand = command({
4126
4126
  long: "otel-file",
4127
4127
  description: "Write OTLP JSON trace to file (importable by OTel backends)"
4128
4128
  }),
4129
- traceFile: option({
4130
- type: optional(string),
4131
- long: "trace-file",
4132
- description: "Write human-readable trace JSONL to file"
4133
- }),
4134
4129
  exportOtel: flag({
4135
4130
  long: "export-otel",
4136
4131
  description: "Export evaluation traces via OTLP/HTTP to configured endpoint"
@@ -4185,7 +4180,7 @@ var evalRunCommand = command({
4185
4180
  },
4186
4181
  handler: async (args) => {
4187
4182
  if (args.evalPaths.length === 0 && process.stdin.isTTY) {
4188
- const { launchInteractiveWizard } = await import("./interactive-5X62YEEX.js");
4183
+ const { launchInteractiveWizard } = await import("./interactive-76ZJVPI7.js");
4189
4184
  await launchInteractiveWizard();
4190
4185
  return;
4191
4186
  }
@@ -4211,7 +4206,6 @@ var evalRunCommand = command({
4211
4206
  workspacePath: args.workspacePath,
4212
4207
  trace: false,
4213
4208
  otelFile: args.otelFile,
4214
- traceFile: args.traceFile,
4215
4209
  exportOtel: args.exportOtel,
4216
4210
  otelBackend: args.otelBackend,
4217
4211
  otelCaptureContent: args.otelCaptureContent,
@@ -4767,7 +4761,7 @@ async function writeGraderConfigs(testDir, assertions, evalDir) {
4767
4761
  let hasCodeGraders = false;
4768
4762
  let hasLlmGraders = false;
4769
4763
  for (const assertion of assertions) {
4770
- if (assertion.type === "code-grader" || assertion.type === "code-judge") {
4764
+ if (assertion.type === "code-grader") {
4771
4765
  if (!hasCodeGraders) {
4772
4766
  await mkdir3(codeGradersDir, { recursive: true });
4773
4767
  hasCodeGraders = true;
@@ -4780,7 +4774,7 @@ async function writeGraderConfigs(testDir, assertions, evalDir) {
4780
4774
  weight: config.weight ?? 1,
4781
4775
  config: config.config ?? {}
4782
4776
  });
4783
- } else if (assertion.type === "llm-grader" || assertion.type === "llm-judge") {
4777
+ } else if (assertion.type === "llm-grader") {
4784
4778
  if (!hasLlmGraders) {
4785
4779
  await mkdir3(llmGradersDir, { recursive: true });
4786
4780
  hasLlmGraders = true;
@@ -4866,12 +4860,6 @@ function loadResultFile(filePath) {
4866
4860
  return loadJsonlRecords(resolvedFilePath);
4867
4861
  }
4868
4862
  function resolveTraceResultPath(filePath) {
4869
- if (path6.basename(filePath) === LEGACY_RESULTS_FILENAME) {
4870
- return filePath;
4871
- }
4872
- if (!filePath.endsWith(".jsonl") && !filePath.endsWith(".json")) {
4873
- return resolveWorkspaceOrFilePath(filePath);
4874
- }
4875
4863
  return resolveWorkspaceOrFilePath(filePath);
4876
4864
  }
4877
4865
  function loadJsonlRecords(filePath) {
@@ -4945,7 +4933,9 @@ function loadOtlpTraceFile(filePath) {
4945
4933
  }
4946
4934
  }
4947
4935
  const roots = spans.filter((span) => !span.parentSpanId || !spanMap.has(span.parentSpanId));
4948
- return roots.map((root, index) => {
4936
+ const supportedRoots = roots.filter(isAgentvEvalRoot);
4937
+ const candidateRoots = supportedRoots.length > 0 ? supportedRoots : roots;
4938
+ return candidateRoots.map((root, index) => {
4949
4939
  const descendants = collectChildSpans(root.spanId, childMap);
4950
4940
  const rootAttrs = parseOtlpAttributes(root.attributes);
4951
4941
  const parsedDescendants = descendants.map((span) => ({
@@ -5032,18 +5022,24 @@ function loadOtlpTraceFile(filePath) {
5032
5022
  } : void 0,
5033
5023
  spans: traceSummary?.spans,
5034
5024
  output: stringAttr(rootAttrs.agentv_output_text),
5035
- scores: root.events?.filter((event) => event.name?.startsWith("agentv.evaluator.")).map((event) => {
5025
+ scores: root.events?.filter(
5026
+ (event) => event.name?.startsWith("agentv.grader.") || event.name?.startsWith("agentv.evaluator.")
5027
+ ).map((event) => {
5036
5028
  const attrs = parseOtlpAttributes(event.attributes);
5037
- const name = event.name?.replace(/^agentv\.evaluator\./, "") ?? "unknown";
5029
+ const name = event.name?.replace(/^agentv\.grader\./, "").replace(/^agentv\.evaluator\./, "") ?? "unknown";
5038
5030
  return {
5039
5031
  name,
5040
- type: stringAttr(attrs.agentv_evaluator_type) ?? "unknown",
5041
- score: numberAttr(attrs.agentv_evaluator_score) ?? 0
5032
+ type: stringAttr(attrs.agentv_grader_type) ?? stringAttr(attrs.agentv_evaluator_type) ?? "unknown",
5033
+ score: numberAttr(attrs.agentv_grader_score) ?? numberAttr(attrs.agentv_evaluator_score) ?? 0
5042
5034
  };
5043
5035
  })
5044
5036
  };
5045
5037
  });
5046
5038
  }
5039
+ function isAgentvEvalRoot(span) {
5040
+ const attrs = parseOtlpAttributes(span.attributes);
5041
+ return span.name === "agentv.eval" || numberAttr(attrs.agentv_score) !== void 0 || typeof stringAttr(attrs.agentv_test_id) === "string";
5042
+ }
5047
5043
  function collectChildSpans(spanId, childMap) {
5048
5044
  if (!spanId) return [];
5049
5045
  const direct = childMap.get(spanId) ?? [];
@@ -5138,13 +5134,13 @@ function toTraceSummary(result) {
5138
5134
  }
5139
5135
  function listResultFiles(cwd, limit) {
5140
5136
  const baseDir = path6.join(cwd, ".agentv", "results");
5141
- const rawDir = path6.join(baseDir, "raw");
5137
+ const runsDir = path6.join(baseDir, RESULT_RUNS_DIRNAME);
5142
5138
  const files = [];
5143
5139
  try {
5144
- const entries2 = readdirSync2(rawDir, { withFileTypes: true });
5140
+ const entries2 = readdirSync2(runsDir, { withFileTypes: true });
5145
5141
  for (const entry of entries2) {
5146
5142
  if (entry.isDirectory()) {
5147
- const primaryPath = resolveExistingRunPrimaryPath(path6.join(rawDir, entry.name));
5143
+ const primaryPath = resolveExistingRunPrimaryPath(path6.join(runsDir, entry.name));
5148
5144
  if (primaryPath) {
5149
5145
  files.push({ filePath: primaryPath, displayName: entry.name });
5150
5146
  }
@@ -5152,7 +5148,7 @@ function listResultFiles(cwd, limit) {
5152
5148
  }
5153
5149
  for (const entry of entries2) {
5154
5150
  if (!entry.isDirectory() && entry.name.endsWith(".jsonl")) {
5155
- files.push({ filePath: path6.join(rawDir, entry.name), displayName: entry.name });
5151
+ files.push({ filePath: path6.join(runsDir, entry.name), displayName: entry.name });
5156
5152
  }
5157
5153
  }
5158
5154
  } catch {
@@ -5317,8 +5313,7 @@ var resultsExportCommand = command({
5317
5313
  const { results } = await loadResults(source, cwd);
5318
5314
  const outputDir = out ? path7.isAbsolute(out) ? out : path7.resolve(cwd, out) : deriveOutputDir(cwd, sourceFile);
5319
5315
  await writeArtifactsFromResults(results, outputDir, {
5320
- evalFile: sourceFile,
5321
- writeLegacyResults: false
5316
+ evalFile: sourceFile
5322
5317
  });
5323
5318
  console.log(`Exported ${results.length} test(s) to ${outputDir}`);
5324
5319
  for (const result of results) {
@@ -6640,7 +6635,7 @@ var traceScoreCommand = command({
6640
6635
  );
6641
6636
  if (!hasTrace) {
6642
6637
  console.error(
6643
- `${c2.red}Error:${c2.reset} Source lacks trace metrics. Export a trace file with ${c2.bold}--trace-file${c2.reset} or ${c2.bold}--otel-file${c2.reset}.`
6638
+ `${c2.red}Error:${c2.reset} Source lacks trace metrics. Use an OTLP trace export via ${c2.bold}--otel-file${c2.reset} or a run manifest with summary metrics in ${c2.bold}index.jsonl${c2.reset}.`
6644
6639
  );
6645
6640
  process.exit(1);
6646
6641
  }
@@ -7761,4 +7756,4 @@ export {
7761
7756
  preprocessArgv,
7762
7757
  runCli
7763
7758
  };
7764
- //# sourceMappingURL=chunk-2ELQ6F3C.js.map
7759
+ //# sourceMappingURL=chunk-UK7UMQOX.js.map