agentv 3.12.0 → 3.13.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -27,12 +27,12 @@ import {
27
27
  subscribeToCopilotCliLogEntries,
28
28
  subscribeToCopilotSdkLogEntries,
29
29
  subscribeToPiLogEntries
30
- } from "./chunk-UYBLUYHN.js";
30
+ } from "./chunk-K747KGDP.js";
31
31
 
32
32
  // package.json
33
33
  var package_default = {
34
34
  name: "agentv",
35
- version: "3.12.0",
35
+ version: "3.13.1",
36
36
  description: "CLI entry point for AgentV",
37
37
  type: "module",
38
38
  repository: {
@@ -302,28 +302,21 @@ function toSnakeCaseDeep(obj) {
302
302
  import { existsSync, statSync } from "node:fs";
303
303
  import path3 from "node:path";
304
304
  var RESULT_INDEX_FILENAME = "index.jsonl";
305
- var LEGACY_RESULTS_FILENAME = "results.jsonl";
305
+ var RESULT_RUNS_DIRNAME = "runs";
306
306
  function createRunDirName(timestamp = /* @__PURE__ */ new Date()) {
307
307
  return `eval_${timestamp.toISOString().replace(/[:.]/g, "-")}`;
308
308
  }
309
309
  function buildDefaultRunDir(cwd) {
310
- return path3.join(cwd, ".agentv", "results", "raw", createRunDirName());
310
+ return path3.join(cwd, ".agentv", "results", RESULT_RUNS_DIRNAME, createRunDirName());
311
311
  }
312
312
  function resolveRunIndexPath(runDir) {
313
313
  return path3.join(runDir, RESULT_INDEX_FILENAME);
314
314
  }
315
- function resolveRunLegacyResultsPath(runDir) {
316
- return path3.join(runDir, LEGACY_RESULTS_FILENAME);
317
- }
318
315
  function resolveExistingRunPrimaryPath(runDir) {
319
316
  const indexPath = resolveRunIndexPath(runDir);
320
317
  if (existsSync(indexPath)) {
321
318
  return indexPath;
322
319
  }
323
- const legacyPath = resolveRunLegacyResultsPath(runDir);
324
- if (existsSync(legacyPath)) {
325
- return legacyPath;
326
- }
327
320
  return void 0;
328
321
  }
329
322
  function isDirectoryPath(filePath) {
@@ -339,9 +332,7 @@ function resolveWorkspaceOrFilePath(filePath) {
339
332
  }
340
333
  const existing = resolveExistingRunPrimaryPath(filePath);
341
334
  if (!existing) {
342
- throw new Error(
343
- `Result workspace is missing ${RESULT_INDEX_FILENAME} and ${LEGACY_RESULTS_FILENAME}: ${filePath}`
344
- );
335
+ throw new Error(`Result workspace is missing ${RESULT_INDEX_FILENAME}: ${filePath}`);
345
336
  }
346
337
  return existing;
347
338
  }
@@ -557,7 +548,7 @@ function buildBenchmarkArtifact(results, evalFile = "") {
557
548
  tests_run: testIds
558
549
  },
559
550
  run_summary: runSummary,
560
- per_evaluator_summary: perEvaluatorSummary,
551
+ per_grader_summary: perEvaluatorSummary,
561
552
  notes
562
553
  };
563
554
  }
@@ -670,7 +661,6 @@ async function writeArtifactsFromResults(results, outputDir, options) {
670
661
  const timingPath = path4.join(outputDir, "timing.json");
671
662
  const benchmarkPath = path4.join(outputDir, "benchmark.json");
672
663
  const indexPath = path4.join(outputDir, RESULT_INDEX_FILENAME);
673
- const legacyResultsPath = options?.writeLegacyResults ? path4.join(outputDir, LEGACY_RESULTS_FILENAME) : void 0;
674
664
  await mkdir(outputDir, { recursive: true });
675
665
  const indexRecords = [];
676
666
  for (const result of results) {
@@ -707,10 +697,7 @@ async function writeArtifactsFromResults(results, outputDir, options) {
707
697
  await writeFile(benchmarkPath, `${JSON.stringify(benchmark, null, 2)}
708
698
  `, "utf8");
709
699
  await writeJsonlFile(indexPath, indexRecords);
710
- if (legacyResultsPath) {
711
- await writeJsonlFile(legacyResultsPath, results);
712
- }
713
- return { testArtifactDir, timingPath, benchmarkPath, indexPath, legacyResultsPath };
700
+ return { testArtifactDir, timingPath, benchmarkPath, indexPath };
714
701
  }
715
702
 
716
703
  // src/commands/eval/benchmark-writer.ts
@@ -2141,7 +2128,7 @@ async function saveRunCache(cwd, resultPath) {
2141
2128
  const dir = path13.join(cwd, ".agentv");
2142
2129
  await mkdir7(dir, { recursive: true });
2143
2130
  const basename = path13.basename(resultPath);
2144
- const cache = basename === RESULT_INDEX_FILENAME || basename === LEGACY_RESULTS_FILENAME ? {
2131
+ const cache = basename === RESULT_INDEX_FILENAME ? {
2145
2132
  lastRunDir: path13.dirname(resultPath),
2146
2133
  timestamp: (/* @__PURE__ */ new Date()).toISOString()
2147
2134
  } : {
@@ -4050,7 +4037,6 @@ function normalizeOptions(rawOptions, config, yamlExecution) {
4050
4037
  verbose: normalizeBoolean(rawOptions.verbose) || yamlExecution?.verbose === true || config?.execution?.verbose === true,
4051
4038
  // Precedence: CLI > YAML config > TS config
4052
4039
  otelFile: normalizeString(rawOptions.otelFile) ?? (yamlExecution?.otel_file ? resolveTimestampPlaceholder(yamlExecution.otel_file) : void 0) ?? (config?.execution?.otelFile ? resolveTimestampPlaceholder(config.execution.otelFile) : void 0),
4053
- traceFile: normalizeString(rawOptions.traceFile) ?? (yamlExecution?.trace_file ? resolveTimestampPlaceholder(yamlExecution.trace_file) : void 0) ?? (config?.execution?.traceFile ? resolveTimestampPlaceholder(config.execution.traceFile) : void 0),
4054
4040
  exportOtel: normalizeBoolean(rawOptions.exportOtel) || yamlExecution?.export_otel === true,
4055
4041
  otelBackend: normalizeString(rawOptions.otelBackend) ?? yamlExecution?.otel_backend,
4056
4042
  otelCaptureContent: normalizeBoolean(rawOptions.otelCaptureContent) || yamlExecution?.otel_capture_content === true,
@@ -4418,13 +4404,11 @@ async function runEvalCommand(input) {
4418
4404
  }
4419
4405
  const usesDefaultArtifactWorkspace = !options.outPath;
4420
4406
  const outputPath = options.outPath ? path15.resolve(options.outPath) : buildDefaultOutputPath(cwd);
4421
- const defaultTraceFile = usesDefaultArtifactWorkspace && !options.traceFile ? path15.join(path15.dirname(outputPath), "trace.jsonl") : void 0;
4422
- const traceFilePath = options.traceFile ? path15.resolve(options.traceFile) : defaultTraceFile;
4423
4407
  let otelExporter = null;
4424
- const useFileExport = !!(options.otelFile || traceFilePath);
4408
+ const useFileExport = !!options.otelFile;
4425
4409
  if (options.exportOtel || useFileExport) {
4426
4410
  try {
4427
- const { OtelTraceExporter, OTEL_BACKEND_PRESETS } = await import("./dist-L6R5HJ72.js");
4411
+ const { OtelTraceExporter, OTEL_BACKEND_PRESETS } = await import("./dist-LCZDS36N.js");
4428
4412
  let endpoint = process.env.OTEL_EXPORTER_OTLP_ENDPOINT;
4429
4413
  let headers = {};
4430
4414
  if (options.otelBackend) {
@@ -4448,8 +4432,7 @@ async function runEvalCommand(input) {
4448
4432
  headers,
4449
4433
  captureContent,
4450
4434
  groupTurns: options.otelGroupTurns,
4451
- otlpFilePath: options.otelFile ? path15.resolve(options.otelFile) : void 0,
4452
- traceFilePath
4435
+ otlpFilePath: options.otelFile ? path15.resolve(options.otelFile) : void 0
4453
4436
  });
4454
4437
  const initialized = await otelExporter.init();
4455
4438
  if (!initialized) {
@@ -4465,7 +4448,7 @@ async function runEvalCommand(input) {
4465
4448
  otelExporter = null;
4466
4449
  }
4467
4450
  }
4468
- const primaryWritePath = usesDefaultArtifactWorkspace ? path15.join(path15.dirname(outputPath), LEGACY_RESULTS_FILENAME) : outputPath;
4451
+ const primaryWritePath = outputPath;
4469
4452
  const extraOutputPaths = options.outputPaths.map((p) => path15.resolve(p));
4470
4453
  const allOutputPaths = extraOutputPaths.length > 0 ? [primaryWritePath, ...extraOutputPaths] : [primaryWritePath];
4471
4454
  const uniqueOutputPaths = [...new Set(allOutputPaths)];
@@ -4486,9 +4469,6 @@ async function runEvalCommand(input) {
4486
4469
  if (options.otelFile) {
4487
4470
  console.log(`OTLP JSON file: ${path15.resolve(options.otelFile)}`);
4488
4471
  }
4489
- if (traceFilePath) {
4490
- console.log(`Trace file: ${traceFilePath}`);
4491
- }
4492
4472
  const evaluationRunner = await resolveEvaluationRunner();
4493
4473
  const allResults = [];
4494
4474
  const seenEvalCases = /* @__PURE__ */ new Set();
@@ -4676,11 +4656,9 @@ async function runEvalCommand(input) {
4676
4656
  testArtifactDir,
4677
4657
  timingPath,
4678
4658
  benchmarkPath: workspaceBenchmarkPath,
4679
- indexPath,
4680
- legacyResultsPath
4659
+ indexPath
4681
4660
  } = await writeArtifactsFromResults(allResults, workspaceDir, {
4682
- evalFile,
4683
- writeLegacyResults: true
4661
+ evalFile
4684
4662
  });
4685
4663
  console.log(`Artifact workspace written to: ${workspaceDir}`);
4686
4664
  console.log(` Index: ${indexPath}`);
@@ -4689,9 +4667,6 @@ async function runEvalCommand(input) {
4689
4667
  );
4690
4668
  console.log(` Timing: ${timingPath}`);
4691
4669
  console.log(` Benchmark: ${workspaceBenchmarkPath}`);
4692
- if (legacyResultsPath) {
4693
- console.log(` Compatibility output: ${legacyResultsPath} (deprecated)`);
4694
- }
4695
4670
  }
4696
4671
  if (options.artifacts) {
4697
4672
  const artifactsDir = path15.resolve(options.artifacts);
@@ -4702,8 +4677,7 @@ async function runEvalCommand(input) {
4702
4677
  timingPath,
4703
4678
  benchmarkPath: abp
4704
4679
  } = await writeArtifactsFromResults(allResults, artifactsDir, {
4705
- evalFile,
4706
- writeLegacyResults: false
4680
+ evalFile
4707
4681
  });
4708
4682
  console.log(`Artifacts written to: ${artifactsDir}`);
4709
4683
  console.log(` Index: ${indexPath}`);
@@ -4785,7 +4759,7 @@ export {
4785
4759
  package_default,
4786
4760
  toSnakeCaseDeep,
4787
4761
  RESULT_INDEX_FILENAME,
4788
- LEGACY_RESULTS_FILENAME,
4762
+ RESULT_RUNS_DIRNAME,
4789
4763
  resolveExistingRunPrimaryPath,
4790
4764
  resolveWorkspaceOrFilePath,
4791
4765
  writeArtifactsFromResults,
@@ -4807,4 +4781,4 @@ export {
4807
4781
  selectTarget,
4808
4782
  runEvalCommand
4809
4783
  };
4810
- //# sourceMappingURL=chunk-VLOFRXH4.js.map
4784
+ //# sourceMappingURL=chunk-LSXO22CF.js.map