agentv 3.14.5 → 4.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,11 +1,13 @@
1
1
  import { createRequire } from 'node:module'; const require = createRequire(import.meta.url);
2
2
  import {
3
3
  CLI_PLACEHOLDERS,
4
+ COMMON_TARGET_SETTINGS,
4
5
  KNOWN_PROVIDERS,
5
6
  PROVIDER_ALIASES,
6
7
  ResponseCache,
7
8
  buildDirectoryChain,
8
9
  buildSearchRoots,
10
+ deriveCategory,
9
11
  ensureVSCodeSubagents,
10
12
  findGitRoot,
11
13
  interpolateEnv,
@@ -27,12 +29,12 @@ import {
27
29
  subscribeToCopilotCliLogEntries,
28
30
  subscribeToCopilotSdkLogEntries,
29
31
  subscribeToPiLogEntries
30
- } from "./chunk-ELQEFMGO.js";
32
+ } from "./chunk-OXBBWZOY.js";
31
33
 
32
34
  // package.json
33
35
  var package_default = {
34
36
  name: "agentv",
35
- version: "3.14.5",
37
+ version: "4.0.0",
36
38
  description: "CLI entry point for AgentV",
37
39
  type: "module",
38
40
  repository: {
@@ -110,6 +112,27 @@ async function resolveEvalPaths(evalPaths, cwd) {
110
112
  results.add(candidatePath);
111
113
  continue;
112
114
  }
115
+ if (stats.isDirectory()) {
116
+ const dirGlob = path.posix.join(
117
+ candidatePath.replace(/\\/g, "/"),
118
+ "**/*.eval.{yaml,yml}"
119
+ );
120
+ const dirMatches = await fg(dirGlob, {
121
+ absolute: true,
122
+ onlyFiles: true,
123
+ unique: true,
124
+ dot: true,
125
+ followSymbolicLinks: true
126
+ });
127
+ if (dirMatches.length === 0) {
128
+ unmatched.push(pattern);
129
+ } else {
130
+ for (const filePath of dirMatches) {
131
+ results.add(path.normalize(filePath));
132
+ }
133
+ }
134
+ continue;
135
+ }
113
136
  } catch {
114
137
  }
115
138
  const globPattern = pattern.includes("\\") ? pattern.replace(/\\/g, "/") : pattern;
@@ -304,7 +327,7 @@ import path3 from "node:path";
304
327
  var RESULT_INDEX_FILENAME = "index.jsonl";
305
328
  var RESULT_RUNS_DIRNAME = "runs";
306
329
  function createRunDirName(timestamp = /* @__PURE__ */ new Date()) {
307
- return `eval_${timestamp.toISOString().replace(/[:.]/g, "-")}`;
330
+ return timestamp.toISOString().replace(/[:.]/g, "-");
308
331
  }
309
332
  function buildDefaultRunDir(cwd) {
310
333
  return path3.join(cwd, ".agentv", "results", RESULT_RUNS_DIRNAME, createRunDirName());
@@ -562,20 +585,16 @@ function safeArtifactPathSegment(value, fallback) {
562
585
  function safeTestId(testId) {
563
586
  return safeArtifactPathSegment(testId, "unknown");
564
587
  }
565
- function safeTargetId(target) {
566
- return safeArtifactPathSegment(target, "default");
567
- }
568
- function getEvalSet(result) {
569
- const record = result;
570
- return result.eval_set ?? record.evalSet;
588
+ function getDataset(result) {
589
+ return result.dataset;
571
590
  }
572
591
  function buildArtifactSubdir(result) {
573
592
  const segments = [];
574
- const evalSet = getEvalSet(result);
593
+ const evalSet = getDataset(result);
575
594
  if (evalSet) {
576
595
  segments.push(safeArtifactPathSegment(evalSet, "default"));
577
596
  }
578
- segments.push(safeTestId(result.testId), safeTargetId(result.target));
597
+ segments.push(safeTestId(result.testId));
579
598
  return path4.posix.join(...segments);
580
599
  }
581
600
  function formatOutputMarkdown(output) {
@@ -598,7 +617,8 @@ function buildResultIndexArtifact(result) {
598
617
  return {
599
618
  timestamp: result.timestamp,
600
619
  test_id: result.testId ?? "unknown",
601
- eval_set: getEvalSet(result),
620
+ dataset: getDataset(result),
621
+ category: result.category,
602
622
  conversation_id: result.conversationId,
603
623
  score: result.score,
604
624
  target: result.target ?? "unknown",
@@ -1667,7 +1687,7 @@ var JunitWriter = class _JunitWriter {
1667
1687
  this.closed = true;
1668
1688
  const grouped = /* @__PURE__ */ new Map();
1669
1689
  for (const result of this.results) {
1670
- const suite = result.eval_set ?? "default";
1690
+ const suite = result.dataset ?? "default";
1671
1691
  const existing = grouped.get(suite);
1672
1692
  if (existing) {
1673
1693
  existing.push(result);
@@ -1995,7 +2015,8 @@ function hydrateManifestRecord(baseDir, record) {
1995
2015
  return {
1996
2016
  timestamp: record.timestamp,
1997
2017
  testId,
1998
- eval_set: record.eval_set,
2018
+ dataset: record.dataset,
2019
+ category: record.category,
1999
2020
  target: record.target,
2000
2021
  score: record.score,
2001
2022
  executionStatus: record.execution_status,
@@ -2053,6 +2074,7 @@ function loadLightweightResults(sourceFile) {
2053
2074
  return parseResultManifest(content).map((record) => ({
2054
2075
  testId: record.test_id ?? record.eval_id ?? "unknown",
2055
2076
  target: record.target,
2077
+ experiment: record.experiment,
2056
2078
  score: record.score,
2057
2079
  scores: record.scores,
2058
2080
  executionStatus: record.execution_status,
@@ -3020,7 +3042,7 @@ function validateContentForRoleMarkers(content, location, filePath, errors) {
3020
3042
  function isObject2(value) {
3021
3043
  return typeof value === "object" && value !== null && !Array.isArray(value);
3022
3044
  }
3023
- var COMMON_SETTINGS = /* @__PURE__ */ new Set(["provider_batching", "providerBatching"]);
3045
+ var COMMON_SETTINGS = new Set(COMMON_TARGET_SETTINGS);
3024
3046
  var RETRY_SETTINGS = /* @__PURE__ */ new Set([
3025
3047
  "max_retries",
3026
3048
  "maxRetries",
@@ -4140,9 +4162,12 @@ async function prepareFileMetadata(params) {
4140
4162
  repoRoot,
4141
4163
  verbose: options.verbose
4142
4164
  });
4165
+ const relativePath = path15.relative(cwd, testFilePath);
4166
+ const category = deriveCategory(relativePath);
4143
4167
  const suite = await loadTestSuite(testFilePath, repoRoot, {
4144
4168
  verbose: options.verbose,
4145
- filter: options.filter
4169
+ filter: options.filter,
4170
+ category
4146
4171
  });
4147
4172
  const filteredIds = suite.tests.map((value) => value.id);
4148
4173
  const cliTargets = options.cliTargets;
@@ -4421,7 +4446,7 @@ async function runEvalCommand(input) {
4421
4446
  const useFileExport = !!options.otelFile;
4422
4447
  if (options.exportOtel || useFileExport) {
4423
4448
  try {
4424
- const { OtelTraceExporter, OTEL_BACKEND_PRESETS } = await import("./dist-5EEXTTC3.js");
4449
+ const { OtelTraceExporter, OTEL_BACKEND_PRESETS } = await import("./dist-3Z22B6SU.js");
4425
4450
  let endpoint = process.env.OTEL_EXPORTER_OTLP_ENDPOINT;
4426
4451
  let headers = {};
4427
4452
  if (options.otelBackend) {
@@ -4794,6 +4819,7 @@ export {
4794
4819
  resolveExistingRunPrimaryPath,
4795
4820
  resolveWorkspaceOrFilePath,
4796
4821
  writeArtifactsFromResults,
4822
+ parseResultManifest,
4797
4823
  resolveResultSourcePath,
4798
4824
  loadManifestResults,
4799
4825
  loadLightweightResults,
@@ -4812,4 +4838,4 @@ export {
4812
4838
  selectTarget,
4813
4839
  runEvalCommand
4814
4840
  };
4815
- //# sourceMappingURL=chunk-GUXXTOYK.js.map
4841
+ //# sourceMappingURL=chunk-OT2J474N.js.map