agentv 4.18.0-next.1 → 4.19.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions exactly as they appear in their respective public registries.
@@ -15,7 +15,22 @@ import {
15
15
  resolveWorkspaceOrFilePath,
16
16
  toSnakeCaseDeep,
17
17
  writeArtifactsFromResults
18
- } from "./chunk-HBDOJJFY.js";
18
+ } from "./chunk-NL6P5MUH.js";
19
+ import {
20
+ ResponseCache,
21
+ commitAndPushResultsBranch,
22
+ createDraftResultsPr,
23
+ deriveCategory,
24
+ directorySizeBytes,
25
+ getResultsRepoStatus,
26
+ loadTsConfig,
27
+ prepareResultsRepoBranch,
28
+ resolveResultsRepoRunsDir,
29
+ shouldEnableCache,
30
+ shouldSkipCacheForTemperature,
31
+ stageResultsArtifacts,
32
+ syncResultsRepo
33
+ } from "./chunk-R2QDYORI.js";
19
34
  import {
20
35
  CLI_PLACEHOLDERS,
21
36
  COMMON_TARGET_SETTINGS,
@@ -23,47 +38,34 @@ import {
23
38
  DEFAULT_THRESHOLD,
24
39
  KNOWN_PROVIDERS,
25
40
  PROVIDER_ALIASES,
26
- ResponseCache,
27
41
  buildDirectoryChain,
28
42
  buildSearchRoots,
29
- commitAndPushResultsBranch,
30
- createDraftResultsPr,
31
- deriveCategory,
32
- directorySizeBytes,
33
43
  ensureVSCodeSubagents,
34
44
  findDeprecatedCamelCaseTargetWarnings,
35
45
  findGitRoot,
36
- getResultsRepoStatus,
37
46
  interpolateEnv,
38
47
  isGraderKind,
39
48
  listTargetNames,
40
49
  loadCasesFromFile,
41
50
  loadConfig,
42
51
  loadTestSuite,
43
- loadTsConfig,
44
52
  normalizeLineEndings,
45
- prepareResultsRepoBranch,
46
53
  readTargetDefinitions,
47
54
  readTestSuiteMetadata,
48
55
  resolveFileReference,
49
- resolveResultsRepoRunsDir,
50
56
  resolveTargetDefinition,
51
57
  runEvaluation,
52
- shouldEnableCache,
53
- shouldSkipCacheForTemperature,
54
- stageResultsArtifacts,
55
58
  subscribeToCodexLogEntries,
56
59
  subscribeToCopilotCliLogEntries,
57
60
  subscribeToCopilotSdkLogEntries,
58
61
  subscribeToPiLogEntries,
59
- syncResultsRepo,
60
62
  toCamelCaseDeep
61
- } from "./chunk-RCOAXXHP.js";
63
+ } from "./chunk-PTYQS37Y.js";
62
64
 
63
65
  // package.json
64
66
  var package_default = {
65
67
  name: "agentv",
66
- version: "4.18.0-next.1",
68
+ version: "4.19.0",
67
69
  description: "CLI entry point for AgentV",
68
70
  type: "module",
69
71
  repository: {
@@ -148,12 +150,15 @@ async function resolveEvalPaths(evalPaths, cwd) {
148
150
  const candidatePath = path.isAbsolute(pattern) ? path.normalize(pattern) : path.resolve(cwd, pattern);
149
151
  try {
150
152
  const stats = await stat(candidatePath);
151
- if (stats.isFile() && /\.(ya?ml|jsonl|json)$/i.test(candidatePath)) {
153
+ if (stats.isFile() && /\.(ya?ml|jsonl|json|[cm]?ts)$/i.test(candidatePath)) {
152
154
  results.add(candidatePath);
153
155
  continue;
154
156
  }
155
157
  if (stats.isDirectory()) {
156
- const dirGlob = path.posix.join(candidatePath.replace(/\\/g, "/"), "**/*.eval.{yaml,yml}");
158
+ const dirGlob = path.posix.join(
159
+ candidatePath.replace(/\\/g, "/"),
160
+ "**/{*.eval.yaml,*.eval.yml,eval.yaml,eval.yml,*.eval.ts,*.eval.mts}"
161
+ );
157
162
  const dirMatches = await fg(dirGlob, {
158
163
  absolute: true,
159
164
  onlyFiles: true,
@@ -179,7 +184,9 @@ async function resolveEvalPaths(evalPaths, cwd) {
179
184
  followSymbolicLinks: true,
180
185
  ignore: ignorePatterns
181
186
  });
182
- const yamlMatches = matches.filter((filePath) => /\.(ya?ml|jsonl|json)$/i.test(filePath));
187
+ const yamlMatches = matches.filter(
188
+ (filePath) => /\.(ya?ml|jsonl|json|[cm]?ts)$/i.test(filePath)
189
+ );
183
190
  for (const filePath of yamlMatches) {
184
191
  results.add(path.normalize(filePath));
185
192
  }
@@ -201,7 +208,7 @@ async function resolveEvalPaths(evalPaths, cwd) {
201
208
  throw new Error(
202
209
  `No eval files matched any provided paths or globs: ${includePatterns.join(
203
210
  ", "
204
- )}. Provide YAML, JSONL, or JSON paths or globs (e.g., "evals/**/*.yaml", "evals/**/*.jsonl", "evals.json").`
211
+ )}. Provide YAML, JSONL, JSON, or TypeScript paths or globs (e.g., "evals/**/eval.yaml", "evals/**/*.eval.ts").`
205
212
  );
206
213
  }
207
214
  const sorted = Array.from(results);
@@ -4874,6 +4881,52 @@ async function prepareFileMetadata(params) {
4874
4881
  inlineTargetLabel: `transcript (${path15.basename(options.transcript)})`
4875
4882
  }
4876
4883
  ];
4884
+ } else if (suite.inlineTarget && options.cliTargets.length === 0) {
4885
+ const targetDefinition = suite.inlineTarget;
4886
+ const resolvedTarget = options.dryRun ? {
4887
+ kind: "mock",
4888
+ name: `${targetDefinition.name}-dry-run`,
4889
+ graderTarget: void 0,
4890
+ config: {
4891
+ response: '{"answer":"Mock dry-run response"}',
4892
+ delayMs: options.dryRunDelay,
4893
+ delayMinMs: options.dryRunDelayMin,
4894
+ delayMaxMs: options.dryRunDelayMax
4895
+ }
4896
+ } : resolveTargetDefinition(targetDefinition, process.env, testFilePath, {
4897
+ emitDeprecationWarnings: false
4898
+ });
4899
+ selections = [
4900
+ {
4901
+ selection: {
4902
+ definitions: [targetDefinition],
4903
+ resolvedTarget,
4904
+ targetName: targetDefinition.name,
4905
+ targetSource: "test-file",
4906
+ targetsFilePath: testFilePath
4907
+ },
4908
+ inlineTargetLabel: resolveTargetLabel(targetDefinition.name, resolvedTarget.name)
4909
+ }
4910
+ ];
4911
+ } else if (suite.providerFactory && options.cliTargets.length === 0) {
4912
+ const taskTarget = {
4913
+ kind: "mock",
4914
+ name: "custom-task",
4915
+ graderTarget: void 0,
4916
+ config: {}
4917
+ };
4918
+ selections = [
4919
+ {
4920
+ selection: {
4921
+ definitions: [],
4922
+ resolvedTarget: taskTarget,
4923
+ targetName: "custom-task",
4924
+ targetSource: "test-file",
4925
+ targetsFilePath: testFilePath
4926
+ },
4927
+ inlineTargetLabel: "custom-task"
4928
+ }
4929
+ ];
4877
4930
  } else {
4878
4931
  const cliTargets = options.cliTargets;
4879
4932
  const suiteTargets2 = suite.targets;
@@ -4944,7 +4997,8 @@ async function prepareFileMetadata(params) {
4944
4997
  budgetUsd: suite.budgetUsd,
4945
4998
  failOnError: suite.failOnError,
4946
4999
  threshold: suite.threshold,
4947
- tags: suite.metadata?.tags
5000
+ tags: suite.metadata?.tags,
5001
+ providerFactory: suite.providerFactory
4948
5002
  };
4949
5003
  }
4950
5004
  async function runSingleEvalFile(params) {
@@ -5223,7 +5277,7 @@ async function runEvalCommand(input) {
5223
5277
  const useFileExport = !!options.otelFile;
5224
5278
  if (options.exportOtel || useFileExport) {
5225
5279
  try {
5226
- const { OtelTraceExporter, OTEL_BACKEND_PRESETS } = await import("./dist-7W4OI3X2.js");
5280
+ const { OtelTraceExporter, OTEL_BACKEND_PRESETS } = await import("./dist-RTIUSC6L.js");
5227
5281
  let endpoint = process.env.OTEL_EXPORTER_OTLP_ENDPOINT;
5228
5282
  let headers = {};
5229
5283
  if (options.otelBackend) {
@@ -5283,23 +5337,7 @@ async function runEvalCommand(input) {
5283
5337
  const displayIdTracker = createDisplayIdTracker();
5284
5338
  const perFileWorkers = options.workers;
5285
5339
  const fileMetadata = /* @__PURE__ */ new Map();
5286
- const tsFiles = [];
5287
- const yamlFiles = [];
5288
5340
  for (const testFilePath of resolvedTestFiles) {
5289
- if (/\.(ts|js|mts|mjs)$/.test(testFilePath)) {
5290
- tsFiles.push(testFilePath);
5291
- } else {
5292
- yamlFiles.push(testFilePath);
5293
- }
5294
- }
5295
- for (const tsFile of tsFiles) {
5296
- await ensureFileExists(tsFile, "TypeScript eval file");
5297
- await import(pathToFileURL(tsFile).href);
5298
- }
5299
- if (yamlFiles.length === 0 && tsFiles.length > 0) {
5300
- return;
5301
- }
5302
- for (const testFilePath of yamlFiles) {
5303
5341
  const meta = await prepareFileMetadata({
5304
5342
  testFilePath,
5305
5343
  repoRoot,
@@ -5434,7 +5472,7 @@ async function runEvalCommand(input) {
5434
5472
  const activeTestFiles = resolvedTestFiles.filter((f) => fileMetadata.has(f));
5435
5473
  let transcriptProviderFactory;
5436
5474
  if (options.transcript) {
5437
- const { TranscriptProvider } = await import("./dist-7W4OI3X2.js");
5475
+ const { TranscriptProvider } = await import("./dist-RTIUSC6L.js");
5438
5476
  const transcriptProvider = await TranscriptProvider.fromFile(options.transcript);
5439
5477
  const totalTests = [...fileMetadata.values()].reduce(
5440
5478
  (sum, meta) => sum + meta.testCases.length,
@@ -5494,7 +5532,7 @@ async function runEvalCommand(input) {
5494
5532
  budgetUsd: targetPrep.budgetUsd,
5495
5533
  failOnError: targetPrep.failOnError,
5496
5534
  threshold: resolvedThreshold,
5497
- providerFactory: transcriptProviderFactory
5535
+ providerFactory: transcriptProviderFactory ?? targetPrep.providerFactory
5498
5536
  });
5499
5537
  const evalFile = path15.relative(cwd, testFilePath);
5500
5538
  const existingSummary = remoteEvalSummaries.find(
@@ -5574,7 +5612,7 @@ async function runEvalCommand(input) {
5574
5612
  if (usesDefaultArtifactWorkspace && allResults.length > 0) {
5575
5613
  const evalFile = activeTestFiles.length === 1 ? activeTestFiles[0] : "";
5576
5614
  if (isResumeAppend) {
5577
- const { writePerTestArtifacts } = await import("./artifact-writer-WH3OE42V.js");
5615
+ const { writePerTestArtifacts } = await import("./artifact-writer-YATMDPWI.js");
5578
5616
  await writePerTestArtifacts(allResults, runDir, {
5579
5617
  experiment: normalizeExperimentName(options.experiment)
5580
5618
  });
@@ -5784,4 +5822,4 @@ export {
5784
5822
  getCategories,
5785
5823
  filterByCategory
5786
5824
  };
5787
- //# sourceMappingURL=chunk-VRPCMCLQ.js.map
5825
+ //# sourceMappingURL=chunk-IWI4AJRS.js.map