agentv 4.38.1 → 4.40.1-next.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. package/dist/{artifact-writer-MK5X5MSO.js → artifact-writer-GIAIMGPQ.js} +14 -11
  2. package/dist/{chunk-QOBQ5XYF.js → chunk-76FOHROU.js} +16 -4
  3. package/dist/chunk-76FOHROU.js.map +1 -0
  4. package/dist/{chunk-VBHHZQS6.js → chunk-BLXYBUU4.js} +1825 -333
  5. package/dist/chunk-BLXYBUU4.js.map +1 -0
  6. package/dist/{chunk-NLTIK3LV.js → chunk-I3SC4FOT.js} +499 -347
  7. package/dist/chunk-I3SC4FOT.js.map +1 -0
  8. package/dist/{chunk-OIN3MVOD.js → chunk-S2JJCLHV.js} +67 -68
  9. package/dist/chunk-S2JJCLHV.js.map +1 -0
  10. package/dist/chunk-TWQP7JYQ.js +494 -0
  11. package/dist/chunk-TWQP7JYQ.js.map +1 -0
  12. package/dist/{chunk-6M5S4IJW.js → chunk-WKA5QDNQ.js} +586 -183
  13. package/dist/chunk-WKA5QDNQ.js.map +1 -0
  14. package/dist/cli.js +6 -6
  15. package/dist/dashboard/assets/index-BnYCCJ7O.css +1 -0
  16. package/dist/dashboard/assets/index-DaueD7GO.js +118 -0
  17. package/dist/dashboard/assets/{index-SIl6NbIJ.js → index-_jpKSzIf.js} +1 -1
  18. package/dist/dashboard/index.html +2 -2
  19. package/dist/{dist-HVLBDG5F.js → dist-6Z4OSITR.js} +54 -16
  20. package/dist/index.js +6 -6
  21. package/dist/{interactive-45LPG2YJ.js → interactive-OUB3GZRC.js} +6 -6
  22. package/dist/{otlp-json-file-exporter-RJFPCKVK-T6N4OGWG.js → otlp-json-file-exporter-RY63S3IG-PZBQPVYY.js} +2 -2
  23. package/dist/skills/agentv-eval-writer/SKILL.md +49 -24
  24. package/dist/skills/agentv-eval-writer/references/custom-evaluators.md +21 -15
  25. package/dist/{ts-eval-loader-TJT6BGFF-DI7XNSO4.js → ts-eval-loader-NWH3B4HG-UXXCZKLP.js} +2 -2
  26. package/package.json +1 -1
  27. package/dist/chunk-6M5S4IJW.js.map +0 -1
  28. package/dist/chunk-DKUAETXE.js +0 -1362
  29. package/dist/chunk-DKUAETXE.js.map +0 -1
  30. package/dist/chunk-NLTIK3LV.js.map +0 -1
  31. package/dist/chunk-OIN3MVOD.js.map +0 -1
  32. package/dist/chunk-QOBQ5XYF.js.map +0 -1
  33. package/dist/chunk-VBHHZQS6.js.map +0 -1
  34. package/dist/dashboard/assets/index-BpnllKET.css +0 -1
  35. package/dist/dashboard/assets/index-Cm9SUopp.js +0 -118
  36. /package/dist/{artifact-writer-MK5X5MSO.js.map → artifact-writer-GIAIMGPQ.js.map} +0 -0
  37. /package/dist/{dist-HVLBDG5F.js.map → dist-6Z4OSITR.js.map} +0 -0
  38. /package/dist/{interactive-45LPG2YJ.js.map → interactive-OUB3GZRC.js.map} +0 -0
  39. /package/dist/{otlp-json-file-exporter-RJFPCKVK-T6N4OGWG.js.map → otlp-json-file-exporter-RY63S3IG-PZBQPVYY.js.map} +0 -0
  40. /package/dist/{ts-eval-loader-TJT6BGFF-DI7XNSO4.js.map → ts-eval-loader-NWH3B4HG-UXXCZKLP.js.map} +0 -0
@@ -1,22 +1,8 @@
1
1
  import { createRequire } from 'node:module'; const require = createRequire(import.meta.url);
2
2
  import {
3
- RESULT_INDEX_FILENAME,
4
- RESULT_RUNS_DIRNAME,
5
- aggregateRunDir,
6
- buildDefaultRunDir,
7
- buildTestTargetKey,
8
- deduplicateByTestIdTarget,
9
- isDirectoryPath,
10
- normalizeExperimentName,
11
- parseJsonlResults,
12
- resolveExistingRunPrimaryPath,
13
- resolveRunIndexPath,
14
- resolveRunManifestPath,
15
- resolveWorkspaceOrFilePath,
16
3
  toSnakeCaseDeep as toSnakeCaseDeep2,
17
- writeArtifactsFromResults,
18
- writeInitialBenchmarkArtifact
19
- } from "./chunk-DKUAETXE.js";
4
+ writeArtifactsFromResults
5
+ } from "./chunk-TWQP7JYQ.js";
20
6
  import {
21
7
  RunBudgetTracker,
22
8
  buildWipBranchName,
@@ -32,9 +18,8 @@ import {
32
18
  pushWipCheckpoint,
33
19
  resolveResultsRepoRunsDir,
34
20
  setupWipWorktree,
35
- syncResultsRepoForProject,
36
- traceFromTranscriptJsonLines
37
- } from "./chunk-NLTIK3LV.js";
21
+ syncResultsRepoForProject
22
+ } from "./chunk-I3SC4FOT.js";
38
23
  import {
39
24
  CLI_PLACEHOLDERS,
40
25
  COMMON_TARGET_SETTINGS,
@@ -43,9 +28,13 @@ import {
43
28
  KNOWN_PROVIDERS,
44
29
  PROVIDER_ALIASES,
45
30
  ResponseCache,
31
+ ResultRowSchemaError,
32
+ aggregateRunDir,
46
33
  buildDirectoryChain,
47
34
  buildSearchRoots,
35
+ buildTestTargetKey,
48
36
  buildTraceFromMessages,
37
+ deduplicateByTestIdTarget,
49
38
  ensureVSCodeSubagents,
50
39
  findDeprecatedCamelCaseTargetWarnings,
51
40
  findGitRoot,
@@ -59,6 +48,8 @@ import {
59
48
  loadCasesFromFile,
60
49
  loadConfig,
61
50
  loadTestSuite,
51
+ normalizeResultRow,
52
+ parseJsonlResults,
62
53
  parseYamlValue,
63
54
  readTargetDefinitions,
64
55
  readTestSuiteMetadata,
@@ -73,8 +64,10 @@ import {
73
64
  subscribeToCopilotSdkLogEntries,
74
65
  subscribeToPiLogEntries,
75
66
  toCamelCaseDeep,
76
- toSnakeCaseDeep
77
- } from "./chunk-VBHHZQS6.js";
67
+ toSnakeCaseDeep,
68
+ traceFromTranscriptJsonLines,
69
+ writeInitialBenchmarkArtifact
70
+ } from "./chunk-BLXYBUU4.js";
78
71
 
79
72
  // src/commands/eval/shared.ts
80
73
  import { constants } from "node:fs";
@@ -190,7 +183,7 @@ async function findRepoRoot(start) {
190
183
  // package.json
191
184
  var package_default = {
192
185
  name: "agentv",
193
- version: "4.38.1",
186
+ version: "4.40.1-next.1",
194
187
  description: "CLI entry point for AgentV",
195
188
  type: "module",
196
189
  repository: {
@@ -298,10 +291,10 @@ async function discoverTargetsFile(options) {
298
291
  }
299
292
 
300
293
  // src/commands/eval/run-eval.ts
301
- import { constants as constants4, existsSync as existsSync6, mkdirSync as mkdirSync2 } from "node:fs";
302
- import { access as access5, readFile as readFile8 } from "node:fs/promises";
303
- import path12 from "node:path";
304
- import { pathToFileURL } from "node:url";
294
+ import { constants as constants4, existsSync as existsSync7, mkdirSync as mkdirSync2 } from "node:fs";
295
+ import { access as access6, readFile as readFile8 } from "node:fs/promises";
296
+ import path14 from "node:path";
297
+ import { pathToFileURL as pathToFileURL2 } from "node:url";
305
298
 
306
299
  // src/version-check.ts
307
300
  import { coerce, satisfies, validRange } from "semver";
@@ -355,19 +348,100 @@ function enforceRequiredVersion(requiredVersion, options) {
355
348
  }
356
349
 
357
350
  // src/commands/results/remote.ts
358
- import { existsSync as existsSync4 } from "node:fs";
359
- import path7 from "node:path";
351
+ import { existsSync as existsSync5 } from "node:fs";
352
+ import path8 from "node:path";
360
353
 
361
354
  // src/commands/inspect/utils.ts
362
- import { readFileSync as readFileSync2, readdirSync, statSync } from "node:fs";
363
- import path4 from "node:path";
355
+ import { readFileSync as readFileSync2, readdirSync, statSync as statSync2 } from "node:fs";
356
+ import path5 from "node:path";
364
357
 
365
- // src/commands/results/manifest.ts
366
- import { existsSync, readFileSync } from "node:fs";
358
+ // src/commands/eval/result-layout.ts
359
+ import { existsSync, statSync } from "node:fs";
367
360
  import path3 from "node:path";
361
+ var RESULT_INDEX_FILENAME = "index.jsonl";
362
+ var RESULT_RUNS_DIRNAME = "runs";
363
+ var DEFAULT_EXPERIMENT_NAME = "default";
364
+ function normalizeExperimentName(experiment) {
365
+ const trimmed = experiment?.trim();
366
+ if (!trimmed) {
367
+ return DEFAULT_EXPERIMENT_NAME;
368
+ }
369
+ if (!/^[A-Za-z0-9._-]+$/.test(trimmed)) {
370
+ throw new Error(
371
+ `Invalid experiment name "${trimmed}". Use only letters, numbers, ".", "_" and "-".`
372
+ );
373
+ }
374
+ return trimmed;
375
+ }
376
+ function createRunDirName(timestamp = /* @__PURE__ */ new Date()) {
377
+ return timestamp.toISOString().replace(/[:.]/g, "-");
378
+ }
379
+ function buildDefaultRunDir(cwd, experiment, timestamp = /* @__PURE__ */ new Date()) {
380
+ return path3.join(
381
+ cwd,
382
+ ".agentv",
383
+ "results",
384
+ RESULT_RUNS_DIRNAME,
385
+ normalizeExperimentName(experiment),
386
+ createRunDirName(timestamp)
387
+ );
388
+ }
389
+ function resolveRunIndexPath(runDir) {
390
+ return path3.join(runDir, RESULT_INDEX_FILENAME);
391
+ }
392
+ function isRunManifestPath(filePath) {
393
+ return path3.basename(filePath) === RESULT_INDEX_FILENAME;
394
+ }
395
+ function resolveExistingRunPrimaryPath(runDir) {
396
+ const indexPath = resolveRunIndexPath(runDir);
397
+ if (existsSync(indexPath)) {
398
+ return indexPath;
399
+ }
400
+ return void 0;
401
+ }
402
+ function isDirectoryPath(filePath) {
403
+ try {
404
+ return statSync(filePath).isDirectory();
405
+ } catch {
406
+ return false;
407
+ }
408
+ }
409
+ function resolveWorkspaceOrFilePath(filePath) {
410
+ if (!isDirectoryPath(filePath)) {
411
+ return filePath;
412
+ }
413
+ const existing = resolveExistingRunPrimaryPath(filePath);
414
+ if (!existing) {
415
+ throw new Error(`Result workspace is missing ${RESULT_INDEX_FILENAME}: ${filePath}`);
416
+ }
417
+ return existing;
418
+ }
419
+ function resolveRunManifestPath(filePath) {
420
+ if (isDirectoryPath(filePath)) {
421
+ return resolveWorkspaceOrFilePath(filePath);
422
+ }
423
+ if (!isRunManifestPath(filePath)) {
424
+ throw new Error(
425
+ `Expected a run workspace directory or ${RESULT_INDEX_FILENAME} manifest: ${filePath}`
426
+ );
427
+ }
428
+ return filePath;
429
+ }
430
+
431
+ // src/commands/results/manifest.ts
432
+ import { existsSync as existsSync2, readFileSync } from "node:fs";
433
+ import path4 from "node:path";
368
434
  function parseJsonlLines(content) {
369
435
  return content.split(/\r?\n/).map((line) => line.trim()).filter((line) => line.length > 0).map((line) => JSON.parse(line));
370
436
  }
437
+ function parseResultRows(content, sourceLabel) {
438
+ return content.split(/\r?\n/).map((line, index) => ({ line: line.trim(), lineNumber: index + 1 })).filter(({ line }) => line.length > 0).map(
439
+ ({ line, lineNumber }) => normalizeResultRow(JSON.parse(line), {
440
+ lineNumber,
441
+ sourceLabel
442
+ })
443
+ );
444
+ }
371
445
  function parseMarkdownMessages(content) {
372
446
  const trimmed = content.trim();
373
447
  if (!trimmed.startsWith("@[")) {
@@ -383,8 +457,8 @@ function readOptionalText(baseDir, relativePath) {
383
457
  if (!relativePath) {
384
458
  return void 0;
385
459
  }
386
- const absolutePath = path3.join(baseDir, relativePath);
387
- if (!existsSync(absolutePath)) {
460
+ const absolutePath = path4.join(baseDir, relativePath);
461
+ if (!existsSync2(absolutePath)) {
388
462
  return void 0;
389
463
  }
390
464
  return readFileSync(absolutePath, "utf8");
@@ -430,6 +504,7 @@ function hydrateTrace(baseDir, record) {
430
504
  return buildTraceFromMessages({
431
505
  input: hydrateInput(baseDir, record),
432
506
  output: output ? [{ role: "assistant", content: output }] : [],
507
+ summary: record.trace ? toCamelCaseDeep(record.trace) : void 0,
433
508
  finalOutput: output,
434
509
  target: record.target,
435
510
  testId: record.test_id
@@ -484,11 +559,11 @@ function hydrateManifestRecord(baseDir, record) {
484
559
  };
485
560
  }
486
561
  function parseResultManifest(content) {
487
- return parseJsonlLines(content);
562
+ return parseResultRows(content);
488
563
  }
489
564
  function resolveResultSourcePath(source, cwd) {
490
- const resolved = path3.isAbsolute(source) ? source : path3.resolve(cwd ?? process.cwd(), source);
491
- if (isDirectoryPath(resolved) || path3.basename(resolved) === RESULT_INDEX_FILENAME) {
565
+ const resolved = path4.isAbsolute(source) ? source : path4.resolve(cwd ?? process.cwd(), source);
566
+ if (isDirectoryPath(resolved) || path4.basename(resolved) === RESULT_INDEX_FILENAME) {
492
567
  return resolveRunManifestPath(resolved);
493
568
  }
494
569
  return resolved;
@@ -496,8 +571,8 @@ function resolveResultSourcePath(source, cwd) {
496
571
  function loadManifestResults(sourceFile) {
497
572
  const resolvedSourceFile = resolveRunManifestPath(sourceFile);
498
573
  const content = readFileSync(resolvedSourceFile, "utf8");
499
- const records = parseResultManifest(content);
500
- const baseDir = path3.dirname(resolvedSourceFile);
574
+ const records = parseResultRows(content, resolvedSourceFile);
575
+ const baseDir = path4.dirname(resolvedSourceFile);
501
576
  return records.map((record) => hydrateManifestRecord(baseDir, record));
502
577
  }
503
578
  function loadLightweightResults(sourceFile) {
@@ -544,10 +619,10 @@ function padLeft(str, len) {
544
619
  }
545
620
  function loadResultFile(filePath) {
546
621
  const resolvedFilePath = resolveTraceResultPath(filePath);
547
- if (path4.extname(resolvedFilePath) === ".json") {
622
+ if (path5.extname(resolvedFilePath) === ".json") {
548
623
  return loadOtlpTraceFile(resolvedFilePath);
549
624
  }
550
- if (path4.basename(resolvedFilePath) === RESULT_INDEX_FILENAME) {
625
+ if (path5.basename(resolvedFilePath) === RESULT_INDEX_FILENAME) {
551
626
  return loadManifestAsRawResults(resolvedFilePath);
552
627
  }
553
628
  return loadJsonlRecords(resolvedFilePath);
@@ -555,15 +630,33 @@ function loadResultFile(filePath) {
555
630
  function resolveTraceResultPath(filePath) {
556
631
  return resolveWorkspaceOrFilePath(filePath);
557
632
  }
633
+ function isRecord(value) {
634
+ return typeof value === "object" && value !== null && !Array.isArray(value);
635
+ }
636
+ function shouldUseTraceScoreError(value) {
637
+ if (!isRecord(value)) {
638
+ return false;
639
+ }
640
+ return typeof value.test_id === "string" || typeof value.testId === "string" || Object.hasOwn(value, "score") || Object.hasOwn(value, "trace") || Object.hasOwn(value, "spans");
641
+ }
558
642
  function loadJsonlRecords(filePath) {
559
643
  const content = readFileSync2(filePath, "utf8");
560
644
  const lines = content.trim().split("\n").filter((line) => line.trim());
561
645
  return lines.map((line, i) => {
562
- const record = JSON.parse(line);
563
- if (typeof record.score !== "number") {
564
- throw new Error(`Missing or invalid score in result at line ${i + 1}: ${line.slice(0, 100)}`);
646
+ const parsed = JSON.parse(line);
647
+ try {
648
+ return normalizeResultRow(parsed, {
649
+ lineNumber: i + 1,
650
+ sourceLabel: filePath
651
+ });
652
+ } catch (error) {
653
+ if (error instanceof ResultRowSchemaError && shouldUseTraceScoreError(parsed)) {
654
+ throw new Error(
655
+ `Missing or invalid score in result at line ${i + 1}: ${line.slice(0, 100)}`
656
+ );
657
+ }
658
+ throw error;
565
659
  }
566
- return record;
567
660
  });
568
661
  }
569
662
  function loadManifestAsRawResults(filePath) {
@@ -827,7 +920,7 @@ function toTraceSummary(result) {
827
920
  return toCamelCaseDeep(rawTrace);
828
921
  }
829
922
  function buildRunId(relativeRunPath) {
830
- const normalized = relativeRunPath.split(path4.sep).join("/");
923
+ const normalized = relativeRunPath.split(path5.sep).join("/");
831
924
  const segments = normalized.split("/").filter(Boolean);
832
925
  if (segments.length >= 2) {
833
926
  const experiment = segments.slice(0, -1).join("/");
@@ -841,7 +934,7 @@ function buildRunId(relativeRunPath) {
841
934
  }
842
935
  function readRunDisplayName(runDir) {
843
936
  try {
844
- const benchmark = JSON.parse(readFileSync2(path4.join(runDir, "benchmark.json"), "utf8"));
937
+ const benchmark = JSON.parse(readFileSync2(path5.join(runDir, "benchmark.json"), "utf8"));
845
938
  const displayName = benchmark.metadata?.display_name;
846
939
  return typeof displayName === "string" && displayName.trim() ? displayName.trim() : void 0;
847
940
  } catch {
@@ -851,8 +944,8 @@ function readRunDisplayName(runDir) {
851
944
  function collectRunManifestPaths(runsDir, currentDir, files) {
852
945
  const primaryPath = resolveExistingRunPrimaryPath(currentDir);
853
946
  if (primaryPath) {
854
- const relativeRunPath = path4.relative(runsDir, currentDir);
855
- const sortName = path4.basename(currentDir);
947
+ const relativeRunPath = path5.relative(runsDir, currentDir);
948
+ const sortName = path5.basename(currentDir);
856
949
  files.push({
857
950
  filePath: primaryPath,
858
951
  displayName: readRunDisplayName(currentDir) ?? sortName,
@@ -864,7 +957,7 @@ function collectRunManifestPaths(runsDir, currentDir, files) {
864
957
  const entries = readdirSync(currentDir, { withFileTypes: true });
865
958
  for (const entry of entries) {
866
959
  if (entry.isDirectory()) {
867
- collectRunManifestPaths(runsDir, path4.join(currentDir, entry.name), files);
960
+ collectRunManifestPaths(runsDir, path5.join(currentDir, entry.name), files);
868
961
  }
869
962
  }
870
963
  }
@@ -874,7 +967,7 @@ function listResultFilesFromRunsDir(runsDir, limit) {
874
967
  const entries = readdirSync(runsDir, { withFileTypes: true });
875
968
  for (const entry of entries) {
876
969
  if (entry.isDirectory()) {
877
- collectRunManifestPaths(runsDir, path4.join(runsDir, entry.name), files);
970
+ collectRunManifestPaths(runsDir, path5.join(runsDir, entry.name), files);
878
971
  }
879
972
  }
880
973
  } catch {
@@ -884,7 +977,7 @@ function listResultFilesFromRunsDir(runsDir, limit) {
884
977
  const metas = [];
885
978
  for (const { filePath, displayName, runId } of limited) {
886
979
  try {
887
- const fileStat = statSync(filePath);
980
+ const fileStat = statSync2(filePath);
888
981
  const results = loadResultFile(filePath);
889
982
  const testCount = results.length;
890
983
  const passCount = results.filter((r) => r.score >= DEFAULT_THRESHOLD).length;
@@ -909,7 +1002,7 @@ function listResultFilesFromRunsDir(runsDir, limit) {
909
1002
  }
910
1003
  function listResultFiles(cwd, limit) {
911
1004
  return listResultFilesFromRunsDir(
912
- path4.join(cwd, ".agentv", "results", RESULT_RUNS_DIRNAME),
1005
+ path5.join(cwd, ".agentv", "results", RESULT_RUNS_DIRNAME),
913
1006
  limit
914
1007
  );
915
1008
  }
@@ -945,21 +1038,21 @@ function formatScore(score) {
945
1038
 
946
1039
  // src/commands/results/remote-metadata.ts
947
1040
  import { execFileSync } from "node:child_process";
948
- import { existsSync as existsSync3, mkdirSync, readFileSync as readFileSync4, rmSync, writeFileSync as writeFileSync2 } from "node:fs";
949
- import path6 from "node:path";
1041
+ import { existsSync as existsSync4, mkdirSync, readFileSync as readFileSync4, rmSync, writeFileSync as writeFileSync2 } from "node:fs";
1042
+ import path7 from "node:path";
950
1043
 
951
1044
  // src/commands/results/run-tags.ts
952
- import { existsSync as existsSync2, readFileSync as readFileSync3, unlinkSync, writeFileSync } from "node:fs";
953
- import path5 from "node:path";
1045
+ import { existsSync as existsSync3, readFileSync as readFileSync3, unlinkSync, writeFileSync } from "node:fs";
1046
+ import path6 from "node:path";
954
1047
  var RUN_TAGS_FILENAME = "tags.json";
955
1048
  var MAX_TAGS_PER_RUN = 20;
956
1049
  var MAX_TAG_LENGTH = 60;
957
1050
  function runTagsPath(manifestPath) {
958
- return path5.join(path5.dirname(manifestPath), RUN_TAGS_FILENAME);
1051
+ return path6.join(path6.dirname(manifestPath), RUN_TAGS_FILENAME);
959
1052
  }
960
1053
  function readRunTags(manifestPath) {
961
1054
  const fp = runTagsPath(manifestPath);
962
- if (!existsSync2(fp)) return void 0;
1055
+ if (!existsSync3(fp)) return void 0;
963
1056
  try {
964
1057
  const parsed = JSON.parse(readFileSync3(fp, "utf8"));
965
1058
  if (!parsed || typeof parsed !== "object") return void 0;
@@ -993,7 +1086,7 @@ function writeRunTags(manifestPath, tags) {
993
1086
  }
994
1087
  function deleteRunTags(manifestPath) {
995
1088
  const fp = runTagsPath(manifestPath);
996
- if (existsSync2(fp)) {
1089
+ if (existsSync3(fp)) {
997
1090
  unlinkSync(fp);
998
1091
  }
999
1092
  }
@@ -1026,8 +1119,8 @@ function normalizeTags(tags) {
1026
1119
  }
1027
1120
 
1028
1121
  // src/commands/results/remote-metadata.ts
1029
- var RESULTS_RUNS_DIR = path6.join(".agentv", "results", "runs");
1030
- var REMOTE_METADATA_RUNS_DIR = path6.join(".agentv", "results", "metadata", "runs");
1122
+ var RESULTS_RUNS_DIR = path7.join(".agentv", "results", "runs");
1123
+ var REMOTE_METADATA_RUNS_DIR = path7.join(".agentv", "results", "metadata", "runs");
1031
1124
  function cleanGitEnv() {
1032
1125
  const env = {};
1033
1126
  for (const [key, value] of Object.entries(process.env)) {
@@ -1053,10 +1146,10 @@ function tryRunGit(repoDir, args) {
1053
1146
  }
1054
1147
  }
1055
1148
  function toGitPath(filePath) {
1056
- return filePath.split(path6.sep).join("/");
1149
+ return filePath.split(path7.sep).join("/");
1057
1150
  }
1058
1151
  function readTagsFile(filePath) {
1059
- if (!existsSync3(filePath)) return void 0;
1152
+ if (!existsSync4(filePath)) return void 0;
1060
1153
  try {
1061
1154
  return parseTagsFile(readFileSync4(filePath, "utf8"));
1062
1155
  } catch {
@@ -1099,27 +1192,27 @@ function resolveComparisonRef(repoDir) {
1099
1192
  return tryRunGit(repoDir, ["rev-parse", "--verify", "HEAD"]) ? "HEAD" : void 0;
1100
1193
  }
1101
1194
  function resolveRemoteRunMetadataPaths(repoDir, manifestPath) {
1102
- const runsRoot = path6.resolve(repoDir, RESULTS_RUNS_DIR);
1103
- const manifestDir = path6.resolve(path6.dirname(manifestPath));
1104
- const runRelativePath = path6.relative(runsRoot, manifestDir);
1105
- if (runRelativePath.length === 0 || runRelativePath.startsWith("..") || path6.isAbsolute(runRelativePath)) {
1195
+ const runsRoot = path7.resolve(repoDir, RESULTS_RUNS_DIR);
1196
+ const manifestDir = path7.resolve(path7.dirname(manifestPath));
1197
+ const runRelativePath = path7.relative(runsRoot, manifestDir);
1198
+ if (runRelativePath.length === 0 || runRelativePath.startsWith("..") || path7.isAbsolute(runRelativePath)) {
1106
1199
  throw new Error(
1107
1200
  `Remote run manifest is outside the results repo runs directory: ${manifestPath}`
1108
1201
  );
1109
1202
  }
1110
- const overlayTagsPath = path6.join(
1203
+ const overlayTagsPath = path7.join(
1111
1204
  repoDir,
1112
1205
  REMOTE_METADATA_RUNS_DIR,
1113
1206
  runRelativePath,
1114
1207
  RUN_TAGS_FILENAME
1115
1208
  );
1116
- const artifactTagsPath = path6.join(runsRoot, runRelativePath, RUN_TAGS_FILENAME);
1209
+ const artifactTagsPath = path7.join(runsRoot, runRelativePath, RUN_TAGS_FILENAME);
1117
1210
  return {
1118
1211
  runRelativePath,
1119
1212
  artifactTagsPath,
1120
- artifactTagsGitPath: toGitPath(path6.relative(repoDir, artifactTagsPath)),
1213
+ artifactTagsGitPath: toGitPath(path7.relative(repoDir, artifactTagsPath)),
1121
1214
  overlayTagsPath,
1122
- overlayTagsGitPath: toGitPath(path6.relative(repoDir, overlayTagsPath))
1215
+ overlayTagsGitPath: toGitPath(path7.relative(repoDir, overlayTagsPath))
1123
1216
  };
1124
1217
  }
1125
1218
  function readRemoteRunTagsContext(repoDir, manifestPath) {
@@ -1149,7 +1242,7 @@ function toRemoteRunTagState(context) {
1149
1242
  };
1150
1243
  }
1151
1244
  function assertWritableResultsRepo(repoDir) {
1152
- if (!existsSync3(repoDir)) {
1245
+ if (!existsSync4(repoDir)) {
1153
1246
  throw new Error("Writable results repo is not configured for remote metadata");
1154
1247
  }
1155
1248
  const insideWorkTree = tryRunGit(repoDir, ["rev-parse", "--is-inside-work-tree"]);
@@ -1174,7 +1267,7 @@ function writeRemoteRunTags(repoDir, manifestPath, tags) {
1174
1267
  tags: cleaned,
1175
1268
  updated_at: (/* @__PURE__ */ new Date()).toISOString()
1176
1269
  };
1177
- mkdirSync(path6.dirname(context.paths.overlayTagsPath), { recursive: true });
1270
+ mkdirSync(path7.dirname(context.paths.overlayTagsPath), { recursive: true });
1178
1271
  writeFileSync2(context.paths.overlayTagsPath, `${JSON.stringify(entry, null, 2)}
1179
1272
  `, "utf8");
1180
1273
  return readRemoteRunTags(repoDir, manifestPath);
@@ -1187,7 +1280,7 @@ function deleteRemoteRunTags(repoDir, manifestPath) {
1187
1280
  var gitRunsCache = /* @__PURE__ */ new Map();
1188
1281
  var GIT_RUNS_CACHE_TTL_MS = 6e4;
1189
1282
  function getResultsStorageRef(config) {
1190
- return config.branch ? `origin/${config.branch}` : void 0;
1283
+ return config.branch;
1191
1284
  }
1192
1285
  function cachedListGitRuns(repoDir, ref) {
1193
1286
  const now = Date.now();
@@ -1220,13 +1313,13 @@ function getStatusMessage(error) {
1220
1313
  return error instanceof Error ? error.message : String(error);
1221
1314
  }
1222
1315
  function getRelativeRunPath(cwd, runDir) {
1223
- const relative = path7.relative(path7.join(cwd, ".agentv", "results", "runs"), runDir);
1224
- if (!relative.startsWith("..") && !path7.isAbsolute(relative)) {
1316
+ const relative = path8.relative(path8.join(cwd, ".agentv", "results", "runs"), runDir);
1317
+ if (!relative.startsWith("..") && !path8.isAbsolute(relative)) {
1225
1318
  return relative;
1226
1319
  }
1227
- const experiment = path7.basename(path7.dirname(runDir));
1228
- const runName = path7.basename(runDir);
1229
- return experiment && experiment !== runName ? path7.join(experiment, runName) : runName;
1320
+ const experiment = path8.basename(path8.dirname(runDir));
1321
+ const runName = path8.basename(runDir);
1322
+ return experiment && experiment !== runName ? path8.join(experiment, runName) : runName;
1230
1323
  }
1231
1324
  function buildCommitTitle(payload) {
1232
1325
  const passed = payload.results.filter((result) => result.score >= DEFAULT_THRESHOLD).length;
@@ -1242,23 +1335,65 @@ async function maybeWarnLargeArtifact(runDir) {
1242
1335
  );
1243
1336
  }
1244
1337
  }
1245
- async function loadNormalizedResultsConfig(cwd, projectId) {
1338
+ async function loadNormalizedResultsConfig(cwd, projectId, overrides) {
1246
1339
  const repoRoot = await findRepoRoot(cwd) ?? cwd;
1247
- const config = await loadConfig(path7.join(cwd, "_"), repoRoot);
1340
+ const config = await loadConfig(path8.join(cwd, "_"), repoRoot);
1248
1341
  const project = projectId !== void 0 ? getProject(projectId) : getProjectForPath(repoRoot) ?? getProjectForPath(cwd);
1249
1342
  const projectResults = project?.results ? {
1250
1343
  mode: "github",
1251
- repo: project.results.repoUrl,
1252
- branch: project.results.branch,
1253
- path: project.results.path,
1254
- auto_push: project.results.sync?.autoPush,
1255
- branch_prefix: project.results.branchPrefix
1344
+ ...project.results.repoUrl !== void 0 && {
1345
+ repo: project.results.repoUrl,
1346
+ repo_url: project.results.repoUrl
1347
+ },
1348
+ ...project.results.repoPath !== void 0 && { repo_path: project.results.repoPath },
1349
+ ...project.results.branch !== void 0 && { branch: project.results.branch },
1350
+ ...project.results.remote !== void 0 && { remote: project.results.remote },
1351
+ ...project.results.path !== void 0 && { path: project.results.path },
1352
+ ...(project.results.sync?.autoPush !== void 0 || project.results.sync?.requirePush !== void 0) && {
1353
+ sync: {
1354
+ ...project.results.sync?.autoPush !== void 0 && {
1355
+ auto_push: project.results.sync.autoPush
1356
+ },
1357
+ ...project.results.sync?.requirePush !== void 0 && {
1358
+ require_push: project.results.sync.requirePush
1359
+ }
1360
+ }
1361
+ },
1362
+ ...project.results.branchPrefix !== void 0 && {
1363
+ branch_prefix: project.results.branchPrefix
1364
+ }
1256
1365
  } : void 0;
1257
1366
  const resultsConfig = projectResults ?? resolveResultsConfigForProject(config, project?.id);
1258
- if (!resultsConfig) {
1367
+ if (!resultsConfig && !overrides) {
1259
1368
  return void 0;
1260
1369
  }
1261
- return normalizeResultsConfig(resultsConfig);
1370
+ const baseConfig = resultsConfig ? normalizeResultsConfig(resultsConfig, { baseDir: project?.path ?? repoRoot }) : void 0;
1371
+ const repoOverride = overrides?.repo ?? overrides?.repo_url ?? overrides?.repo_path;
1372
+ if (!baseConfig && !repoOverride) {
1373
+ return void 0;
1374
+ }
1375
+ if (!overrides) {
1376
+ return baseConfig;
1377
+ }
1378
+ const merged = {
1379
+ mode: "github",
1380
+ ...overrides.repo !== void 0 ? { repo: overrides.repo } : overrides.repo_url !== void 0 ? { repo_url: overrides.repo_url } : overrides.repo_path !== void 0 ? { repo_path: overrides.repo_path } : baseConfig?.repo_path ? { repo_path: baseConfig.repo_path } : baseConfig?.repo_url ? { repo_url: baseConfig.repo_url } : baseConfig?.repo ? { repo: baseConfig.repo } : {},
1381
+ ...overrides.branch !== void 0 ? { branch: overrides.branch } : baseConfig?.branch ? { branch: baseConfig.branch } : {},
1382
+ ...overrides.remote !== void 0 ? { remote: overrides.remote } : baseConfig?.remote ? { remote: baseConfig.remote } : {},
1383
+ ...repoOverride === void 0 && baseConfig?.repo_path === void 0 && baseConfig?.path ? { path: baseConfig.path } : {},
1384
+ ...(overrides.auto_push !== void 0 || overrides.require_push !== void 0 || baseConfig?.auto_push !== void 0 || baseConfig?.require_push !== void 0) && {
1385
+ sync: {
1386
+ ...(overrides.auto_push ?? baseConfig?.auto_push) !== void 0 && {
1387
+ auto_push: overrides.auto_push ?? baseConfig?.auto_push
1388
+ },
1389
+ ...(overrides.require_push ?? baseConfig?.require_push) !== void 0 && {
1390
+ require_push: overrides.require_push ?? baseConfig?.require_push
1391
+ }
1392
+ }
1393
+ },
1394
+ ...baseConfig?.branch_prefix ? { branch_prefix: baseConfig.branch_prefix } : {}
1395
+ };
1396
+ return normalizeResultsConfig(merged, { baseDir: project?.path ?? repoRoot });
1262
1397
  }
1263
1398
  function encodeRemoteRunId(filename) {
1264
1399
  return `${REMOTE_RUN_PREFIX}${filename}`;
@@ -1348,7 +1483,7 @@ async function listMergedResultFiles(cwd, limit, projectId) {
1348
1483
  filename: encodeRemoteRunId(r.run_id),
1349
1484
  raw_filename: r.run_id,
1350
1485
  source: "remote",
1351
- path: path7.join(config.path, r.manifest_path),
1486
+ path: path8.join(config.path, r.manifest_path),
1352
1487
  displayName: r.display_name,
1353
1488
  timestamp: r.timestamp,
1354
1489
  testCount: r.test_count,
@@ -1394,20 +1529,20 @@ async function findRunById(cwd, runId, projectId) {
1394
1529
  return runs.find((run) => run.filename === runId);
1395
1530
  }
1396
1531
  async function ensureRemoteRunAvailable(cwd, meta, projectId) {
1397
- if (meta.source !== "remote" || existsSync4(meta.path)) {
1532
+ if (meta.source !== "remote" || existsSync5(meta.path)) {
1398
1533
  return;
1399
1534
  }
1400
1535
  const config = await loadNormalizedResultsConfig(cwd, projectId);
1401
1536
  if (!config) {
1402
1537
  throw new Error("Remote results are not configured");
1403
1538
  }
1404
- const relativeManifestPath = path7.relative(config.path, meta.path).split(path7.sep).join("/");
1539
+ const relativeManifestPath = path8.relative(config.path, meta.path).split(path8.sep).join("/");
1405
1540
  if (relativeManifestPath.length === 0 || relativeManifestPath === meta.path || relativeManifestPath.startsWith("../")) {
1406
1541
  throw new Error(`Remote manifest path is outside the results repo clone: ${meta.path}`);
1407
1542
  }
1408
- const relativeRunPath = path7.posix.relative(
1543
+ const relativeRunPath = path8.posix.relative(
1409
1544
  ".agentv/results/runs",
1410
- path7.posix.dirname(relativeManifestPath)
1545
+ path8.posix.dirname(relativeManifestPath)
1411
1546
  );
1412
1547
  await materializeGitRun(config.path, relativeRunPath, getResultsStorageRef(config));
1413
1548
  }
@@ -1444,8 +1579,12 @@ async function clearRemoteRunTags(cwd, meta, projectId) {
1444
1579
  return deleteRemoteRunTags(config.path, meta.path);
1445
1580
  }
1446
1581
  async function maybeAutoExportRunArtifacts(payload) {
1447
- const config = await loadNormalizedResultsConfig(payload.cwd);
1448
- if (!config?.auto_push) {
1582
+ const config = await loadNormalizedResultsConfig(
1583
+ payload.cwd,
1584
+ void 0,
1585
+ payload.results_overrides
1586
+ );
1587
+ if (!config) {
1449
1588
  return "disabled";
1450
1589
  }
1451
1590
  try {
@@ -1459,12 +1598,18 @@ async function maybeAutoExportRunArtifacts(payload) {
1459
1598
  commitMessage: commitTitle
1460
1599
  });
1461
1600
  if (!pushed) {
1462
- console.warn("Warning: results export produced no git changes. Skipping push.");
1601
+ console.warn("Warning: results export produced no git changes.");
1463
1602
  return "already_published";
1464
1603
  }
1465
- console.log(`Results pushed to ${config.repo} (${config.path}/${relativeRunPath})`);
1604
+ const pushLabel = config.auto_push || config.require_push ? "pushed" : "published locally";
1605
+ console.log(
1606
+ `Results ${pushLabel} to ${config.repo} (${config.branch ?? "default branch"}:${relativeRunPath})`
1607
+ );
1466
1608
  return "published";
1467
1609
  } catch (error) {
1610
+ if (config.require_push) {
1611
+ throw error;
1612
+ }
1468
1613
  console.warn(`Warning: skipping results export: ${getStatusMessage(error)}`);
1469
1614
  console.warn("Warning: Run 'gh auth login' if GitHub authentication is missing.");
1470
1615
  return "failed";
@@ -1474,13 +1619,13 @@ async function maybeAutoExportRunArtifacts(payload) {
1474
1619
  // src/commands/eval/env.ts
1475
1620
  import { constants as constants3 } from "node:fs";
1476
1621
  import { access as access3 } from "node:fs/promises";
1477
- import path8 from "node:path";
1622
+ import path9 from "node:path";
1478
1623
  import { config as loadDotenv } from "dotenv";
1479
1624
  function uniqueDirs(directories) {
1480
1625
  const seen = /* @__PURE__ */ new Set();
1481
1626
  const result = [];
1482
1627
  for (const dir of directories) {
1483
- const absolute = path8.resolve(dir);
1628
+ const absolute = path9.resolve(dir);
1484
1629
  if (seen.has(absolute)) {
1485
1630
  continue;
1486
1631
  }
@@ -1499,14 +1644,14 @@ async function fileExists2(filePath) {
1499
1644
  }
1500
1645
  function collectAncestorDirectories(start, boundary) {
1501
1646
  const directories = [];
1502
- const boundaryDir = path8.resolve(boundary);
1503
- let current = path8.resolve(start);
1647
+ const boundaryDir = path9.resolve(boundary);
1648
+ let current = path9.resolve(start);
1504
1649
  while (current !== void 0) {
1505
1650
  directories.push(current);
1506
1651
  if (current === boundaryDir) {
1507
1652
  break;
1508
1653
  }
1509
- const parent = path8.dirname(current);
1654
+ const parent = path9.dirname(current);
1510
1655
  if (parent === current) {
1511
1656
  break;
1512
1657
  }
@@ -1516,12 +1661,12 @@ function collectAncestorDirectories(start, boundary) {
1516
1661
  }
1517
1662
  async function loadEnvFromHierarchy(options) {
1518
1663
  const { testFilePath, repoRoot, verbose } = options;
1519
- const testDir = path8.dirname(path8.resolve(testFilePath));
1664
+ const testDir = path9.dirname(path9.resolve(testFilePath));
1520
1665
  const cwd = process.cwd();
1521
1666
  const searchDirs = uniqueDirs([...collectAncestorDirectories(testDir, repoRoot), repoRoot, cwd]);
1522
1667
  const envFiles = [];
1523
1668
  for (const dir of searchDirs) {
1524
- const candidate = path8.join(dir, ".env");
1669
+ const candidate = path9.join(dir, ".env");
1525
1670
  if (await fileExists2(candidate)) {
1526
1671
  envFiles.push(candidate);
1527
1672
  }
@@ -1542,10 +1687,118 @@ async function loadEnvFromHierarchy(options) {
1542
1687
  return envFiles[0];
1543
1688
  }
1544
1689
 
1690
+ // src/commands/eval/otel-backends.ts
1691
+ import { access as access4 } from "node:fs/promises";
1692
+ import path10 from "node:path";
1693
+ import { pathToFileURL } from "node:url";
1694
+ var RESOLVER_EXTENSIONS = [".mjs", ".js"];
1695
+ var builtinOtelBackendResolvers = [
1696
+ {
1697
+ name: "langfuse",
1698
+ resolve: ({ env }) => {
1699
+ const baseUrl = trimTrailingSlash(env.LANGFUSE_HOST ?? "https://cloud.langfuse.com");
1700
+ const publicKey = env.LANGFUSE_PUBLIC_KEY ?? "";
1701
+ const secretKey = env.LANGFUSE_SECRET_KEY ?? "";
1702
+ return {
1703
+ endpoint: `${baseUrl}/api/public/otel/v1/traces`,
1704
+ headers: {
1705
+ Authorization: `Basic ${Buffer.from(`${publicKey}:${secretKey}`).toString("base64")}`
1706
+ }
1707
+ };
1708
+ }
1709
+ },
1710
+ {
1711
+ name: "braintrust",
1712
+ resolve: ({ env }) => {
1713
+ const headers = {
1714
+ Authorization: `Bearer ${env.BRAINTRUST_API_KEY ?? ""}`
1715
+ };
1716
+ const parent = env.BRAINTRUST_PARENT ?? (env.BRAINTRUST_PROJECT_ID ? `project_id:${env.BRAINTRUST_PROJECT_ID}` : void 0) ?? (env.BRAINTRUST_PROJECT ? `project_name:${env.BRAINTRUST_PROJECT}` : void 0);
1717
+ if (parent) {
1718
+ headers["x-bt-parent"] = parent;
1719
+ }
1720
+ return {
1721
+ endpoint: "https://api.braintrust.dev/otel/v1/traces",
1722
+ headers
1723
+ };
1724
+ }
1725
+ },
1726
+ {
1727
+ name: "confident",
1728
+ resolve: ({ env }) => ({
1729
+ endpoint: "https://otel.confident-ai.com/v1/traces",
1730
+ headers: {
1731
+ "x-confident-api-key": env.CONFIDENT_API_KEY ?? ""
1732
+ }
1733
+ })
1734
+ }
1735
+ ];
1736
+ var builtinOtelBackendResolversByName = new Map(
1737
+ builtinOtelBackendResolvers.map((resolver) => [resolver.name, resolver])
1738
+ );
1739
+ async function resolveOtelBackend(name, context) {
1740
+ const resolver = await loadOtelBackendResolver(name, context.cwd);
1741
+ return resolver?.resolve(context);
1742
+ }
1743
+ async function loadOtelBackendResolver(name, cwd) {
1744
+ const localResolverPath = await findLocalOtelBackendResolver(name, cwd);
1745
+ if (localResolverPath) {
1746
+ return importOtelBackendResolver(localResolverPath, name);
1747
+ }
1748
+ return builtinOtelBackendResolversByName.get(name);
1749
+ }
1750
+ async function findLocalOtelBackendResolver(name, cwd) {
1751
+ if (!isSafeResolverName(name)) {
1752
+ return void 0;
1753
+ }
1754
+ for (const dir of getResolverSearchDirs(cwd)) {
1755
+ for (const ext of RESOLVER_EXTENSIONS) {
1756
+ const candidate = path10.join(dir, `${name}${ext}`);
1757
+ try {
1758
+ await access4(candidate);
1759
+ return candidate;
1760
+ } catch {
1761
+ }
1762
+ }
1763
+ }
1764
+ return void 0;
1765
+ }
1766
+ function getResolverSearchDirs(cwd) {
1767
+ const dirs = [];
1768
+ let current = path10.resolve(cwd);
1769
+ const root = path10.parse(current).root;
1770
+ while (current !== root) {
1771
+ dirs.push(path10.join(current, ".agentv", "otel-backends"));
1772
+ current = path10.dirname(current);
1773
+ }
1774
+ return dirs;
1775
+ }
1776
+ function isSafeResolverName(name) {
1777
+ return name.length > 0 && !name.includes("/") && !name.includes("\\") && !name.startsWith(".");
1778
+ }
1779
+ async function importOtelBackendResolver(filePath, fallbackName) {
1780
+ const mod = await import(pathToFileURL(filePath).href);
1781
+ const candidate = [mod.default, mod.otelBackend, mod.resolver].find(
1782
+ (value) => value && typeof value.resolve === "function"
1783
+ );
1784
+ if (!candidate) {
1785
+ throw new Error(
1786
+ `OTel backend resolver '${fallbackName}' from ${filePath} must export a resolver object`
1787
+ );
1788
+ }
1789
+ return {
1790
+ ...candidate,
1791
+ name: typeof candidate.name === "string" && candidate.name.length > 0 ? candidate.name : fallbackName
1792
+ };
1793
+ }
1794
+ function trimTrailingSlash(value) {
1795
+ return value.replace(/\/+$/, "");
1796
+ }
1797
+
1545
1798
  // src/commands/eval/jsonl-writer.ts
1546
1799
  import { createWriteStream } from "node:fs";
1547
1800
  import { mkdir } from "node:fs/promises";
1548
- import path9 from "node:path";
1801
+ import path11 from "node:path";
1549
1802
  import { finished } from "node:stream/promises";
1550
1803
 
1551
1804
  // ../../node_modules/.bun/async-mutex@0.5.0/node_modules/async-mutex/index.mjs
@@ -1763,7 +2016,7 @@ var JsonlWriter = class _JsonlWriter {
1763
2016
  this.stream = stream;
1764
2017
  }
1765
2018
  static async open(filePath, options) {
1766
- await mkdir(path9.dirname(filePath), { recursive: true });
2019
+ await mkdir(path11.dirname(filePath), { recursive: true });
1767
2020
  const flags = options?.append ? "a" : "w";
1768
2021
  const stream = createWriteStream(filePath, { flags, encoding: "utf8" });
1769
2022
  return new _JsonlWriter(stream);
@@ -1888,12 +2141,12 @@ var ProgressDisplay = class {
1888
2141
  }
1889
2142
  addLogPaths(paths) {
1890
2143
  const newPaths = [];
1891
- for (const path14 of paths) {
1892
- if (this.logPathSet.has(path14)) {
2144
+ for (const path16 of paths) {
2145
+ if (this.logPathSet.has(path16)) {
1893
2146
  continue;
1894
2147
  }
1895
- this.logPathSet.add(path14);
1896
- newPaths.push(path14);
2148
+ this.logPathSet.add(path16);
2149
+ newPaths.push(path16);
1897
2150
  }
1898
2151
  if (newPaths.length === 0) {
1899
2152
  return;
@@ -1946,9 +2199,9 @@ async function loadNonErrorResults(jsonlPath) {
1946
2199
  }
1947
2200
 
1948
2201
  // src/commands/eval/run-cache.ts
1949
- import { existsSync as existsSync5 } from "node:fs";
2202
+ import { existsSync as existsSync6 } from "node:fs";
1950
2203
  import { mkdir as mkdir2, readFile, writeFile } from "node:fs/promises";
1951
- import path10 from "node:path";
2204
+ import path12 from "node:path";
1952
2205
  var CACHE_FILENAME = "cache.json";
1953
2206
  function resolveRunCacheFile(cache) {
1954
2207
  if (cache.lastRunDir) {
@@ -1957,7 +2210,7 @@ function resolveRunCacheFile(cache) {
1957
2210
  return "";
1958
2211
  }
1959
2212
  function cachePath(cwd) {
1960
- return path10.join(cwd, ".agentv", CACHE_FILENAME);
2213
+ return path12.join(cwd, ".agentv", CACHE_FILENAME);
1961
2214
  }
1962
2215
  async function loadRunCache(cwd) {
1963
2216
  try {
@@ -1970,17 +2223,17 @@ async function loadRunCache(cwd) {
1970
2223
  async function resolveCachedRunDir(cwd) {
1971
2224
  const cache = await loadRunCache(cwd);
1972
2225
  if (!cache?.lastRunDir) return void 0;
1973
- if (!existsSync5(cache.lastRunDir)) return void 0;
2226
+ if (!existsSync6(cache.lastRunDir)) return void 0;
1974
2227
  return cache.lastRunDir;
1975
2228
  }
1976
2229
  async function saveRunCache(cwd, resultPath) {
1977
- if (path10.basename(resultPath) !== RESULT_INDEX_FILENAME) {
2230
+ if (path12.basename(resultPath) !== RESULT_INDEX_FILENAME) {
1978
2231
  return;
1979
2232
  }
1980
- const dir = path10.join(cwd, ".agentv");
2233
+ const dir = path12.join(cwd, ".agentv");
1981
2234
  await mkdir2(dir, { recursive: true });
1982
2235
  const cache = {
1983
- lastRunDir: path10.dirname(resultPath),
2236
+ lastRunDir: path12.dirname(resultPath),
1984
2237
  timestamp: (/* @__PURE__ */ new Date()).toISOString()
1985
2238
  };
1986
2239
  await writeFile(cachePath(cwd), `${JSON.stringify(cache, null, 2)}
@@ -2249,7 +2502,7 @@ function formatMatrixSummary(results) {
2249
2502
 
2250
2503
  // ../../packages/core/dist/evaluation/validation/index.js
2251
2504
  import { readFile as readFile2 } from "node:fs/promises";
2252
- import path11 from "node:path";
2505
+ import path13 from "node:path";
2253
2506
  import { readFile as readFile22, readdir, stat as stat2 } from "node:fs/promises";
2254
2507
  import path22 from "node:path";
2255
2508
  import { readFile as readFile3 } from "node:fs/promises";
@@ -2260,7 +2513,7 @@ import { readFile as readFile5 } from "node:fs/promises";
2260
2513
  import path52 from "node:path";
2261
2514
  import { readFile as readFile6 } from "node:fs/promises";
2262
2515
  import path62 from "node:path";
2263
- import { access as access4, readFile as readFile7 } from "node:fs/promises";
2516
+ import { access as access5, readFile as readFile7 } from "node:fs/promises";
2264
2517
  import path72 from "node:path";
2265
2518
  var SCHEMA_EVAL_V2 = "agentv-eval-v2";
2266
2519
  var SCHEMA_TARGETS_V2 = "agentv-targets-v2.2";
@@ -2295,8 +2548,8 @@ async function detectFileType(filePath) {
2295
2548
  }
2296
2549
  }
2297
2550
  function inferFileTypeFromPath(filePath) {
2298
- const normalized = path11.normalize(filePath).replace(/\\/g, "/");
2299
- const basename = path11.basename(filePath);
2551
+ const normalized = path13.normalize(filePath).replace(/\\/g, "/");
2552
+ const basename = path13.basename(filePath);
2300
2553
  if (normalized.includes("/.agentv/")) {
2301
2554
  if (basename === "config.yaml" || basename === "config.yml") {
2302
2555
  return "config";
@@ -3258,6 +3511,8 @@ var COPILOT_SDK_SETTINGS = /* @__PURE__ */ new Set([
3258
3511
  ...COMMON_SETTINGS,
3259
3512
  "cli_url",
3260
3513
  "cli_path",
3514
+ "args",
3515
+ "arguments",
3261
3516
  "github_token",
3262
3517
  "model",
3263
3518
  "cwd",
@@ -3266,8 +3521,12 @@ var COPILOT_SDK_SETTINGS = /* @__PURE__ */ new Set([
3266
3521
  "log_format",
3267
3522
  "stream_log",
3268
3523
  "system_prompt",
3269
- "custom_provider",
3270
- "byok"
3524
+ "subprovider",
3525
+ "base_url",
3526
+ "api_key",
3527
+ "bearer_token",
3528
+ "api_version",
3529
+ "wire_api"
3271
3530
  ]);
3272
3531
  var COPILOT_CLI_SETTINGS = /* @__PURE__ */ new Set([
3273
3532
  ...COMMON_SETTINGS,
@@ -3283,7 +3542,12 @@ var COPILOT_CLI_SETTINGS = /* @__PURE__ */ new Set([
3283
3542
  "log_format",
3284
3543
  "stream_log",
3285
3544
  "system_prompt",
3286
- "custom_provider"
3545
+ "subprovider",
3546
+ "base_url",
3547
+ "api_key",
3548
+ "bearer_token",
3549
+ "api_version",
3550
+ "wire_api"
3287
3551
  ]);
3288
3552
  var VSCODE_SETTINGS = /* @__PURE__ */ new Set([
3289
3553
  ...COMMON_SETTINGS,
@@ -3302,7 +3566,7 @@ var MOCK_SETTINGS = /* @__PURE__ */ new Set([
3302
3566
  var REPLAY_SETTINGS = /* @__PURE__ */ new Set([
3303
3567
  ...COMMON_SETTINGS,
3304
3568
  "fixtures",
3305
- "trace_envelopes",
3569
+ "execution_traces",
3306
3570
  "source_target",
3307
3571
  "suite",
3308
3572
  "eval_path",
@@ -3648,13 +3912,13 @@ async function validateTargetsFile(filePath) {
3648
3912
  }
3649
3913
  if (providerValue === "replay") {
3650
3914
  const hasFixtures = isNonEmptyString(target.fixtures);
3651
- const hasTraceEnvelopes = isNonEmptyString(target.trace_envelopes);
3652
- if (hasFixtures === hasTraceEnvelopes) {
3915
+ const hasExecutionTraces = isNonEmptyString(target.execution_traces);
3916
+ if (hasFixtures === hasExecutionTraces) {
3653
3917
  errors.push({
3654
3918
  severity: "error",
3655
3919
  filePath: absolutePath,
3656
3920
  location,
3657
- message: "Replay provider requires exactly one replay source: 'fixtures' or 'trace_envelopes'"
3921
+ message: "Replay provider requires exactly one replay source: 'fixtures' or 'execution_traces'"
3658
3922
  });
3659
3923
  }
3660
3924
  if (!isNonEmptyString(target.source_target)) {
@@ -3909,7 +4173,27 @@ function validateProjectResultsConfig(errors, filePath, rawResults, location) {
3909
4173
  });
3910
4174
  }
3911
4175
  }
3912
- validateGitRemoteUrl(errors, filePath, resultsRecord.repo_url, `${location}.repo_url`);
4176
+ const hasRepoUrl = resultsRecord.repo_url !== void 0;
4177
+ const hasRepoPath = resultsRecord.repo_path !== void 0;
4178
+ if (hasRepoUrl && hasRepoPath) {
4179
+ errors.push({
4180
+ severity: "error",
4181
+ filePath,
4182
+ location,
4183
+ message: `Field '${location}' must set only one of repo_url or repo_path`
4184
+ });
4185
+ } else if (hasRepoUrl) {
4186
+ validateGitRemoteUrl(errors, filePath, resultsRecord.repo_url, `${location}.repo_url`);
4187
+ } else if (hasRepoPath) {
4188
+ validateRequiredString(errors, filePath, resultsRecord.repo_path, `${location}.repo_path`);
4189
+ } else {
4190
+ errors.push({
4191
+ severity: "error",
4192
+ filePath,
4193
+ location,
4194
+ message: `Field '${location}' must set repo_url or repo_path`
4195
+ });
4196
+ }
3913
4197
  if (resultsRecord.branch !== void 0 && (typeof resultsRecord.branch !== "string" || resultsRecord.branch.trim().length === 0)) {
3914
4198
  errors.push({
3915
4199
  severity: "error",
@@ -3918,6 +4202,14 @@ function validateProjectResultsConfig(errors, filePath, rawResults, location) {
3918
4202
  message: `Field '${location}.branch' must be a non-empty string`
3919
4203
  });
3920
4204
  }
4205
+ if (resultsRecord.remote !== void 0 && (typeof resultsRecord.remote !== "string" || resultsRecord.remote.trim().length === 0)) {
4206
+ errors.push({
4207
+ severity: "error",
4208
+ filePath,
4209
+ location: `${location}.remote`,
4210
+ message: `Field '${location}.remote' must be a non-empty string`
4211
+ });
4212
+ }
3921
4213
  if (resultsRecord.path !== void 0) {
3922
4214
  if (typeof resultsRecord.path !== "string" || resultsRecord.path.trim().length === 0) {
3923
4215
  errors.push({
@@ -3953,6 +4245,14 @@ function validateProjectResultsConfig(errors, filePath, rawResults, location) {
3953
4245
  message: `Field '${location}.sync.auto_push' must be a boolean`
3954
4246
  });
3955
4247
  }
4248
+ if (syncRecord.require_push !== void 0 && typeof syncRecord.require_push !== "boolean") {
4249
+ errors.push({
4250
+ severity: "error",
4251
+ filePath,
4252
+ location: `${location}.sync.require_push`,
4253
+ message: `Field '${location}.sync.require_push' must be a boolean`
4254
+ });
4255
+ }
3956
4256
  }
3957
4257
  }
3958
4258
  if (resultsRecord.branch_prefix !== void 0 && (typeof resultsRecord.branch_prefix !== "string" || resultsRecord.branch_prefix.trim().length === 0)) {
@@ -3978,7 +4278,7 @@ function validateResultsConfig(errors, filePath, rawResults, location) {
3978
4278
  return;
3979
4279
  }
3980
4280
  const resultsRecord = rawResults;
3981
- if (resultsRecord.mode !== "github") {
4281
+ if (resultsRecord.mode !== void 0 && resultsRecord.mode !== "github") {
3982
4282
  errors.push({
3983
4283
  severity: "error",
3984
4284
  filePath,
@@ -3986,7 +4286,31 @@ function validateResultsConfig(errors, filePath, rawResults, location) {
3986
4286
  message: `Field '${location}.mode' must be 'github'`
3987
4287
  });
3988
4288
  }
3989
- validateRequiredString(errors, filePath, resultsRecord.repo, `${location}.repo`);
4289
+ const hasLegacyRepo = resultsRecord.repo !== void 0;
4290
+ const hasRepoUrl = resultsRecord.repo_url !== void 0;
4291
+ const hasRepoPath = resultsRecord.repo_path !== void 0;
4292
+ const sourceCount = [hasLegacyRepo, hasRepoUrl, hasRepoPath].filter(Boolean).length;
4293
+ if (sourceCount === 0) {
4294
+ errors.push({
4295
+ severity: "error",
4296
+ filePath,
4297
+ location,
4298
+ message: `Field '${location}' must set repo_url/repo or repo_path`
4299
+ });
4300
+ } else if (sourceCount > 1) {
4301
+ errors.push({
4302
+ severity: "error",
4303
+ filePath,
4304
+ location,
4305
+ message: `Field '${location}' must set only one of repo_url/repo or repo_path`
4306
+ });
4307
+ } else if (hasLegacyRepo) {
4308
+ validateRequiredString(errors, filePath, resultsRecord.repo, `${location}.repo`);
4309
+ } else if (hasRepoUrl) {
4310
+ validateGitRemoteUrl(errors, filePath, resultsRecord.repo_url, `${location}.repo_url`);
4311
+ } else {
4312
+ validateRequiredString(errors, filePath, resultsRecord.repo_path, `${location}.repo_path`);
4313
+ }
3990
4314
  if (resultsRecord.branch !== void 0 && (typeof resultsRecord.branch !== "string" || resultsRecord.branch.trim().length === 0)) {
3991
4315
  errors.push({
3992
4316
  severity: "error",
@@ -3995,6 +4319,14 @@ function validateResultsConfig(errors, filePath, rawResults, location) {
3995
4319
  message: `Field '${location}.branch' must be a non-empty string`
3996
4320
  });
3997
4321
  }
4322
+ if (resultsRecord.remote !== void 0 && (typeof resultsRecord.remote !== "string" || resultsRecord.remote.trim().length === 0)) {
4323
+ errors.push({
4324
+ severity: "error",
4325
+ filePath,
4326
+ location: `${location}.remote`,
4327
+ message: `Field '${location}.remote' must be a non-empty string`
4328
+ });
4329
+ }
3998
4330
  if (resultsRecord.path !== void 0) {
3999
4331
  if (typeof resultsRecord.path !== "string" || resultsRecord.path.trim().length === 0) {
4000
4332
  errors.push({
@@ -4023,6 +4355,34 @@ function validateResultsConfig(errors, filePath, rawResults, location) {
4023
4355
  message: `Field '${location}.auto_push' must be a boolean`
4024
4356
  });
4025
4357
  }
4358
+ if (resultsRecord.sync !== void 0) {
4359
+ if (typeof resultsRecord.sync !== "object" || resultsRecord.sync === null || Array.isArray(resultsRecord.sync)) {
4360
+ errors.push({
4361
+ severity: "error",
4362
+ filePath,
4363
+ location: `${location}.sync`,
4364
+ message: `Field '${location}.sync' must be an object`
4365
+ });
4366
+ } else {
4367
+ const syncRecord = resultsRecord.sync;
4368
+ if (syncRecord.auto_push !== void 0 && typeof syncRecord.auto_push !== "boolean") {
4369
+ errors.push({
4370
+ severity: "error",
4371
+ filePath,
4372
+ location: `${location}.sync.auto_push`,
4373
+ message: `Field '${location}.sync.auto_push' must be a boolean`
4374
+ });
4375
+ }
4376
+ if (syncRecord.require_push !== void 0 && typeof syncRecord.require_push !== "boolean") {
4377
+ errors.push({
4378
+ severity: "error",
4379
+ filePath,
4380
+ location: `${location}.sync.require_push`,
4381
+ message: `Field '${location}.sync.require_push' must be a boolean`
4382
+ });
4383
+ }
4384
+ }
4385
+ }
4026
4386
  if (resultsRecord.branch_prefix !== void 0 && (typeof resultsRecord.branch_prefix !== "string" || resultsRecord.branch_prefix.trim().length === 0)) {
4027
4387
  errors.push({
4028
4388
  severity: "error",
@@ -4251,7 +4611,7 @@ function looksLikeFilePath(arg) {
4251
4611
  }
4252
4612
  async function fileExists3(filePath) {
4253
4613
  try {
4254
- await access4(filePath);
4614
+ await access5(filePath);
4255
4615
  return true;
4256
4616
  } catch {
4257
4617
  return false;
@@ -4609,6 +4969,15 @@ function normalizeString(value) {
4609
4969
  const trimmed = value.trim();
4610
4970
  return trimmed.length > 0 ? trimmed : void 0;
4611
4971
  }
4972
+ function resultsRepoOverride(value) {
4973
+ if (!value) {
4974
+ return {};
4975
+ }
4976
+ if (value === "current" || value === "." || value.startsWith("./") || value.startsWith("../") || value.startsWith("/") || value.startsWith("~/") || value.startsWith("~\\") || /^[A-Za-z]:[/\\]/.test(value)) {
4977
+ return { repo_path: value === "current" ? "." : value };
4978
+ }
4979
+ return { repo: value };
4980
+ }
4612
4981
  function resolveTimestampPlaceholder(value) {
4613
4982
  if (!value.includes("{timestamp}")) {
4614
4983
  return value;
@@ -4664,13 +5033,13 @@ function normalizeSourceMetadataByEvalFile(value) {
4664
5033
  const entries = [...value.entries()].filter(
4665
5034
  (entry) => typeof entry[0] === "string" && typeof entry[1] === "object" && entry[1] !== null && !Array.isArray(entry[1])
4666
5035
  );
4667
- return entries.length > 0 ? new Map(entries.map(([key, metadata]) => [path12.resolve(key), metadata])) : void 0;
5036
+ return entries.length > 0 ? new Map(entries.map(([key, metadata]) => [path14.resolve(key), metadata])) : void 0;
4668
5037
  }
4669
5038
  if (typeof value === "object" && value !== null && !Array.isArray(value)) {
4670
5039
  const entries = Object.entries(value).filter(
4671
5040
  (entry) => typeof entry[1] === "object" && entry[1] !== null && !Array.isArray(entry[1])
4672
5041
  );
4673
- return entries.length > 0 ? new Map(entries.map(([key, metadata]) => [path12.resolve(key), metadata])) : void 0;
5042
+ return entries.length > 0 ? new Map(entries.map(([key, metadata]) => [path14.resolve(key), metadata])) : void 0;
4674
5043
  }
4675
5044
  return void 0;
4676
5045
  }
@@ -4684,10 +5053,10 @@ var LEGACY_OUTPUT_FILE_EXTENSIONS = /* @__PURE__ */ new Set([
4684
5053
  ".htm"
4685
5054
  ]);
4686
5055
  function looksLikeLegacyOutputFilePath(value) {
4687
- return LEGACY_OUTPUT_FILE_EXTENSIONS.has(path12.extname(value).toLowerCase());
5056
+ return LEGACY_OUTPUT_FILE_EXTENSIONS.has(path14.extname(value).toLowerCase());
4688
5057
  }
4689
5058
  function outputFileMigrationMessage(value) {
4690
- const ext = path12.extname(value).toLowerCase();
5059
+ const ext = path14.extname(value).toLowerCase();
4691
5060
  const removalHint = ext === ".xml" ? "JUnit XML export from agentv eval has been removed." : "Flat result file export from agentv eval has been removed.";
4692
5061
  return `--output expects a run directory, not a file path: ${value}
4693
5062
  ${removalHint} Set --output <dir> for the canonical run artifacts; AgentV always writes <dir>/index.jsonl.`;
@@ -4695,7 +5064,7 @@ ${removalHint} Set --output <dir> for the canonical run artifacts; AgentV always
4695
5064
  function artifactsMigrationMessage(artifactsDir, outputDir) {
4696
5065
  const lines = [`--artifacts was removed from agentv eval. Use --output ${artifactsDir} instead.`];
4697
5066
  if (outputDir && looksLikeLegacyOutputFilePath(outputDir)) {
4698
- const ext = path12.extname(outputDir).toLowerCase();
5067
+ const ext = path14.extname(outputDir).toLowerCase();
4699
5068
  lines.push(
4700
5069
  ext === ".xml" ? "JUnit XML export from agentv eval has been removed." : "Flat result file export from agentv eval has been removed."
4701
5070
  );
@@ -4774,6 +5143,21 @@ function normalizeOptions(rawOptions, config, yamlExecution) {
4774
5143
  const yamlWorkspacePath = normalizeString(yamlExecutionRecord?.workspace_path);
4775
5144
  const workspacePath = cliWorkspacePath ?? yamlWorkspacePath;
4776
5145
  const workspaceMode = cliWorkspacePath ? "static" : cliWorkspaceMode ?? yamlWorkspaceMode;
5146
+ const resultsRepo = normalizeString(rawOptions.resultsRepo);
5147
+ const resultsPush = normalizeBoolean(rawOptions.resultsPush);
5148
+ const resultsNoPush = normalizeBoolean(rawOptions.noResultsPush);
5149
+ const resultsRequirePush = normalizeBoolean(rawOptions.resultsRequirePush);
5150
+ const resultsOverrides = {
5151
+ ...resultsRepoOverride(resultsRepo),
5152
+ ...normalizeString(rawOptions.resultsBranch) !== void 0 && {
5153
+ branch: normalizeString(rawOptions.resultsBranch)
5154
+ },
5155
+ ...normalizeString(rawOptions.resultsRemote) !== void 0 && {
5156
+ remote: normalizeString(rawOptions.resultsRemote)
5157
+ },
5158
+ ...resultsPush || resultsNoPush ? { auto_push: resultsPush && !resultsNoPush } : {},
5159
+ ...resultsRequirePush ? { require_push: true } : {}
5160
+ };
4777
5161
  return {
4778
5162
  target: singleTarget,
4779
5163
  cliTargets,
@@ -4825,11 +5209,12 @@ function normalizeOptions(rawOptions, config, yamlExecution) {
4825
5209
  budgetUsd: normalizeOptionalNumber(rawOptions.budgetUsd),
4826
5210
  sourceMetadataByEvalFile: normalizeSourceMetadataByEvalFile(
4827
5211
  rawOptions.sourceMetadataByEvalFile
4828
- )
5212
+ ),
5213
+ resultsOverrides: Object.keys(resultsOverrides).length > 0 ? resultsOverrides : void 0
4829
5214
  };
4830
5215
  }
4831
5216
  function withSourceMetadata(result, testFilePath, options) {
4832
- const sourceMetadata = options.sourceMetadataByEvalFile?.get(path12.resolve(testFilePath));
5217
+ const sourceMetadata = options.sourceMetadataByEvalFile?.get(path14.resolve(testFilePath));
4833
5218
  if (!sourceMetadata) {
4834
5219
  return result;
4835
5220
  }
@@ -4843,7 +5228,7 @@ function withSourceMetadata(result, testFilePath, options) {
4843
5228
  }
4844
5229
  async function ensureFileExists(filePath, description) {
4845
5230
  try {
4846
- await access5(filePath, constants4.F_OK);
5231
+ await access6(filePath, constants4.F_OK);
4847
5232
  } catch {
4848
5233
  throw new Error(`${description} not found: ${filePath}`);
4849
5234
  }
@@ -4851,7 +5236,7 @@ async function ensureFileExists(filePath, description) {
4851
5236
  function buildDefaultOutputPathForExperiment(cwd, experiment) {
4852
5237
  const runDir = buildDefaultRunDir(cwd, experiment);
4853
5238
  mkdirSync2(runDir, { recursive: true });
4854
- return path12.join(runDir, "index.jsonl");
5239
+ return path14.join(runDir, "index.jsonl");
4855
5240
  }
4856
5241
  function createProgressReporter(maxWorkers, options) {
4857
5242
  const display = new ProgressDisplay(maxWorkers, options);
@@ -4865,7 +5250,7 @@ function createProgressReporter(maxWorkers, options) {
4865
5250
  };
4866
5251
  }
4867
5252
  function makeTestCaseKey(testFilePath, testId) {
4868
- return `${path12.resolve(testFilePath)}::${testId}`;
5253
+ return `${path14.resolve(testFilePath)}::${testId}`;
4869
5254
  }
4870
5255
  function resolveTargetLabel(requestedName, resolvedName) {
4871
5256
  if (resolvedName !== requestedName) {
@@ -4927,7 +5312,7 @@ async function prepareFileMetadata(params) {
4927
5312
  repoRoot,
4928
5313
  verbose: options.verbose
4929
5314
  });
4930
- const relativePath = path12.relative(cwd, testFilePath);
5315
+ const relativePath = path14.relative(cwd, testFilePath);
4931
5316
  const category = deriveCategory(relativePath);
4932
5317
  const suite = await loadTestSuite(testFilePath, repoRoot, {
4933
5318
  verbose: options.verbose,
@@ -4952,7 +5337,7 @@ async function prepareFileMetadata(params) {
4952
5337
  selections = [
4953
5338
  {
4954
5339
  selection: transcriptSelection,
4955
- inlineTargetLabel: `transcript (${path12.basename(options.transcript)})`
5340
+ inlineTargetLabel: `transcript (${path14.basename(options.transcript)})`
4956
5341
  }
4957
5342
  ];
4958
5343
  } else if (suite.inlineTarget && options.cliTargets.length === 0) {
@@ -5118,7 +5503,7 @@ async function runSingleEvalFile(params) {
5118
5503
  } = params;
5119
5504
  const targetName = selection.targetName;
5120
5505
  const replayRecording = options.recordReplay ? {
5121
- fixturesPath: path12.resolve(options.recordReplay),
5506
+ fixturesPath: path14.resolve(options.recordReplay),
5122
5507
  sourceTarget: targetName,
5123
5508
  variant: options.recordReplayVariant
5124
5509
  } : void 0;
@@ -5269,7 +5654,7 @@ async function runEvalCommand(input) {
5269
5654
  );
5270
5655
  }
5271
5656
  const repoRoot = await findRepoRoot(cwd);
5272
- const yamlConfig = await loadConfig(path12.join(cwd, "_"), repoRoot);
5657
+ const yamlConfig = await loadConfig(path14.join(cwd, "_"), repoRoot);
5273
5658
  let requiredVersionCheck;
5274
5659
  if (yamlConfig?.required_version) {
5275
5660
  requiredVersionCheck = await enforceRequiredVersion(yamlConfig.required_version, {
@@ -5305,7 +5690,7 @@ async function runEvalCommand(input) {
5305
5690
  }
5306
5691
  let retryNonErrorResults;
5307
5692
  if (options.retryErrors) {
5308
- const retryPath = path12.resolve(options.retryErrors);
5693
+ const retryPath = path14.resolve(options.retryErrors);
5309
5694
  await ensureFileExists(retryPath, "Retry-errors JSONL file");
5310
5695
  const completedIds = await loadFullyCompletedTestIds(retryPath);
5311
5696
  const errorIds = await loadErrorTestIds(retryPath);
@@ -5323,7 +5708,7 @@ async function runEvalCommand(input) {
5323
5708
  if (cachedDir) {
5324
5709
  options = { ...options, outputDir: cachedDir };
5325
5710
  const flagLabel = options.rerunFailed ? "rerun-failed" : "resume";
5326
- const displayDir = path12.relative(cwd, cachedDir) || cachedDir;
5711
+ const displayDir = path14.relative(cwd, cachedDir) || cachedDir;
5327
5712
  console.log(`Auto-detected last run dir for --${flagLabel}: ${displayDir}`);
5328
5713
  }
5329
5714
  }
@@ -5332,8 +5717,8 @@ async function runEvalCommand(input) {
5332
5717
  if (options.resume && !options.retryErrors) {
5333
5718
  const explicitResumeDir = options.outputDir;
5334
5719
  if (explicitResumeDir) {
5335
- const resumeIndexPath = path12.join(path12.resolve(explicitResumeDir), "index.jsonl");
5336
- if (existsSync6(resumeIndexPath)) {
5720
+ const resumeIndexPath = path14.join(path14.resolve(explicitResumeDir), "index.jsonl");
5721
+ if (existsSync7(resumeIndexPath)) {
5337
5722
  const content = await readFile8(resumeIndexPath, "utf8");
5338
5723
  const existingResults = parseJsonlResults(content);
5339
5724
  resumeSkipKeys = /* @__PURE__ */ new Set();
@@ -5357,7 +5742,7 @@ async function runEvalCommand(input) {
5357
5742
  }
5358
5743
  }
5359
5744
  if (options.workspacePath) {
5360
- const resolvedWorkspace = path12.resolve(options.workspacePath);
5745
+ const resolvedWorkspace = path14.resolve(options.workspacePath);
5361
5746
  try {
5362
5747
  const { stat: stat3 } = await import("node:fs/promises");
5363
5748
  const stats = await stat3(resolvedWorkspace);
@@ -5379,27 +5764,35 @@ async function runEvalCommand(input) {
5379
5764
  let runDir;
5380
5765
  let outputPath;
5381
5766
  if (explicitDir) {
5382
- runDir = path12.resolve(explicitDir);
5767
+ runDir = path14.resolve(explicitDir);
5383
5768
  mkdirSync2(runDir, { recursive: true });
5384
- outputPath = path12.join(runDir, "index.jsonl");
5769
+ outputPath = path14.join(runDir, "index.jsonl");
5385
5770
  } else {
5386
5771
  outputPath = buildDefaultOutputPathForExperiment(cwd, options.experiment);
5387
- runDir = path12.dirname(outputPath);
5772
+ runDir = path14.dirname(outputPath);
5388
5773
  }
5389
5774
  let otelExporter = null;
5390
5775
  const useFileExport = !!options.otelFile;
5391
5776
  if (options.exportOtel || useFileExport) {
5392
5777
  try {
5393
- const { OtelTraceExporter, OTEL_BACKEND_PRESETS } = await import("./dist-HVLBDG5F.js");
5778
+ const { OtelTraceExporter } = await import("./dist-6Z4OSITR.js");
5394
5779
  let endpoint = process.env.OTEL_EXPORTER_OTLP_ENDPOINT;
5395
5780
  let headers = {};
5781
+ let resourceAttributes = {};
5396
5782
  if (options.otelBackend) {
5397
- const preset = OTEL_BACKEND_PRESETS[options.otelBackend];
5398
- if (preset) {
5399
- endpoint = preset.endpoint;
5400
- headers = preset.headers(process.env);
5783
+ const resolvedBackend = await resolveOtelBackend(options.otelBackend, {
5784
+ env: process.env,
5785
+ cwd
5786
+ });
5787
+ if (resolvedBackend) {
5788
+ endpoint = resolvedBackend.endpoint;
5789
+ headers = { ...headers, ...resolvedBackend.headers };
5790
+ resourceAttributes = { ...resourceAttributes, ...resolvedBackend.resourceAttributes };
5791
+ for (const warning of resolvedBackend.warnings ?? []) {
5792
+ console.warn(warning);
5793
+ }
5401
5794
  } else {
5402
- console.warn(`Unknown OTel backend preset: ${options.otelBackend}`);
5795
+ console.warn(`Unknown OTel backend resolver: ${options.otelBackend}`);
5403
5796
  }
5404
5797
  }
5405
5798
  if (process.env.OTEL_EXPORTER_OTLP_HEADERS) {
@@ -5412,9 +5805,10 @@ async function runEvalCommand(input) {
5412
5805
  otelExporter = new OtelTraceExporter({
5413
5806
  endpoint,
5414
5807
  headers,
5808
+ resourceAttributes,
5415
5809
  captureContent,
5416
5810
  groupTurns: options.otelGroupTurns,
5417
- otlpFilePath: options.otelFile ? path12.resolve(options.otelFile) : void 0
5811
+ otlpFilePath: options.otelFile ? path14.resolve(options.otelFile) : void 0
5418
5812
  });
5419
5813
  const initialized = await otelExporter.init();
5420
5814
  if (!initialized) {
@@ -5432,9 +5826,9 @@ async function runEvalCommand(input) {
5432
5826
  }
5433
5827
  const primaryWritePath = outputPath;
5434
5828
  console.log(`Artifact directory: ${runDir}`);
5435
- const resolvedTestFiles = input.testFiles.map((file) => path12.resolve(file));
5829
+ const resolvedTestFiles = input.testFiles.map((file) => path14.resolve(file));
5436
5830
  if (options.otelFile) {
5437
- console.log(`OTLP JSON file: ${path12.resolve(options.otelFile)}`);
5831
+ console.log(`OTLP JSON file: ${path14.resolve(options.otelFile)}`);
5438
5832
  }
5439
5833
  const evaluationRunner = await resolveEvaluationRunner();
5440
5834
  const allResults = [];
@@ -5462,7 +5856,7 @@ async function runEvalCommand(input) {
5462
5856
  for (const [testFilePath, meta] of fileMetadata.entries()) {
5463
5857
  if (!matchesTagFilters(meta.tags, options.tags, options.excludeTags)) {
5464
5858
  fileMetadata.delete(testFilePath);
5465
- skippedFiles.push(path12.relative(cwd, testFilePath));
5859
+ skippedFiles.push(path14.relative(cwd, testFilePath));
5466
5860
  }
5467
5861
  }
5468
5862
  if (skippedFiles.length > 0 && options.verbose) {
@@ -5485,12 +5879,12 @@ async function runEvalCommand(input) {
5485
5879
  tsConfigCache: options.tsConfigCache
5486
5880
  });
5487
5881
  const activeCachePath = options.cachePath ?? yamlCachePath ?? options.tsConfigCachePath;
5488
- const cache = cacheEnabled ? new ResponseCache(activeCachePath ? path12.resolve(activeCachePath) : void 0) : void 0;
5882
+ const cache = cacheEnabled ? new ResponseCache(activeCachePath ? path14.resolve(activeCachePath) : void 0) : void 0;
5489
5883
  if (cache) {
5490
5884
  console.log(`Response cache: enabled (${cache.cachePath})`);
5491
5885
  }
5492
5886
  if (options.recordReplay) {
5493
- console.log(`Replay recording: ${path12.resolve(options.recordReplay)}`);
5887
+ console.log(`Replay recording: ${path14.resolve(options.recordReplay)}`);
5494
5888
  }
5495
5889
  const yamlThreshold = firstMeta?.threshold;
5496
5890
  const resolvedThreshold = options.threshold ?? yamlThreshold;
@@ -5586,7 +5980,7 @@ async function runEvalCommand(input) {
5586
5980
  const activeTestFiles = resolvedTestFiles.filter((f) => fileMetadata.has(f));
5587
5981
  let transcriptProviderFactory;
5588
5982
  if (options.transcript) {
5589
- const { TranscriptProvider } = await import("./dist-HVLBDG5F.js");
5983
+ const { TranscriptProvider } = await import("./dist-6Z4OSITR.js");
5590
5984
  const transcriptProvider = await TranscriptProvider.fromFile(options.transcript);
5591
5985
  const totalTests = [...fileMetadata.values()].reduce(
5592
5986
  (sum, meta) => sum + meta.testCases.length,
@@ -5614,7 +6008,11 @@ async function runEvalCommand(input) {
5614
6008
  let wipCleanedUp = false;
5615
6009
  let finalExportStatus = "disabled";
5616
6010
  {
5617
- const wipConfig = await loadNormalizedResultsConfig(cwd).catch(() => void 0);
6011
+ const wipConfig = await loadNormalizedResultsConfig(
6012
+ cwd,
6013
+ void 0,
6014
+ options.resultsOverrides
6015
+ ).catch(() => void 0);
5618
6016
  if (wipConfig?.auto_push) {
5619
6017
  wipLoop = new WipCheckpointLoop({
5620
6018
  config: wipConfig,
@@ -5631,7 +6029,7 @@ async function runEvalCommand(input) {
5631
6029
  if (!targetPrep2) continue;
5632
6030
  const budgetMsg = `Run budget exceeded ($${runBudgetTracker.currentCostUsd.toFixed(4)} / $${runBudgetTracker.budgetCapUsd.toFixed(4)})`;
5633
6031
  console.log(`
5634
- \u26A0 ${budgetMsg} \u2014 skipping ${path12.basename(testFilePath)}`);
6032
+ \u26A0 ${budgetMsg} \u2014 skipping ${path14.basename(testFilePath)}`);
5635
6033
  for (const { selection } of targetPrep2.selections) {
5636
6034
  const skippedResults = targetPrep2.testCases.map((testCase) => ({
5637
6035
  timestamp: (/* @__PURE__ */ new Date()).toISOString(),
@@ -5710,7 +6108,7 @@ async function runEvalCommand(input) {
5710
6108
  threshold: resolvedThreshold,
5711
6109
  providerFactory: transcriptProviderFactory ?? targetPrep.providerFactory
5712
6110
  });
5713
- const evalFile = path12.relative(cwd, testFilePath);
6111
+ const evalFile = path14.relative(cwd, testFilePath);
5714
6112
  const existingSummary = remoteEvalSummaries.find(
5715
6113
  (summary2) => summary2.evalFile === evalFile
5716
6114
  );
@@ -5727,7 +6125,7 @@ async function runEvalCommand(input) {
5727
6125
  const message = fileError instanceof Error ? fileError.message : String(fileError);
5728
6126
  console.error(
5729
6127
  `
5730
- [ERROR] \u26A0 Eval file failed: ${path12.basename(testFilePath)} \u2014 ${message}
6128
+ [ERROR] \u26A0 Eval file failed: ${path14.basename(testFilePath)} \u2014 ${message}
5731
6129
  `
5732
6130
  );
5733
6131
  const errorResults = filteredTestCases.map(
@@ -5806,7 +6204,7 @@ ${formatRequiredVersionFailureNote(requiredVersionCheck)}`);
5806
6204
  );
5807
6205
  const taskBundleTargets = buildTaskBundleTargetSelections(activeTestFiles, fileMetadata);
5808
6206
  if (isResumeAppend) {
5809
- const { writePerTestArtifacts } = await import("./artifact-writer-MK5X5MSO.js");
6207
+ const { writePerTestArtifacts } = await import("./artifact-writer-GIAIMGPQ.js");
5810
6208
  await writePerTestArtifacts(allResults, runDir, {
5811
6209
  experiment: normalizeExperimentName(options.experiment),
5812
6210
  cwd,
@@ -5818,7 +6216,7 @@ ${formatRequiredVersionFailureNote(requiredVersionCheck)}`);
5818
6216
  runDir,
5819
6217
  { evalFile, experiment: normalizeExperimentName(options.experiment) }
5820
6218
  );
5821
- const indexPath = path12.join(runDir, "index.jsonl");
6219
+ const indexPath = path14.join(runDir, "index.jsonl");
5822
6220
  console.log(`Artifact workspace updated: ${runDir}`);
5823
6221
  console.log(` Index: ${indexPath}`);
5824
6222
  console.log(` Per-test artifacts: ${runDir} (${allResults.length} new test directories)`);
@@ -5879,13 +6277,14 @@ Results written to: ${outputPath}`);
5879
6277
  status: result.executionStatus === "execution_error" || result.error ? "ERROR" : result.score >= DEFAULT_THRESHOLD ? "PASS" : "FAIL"
5880
6278
  }))
5881
6279
  })),
5882
- experiment: normalizeExperimentName(options.experiment)
6280
+ experiment: normalizeExperimentName(options.experiment),
6281
+ results_overrides: options.resultsOverrides
5883
6282
  });
5884
6283
  }
5885
6284
  if (summary.executionErrorCount > 0 && !options.retryErrors && !options.resume) {
5886
- const evalFileArgs = activeTestFiles.map((f) => path12.relative(cwd, f)).join(" ");
6285
+ const evalFileArgs = activeTestFiles.map((f) => path14.relative(cwd, f)).join(" ");
5887
6286
  const targetFlag = options.target ? ` --target ${options.target}` : "";
5888
- const relativeRunDir = path12.relative(cwd, runDir);
6287
+ const relativeRunDir = path14.relative(cwd, runDir);
5889
6288
  console.log(
5890
6289
  `
5891
6290
  Tip: ${summary.executionErrorCount} execution error(s) detected. Re-run failed tests with:
@@ -5934,8 +6333,8 @@ async function resolveEvaluationRunner() {
5934
6333
  if (!overridePath) {
5935
6334
  return runEvaluation;
5936
6335
  }
5937
- const resolved = path12.isAbsolute(overridePath) ? overridePath : path12.resolve(process.cwd(), overridePath);
5938
- const moduleUrl = pathToFileURL(resolved).href;
6336
+ const resolved = path14.isAbsolute(overridePath) ? overridePath : path14.resolve(process.cwd(), overridePath);
6337
+ const moduleUrl = pathToFileURL2(resolved).href;
5939
6338
  const mod = await import(moduleUrl);
5940
6339
  const candidate = mod.runEvaluation;
5941
6340
  if (typeof candidate !== "function") {
@@ -5947,11 +6346,11 @@ async function resolveEvaluationRunner() {
5947
6346
  }
5948
6347
 
5949
6348
  // src/commands/eval/discover.ts
5950
- import path13 from "node:path";
6349
+ import path15 from "node:path";
5951
6350
  import fg2 from "fast-glob";
5952
6351
  async function discoverEvalFiles(cwd) {
5953
6352
  const repoRoot = await findRepoRoot(cwd);
5954
- const config = await loadConfig(path13.join(cwd, "_"), repoRoot);
6353
+ const config = await loadConfig(path15.join(cwd, "_"), repoRoot);
5955
6354
  const patterns = config?.eval_patterns && config.eval_patterns.length > 0 ? config.eval_patterns : DEFAULT_EVAL_PATTERNS;
5956
6355
  const ignore = ["**/node_modules/**", "**/dist/**"];
5957
6356
  const matches = await fg2(patterns, {
@@ -5963,7 +6362,7 @@ async function discoverEvalFiles(cwd) {
5963
6362
  caseSensitiveMatch: false
5964
6363
  });
5965
6364
  const evalFiles = matches.map((absPath) => {
5966
- const relativePath = path13.relative(cwd, absPath);
6365
+ const relativePath = path15.relative(cwd, absPath);
5967
6366
  const category = deriveCategory(relativePath);
5968
6367
  return { path: absPath, relativePath, category };
5969
6368
  });
@@ -5987,6 +6386,10 @@ export {
5987
6386
  resolveEvalPaths,
5988
6387
  findRepoRoot,
5989
6388
  package_default,
6389
+ RESULT_INDEX_FILENAME,
6390
+ createRunDirName,
6391
+ buildDefaultRunDir,
6392
+ resolveRunManifestPath,
5990
6393
  parseResultManifest,
5991
6394
  resolveResultSourcePath,
5992
6395
  loadManifestResults,
@@ -6036,4 +6439,4 @@ export {
6036
6439
  getCategories,
6037
6440
  filterByCategory
6038
6441
  };
6039
- //# sourceMappingURL=chunk-6M5S4IJW.js.map
6442
+ //# sourceMappingURL=chunk-WKA5QDNQ.js.map