agentv 4.30.0 → 4.31.1-next.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30) hide show
  1. package/dist/{artifact-writer-JJLJUUUY.js → artifact-writer-FZ5RUHWC.js} +4 -4
  2. package/dist/{chunk-L3FMZXCD.js → chunk-377GONL7.js} +30 -29
  3. package/dist/chunk-377GONL7.js.map +1 -0
  4. package/dist/{chunk-J4AMO3MD.js → chunk-A346PC3C.js} +102 -41
  5. package/dist/{chunk-J4AMO3MD.js.map → chunk-A346PC3C.js.map} +1 -1
  6. package/dist/{chunk-B2YJVU72.js → chunk-E6KALARL.js} +140 -64
  7. package/dist/chunk-E6KALARL.js.map +1 -0
  8. package/dist/{chunk-YFVD6FC6.js → chunk-IJPWTVDU.js} +3 -3
  9. package/dist/{chunk-LUZRCQNH.js → chunk-LOYPSIE7.js} +278 -52
  10. package/dist/chunk-LOYPSIE7.js.map +1 -0
  11. package/dist/cli.js +5 -5
  12. package/dist/{dist-GYCHDLTZ.js → dist-CRYAFKLS.js} +11 -7
  13. package/dist/index.js +5 -5
  14. package/dist/{interactive-DIQOOZX4.js → interactive-O5EUQPCE.js} +5 -5
  15. package/dist/studio/assets/index-9UixPaIJ.js +116 -0
  16. package/dist/studio/assets/index-BAvxZ1rb.css +1 -0
  17. package/dist/studio/assets/{index-SVeKAE-f.js → index-DPrj3J9P.js} +1 -1
  18. package/dist/studio/index.html +2 -2
  19. package/dist/{ts-eval-loader-JL5DGTJL-U5LTKGOE.js → ts-eval-loader-XR6DNOZ3-GA7B5N26.js} +2 -2
  20. package/package.json +1 -1
  21. package/dist/chunk-B2YJVU72.js.map +0 -1
  22. package/dist/chunk-L3FMZXCD.js.map +0 -1
  23. package/dist/chunk-LUZRCQNH.js.map +0 -1
  24. package/dist/studio/assets/index-BUmBj4ej.js +0 -116
  25. package/dist/studio/assets/index-CIpCCDKl.css +0 -1
  26. /package/dist/{artifact-writer-JJLJUUUY.js.map → artifact-writer-FZ5RUHWC.js.map} +0 -0
  27. /package/dist/{chunk-YFVD6FC6.js.map → chunk-IJPWTVDU.js.map} +0 -0
  28. /package/dist/{dist-GYCHDLTZ.js.map → dist-CRYAFKLS.js.map} +0 -0
  29. /package/dist/{interactive-DIQOOZX4.js.map → interactive-O5EUQPCE.js.map} +0 -0
  30. /package/dist/{ts-eval-loader-JL5DGTJL-U5LTKGOE.js.map → ts-eval-loader-XR6DNOZ3-GA7B5N26.js.map} +0 -0
@@ -16,7 +16,7 @@ import {
16
16
  toSnakeCaseDeep,
17
17
  writeArtifactsFromResults,
18
18
  writeInitialBenchmarkArtifact
19
- } from "./chunk-YFVD6FC6.js";
19
+ } from "./chunk-IJPWTVDU.js";
20
20
  import {
21
21
  ResponseCache,
22
22
  RunBudgetTracker,
@@ -24,12 +24,15 @@ import {
24
24
  directPushResults,
25
25
  directorySizeBytes,
26
26
  getResultsRepoStatus,
27
+ listGitRuns,
27
28
  loadTsConfig,
29
+ materializeGitRun,
30
+ normalizeResultsConfig,
28
31
  resolveResultsRepoRunsDir,
29
32
  shouldEnableCache,
30
33
  shouldSkipCacheForTemperature,
31
34
  syncResultsRepo
32
- } from "./chunk-LUZRCQNH.js";
35
+ } from "./chunk-LOYPSIE7.js";
33
36
  import {
34
37
  CLI_PLACEHOLDERS,
35
38
  COMMON_TARGET_SETTINGS,
@@ -61,7 +64,7 @@ import {
61
64
  subscribeToCopilotSdkLogEntries,
62
65
  subscribeToPiLogEntries,
63
66
  toCamelCaseDeep
64
- } from "./chunk-L3FMZXCD.js";
67
+ } from "./chunk-377GONL7.js";
65
68
 
66
69
  // src/commands/eval/shared.ts
67
70
  import { constants } from "node:fs";
@@ -177,7 +180,7 @@ async function findRepoRoot(start) {
177
180
  // package.json
178
181
  var package_default = {
179
182
  name: "agentv",
180
- version: "4.30.0",
183
+ version: "4.31.1-next.1",
181
184
  description: "CLI entry point for AgentV",
182
185
  type: "module",
183
186
  repository: {
@@ -285,7 +288,7 @@ async function discoverTargetsFile(options) {
285
288
  }
286
289
 
287
290
  // src/commands/eval/run-eval.ts
288
- import { constants as constants4, existsSync as existsSync4, mkdirSync } from "node:fs";
291
+ import { constants as constants4, existsSync as existsSync5, mkdirSync } from "node:fs";
289
292
  import { access as access5, readFile as readFile8 } from "node:fs/promises";
290
293
  import path15 from "node:path";
291
294
  import { pathToFileURL } from "node:url";
@@ -514,6 +517,7 @@ ${ANSI_GREEN}Update complete.${ANSI_RESET}`);
514
517
  }
515
518
 
516
519
  // src/commands/results/remote.ts
520
+ import { existsSync as existsSync3 } from "node:fs";
517
521
  import path5 from "node:path";
518
522
 
519
523
  // src/commands/inspect/utils.ts
@@ -1074,19 +1078,30 @@ function formatScore(score) {
1074
1078
  }
1075
1079
 
1076
1080
  // src/commands/results/remote.ts
1081
// Cache of git run listings keyed by results-repo directory. Each entry holds
// the promise returned by listGitRuns plus the time at which it stops being fresh.
var gitRunsCache = /* @__PURE__ */ new Map();
// How long (in milliseconds) a cached listing is reused before listGitRuns is called again.
var GIT_RUNS_CACHE_TTL_MS = 6e4;
/**
 * Returns the git run listing for `repoDir`, reusing a cached promise while it
 * is still fresh so that concurrent and repeated callers share one
 * `listGitRuns` call.
 *
 * Failed listings are never reused: when the underlying promise rejects, its
 * cache entry is removed so the next caller retries instead of receiving the
 * same rejection for the rest of the TTL window.
 *
 * @param {string} repoDir - Results repo directory passed through to `listGitRuns`.
 * @returns {Promise<*>} The promise produced by `listGitRuns(repoDir)`; callers
 *   in this file treat the resolved value as an array of runs.
 */
function cachedListGitRuns(repoDir) {
  const now = Date.now();
  const cached = gitRunsCache.get(repoDir);
  if (cached && cached.expiresAt > now) {
    return cached.data;
  }
  const promise = listGitRuns(repoDir);
  const entry = { data: promise, expiresAt: now + GIT_RUNS_CACHE_TTL_MS };
  gitRunsCache.set(repoDir, entry);
  // Only remove the entry this call created; a newer call may have replaced it.
  const evictOwnEntry = () => {
    if (gitRunsCache.get(repoDir) === entry) {
      gitRunsCache.delete(repoDir);
    }
  };
  promise.then(
    () => {
      // A listing that took longer than the TTL is already stale when it lands.
      if (entry.expiresAt <= Date.now()) {
        evictOwnEntry();
      }
    },
    // Do not cache failures. Supplying a rejection handler here also keeps this
    // internal chain from raising an unhandled rejection; the caller still
    // observes the rejection through the returned promise.
    evictOwnEntry
  );
  return promise;
}
1077
1100
  var REMOTE_RUN_PREFIX = "remote::";
1078
1101
  var SIZE_WARNING_BYTES = 10 * 1024 * 1024;
1079
1102
  function getStatusMessage(error) {
1080
1103
  return error instanceof Error ? error.message : String(error);
1081
1104
  }
1082
- function normalizeResultsExportConfig(config) {
1083
- return {
1084
- repo: config.repo,
1085
- path: config.path,
1086
- auto_push: config.auto_push === true,
1087
- branch_prefix: config.branch_prefix?.trim() || "eval-results"
1088
- };
1089
- }
1090
1105
  function getRelativeRunPath(cwd, runDir) {
1091
1106
  const relative = path5.relative(path5.join(cwd, ".agentv", "results", "runs"), runDir);
1092
1107
  if (!relative.startsWith("..") && !path5.isAbsolute(relative)) {
@@ -1113,10 +1128,10 @@ async function maybeWarnLargeArtifact(runDir) {
1113
1128
  async function loadNormalizedResultsConfig(cwd) {
1114
1129
  const repoRoot = await findRepoRoot(cwd) ?? cwd;
1115
1130
  const config = await loadConfig(path5.join(cwd, "_"), repoRoot);
1116
- if (!config?.results?.export) {
1131
+ if (!config?.results) {
1117
1132
  return void 0;
1118
1133
  }
1119
- return normalizeResultsExportConfig(config.results.export);
1134
+ return normalizeResultsConfig(config.results);
1120
1135
  }
1121
1136
  function encodeRemoteRunId(filename) {
1122
1137
  return `${REMOTE_RUN_PREFIX}${filename}`;
@@ -1124,7 +1139,14 @@ function encodeRemoteRunId(filename) {
1124
1139
  async function getRemoteResultsStatus(cwd) {
1125
1140
  const config = await loadNormalizedResultsConfig(cwd);
1126
1141
  const status = getResultsRepoStatus(config);
1127
- const runCount = config && status.available ? listResultFilesFromRunsDir(resolveResultsRepoRunsDir(config)).length : 0;
1142
+ let runCount = 0;
1143
+ if (config && status.available) {
1144
+ try {
1145
+ runCount = (await cachedListGitRuns(config.path)).length;
1146
+ } catch {
1147
+ runCount = listResultFilesFromRunsDir(resolveResultsRepoRunsDir(config)).length;
1148
+ }
1149
+ }
1128
1150
  return {
1129
1151
  ...status,
1130
1152
  run_count: runCount
@@ -1165,14 +1187,43 @@ async function listMergedResultFiles(cwd, limit) {
1165
1187
  remote_status: remoteStatus
1166
1188
  };
1167
1189
  }
1168
- const remoteRuns = listResultFilesFromRunsDir(resolveResultsRepoRunsDir(config)).map(
1169
- (meta) => ({
1170
- ...meta,
1171
- filename: encodeRemoteRunId(meta.filename),
1172
- raw_filename: meta.filename,
1173
- source: "remote"
1174
- })
1175
- );
1190
+ let remoteRuns = [];
1191
+ if (config.mode === "github") {
1192
+ try {
1193
+ const gitRuns = await cachedListGitRuns(config.path);
1194
+ remoteRuns = gitRuns.map((r) => ({
1195
+ filename: encodeRemoteRunId(r.run_id),
1196
+ raw_filename: r.run_id,
1197
+ source: "remote",
1198
+ path: path5.join(config.path, r.manifest_path),
1199
+ displayName: r.display_name,
1200
+ timestamp: r.timestamp,
1201
+ testCount: r.test_count,
1202
+ passRate: r.pass_rate || 0,
1203
+ avgScore: r.avg_score || 0,
1204
+ sizeBytes: r.size_bytes || 0
1205
+ }));
1206
+ } catch (error) {
1207
+ console.error("git-native listing failed, falling back", error);
1208
+ remoteRuns = listResultFilesFromRunsDir(resolveResultsRepoRunsDir(config)).map(
1209
+ (meta) => ({
1210
+ ...meta,
1211
+ filename: encodeRemoteRunId(meta.filename),
1212
+ raw_filename: meta.filename,
1213
+ source: "remote"
1214
+ })
1215
+ );
1216
+ }
1217
+ } else {
1218
+ remoteRuns = listResultFilesFromRunsDir(resolveResultsRepoRunsDir(config)).map(
1219
+ (meta) => ({
1220
+ ...meta,
1221
+ filename: encodeRemoteRunId(meta.filename),
1222
+ raw_filename: meta.filename,
1223
+ source: "remote"
1224
+ })
1225
+ );
1226
+ }
1176
1227
  const merged = [...localRuns, ...remoteRuns].sort(
1177
1228
  (a, b) => b.timestamp.localeCompare(a.timestamp)
1178
1229
  );
@@ -1185,6 +1236,21 @@ async function findRunById(cwd, runId) {
1185
1236
  const { runs } = await listMergedResultFiles(cwd);
1186
1237
  return runs.find((run) => run.filename === runId);
1187
1238
  }
1239
/**
 * Makes sure the files for a remote run exist on disk before they are read.
 *
 * Does nothing for non-remote runs or when `meta.path` already exists.
 * Otherwise it derives the run directory (relative to the clone's `runs/`
 * folder) from the manifest path and hands it to `materializeGitRun`.
 * NOTE(review): presumably `materializeGitRun` checks the run's files out of
 * git into the clone — confirm against its definition.
 *
 * @param {string} cwd - Directory used to locate the results configuration.
 * @param {{ source: string, path: string }} meta - Run metadata; `path` is the
 *   manifest location inside the results repo clone.
 * @returns {Promise<void>}
 * @throws {Error} When remote results are not configured, or when `meta.path`
 *   does not point strictly inside the results repo clone.
 */
async function ensureRemoteRunAvailable(cwd, meta) {
  if (meta.source !== "remote" || existsSync3(meta.path)) {
    return;
  }
  const config = await loadNormalizedResultsConfig(cwd);
  if (!config) {
    throw new Error("Remote results are not configured");
  }
  const nativeRelativePath = path5.relative(config.path, meta.path);
  const relativeManifestPath = nativeRelativePath.split(path5.sep).join("/");
  // Reject anything that is not strictly inside the clone: the clone root
  // itself (""), a parent ("..", "../x"), or an absolute result (which
  // path.relative yields on Windows when the two paths are on different drives).
  // The absolute check uses the native form, before separators are rewritten.
  const isOutsideClone =
    relativeManifestPath.length === 0 ||
    relativeManifestPath === ".." ||
    relativeManifestPath.startsWith("../") ||
    path5.isAbsolute(nativeRelativePath);
  if (isOutsideClone) {
    throw new Error(`Remote manifest path is outside the results repo clone: ${meta.path}`);
  }
  const relativeRunPath = path5.posix.relative("runs", path5.posix.dirname(relativeManifestPath));
  await materializeGitRun(config.path, relativeRunPath);
}
1188
1254
  async function maybeAutoExportRunArtifacts(payload) {
1189
1255
  const config = await loadNormalizedResultsConfig(payload.cwd);
1190
1256
  if (!config?.auto_push) {
@@ -2474,7 +2540,7 @@ async function loadNonErrorResults(jsonlPath) {
2474
2540
  }
2475
2541
 
2476
2542
  // src/commands/eval/run-cache.ts
2477
- import { existsSync as existsSync3 } from "node:fs";
2543
+ import { existsSync as existsSync4 } from "node:fs";
2478
2544
  import { mkdir as mkdir6, readFile, writeFile as writeFile5 } from "node:fs/promises";
2479
2545
  import path13 from "node:path";
2480
2546
  var CACHE_FILENAME = "cache.json";
@@ -2498,7 +2564,7 @@ async function loadRunCache(cwd) {
2498
2564
  async function resolveCachedRunDir(cwd) {
2499
2565
  const cache = await loadRunCache(cwd);
2500
2566
  if (!cache?.lastRunDir) return void 0;
2501
- if (!existsSync3(cache.lastRunDir)) return void 0;
2567
+ if (!existsSync4(cache.lastRunDir)) return void 0;
2502
2568
  return cache.lastRunDir;
2503
2569
  }
2504
2570
  async function saveRunCache(cwd, resultPath) {
@@ -4204,51 +4270,60 @@ async function validateConfigFile(filePath) {
4204
4270
  message: "Field 'results' must be an object"
4205
4271
  });
4206
4272
  } else {
4207
- const exportConfig = results.export;
4208
- if (exportConfig !== void 0) {
4209
- if (typeof exportConfig !== "object" || exportConfig === null || Array.isArray(exportConfig)) {
4273
+ const resultsRecord = results;
4274
+ if (resultsRecord.mode !== "github") {
4275
+ errors.push({
4276
+ severity: "error",
4277
+ filePath,
4278
+ location: "results.mode",
4279
+ message: "Field 'results.mode' must be 'github'"
4280
+ });
4281
+ }
4282
+ if (typeof resultsRecord.repo !== "string" || resultsRecord.repo.trim().length === 0) {
4283
+ errors.push({
4284
+ severity: "error",
4285
+ filePath,
4286
+ location: "results.repo",
4287
+ message: "Field 'results.repo' must be a non-empty string"
4288
+ });
4289
+ }
4290
+ if (resultsRecord.path !== void 0) {
4291
+ if (typeof resultsRecord.path !== "string" || resultsRecord.path.trim().length === 0) {
4210
4292
  errors.push({
4211
4293
  severity: "error",
4212
4294
  filePath,
4213
- location: "results.export",
4214
- message: "Field 'results.export' must be an object"
4295
+ location: "results.path",
4296
+ message: "Field 'results.path' must be a non-empty string"
4215
4297
  });
4216
4298
  } else {
4217
- const exportRecord = exportConfig;
4218
- if (typeof exportRecord.repo !== "string" || exportRecord.repo.trim().length === 0) {
4219
- errors.push({
4220
- severity: "error",
4221
- filePath,
4222
- location: "results.export.repo",
4223
- message: "Field 'results.export.repo' must be a non-empty string"
4224
- });
4225
- }
4226
- if (typeof exportRecord.path !== "string" || exportRecord.path.trim().length === 0) {
4227
- errors.push({
4228
- severity: "error",
4229
- filePath,
4230
- location: "results.export.path",
4231
- message: "Field 'results.export.path' must be a non-empty string"
4232
- });
4233
- }
4234
- if (exportRecord.auto_push !== void 0 && typeof exportRecord.auto_push !== "boolean") {
4299
+ const p = resultsRecord.path.trim();
4300
+ const isFilesystemPath = p.startsWith("/") || p.startsWith("~/") || p.startsWith("~\\") || p === "~" || /^[A-Za-z]:[/\\]/.test(p);
4301
+ if (!isFilesystemPath) {
4235
4302
  errors.push({
4236
4303
  severity: "error",
4237
4304
  filePath,
4238
- location: "results.export.auto_push",
4239
- message: "Field 'results.export.auto_push' must be a boolean"
4240
- });
4241
- }
4242
- if (exportRecord.branch_prefix !== void 0 && (typeof exportRecord.branch_prefix !== "string" || exportRecord.branch_prefix.trim().length === 0)) {
4243
- errors.push({
4244
- severity: "error",
4245
- filePath,
4246
- location: "results.export.branch_prefix",
4247
- message: "Field 'results.export.branch_prefix' must be a non-empty string"
4305
+ location: "results.path",
4306
+ message: `'results.path' must be an absolute or home-relative filesystem path (e.g., ~/data/agentv-results). Found: '${p}'. Remove 'path' to use the default.`
4248
4307
  });
4249
4308
  }
4250
4309
  }
4251
4310
  }
4311
+ if (resultsRecord.auto_push !== void 0 && typeof resultsRecord.auto_push !== "boolean") {
4312
+ errors.push({
4313
+ severity: "error",
4314
+ filePath,
4315
+ location: "results.auto_push",
4316
+ message: "Field 'results.auto_push' must be a boolean"
4317
+ });
4318
+ }
4319
+ if (resultsRecord.branch_prefix !== void 0 && (typeof resultsRecord.branch_prefix !== "string" || resultsRecord.branch_prefix.trim().length === 0)) {
4320
+ errors.push({
4321
+ severity: "error",
4322
+ filePath,
4323
+ location: "results.branch_prefix",
4324
+ message: "Field 'results.branch_prefix' must be a non-empty string"
4325
+ });
4326
+ }
4252
4327
  }
4253
4328
  }
4254
4329
  const allowedFields = /* @__PURE__ */ new Set([
@@ -5397,7 +5472,7 @@ async function runEvalCommand(input) {
5397
5472
  const explicitResumeDir = options.outputDir ?? options.artifacts;
5398
5473
  if (explicitResumeDir) {
5399
5474
  const resumeIndexPath = path15.join(path15.resolve(explicitResumeDir), "index.jsonl");
5400
- if (existsSync4(resumeIndexPath)) {
5475
+ if (existsSync5(resumeIndexPath)) {
5401
5476
  const content = await readFile8(resumeIndexPath, "utf8");
5402
5477
  const existingResults = parseJsonlResults(content);
5403
5478
  resumeSkipKeys = /* @__PURE__ */ new Set();
@@ -5480,7 +5555,7 @@ async function runEvalCommand(input) {
5480
5555
  const useFileExport = !!options.otelFile;
5481
5556
  if (options.exportOtel || useFileExport) {
5482
5557
  try {
5483
- const { OtelTraceExporter, OTEL_BACKEND_PRESETS } = await import("./dist-GYCHDLTZ.js");
5558
+ const { OtelTraceExporter, OTEL_BACKEND_PRESETS } = await import("./dist-CRYAFKLS.js");
5484
5559
  let endpoint = process.env.OTEL_EXPORTER_OTLP_ENDPOINT;
5485
5560
  let headers = {};
5486
5561
  if (options.otelBackend) {
@@ -5679,7 +5754,7 @@ async function runEvalCommand(input) {
5679
5754
  const activeTestFiles = resolvedTestFiles.filter((f) => fileMetadata.has(f));
5680
5755
  let transcriptProviderFactory;
5681
5756
  if (options.transcript) {
5682
- const { TranscriptProvider } = await import("./dist-GYCHDLTZ.js");
5757
+ const { TranscriptProvider } = await import("./dist-CRYAFKLS.js");
5683
5758
  const transcriptProvider = await TranscriptProvider.fromFile(options.transcript);
5684
5759
  const totalTests = [...fileMetadata.values()].reduce(
5685
5760
  (sum, meta) => sum + meta.testCases.length,
@@ -5856,7 +5931,7 @@ async function runEvalCommand(input) {
5856
5931
  if (usesDefaultArtifactWorkspace && allResults.length > 0) {
5857
5932
  const evalFile = activeTestFiles.length === 1 ? activeTestFiles[0] : "";
5858
5933
  if (isResumeAppend) {
5859
- const { writePerTestArtifacts } = await import("./artifact-writer-JJLJUUUY.js");
5934
+ const { writePerTestArtifacts } = await import("./artifact-writer-FZ5RUHWC.js");
5860
5935
  await writePerTestArtifacts(allResults, runDir, {
5861
5936
  experiment: normalizeExperimentName(options.experiment)
5862
5937
  });
@@ -6062,6 +6137,7 @@ export {
6062
6137
  syncRemoteResults,
6063
6138
  listMergedResultFiles,
6064
6139
  findRunById,
6140
+ ensureRemoteRunAvailable,
6065
6141
  maybeAutoExportRunArtifacts,
6066
6142
  resolveRunCacheFile,
6067
6143
  loadRunCache,
@@ -6080,4 +6156,4 @@ export {
6080
6156
  getCategories,
6081
6157
  filterByCategory
6082
6158
  };
6083
- //# sourceMappingURL=chunk-B2YJVU72.js.map
6159
+ //# sourceMappingURL=chunk-E6KALARL.js.map