agentv 4.31.0-next.1 → 4.31.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30) hide show
  1. package/dist/{artifact-writer-K4B2S7OE.js → artifact-writer-FZ5RUHWC.js} +4 -4
  2. package/dist/{chunk-2Z2V7RJO.js → chunk-377GONL7.js} +29 -11
  3. package/dist/chunk-377GONL7.js.map +1 -0
  4. package/dist/{chunk-NACNTFNH.js → chunk-45EYE5HJ.js} +99 -38
  5. package/dist/{chunk-NACNTFNH.js.map → chunk-45EYE5HJ.js.map} +1 -1
  6. package/dist/{chunk-BMSSZSND.js → chunk-IJPWTVDU.js} +3 -3
  7. package/dist/{chunk-FLSABQ33.js → chunk-LOYPSIE7.js} +268 -42
  8. package/dist/chunk-LOYPSIE7.js.map +1 -0
  9. package/dist/{chunk-BCZHBAUK.js → chunk-ZFTDIK4V.js} +123 -35
  10. package/dist/chunk-ZFTDIK4V.js.map +1 -0
  11. package/dist/cli.js +5 -5
  12. package/dist/{dist-2E6ZNN32.js → dist-CRYAFKLS.js} +9 -5
  13. package/dist/index.js +5 -5
  14. package/dist/{interactive-22PLS22Z.js → interactive-WZW2FF43.js} +5 -5
  15. package/dist/studio/assets/index-9UixPaIJ.js +116 -0
  16. package/dist/studio/assets/index-BAvxZ1rb.css +1 -0
  17. package/dist/studio/assets/{index-Bh_LwYWq.js → index-DPrj3J9P.js} +1 -1
  18. package/dist/studio/index.html +2 -2
  19. package/dist/{ts-eval-loader-TWWSN6OX-VRZ43CUR.js → ts-eval-loader-XR6DNOZ3-GA7B5N26.js} +2 -2
  20. package/package.json +1 -1
  21. package/dist/chunk-2Z2V7RJO.js.map +0 -1
  22. package/dist/chunk-BCZHBAUK.js.map +0 -1
  23. package/dist/chunk-FLSABQ33.js.map +0 -1
  24. package/dist/studio/assets/index-CIpCCDKl.css +0 -1
  25. package/dist/studio/assets/index-DsVyXC9S.js +0 -116
  26. /package/dist/{artifact-writer-K4B2S7OE.js.map → artifact-writer-FZ5RUHWC.js.map} +0 -0
  27. /package/dist/{chunk-BMSSZSND.js.map → chunk-IJPWTVDU.js.map} +0 -0
  28. /package/dist/{dist-2E6ZNN32.js.map → dist-CRYAFKLS.js.map} +0 -0
  29. /package/dist/{interactive-22PLS22Z.js.map → interactive-WZW2FF43.js.map} +0 -0
  30. /package/dist/{ts-eval-loader-TWWSN6OX-VRZ43CUR.js.map → ts-eval-loader-XR6DNOZ3-GA7B5N26.js.map} +0 -0
@@ -16,7 +16,7 @@ import {
16
16
  toSnakeCaseDeep,
17
17
  writeArtifactsFromResults,
18
18
  writeInitialBenchmarkArtifact
19
- } from "./chunk-BMSSZSND.js";
19
+ } from "./chunk-IJPWTVDU.js";
20
20
  import {
21
21
  ResponseCache,
22
22
  RunBudgetTracker,
@@ -24,12 +24,15 @@ import {
24
24
  directPushResults,
25
25
  directorySizeBytes,
26
26
  getResultsRepoStatus,
27
+ listGitRuns,
27
28
  loadTsConfig,
29
+ materializeGitRun,
30
+ normalizeResultsConfig,
28
31
  resolveResultsRepoRunsDir,
29
32
  shouldEnableCache,
30
33
  shouldSkipCacheForTemperature,
31
34
  syncResultsRepo
32
- } from "./chunk-FLSABQ33.js";
35
+ } from "./chunk-LOYPSIE7.js";
33
36
  import {
34
37
  CLI_PLACEHOLDERS,
35
38
  COMMON_TARGET_SETTINGS,
@@ -61,7 +64,7 @@ import {
61
64
  subscribeToCopilotSdkLogEntries,
62
65
  subscribeToPiLogEntries,
63
66
  toCamelCaseDeep
64
- } from "./chunk-2Z2V7RJO.js";
67
+ } from "./chunk-377GONL7.js";
65
68
 
66
69
  // src/commands/eval/shared.ts
67
70
  import { constants } from "node:fs";
@@ -177,7 +180,7 @@ async function findRepoRoot(start) {
177
180
  // package.json
178
181
  var package_default = {
179
182
  name: "agentv",
180
- version: "4.31.0-next.1",
183
+ version: "4.31.1",
181
184
  description: "CLI entry point for AgentV",
182
185
  type: "module",
183
186
  repository: {
@@ -285,7 +288,7 @@ async function discoverTargetsFile(options) {
285
288
  }
286
289
 
287
290
  // src/commands/eval/run-eval.ts
288
- import { constants as constants4, existsSync as existsSync4, mkdirSync } from "node:fs";
291
+ import { constants as constants4, existsSync as existsSync5, mkdirSync } from "node:fs";
289
292
  import { access as access5, readFile as readFile8 } from "node:fs/promises";
290
293
  import path15 from "node:path";
291
294
  import { pathToFileURL } from "node:url";
@@ -514,6 +517,7 @@ ${ANSI_GREEN}Update complete.${ANSI_RESET}`);
514
517
  }
515
518
 
516
519
  // src/commands/results/remote.ts
520
+ import { existsSync as existsSync3 } from "node:fs";
517
521
  import path5 from "node:path";
518
522
 
519
523
  // src/commands/inspect/utils.ts
@@ -1074,19 +1078,30 @@ function formatScore(score) {
1074
1078
  }
1075
1079
 
1076
1080
  // src/commands/results/remote.ts
1081
+ var gitRunsCache = /* @__PURE__ */ new Map();
1082
+ var GIT_RUNS_CACHE_TTL_MS = 6e4;
1083
+ function cachedListGitRuns(repoDir) {
1084
+ const now = Date.now();
1085
+ const cached = gitRunsCache.get(repoDir);
1086
+ if (cached && cached.expiresAt > now) {
1087
+ return cached.data;
1088
+ }
1089
+ const promise = listGitRuns(repoDir);
1090
+ gitRunsCache.set(repoDir, { data: promise, expiresAt: now + GIT_RUNS_CACHE_TTL_MS });
1091
+ promise.catch(() => {
1092
+ }).finally(() => {
1093
+ const entry = gitRunsCache.get(repoDir);
1094
+ if (entry && entry.expiresAt <= Date.now()) {
1095
+ gitRunsCache.delete(repoDir);
1096
+ }
1097
+ });
1098
+ return promise;
1099
+ }
1077
1100
  var REMOTE_RUN_PREFIX = "remote::";
1078
1101
  var SIZE_WARNING_BYTES = 10 * 1024 * 1024;
1079
1102
  function getStatusMessage(error) {
1080
1103
  return error instanceof Error ? error.message : String(error);
1081
1104
  }
1082
- function normalizeResultsConfig(config) {
1083
- return {
1084
- repo: config.repo,
1085
- path: config.path,
1086
- auto_push: config.auto_push === true,
1087
- branch_prefix: config.branch_prefix?.trim() || "eval-results"
1088
- };
1089
- }
1090
1105
  function getRelativeRunPath(cwd, runDir) {
1091
1106
  const relative = path5.relative(path5.join(cwd, ".agentv", "results", "runs"), runDir);
1092
1107
  if (!relative.startsWith("..") && !path5.isAbsolute(relative)) {
@@ -1124,7 +1139,14 @@ function encodeRemoteRunId(filename) {
1124
1139
  async function getRemoteResultsStatus(cwd) {
1125
1140
  const config = await loadNormalizedResultsConfig(cwd);
1126
1141
  const status = getResultsRepoStatus(config);
1127
- const runCount = config && status.available ? listResultFilesFromRunsDir(resolveResultsRepoRunsDir(config)).length : 0;
1142
+ let runCount = 0;
1143
+ if (config && status.available) {
1144
+ try {
1145
+ runCount = (await cachedListGitRuns(config.path)).length;
1146
+ } catch {
1147
+ runCount = listResultFilesFromRunsDir(resolveResultsRepoRunsDir(config)).length;
1148
+ }
1149
+ }
1128
1150
  return {
1129
1151
  ...status,
1130
1152
  run_count: runCount
@@ -1165,14 +1187,43 @@ async function listMergedResultFiles(cwd, limit) {
1165
1187
  remote_status: remoteStatus
1166
1188
  };
1167
1189
  }
1168
- const remoteRuns = listResultFilesFromRunsDir(resolveResultsRepoRunsDir(config)).map(
1169
- (meta) => ({
1170
- ...meta,
1171
- filename: encodeRemoteRunId(meta.filename),
1172
- raw_filename: meta.filename,
1173
- source: "remote"
1174
- })
1175
- );
1190
+ let remoteRuns = [];
1191
+ if (config.mode === "github") {
1192
+ try {
1193
+ const gitRuns = await cachedListGitRuns(config.path);
1194
+ remoteRuns = gitRuns.map((r) => ({
1195
+ filename: encodeRemoteRunId(r.run_id),
1196
+ raw_filename: r.run_id,
1197
+ source: "remote",
1198
+ path: path5.join(config.path, r.manifest_path),
1199
+ displayName: r.display_name,
1200
+ timestamp: r.timestamp,
1201
+ testCount: r.test_count,
1202
+ passRate: r.pass_rate || 0,
1203
+ avgScore: r.avg_score || 0,
1204
+ sizeBytes: r.size_bytes || 0
1205
+ }));
1206
+ } catch (error) {
1207
+ console.error("git-native listing failed, falling back", error);
1208
+ remoteRuns = listResultFilesFromRunsDir(resolveResultsRepoRunsDir(config)).map(
1209
+ (meta) => ({
1210
+ ...meta,
1211
+ filename: encodeRemoteRunId(meta.filename),
1212
+ raw_filename: meta.filename,
1213
+ source: "remote"
1214
+ })
1215
+ );
1216
+ }
1217
+ } else {
1218
+ remoteRuns = listResultFilesFromRunsDir(resolveResultsRepoRunsDir(config)).map(
1219
+ (meta) => ({
1220
+ ...meta,
1221
+ filename: encodeRemoteRunId(meta.filename),
1222
+ raw_filename: meta.filename,
1223
+ source: "remote"
1224
+ })
1225
+ );
1226
+ }
1176
1227
  const merged = [...localRuns, ...remoteRuns].sort(
1177
1228
  (a, b) => b.timestamp.localeCompare(a.timestamp)
1178
1229
  );
@@ -1185,6 +1236,21 @@ async function findRunById(cwd, runId) {
1185
1236
  const { runs } = await listMergedResultFiles(cwd);
1186
1237
  return runs.find((run) => run.filename === runId);
1187
1238
  }
1239
+ async function ensureRemoteRunAvailable(cwd, meta) {
1240
+ if (meta.source !== "remote" || existsSync3(meta.path)) {
1241
+ return;
1242
+ }
1243
+ const config = await loadNormalizedResultsConfig(cwd);
1244
+ if (!config) {
1245
+ throw new Error("Remote results are not configured");
1246
+ }
1247
+ const relativeManifestPath = path5.relative(config.path, meta.path).split(path5.sep).join("/");
1248
+ if (relativeManifestPath.length === 0 || relativeManifestPath === meta.path || relativeManifestPath.startsWith("../")) {
1249
+ throw new Error(`Remote manifest path is outside the results repo clone: ${meta.path}`);
1250
+ }
1251
+ const relativeRunPath = path5.posix.relative("runs", path5.posix.dirname(relativeManifestPath));
1252
+ await materializeGitRun(config.path, relativeRunPath);
1253
+ }
1188
1254
  async function maybeAutoExportRunArtifacts(payload) {
1189
1255
  const config = await loadNormalizedResultsConfig(payload.cwd);
1190
1256
  if (!config?.auto_push) {
@@ -2474,7 +2540,7 @@ async function loadNonErrorResults(jsonlPath) {
2474
2540
  }
2475
2541
 
2476
2542
  // src/commands/eval/run-cache.ts
2477
- import { existsSync as existsSync3 } from "node:fs";
2543
+ import { existsSync as existsSync4 } from "node:fs";
2478
2544
  import { mkdir as mkdir6, readFile, writeFile as writeFile5 } from "node:fs/promises";
2479
2545
  import path13 from "node:path";
2480
2546
  var CACHE_FILENAME = "cache.json";
@@ -2498,7 +2564,7 @@ async function loadRunCache(cwd) {
2498
2564
  async function resolveCachedRunDir(cwd) {
2499
2565
  const cache = await loadRunCache(cwd);
2500
2566
  if (!cache?.lastRunDir) return void 0;
2501
- if (!existsSync3(cache.lastRunDir)) return void 0;
2567
+ if (!existsSync4(cache.lastRunDir)) return void 0;
2502
2568
  return cache.lastRunDir;
2503
2569
  }
2504
2570
  async function saveRunCache(cwd, resultPath) {
@@ -4205,22 +4271,43 @@ async function validateConfigFile(filePath) {
4205
4271
  });
4206
4272
  } else {
4207
4273
  const resultsRecord = results;
4208
- if (typeof resultsRecord.repo !== "string" || resultsRecord.repo.trim().length === 0) {
4274
+ if (resultsRecord.mode !== "github") {
4209
4275
  errors.push({
4210
4276
  severity: "error",
4211
4277
  filePath,
4212
- location: "results.repo",
4213
- message: "Field 'results.repo' must be a non-empty string"
4278
+ location: "results.mode",
4279
+ message: "Field 'results.mode' must be 'github'"
4214
4280
  });
4215
4281
  }
4216
- if (typeof resultsRecord.path !== "string" || resultsRecord.path.trim().length === 0) {
4282
+ if (typeof resultsRecord.repo !== "string" || resultsRecord.repo.trim().length === 0) {
4217
4283
  errors.push({
4218
4284
  severity: "error",
4219
4285
  filePath,
4220
- location: "results.path",
4221
- message: "Field 'results.path' must be a non-empty string"
4286
+ location: "results.repo",
4287
+ message: "Field 'results.repo' must be a non-empty string"
4222
4288
  });
4223
4289
  }
4290
+ if (resultsRecord.path !== void 0) {
4291
+ if (typeof resultsRecord.path !== "string" || resultsRecord.path.trim().length === 0) {
4292
+ errors.push({
4293
+ severity: "error",
4294
+ filePath,
4295
+ location: "results.path",
4296
+ message: "Field 'results.path' must be a non-empty string"
4297
+ });
4298
+ } else {
4299
+ const p = resultsRecord.path.trim();
4300
+ const isFilesystemPath = p.startsWith("/") || p.startsWith("~/") || p.startsWith("~\\") || p === "~" || /^[A-Za-z]:[/\\]/.test(p);
4301
+ if (!isFilesystemPath) {
4302
+ errors.push({
4303
+ severity: "error",
4304
+ filePath,
4305
+ location: "results.path",
4306
+ message: `'results.path' must be an absolute or home-relative filesystem path (e.g., ~/data/agentv-results). Found: '${p}'. Remove 'path' to use the default.`
4307
+ });
4308
+ }
4309
+ }
4310
+ }
4224
4311
  if (resultsRecord.auto_push !== void 0 && typeof resultsRecord.auto_push !== "boolean") {
4225
4312
  errors.push({
4226
4313
  severity: "error",
@@ -5385,7 +5472,7 @@ async function runEvalCommand(input) {
5385
5472
  const explicitResumeDir = options.outputDir ?? options.artifacts;
5386
5473
  if (explicitResumeDir) {
5387
5474
  const resumeIndexPath = path15.join(path15.resolve(explicitResumeDir), "index.jsonl");
5388
- if (existsSync4(resumeIndexPath)) {
5475
+ if (existsSync5(resumeIndexPath)) {
5389
5476
  const content = await readFile8(resumeIndexPath, "utf8");
5390
5477
  const existingResults = parseJsonlResults(content);
5391
5478
  resumeSkipKeys = /* @__PURE__ */ new Set();
@@ -5468,7 +5555,7 @@ async function runEvalCommand(input) {
5468
5555
  const useFileExport = !!options.otelFile;
5469
5556
  if (options.exportOtel || useFileExport) {
5470
5557
  try {
5471
- const { OtelTraceExporter, OTEL_BACKEND_PRESETS } = await import("./dist-2E6ZNN32.js");
5558
+ const { OtelTraceExporter, OTEL_BACKEND_PRESETS } = await import("./dist-CRYAFKLS.js");
5472
5559
  let endpoint = process.env.OTEL_EXPORTER_OTLP_ENDPOINT;
5473
5560
  let headers = {};
5474
5561
  if (options.otelBackend) {
@@ -5667,7 +5754,7 @@ async function runEvalCommand(input) {
5667
5754
  const activeTestFiles = resolvedTestFiles.filter((f) => fileMetadata.has(f));
5668
5755
  let transcriptProviderFactory;
5669
5756
  if (options.transcript) {
5670
- const { TranscriptProvider } = await import("./dist-2E6ZNN32.js");
5757
+ const { TranscriptProvider } = await import("./dist-CRYAFKLS.js");
5671
5758
  const transcriptProvider = await TranscriptProvider.fromFile(options.transcript);
5672
5759
  const totalTests = [...fileMetadata.values()].reduce(
5673
5760
  (sum, meta) => sum + meta.testCases.length,
@@ -5844,7 +5931,7 @@ async function runEvalCommand(input) {
5844
5931
  if (usesDefaultArtifactWorkspace && allResults.length > 0) {
5845
5932
  const evalFile = activeTestFiles.length === 1 ? activeTestFiles[0] : "";
5846
5933
  if (isResumeAppend) {
5847
- const { writePerTestArtifacts } = await import("./artifact-writer-K4B2S7OE.js");
5934
+ const { writePerTestArtifacts } = await import("./artifact-writer-FZ5RUHWC.js");
5848
5935
  await writePerTestArtifacts(allResults, runDir, {
5849
5936
  experiment: normalizeExperimentName(options.experiment)
5850
5937
  });
@@ -6050,6 +6137,7 @@ export {
6050
6137
  syncRemoteResults,
6051
6138
  listMergedResultFiles,
6052
6139
  findRunById,
6140
+ ensureRemoteRunAvailable,
6053
6141
  maybeAutoExportRunArtifacts,
6054
6142
  resolveRunCacheFile,
6055
6143
  loadRunCache,
@@ -6068,4 +6156,4 @@ export {
6068
6156
  getCategories,
6069
6157
  filterByCategory
6070
6158
  };
6071
- //# sourceMappingURL=chunk-BCZHBAUK.js.map
6159
+ //# sourceMappingURL=chunk-ZFTDIK4V.js.map