agentv 4.31.0-next.1 → 4.31.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{artifact-writer-K4B2S7OE.js → artifact-writer-FZ5RUHWC.js} +4 -4
- package/dist/{chunk-2Z2V7RJO.js → chunk-377GONL7.js} +29 -11
- package/dist/chunk-377GONL7.js.map +1 -0
- package/dist/{chunk-NACNTFNH.js → chunk-45EYE5HJ.js} +99 -38
- package/dist/{chunk-NACNTFNH.js.map → chunk-45EYE5HJ.js.map} +1 -1
- package/dist/{chunk-BMSSZSND.js → chunk-IJPWTVDU.js} +3 -3
- package/dist/{chunk-FLSABQ33.js → chunk-LOYPSIE7.js} +268 -42
- package/dist/chunk-LOYPSIE7.js.map +1 -0
- package/dist/{chunk-BCZHBAUK.js → chunk-ZFTDIK4V.js} +123 -35
- package/dist/chunk-ZFTDIK4V.js.map +1 -0
- package/dist/cli.js +5 -5
- package/dist/{dist-2E6ZNN32.js → dist-CRYAFKLS.js} +9 -5
- package/dist/index.js +5 -5
- package/dist/{interactive-22PLS22Z.js → interactive-WZW2FF43.js} +5 -5
- package/dist/studio/assets/index-9UixPaIJ.js +116 -0
- package/dist/studio/assets/index-BAvxZ1rb.css +1 -0
- package/dist/studio/assets/{index-Bh_LwYWq.js → index-DPrj3J9P.js} +1 -1
- package/dist/studio/index.html +2 -2
- package/dist/{ts-eval-loader-TWWSN6OX-VRZ43CUR.js → ts-eval-loader-XR6DNOZ3-GA7B5N26.js} +2 -2
- package/package.json +1 -1
- package/dist/chunk-2Z2V7RJO.js.map +0 -1
- package/dist/chunk-BCZHBAUK.js.map +0 -1
- package/dist/chunk-FLSABQ33.js.map +0 -1
- package/dist/studio/assets/index-CIpCCDKl.css +0 -1
- package/dist/studio/assets/index-DsVyXC9S.js +0 -116
- /package/dist/{artifact-writer-K4B2S7OE.js.map → artifact-writer-FZ5RUHWC.js.map} +0 -0
- /package/dist/{chunk-BMSSZSND.js.map → chunk-IJPWTVDU.js.map} +0 -0
- /package/dist/{dist-2E6ZNN32.js.map → dist-CRYAFKLS.js.map} +0 -0
- /package/dist/{interactive-22PLS22Z.js.map → interactive-WZW2FF43.js.map} +0 -0
- /package/dist/{ts-eval-loader-TWWSN6OX-VRZ43CUR.js.map → ts-eval-loader-XR6DNOZ3-GA7B5N26.js.map} +0 -0
|
@@ -16,7 +16,7 @@ import {
|
|
|
16
16
|
toSnakeCaseDeep,
|
|
17
17
|
writeArtifactsFromResults,
|
|
18
18
|
writeInitialBenchmarkArtifact
|
|
19
|
-
} from "./chunk-BMSSZSND.js";
|
|
19
|
+
} from "./chunk-IJPWTVDU.js";
|
|
20
20
|
import {
|
|
21
21
|
ResponseCache,
|
|
22
22
|
RunBudgetTracker,
|
|
@@ -24,12 +24,15 @@ import {
|
|
|
24
24
|
directPushResults,
|
|
25
25
|
directorySizeBytes,
|
|
26
26
|
getResultsRepoStatus,
|
|
27
|
+
listGitRuns,
|
|
27
28
|
loadTsConfig,
|
|
29
|
+
materializeGitRun,
|
|
30
|
+
normalizeResultsConfig,
|
|
28
31
|
resolveResultsRepoRunsDir,
|
|
29
32
|
shouldEnableCache,
|
|
30
33
|
shouldSkipCacheForTemperature,
|
|
31
34
|
syncResultsRepo
|
|
32
|
-
} from "./chunk-FLSABQ33.js";
|
|
35
|
+
} from "./chunk-LOYPSIE7.js";
|
|
33
36
|
import {
|
|
34
37
|
CLI_PLACEHOLDERS,
|
|
35
38
|
COMMON_TARGET_SETTINGS,
|
|
@@ -61,7 +64,7 @@ import {
|
|
|
61
64
|
subscribeToCopilotSdkLogEntries,
|
|
62
65
|
subscribeToPiLogEntries,
|
|
63
66
|
toCamelCaseDeep
|
|
64
|
-
} from "./chunk-2Z2V7RJO.js";
|
|
67
|
+
} from "./chunk-377GONL7.js";
|
|
65
68
|
|
|
66
69
|
// src/commands/eval/shared.ts
|
|
67
70
|
import { constants } from "node:fs";
|
|
@@ -177,7 +180,7 @@ async function findRepoRoot(start) {
|
|
|
177
180
|
// package.json
|
|
178
181
|
var package_default = {
|
|
179
182
|
name: "agentv",
|
|
180
|
-
version: "4.31.0-next.1",
|
|
183
|
+
version: "4.31.1",
|
|
181
184
|
description: "CLI entry point for AgentV",
|
|
182
185
|
type: "module",
|
|
183
186
|
repository: {
|
|
@@ -285,7 +288,7 @@ async function discoverTargetsFile(options) {
|
|
|
285
288
|
}
|
|
286
289
|
|
|
287
290
|
// src/commands/eval/run-eval.ts
|
|
288
|
-
import { constants as constants4, existsSync as existsSync4, mkdirSync } from "node:fs";
|
|
291
|
+
import { constants as constants4, existsSync as existsSync5, mkdirSync } from "node:fs";
|
|
289
292
|
import { access as access5, readFile as readFile8 } from "node:fs/promises";
|
|
290
293
|
import path15 from "node:path";
|
|
291
294
|
import { pathToFileURL } from "node:url";
|
|
@@ -514,6 +517,7 @@ ${ANSI_GREEN}Update complete.${ANSI_RESET}`);
|
|
|
514
517
|
}
|
|
515
518
|
|
|
516
519
|
// src/commands/results/remote.ts
|
|
520
|
+
import { existsSync as existsSync3 } from "node:fs";
|
|
517
521
|
import path5 from "node:path";
|
|
518
522
|
|
|
519
523
|
// src/commands/inspect/utils.ts
|
|
@@ -1074,19 +1078,30 @@ function formatScore(score) {
|
|
|
1074
1078
|
}
|
|
1075
1079
|
|
|
1076
1080
|
// src/commands/results/remote.ts
|
|
1081
|
+
var gitRunsCache = /* @__PURE__ */ new Map();
|
|
1082
|
+
var GIT_RUNS_CACHE_TTL_MS = 6e4;
|
|
1083
|
+
function cachedListGitRuns(repoDir) {
|
|
1084
|
+
const now = Date.now();
|
|
1085
|
+
const cached = gitRunsCache.get(repoDir);
|
|
1086
|
+
if (cached && cached.expiresAt > now) {
|
|
1087
|
+
return cached.data;
|
|
1088
|
+
}
|
|
1089
|
+
const promise = listGitRuns(repoDir);
|
|
1090
|
+
gitRunsCache.set(repoDir, { data: promise, expiresAt: now + GIT_RUNS_CACHE_TTL_MS });
|
|
1091
|
+
promise.catch(() => {
|
|
1092
|
+
}).finally(() => {
|
|
1093
|
+
const entry = gitRunsCache.get(repoDir);
|
|
1094
|
+
if (entry && entry.expiresAt <= Date.now()) {
|
|
1095
|
+
gitRunsCache.delete(repoDir);
|
|
1096
|
+
}
|
|
1097
|
+
});
|
|
1098
|
+
return promise;
|
|
1099
|
+
}
|
|
1077
1100
|
var REMOTE_RUN_PREFIX = "remote::";
|
|
1078
1101
|
var SIZE_WARNING_BYTES = 10 * 1024 * 1024;
|
|
1079
1102
|
function getStatusMessage(error) {
|
|
1080
1103
|
return error instanceof Error ? error.message : String(error);
|
|
1081
1104
|
}
|
|
1082
|
-
function normalizeResultsConfig(config) {
|
|
1083
|
-
return {
|
|
1084
|
-
repo: config.repo,
|
|
1085
|
-
path: config.path,
|
|
1086
|
-
auto_push: config.auto_push === true,
|
|
1087
|
-
branch_prefix: config.branch_prefix?.trim() || "eval-results"
|
|
1088
|
-
};
|
|
1089
|
-
}
|
|
1090
1105
|
function getRelativeRunPath(cwd, runDir) {
|
|
1091
1106
|
const relative = path5.relative(path5.join(cwd, ".agentv", "results", "runs"), runDir);
|
|
1092
1107
|
if (!relative.startsWith("..") && !path5.isAbsolute(relative)) {
|
|
@@ -1124,7 +1139,14 @@ function encodeRemoteRunId(filename) {
|
|
|
1124
1139
|
async function getRemoteResultsStatus(cwd) {
|
|
1125
1140
|
const config = await loadNormalizedResultsConfig(cwd);
|
|
1126
1141
|
const status = getResultsRepoStatus(config);
|
|
1127
|
-
|
|
1142
|
+
let runCount = 0;
|
|
1143
|
+
if (config && status.available) {
|
|
1144
|
+
try {
|
|
1145
|
+
runCount = (await cachedListGitRuns(config.path)).length;
|
|
1146
|
+
} catch {
|
|
1147
|
+
runCount = listResultFilesFromRunsDir(resolveResultsRepoRunsDir(config)).length;
|
|
1148
|
+
}
|
|
1149
|
+
}
|
|
1128
1150
|
return {
|
|
1129
1151
|
...status,
|
|
1130
1152
|
run_count: runCount
|
|
@@ -1165,14 +1187,43 @@ async function listMergedResultFiles(cwd, limit) {
|
|
|
1165
1187
|
remote_status: remoteStatus
|
|
1166
1188
|
};
|
|
1167
1189
|
}
|
|
1168
|
-
|
|
1169
|
-
|
|
1170
|
-
|
|
1171
|
-
|
|
1172
|
-
|
|
1173
|
-
|
|
1174
|
-
|
|
1175
|
-
|
|
1190
|
+
let remoteRuns = [];
|
|
1191
|
+
if (config.mode === "github") {
|
|
1192
|
+
try {
|
|
1193
|
+
const gitRuns = await cachedListGitRuns(config.path);
|
|
1194
|
+
remoteRuns = gitRuns.map((r) => ({
|
|
1195
|
+
filename: encodeRemoteRunId(r.run_id),
|
|
1196
|
+
raw_filename: r.run_id,
|
|
1197
|
+
source: "remote",
|
|
1198
|
+
path: path5.join(config.path, r.manifest_path),
|
|
1199
|
+
displayName: r.display_name,
|
|
1200
|
+
timestamp: r.timestamp,
|
|
1201
|
+
testCount: r.test_count,
|
|
1202
|
+
passRate: r.pass_rate || 0,
|
|
1203
|
+
avgScore: r.avg_score || 0,
|
|
1204
|
+
sizeBytes: r.size_bytes || 0
|
|
1205
|
+
}));
|
|
1206
|
+
} catch (error) {
|
|
1207
|
+
console.error("git-native listing failed, falling back", error);
|
|
1208
|
+
remoteRuns = listResultFilesFromRunsDir(resolveResultsRepoRunsDir(config)).map(
|
|
1209
|
+
(meta) => ({
|
|
1210
|
+
...meta,
|
|
1211
|
+
filename: encodeRemoteRunId(meta.filename),
|
|
1212
|
+
raw_filename: meta.filename,
|
|
1213
|
+
source: "remote"
|
|
1214
|
+
})
|
|
1215
|
+
);
|
|
1216
|
+
}
|
|
1217
|
+
} else {
|
|
1218
|
+
remoteRuns = listResultFilesFromRunsDir(resolveResultsRepoRunsDir(config)).map(
|
|
1219
|
+
(meta) => ({
|
|
1220
|
+
...meta,
|
|
1221
|
+
filename: encodeRemoteRunId(meta.filename),
|
|
1222
|
+
raw_filename: meta.filename,
|
|
1223
|
+
source: "remote"
|
|
1224
|
+
})
|
|
1225
|
+
);
|
|
1226
|
+
}
|
|
1176
1227
|
const merged = [...localRuns, ...remoteRuns].sort(
|
|
1177
1228
|
(a, b) => b.timestamp.localeCompare(a.timestamp)
|
|
1178
1229
|
);
|
|
@@ -1185,6 +1236,21 @@ async function findRunById(cwd, runId) {
|
|
|
1185
1236
|
const { runs } = await listMergedResultFiles(cwd);
|
|
1186
1237
|
return runs.find((run) => run.filename === runId);
|
|
1187
1238
|
}
|
|
1239
|
+
async function ensureRemoteRunAvailable(cwd, meta) {
|
|
1240
|
+
if (meta.source !== "remote" || existsSync3(meta.path)) {
|
|
1241
|
+
return;
|
|
1242
|
+
}
|
|
1243
|
+
const config = await loadNormalizedResultsConfig(cwd);
|
|
1244
|
+
if (!config) {
|
|
1245
|
+
throw new Error("Remote results are not configured");
|
|
1246
|
+
}
|
|
1247
|
+
const relativeManifestPath = path5.relative(config.path, meta.path).split(path5.sep).join("/");
|
|
1248
|
+
if (relativeManifestPath.length === 0 || relativeManifestPath === meta.path || relativeManifestPath.startsWith("../")) {
|
|
1249
|
+
throw new Error(`Remote manifest path is outside the results repo clone: ${meta.path}`);
|
|
1250
|
+
}
|
|
1251
|
+
const relativeRunPath = path5.posix.relative("runs", path5.posix.dirname(relativeManifestPath));
|
|
1252
|
+
await materializeGitRun(config.path, relativeRunPath);
|
|
1253
|
+
}
|
|
1188
1254
|
async function maybeAutoExportRunArtifacts(payload) {
|
|
1189
1255
|
const config = await loadNormalizedResultsConfig(payload.cwd);
|
|
1190
1256
|
if (!config?.auto_push) {
|
|
@@ -2474,7 +2540,7 @@ async function loadNonErrorResults(jsonlPath) {
|
|
|
2474
2540
|
}
|
|
2475
2541
|
|
|
2476
2542
|
// src/commands/eval/run-cache.ts
|
|
2477
|
-
import { existsSync as existsSync3 } from "node:fs";
|
|
2543
|
+
import { existsSync as existsSync4 } from "node:fs";
|
|
2478
2544
|
import { mkdir as mkdir6, readFile, writeFile as writeFile5 } from "node:fs/promises";
|
|
2479
2545
|
import path13 from "node:path";
|
|
2480
2546
|
var CACHE_FILENAME = "cache.json";
|
|
@@ -2498,7 +2564,7 @@ async function loadRunCache(cwd) {
|
|
|
2498
2564
|
async function resolveCachedRunDir(cwd) {
|
|
2499
2565
|
const cache = await loadRunCache(cwd);
|
|
2500
2566
|
if (!cache?.lastRunDir) return void 0;
|
|
2501
|
-
if (!existsSync3(cache.lastRunDir)) return void 0;
|
|
2567
|
+
if (!existsSync4(cache.lastRunDir)) return void 0;
|
|
2502
2568
|
return cache.lastRunDir;
|
|
2503
2569
|
}
|
|
2504
2570
|
async function saveRunCache(cwd, resultPath) {
|
|
@@ -4205,22 +4271,43 @@ async function validateConfigFile(filePath) {
|
|
|
4205
4271
|
});
|
|
4206
4272
|
} else {
|
|
4207
4273
|
const resultsRecord = results;
|
|
4208
|
-
if (
|
|
4274
|
+
if (resultsRecord.mode !== "github") {
|
|
4209
4275
|
errors.push({
|
|
4210
4276
|
severity: "error",
|
|
4211
4277
|
filePath,
|
|
4212
|
-
location: "results.
|
|
4213
|
-
message: "Field 'results.
|
|
4278
|
+
location: "results.mode",
|
|
4279
|
+
message: "Field 'results.mode' must be 'github'"
|
|
4214
4280
|
});
|
|
4215
4281
|
}
|
|
4216
|
-
if (typeof resultsRecord.
|
|
4282
|
+
if (typeof resultsRecord.repo !== "string" || resultsRecord.repo.trim().length === 0) {
|
|
4217
4283
|
errors.push({
|
|
4218
4284
|
severity: "error",
|
|
4219
4285
|
filePath,
|
|
4220
|
-
location: "results.
|
|
4221
|
-
message: "Field 'results.
|
|
4286
|
+
location: "results.repo",
|
|
4287
|
+
message: "Field 'results.repo' must be a non-empty string"
|
|
4222
4288
|
});
|
|
4223
4289
|
}
|
|
4290
|
+
if (resultsRecord.path !== void 0) {
|
|
4291
|
+
if (typeof resultsRecord.path !== "string" || resultsRecord.path.trim().length === 0) {
|
|
4292
|
+
errors.push({
|
|
4293
|
+
severity: "error",
|
|
4294
|
+
filePath,
|
|
4295
|
+
location: "results.path",
|
|
4296
|
+
message: "Field 'results.path' must be a non-empty string"
|
|
4297
|
+
});
|
|
4298
|
+
} else {
|
|
4299
|
+
const p = resultsRecord.path.trim();
|
|
4300
|
+
const isFilesystemPath = p.startsWith("/") || p.startsWith("~/") || p.startsWith("~\\") || p === "~" || /^[A-Za-z]:[/\\]/.test(p);
|
|
4301
|
+
if (!isFilesystemPath) {
|
|
4302
|
+
errors.push({
|
|
4303
|
+
severity: "error",
|
|
4304
|
+
filePath,
|
|
4305
|
+
location: "results.path",
|
|
4306
|
+
message: `'results.path' must be an absolute or home-relative filesystem path (e.g., ~/data/agentv-results). Found: '${p}'. Remove 'path' to use the default.`
|
|
4307
|
+
});
|
|
4308
|
+
}
|
|
4309
|
+
}
|
|
4310
|
+
}
|
|
4224
4311
|
if (resultsRecord.auto_push !== void 0 && typeof resultsRecord.auto_push !== "boolean") {
|
|
4225
4312
|
errors.push({
|
|
4226
4313
|
severity: "error",
|
|
@@ -5385,7 +5472,7 @@ async function runEvalCommand(input) {
|
|
|
5385
5472
|
const explicitResumeDir = options.outputDir ?? options.artifacts;
|
|
5386
5473
|
if (explicitResumeDir) {
|
|
5387
5474
|
const resumeIndexPath = path15.join(path15.resolve(explicitResumeDir), "index.jsonl");
|
|
5388
|
-
if (existsSync4(resumeIndexPath)) {
|
|
5475
|
+
if (existsSync5(resumeIndexPath)) {
|
|
5389
5476
|
const content = await readFile8(resumeIndexPath, "utf8");
|
|
5390
5477
|
const existingResults = parseJsonlResults(content);
|
|
5391
5478
|
resumeSkipKeys = /* @__PURE__ */ new Set();
|
|
@@ -5468,7 +5555,7 @@ async function runEvalCommand(input) {
|
|
|
5468
5555
|
const useFileExport = !!options.otelFile;
|
|
5469
5556
|
if (options.exportOtel || useFileExport) {
|
|
5470
5557
|
try {
|
|
5471
|
-
const { OtelTraceExporter, OTEL_BACKEND_PRESETS } = await import("./dist-2E6ZNN32.js");
|
|
5558
|
+
const { OtelTraceExporter, OTEL_BACKEND_PRESETS } = await import("./dist-CRYAFKLS.js");
|
|
5472
5559
|
let endpoint = process.env.OTEL_EXPORTER_OTLP_ENDPOINT;
|
|
5473
5560
|
let headers = {};
|
|
5474
5561
|
if (options.otelBackend) {
|
|
@@ -5667,7 +5754,7 @@ async function runEvalCommand(input) {
|
|
|
5667
5754
|
const activeTestFiles = resolvedTestFiles.filter((f) => fileMetadata.has(f));
|
|
5668
5755
|
let transcriptProviderFactory;
|
|
5669
5756
|
if (options.transcript) {
|
|
5670
|
-
const { TranscriptProvider } = await import("./dist-2E6ZNN32.js");
|
|
5757
|
+
const { TranscriptProvider } = await import("./dist-CRYAFKLS.js");
|
|
5671
5758
|
const transcriptProvider = await TranscriptProvider.fromFile(options.transcript);
|
|
5672
5759
|
const totalTests = [...fileMetadata.values()].reduce(
|
|
5673
5760
|
(sum, meta) => sum + meta.testCases.length,
|
|
@@ -5844,7 +5931,7 @@ async function runEvalCommand(input) {
|
|
|
5844
5931
|
if (usesDefaultArtifactWorkspace && allResults.length > 0) {
|
|
5845
5932
|
const evalFile = activeTestFiles.length === 1 ? activeTestFiles[0] : "";
|
|
5846
5933
|
if (isResumeAppend) {
|
|
5847
|
-
const { writePerTestArtifacts } = await import("./artifact-writer-K4B2S7OE.js");
|
|
5934
|
+
const { writePerTestArtifacts } = await import("./artifact-writer-FZ5RUHWC.js");
|
|
5848
5935
|
await writePerTestArtifacts(allResults, runDir, {
|
|
5849
5936
|
experiment: normalizeExperimentName(options.experiment)
|
|
5850
5937
|
});
|
|
@@ -6050,6 +6137,7 @@ export {
|
|
|
6050
6137
|
syncRemoteResults,
|
|
6051
6138
|
listMergedResultFiles,
|
|
6052
6139
|
findRunById,
|
|
6140
|
+
ensureRemoteRunAvailable,
|
|
6053
6141
|
maybeAutoExportRunArtifacts,
|
|
6054
6142
|
resolveRunCacheFile,
|
|
6055
6143
|
loadRunCache,
|
|
@@ -6068,4 +6156,4 @@ export {
|
|
|
6068
6156
|
getCategories,
|
|
6069
6157
|
filterByCategory
|
|
6070
6158
|
};
|
|
6071
|
-
//# sourceMappingURL=chunk-BCZHBAUK.js.map
|
|
6159
|
+
//# sourceMappingURL=chunk-ZFTDIK4V.js.map
|