agentv 4.30.0 → 4.31.1-next.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{artifact-writer-JJLJUUUY.js → artifact-writer-FZ5RUHWC.js} +4 -4
- package/dist/{chunk-L3FMZXCD.js → chunk-377GONL7.js} +30 -29
- package/dist/chunk-377GONL7.js.map +1 -0
- package/dist/{chunk-J4AMO3MD.js → chunk-A346PC3C.js} +102 -41
- package/dist/{chunk-J4AMO3MD.js.map → chunk-A346PC3C.js.map} +1 -1
- package/dist/{chunk-B2YJVU72.js → chunk-E6KALARL.js} +140 -64
- package/dist/chunk-E6KALARL.js.map +1 -0
- package/dist/{chunk-YFVD6FC6.js → chunk-IJPWTVDU.js} +3 -3
- package/dist/{chunk-LUZRCQNH.js → chunk-LOYPSIE7.js} +278 -52
- package/dist/chunk-LOYPSIE7.js.map +1 -0
- package/dist/cli.js +5 -5
- package/dist/{dist-GYCHDLTZ.js → dist-CRYAFKLS.js} +11 -7
- package/dist/index.js +5 -5
- package/dist/{interactive-DIQOOZX4.js → interactive-O5EUQPCE.js} +5 -5
- package/dist/studio/assets/index-9UixPaIJ.js +116 -0
- package/dist/studio/assets/index-BAvxZ1rb.css +1 -0
- package/dist/studio/assets/{index-SVeKAE-f.js → index-DPrj3J9P.js} +1 -1
- package/dist/studio/index.html +2 -2
- package/dist/{ts-eval-loader-JL5DGTJL-U5LTKGOE.js → ts-eval-loader-XR6DNOZ3-GA7B5N26.js} +2 -2
- package/package.json +1 -1
- package/dist/chunk-B2YJVU72.js.map +0 -1
- package/dist/chunk-L3FMZXCD.js.map +0 -1
- package/dist/chunk-LUZRCQNH.js.map +0 -1
- package/dist/studio/assets/index-BUmBj4ej.js +0 -116
- package/dist/studio/assets/index-CIpCCDKl.css +0 -1
- /package/dist/{artifact-writer-JJLJUUUY.js.map → artifact-writer-FZ5RUHWC.js.map} +0 -0
- /package/dist/{chunk-YFVD6FC6.js.map → chunk-IJPWTVDU.js.map} +0 -0
- /package/dist/{dist-GYCHDLTZ.js.map → dist-CRYAFKLS.js.map} +0 -0
- /package/dist/{interactive-DIQOOZX4.js.map → interactive-O5EUQPCE.js.map} +0 -0
- /package/dist/{ts-eval-loader-JL5DGTJL-U5LTKGOE.js.map → ts-eval-loader-XR6DNOZ3-GA7B5N26.js.map} +0 -0
|
@@ -8,6 +8,7 @@ import {
|
|
|
8
8
|
detectPackageManager,
|
|
9
9
|
discoverEvalFiles,
|
|
10
10
|
enforceRequiredVersion,
|
|
11
|
+
ensureRemoteRunAvailable,
|
|
11
12
|
fetchLatestVersion,
|
|
12
13
|
findRepoRoot,
|
|
13
14
|
findRunById,
|
|
@@ -45,7 +46,7 @@ import {
|
|
|
45
46
|
validateFileReferences,
|
|
46
47
|
validateTargetsFile,
|
|
47
48
|
validateWorkspacePaths
|
|
48
|
-
} from "./chunk-
|
|
49
|
+
} from "./chunk-E6KALARL.js";
|
|
49
50
|
import {
|
|
50
51
|
RESULT_INDEX_FILENAME,
|
|
51
52
|
aggregateRunDir,
|
|
@@ -53,7 +54,7 @@ import {
|
|
|
53
54
|
resolveRunManifestPath,
|
|
54
55
|
toSnakeCaseDeep as toSnakeCaseDeep2,
|
|
55
56
|
writeArtifactsFromResults
|
|
56
|
-
} from "./chunk-
|
|
57
|
+
} from "./chunk-IJPWTVDU.js";
|
|
57
58
|
import {
|
|
58
59
|
DEFAULT_CATEGORY,
|
|
59
60
|
addProject,
|
|
@@ -73,7 +74,7 @@ import {
|
|
|
73
74
|
toTranscriptJsonLines,
|
|
74
75
|
transpileEvalYamlFile,
|
|
75
76
|
trimBaselineResult
|
|
76
|
-
} from "./chunk-
|
|
77
|
+
} from "./chunk-LOYPSIE7.js";
|
|
77
78
|
import {
|
|
78
79
|
DEFAULT_THRESHOLD,
|
|
79
80
|
createBuiltinRegistry,
|
|
@@ -103,7 +104,7 @@ import {
|
|
|
103
104
|
runStartsWithAssertion,
|
|
104
105
|
toCamelCaseDeep,
|
|
105
106
|
toSnakeCaseDeep
|
|
106
|
-
} from "./chunk-
|
|
107
|
+
} from "./chunk-377GONL7.js";
|
|
107
108
|
import {
|
|
108
109
|
__commonJS,
|
|
109
110
|
__require,
|
|
@@ -4053,7 +4054,7 @@ var evalRunCommand = command({
|
|
|
4053
4054
|
},
|
|
4054
4055
|
handler: async (args) => {
|
|
4055
4056
|
if (args.evalPaths.length === 0 && process.stdin.isTTY) {
|
|
4056
|
-
const { launchInteractiveWizard } = await import("./interactive-
|
|
4057
|
+
const { launchInteractiveWizard } = await import("./interactive-O5EUQPCE.js");
|
|
4057
4058
|
await launchInteractiveWizard();
|
|
4058
4059
|
return;
|
|
4059
4060
|
}
|
|
@@ -11104,16 +11105,69 @@ function inferExperimentFromRunId(runId) {
|
|
|
11104
11105
|
}
|
|
11105
11106
|
return experiment;
|
|
11106
11107
|
}
|
|
11108
|
+
/**
 * Waits until the run described by `meta` has been passed through
 * `ensureRemoteRunAvailable`, so callers can read its files afterwards.
 *
 * NOTE(review): presumably this materializes remote runs locally and is a
 * no-op for local ones — confirm against `ensureRemoteRunAvailable`.
 *
 * @param {string} searchDir - Directory the run listing was resolved from.
 * @param {object} meta - Run metadata entry (as returned by the run listing helpers).
 * @returns {Promise<void>} Resolves once the availability check has finished;
 *   whatever the delegate resolves with is intentionally discarded.
 */
async function ensureRunReadable(searchDir, meta) {
  await ensureRemoteRunAvailable(searchDir, meta);
}
|
|
11111
|
+
/**
 * Loads the manifest results for a run after making sure the run is readable.
 *
 * @param {string} searchDir - Directory the run listing was resolved from.
 * @param {object} meta - Run metadata; its `path` is handed to `loadManifestResults`.
 * @returns {Promise<*>} Whatever `loadManifestResults` returns for `meta.path`.
 */
async function loadManifestResultsForMeta(searchDir, meta) {
  await ensureRunReadable(searchDir, meta);
  // Read the path only after the readiness check, exactly like the inline form did.
  const manifestPath = meta.path;
  return loadManifestResults(manifestPath);
}
|
|
11115
|
+
/**
 * Loads the lightweight result records for a run after making sure the run
 * is readable.
 *
 * @param {string} searchDir - Directory the run listing was resolved from.
 * @param {object} meta - Run metadata; its `path` is handed to `loadLightweightResults`.
 * @returns {Promise<*>} Whatever `loadLightweightResults` returns for `meta.path`.
 */
async function loadLightweightResultsForMeta(searchDir, meta) {
  await ensureRunReadable(searchDir, meta);
  // Read the path only after the readiness check, exactly like the inline form did.
  const resultsPath = meta.path;
  return loadLightweightResults(resultsPath);
}
|
|
11119
|
+
/**
 * Reads a run's manifest file from disk and parses it, after making sure the
 * run is readable.
 *
 * @param {string} searchDir - Directory the run listing was resolved from.
 * @param {object} meta - Run metadata; `meta.path` is read as UTF-8 text.
 * @returns {Promise<*>} The value produced by `parseResultManifest` for the file contents.
 */
async function parseManifestForMeta(searchDir, meta) {
  await ensureRunReadable(searchDir, meta);
  const manifestText = readFileSync12(meta.path, "utf8");
  return parseResultManifest(manifestText);
}
|
|
11123
|
+
// Page size used by handleRuns when a request supplies a `cursor` but no explicit `limit`.
var DEFAULT_RUN_PAGE_LIMIT = 50;
|
|
11124
|
+
/**
 * Parses the `limit` query parameter of the runs listing.
 *
 * @param {string | undefined} limitParam - Raw query value, or `undefined` when absent.
 * @returns {number | null | undefined}
 *   - `undefined` when no limit was supplied (the caller decides the default),
 *   - `null` when the value is not a positive integer (the caller answers 400),
 *   - the parsed limit otherwise.
 */
function parseRunPageLimit(limitParam) {
  if (limitParam === undefined) {
    return undefined;
  }
  // Only plain decimal digits are accepted: no sign, whitespace, fraction or exponent.
  if (!/^\d+$/.test(limitParam)) {
    return null;
  }
  const limit = Number.parseInt(limitParam, 10);
  // Very long digit strings parse to an imprecise value or even Infinity, which
  // is not a "positive integer"; reject anything outside the safe integer range.
  if (!Number.isSafeInteger(limit)) {
    return null;
  }
  return limit > 0 ? limit : null;
}
|
|
11134
|
+
/**
 * Returns one page of `runs` using filename-based cursor pagination.
 *
 * @param {Array<{ filename: string }>} runs - Full, already ordered list of runs.
 * @param {string | undefined} cursor - Filename of the last run of the previous
 *   page; a falsy value means "start from the beginning".
 * @param {number | undefined} limit - Maximum page size; `undefined` disables
 *   pagination and returns every run (the cursor is ignored in that case).
 * @returns {{ runs: Array, nextCursor?: string }} The page, plus `nextCursor`
 *   (filename of the last run on the page) only when more runs follow it.
 *   An unknown cursor yields an empty page.
 */
function paginateRuns(runs, cursor, limit) {
  if (limit === undefined) {
    return { runs };
  }

  // Where the page begins: index 0, or just past the run the cursor names.
  let start = 0;
  if (cursor) {
    const cursorIndex = runs.findIndex((candidate) => candidate.filename === cursor);
    if (cursorIndex === -1) {
      return { runs: [] };
    }
    start = cursorIndex + 1;
  }

  const end = start + limit;
  const result = { runs: runs.slice(start, end) };
  const hasMore = end < runs.length;
  if (hasMore && result.runs.length > 0) {
    result.nextCursor = result.runs.at(-1)?.filename;
  }
  return result;
}
|
|
11107
11155
|
async function handleRuns(c4, { searchDir, agentvDir }) {
|
|
11108
11156
|
const { runs: metas } = await listMergedResultFiles(searchDir);
|
|
11109
11157
|
const { threshold: passThreshold } = loadStudioConfig(agentvDir);
|
|
11110
|
-
|
|
11111
|
-
|
|
11158
|
+
const parsedLimit = parseRunPageLimit(c4.req.query("limit"));
|
|
11159
|
+
if (parsedLimit === null) {
|
|
11160
|
+
return c4.json({ error: "limit must be a positive integer" }, 400);
|
|
11161
|
+
}
|
|
11162
|
+
const cursor = c4.req.query("cursor");
|
|
11163
|
+
const limit = parsedLimit ?? (cursor ? DEFAULT_RUN_PAGE_LIMIT : void 0);
|
|
11164
|
+
const runs = await Promise.all(
|
|
11165
|
+
metas.map(async (m) => {
|
|
11112
11166
|
let target;
|
|
11113
11167
|
let experiment = inferExperimentFromRunId(m.raw_filename);
|
|
11114
11168
|
let passRate = m.passRate;
|
|
11115
11169
|
try {
|
|
11116
|
-
const records =
|
|
11170
|
+
const records = await loadLightweightResultsForMeta(searchDir, m);
|
|
11117
11171
|
if (records.length > 0) {
|
|
11118
11172
|
target = records[0].target;
|
|
11119
11173
|
experiment = records[0].experiment ?? experiment;
|
|
@@ -11141,6 +11195,11 @@ async function handleRuns(c4, { searchDir, agentvDir }) {
|
|
|
11141
11195
|
...liveStatus && { status: liveStatus }
|
|
11142
11196
|
};
|
|
11143
11197
|
})
|
|
11198
|
+
);
|
|
11199
|
+
const page = paginateRuns(runs, cursor, limit);
|
|
11200
|
+
return c4.json({
|
|
11201
|
+
runs: page.runs,
|
|
11202
|
+
...page.nextCursor ? { next_cursor: page.nextCursor } : {}
|
|
11144
11203
|
});
|
|
11145
11204
|
}
|
|
11146
11205
|
async function handleRunLog(c4, { searchDir }) {
|
|
@@ -11148,17 +11207,17 @@ async function handleRunLog(c4, { searchDir }) {
|
|
|
11148
11207
|
const meta = await findRunById(searchDir, filename);
|
|
11149
11208
|
if (!meta) return c4.json({ error: "Run not found" }, 404);
|
|
11150
11209
|
if (meta.source === "remote") {
|
|
11151
|
-
return c4.json({ error: "
|
|
11210
|
+
return c4.json({ error: "Run log is not available for remote runs" }, 404);
|
|
11152
11211
|
}
|
|
11153
11212
|
const logPath = path21.join(path21.dirname(meta.path), "console.log");
|
|
11154
11213
|
if (!existsSync15(logPath)) {
|
|
11155
|
-
return c4.json({ error: "
|
|
11214
|
+
return c4.json({ error: "Run log not found for this run" }, 404);
|
|
11156
11215
|
}
|
|
11157
11216
|
try {
|
|
11158
11217
|
const content = readFileSync12(logPath, "utf8");
|
|
11159
11218
|
return c4.text(content);
|
|
11160
11219
|
} catch {
|
|
11161
|
-
return c4.json({ error: "Failed to read
|
|
11220
|
+
return c4.json({ error: "Failed to read run log" }, 500);
|
|
11162
11221
|
}
|
|
11163
11222
|
}
|
|
11164
11223
|
async function handleRunDetail(c4, { searchDir }) {
|
|
@@ -11166,7 +11225,7 @@ async function handleRunDetail(c4, { searchDir }) {
|
|
|
11166
11225
|
const meta = await findRunById(searchDir, filename);
|
|
11167
11226
|
if (!meta) return c4.json({ error: "Run not found" }, 404);
|
|
11168
11227
|
try {
|
|
11169
|
-
const loaded =
|
|
11228
|
+
const loaded = await loadManifestResultsForMeta(searchDir, meta);
|
|
11170
11229
|
const resumeMeta = meta.source === "local" ? deriveResumeMeta(searchDir, meta.path) : {};
|
|
11171
11230
|
return c4.json({
|
|
11172
11231
|
results: stripHeavyFields(loaded),
|
|
@@ -11205,7 +11264,7 @@ async function handleRunSuites(c4, { searchDir, agentvDir }) {
|
|
|
11205
11264
|
const meta = await findRunById(searchDir, filename);
|
|
11206
11265
|
if (!meta) return c4.json({ error: "Run not found" }, 404);
|
|
11207
11266
|
try {
|
|
11208
|
-
const loaded =
|
|
11267
|
+
const loaded = await loadManifestResultsForMeta(searchDir, meta);
|
|
11209
11268
|
const { threshold: pass_threshold } = loadStudioConfig(agentvDir);
|
|
11210
11269
|
const suiteMap = /* @__PURE__ */ new Map();
|
|
11211
11270
|
for (const r of loaded) {
|
|
@@ -11233,7 +11292,7 @@ async function handleRunCategories(c4, { searchDir, agentvDir }) {
|
|
|
11233
11292
|
const meta = await findRunById(searchDir, filename);
|
|
11234
11293
|
if (!meta) return c4.json({ error: "Run not found" }, 404);
|
|
11235
11294
|
try {
|
|
11236
|
-
const loaded =
|
|
11295
|
+
const loaded = await loadManifestResultsForMeta(searchDir, meta);
|
|
11237
11296
|
const { threshold: pass_threshold } = loadStudioConfig(agentvDir);
|
|
11238
11297
|
const categoryMap = /* @__PURE__ */ new Map();
|
|
11239
11298
|
for (const r of loaded) {
|
|
@@ -11269,7 +11328,7 @@ async function handleCategorySuites(c4, { searchDir, agentvDir }) {
|
|
|
11269
11328
|
const meta = await findRunById(searchDir, filename);
|
|
11270
11329
|
if (!meta) return c4.json({ error: "Run not found" }, 404);
|
|
11271
11330
|
try {
|
|
11272
|
-
const loaded =
|
|
11331
|
+
const loaded = await loadManifestResultsForMeta(searchDir, meta);
|
|
11273
11332
|
const { threshold: pass_threshold } = loadStudioConfig(agentvDir);
|
|
11274
11333
|
const filtered = loaded.filter((r) => (r.category ?? DEFAULT_CATEGORY) === category);
|
|
11275
11334
|
const suiteMap = /* @__PURE__ */ new Map();
|
|
@@ -11299,7 +11358,7 @@ async function handleEvalDetail(c4, { searchDir }) {
|
|
|
11299
11358
|
const meta = await findRunById(searchDir, filename);
|
|
11300
11359
|
if (!meta) return c4.json({ error: "Run not found" }, 404);
|
|
11301
11360
|
try {
|
|
11302
|
-
const loaded =
|
|
11361
|
+
const loaded = await loadManifestResultsForMeta(searchDir, meta);
|
|
11303
11362
|
const result = loaded.find((r) => r.testId === evalId);
|
|
11304
11363
|
if (!result) return c4.json({ error: "Eval not found" }, 404);
|
|
11305
11364
|
return c4.json({ eval: result });
|
|
@@ -11313,8 +11372,7 @@ async function handleEvalFiles(c4, { searchDir }) {
|
|
|
11313
11372
|
const meta = await findRunById(searchDir, filename);
|
|
11314
11373
|
if (!meta) return c4.json({ error: "Run not found" }, 404);
|
|
11315
11374
|
try {
|
|
11316
|
-
const
|
|
11317
|
-
const records = parseResultManifest(content);
|
|
11375
|
+
const records = await parseManifestForMeta(searchDir, meta);
|
|
11318
11376
|
const record = records.find((r) => r.test_id === evalId);
|
|
11319
11377
|
if (!record) return c4.json({ error: "Eval not found" }, 404);
|
|
11320
11378
|
const baseDir = path21.dirname(meta.path);
|
|
@@ -11348,6 +11406,7 @@ async function handleEvalFileContent(c4, { searchDir }) {
|
|
|
11348
11406
|
const markerIdx = c4.req.path.indexOf(marker);
|
|
11349
11407
|
const filePath = markerIdx >= 0 ? c4.req.path.slice(markerIdx + marker.length) : "";
|
|
11350
11408
|
if (!filePath) return c4.json({ error: "No file path specified" }, 400);
|
|
11409
|
+
await ensureRunReadable(searchDir, meta);
|
|
11351
11410
|
const baseDir = path21.dirname(meta.path);
|
|
11352
11411
|
const absolutePath = path21.resolve(baseDir, filePath);
|
|
11353
11412
|
if (!absolutePath.startsWith(path21.resolve(baseDir) + path21.sep) && absolutePath !== path21.resolve(baseDir)) {
|
|
@@ -11370,7 +11429,7 @@ async function handleExperiments(c4, { searchDir, agentvDir }) {
|
|
|
11370
11429
|
const experimentMap = /* @__PURE__ */ new Map();
|
|
11371
11430
|
for (const m of metas) {
|
|
11372
11431
|
try {
|
|
11373
|
-
const records =
|
|
11432
|
+
const records = await loadLightweightResultsForMeta(searchDir, m);
|
|
11374
11433
|
for (const r of records) {
|
|
11375
11434
|
const experiment = r.experiment ?? "default";
|
|
11376
11435
|
const entry = experimentMap.get(experiment) ?? {
|
|
@@ -11422,7 +11481,7 @@ async function handleCompare(c4, { searchDir, agentvDir }) {
|
|
|
11422
11481
|
const runTags = tagsEntry?.tags ?? [];
|
|
11423
11482
|
if (!runTags.some((t) => filterTags.has(t))) continue;
|
|
11424
11483
|
}
|
|
11425
|
-
const records =
|
|
11484
|
+
const records = await loadLightweightResultsForMeta(searchDir, m);
|
|
11426
11485
|
const runTestMap = /* @__PURE__ */ new Map();
|
|
11427
11486
|
let runEvalCount = 0;
|
|
11428
11487
|
let runPassedCount = 0;
|
|
@@ -11538,7 +11597,7 @@ async function handleTargets(c4, { searchDir, agentvDir }) {
|
|
|
11538
11597
|
const targetMap = /* @__PURE__ */ new Map();
|
|
11539
11598
|
for (const m of metas) {
|
|
11540
11599
|
try {
|
|
11541
|
-
const records =
|
|
11600
|
+
const records = await loadLightweightResultsForMeta(searchDir, m);
|
|
11542
11601
|
for (const r of records) {
|
|
11543
11602
|
const target = r.target ?? "default";
|
|
11544
11603
|
const entry = targetMap.get(target) ?? {
|
|
@@ -11734,7 +11793,7 @@ function createApp(results, resultDir, cwd, sourceFile, options) {
|
|
|
11734
11793
|
let target;
|
|
11735
11794
|
let experiment = inferExperimentFromRunId(m.raw_filename);
|
|
11736
11795
|
try {
|
|
11737
|
-
const records =
|
|
11796
|
+
const records = await loadLightweightResultsForMeta(p.path, m);
|
|
11738
11797
|
if (records.length > 0) {
|
|
11739
11798
|
target = records[0].target;
|
|
11740
11799
|
experiment = records[0].experiment ?? experiment;
|
|
@@ -11854,23 +11913,25 @@ function createApp(results, resultDir, cwd, sourceFile, options) {
|
|
|
11854
11913
|
});
|
|
11855
11914
|
app2.get("/api/index", async (c4) => {
|
|
11856
11915
|
const { runs: metas } = await listMergedResultFiles(searchDir);
|
|
11857
|
-
const entries2 =
|
|
11858
|
-
|
|
11859
|
-
|
|
11860
|
-
|
|
11861
|
-
|
|
11862
|
-
|
|
11863
|
-
|
|
11864
|
-
|
|
11865
|
-
|
|
11866
|
-
|
|
11867
|
-
|
|
11868
|
-
|
|
11869
|
-
|
|
11870
|
-
|
|
11871
|
-
|
|
11872
|
-
|
|
11873
|
-
|
|
11916
|
+
const entries2 = await Promise.all(
|
|
11917
|
+
metas.map(async (m) => {
|
|
11918
|
+
let totalCostUsd = 0;
|
|
11919
|
+
try {
|
|
11920
|
+
const loaded = await loadManifestResultsForMeta(searchDir, m);
|
|
11921
|
+
totalCostUsd = loaded.reduce((sum, r) => sum + (r.costUsd ?? 0), 0);
|
|
11922
|
+
} catch {
|
|
11923
|
+
}
|
|
11924
|
+
return {
|
|
11925
|
+
run_filename: m.filename,
|
|
11926
|
+
display_name: m.displayName,
|
|
11927
|
+
test_count: m.testCount,
|
|
11928
|
+
pass_rate: m.passRate,
|
|
11929
|
+
avg_score: m.avgScore,
|
|
11930
|
+
total_cost_usd: totalCostUsd,
|
|
11931
|
+
timestamp: m.timestamp
|
|
11932
|
+
};
|
|
11933
|
+
})
|
|
11934
|
+
);
|
|
11874
11935
|
return c4.json({ entries: entries2 });
|
|
11875
11936
|
});
|
|
11876
11937
|
app2.get(
|
|
@@ -12076,7 +12137,7 @@ var resultsServeCommand = command({
|
|
|
12076
12137
|
}
|
|
12077
12138
|
const registry = loadProjectRegistry();
|
|
12078
12139
|
const { projectDashboard } = resolveDashboardMode(registry.projects.length, { single });
|
|
12079
|
-
|
|
12140
|
+
syncProjects(registry.projects).catch((err2) => console.error("Background project sync failed:", err2));
|
|
12080
12141
|
try {
|
|
12081
12142
|
let results = [];
|
|
12082
12143
|
let sourceFile;
|
|
@@ -13583,4 +13644,4 @@ export {
|
|
|
13583
13644
|
preprocessArgv,
|
|
13584
13645
|
runCli
|
|
13585
13646
|
};
|
|
13586
|
-
//# sourceMappingURL=chunk-
|
|
13647
|
+
//# sourceMappingURL=chunk-A346PC3C.js.map
|