agentv 4.30.0 → 4.31.1-next.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30)
  1. package/dist/{artifact-writer-JJLJUUUY.js → artifact-writer-FZ5RUHWC.js} +4 -4
  2. package/dist/{chunk-L3FMZXCD.js → chunk-377GONL7.js} +30 -29
  3. package/dist/chunk-377GONL7.js.map +1 -0
  4. package/dist/{chunk-J4AMO3MD.js → chunk-A346PC3C.js} +102 -41
  5. package/dist/{chunk-J4AMO3MD.js.map → chunk-A346PC3C.js.map} +1 -1
  6. package/dist/{chunk-B2YJVU72.js → chunk-E6KALARL.js} +140 -64
  7. package/dist/chunk-E6KALARL.js.map +1 -0
  8. package/dist/{chunk-YFVD6FC6.js → chunk-IJPWTVDU.js} +3 -3
  9. package/dist/{chunk-LUZRCQNH.js → chunk-LOYPSIE7.js} +278 -52
  10. package/dist/chunk-LOYPSIE7.js.map +1 -0
  11. package/dist/cli.js +5 -5
  12. package/dist/{dist-GYCHDLTZ.js → dist-CRYAFKLS.js} +11 -7
  13. package/dist/index.js +5 -5
  14. package/dist/{interactive-DIQOOZX4.js → interactive-O5EUQPCE.js} +5 -5
  15. package/dist/studio/assets/index-9UixPaIJ.js +116 -0
  16. package/dist/studio/assets/index-BAvxZ1rb.css +1 -0
  17. package/dist/studio/assets/{index-SVeKAE-f.js → index-DPrj3J9P.js} +1 -1
  18. package/dist/studio/index.html +2 -2
  19. package/dist/{ts-eval-loader-JL5DGTJL-U5LTKGOE.js → ts-eval-loader-XR6DNOZ3-GA7B5N26.js} +2 -2
  20. package/package.json +1 -1
  21. package/dist/chunk-B2YJVU72.js.map +0 -1
  22. package/dist/chunk-L3FMZXCD.js.map +0 -1
  23. package/dist/chunk-LUZRCQNH.js.map +0 -1
  24. package/dist/studio/assets/index-BUmBj4ej.js +0 -116
  25. package/dist/studio/assets/index-CIpCCDKl.css +0 -1
  26. /package/dist/{artifact-writer-JJLJUUUY.js.map → artifact-writer-FZ5RUHWC.js.map} +0 -0
  27. /package/dist/{chunk-YFVD6FC6.js.map → chunk-IJPWTVDU.js.map} +0 -0
  28. /package/dist/{dist-GYCHDLTZ.js.map → dist-CRYAFKLS.js.map} +0 -0
  29. /package/dist/{interactive-DIQOOZX4.js.map → interactive-O5EUQPCE.js.map} +0 -0
  30. /package/dist/{ts-eval-loader-JL5DGTJL-U5LTKGOE.js.map → ts-eval-loader-XR6DNOZ3-GA7B5N26.js.map} +0 -0
@@ -8,6 +8,7 @@ import {
8
8
  detectPackageManager,
9
9
  discoverEvalFiles,
10
10
  enforceRequiredVersion,
11
+ ensureRemoteRunAvailable,
11
12
  fetchLatestVersion,
12
13
  findRepoRoot,
13
14
  findRunById,
@@ -45,7 +46,7 @@ import {
45
46
  validateFileReferences,
46
47
  validateTargetsFile,
47
48
  validateWorkspacePaths
48
- } from "./chunk-B2YJVU72.js";
49
+ } from "./chunk-E6KALARL.js";
49
50
  import {
50
51
  RESULT_INDEX_FILENAME,
51
52
  aggregateRunDir,
@@ -53,7 +54,7 @@ import {
53
54
  resolveRunManifestPath,
54
55
  toSnakeCaseDeep as toSnakeCaseDeep2,
55
56
  writeArtifactsFromResults
56
- } from "./chunk-YFVD6FC6.js";
57
+ } from "./chunk-IJPWTVDU.js";
57
58
  import {
58
59
  DEFAULT_CATEGORY,
59
60
  addProject,
@@ -73,7 +74,7 @@ import {
73
74
  toTranscriptJsonLines,
74
75
  transpileEvalYamlFile,
75
76
  trimBaselineResult
76
- } from "./chunk-LUZRCQNH.js";
77
+ } from "./chunk-LOYPSIE7.js";
77
78
  import {
78
79
  DEFAULT_THRESHOLD,
79
80
  createBuiltinRegistry,
@@ -103,7 +104,7 @@ import {
103
104
  runStartsWithAssertion,
104
105
  toCamelCaseDeep,
105
106
  toSnakeCaseDeep
106
- } from "./chunk-L3FMZXCD.js";
107
+ } from "./chunk-377GONL7.js";
107
108
  import {
108
109
  __commonJS,
109
110
  __require,
@@ -4053,7 +4054,7 @@ var evalRunCommand = command({
4053
4054
  },
4054
4055
  handler: async (args) => {
4055
4056
  if (args.evalPaths.length === 0 && process.stdin.isTTY) {
4056
- const { launchInteractiveWizard } = await import("./interactive-DIQOOZX4.js");
4057
+ const { launchInteractiveWizard } = await import("./interactive-O5EUQPCE.js");
4057
4058
  await launchInteractiveWizard();
4058
4059
  return;
4059
4060
  }
@@ -11104,16 +11105,69 @@ function inferExperimentFromRunId(runId) {
11104
11105
  }
11105
11106
  return experiment;
11106
11107
  }
11108
+ async function ensureRunReadable(searchDir, meta) {
11109
+ await ensureRemoteRunAvailable(searchDir, meta);
11110
+ }
11111
+ async function loadManifestResultsForMeta(searchDir, meta) {
11112
+ await ensureRunReadable(searchDir, meta);
11113
+ return loadManifestResults(meta.path);
11114
+ }
11115
+ async function loadLightweightResultsForMeta(searchDir, meta) {
11116
+ await ensureRunReadable(searchDir, meta);
11117
+ return loadLightweightResults(meta.path);
11118
+ }
11119
+ async function parseManifestForMeta(searchDir, meta) {
11120
+ await ensureRunReadable(searchDir, meta);
11121
+ return parseResultManifest(readFileSync12(meta.path, "utf8"));
11122
+ }
11123
+ var DEFAULT_RUN_PAGE_LIMIT = 50;
11124
+ function parseRunPageLimit(limitParam) {
11125
+ if (limitParam === void 0) {
11126
+ return void 0;
11127
+ }
11128
+ if (!/^\d+$/.test(limitParam)) {
11129
+ return null;
11130
+ }
11131
+ const limit = Number.parseInt(limitParam, 10);
11132
+ return limit > 0 ? limit : null;
11133
+ }
11134
+ function paginateRuns(runs, cursor, limit) {
11135
+ if (limit === void 0) {
11136
+ return { runs };
11137
+ }
11138
+ if (!cursor) {
11139
+ const page2 = runs.slice(0, limit);
11140
+ return {
11141
+ runs: page2,
11142
+ ...limit < runs.length && page2.length > 0 ? { nextCursor: page2.at(-1)?.filename } : {}
11143
+ };
11144
+ }
11145
+ const cursorIndex = runs.findIndex((run2) => run2.filename === cursor);
11146
+ if (cursorIndex === -1) {
11147
+ return { runs: [] };
11148
+ }
11149
+ const page = runs.slice(cursorIndex + 1, cursorIndex + 1 + limit);
11150
+ return {
11151
+ runs: page,
11152
+ ...cursorIndex + 1 + limit < runs.length && page.length > 0 ? { nextCursor: page.at(-1)?.filename } : {}
11153
+ };
11154
+ }
11107
11155
  async function handleRuns(c4, { searchDir, agentvDir }) {
11108
11156
  const { runs: metas } = await listMergedResultFiles(searchDir);
11109
11157
  const { threshold: passThreshold } = loadStudioConfig(agentvDir);
11110
- return c4.json({
11111
- runs: metas.map((m) => {
11158
+ const parsedLimit = parseRunPageLimit(c4.req.query("limit"));
11159
+ if (parsedLimit === null) {
11160
+ return c4.json({ error: "limit must be a positive integer" }, 400);
11161
+ }
11162
+ const cursor = c4.req.query("cursor");
11163
+ const limit = parsedLimit ?? (cursor ? DEFAULT_RUN_PAGE_LIMIT : void 0);
11164
+ const runs = await Promise.all(
11165
+ metas.map(async (m) => {
11112
11166
  let target;
11113
11167
  let experiment = inferExperimentFromRunId(m.raw_filename);
11114
11168
  let passRate = m.passRate;
11115
11169
  try {
11116
- const records = loadLightweightResults(m.path);
11170
+ const records = await loadLightweightResultsForMeta(searchDir, m);
11117
11171
  if (records.length > 0) {
11118
11172
  target = records[0].target;
11119
11173
  experiment = records[0].experiment ?? experiment;
@@ -11141,6 +11195,11 @@ async function handleRuns(c4, { searchDir, agentvDir }) {
11141
11195
  ...liveStatus && { status: liveStatus }
11142
11196
  };
11143
11197
  })
11198
+ );
11199
+ const page = paginateRuns(runs, cursor, limit);
11200
+ return c4.json({
11201
+ runs: page.runs,
11202
+ ...page.nextCursor ? { next_cursor: page.nextCursor } : {}
11144
11203
  });
11145
11204
  }
11146
11205
  async function handleRunLog(c4, { searchDir }) {
@@ -11148,17 +11207,17 @@ async function handleRunLog(c4, { searchDir }) {
11148
11207
  const meta = await findRunById(searchDir, filename);
11149
11208
  if (!meta) return c4.json({ error: "Run not found" }, 404);
11150
11209
  if (meta.source === "remote") {
11151
- return c4.json({ error: "Console log is not available for remote runs" }, 404);
11210
+ return c4.json({ error: "Run log is not available for remote runs" }, 404);
11152
11211
  }
11153
11212
  const logPath = path21.join(path21.dirname(meta.path), "console.log");
11154
11213
  if (!existsSync15(logPath)) {
11155
- return c4.json({ error: "Console log not found for this run" }, 404);
11214
+ return c4.json({ error: "Run log not found for this run" }, 404);
11156
11215
  }
11157
11216
  try {
11158
11217
  const content = readFileSync12(logPath, "utf8");
11159
11218
  return c4.text(content);
11160
11219
  } catch {
11161
- return c4.json({ error: "Failed to read console log" }, 500);
11220
+ return c4.json({ error: "Failed to read run log" }, 500);
11162
11221
  }
11163
11222
  }
11164
11223
  async function handleRunDetail(c4, { searchDir }) {
@@ -11166,7 +11225,7 @@ async function handleRunDetail(c4, { searchDir }) {
11166
11225
  const meta = await findRunById(searchDir, filename);
11167
11226
  if (!meta) return c4.json({ error: "Run not found" }, 404);
11168
11227
  try {
11169
- const loaded = loadManifestResults(meta.path);
11228
+ const loaded = await loadManifestResultsForMeta(searchDir, meta);
11170
11229
  const resumeMeta = meta.source === "local" ? deriveResumeMeta(searchDir, meta.path) : {};
11171
11230
  return c4.json({
11172
11231
  results: stripHeavyFields(loaded),
@@ -11205,7 +11264,7 @@ async function handleRunSuites(c4, { searchDir, agentvDir }) {
11205
11264
  const meta = await findRunById(searchDir, filename);
11206
11265
  if (!meta) return c4.json({ error: "Run not found" }, 404);
11207
11266
  try {
11208
- const loaded = loadManifestResults(meta.path);
11267
+ const loaded = await loadManifestResultsForMeta(searchDir, meta);
11209
11268
  const { threshold: pass_threshold } = loadStudioConfig(agentvDir);
11210
11269
  const suiteMap = /* @__PURE__ */ new Map();
11211
11270
  for (const r of loaded) {
@@ -11233,7 +11292,7 @@ async function handleRunCategories(c4, { searchDir, agentvDir }) {
11233
11292
  const meta = await findRunById(searchDir, filename);
11234
11293
  if (!meta) return c4.json({ error: "Run not found" }, 404);
11235
11294
  try {
11236
- const loaded = loadManifestResults(meta.path);
11295
+ const loaded = await loadManifestResultsForMeta(searchDir, meta);
11237
11296
  const { threshold: pass_threshold } = loadStudioConfig(agentvDir);
11238
11297
  const categoryMap = /* @__PURE__ */ new Map();
11239
11298
  for (const r of loaded) {
@@ -11269,7 +11328,7 @@ async function handleCategorySuites(c4, { searchDir, agentvDir }) {
11269
11328
  const meta = await findRunById(searchDir, filename);
11270
11329
  if (!meta) return c4.json({ error: "Run not found" }, 404);
11271
11330
  try {
11272
- const loaded = loadManifestResults(meta.path);
11331
+ const loaded = await loadManifestResultsForMeta(searchDir, meta);
11273
11332
  const { threshold: pass_threshold } = loadStudioConfig(agentvDir);
11274
11333
  const filtered = loaded.filter((r) => (r.category ?? DEFAULT_CATEGORY) === category);
11275
11334
  const suiteMap = /* @__PURE__ */ new Map();
@@ -11299,7 +11358,7 @@ async function handleEvalDetail(c4, { searchDir }) {
11299
11358
  const meta = await findRunById(searchDir, filename);
11300
11359
  if (!meta) return c4.json({ error: "Run not found" }, 404);
11301
11360
  try {
11302
- const loaded = loadManifestResults(meta.path);
11361
+ const loaded = await loadManifestResultsForMeta(searchDir, meta);
11303
11362
  const result = loaded.find((r) => r.testId === evalId);
11304
11363
  if (!result) return c4.json({ error: "Eval not found" }, 404);
11305
11364
  return c4.json({ eval: result });
@@ -11313,8 +11372,7 @@ async function handleEvalFiles(c4, { searchDir }) {
11313
11372
  const meta = await findRunById(searchDir, filename);
11314
11373
  if (!meta) return c4.json({ error: "Run not found" }, 404);
11315
11374
  try {
11316
- const content = readFileSync12(meta.path, "utf8");
11317
- const records = parseResultManifest(content);
11375
+ const records = await parseManifestForMeta(searchDir, meta);
11318
11376
  const record = records.find((r) => r.test_id === evalId);
11319
11377
  if (!record) return c4.json({ error: "Eval not found" }, 404);
11320
11378
  const baseDir = path21.dirname(meta.path);
@@ -11348,6 +11406,7 @@ async function handleEvalFileContent(c4, { searchDir }) {
11348
11406
  const markerIdx = c4.req.path.indexOf(marker);
11349
11407
  const filePath = markerIdx >= 0 ? c4.req.path.slice(markerIdx + marker.length) : "";
11350
11408
  if (!filePath) return c4.json({ error: "No file path specified" }, 400);
11409
+ await ensureRunReadable(searchDir, meta);
11351
11410
  const baseDir = path21.dirname(meta.path);
11352
11411
  const absolutePath = path21.resolve(baseDir, filePath);
11353
11412
  if (!absolutePath.startsWith(path21.resolve(baseDir) + path21.sep) && absolutePath !== path21.resolve(baseDir)) {
@@ -11370,7 +11429,7 @@ async function handleExperiments(c4, { searchDir, agentvDir }) {
11370
11429
  const experimentMap = /* @__PURE__ */ new Map();
11371
11430
  for (const m of metas) {
11372
11431
  try {
11373
- const records = loadLightweightResults(m.path);
11432
+ const records = await loadLightweightResultsForMeta(searchDir, m);
11374
11433
  for (const r of records) {
11375
11434
  const experiment = r.experiment ?? "default";
11376
11435
  const entry = experimentMap.get(experiment) ?? {
@@ -11422,7 +11481,7 @@ async function handleCompare(c4, { searchDir, agentvDir }) {
11422
11481
  const runTags = tagsEntry?.tags ?? [];
11423
11482
  if (!runTags.some((t) => filterTags.has(t))) continue;
11424
11483
  }
11425
- const records = loadLightweightResults(m.path);
11484
+ const records = await loadLightweightResultsForMeta(searchDir, m);
11426
11485
  const runTestMap = /* @__PURE__ */ new Map();
11427
11486
  let runEvalCount = 0;
11428
11487
  let runPassedCount = 0;
@@ -11538,7 +11597,7 @@ async function handleTargets(c4, { searchDir, agentvDir }) {
11538
11597
  const targetMap = /* @__PURE__ */ new Map();
11539
11598
  for (const m of metas) {
11540
11599
  try {
11541
- const records = loadLightweightResults(m.path);
11600
+ const records = await loadLightweightResultsForMeta(searchDir, m);
11542
11601
  for (const r of records) {
11543
11602
  const target = r.target ?? "default";
11544
11603
  const entry = targetMap.get(target) ?? {
@@ -11734,7 +11793,7 @@ function createApp(results, resultDir, cwd, sourceFile, options) {
11734
11793
  let target;
11735
11794
  let experiment = inferExperimentFromRunId(m.raw_filename);
11736
11795
  try {
11737
- const records = loadLightweightResults(m.path);
11796
+ const records = await loadLightweightResultsForMeta(p.path, m);
11738
11797
  if (records.length > 0) {
11739
11798
  target = records[0].target;
11740
11799
  experiment = records[0].experiment ?? experiment;
@@ -11854,23 +11913,25 @@ function createApp(results, resultDir, cwd, sourceFile, options) {
11854
11913
  });
11855
11914
  app2.get("/api/index", async (c4) => {
11856
11915
  const { runs: metas } = await listMergedResultFiles(searchDir);
11857
- const entries2 = metas.map((m) => {
11858
- let totalCostUsd = 0;
11859
- try {
11860
- const loaded = loadManifestResults(m.path);
11861
- totalCostUsd = loaded.reduce((sum, r) => sum + (r.costUsd ?? 0), 0);
11862
- } catch {
11863
- }
11864
- return {
11865
- run_filename: m.filename,
11866
- display_name: m.displayName,
11867
- test_count: m.testCount,
11868
- pass_rate: m.passRate,
11869
- avg_score: m.avgScore,
11870
- total_cost_usd: totalCostUsd,
11871
- timestamp: m.timestamp
11872
- };
11873
- });
11916
+ const entries2 = await Promise.all(
11917
+ metas.map(async (m) => {
11918
+ let totalCostUsd = 0;
11919
+ try {
11920
+ const loaded = await loadManifestResultsForMeta(searchDir, m);
11921
+ totalCostUsd = loaded.reduce((sum, r) => sum + (r.costUsd ?? 0), 0);
11922
+ } catch {
11923
+ }
11924
+ return {
11925
+ run_filename: m.filename,
11926
+ display_name: m.displayName,
11927
+ test_count: m.testCount,
11928
+ pass_rate: m.passRate,
11929
+ avg_score: m.avgScore,
11930
+ total_cost_usd: totalCostUsd,
11931
+ timestamp: m.timestamp
11932
+ };
11933
+ })
11934
+ );
11874
11935
  return c4.json({ entries: entries2 });
11875
11936
  });
11876
11937
  app2.get(
@@ -12076,7 +12137,7 @@ var resultsServeCommand = command({
12076
12137
  }
12077
12138
  const registry = loadProjectRegistry();
12078
12139
  const { projectDashboard } = resolveDashboardMode(registry.projects.length, { single });
12079
- await syncProjects(registry.projects);
12140
+ syncProjects(registry.projects).catch((err2) => console.error("Background project sync failed:", err2));
12080
12141
  try {
12081
12142
  let results = [];
12082
12143
  let sourceFile;
@@ -13583,4 +13644,4 @@ export {
13583
13644
  preprocessArgv,
13584
13645
  runCli
13585
13646
  };
13586
- //# sourceMappingURL=chunk-J4AMO3MD.js.map
13647
+ //# sourceMappingURL=chunk-A346PC3C.js.map