agentv 4.12.8 → 4.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -42,7 +42,7 @@ import {
42
42
  validateFileReferences,
43
43
  validateTargetsFile,
44
44
  writeArtifactsFromResults
45
- } from "./chunk-ESNYJD7I.js";
45
+ } from "./chunk-I4BXYYI6.js";
46
46
  import {
47
47
  DEFAULT_CATEGORY,
48
48
  DEFAULT_THRESHOLD,
@@ -76,7 +76,7 @@ import {
76
76
  toTranscriptJsonLine,
77
77
  transpileEvalYamlFile,
78
78
  trimBaselineResult
79
- } from "./chunk-LRVNXL6J.js";
79
+ } from "./chunk-72AVLOSN.js";
80
80
  import {
81
81
  __commonJS,
82
82
  __require,
@@ -3912,7 +3912,7 @@ var evalRunCommand = command({
3912
3912
  },
3913
3913
  handler: async (args) => {
3914
3914
  if (args.evalPaths.length === 0 && process.stdin.isTTY) {
3915
- const { launchInteractiveWizard } = await import("./interactive-3RS54ZZX.js");
3915
+ const { launchInteractiveWizard } = await import("./interactive-ENAAA4RQ.js");
3916
3916
  await launchInteractiveWizard();
3917
3917
  return;
3918
3918
  }
@@ -7250,8 +7250,8 @@ var resultsCommand = subcommands({
7250
7250
  });
7251
7251
 
7252
7252
  // src/commands/results/serve.ts
7253
- import { existsSync as existsSync12, readFileSync as readFileSync10, readdirSync as readdirSync4, statSync as statSync5, writeFileSync as writeFileSync4 } from "node:fs";
7254
- import path16 from "node:path";
7253
+ import { existsSync as existsSync13, readFileSync as readFileSync11, readdirSync as readdirSync4, statSync as statSync5, writeFileSync as writeFileSync5 } from "node:fs";
7254
+ import path17 from "node:path";
7255
7255
  import { fileURLToPath as fileURLToPath3 } from "node:url";
7256
7256
  import { Hono } from "hono";
7257
7257
 
@@ -7637,19 +7637,96 @@ Process error: ${err2.message}`;
7637
7637
  });
7638
7638
  }
7639
7639
 
7640
- // src/commands/results/studio-config.ts
7641
- import { existsSync as existsSync11, mkdirSync as mkdirSync2, readFileSync as readFileSync9, writeFileSync as writeFileSync3 } from "node:fs";
7640
+ // src/commands/results/run-tags.ts
7641
+ import { existsSync as existsSync11, readFileSync as readFileSync9, unlinkSync as unlinkSync2, writeFileSync as writeFileSync3 } from "node:fs";
7642
7642
  import path15 from "node:path";
7643
// File name of the per-run tags sidecar; runTagsPath joins it onto the manifest's directory.
+ var RUN_TAGS_FILENAME = "tags.json";
7644
// Maximum number of distinct tags normalizeTags accepts for a single run.
+ var MAX_TAGS_PER_RUN = 20;
7645
// Maximum length (string .length after trimming) normalizeTags accepts for one tag.
+ var MAX_TAG_LENGTH = 60;
7646
/**
 * Locate the tags sidecar file that belongs to a run.
 *
 * The sidecar sits in the same directory as the run manifest and is always
 * named RUN_TAGS_FILENAME.
 *
 * @param {string} manifestPath - Path of the run's manifest file.
 * @returns {string} Path of the tags file next to that manifest.
 */
function runTagsPath(manifestPath) {
  const runDir = path15.dirname(manifestPath);
  return path15.join(runDir, RUN_TAGS_FILENAME);
}
7649
/**
 * Read the tags stored next to a run manifest.
 *
 * The sidecar may have been edited by hand, so the stored values are
 * re-normalized on the way out: non-string entries are dropped, every tag is
 * trimmed, blank tags are skipped and duplicates are collapsed (first
 * occurrence wins). This keeps the returned tags comparable with the trimmed
 * tags that normalizeTags writes and that tag filters are built from.
 *
 * This function never throws: a missing file, unreadable file, invalid JSON
 * or an unexpected shape all yield `undefined`.
 *
 * @param {string} manifestPath - Path of the run's manifest file.
 * @returns {{ tags: string[], updated_at: string } | undefined} The stored
 *   tags (never an empty list) with their last-update timestamp ("" when the
 *   file carries no string `updated_at`), or `undefined` when there are none.
 */
function readRunTags(manifestPath) {
  const fp = runTagsPath(manifestPath);
  if (!existsSync11(fp)) return void 0;
  try {
    const parsed = JSON.parse(readFileSync9(fp, "utf8"));
    if (!parsed || typeof parsed !== "object") return void 0;
    if (!Array.isArray(parsed.tags)) return void 0;
    // A Set keeps insertion order, so de-duplication preserves the stored order.
    const unique = /* @__PURE__ */ new Set();
    for (const raw of parsed.tags) {
      if (typeof raw !== "string") continue;
      const trimmed = raw.trim();
      if (trimmed !== "") unique.add(trimmed);
    }
    if (unique.size === 0) return void 0;
    return {
      tags: [...unique],
      updated_at: typeof parsed.updated_at === "string" ? parsed.updated_at : ""
    };
  } catch {
    // Best-effort read: a corrupt or unreadable sidecar is treated as "no tags".
    return void 0;
  }
}
7669
/**
 * Persist the tags of a run in its sidecar file.
 *
 * The input is validated and normalized by normalizeTags first. When nothing
 * is left after normalization the sidecar is removed instead of being written
 * with an empty list.
 *
 * @param {string} manifestPath - Path of the run's manifest file.
 * @param {unknown[]} tags - Requested tags; validated by normalizeTags.
 * @returns {{ tags: string[], updated_at: string } | null} The entry that was
 *   written, or `null` when the tag list was empty and the sidecar was deleted.
 * @throws {Error} When normalizeTags rejects the input, or when the underlying
 *   filesystem call fails.
 */
function writeRunTags(manifestPath, tags) {
  const normalized = normalizeTags(tags);
  if (normalized.length === 0) {
    deleteRunTags(manifestPath);
    return null;
  }
  const updatedAt = (/* @__PURE__ */ new Date()).toISOString();
  const entry = { tags: normalized, updated_at: updatedAt };
  const target = runTagsPath(manifestPath);
  // Pretty-printed JSON followed by a single trailing newline.
  const serialized = JSON.stringify(entry, null, 2);
  writeFileSync3(target, `${serialized}\n`, "utf8");
  return entry;
}
7683
/**
 * Remove the tags sidecar of a run, if there is one.
 *
 * Does nothing when the sidecar does not exist.
 *
 * @param {string} manifestPath - Path of the run's manifest file.
 * @returns {void}
 * @throws {Error} When the sidecar exists but cannot be unlinked.
 */
function deleteRunTags(manifestPath) {
  const target = runTagsPath(manifestPath);
  if (!existsSync11(target)) return;
  unlinkSync2(target);
}
7689
/**
 * Validate and canonicalize a list of tags.
 *
 * Every entry is trimmed; blank entries are skipped and duplicates are
 * collapsed, keeping the position of the first occurrence.
 *
 * @param {Iterable<unknown>} tags - Candidate tags.
 * @returns {string[]} Trimmed, de-duplicated tags in first-seen order.
 * @throws {Error} When an entry is not a string, a trimmed tag is longer than
 *   MAX_TAG_LENGTH, a tag contains a control character (code below 32, or
 *   127), or more than MAX_TAGS_PER_RUN distinct tags remain.
 */
function normalizeTags(tags) {
  // Matches exactly the code units below 32 plus DEL (127).
  const controlChar = /[\x00-\x1f\x7f]/;
  // Insertion-ordered, so converting back to an array keeps first-seen order.
  const unique = /* @__PURE__ */ new Set();
  for (const candidate of tags) {
    if (typeof candidate !== "string") {
      throw new Error("Tags must be strings");
    }
    const tag = candidate.trim();
    if (tag.length === 0) {
      continue;
    }
    if (tag.length > MAX_TAG_LENGTH) {
      const preview = tag.slice(0, 20);
      throw new Error(`Tag "${preview}\u2026" exceeds ${MAX_TAG_LENGTH} characters`);
    }
    if (controlChar.test(tag)) {
      throw new Error("Tag must not contain control characters");
    }
    unique.add(tag);
  }
  // The limit applies to the distinct tags that survive normalization.
  if (unique.size > MAX_TAGS_PER_RUN) {
    throw new Error(`Too many tags (max ${MAX_TAGS_PER_RUN})`);
  }
  return [...unique];
}
7716
+
7717
+ // src/commands/results/studio-config.ts
7718
+ import { existsSync as existsSync12, mkdirSync as mkdirSync2, readFileSync as readFileSync10, writeFileSync as writeFileSync4 } from "node:fs";
7719
+ import path16 from "node:path";
7643
7720
  import { parse as parseYaml, stringify as stringifyYaml2 } from "yaml";
7644
7721
  var DEFAULTS = {
7645
7722
  threshold: DEFAULT_THRESHOLD
7646
7723
  };
7647
7724
  function loadStudioConfig(agentvDir) {
7648
- const configPath = path15.join(agentvDir, "config.yaml");
7649
- if (!existsSync11(configPath)) {
7725
+ const configPath = path16.join(agentvDir, "config.yaml");
7726
+ if (!existsSync12(configPath)) {
7650
7727
  return { ...DEFAULTS };
7651
7728
  }
7652
- const raw = readFileSync9(configPath, "utf-8");
7729
+ const raw = readFileSync10(configPath, "utf-8");
7653
7730
  const parsed = parseYaml(raw);
7654
7731
  if (!parsed || typeof parsed !== "object") {
7655
7732
  return { ...DEFAULTS };
@@ -7671,13 +7748,13 @@ function loadStudioConfig(agentvDir) {
7671
7748
  };
7672
7749
  }
7673
7750
  function saveStudioConfig(agentvDir, config) {
7674
- if (!existsSync11(agentvDir)) {
7751
+ if (!existsSync12(agentvDir)) {
7675
7752
  mkdirSync2(agentvDir, { recursive: true });
7676
7753
  }
7677
- const configPath = path15.join(agentvDir, "config.yaml");
7754
+ const configPath = path16.join(agentvDir, "config.yaml");
7678
7755
  let existing = {};
7679
- if (existsSync11(configPath)) {
7680
- const raw = readFileSync9(configPath, "utf-8");
7756
+ if (existsSync12(configPath)) {
7757
+ const raw = readFileSync10(configPath, "utf-8");
7681
7758
  const parsed = parseYaml(raw);
7682
7759
  if (parsed && typeof parsed === "object" && !Array.isArray(parsed)) {
7683
7760
  existing = parsed;
@@ -7693,14 +7770,14 @@ function saveStudioConfig(agentvDir, config) {
7693
7770
  existing.studio = { ...config };
7694
7771
  }
7695
7772
  const yamlStr = stringifyYaml2(existing);
7696
- writeFileSync3(configPath, yamlStr, "utf-8");
7773
+ writeFileSync4(configPath, yamlStr, "utf-8");
7697
7774
  }
7698
7775
 
7699
7776
  // src/commands/results/serve.ts
7700
7777
  async function resolveSourceFile2(source, cwd) {
7701
7778
  if (source) {
7702
7779
  let resolved = resolveResultSourcePath(source, cwd);
7703
- if (!existsSync12(resolved)) {
7780
+ if (!existsSync13(resolved)) {
7704
7781
  throw new Error(`Source file not found: ${resolved}`);
7705
7782
  }
7706
7783
  resolved = resolveRunManifestPath(resolved);
@@ -7708,7 +7785,7 @@ async function resolveSourceFile2(source, cwd) {
7708
7785
  }
7709
7786
  const cache = await loadRunCache(cwd);
7710
7787
  const cachedFile = cache ? resolveRunCacheFile(cache) : "";
7711
- if (cachedFile && existsSync12(cachedFile)) {
7788
+ if (cachedFile && existsSync13(cachedFile)) {
7712
7789
  return cachedFile;
7713
7790
  }
7714
7791
  const metas = listResultFiles(cwd, 10);
@@ -7738,26 +7815,26 @@ function resolveDashboardMode(projectCount, options) {
7738
7815
  return { isMultiProject: projectCount > 1, showMultiWarning: false };
7739
7816
  }
7740
7817
  function feedbackPath(resultDir) {
7741
- return path16.join(resultDir, "feedback.json");
7818
+ return path17.join(resultDir, "feedback.json");
7742
7819
  }
7743
7820
  function readFeedback(cwd) {
7744
7821
  const fp = feedbackPath(cwd);
7745
- if (!existsSync12(fp)) {
7822
+ if (!existsSync13(fp)) {
7746
7823
  return { reviews: [] };
7747
7824
  }
7748
7825
  try {
7749
- return JSON.parse(readFileSync10(fp, "utf8"));
7826
+ return JSON.parse(readFileSync11(fp, "utf8"));
7750
7827
  } catch (err2) {
7751
7828
  console.error(`Warning: could not parse ${fp}, starting fresh: ${err2.message}`);
7752
7829
  return { reviews: [] };
7753
7830
  }
7754
7831
  }
7755
7832
  function writeFeedback(cwd, data) {
7756
- writeFileSync4(feedbackPath(cwd), `${JSON.stringify(data, null, 2)}
7833
+ writeFileSync5(feedbackPath(cwd), `${JSON.stringify(data, null, 2)}
7757
7834
  `, "utf8");
7758
7835
  }
7759
7836
  function buildFileTree(dirPath, relativeTo) {
7760
- if (!existsSync12(dirPath) || !statSync5(dirPath).isDirectory()) {
7837
+ if (!existsSync13(dirPath) || !statSync5(dirPath).isDirectory()) {
7761
7838
  return [];
7762
7839
  }
7763
7840
  const entries2 = readdirSync4(dirPath, { withFileTypes: true });
@@ -7765,8 +7842,8 @@ function buildFileTree(dirPath, relativeTo) {
7765
7842
  if (a.isDirectory() !== b.isDirectory()) return a.isDirectory() ? -1 : 1;
7766
7843
  return a.name.localeCompare(b.name);
7767
7844
  }).map((entry) => {
7768
- const fullPath = path16.join(dirPath, entry.name);
7769
- const relPath = path16.relative(relativeTo, fullPath);
7845
+ const fullPath = path17.join(dirPath, entry.name);
7846
+ const relPath = path17.relative(relativeTo, fullPath);
7770
7847
  if (entry.isDirectory()) {
7771
7848
  return {
7772
7849
  name: entry.name,
@@ -7779,7 +7856,7 @@ function buildFileTree(dirPath, relativeTo) {
7779
7856
  });
7780
7857
  }
7781
7858
  function inferLanguage(filePath) {
7782
- const ext = path16.extname(filePath).toLowerCase();
7859
+ const ext = path17.extname(filePath).toLowerCase();
7783
7860
  const langMap = {
7784
7861
  ".json": "json",
7785
7862
  ".jsonl": "json",
@@ -7834,6 +7911,7 @@ async function handleRuns(c4, { searchDir, agentvDir }) {
7834
7911
  }
7835
7912
  } catch {
7836
7913
  }
7914
+ const tagsEntry = readRunTags(m.path);
7837
7915
  return {
7838
7916
  filename: m.filename,
7839
7917
  display_name: m.displayName,
@@ -7845,7 +7923,8 @@ async function handleRuns(c4, { searchDir, agentvDir }) {
7845
7923
  size_bytes: m.sizeBytes,
7846
7924
  source: m.source,
7847
7925
  ...target && { target },
7848
- ...experiment && { experiment }
7926
+ ...experiment && { experiment },
7927
+ ...tagsEntry && { tags: tagsEntry.tags }
7849
7928
  };
7850
7929
  })
7851
7930
  });
@@ -7978,11 +8057,11 @@ async function handleEvalFiles(c4, { searchDir }) {
7978
8057
  const meta = await findRunById(searchDir, filename);
7979
8058
  if (!meta) return c4.json({ error: "Run not found" }, 404);
7980
8059
  try {
7981
- const content = readFileSync10(meta.path, "utf8");
8060
+ const content = readFileSync11(meta.path, "utf8");
7982
8061
  const records = parseResultManifest(content);
7983
8062
  const record = records.find((r) => r.test_id === evalId);
7984
8063
  if (!record) return c4.json({ error: "Eval not found" }, 404);
7985
- const baseDir = path16.dirname(meta.path);
8064
+ const baseDir = path17.dirname(meta.path);
7986
8065
  const knownPaths = [
7987
8066
  record.grading_path,
7988
8067
  record.timing_path,
@@ -7991,14 +8070,14 @@ async function handleEvalFiles(c4, { searchDir }) {
7991
8070
  record.response_path
7992
8071
  ].filter((p) => !!p);
7993
8072
  if (knownPaths.length === 0) return c4.json({ files: [] });
7994
- const artifactDirs = knownPaths.map((p) => path16.dirname(p));
8073
+ const artifactDirs = knownPaths.map((p) => path17.dirname(p));
7995
8074
  let commonDir = artifactDirs[0];
7996
8075
  for (const dir of artifactDirs) {
7997
8076
  while (!dir.startsWith(commonDir)) {
7998
- commonDir = path16.dirname(commonDir);
8077
+ commonDir = path17.dirname(commonDir);
7999
8078
  }
8000
8079
  }
8001
- const artifactAbsDir = path16.join(baseDir, commonDir);
8080
+ const artifactAbsDir = path17.join(baseDir, commonDir);
8002
8081
  const files = buildFileTree(artifactAbsDir, baseDir);
8003
8082
  return c4.json({ files });
8004
8083
  } catch {
@@ -8013,16 +8092,16 @@ async function handleEvalFileContent(c4, { searchDir }) {
8013
8092
  const markerIdx = c4.req.path.indexOf(marker);
8014
8093
  const filePath = markerIdx >= 0 ? c4.req.path.slice(markerIdx + marker.length) : "";
8015
8094
  if (!filePath) return c4.json({ error: "No file path specified" }, 400);
8016
- const baseDir = path16.dirname(meta.path);
8017
- const absolutePath = path16.resolve(baseDir, filePath);
8018
- if (!absolutePath.startsWith(path16.resolve(baseDir) + path16.sep) && absolutePath !== path16.resolve(baseDir)) {
8095
+ const baseDir = path17.dirname(meta.path);
8096
+ const absolutePath = path17.resolve(baseDir, filePath);
8097
+ if (!absolutePath.startsWith(path17.resolve(baseDir) + path17.sep) && absolutePath !== path17.resolve(baseDir)) {
8019
8098
  return c4.json({ error: "Path traversal not allowed" }, 403);
8020
8099
  }
8021
- if (!existsSync12(absolutePath) || !statSync5(absolutePath).isFile()) {
8100
+ if (!existsSync13(absolutePath) || !statSync5(absolutePath).isFile()) {
8022
8101
  return c4.json({ error: "File not found" }, 404);
8023
8102
  }
8024
8103
  try {
8025
- const fileContent = readFileSync10(absolutePath, "utf8");
8104
+ const fileContent = readFileSync11(absolutePath, "utf8");
8026
8105
  const language = inferLanguage(absolutePath);
8027
8106
  return c4.json({ content: fileContent, language });
8028
8107
  } catch {
@@ -8071,17 +8150,38 @@ async function handleExperiments(c4, { searchDir, agentvDir }) {
8071
8150
  async function handleCompare(c4, { searchDir, agentvDir }) {
8072
8151
  const { runs: metas } = await listMergedResultFiles(searchDir);
8073
8152
  const { threshold: pass_threshold } = loadStudioConfig(agentvDir);
8153
+ const tagsParam = c4.req.query("tags") ?? "";
8154
+ const filterTags = new Set(
8155
+ tagsParam.split(",").map((t) => t.trim()).filter(Boolean)
8156
+ );
8074
8157
  const cellMap = /* @__PURE__ */ new Map();
8158
+ const runEntries = [];
8075
8159
  const experimentsSet = /* @__PURE__ */ new Set();
8076
8160
  const targetsSet = /* @__PURE__ */ new Set();
8161
+ const MAX_TESTS_PER_CELL = 100;
8077
8162
  for (const m of metas) {
8078
8163
  try {
8164
+ const tagsEntry = readRunTags(m.path);
8165
+ if (filterTags.size > 0) {
8166
+ const runTags = tagsEntry?.tags ?? [];
8167
+ if (!runTags.some((t) => filterTags.has(t))) continue;
8168
+ }
8079
8169
  const records = loadLightweightResults(m.path);
8170
+ const runTestMap = /* @__PURE__ */ new Map();
8171
+ let runEvalCount = 0;
8172
+ let runPassedCount = 0;
8173
+ let runScoreSum = 0;
8174
+ let runExperiment = "default";
8175
+ let runTarget = "default";
8176
+ let runStartedAt = m.timestamp;
8080
8177
  for (const r of records) {
8081
8178
  const experiment = r.experiment ?? "default";
8082
8179
  const target = r.target ?? "default";
8083
8180
  experimentsSet.add(experiment);
8084
8181
  targetsSet.add(target);
8182
+ runExperiment = experiment;
8183
+ runTarget = target;
8184
+ if (r.timestamp && r.timestamp < runStartedAt) runStartedAt = r.timestamp;
8085
8185
  const key = JSON.stringify([experiment, target]);
8086
8186
  const entry = cellMap.get(key) ?? {
8087
8187
  experiment,
@@ -8102,11 +8202,34 @@ async function handleCompare(c4, { searchDir, agentvDir }) {
8102
8202
  execution_status: r.executionStatus
8103
8203
  });
8104
8204
  cellMap.set(key, entry);
8105
- }
8205
+ runTestMap.set(r.testId, {
8206
+ test_id: r.testId,
8207
+ score: r.score,
8208
+ passed,
8209
+ execution_status: r.executionStatus
8210
+ });
8211
+ runEvalCount++;
8212
+ if (passed) runPassedCount++;
8213
+ runScoreSum += r.score;
8214
+ }
8215
+ if (runEvalCount === 0) continue;
8216
+ const runTests = [...runTestMap.values()].slice(-MAX_TESTS_PER_CELL);
8217
+ runEntries.push({
8218
+ run_id: m.filename,
8219
+ started_at: runStartedAt,
8220
+ experiment: runExperiment,
8221
+ target: runTarget,
8222
+ ...tagsEntry && { tags: tagsEntry.tags },
8223
+ source: m.source,
8224
+ eval_count: runEvalCount,
8225
+ passed_count: runPassedCount,
8226
+ pass_rate: runPassedCount / runEvalCount,
8227
+ avg_score: runScoreSum / runEvalCount,
8228
+ tests: runTests
8229
+ });
8106
8230
  } catch {
8107
8231
  }
8108
8232
  }
8109
- const MAX_TESTS_PER_CELL = 100;
8110
8233
  const cells = [...cellMap.values()].map((entry) => {
8111
8234
  const dedupMap = /* @__PURE__ */ new Map();
8112
8235
  for (const t of entry.tests) {
@@ -8124,10 +8247,12 @@ async function handleCompare(c4, { searchDir, agentvDir }) {
8124
8247
  tests: cappedTests
8125
8248
  };
8126
8249
  });
8250
+ runEntries.sort((a, b) => b.started_at.localeCompare(a.started_at));
8127
8251
  return c4.json({
8128
8252
  experiments: [...experimentsSet].sort(),
8129
8253
  targets: [...targetsSet].sort(),
8130
- cells
8254
+ cells,
8255
+ runs: runEntries
8131
8256
  });
8132
8257
  }
8133
8258
  async function handleTargets(c4, { searchDir, agentvDir }) {
@@ -8168,28 +8293,72 @@ function handleConfig(c4, { agentvDir, searchDir }, options) {
8168
8293
  return c4.json({
8169
8294
  ...loadStudioConfig(agentvDir),
8170
8295
  read_only: options?.readOnly === true,
8171
- project_name: path16.basename(searchDir),
8296
+ project_name: path17.basename(searchDir),
8172
8297
  multi_project_dashboard: options?.multiProjectDashboard === true
8173
8298
  });
8174
8299
  }
8175
8300
  function handleFeedbackRead(c4, { searchDir }) {
8176
- const resultsDir = path16.join(searchDir, ".agentv", "results");
8177
- return c4.json(readFeedback(existsSync12(resultsDir) ? resultsDir : searchDir));
8301
+ const resultsDir = path17.join(searchDir, ".agentv", "results");
8302
+ return c4.json(readFeedback(existsSync13(resultsDir) ? resultsDir : searchDir));
8303
+ }
8304
/**
 * PUT handler that replaces the tags of a local run.
 *
 * Expects a JSON body of the form `{ "tags": [...] }`. Responses:
 *   - 404 when findRunById finds no run for the `filename` route parameter
 *   - 400 when the run's source is "remote", the body is not valid JSON, the
 *     body is not an object, `tags` is not an array, or normalizeTags rejects
 *     the tags
 *   - 500 when the tags are valid but persisting them fails
 *   - otherwise the stored `{ tags, updated_at }`; an empty tag list clears
 *     the tags and is answered with `tags: []` and the current time
 *
 * @param {object} c4 - Request context; `req.param`, `req.json` and `json`
 *   are used.
 * @param {{ searchDir: string }} ctx - Directory in which the run is looked up.
 * @returns {Promise<unknown>} The value produced by `c4.json`.
 */
async function handleRunTagsPut(c4, { searchDir }) {
  const filename = c4.req.param("filename") ?? "";
  const meta = await findRunById(searchDir, filename);
  if (!meta) return c4.json({ error: "Run not found" }, 404);
  if (meta.source === "remote") {
    return c4.json({ error: "Tags can only be set on local runs" }, 400);
  }
  let body;
  try {
    body = await c4.req.json();
  } catch {
    return c4.json({ error: "Invalid JSON" }, 400);
  }
  if (!body || typeof body !== "object") {
    return c4.json({ error: "Invalid payload" }, 400);
  }
  const tags = body.tags;
  if (!Array.isArray(tags)) {
    return c4.json({ error: "Missing tags array" }, 400);
  }
  // Validate separately from persisting so that a rejected payload (client
  // error) is distinguishable from a failed write (server error).
  let cleaned;
  try {
    cleaned = normalizeTags(tags);
  } catch (err2) {
    return c4.json({ error: err2 instanceof Error ? err2.message : String(err2) }, 400);
  }
  try {
    const entry = writeRunTags(meta.path, cleaned);
    return c4.json({
      tags: entry?.tags ?? [],
      updated_at: entry?.updated_at ?? (/* @__PURE__ */ new Date()).toISOString()
    });
  } catch (err2) {
    return c4.json({ error: err2 instanceof Error ? err2.message : String(err2) }, 500);
  }
}
8334
/**
 * DELETE handler that removes all tags from a local run.
 *
 * Responses:
 *   - 404 when findRunById finds no run for the `filename` route parameter
 *   - 400 when the run's source is "remote"
 *   - 500 with the error message when removing the tags throws
 *   - `{ ok: true }` otherwise
 *
 * @param {object} c4 - Request context; `req.param` and `json` are used.
 * @param {{ searchDir: string }} ctx - Directory in which the run is looked up.
 * @returns {Promise<unknown>} The value produced by `c4.json`.
 */
async function handleRunTagsDelete(c4, { searchDir }) {
  const runId = c4.req.param("filename") ?? "";
  const run = await findRunById(searchDir, runId);
  if (!run) {
    return c4.json({ error: "Run not found" }, 404);
  }
  const isRemote = run.source === "remote";
  if (isRemote) {
    return c4.json({ error: "Tags can only be removed on local runs" }, 400);
  }
  try {
    deleteRunTags(run.path);
    const success = { ok: true };
    return c4.json(success);
  } catch (err2) {
    const failure = { error: err2.message };
    return c4.json(failure, 500);
  }
}
8179
8348
  function createApp(results, resultDir, cwd, sourceFile, options) {
8180
8349
  const searchDir = cwd ?? resultDir;
8181
- const agentvDir = path16.join(searchDir, ".agentv");
8350
+ const agentvDir = path17.join(searchDir, ".agentv");
8182
8351
  const defaultCtx = { searchDir, agentvDir };
8183
8352
  const readOnly = options?.readOnly === true;
8184
8353
  const app2 = new Hono();
8185
8354
  function withBenchmark(c4, handler) {
8186
8355
  const benchmark = getBenchmark(c4.req.param("benchmarkId") ?? "");
8187
- if (!benchmark || !existsSync12(benchmark.path)) {
8356
+ if (!benchmark || !existsSync13(benchmark.path)) {
8188
8357
  return c4.json({ error: "Project not found" }, 404);
8189
8358
  }
8190
8359
  return handler(c4, {
8191
8360
  searchDir: benchmark.path,
8192
- agentvDir: path16.join(benchmark.path, ".agentv")
8361
+ agentvDir: path17.join(benchmark.path, ".agentv")
8193
8362
  });
8194
8363
  }
8195
8364
  app2.post("/api/config", async (c4) => {
@@ -8349,6 +8518,18 @@ function createApp(results, resultDir, cwd, sourceFile, options) {
8349
8518
  app2.get("/api/remote/status", async (c4) => c4.json(await getRemoteResultsStatus(searchDir)));
8350
8519
  app2.post("/api/remote/sync", async (c4) => c4.json(await syncRemoteResults(searchDir)));
8351
8520
  app2.get("/api/runs", (c4) => handleRuns(c4, defaultCtx));
8521
+ app2.put("/api/runs/:filename/tags", (c4) => {
8522
+ if (readOnly) {
8523
+ return c4.json({ error: "Studio is running in read-only mode" }, 403);
8524
+ }
8525
+ return handleRunTagsPut(c4, defaultCtx);
8526
+ });
8527
+ app2.delete("/api/runs/:filename/tags", (c4) => {
8528
+ if (readOnly) {
8529
+ return c4.json({ error: "Studio is running in read-only mode" }, 403);
8530
+ }
8531
+ return handleRunTagsDelete(c4, defaultCtx);
8532
+ });
8352
8533
  app2.get("/api/runs/:filename", (c4) => handleRunDetail(c4, defaultCtx));
8353
8534
  app2.get("/api/runs/:filename/suites", (c4) => handleRunSuites(c4, defaultCtx));
8354
8535
  app2.get("/api/runs/:filename/categories", (c4) => handleRunCategories(c4, defaultCtx));
@@ -8450,6 +8631,18 @@ function createApp(results, resultDir, cwd, sourceFile, options) {
8450
8631
  (c4) => withBenchmark(c4, async (ctx, dataCtx) => ctx.json(await syncRemoteResults(dataCtx.searchDir)))
8451
8632
  );
8452
8633
  app2.get("/api/benchmarks/:benchmarkId/runs", (c4) => withBenchmark(c4, handleRuns));
8634
+ app2.put("/api/benchmarks/:benchmarkId/runs/:filename/tags", (c4) => {
8635
+ if (readOnly) {
8636
+ return c4.json({ error: "Studio is running in read-only mode" }, 403);
8637
+ }
8638
+ return withBenchmark(c4, handleRunTagsPut);
8639
+ });
8640
+ app2.delete("/api/benchmarks/:benchmarkId/runs/:filename/tags", (c4) => {
8641
+ if (readOnly) {
8642
+ return c4.json({ error: "Studio is running in read-only mode" }, 403);
8643
+ }
8644
+ return withBenchmark(c4, handleRunTagsDelete);
8645
+ });
8453
8646
  app2.get("/api/benchmarks/:benchmarkId/runs/:filename", (c4) => withBenchmark(c4, handleRunDetail));
8454
8647
  app2.get(
8455
8648
  "/api/benchmarks/:benchmarkId/runs/:filename/suites",
@@ -8492,20 +8685,20 @@ function createApp(results, resultDir, cwd, sourceFile, options) {
8492
8685
  { readOnly }
8493
8686
  );
8494
8687
  const studioDistPath = options?.studioDir ?? resolveStudioDistDir();
8495
- if (!studioDistPath || !existsSync12(path16.join(studioDistPath, "index.html"))) {
8688
+ if (!studioDistPath || !existsSync13(path17.join(studioDistPath, "index.html"))) {
8496
8689
  throw new Error('Studio dist not found. Run "bun run build" in apps/studio/ to build the SPA.');
8497
8690
  }
8498
8691
  app2.get("/", (c4) => {
8499
- const indexPath = path16.join(studioDistPath, "index.html");
8500
- if (existsSync12(indexPath)) return c4.html(readFileSync10(indexPath, "utf8"));
8692
+ const indexPath = path17.join(studioDistPath, "index.html");
8693
+ if (existsSync13(indexPath)) return c4.html(readFileSync11(indexPath, "utf8"));
8501
8694
  return c4.notFound();
8502
8695
  });
8503
8696
  app2.get("/assets/*", (c4) => {
8504
8697
  const assetPath = c4.req.path;
8505
- const filePath = path16.join(studioDistPath, assetPath);
8506
- if (!existsSync12(filePath)) return c4.notFound();
8507
- const content = readFileSync10(filePath);
8508
- const ext = path16.extname(filePath);
8698
+ const filePath = path17.join(studioDistPath, assetPath);
8699
+ if (!existsSync13(filePath)) return c4.notFound();
8700
+ const content = readFileSync11(filePath);
8701
+ const ext = path17.extname(filePath);
8509
8702
  const mimeTypes = {
8510
8703
  ".js": "application/javascript",
8511
8704
  ".css": "text/css",
@@ -8526,26 +8719,26 @@ function createApp(results, resultDir, cwd, sourceFile, options) {
8526
8719
  });
8527
8720
  app2.get("*", (c4) => {
8528
8721
  if (c4.req.path.startsWith("/api/")) return c4.json({ error: "Not found" }, 404);
8529
- const indexPath = path16.join(studioDistPath, "index.html");
8530
- if (existsSync12(indexPath)) return c4.html(readFileSync10(indexPath, "utf8"));
8722
+ const indexPath = path17.join(studioDistPath, "index.html");
8723
+ if (existsSync13(indexPath)) return c4.html(readFileSync11(indexPath, "utf8"));
8531
8724
  return c4.notFound();
8532
8725
  });
8533
8726
  return app2;
8534
8727
  }
8535
8728
  function resolveStudioDistDir() {
8536
- const currentDir = typeof __dirname !== "undefined" ? __dirname : path16.dirname(fileURLToPath3(import.meta.url));
8729
+ const currentDir = typeof __dirname !== "undefined" ? __dirname : path17.dirname(fileURLToPath3(import.meta.url));
8537
8730
  const candidates = [
8538
8731
  // From src/commands/results/ → sibling apps/studio/dist
8539
- path16.resolve(currentDir, "../../../../studio/dist"),
8732
+ path17.resolve(currentDir, "../../../../studio/dist"),
8540
8733
  // From dist/ → sibling apps/studio/dist (monorepo dev)
8541
- path16.resolve(currentDir, "../../studio/dist"),
8734
+ path17.resolve(currentDir, "../../studio/dist"),
8542
8735
  // Bundled inside CLI dist (published package: dist/studio/)
8543
- path16.resolve(currentDir, "studio"),
8736
+ path17.resolve(currentDir, "studio"),
8544
8737
  // From dist/ in monorepo root context
8545
- path16.resolve(currentDir, "../../../apps/studio/dist")
8738
+ path17.resolve(currentDir, "../../../apps/studio/dist")
8546
8739
  ];
8547
8740
  for (const candidate of candidates) {
8548
- if (existsSync12(candidate) && existsSync12(path16.join(candidate, "index.html"))) {
8741
+ if (existsSync13(candidate) && existsSync13(path17.join(candidate, "index.html"))) {
8549
8742
  return candidate;
8550
8743
  }
8551
8744
  }
@@ -8651,7 +8844,7 @@ Discovered ${discovered.length} project(s).`);
8651
8844
  } else {
8652
8845
  const cache = await loadRunCache(cwd);
8653
8846
  const cachedFile = cache ? resolveRunCacheFile(cache) : "";
8654
- if (cachedFile && existsSync12(cachedFile)) {
8847
+ if (cachedFile && existsSync13(cachedFile)) {
8655
8848
  sourceFile = cachedFile;
8656
8849
  results = loadManifestResults(cachedFile);
8657
8850
  } else {
@@ -8662,7 +8855,7 @@ Discovered ${discovered.length} project(s).`);
8662
8855
  }
8663
8856
  }
8664
8857
  }
8665
- const resultDir = sourceFile ? path16.dirname(path16.resolve(sourceFile)) : cwd;
8858
+ const resultDir = sourceFile ? path17.dirname(path17.resolve(sourceFile)) : cwd;
8666
8859
  const app2 = createApp(results, resultDir, cwd, sourceFile, {
8667
8860
  readOnly,
8668
8861
  multiProjectDashboard: isMultiProject
@@ -8786,8 +8979,8 @@ var selfCommand = subcommands({
8786
8979
  });
8787
8980
 
8788
8981
  // src/commands/transpile/index.ts
8789
- import { writeFileSync as writeFileSync5 } from "node:fs";
8790
- import path17 from "node:path";
8982
+ import { writeFileSync as writeFileSync6 } from "node:fs";
8983
+ import path18 from "node:path";
8791
8984
  var transpileCommand = command({
8792
8985
  name: "transpile",
8793
8986
  description: "Convert an EVAL.yaml file to Agent Skills evals.json format",
@@ -8811,7 +9004,7 @@ var transpileCommand = command({
8811
9004
  handler: async ({ input, outDir, stdout }) => {
8812
9005
  let result;
8813
9006
  try {
8814
- result = transpileEvalYamlFile(path17.resolve(input));
9007
+ result = transpileEvalYamlFile(path18.resolve(input));
8815
9008
  } catch (error) {
8816
9009
  console.error(`Error: ${error.message}`);
8817
9010
  process.exit(1);
@@ -8835,12 +9028,12 @@ var transpileCommand = command({
8835
9028
  process.stdout.write("\n");
8836
9029
  return;
8837
9030
  }
8838
- const outputDir = outDir ? path17.resolve(outDir) : path17.dirname(path17.resolve(input));
9031
+ const outputDir = outDir ? path18.resolve(outDir) : path18.dirname(path18.resolve(input));
8839
9032
  const fileNames = getOutputFilenames(result);
8840
9033
  for (const [skill, evalsJson] of result.files) {
8841
9034
  const fileName = fileNames.get(skill) ?? "evals.json";
8842
- const outputPath = path17.join(outputDir, fileName);
8843
- writeFileSync5(outputPath, `${JSON.stringify(evalsJson, null, 2)}
9035
+ const outputPath = path18.join(outputDir, fileName);
9036
+ writeFileSync6(outputPath, `${JSON.stringify(evalsJson, null, 2)}
8844
9037
  `);
8845
9038
  console.log(`Transpiled to ${outputPath}`);
8846
9039
  }
@@ -8848,7 +9041,7 @@ var transpileCommand = command({
8848
9041
  });
8849
9042
 
8850
9043
  // src/commands/trend/index.ts
8851
- import path18 from "node:path";
9044
+ import path19 from "node:path";
8852
9045
  var colors2 = {
8853
9046
  reset: "\x1B[0m",
8854
9047
  bold: "\x1B[1m",
@@ -8898,7 +9091,7 @@ function colorizeSlope(value) {
8898
9091
  }
8899
9092
  function ensureTrendIndexPath(source, cwd) {
8900
9093
  const resolved = resolveResultSourcePath(source, cwd);
8901
- if (path18.basename(resolved) !== RESULT_INDEX_FILENAME) {
9094
+ if (path19.basename(resolved) !== RESULT_INDEX_FILENAME) {
8902
9095
  throw new Error(
8903
9096
  `Unsupported result source for trend: ${source}. Use a run workspace directory or ${RESULT_INDEX_FILENAME} manifest.`
8904
9097
  );
@@ -8918,7 +9111,7 @@ function resolveTrendSources(cwd, sources, last) {
8918
9111
  if (last < 2) {
8919
9112
  throw new Error("--last must be at least 2");
8920
9113
  }
8921
- const metas = listResultFiles(cwd).filter((meta) => path18.basename(meta.path) === RESULT_INDEX_FILENAME).slice(0, last);
9114
+ const metas = listResultFiles(cwd).filter((meta) => path19.basename(meta.path) === RESULT_INDEX_FILENAME).slice(0, last);
8922
9115
  if (metas.length < 2) {
8923
9116
  throw new Error(
8924
9117
  "Trend analysis requires at least 2 canonical run workspaces in .agentv/results/runs/"
@@ -8933,10 +9126,10 @@ function getRunLabel(sourcePath, timestamp) {
8933
9126
  if (timestamp) {
8934
9127
  return timestamp;
8935
9128
  }
8936
- return path18.basename(path18.dirname(sourcePath));
9129
+ return path19.basename(path19.dirname(sourcePath));
8937
9130
  }
8938
9131
  function getRunSortKey(sourcePath, timestamp) {
8939
- return timestamp ?? path18.basename(path18.dirname(sourcePath));
9132
+ return timestamp ?? path19.basename(path19.dirname(sourcePath));
8940
9133
  }
8941
9134
  function mean2(values) {
8942
9135
  return values.reduce((sum, value) => sum + value, 0) / values.length;
@@ -9191,7 +9384,7 @@ var trendCommand = command({
9191
9384
  });
9192
9385
 
9193
9386
  // src/commands/trim/index.ts
9194
- import { readFileSync as readFileSync11, writeFileSync as writeFileSync6 } from "node:fs";
9387
+ import { readFileSync as readFileSync12, writeFileSync as writeFileSync7 } from "node:fs";
9195
9388
  var trimCommand = command({
9196
9389
  name: "trim",
9197
9390
  description: "Trim evaluation results for baseline storage (strips debug/audit fields)",
@@ -9210,7 +9403,7 @@ var trimCommand = command({
9210
9403
  },
9211
9404
  handler: async ({ input, out }) => {
9212
9405
  try {
9213
- const content = readFileSync11(input, "utf8");
9406
+ const content = readFileSync12(input, "utf8");
9214
9407
  const lines = content.trim().split("\n").filter((line) => line.trim());
9215
9408
  const trimmedLines = lines.map((line) => {
9216
9409
  const record = JSON.parse(line);
@@ -9222,7 +9415,7 @@ var trimCommand = command({
9222
9415
  const output = `${trimmedLines.join("\n")}
9223
9416
  `;
9224
9417
  if (out) {
9225
- writeFileSync6(out, output, "utf8");
9418
+ writeFileSync7(out, output, "utf8");
9226
9419
  console.error(`Trimmed ${lines.length} record(s) \u2192 ${out}`);
9227
9420
  } else {
9228
9421
  process.stdout.write(output);
@@ -9317,7 +9510,7 @@ function isTTY() {
9317
9510
  // src/commands/validate/validate-files.ts
9318
9511
  import { constants } from "node:fs";
9319
9512
  import { access, readdir as readdir4, stat } from "node:fs/promises";
9320
- import path19 from "node:path";
9513
+ import path20 from "node:path";
9321
9514
  import fg2 from "fast-glob";
9322
9515
  async function validateFiles(paths) {
9323
9516
  const filePaths = await expandPaths(paths);
@@ -9336,7 +9529,7 @@ async function validateFiles(paths) {
9336
9529
  };
9337
9530
  }
9338
9531
  async function validateSingleFile(filePath) {
9339
- const absolutePath = path19.resolve(filePath);
9532
+ const absolutePath = path20.resolve(filePath);
9340
9533
  const fileType = await detectFileType(absolutePath);
9341
9534
  let result;
9342
9535
  if (fileType === "eval") {
@@ -9361,7 +9554,7 @@ async function validateSingleFile(filePath) {
9361
9554
  async function expandPaths(paths) {
9362
9555
  const expanded = /* @__PURE__ */ new Set();
9363
9556
  for (const inputPath of paths) {
9364
- const absolutePath = path19.resolve(inputPath);
9557
+ const absolutePath = path20.resolve(inputPath);
9365
9558
  try {
9366
9559
  await access(absolutePath, constants.F_OK);
9367
9560
  const stats = await stat(absolutePath);
@@ -9389,7 +9582,7 @@ async function expandPaths(paths) {
9389
9582
  if (yamlMatches.length === 0) {
9390
9583
  console.warn(`Warning: No YAML files matched pattern: ${inputPath}`);
9391
9584
  }
9392
- for (const f of yamlMatches) expanded.add(path19.normalize(f));
9585
+ for (const f of yamlMatches) expanded.add(path20.normalize(f));
9393
9586
  }
9394
9587
  const sorted = Array.from(expanded);
9395
9588
  sorted.sort();
@@ -9400,7 +9593,7 @@ async function findYamlFiles(dirPath) {
9400
9593
  try {
9401
9594
  const entries2 = await readdir4(dirPath, { withFileTypes: true });
9402
9595
  for (const entry of entries2) {
9403
- const fullPath = path19.join(dirPath, entry.name);
9596
+ const fullPath = path20.join(dirPath, entry.name);
9404
9597
  if (entry.isDirectory()) {
9405
9598
  if (entry.name === "node_modules" || entry.name.startsWith(".")) {
9406
9599
  continue;
@@ -9417,7 +9610,7 @@ async function findYamlFiles(dirPath) {
9417
9610
  return results;
9418
9611
  }
9419
9612
  function isYamlFile(filePath) {
9420
- const ext = path19.extname(filePath).toLowerCase();
9613
+ const ext = path20.extname(filePath).toLowerCase();
9421
9614
  return ext === ".yaml" || ext === ".yml";
9422
9615
  }
9423
9616
 
@@ -9472,9 +9665,9 @@ var validateCommand = command({
9472
9665
  });
9473
9666
 
9474
9667
  // src/commands/workspace/clean.ts
9475
- import { existsSync as existsSync13 } from "node:fs";
9668
+ import { existsSync as existsSync14 } from "node:fs";
9476
9669
  import { readFile as readFile6, readdir as readdir5, rm } from "node:fs/promises";
9477
- import path20 from "node:path";
9670
+ import path21 from "node:path";
9478
9671
  async function confirm(message) {
9479
9672
  const readline2 = await import("node:readline");
9480
9673
  const rl = readline2.createInterface({ input: process.stdin, output: process.stdout });
@@ -9501,7 +9694,7 @@ var cleanCommand = command({
9501
9694
  },
9502
9695
  handler: async ({ repo, force }) => {
9503
9696
  const poolRoot = getWorkspacePoolRoot();
9504
- if (!existsSync13(poolRoot)) {
9697
+ if (!existsSync14(poolRoot)) {
9505
9698
  console.log("No workspace pool entries found.");
9506
9699
  return;
9507
9700
  }
@@ -9510,8 +9703,8 @@ var cleanCommand = command({
9510
9703
  const poolDirs = entries2.filter((e) => e.isDirectory());
9511
9704
  const matchingDirs = [];
9512
9705
  for (const dir of poolDirs) {
9513
- const poolDir = path20.join(poolRoot, dir.name);
9514
- const metadataPath = path20.join(poolDir, "metadata.json");
9706
+ const poolDir = path21.join(poolRoot, dir.name);
9707
+ const metadataPath = path21.join(poolDir, "metadata.json");
9515
9708
  try {
9516
9709
  const raw = await readFile6(metadataPath, "utf-8");
9517
9710
  const metadata = JSON.parse(raw);
@@ -9542,7 +9735,7 @@ var cleanCommand = command({
9542
9735
  }
9543
9736
  for (const dir of matchingDirs) {
9544
9737
  await rm(dir, { recursive: true, force: true });
9545
- console.log(`Removed: ${path20.basename(dir).slice(0, 12)}...`);
9738
+ console.log(`Removed: ${path21.basename(dir).slice(0, 12)}...`);
9546
9739
  }
9547
9740
  console.log("Done.");
9548
9741
  } else {
@@ -9560,7 +9753,7 @@ var cleanCommand = command({
9560
9753
  });
9561
9754
 
9562
9755
  // src/commands/workspace/deps.ts
9563
- import path21 from "node:path";
9756
+ import path22 from "node:path";
9564
9757
  var depsCommand = command({
9565
9758
  name: "deps",
9566
9759
  description: "Scan eval files and list git repo dependencies needed by workspaces",
@@ -9584,7 +9777,7 @@ var depsCommand = command({
9584
9777
  const resolvedPaths = await resolveEvalPaths(evalPaths, cwd);
9585
9778
  const result = await scanRepoDeps(resolvedPaths);
9586
9779
  for (const err2 of result.errors) {
9587
- console.error(`warning: ${path21.relative(cwd, err2.file)}: ${err2.message}`);
9780
+ console.error(`warning: ${path22.relative(cwd, err2.file)}: ${err2.message}`);
9588
9781
  }
9589
9782
  const output = {
9590
9783
  repos: result.repos.map((r) => ({
@@ -9592,7 +9785,7 @@ var depsCommand = command({
9592
9785
  ...r.ref !== void 0 && { ref: r.ref },
9593
9786
  ...r.clone !== void 0 && { clone: r.clone },
9594
9787
  ...r.checkout !== void 0 && { checkout: r.checkout },
9595
- ...usedBy && { used_by: r.usedBy.map((p) => path21.relative(cwd, p)) }
9788
+ ...usedBy && { used_by: r.usedBy.map((p) => path22.relative(cwd, p)) }
9596
9789
  }))
9597
9790
  };
9598
9791
  console.log(JSON.stringify(output, null, 2));
@@ -9600,15 +9793,15 @@ var depsCommand = command({
9600
9793
  });
9601
9794
 
9602
9795
  // src/commands/workspace/list.ts
9603
- import { existsSync as existsSync14 } from "node:fs";
9796
+ import { existsSync as existsSync15 } from "node:fs";
9604
9797
  import { readFile as readFile7, readdir as readdir6, stat as stat2 } from "node:fs/promises";
9605
- import path22 from "node:path";
9798
+ import path23 from "node:path";
9606
9799
  async function getDirectorySize(dirPath) {
9607
9800
  let totalSize = 0;
9608
9801
  try {
9609
9802
  const entries2 = await readdir6(dirPath, { withFileTypes: true });
9610
9803
  for (const entry of entries2) {
9611
- const fullPath = path22.join(dirPath, entry.name);
9804
+ const fullPath = path23.join(dirPath, entry.name);
9612
9805
  if (entry.isDirectory()) {
9613
9806
  totalSize += await getDirectorySize(fullPath);
9614
9807
  } else {
@@ -9632,7 +9825,7 @@ var listCommand = command({
9632
9825
  args: {},
9633
9826
  handler: async () => {
9634
9827
  const poolRoot = getWorkspacePoolRoot();
9635
- if (!existsSync14(poolRoot)) {
9828
+ if (!existsSync15(poolRoot)) {
9636
9829
  console.log("No workspace pool entries found.");
9637
9830
  return;
9638
9831
  }
@@ -9643,11 +9836,11 @@ var listCommand = command({
9643
9836
  return;
9644
9837
  }
9645
9838
  for (const dir of poolDirs) {
9646
- const poolDir = path22.join(poolRoot, dir.name);
9839
+ const poolDir = path23.join(poolRoot, dir.name);
9647
9840
  const fingerprint = dir.name;
9648
9841
  const poolEntries = await readdir6(poolDir, { withFileTypes: true });
9649
9842
  const slots = poolEntries.filter((e) => e.isDirectory() && e.name.startsWith("slot-"));
9650
- const metadataPath = path22.join(poolDir, "metadata.json");
9843
+ const metadataPath = path23.join(poolDir, "metadata.json");
9651
9844
  let metadata = null;
9652
9845
  try {
9653
9846
  const raw = await readFile7(metadataPath, "utf-8");
@@ -9694,8 +9887,8 @@ var CHECK_INTERVAL_MS = 24 * 60 * 60 * 1e3;
9694
9887
  var AGENTV_DIR = getAgentvHome();
9695
9888
  var CACHE_FILE = "version-check.json";
9696
9889
  var NPM_REGISTRY_URL = "https://registry.npmjs.org/agentv/latest";
9697
- async function getCachedUpdateInfo(path23) {
9698
- const filePath = path23 ?? join5(AGENTV_DIR, CACHE_FILE);
9890
+ async function getCachedUpdateInfo(path24) {
9891
+ const filePath = path24 ?? join5(AGENTV_DIR, CACHE_FILE);
9699
9892
  try {
9700
9893
  const raw = await readFile8(filePath, "utf-8");
9701
9894
  const data = JSON.parse(raw);
@@ -9856,4 +10049,4 @@ export {
9856
10049
  preprocessArgv,
9857
10050
  runCli
9858
10051
  };
9859
- //# sourceMappingURL=chunk-MYSFJC5U.js.map
10052
+ //# sourceMappingURL=chunk-R747FXKW.js.map