agentv 4.3.0 → 4.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30) hide show
  1. package/dist/{agentv-provider-NFFLXG5M-TJAWCWCX.js → agentv-provider-TXM4UEUT-SUZSAXWZ.js} +2 -2
  2. package/dist/{chunk-ZDJN5FSI.js → chunk-EM2JOZFS.js} +4 -12
  3. package/dist/{chunk-ZDJN5FSI.js.map → chunk-EM2JOZFS.js.map} +1 -1
  4. package/dist/{chunk-ASU5L5ZW.js → chunk-FXN26R2H.js} +198 -102
  5. package/dist/chunk-FXN26R2H.js.map +1 -0
  6. package/dist/{chunk-XLM3RNN7.js → chunk-HMOXP7T5.js} +63 -64
  7. package/dist/{chunk-XLM3RNN7.js.map → chunk-HMOXP7T5.js.map} +1 -1
  8. package/dist/{chunk-C5GOHBQM.js → chunk-QOBQ5XYF.js} +2 -2
  9. package/dist/{chunk-BJV6MDBE.js → chunk-ZKO2LGRR.js} +2 -2
  10. package/dist/cli.js +5 -5
  11. package/dist/{dist-VVXR6TYM.js → dist-M7R6II6Y.js} +4 -4
  12. package/dist/{esm-5Q4BZALM-5REQWAUV.js → esm-ZADQ4XQH-5LX2IKZV.js} +2 -2
  13. package/dist/index.js +5 -5
  14. package/dist/{interactive-BKK53ETJ.js → interactive-FMOEUIKE.js} +5 -5
  15. package/dist/{otlp-json-file-exporter-77FDBRSY-EZAPHWP6.js → otlp-json-file-exporter-RJFPCKVK-T6N4OGWG.js} +2 -2
  16. package/dist/studio/assets/{index-Cir5Hc8S.js → index-Bv9YUyqt.js} +1 -1
  17. package/dist/studio/assets/index-CElXpUjl.css +1 -0
  18. package/dist/studio/assets/index-CaOaajcd.js +65 -0
  19. package/dist/studio/index.html +2 -2
  20. package/package.json +1 -1
  21. package/dist/chunk-ASU5L5ZW.js.map +0 -1
  22. package/dist/studio/assets/index-D8LVkz9x.js +0 -71
  23. package/dist/studio/assets/index-izxfmBKC.css +0 -1
  24. /package/dist/{agentv-provider-NFFLXG5M-TJAWCWCX.js.map → agentv-provider-TXM4UEUT-SUZSAXWZ.js.map} +0 -0
  25. /package/dist/{chunk-C5GOHBQM.js.map → chunk-QOBQ5XYF.js.map} +0 -0
  26. /package/dist/{chunk-BJV6MDBE.js.map → chunk-ZKO2LGRR.js.map} +0 -0
  27. /package/dist/{dist-VVXR6TYM.js.map → dist-M7R6II6Y.js.map} +0 -0
  28. /package/dist/{esm-5Q4BZALM-5REQWAUV.js.map → esm-ZADQ4XQH-5LX2IKZV.js.map} +0 -0
  29. /package/dist/{interactive-BKK53ETJ.js.map → interactive-FMOEUIKE.js.map} +0 -0
  30. /package/dist/{otlp-json-file-exporter-77FDBRSY-EZAPHWP6.js.map → otlp-json-file-exporter-RJFPCKVK-T6N4OGWG.js.map} +0 -0
@@ -24,9 +24,10 @@ import {
24
24
  validateFileReferences,
25
25
  validateTargetsFile,
26
26
  writeArtifactsFromResults
27
- } from "./chunk-ZDJN5FSI.js";
27
+ } from "./chunk-EM2JOZFS.js";
28
28
  import {
29
29
  DEFAULT_CATEGORY,
30
+ PASS_THRESHOLD,
30
31
  createBuiltinRegistry,
31
32
  deriveCategory,
32
33
  executeScript,
@@ -43,7 +44,7 @@ import {
43
44
  toSnakeCaseDeep as toSnakeCaseDeep2,
44
45
  transpileEvalYamlFile,
45
46
  trimBaselineResult
46
- } from "./chunk-XLM3RNN7.js";
47
+ } from "./chunk-HMOXP7T5.js";
47
48
  import {
48
49
  __commonJS,
49
50
  __esm,
@@ -4217,7 +4218,7 @@ var evalRunCommand = command({
4217
4218
  },
4218
4219
  handler: async (args) => {
4219
4220
  if (args.evalPaths.length === 0 && process.stdin.isTTY) {
4220
- const { launchInteractiveWizard } = await import("./interactive-BKK53ETJ.js");
4221
+ const { launchInteractiveWizard } = await import("./interactive-FMOEUIKE.js");
4221
4222
  await launchInteractiveWizard();
4222
4223
  return;
4223
4224
  }
@@ -4948,7 +4949,7 @@ async function writeJson(filePath, data) {
4948
4949
  }
4949
4950
 
4950
4951
  // src/commands/pipeline/run.ts
4951
- import { execSync } from "node:child_process";
4952
+ import { exec } from "node:child_process";
4952
4953
  import { existsSync as existsSync3, readFileSync as readFileSync4, unlinkSync } from "node:fs";
4953
4954
  import { mkdir as mkdir4, readFile as readFile4, readdir as readdir3, writeFile as writeFile5 } from "node:fs/promises";
4954
4955
  import { tmpdir } from "node:os";
@@ -4997,7 +4998,7 @@ var evalRunCommand2 = command({
4997
4998
  workers: option({
4998
4999
  type: optional(number),
4999
5000
  long: "workers",
5000
- description: "Parallel workers for target invocation (default: all tests)"
5001
+ description: "Parallel workers for target invocation (default: targets.yaml workers, then 5)"
5001
5002
  }),
5002
5003
  experiment: option({
5003
5004
  type: optional(string),
@@ -5025,6 +5026,7 @@ var evalRunCommand2 = command({
5025
5026
  let targetInfo = null;
5026
5027
  let targetName = "agent";
5027
5028
  let targetKind = "agent";
5029
+ let targetWorkers;
5028
5030
  try {
5029
5031
  const selection = await selectTarget({
5030
5032
  testFilePath: resolvedEvalPath,
@@ -5037,6 +5039,7 @@ var evalRunCommand2 = command({
5037
5039
  env: process.env
5038
5040
  });
5039
5041
  targetName = selection.targetName;
5042
+ targetWorkers = selection.resolvedTarget.workers;
5040
5043
  if (selection.resolvedTarget.kind === "cli") {
5041
5044
  targetKind = "cli";
5042
5045
  const config = selection.resolvedTarget.config;
@@ -5104,8 +5107,14 @@ var evalRunCommand2 = command({
5104
5107
  process.env.AGENTV_RUN_TIMESTAMP = (/* @__PURE__ */ new Date()).toISOString().replace(/:/g, "-").replace(/\./g, "-");
5105
5108
  }
5106
5109
  const mergedEnv = { ...process.env, ...envVars };
5107
- const maxWorkers = workers ?? testIds.length;
5108
- console.log(`Invoking ${testIds.length} CLI target(s) (${maxWorkers} workers)...`);
5110
+ const maxWorkers = workers ?? targetWorkers ?? 5;
5111
+ let invCompleted = 0;
5112
+ const invTotal = testIds.length;
5113
+ const writeInvProgress = () => {
5114
+ process.stderr.write(`\rInvoking: ${invCompleted}/${invTotal} done`);
5115
+ };
5116
+ console.log(`Invoking ${invTotal} CLI target(s) (${maxWorkers} workers)...`);
5117
+ writeInvProgress();
5109
5118
  const invokeTarget = async (testId) => {
5110
5119
  const subpath = safeEvalSet ? [safeEvalSet, testId] : [testId];
5111
5120
  const testDir = join4(outDir, ...subpath);
@@ -5125,12 +5134,20 @@ var evalRunCommand2 = command({
5125
5134
  rendered = rendered.replace("{PROMPT}", inputText);
5126
5135
  const start = performance.now();
5127
5136
  try {
5128
- execSync(rendered, {
5129
- cwd,
5130
- timeout: timeoutMs,
5131
- env: mergedEnv,
5132
- stdio: ["pipe", "pipe", "pipe"],
5133
- maxBuffer: 10 * 1024 * 1024
5137
+ await new Promise((resolveP, rejectP) => {
5138
+ exec(
5139
+ rendered,
5140
+ {
5141
+ cwd,
5142
+ timeout: timeoutMs,
5143
+ env: mergedEnv,
5144
+ maxBuffer: 10 * 1024 * 1024
5145
+ },
5146
+ (error) => {
5147
+ if (error) rejectP(error);
5148
+ else resolveP();
5149
+ }
5150
+ );
5134
5151
  });
5135
5152
  const durationMs = Math.round(performance.now() - start);
5136
5153
  let response;
@@ -5145,7 +5162,9 @@ var evalRunCommand2 = command({
5145
5162
  total_duration_seconds: Math.round(durationMs / 10) / 100,
5146
5163
  execution_status: "ok"
5147
5164
  });
5148
- console.log(` ${testId}: OK (${durationMs}ms, ${response.length} chars)`);
5165
+ process.stderr.write(`
5166
+ ${testId}: OK (${durationMs}ms, ${response.length} chars)
5167
+ `);
5149
5168
  } catch (error) {
5150
5169
  const durationMs = Math.round(performance.now() - start);
5151
5170
  const message = error instanceof Error ? error.message : String(error);
@@ -5156,8 +5175,14 @@ var evalRunCommand2 = command({
5156
5175
  total_duration_seconds: Math.round(durationMs / 10) / 100,
5157
5176
  execution_status: "execution_error"
5158
5177
  });
5159
- console.error(` ${testId}: FAILED (${durationMs}ms) \u2014 ${message.slice(0, 200)}`);
5178
+ process.stderr.write(
5179
+ `
5180
+ ${testId}: FAILED (${durationMs}ms) \u2014 ${message.slice(0, 200)}
5181
+ `
5182
+ );
5160
5183
  } finally {
5184
+ invCompleted++;
5185
+ writeInvProgress();
5161
5186
  try {
5162
5187
  if (existsSync3(promptFile)) unlinkSync(promptFile);
5163
5188
  if (existsSync3(outputFile)) unlinkSync(outputFile);
@@ -5176,6 +5201,7 @@ var evalRunCommand2 = command({
5176
5201
  }
5177
5202
  }
5178
5203
  await Promise.all(pending);
5204
+ process.stderr.write("\n");
5179
5205
  } else {
5180
5206
  console.log("Subagent-as-target mode \u2014 skipping CLI invocation.");
5181
5207
  }
@@ -5637,7 +5663,7 @@ function listResultFiles(cwd, limit) {
5637
5663
  const fileStat = statSync2(filePath);
5638
5664
  const results = loadResultFile(filePath);
5639
5665
  const testCount = results.length;
5640
- const passCount = results.filter((r) => r.score >= 1).length;
5666
+ const passCount = results.filter((r) => r.score >= PASS_THRESHOLD).length;
5641
5667
  const passRate = testCount > 0 ? passCount / testCount : 0;
5642
5668
  const avgScore = testCount > 0 ? results.reduce((sum, r) => sum + r.score, 0) / testCount : 0;
5643
5669
  const filenameTimestamp = extractTimestampFromFilename(displayName);
@@ -6207,40 +6233,89 @@ var resultsCommand = subcommands({
6207
6233
  });
6208
6234
 
6209
6235
  // src/commands/results/serve.ts
6210
- import { existsSync as existsSync7, readFileSync as readFileSync8, readdirSync as readdirSync3, statSync as statSync4, writeFileSync as writeFileSync3 } from "node:fs";
6211
- import path9 from "node:path";
6236
+ import { existsSync as existsSync8, readFileSync as readFileSync9, readdirSync as readdirSync3, statSync as statSync4, writeFileSync as writeFileSync4 } from "node:fs";
6237
+ import path10 from "node:path";
6212
6238
  import { fileURLToPath as fileURLToPath2 } from "node:url";
6213
6239
  import { Hono } from "hono";
6240
+
6241
+ // src/commands/results/studio-config.ts
6242
+ import { existsSync as existsSync7, mkdirSync as mkdirSync2, readFileSync as readFileSync8, writeFileSync as writeFileSync3 } from "node:fs";
6243
+ import path9 from "node:path";
6244
+ import { parse as parseYaml, stringify as stringifyYaml2 } from "yaml";
6245
+ var DEFAULTS = {
6246
+ pass_threshold: PASS_THRESHOLD
6247
+ };
6248
+ function loadStudioConfig(agentvDir) {
6249
+ const configPath = path9.join(agentvDir, "config.yaml");
6250
+ if (!existsSync7(configPath)) {
6251
+ return { ...DEFAULTS };
6252
+ }
6253
+ const raw = readFileSync8(configPath, "utf-8");
6254
+ const parsed = parseYaml(raw);
6255
+ if (!parsed || typeof parsed !== "object") {
6256
+ return { ...DEFAULTS };
6257
+ }
6258
+ const threshold = typeof parsed.pass_threshold === "number" ? parsed.pass_threshold : DEFAULTS.pass_threshold;
6259
+ return {
6260
+ pass_threshold: Math.min(1, Math.max(0, threshold))
6261
+ };
6262
+ }
6263
+ function saveStudioConfig(agentvDir, config) {
6264
+ if (!existsSync7(agentvDir)) {
6265
+ mkdirSync2(agentvDir, { recursive: true });
6266
+ }
6267
+ const configPath = path9.join(agentvDir, "config.yaml");
6268
+ const yamlStr = stringifyYaml2(config);
6269
+ writeFileSync3(configPath, yamlStr, "utf-8");
6270
+ }
6271
+
6272
+ // src/commands/results/serve.ts
6214
6273
  function feedbackPath(resultDir) {
6215
- return path9.join(resultDir, "feedback.json");
6274
+ return path10.join(resultDir, "feedback.json");
6216
6275
  }
6217
6276
  function readFeedback(cwd) {
6218
6277
  const fp = feedbackPath(cwd);
6219
- if (!existsSync7(fp)) {
6278
+ if (!existsSync8(fp)) {
6220
6279
  return { reviews: [] };
6221
6280
  }
6222
6281
  try {
6223
- return JSON.parse(readFileSync8(fp, "utf8"));
6282
+ return JSON.parse(readFileSync9(fp, "utf8"));
6224
6283
  } catch (err2) {
6225
6284
  console.error(`Warning: could not parse ${fp}, starting fresh: ${err2.message}`);
6226
6285
  return { reviews: [] };
6227
6286
  }
6228
6287
  }
6229
6288
  function writeFeedback(cwd, data) {
6230
- writeFileSync3(feedbackPath(cwd), `${JSON.stringify(data, null, 2)}
6289
+ writeFileSync4(feedbackPath(cwd), `${JSON.stringify(data, null, 2)}
6231
6290
  `, "utf8");
6232
6291
  }
6233
6292
  function createApp(results, resultDir, cwd, sourceFile, options) {
6234
6293
  const searchDir = cwd ?? resultDir;
6294
+ const agentvDir = path10.join(searchDir, ".agentv");
6235
6295
  const app2 = new Hono();
6296
+ app2.get("/api/config", (c3) => c3.json(loadStudioConfig(agentvDir)));
6297
+ app2.post("/api/config", async (c3) => {
6298
+ try {
6299
+ const body = await c3.req.json();
6300
+ const current = loadStudioConfig(agentvDir);
6301
+ const updated = { ...current, ...body };
6302
+ if (typeof updated.pass_threshold === "number") {
6303
+ updated.pass_threshold = Math.min(1, Math.max(0, updated.pass_threshold));
6304
+ }
6305
+ saveStudioConfig(agentvDir, updated);
6306
+ return c3.json(updated);
6307
+ } catch {
6308
+ return c3.json({ error: "Failed to save config" }, 500);
6309
+ }
6310
+ });
6236
6311
  const studioDistPath = options?.studioDir ?? resolveStudioDistDir();
6237
- if (!studioDistPath || !existsSync7(path9.join(studioDistPath, "index.html"))) {
6312
+ if (!studioDistPath || !existsSync8(path10.join(studioDistPath, "index.html"))) {
6238
6313
  throw new Error('Studio dist not found. Run "bun run build" in apps/studio/ to build the SPA.');
6239
6314
  }
6240
6315
  app2.get("/", (c3) => {
6241
- const indexPath = path9.join(studioDistPath, "index.html");
6242
- if (existsSync7(indexPath)) {
6243
- return c3.html(readFileSync8(indexPath, "utf8"));
6316
+ const indexPath = path10.join(studioDistPath, "index.html");
6317
+ if (existsSync8(indexPath)) {
6318
+ return c3.html(readFileSync9(indexPath, "utf8"));
6244
6319
  }
6245
6320
  return c3.notFound();
6246
6321
  });
@@ -6338,12 +6413,13 @@ function createApp(results, resultDir, cwd, sourceFile, options) {
6338
6413
  }
6339
6414
  try {
6340
6415
  const loaded = patchTestIds(loadManifestResults(meta.path));
6416
+ const { pass_threshold } = loadStudioConfig(agentvDir);
6341
6417
  const datasetMap = /* @__PURE__ */ new Map();
6342
6418
  for (const r of loaded) {
6343
6419
  const ds = r.dataset ?? r.target ?? "default";
6344
6420
  const entry = datasetMap.get(ds) ?? { total: 0, passed: 0, scoreSum: 0 };
6345
6421
  entry.total++;
6346
- if (r.score >= 1) entry.passed++;
6422
+ if (r.score >= pass_threshold) entry.passed++;
6347
6423
  entry.scoreSum += r.score;
6348
6424
  datasetMap.set(ds, entry);
6349
6425
  }
@@ -6368,6 +6444,7 @@ function createApp(results, resultDir, cwd, sourceFile, options) {
6368
6444
  }
6369
6445
  try {
6370
6446
  const loaded = patchTestIds(loadManifestResults(meta.path));
6447
+ const { pass_threshold } = loadStudioConfig(agentvDir);
6371
6448
  const categoryMap = /* @__PURE__ */ new Map();
6372
6449
  for (const r of loaded) {
6373
6450
  const cat = r.category ?? DEFAULT_CATEGORY;
@@ -6378,7 +6455,7 @@ function createApp(results, resultDir, cwd, sourceFile, options) {
6378
6455
  datasets: /* @__PURE__ */ new Set()
6379
6456
  };
6380
6457
  entry.total++;
6381
- if (r.score >= 1) entry.passed++;
6458
+ if (r.score >= pass_threshold) entry.passed++;
6382
6459
  entry.scoreSum += r.score;
6383
6460
  entry.datasets.add(r.dataset ?? r.target ?? "default");
6384
6461
  categoryMap.set(cat, entry);
@@ -6406,13 +6483,14 @@ function createApp(results, resultDir, cwd, sourceFile, options) {
6406
6483
  }
6407
6484
  try {
6408
6485
  const loaded = patchTestIds(loadManifestResults(meta.path));
6486
+ const { pass_threshold } = loadStudioConfig(agentvDir);
6409
6487
  const filtered = loaded.filter((r) => (r.category ?? DEFAULT_CATEGORY) === category);
6410
6488
  const datasetMap = /* @__PURE__ */ new Map();
6411
6489
  for (const r of filtered) {
6412
6490
  const ds = r.dataset ?? r.target ?? "default";
6413
6491
  const entry = datasetMap.get(ds) ?? { total: 0, passed: 0, scoreSum: 0 };
6414
6492
  entry.total++;
6415
- if (r.score >= 1) entry.passed++;
6493
+ if (r.score >= pass_threshold) entry.passed++;
6416
6494
  entry.scoreSum += r.score;
6417
6495
  datasetMap.set(ds, entry);
6418
6496
  }
@@ -6468,7 +6546,7 @@ function createApp(results, resultDir, cwd, sourceFile, options) {
6468
6546
  return c3.json({ entries: entries2 });
6469
6547
  });
6470
6548
  function buildFileTree(dirPath, relativeTo) {
6471
- if (!existsSync7(dirPath) || !statSync4(dirPath).isDirectory()) {
6549
+ if (!existsSync8(dirPath) || !statSync4(dirPath).isDirectory()) {
6472
6550
  return [];
6473
6551
  }
6474
6552
  const entries2 = readdirSync3(dirPath, { withFileTypes: true });
@@ -6476,8 +6554,8 @@ function createApp(results, resultDir, cwd, sourceFile, options) {
6476
6554
  if (a.isDirectory() !== b.isDirectory()) return a.isDirectory() ? -1 : 1;
6477
6555
  return a.name.localeCompare(b.name);
6478
6556
  }).map((entry) => {
6479
- const fullPath = path9.join(dirPath, entry.name);
6480
- const relPath = path9.relative(relativeTo, fullPath);
6557
+ const fullPath = path10.join(dirPath, entry.name);
6558
+ const relPath = path10.relative(relativeTo, fullPath);
6481
6559
  if (entry.isDirectory()) {
6482
6560
  return {
6483
6561
  name: entry.name,
@@ -6490,7 +6568,7 @@ function createApp(results, resultDir, cwd, sourceFile, options) {
6490
6568
  });
6491
6569
  }
6492
6570
  function inferLanguage(filePath) {
6493
- const ext = path9.extname(filePath).toLowerCase();
6571
+ const ext = path10.extname(filePath).toLowerCase();
6494
6572
  const langMap = {
6495
6573
  ".json": "json",
6496
6574
  ".jsonl": "json",
@@ -6525,13 +6603,13 @@ function createApp(results, resultDir, cwd, sourceFile, options) {
6525
6603
  return c3.json({ error: "Run not found" }, 404);
6526
6604
  }
6527
6605
  try {
6528
- const content = readFileSync8(meta.path, "utf8");
6606
+ const content = readFileSync9(meta.path, "utf8");
6529
6607
  const records = parseResultManifest(content);
6530
6608
  const record = records.find((r) => (r.test_id ?? r.eval_id) === evalId);
6531
6609
  if (!record) {
6532
6610
  return c3.json({ error: "Eval not found" }, 404);
6533
6611
  }
6534
- const baseDir = path9.dirname(meta.path);
6612
+ const baseDir = path10.dirname(meta.path);
6535
6613
  const knownPaths = [
6536
6614
  record.grading_path,
6537
6615
  record.timing_path,
@@ -6542,14 +6620,14 @@ function createApp(results, resultDir, cwd, sourceFile, options) {
6542
6620
  if (knownPaths.length === 0) {
6543
6621
  return c3.json({ files: [] });
6544
6622
  }
6545
- const artifactDirs = knownPaths.map((p) => path9.dirname(p));
6623
+ const artifactDirs = knownPaths.map((p) => path10.dirname(p));
6546
6624
  let commonDir = artifactDirs[0];
6547
6625
  for (const dir of artifactDirs) {
6548
6626
  while (!dir.startsWith(commonDir)) {
6549
- commonDir = path9.dirname(commonDir);
6627
+ commonDir = path10.dirname(commonDir);
6550
6628
  }
6551
6629
  }
6552
- const artifactAbsDir = path9.join(baseDir, commonDir);
6630
+ const artifactAbsDir = path10.join(baseDir, commonDir);
6553
6631
  const files = buildFileTree(artifactAbsDir, baseDir);
6554
6632
  return c3.json({ files });
6555
6633
  } catch {
@@ -6570,16 +6648,16 @@ function createApp(results, resultDir, cwd, sourceFile, options) {
6570
6648
  if (!filePath) {
6571
6649
  return c3.json({ error: "No file path specified" }, 400);
6572
6650
  }
6573
- const baseDir = path9.dirname(meta.path);
6574
- const absolutePath = path9.resolve(baseDir, filePath);
6575
- if (!absolutePath.startsWith(path9.resolve(baseDir) + path9.sep) && absolutePath !== path9.resolve(baseDir)) {
6651
+ const baseDir = path10.dirname(meta.path);
6652
+ const absolutePath = path10.resolve(baseDir, filePath);
6653
+ if (!absolutePath.startsWith(path10.resolve(baseDir) + path10.sep) && absolutePath !== path10.resolve(baseDir)) {
6576
6654
  return c3.json({ error: "Path traversal not allowed" }, 403);
6577
6655
  }
6578
- if (!existsSync7(absolutePath) || !statSync4(absolutePath).isFile()) {
6656
+ if (!existsSync8(absolutePath) || !statSync4(absolutePath).isFile()) {
6579
6657
  return c3.json({ error: "File not found" }, 404);
6580
6658
  }
6581
6659
  try {
6582
- const fileContent = readFileSync8(absolutePath, "utf8");
6660
+ const fileContent = readFileSync9(absolutePath, "utf8");
6583
6661
  const language = inferLanguage(absolutePath);
6584
6662
  return c3.json({ content: fileContent, language });
6585
6663
  } catch {
@@ -6588,6 +6666,7 @@ function createApp(results, resultDir, cwd, sourceFile, options) {
6588
6666
  });
6589
6667
  app2.get("/api/experiments", (c3) => {
6590
6668
  const metas = listResultFiles(searchDir);
6669
+ const { pass_threshold } = loadStudioConfig(agentvDir);
6591
6670
  const experimentMap = /* @__PURE__ */ new Map();
6592
6671
  for (const m of metas) {
6593
6672
  try {
@@ -6604,7 +6683,7 @@ function createApp(results, resultDir, cwd, sourceFile, options) {
6604
6683
  entry.runFilenames.add(m.filename);
6605
6684
  if (r.target) entry.targets.add(r.target);
6606
6685
  entry.evalCount++;
6607
- if (r.score >= 1) entry.passedCount++;
6686
+ if (r.score >= pass_threshold) entry.passedCount++;
6608
6687
  if (r.timestamp && r.timestamp > entry.lastTimestamp) {
6609
6688
  entry.lastTimestamp = r.timestamp;
6610
6689
  }
@@ -6626,6 +6705,7 @@ function createApp(results, resultDir, cwd, sourceFile, options) {
6626
6705
  });
6627
6706
  app2.get("/api/targets", (c3) => {
6628
6707
  const metas = listResultFiles(searchDir);
6708
+ const { pass_threshold } = loadStudioConfig(agentvDir);
6629
6709
  const targetMap = /* @__PURE__ */ new Map();
6630
6710
  for (const m of metas) {
6631
6711
  try {
@@ -6641,7 +6721,7 @@ function createApp(results, resultDir, cwd, sourceFile, options) {
6641
6721
  entry.runFilenames.add(m.filename);
6642
6722
  if (r.experiment) entry.experiments.add(r.experiment);
6643
6723
  entry.evalCount++;
6644
- if (r.score >= 1) entry.passedCount++;
6724
+ if (r.score >= pass_threshold) entry.passedCount++;
6645
6725
  targetMap.set(target, entry);
6646
6726
  }
6647
6727
  } catch {
@@ -6659,12 +6739,12 @@ function createApp(results, resultDir, cwd, sourceFile, options) {
6659
6739
  });
6660
6740
  app2.get("/assets/*", (c3) => {
6661
6741
  const assetPath = c3.req.path;
6662
- const filePath = path9.join(studioDistPath, assetPath);
6663
- if (!existsSync7(filePath)) {
6742
+ const filePath = path10.join(studioDistPath, assetPath);
6743
+ if (!existsSync8(filePath)) {
6664
6744
  return c3.notFound();
6665
6745
  }
6666
- const content = readFileSync8(filePath);
6667
- const ext = path9.extname(filePath);
6746
+ const content = readFileSync9(filePath);
6747
+ const ext = path10.extname(filePath);
6668
6748
  const mimeTypes = {
6669
6749
  ".js": "application/javascript",
6670
6750
  ".css": "text/css",
@@ -6687,28 +6767,28 @@ function createApp(results, resultDir, cwd, sourceFile, options) {
6687
6767
  if (c3.req.path.startsWith("/api/")) {
6688
6768
  return c3.json({ error: "Not found" }, 404);
6689
6769
  }
6690
- const indexPath = path9.join(studioDistPath, "index.html");
6691
- if (existsSync7(indexPath)) {
6692
- return c3.html(readFileSync8(indexPath, "utf8"));
6770
+ const indexPath = path10.join(studioDistPath, "index.html");
6771
+ if (existsSync8(indexPath)) {
6772
+ return c3.html(readFileSync9(indexPath, "utf8"));
6693
6773
  }
6694
6774
  return c3.notFound();
6695
6775
  });
6696
6776
  return app2;
6697
6777
  }
6698
6778
  function resolveStudioDistDir() {
6699
- const currentDir = typeof __dirname !== "undefined" ? __dirname : path9.dirname(fileURLToPath2(import.meta.url));
6779
+ const currentDir = typeof __dirname !== "undefined" ? __dirname : path10.dirname(fileURLToPath2(import.meta.url));
6700
6780
  const candidates = [
6701
6781
  // From src/commands/results/ → sibling apps/studio/dist
6702
- path9.resolve(currentDir, "../../../../studio/dist"),
6782
+ path10.resolve(currentDir, "../../../../studio/dist"),
6703
6783
  // From dist/ → sibling apps/studio/dist (monorepo dev)
6704
- path9.resolve(currentDir, "../../studio/dist"),
6784
+ path10.resolve(currentDir, "../../studio/dist"),
6705
6785
  // Bundled inside CLI dist (published package: dist/studio/)
6706
- path9.resolve(currentDir, "studio"),
6786
+ path10.resolve(currentDir, "studio"),
6707
6787
  // From dist/ in monorepo root context
6708
- path9.resolve(currentDir, "../../../apps/studio/dist")
6788
+ path10.resolve(currentDir, "../../../apps/studio/dist")
6709
6789
  ];
6710
6790
  for (const candidate of candidates) {
6711
- if (existsSync7(candidate) && existsSync7(path9.join(candidate, "index.html"))) {
6791
+ if (existsSync8(candidate) && existsSync8(path10.join(candidate, "index.html"))) {
6712
6792
  return candidate;
6713
6793
  }
6714
6794
  }
@@ -6756,7 +6836,7 @@ var resultsServeCommand = command({
6756
6836
  let sourceFile;
6757
6837
  if (source) {
6758
6838
  const resolved = resolveResultSourcePath(source, cwd);
6759
- if (!existsSync7(resolved)) {
6839
+ if (!existsSync8(resolved)) {
6760
6840
  console.error(`Error: Source file not found: ${resolved}`);
6761
6841
  process.exit(1);
6762
6842
  }
@@ -6765,7 +6845,7 @@ var resultsServeCommand = command({
6765
6845
  } else {
6766
6846
  const cache = await loadRunCache(cwd);
6767
6847
  const cachedFile = cache ? resolveRunCacheFile(cache) : "";
6768
- if (cachedFile && existsSync7(cachedFile)) {
6848
+ if (cachedFile && existsSync8(cachedFile)) {
6769
6849
  sourceFile = cachedFile;
6770
6850
  results = patchTestIds(loadManifestResults(cachedFile));
6771
6851
  } else {
@@ -6776,7 +6856,7 @@ var resultsServeCommand = command({
6776
6856
  }
6777
6857
  }
6778
6858
  }
6779
- const resultDir = sourceFile ? path9.dirname(path9.resolve(sourceFile)) : cwd;
6859
+ const resultDir = sourceFile ? path10.dirname(path10.resolve(sourceFile)) : cwd;
6780
6860
  const app2 = createApp(results, resultDir, cwd, sourceFile);
6781
6861
  if (results.length > 0 && sourceFile) {
6782
6862
  console.log(`Serving ${results.length} result(s) from ${sourceFile}`);
@@ -7669,8 +7749,8 @@ var traceCommand = subcommands({
7669
7749
  });
7670
7750
 
7671
7751
  // src/commands/transpile/index.ts
7672
- import { writeFileSync as writeFileSync4 } from "node:fs";
7673
- import path10 from "node:path";
7752
+ import { writeFileSync as writeFileSync5 } from "node:fs";
7753
+ import path11 from "node:path";
7674
7754
  var transpileCommand = command({
7675
7755
  name: "transpile",
7676
7756
  description: "Convert an EVAL.yaml file to Agent Skills evals.json format",
@@ -7694,7 +7774,7 @@ var transpileCommand = command({
7694
7774
  handler: async ({ input, outDir, stdout }) => {
7695
7775
  let result;
7696
7776
  try {
7697
- result = transpileEvalYamlFile(path10.resolve(input));
7777
+ result = transpileEvalYamlFile(path11.resolve(input));
7698
7778
  } catch (error) {
7699
7779
  console.error(`Error: ${error.message}`);
7700
7780
  process.exit(1);
@@ -7718,12 +7798,12 @@ var transpileCommand = command({
7718
7798
  process.stdout.write("\n");
7719
7799
  return;
7720
7800
  }
7721
- const outputDir = outDir ? path10.resolve(outDir) : path10.dirname(path10.resolve(input));
7801
+ const outputDir = outDir ? path11.resolve(outDir) : path11.dirname(path11.resolve(input));
7722
7802
  const fileNames = getOutputFilenames(result);
7723
7803
  for (const [skill, evalsJson] of result.files) {
7724
7804
  const fileName = fileNames.get(skill) ?? "evals.json";
7725
- const outputPath = path10.join(outputDir, fileName);
7726
- writeFileSync4(outputPath, `${JSON.stringify(evalsJson, null, 2)}
7805
+ const outputPath = path11.join(outputDir, fileName);
7806
+ writeFileSync5(outputPath, `${JSON.stringify(evalsJson, null, 2)}
7727
7807
  `);
7728
7808
  console.log(`Transpiled to ${outputPath}`);
7729
7809
  }
@@ -7731,7 +7811,7 @@ var transpileCommand = command({
7731
7811
  });
7732
7812
 
7733
7813
  // src/commands/trim/index.ts
7734
- import { readFileSync as readFileSync9, writeFileSync as writeFileSync5 } from "node:fs";
7814
+ import { readFileSync as readFileSync10, writeFileSync as writeFileSync6 } from "node:fs";
7735
7815
  var trimCommand = command({
7736
7816
  name: "trim",
7737
7817
  description: "Trim evaluation results for baseline storage (strips debug/audit fields)",
@@ -7750,7 +7830,7 @@ var trimCommand = command({
7750
7830
  },
7751
7831
  handler: async ({ input, out }) => {
7752
7832
  try {
7753
- const content = readFileSync9(input, "utf8");
7833
+ const content = readFileSync10(input, "utf8");
7754
7834
  const lines = content.trim().split("\n").filter((line) => line.trim());
7755
7835
  const trimmedLines = lines.map((line) => {
7756
7836
  const record = JSON.parse(line);
@@ -7762,7 +7842,7 @@ var trimCommand = command({
7762
7842
  const output = `${trimmedLines.join("\n")}
7763
7843
  `;
7764
7844
  if (out) {
7765
- writeFileSync5(out, output, "utf8");
7845
+ writeFileSync6(out, output, "utf8");
7766
7846
  console.error(`Trimmed ${lines.length} record(s) \u2192 ${out}`);
7767
7847
  } else {
7768
7848
  process.stdout.write(output);
@@ -7857,7 +7937,8 @@ function isTTY() {
7857
7937
  // src/commands/validate/validate-files.ts
7858
7938
  import { constants } from "node:fs";
7859
7939
  import { access, readdir as readdir4, stat } from "node:fs/promises";
7860
- import path11 from "node:path";
7940
+ import path12 from "node:path";
7941
+ import fg2 from "fast-glob";
7861
7942
  async function validateFiles(paths) {
7862
7943
  const filePaths = await expandPaths(paths);
7863
7944
  const results = [];
@@ -7875,7 +7956,7 @@ async function validateFiles(paths) {
7875
7956
  };
7876
7957
  }
7877
7958
  async function validateSingleFile(filePath) {
7878
- const absolutePath = path11.resolve(filePath);
7959
+ const absolutePath = path12.resolve(filePath);
7879
7960
  const fileType = await detectFileType(absolutePath);
7880
7961
  let result;
7881
7962
  if (fileType === "eval") {
@@ -7898,33 +7979,48 @@ async function validateSingleFile(filePath) {
7898
7979
  return result;
7899
7980
  }
7900
7981
  async function expandPaths(paths) {
7901
- const expanded = [];
7982
+ const expanded = /* @__PURE__ */ new Set();
7902
7983
  for (const inputPath of paths) {
7903
- const absolutePath = path11.resolve(inputPath);
7984
+ const absolutePath = path12.resolve(inputPath);
7904
7985
  try {
7905
7986
  await access(absolutePath, constants.F_OK);
7987
+ const stats = await stat(absolutePath);
7988
+ if (stats.isFile()) {
7989
+ if (isYamlFile(absolutePath)) expanded.add(absolutePath);
7990
+ continue;
7991
+ }
7992
+ if (stats.isDirectory()) {
7993
+ const yamlFiles = await findYamlFiles(absolutePath);
7994
+ for (const f of yamlFiles) expanded.add(f);
7995
+ continue;
7996
+ }
7906
7997
  } catch {
7907
- console.warn(`Warning: Path not found: ${inputPath}`);
7908
- continue;
7909
7998
  }
7910
- const stats = await stat(absolutePath);
7911
- if (stats.isFile()) {
7912
- if (isYamlFile(absolutePath)) {
7913
- expanded.push(absolutePath);
7914
- }
7915
- } else if (stats.isDirectory()) {
7916
- const yamlFiles = await findYamlFiles(absolutePath);
7917
- expanded.push(...yamlFiles);
7999
+ const globPattern = inputPath.includes("\\") ? inputPath.replace(/\\/g, "/") : inputPath;
8000
+ const matches = await fg2(globPattern, {
8001
+ cwd: process.cwd(),
8002
+ absolute: true,
8003
+ onlyFiles: true,
8004
+ unique: true,
8005
+ dot: false,
8006
+ followSymbolicLinks: true
8007
+ });
8008
+ const yamlMatches = matches.filter((f) => isYamlFile(f));
8009
+ if (yamlMatches.length === 0) {
8010
+ console.warn(`Warning: No YAML files matched pattern: ${inputPath}`);
7918
8011
  }
8012
+ for (const f of yamlMatches) expanded.add(path12.normalize(f));
7919
8013
  }
7920
- return expanded;
8014
+ const sorted = Array.from(expanded);
8015
+ sorted.sort();
8016
+ return sorted;
7921
8017
  }
7922
8018
  async function findYamlFiles(dirPath) {
7923
8019
  const results = [];
7924
8020
  try {
7925
8021
  const entries2 = await readdir4(dirPath, { withFileTypes: true });
7926
8022
  for (const entry of entries2) {
7927
- const fullPath = path11.join(dirPath, entry.name);
8023
+ const fullPath = path12.join(dirPath, entry.name);
7928
8024
  if (entry.isDirectory()) {
7929
8025
  if (entry.name === "node_modules" || entry.name.startsWith(".")) {
7930
8026
  continue;
@@ -7941,7 +8037,7 @@ async function findYamlFiles(dirPath) {
7941
8037
  return results;
7942
8038
  }
7943
8039
  function isYamlFile(filePath) {
7944
- const ext = path11.extname(filePath).toLowerCase();
8040
+ const ext = path12.extname(filePath).toLowerCase();
7945
8041
  return ext === ".yaml" || ext === ".yml";
7946
8042
  }
7947
8043
 
@@ -7979,9 +8075,9 @@ var validateCommand = command({
7979
8075
  });
7980
8076
 
7981
8077
  // src/commands/workspace/clean.ts
7982
- import { existsSync as existsSync8 } from "node:fs";
8078
+ import { existsSync as existsSync9 } from "node:fs";
7983
8079
  import { readFile as readFile5, readdir as readdir5, rm } from "node:fs/promises";
7984
- import path12 from "node:path";
8080
+ import path13 from "node:path";
7985
8081
  async function confirm(message) {
7986
8082
  const readline2 = await import("node:readline");
7987
8083
  const rl = readline2.createInterface({ input: process.stdin, output: process.stdout });
@@ -8008,7 +8104,7 @@ var cleanCommand = command({
8008
8104
  },
8009
8105
  handler: async ({ repo, force }) => {
8010
8106
  const poolRoot = getWorkspacePoolRoot();
8011
- if (!existsSync8(poolRoot)) {
8107
+ if (!existsSync9(poolRoot)) {
8012
8108
  console.log("No workspace pool entries found.");
8013
8109
  return;
8014
8110
  }
@@ -8017,8 +8113,8 @@ var cleanCommand = command({
8017
8113
  const poolDirs = entries2.filter((e) => e.isDirectory());
8018
8114
  const matchingDirs = [];
8019
8115
  for (const dir of poolDirs) {
8020
- const poolDir = path12.join(poolRoot, dir.name);
8021
- const metadataPath = path12.join(poolDir, "metadata.json");
8116
+ const poolDir = path13.join(poolRoot, dir.name);
8117
+ const metadataPath = path13.join(poolDir, "metadata.json");
8022
8118
  try {
8023
8119
  const raw = await readFile5(metadataPath, "utf-8");
8024
8120
  const metadata = JSON.parse(raw);
@@ -8049,7 +8145,7 @@ var cleanCommand = command({
8049
8145
  }
8050
8146
  for (const dir of matchingDirs) {
8051
8147
  await rm(dir, { recursive: true, force: true });
8052
- console.log(`Removed: ${path12.basename(dir).slice(0, 12)}...`);
8148
+ console.log(`Removed: ${path13.basename(dir).slice(0, 12)}...`);
8053
8149
  }
8054
8150
  console.log("Done.");
8055
8151
  } else {
@@ -8067,15 +8163,15 @@ var cleanCommand = command({
8067
8163
  });
8068
8164
 
8069
8165
  // src/commands/workspace/list.ts
8070
- import { existsSync as existsSync9 } from "node:fs";
8166
+ import { existsSync as existsSync10 } from "node:fs";
8071
8167
  import { readFile as readFile6, readdir as readdir6, stat as stat2 } from "node:fs/promises";
8072
- import path13 from "node:path";
8168
+ import path14 from "node:path";
8073
8169
  async function getDirectorySize(dirPath) {
8074
8170
  let totalSize = 0;
8075
8171
  try {
8076
8172
  const entries2 = await readdir6(dirPath, { withFileTypes: true });
8077
8173
  for (const entry of entries2) {
8078
- const fullPath = path13.join(dirPath, entry.name);
8174
+ const fullPath = path14.join(dirPath, entry.name);
8079
8175
  if (entry.isDirectory()) {
8080
8176
  totalSize += await getDirectorySize(fullPath);
8081
8177
  } else {
@@ -8099,7 +8195,7 @@ var listCommand = command({
8099
8195
  args: {},
8100
8196
  handler: async () => {
8101
8197
  const poolRoot = getWorkspacePoolRoot();
8102
- if (!existsSync9(poolRoot)) {
8198
+ if (!existsSync10(poolRoot)) {
8103
8199
  console.log("No workspace pool entries found.");
8104
8200
  return;
8105
8201
  }
@@ -8110,11 +8206,11 @@ var listCommand = command({
8110
8206
  return;
8111
8207
  }
8112
8208
  for (const dir of poolDirs) {
8113
- const poolDir = path13.join(poolRoot, dir.name);
8209
+ const poolDir = path14.join(poolRoot, dir.name);
8114
8210
  const fingerprint = dir.name;
8115
8211
  const poolEntries = await readdir6(poolDir, { withFileTypes: true });
8116
8212
  const slots = poolEntries.filter((e) => e.isDirectory() && e.name.startsWith("slot-"));
8117
- const metadataPath = path13.join(poolDir, "metadata.json");
8213
+ const metadataPath = path14.join(poolDir, "metadata.json");
8118
8214
  let metadata = null;
8119
8215
  try {
8120
8216
  const raw = await readFile6(metadataPath, "utf-8");
@@ -8160,8 +8256,8 @@ var CHECK_INTERVAL_MS = 24 * 60 * 60 * 1e3;
8160
8256
  var AGENTV_DIR = getAgentvHome();
8161
8257
  var CACHE_FILE = "version-check.json";
8162
8258
  var NPM_REGISTRY_URL = "https://registry.npmjs.org/agentv/latest";
8163
- async function getCachedUpdateInfo(path14) {
8164
- const filePath = path14 ?? join5(AGENTV_DIR, CACHE_FILE);
8259
+ async function getCachedUpdateInfo(path15) {
8260
+ const filePath = path15 ?? join5(AGENTV_DIR, CACHE_FILE);
8165
8261
  try {
8166
8262
  const raw = await readFile7(filePath, "utf-8");
8167
8263
  const data = JSON.parse(raw);
@@ -8318,4 +8414,4 @@ export {
8318
8414
  preprocessArgv,
8319
8415
  runCli
8320
8416
  };
8321
- //# sourceMappingURL=chunk-ASU5L5ZW.js.map
8417
+ //# sourceMappingURL=chunk-FXN26R2H.js.map