agentv 3.14.2 → 3.14.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,6 +3,7 @@ import {
3
3
  HtmlWriter,
4
4
  RESULT_INDEX_FILENAME,
5
5
  RESULT_RUNS_DIRNAME,
6
+ buildDefaultRunDir,
6
7
  detectFileType,
7
8
  findRepoRoot,
8
9
  loadLightweightResults,
@@ -22,7 +23,7 @@ import {
22
23
  validateFileReferences,
23
24
  validateTargetsFile,
24
25
  writeArtifactsFromResults
25
- } from "./chunk-75PQBKLR.js";
26
+ } from "./chunk-SAPEYQ5U.js";
26
27
  import {
27
28
  createBuiltinRegistry,
28
29
  executeScript,
@@ -4185,7 +4186,7 @@ var evalRunCommand = command({
4185
4186
  },
4186
4187
  handler: async (args) => {
4187
4188
  if (args.evalPaths.length === 0 && process.stdin.isTTY) {
4188
- const { launchInteractiveWizard } = await import("./interactive-Q563ULAR.js");
4189
+ const { launchInteractiveWizard } = await import("./interactive-PGZ55VHT.js");
4189
4190
  await launchInteractiveWizard();
4190
4191
  return;
4191
4192
  }
@@ -4398,6 +4399,7 @@ var initCmdTsCommand = command({
4398
4399
  });
4399
4400
 
4400
4401
  // src/commands/pipeline/bench.ts
4402
+ import { existsSync as existsSync2 } from "node:fs";
4401
4403
  import { readFile, readdir, writeFile as writeFile2 } from "node:fs/promises";
4402
4404
  import { join } from "node:path";
4403
4405
  var evalBenchCommand = command({
@@ -4497,14 +4499,41 @@ var evalBenchCommand = command({
4497
4499
  `,
4498
4500
  "utf8"
4499
4501
  );
4502
+ const scores = evaluators.map((e) => ({
4503
+ name: e.name,
4504
+ type: e.type,
4505
+ score: e.score,
4506
+ weight: e.weight,
4507
+ verdict: e.score >= 0.5 ? "pass" : "fail",
4508
+ assertions: e.assertions.map((a) => ({
4509
+ text: a.text,
4510
+ passed: a.passed,
4511
+ evidence: a.evidence ?? ""
4512
+ }))
4513
+ }));
4514
+ let executionStatus = "ok";
4515
+ const timingPath = join(testDir, "timing.json");
4516
+ if (existsSync2(timingPath)) {
4517
+ try {
4518
+ const timing = JSON.parse(await readFile(timingPath, "utf8"));
4519
+ if (typeof timing.execution_status === "string") {
4520
+ executionStatus = timing.execution_status;
4521
+ }
4522
+ } catch {
4523
+ }
4524
+ }
4525
+ const hasResponse = existsSync2(join(testDir, "response.md"));
4500
4526
  indexLines.push(
4501
4527
  JSON.stringify({
4502
4528
  timestamp: manifest.timestamp,
4503
4529
  test_id: testId,
4504
4530
  score: Math.round(weightedScore * 1e3) / 1e3,
4505
4531
  target: targetName,
4532
+ scores,
4533
+ execution_status: executionStatus,
4506
4534
  grading_path: `${testId}/grading.json`,
4507
- timing_path: `${testId}/timing.json`
4535
+ timing_path: `${testId}/timing.json`,
4536
+ response_path: hasResponse ? `${testId}/response.md` : null
4508
4537
  })
4509
4538
  );
4510
4539
  }
@@ -4680,14 +4709,14 @@ var evalInputCommand = command({
4680
4709
  description: "Path to eval YAML file"
4681
4710
  }),
4682
4711
  out: option({
4683
- type: string,
4712
+ type: optional(string),
4684
4713
  long: "out",
4685
- description: "Output directory for extracted inputs"
4714
+ description: "Output directory for extracted inputs (default: .agentv/results/runs/eval_<timestamp>)"
4686
4715
  })
4687
4716
  },
4688
4717
  handler: async ({ evalPath, out }) => {
4689
4718
  const resolvedEvalPath = resolve(evalPath);
4690
- const outDir = resolve(out);
4719
+ const outDir = resolve(out ?? buildDefaultRunDir(process.cwd()));
4691
4720
  const repoRoot = await findRepoRoot(dirname(resolvedEvalPath));
4692
4721
  const evalDir = dirname(resolvedEvalPath);
4693
4722
  const suite = await loadTestSuite(resolvedEvalPath, repoRoot);
@@ -4826,7 +4855,7 @@ async function writeJson(filePath, data) {
4826
4855
 
4827
4856
  // src/commands/pipeline/run.ts
4828
4857
  import { execSync } from "node:child_process";
4829
- import { existsSync as existsSync2, readFileSync as readFileSync4, unlinkSync } from "node:fs";
4858
+ import { existsSync as existsSync3, readFileSync as readFileSync4, unlinkSync } from "node:fs";
4830
4859
  import { mkdir as mkdir4, readFile as readFile4, readdir as readdir3, writeFile as writeFile5 } from "node:fs/promises";
4831
4860
  import { tmpdir } from "node:os";
4832
4861
  import { dirname as dirname2, join as join4, resolve as resolve2 } from "node:path";
@@ -4834,7 +4863,7 @@ function loadEnvFile(dir) {
4834
4863
  let current = resolve2(dir);
4835
4864
  while (true) {
4836
4865
  const candidate = join4(current, ".env");
4837
- if (existsSync2(candidate)) {
4866
+ if (existsSync3(candidate)) {
4838
4867
  const env3 = {};
4839
4868
  for (const line of readFileSync4(candidate, "utf8").split("\n")) {
4840
4869
  const trimmed = line.trim();
@@ -4861,9 +4890,9 @@ var evalRunCommand2 = command({
4861
4890
  description: "Path to eval YAML file"
4862
4891
  }),
4863
4892
  out: option({
4864
- type: string,
4893
+ type: optional(string),
4865
4894
  long: "out",
4866
- description: "Output directory for results"
4895
+ description: "Output directory for results (default: .agentv/results/runs/eval_<timestamp>)"
4867
4896
  }),
4868
4897
  workers: option({
4869
4898
  type: optional(number),
@@ -4873,7 +4902,7 @@ var evalRunCommand2 = command({
4873
4902
  },
4874
4903
  handler: async ({ evalPath, out, workers }) => {
4875
4904
  const resolvedEvalPath = resolve2(evalPath);
4876
- const outDir = resolve2(out);
4905
+ const outDir = resolve2(out ?? buildDefaultRunDir(process.cwd()));
4877
4906
  const repoRoot = await findRepoRoot(dirname2(resolvedEvalPath));
4878
4907
  const evalDir = dirname2(resolvedEvalPath);
4879
4908
  const suite = await loadTestSuite(resolvedEvalPath, repoRoot);
@@ -4957,6 +4986,9 @@ var evalRunCommand2 = command({
4957
4986
  console.log(`Extracted ${testIds.length} test(s) to ${outDir}`);
4958
4987
  if (targetInfo) {
4959
4988
  const envVars = loadEnvFile(evalDir);
4989
+ if (!process.env.AGENTV_RUN_TIMESTAMP) {
4990
+ process.env.AGENTV_RUN_TIMESTAMP = (/* @__PURE__ */ new Date()).toISOString().replace(/:/g, "-").replace(/\./g, "-");
4991
+ }
4960
4992
  const mergedEnv = { ...process.env, ...envVars };
4961
4993
  const maxWorkers = workers ?? testIds.length;
4962
4994
  console.log(`Invoking ${testIds.length} CLI target(s) (${maxWorkers} workers)...`);
@@ -4986,7 +5018,7 @@ var evalRunCommand2 = command({
4986
5018
  });
4987
5019
  const durationMs = Math.round(performance.now() - start);
4988
5020
  let response;
4989
- if (existsSync2(outputFile)) {
5021
+ if (existsSync3(outputFile)) {
4990
5022
  response = readFileSync4(outputFile, "utf8");
4991
5023
  } else {
4992
5024
  response = "ERROR: No output file generated";
@@ -4994,7 +5026,8 @@ var evalRunCommand2 = command({
4994
5026
  await writeFile5(join4(testDir, "response.md"), response, "utf8");
4995
5027
  await writeJson2(join4(testDir, "timing.json"), {
4996
5028
  duration_ms: durationMs,
4997
- total_duration_seconds: Math.round(durationMs / 10) / 100
5029
+ total_duration_seconds: Math.round(durationMs / 10) / 100,
5030
+ execution_status: "ok"
4998
5031
  });
4999
5032
  console.log(` ${testId}: OK (${durationMs}ms, ${response.length} chars)`);
5000
5033
  } catch (error) {
@@ -5004,19 +5037,29 @@ var evalRunCommand2 = command({
5004
5037
  await writeFile5(join4(testDir, "response.md"), response, "utf8");
5005
5038
  await writeJson2(join4(testDir, "timing.json"), {
5006
5039
  duration_ms: durationMs,
5007
- total_duration_seconds: Math.round(durationMs / 10) / 100
5040
+ total_duration_seconds: Math.round(durationMs / 10) / 100,
5041
+ execution_status: "execution_error"
5008
5042
  });
5009
5043
  console.error(` ${testId}: FAILED (${durationMs}ms) \u2014 ${message.slice(0, 200)}`);
5010
5044
  } finally {
5011
5045
  try {
5012
- if (existsSync2(promptFile)) unlinkSync(promptFile);
5013
- if (existsSync2(outputFile)) unlinkSync(outputFile);
5046
+ if (existsSync3(promptFile)) unlinkSync(promptFile);
5047
+ if (existsSync3(outputFile)) unlinkSync(outputFile);
5014
5048
  } catch {
5015
5049
  }
5016
5050
  }
5017
5051
  };
5018
- const allTasks = testIds.map((testId) => invokeTarget(testId));
5019
- await Promise.all(allTasks);
5052
+ const pending = /* @__PURE__ */ new Set();
5053
+ for (const testId of testIds) {
5054
+ const task = invokeTarget(testId).then(() => {
5055
+ pending.delete(task);
5056
+ });
5057
+ pending.add(task);
5058
+ if (pending.size >= maxWorkers) {
5059
+ await Promise.race(pending);
5060
+ }
5061
+ }
5062
+ await Promise.all(pending);
5020
5063
  } else {
5021
5064
  console.log("Agent-as-target mode \u2014 skipping CLI invocation.");
5022
5065
  }
@@ -5185,7 +5228,7 @@ var pipelineCommand = subcommands({
5185
5228
  import path7 from "node:path";
5186
5229
 
5187
5230
  // src/commands/results/shared.ts
5188
- import { existsSync as existsSync3 } from "node:fs";
5231
+ import { existsSync as existsSync4 } from "node:fs";
5189
5232
 
5190
5233
  // src/commands/trace/utils.ts
5191
5234
  import { readFileSync as readFileSync5, readdirSync as readdirSync2, statSync as statSync2 } from "node:fs";
@@ -5599,14 +5642,14 @@ async function resolveSourceFile(source, cwd) {
5599
5642
  let sourceFile;
5600
5643
  if (source) {
5601
5644
  sourceFile = resolveResultSourcePath(source, cwd);
5602
- if (!existsSync3(sourceFile)) {
5645
+ if (!existsSync4(sourceFile)) {
5603
5646
  console.error(`Error: File not found: ${sourceFile}`);
5604
5647
  process.exit(1);
5605
5648
  }
5606
5649
  } else {
5607
5650
  const cache = await loadRunCache(cwd);
5608
5651
  const cachedFile = cache ? resolveRunCacheFile(cache) : "";
5609
- if (cachedFile && existsSync3(cachedFile)) {
5652
+ if (cachedFile && existsSync4(cachedFile)) {
5610
5653
  sourceFile = cachedFile;
5611
5654
  } else {
5612
5655
  const metas = listResultFiles(cwd, 1);
@@ -5818,7 +5861,7 @@ var resultsShowCommand = command({
5818
5861
  });
5819
5862
 
5820
5863
  // src/commands/results/summary.ts
5821
- import { existsSync as existsSync4, readFileSync as readFileSync6 } from "node:fs";
5864
+ import { existsSync as existsSync5, readFileSync as readFileSync6 } from "node:fs";
5822
5865
  function formatSummary(results, grading) {
5823
5866
  const total = results.length;
5824
5867
  let passed;
@@ -5869,7 +5912,7 @@ var resultsSummaryCommand = command({
5869
5912
  const { results, sourceFile } = await loadResults(source, cwd);
5870
5913
  let grading;
5871
5914
  const gradingPath = sourceFile.replace(/\.jsonl$/, ".grading.json");
5872
- if (existsSync4(gradingPath)) {
5915
+ if (existsSync5(gradingPath)) {
5873
5916
  try {
5874
5917
  grading = JSON.parse(readFileSync6(gradingPath, "utf8"));
5875
5918
  } catch {
@@ -5883,6 +5926,217 @@ var resultsSummaryCommand = command({
5883
5926
  }
5884
5927
  });
5885
5928
 
5929
+ // src/commands/results/validate.ts
5930
+ import { existsSync as existsSync6, readFileSync as readFileSync7, statSync as statSync3 } from "node:fs";
5931
+ import path8 from "node:path";
5932
/**
 * Check that a run directory follows the naming convention
 * `<...>/runs/eval_<ISO-timestamp>`.
 *
 * Only the last two path segments are inspected; nothing is read from disk.
 *
 * @param {string} runDir - Path of the run directory to inspect.
 * @returns {{severity: string, message: string}[]} Zero, one or two warnings.
 */
function checkDirectoryNaming(runDir) {
  const warning = (message) => ({ severity: "warning", message });
  // ISO timestamp with ':' and '.' replaced by '-', e.g. 2026-03-27T12-42-24-429Z.
  const runDirPattern = /^eval_\d{4}-\d{2}-\d{2}T\d{2}-\d{2}-\d{2}-\d{3}Z$/;
  const leaf = path8.basename(runDir);
  const parent = path8.basename(path8.dirname(runDir));
  const found = [];
  if (parent !== "runs") {
    found.push(
      warning(
        `Directory is not under a 'runs/' parent (found '${parent}/'). Expected: .agentv/results/runs/<run-dir>`
      )
    );
  }
  if (!runDirPattern.test(leaf)) {
    found.push(
      warning(
        `Directory name '${leaf}' does not match the expected pattern 'eval_<ISO-timestamp>'. Example: eval_2026-03-27T12-42-24-429Z`
      )
    );
  }
  return found;
}
5950
/**
 * Validate the `index.jsonl` manifest inside a run directory.
 *
 * Each non-blank line must be a JSON object. For every such object the
 * required fields (`test_id`, `score`, `target`) produce errors when absent
 * or malformed, and the optional fields (`grading_path`, `scores[]`,
 * `execution_status`) produce warnings.
 *
 * Line numbers in messages are the real 1-based line numbers of the file
 * (blank lines are skipped but still counted).
 *
 * @param {string} runDir - Path of the run directory containing index.jsonl.
 * @returns {{diagnostics: {severity: string, message: string}[], entries: object[]}}
 *   Collected diagnostics plus every line that parsed to a JSON object.
 *   Lines that are not valid JSON, or that parse to a non-object value
 *   (null, number, string, array), are reported and NOT added to `entries`.
 */
function checkIndexJsonl(runDir) {
  const indexPath = path8.join(runDir, "index.jsonl");
  const diagnostics = [];
  const entries2 = [];
  if (!existsSync6(indexPath)) {
    diagnostics.push({ severity: "error", message: "index.jsonl is missing" });
    return { diagnostics, entries: entries2 };
  }
  const lines = readFileSync7(indexPath, "utf8").split("\n");
  if (lines.every((l) => l.trim().length === 0)) {
    diagnostics.push({ severity: "error", message: "index.jsonl is empty" });
    return { diagnostics, entries: entries2 };
  }
  for (let i = 0; i < lines.length; i++) {
    if (lines[i].trim().length === 0) {
      continue;
    }
    let entry;
    // Only the parse itself is guarded, so a validation bug can never be
    // misreported as "invalid JSON".
    try {
      entry = JSON.parse(lines[i]);
    } catch {
      diagnostics.push({
        severity: "error",
        message: `index.jsonl line ${i + 1}: invalid JSON`
      });
      continue;
    }
    // `null`, primitives and arrays are valid JSON but not valid entries;
    // keeping them out of `entries2` protects downstream property access.
    if (entry === null || typeof entry !== "object" || Array.isArray(entry)) {
      diagnostics.push({
        severity: "error",
        message: `index.jsonl line ${i + 1}: expected a JSON object`
      });
      continue;
    }
    entries2.push(entry);
    const where = `index.jsonl line ${i + 1} (${entry.test_id ?? "?"})`;
    if (!entry.test_id) {
      diagnostics.push({
        severity: "error",
        message: `index.jsonl line ${i + 1}: missing 'test_id'`
      });
    }
    if (entry.score === void 0 || entry.score === null) {
      diagnostics.push({
        severity: "error",
        message: `${where}: missing 'score'`
      });
    } else if (typeof entry.score !== "number" || entry.score < 0 || entry.score > 1) {
      diagnostics.push({
        severity: "error",
        message: `${where}: score ${entry.score} is outside [0, 1]`
      });
    }
    if (!entry.target) {
      diagnostics.push({
        severity: "error",
        message: `${where}: missing 'target'`
      });
    }
    if (!entry.grading_path) {
      diagnostics.push({
        severity: "warning",
        message: `${where}: missing 'grading_path'`
      });
    }
    if (!Array.isArray(entry.scores) || entry.scores.length === 0) {
      diagnostics.push({
        severity: "warning",
        message: `${where}: missing 'scores[]' array \u2014 dashboard may not show per-evaluator breakdown`
      });
    } else {
      for (let j = 0; j < entry.scores.length; j++) {
        const s = entry.scores[j];
        if (!s || typeof s !== "object") {
          diagnostics.push({
            severity: "error",
            message: `${where}: scores[${j}] is not an object`
          });
          continue;
        }
        const missing = [];
        if (typeof s.name !== "string") missing.push("name");
        if (typeof s.type !== "string") missing.push("type");
        if (typeof s.score !== "number") missing.push("score");
        if (typeof s.verdict !== "string") missing.push("verdict");
        if (missing.length > 0) {
          diagnostics.push({
            severity: "warning",
            message: `${where}: scores[${j}] missing fields: ${missing.join(", ")}`
          });
        }
      }
    }
    if (!entry.execution_status) {
      diagnostics.push({
        severity: "warning",
        message: `${where}: missing 'execution_status'`
      });
    } else if (!["ok", "quality_failure", "execution_error"].includes(entry.execution_status)) {
      diagnostics.push({
        severity: "warning",
        message: `${where}: unknown execution_status '${entry.execution_status}' (expected: ok, quality_failure, execution_error)`
      });
    }
  }
  return { diagnostics, entries: entries2 };
}
6045
/**
 * Verify that the per-test artifact files referenced by index entries exist
 * in the run directory and that each grading.json has the expected shape.
 *
 * Also warns when the run-level `benchmark.json` is missing.
 *
 * @param {string} runDir - Path of the run directory.
 * @param {Array<object>} entries2 - Parsed index.jsonl entries. Values that
 *   are not objects are ignored instead of raising a TypeError.
 * @returns {{severity: string, message: string}[]} Collected diagnostics.
 */
function checkArtifactFiles(runDir, entries2) {
  const diagnostics = [];
  for (const entry of entries2) {
    // Defensive: the index may contain lines such as `null` that parse as
    // JSON but carry no fields to check.
    if (entry === null || typeof entry !== "object") {
      continue;
    }
    const testId = entry.test_id ?? "?";
    if (entry.grading_path) {
      if (typeof entry.grading_path !== "string") {
        // path.join throws on non-string segments; report instead of crashing.
        diagnostics.push({
          severity: "error",
          message: `${testId}: 'grading_path' must be a string`
        });
      } else {
        const gradingPath = path8.join(runDir, entry.grading_path);
        if (!existsSync6(gradingPath)) {
          diagnostics.push({
            severity: "error",
            message: `${testId}: grading.json not found at '${entry.grading_path}'`
          });
        } else {
          try {
            const grading = JSON.parse(readFileSync7(gradingPath, "utf8"));
            // Optional chaining keeps a literal `null` document from being
            // misreported as a JSON syntax error.
            if (!Array.isArray(grading?.assertions)) {
              diagnostics.push({
                severity: "error",
                message: `${testId}: grading.json missing 'assertions' array`
              });
            }
            if (!grading?.summary) {
              diagnostics.push({
                severity: "warning",
                message: `${testId}: grading.json missing 'summary' object`
              });
            }
          } catch {
            diagnostics.push({
              severity: "error",
              message: `${testId}: grading.json is not valid JSON`
            });
          }
        }
      }
    }
    if (entry.timing_path) {
      if (typeof entry.timing_path !== "string") {
        diagnostics.push({
          severity: "warning",
          message: `${testId}: 'timing_path' must be a string`
        });
      } else if (!existsSync6(path8.join(runDir, entry.timing_path))) {
        diagnostics.push({
          severity: "warning",
          message: `${testId}: timing.json not found at '${entry.timing_path}'`
        });
      }
    }
  }
  const benchmarkPath = path8.join(runDir, "benchmark.json");
  if (!existsSync6(benchmarkPath)) {
    diagnostics.push({ severity: "warning", message: "benchmark.json is missing" });
  }
  return diagnostics;
}
6095
// `results validate <run-dir>`: runs the directory-naming, index.jsonl and
// artifact-file checks against a run directory, prints errors before
// warnings, and exits with status 1 when at least one error was found.
var resultsValidateCommand = command({
  name: "validate",
  description: "Validate that a run directory contains well-formed result artifacts",
  args: {
    runDir: positional({
      type: string,
      displayName: "run-dir",
      description: "Path to the run directory to validate"
    })
  },
  handler: async ({ runDir }) => {
    const target = path8.resolve(runDir);
    const isDirectory = existsSync6(target) && statSync3(target).isDirectory();
    if (!isDirectory) {
      console.error(`Error: '${runDir}' is not a directory`);
      process.exit(1);
    }
    const namingDiags = checkDirectoryNaming(target);
    const indexReport = checkIndexJsonl(target);
    const tests = indexReport.entries;
    // Artifact checks only make sense when the index yielded entries.
    const artifactDiags = tests.length > 0 ? checkArtifactFiles(target, tests) : [];
    const collected = [...namingDiags, ...indexReport.diagnostics, ...artifactDiags];
    if (collected.length === 0) {
      console.log(`\u2713 Valid run directory: ${tests.length} test(s), no issues found`);
      return;
    }
    const errors = [];
    const warnings = [];
    for (const diag of collected) {
      if (diag.severity === "error") {
        errors.push(diag);
      } else if (diag.severity === "warning") {
        warnings.push(diag);
      }
    }
    errors.forEach((diag) => {
      console.error(`  ERROR: ${diag.message}`);
    });
    warnings.forEach((diag) => {
      console.warn(`  WARN: ${diag.message}`);
    });
    console.log(
      `\n${tests.length} test(s), ${errors.length} error(s), ${warnings.length} warning(s)`
    );
    if (errors.length > 0) {
      process.exit(1);
    }
  }
});
6139
+
5886
6140
  // src/commands/results/index.ts
5887
6141
  var resultsCommand = subcommands({
5888
6142
  name: "results",
@@ -5891,24 +6145,25 @@ var resultsCommand = subcommands({
5891
6145
  export: resultsExportCommand,
5892
6146
  summary: resultsSummaryCommand,
5893
6147
  failures: resultsFailuresCommand,
5894
- show: resultsShowCommand
6148
+ show: resultsShowCommand,
6149
+ validate: resultsValidateCommand
5895
6150
  }
5896
6151
  });
5897
6152
 
5898
6153
  // src/commands/results/serve.ts
5899
- import { existsSync as existsSync5, readFileSync as readFileSync7, writeFileSync as writeFileSync3 } from "node:fs";
5900
- import path8 from "node:path";
6154
+ import { existsSync as existsSync7, readFileSync as readFileSync8, writeFileSync as writeFileSync3 } from "node:fs";
6155
+ import path9 from "node:path";
5901
6156
  import { Hono } from "hono";
5902
6157
  function feedbackPath(resultDir) {
5903
- return path8.join(resultDir, "feedback.json");
6158
+ return path9.join(resultDir, "feedback.json");
5904
6159
  }
5905
6160
  function readFeedback(cwd) {
5906
6161
  const fp = feedbackPath(cwd);
5907
- if (!existsSync5(fp)) {
6162
+ if (!existsSync7(fp)) {
5908
6163
  return { reviews: [] };
5909
6164
  }
5910
6165
  try {
5911
- return JSON.parse(readFileSync7(fp, "utf8"));
6166
+ return JSON.parse(readFileSync8(fp, "utf8"));
5912
6167
  } catch (err2) {
5913
6168
  console.error(`Warning: could not parse ${fp}, starting fresh: ${err2.message}`);
5914
6169
  return { reviews: [] };
@@ -6047,7 +6302,7 @@ ${SERVE_STYLES}
6047
6302
  <main id="app"></main>
6048
6303
  <script>
6049
6304
  var DATA = ${dataJson};
6050
- var INITIAL_SOURCE = ${sourceFile ? JSON.stringify(path8.basename(sourceFile)).replace(/</g, "\\u003c").replace(/>/g, "\\u003e") : "null"};
6305
+ var INITIAL_SOURCE = ${sourceFile ? JSON.stringify(path9.basename(sourceFile)).replace(/</g, "\\u003c").replace(/>/g, "\\u003e") : "null"};
6051
6306
  ${SERVE_SCRIPT}
6052
6307
  </script>
6053
6308
  </body>
@@ -6708,7 +6963,7 @@ var resultsServeCommand = command({
6708
6963
  let sourceFile;
6709
6964
  if (source) {
6710
6965
  const resolved = resolveResultSourcePath(source, cwd);
6711
- if (!existsSync5(resolved)) {
6966
+ if (!existsSync7(resolved)) {
6712
6967
  console.error(`Error: Source file not found: ${resolved}`);
6713
6968
  process.exit(1);
6714
6969
  }
@@ -6717,7 +6972,7 @@ var resultsServeCommand = command({
6717
6972
  } else {
6718
6973
  const cache = await loadRunCache(cwd);
6719
6974
  const cachedFile = cache ? resolveRunCacheFile(cache) : "";
6720
- if (cachedFile && existsSync5(cachedFile)) {
6975
+ if (cachedFile && existsSync7(cachedFile)) {
6721
6976
  sourceFile = cachedFile;
6722
6977
  results = patchTestIds(loadManifestResults(cachedFile));
6723
6978
  } else {
@@ -6728,7 +6983,7 @@ var resultsServeCommand = command({
6728
6983
  }
6729
6984
  }
6730
6985
  }
6731
- const resultDir = sourceFile ? path8.dirname(path8.resolve(sourceFile)) : cwd;
6986
+ const resultDir = sourceFile ? path9.dirname(path9.resolve(sourceFile)) : cwd;
6732
6987
  const app2 = createApp(results, resultDir, cwd, sourceFile);
6733
6988
  if (results.length > 0 && sourceFile) {
6734
6989
  console.log(`Serving ${results.length} result(s) from ${sourceFile}`);
@@ -7622,7 +7877,7 @@ var traceCommand = subcommands({
7622
7877
 
7623
7878
  // src/commands/transpile/index.ts
7624
7879
  import { writeFileSync as writeFileSync4 } from "node:fs";
7625
- import path9 from "node:path";
7880
+ import path10 from "node:path";
7626
7881
  var transpileCommand = command({
7627
7882
  name: "transpile",
7628
7883
  description: "Convert an EVAL.yaml file to Agent Skills evals.json format",
@@ -7646,7 +7901,7 @@ var transpileCommand = command({
7646
7901
  handler: async ({ input, outDir, stdout }) => {
7647
7902
  let result;
7648
7903
  try {
7649
- result = transpileEvalYamlFile(path9.resolve(input));
7904
+ result = transpileEvalYamlFile(path10.resolve(input));
7650
7905
  } catch (error) {
7651
7906
  console.error(`Error: ${error.message}`);
7652
7907
  process.exit(1);
@@ -7670,11 +7925,11 @@ var transpileCommand = command({
7670
7925
  process.stdout.write("\n");
7671
7926
  return;
7672
7927
  }
7673
- const outputDir = outDir ? path9.resolve(outDir) : path9.dirname(path9.resolve(input));
7928
+ const outputDir = outDir ? path10.resolve(outDir) : path10.dirname(path10.resolve(input));
7674
7929
  const fileNames = getOutputFilenames(result);
7675
7930
  for (const [skill, evalsJson] of result.files) {
7676
7931
  const fileName = fileNames.get(skill) ?? "evals.json";
7677
- const outputPath = path9.join(outputDir, fileName);
7932
+ const outputPath = path10.join(outputDir, fileName);
7678
7933
  writeFileSync4(outputPath, `${JSON.stringify(evalsJson, null, 2)}
7679
7934
  `);
7680
7935
  console.log(`Transpiled to ${outputPath}`);
@@ -7683,7 +7938,7 @@ var transpileCommand = command({
7683
7938
  });
7684
7939
 
7685
7940
  // src/commands/trim/index.ts
7686
- import { readFileSync as readFileSync8, writeFileSync as writeFileSync5 } from "node:fs";
7941
+ import { readFileSync as readFileSync9, writeFileSync as writeFileSync5 } from "node:fs";
7687
7942
  var trimCommand = command({
7688
7943
  name: "trim",
7689
7944
  description: "Trim evaluation results for baseline storage (strips debug/audit fields)",
@@ -7702,7 +7957,7 @@ var trimCommand = command({
7702
7957
  },
7703
7958
  handler: async ({ input, out }) => {
7704
7959
  try {
7705
- const content = readFileSync8(input, "utf8");
7960
+ const content = readFileSync9(input, "utf8");
7706
7961
  const lines = content.trim().split("\n").filter((line) => line.trim());
7707
7962
  const trimmedLines = lines.map((line) => {
7708
7963
  const record = JSON.parse(line);
@@ -7809,7 +8064,7 @@ function isTTY() {
7809
8064
  // src/commands/validate/validate-files.ts
7810
8065
  import { constants } from "node:fs";
7811
8066
  import { access, readdir as readdir4, stat } from "node:fs/promises";
7812
- import path10 from "node:path";
8067
+ import path11 from "node:path";
7813
8068
  async function validateFiles(paths) {
7814
8069
  const filePaths = await expandPaths(paths);
7815
8070
  const results = [];
@@ -7827,7 +8082,7 @@ async function validateFiles(paths) {
7827
8082
  };
7828
8083
  }
7829
8084
  async function validateSingleFile(filePath) {
7830
- const absolutePath = path10.resolve(filePath);
8085
+ const absolutePath = path11.resolve(filePath);
7831
8086
  const fileType = await detectFileType(absolutePath);
7832
8087
  let result;
7833
8088
  if (fileType === "eval") {
@@ -7852,7 +8107,7 @@ async function validateSingleFile(filePath) {
7852
8107
  async function expandPaths(paths) {
7853
8108
  const expanded = [];
7854
8109
  for (const inputPath of paths) {
7855
- const absolutePath = path10.resolve(inputPath);
8110
+ const absolutePath = path11.resolve(inputPath);
7856
8111
  try {
7857
8112
  await access(absolutePath, constants.F_OK);
7858
8113
  } catch {
@@ -7876,7 +8131,7 @@ async function findYamlFiles(dirPath) {
7876
8131
  try {
7877
8132
  const entries2 = await readdir4(dirPath, { withFileTypes: true });
7878
8133
  for (const entry of entries2) {
7879
- const fullPath = path10.join(dirPath, entry.name);
8134
+ const fullPath = path11.join(dirPath, entry.name);
7880
8135
  if (entry.isDirectory()) {
7881
8136
  if (entry.name === "node_modules" || entry.name.startsWith(".")) {
7882
8137
  continue;
@@ -7893,7 +8148,7 @@ async function findYamlFiles(dirPath) {
7893
8148
  return results;
7894
8149
  }
7895
8150
  function isYamlFile(filePath) {
7896
- const ext = path10.extname(filePath).toLowerCase();
8151
+ const ext = path11.extname(filePath).toLowerCase();
7897
8152
  return ext === ".yaml" || ext === ".yml";
7898
8153
  }
7899
8154
 
@@ -7931,9 +8186,9 @@ var validateCommand = command({
7931
8186
  });
7932
8187
 
7933
8188
  // src/commands/workspace/clean.ts
7934
- import { existsSync as existsSync6 } from "node:fs";
8189
+ import { existsSync as existsSync8 } from "node:fs";
7935
8190
  import { readFile as readFile5, readdir as readdir5, rm } from "node:fs/promises";
7936
- import path11 from "node:path";
8191
+ import path12 from "node:path";
7937
8192
  async function confirm(message) {
7938
8193
  const readline2 = await import("node:readline");
7939
8194
  const rl = readline2.createInterface({ input: process.stdin, output: process.stdout });
@@ -7960,7 +8215,7 @@ var cleanCommand = command({
7960
8215
  },
7961
8216
  handler: async ({ repo, force }) => {
7962
8217
  const poolRoot = getWorkspacePoolRoot();
7963
- if (!existsSync6(poolRoot)) {
8218
+ if (!existsSync8(poolRoot)) {
7964
8219
  console.log("No workspace pool entries found.");
7965
8220
  return;
7966
8221
  }
@@ -7969,8 +8224,8 @@ var cleanCommand = command({
7969
8224
  const poolDirs = entries2.filter((e) => e.isDirectory());
7970
8225
  const matchingDirs = [];
7971
8226
  for (const dir of poolDirs) {
7972
- const poolDir = path11.join(poolRoot, dir.name);
7973
- const metadataPath = path11.join(poolDir, "metadata.json");
8227
+ const poolDir = path12.join(poolRoot, dir.name);
8228
+ const metadataPath = path12.join(poolDir, "metadata.json");
7974
8229
  try {
7975
8230
  const raw = await readFile5(metadataPath, "utf-8");
7976
8231
  const metadata = JSON.parse(raw);
@@ -8001,7 +8256,7 @@ var cleanCommand = command({
8001
8256
  }
8002
8257
  for (const dir of matchingDirs) {
8003
8258
  await rm(dir, { recursive: true, force: true });
8004
- console.log(`Removed: ${path11.basename(dir).slice(0, 12)}...`);
8259
+ console.log(`Removed: ${path12.basename(dir).slice(0, 12)}...`);
8005
8260
  }
8006
8261
  console.log("Done.");
8007
8262
  } else {
@@ -8019,15 +8274,15 @@ var cleanCommand = command({
8019
8274
  });
8020
8275
 
8021
8276
  // src/commands/workspace/list.ts
8022
- import { existsSync as existsSync7 } from "node:fs";
8277
+ import { existsSync as existsSync9 } from "node:fs";
8023
8278
  import { readFile as readFile6, readdir as readdir6, stat as stat2 } from "node:fs/promises";
8024
- import path12 from "node:path";
8279
+ import path13 from "node:path";
8025
8280
  async function getDirectorySize(dirPath) {
8026
8281
  let totalSize = 0;
8027
8282
  try {
8028
8283
  const entries2 = await readdir6(dirPath, { withFileTypes: true });
8029
8284
  for (const entry of entries2) {
8030
- const fullPath = path12.join(dirPath, entry.name);
8285
+ const fullPath = path13.join(dirPath, entry.name);
8031
8286
  if (entry.isDirectory()) {
8032
8287
  totalSize += await getDirectorySize(fullPath);
8033
8288
  } else {
@@ -8051,7 +8306,7 @@ var listCommand = command({
8051
8306
  args: {},
8052
8307
  handler: async () => {
8053
8308
  const poolRoot = getWorkspacePoolRoot();
8054
- if (!existsSync7(poolRoot)) {
8309
+ if (!existsSync9(poolRoot)) {
8055
8310
  console.log("No workspace pool entries found.");
8056
8311
  return;
8057
8312
  }
@@ -8062,11 +8317,11 @@ var listCommand = command({
8062
8317
  return;
8063
8318
  }
8064
8319
  for (const dir of poolDirs) {
8065
- const poolDir = path12.join(poolRoot, dir.name);
8320
+ const poolDir = path13.join(poolRoot, dir.name);
8066
8321
  const fingerprint = dir.name;
8067
8322
  const poolEntries = await readdir6(poolDir, { withFileTypes: true });
8068
8323
  const slots = poolEntries.filter((e) => e.isDirectory() && e.name.startsWith("slot-"));
8069
- const metadataPath = path12.join(poolDir, "metadata.json");
8324
+ const metadataPath = path13.join(poolDir, "metadata.json");
8070
8325
  let metadata = null;
8071
8326
  try {
8072
8327
  const raw = await readFile6(metadataPath, "utf-8");
@@ -8112,8 +8367,8 @@ var CHECK_INTERVAL_MS = 24 * 60 * 60 * 1e3;
8112
8367
  var AGENTV_DIR = getAgentvHome();
8113
8368
  var CACHE_FILE = "version-check.json";
8114
8369
  var NPM_REGISTRY_URL = "https://registry.npmjs.org/agentv/latest";
8115
- async function getCachedUpdateInfo(path13) {
8116
- const filePath = path13 ?? join5(AGENTV_DIR, CACHE_FILE);
8370
+ async function getCachedUpdateInfo(path14) {
8371
+ const filePath = path14 ?? join5(AGENTV_DIR, CACHE_FILE);
8117
8372
  try {
8118
8373
  const raw = await readFile7(filePath, "utf-8");
8119
8374
  const data = JSON.parse(raw);
@@ -8270,4 +8525,4 @@ export {
8270
8525
  preprocessArgv,
8271
8526
  runCli
8272
8527
  };
8273
- //# sourceMappingURL=chunk-3UW7KUQ3.js.map
8528
+ //# sourceMappingURL=chunk-3NLBBQX6.js.map