agentv 4.4.1 → 4.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -24,13 +24,14 @@ import {
24
24
  validateFileReferences,
25
25
  validateTargetsFile,
26
26
  writeArtifactsFromResults
27
- } from "./chunk-VYZQMN57.js";
27
+ } from "./chunk-3UST47TW.js";
28
28
  import {
29
29
  DEFAULT_CATEGORY,
30
30
  PASS_THRESHOLD,
31
31
  addProject,
32
32
  createBuiltinRegistry,
33
33
  deriveCategory,
34
+ discoverClaudeSessions,
34
35
  discoverProjects,
35
36
  executeScript,
36
37
  getAgentvHome,
@@ -39,23 +40,20 @@ import {
39
40
  getWorkspacePoolRoot,
40
41
  isAgentSkillsFormat,
41
42
  loadProjectRegistry,
42
- loadTestById,
43
43
  loadTestSuite,
44
- loadTests,
45
44
  normalizeLineEndings,
46
45
  parseAgentSkillsEvals,
46
+ parseClaudeSession,
47
+ readTranscriptFile,
47
48
  removeProject,
48
49
  toCamelCaseDeep,
49
50
  toSnakeCaseDeep as toSnakeCaseDeep2,
50
51
  transpileEvalYamlFile,
51
52
  trimBaselineResult
52
- } from "./chunk-63NDZ6UC.js";
53
+ } from "./chunk-TEPYEYPG.js";
53
54
  import {
54
55
  __commonJS,
55
- __esm,
56
- __export,
57
56
  __require,
58
- __toCommonJS,
59
57
  __toESM
60
58
  } from "./chunk-5H446C7X.js";
61
59
 
@@ -280,7 +278,7 @@ var require_ms = __commonJS({
280
278
  var require_common = __commonJS({
281
279
  "../../node_modules/.bun/debug@4.4.3/node_modules/debug/src/common.js"(exports, module) {
282
280
  "use strict";
283
- function setup(env3) {
281
+ function setup(env2) {
284
282
  createDebug.debug = createDebug;
285
283
  createDebug.default = createDebug;
286
284
  createDebug.coerce = coerce;
@@ -289,8 +287,8 @@ var require_common = __commonJS({
289
287
  createDebug.enabled = enabled;
290
288
  createDebug.humanize = require_ms();
291
289
  createDebug.destroy = destroy;
292
- Object.keys(env3).forEach((key) => {
293
- createDebug[key] = env3[key];
290
+ Object.keys(env2).forEach((key) => {
291
+ createDebug[key] = env2[key];
294
292
  });
295
293
  createDebug.names = [];
296
294
  createDebug.skips = [];
@@ -625,153 +623,118 @@ var require_browser = __commonJS({
625
623
  }
626
624
  });
627
625
 
628
- // ../../node_modules/.bun/supports-color@10.2.2/node_modules/supports-color/index.js
629
- var supports_color_exports = {};
630
- __export(supports_color_exports, {
631
- createSupportsColor: () => createSupportsColor2,
632
- default: () => supports_color_default2
633
- });
634
- import process3 from "node:process";
635
- import os2 from "node:os";
636
- import tty2 from "node:tty";
637
- function hasFlag2(flag2, argv = globalThis.Deno ? globalThis.Deno.args : process3.argv) {
638
- const prefix = flag2.startsWith("-") ? "" : flag2.length === 1 ? "-" : "--";
639
- const position = argv.indexOf(prefix + flag2);
640
- const terminatorPosition = argv.indexOf("--");
641
- return position !== -1 && (terminatorPosition === -1 || position < terminatorPosition);
642
- }
643
- function envForceColor2() {
644
- if (!("FORCE_COLOR" in env2)) {
645
- return;
646
- }
647
- if (env2.FORCE_COLOR === "true") {
648
- return 1;
649
- }
650
- if (env2.FORCE_COLOR === "false") {
651
- return 0;
652
- }
653
- if (env2.FORCE_COLOR.length === 0) {
654
- return 1;
655
- }
656
- const level = Math.min(Number.parseInt(env2.FORCE_COLOR, 10), 3);
657
- if (![0, 1, 2, 3].includes(level)) {
658
- return;
659
- }
660
- return level;
661
- }
662
- function translateLevel2(level) {
663
- if (level === 0) {
664
- return false;
665
- }
666
- return {
667
- level,
668
- hasBasic: true,
669
- has256: level >= 2,
670
- has16m: level >= 3
671
- };
672
- }
673
- function _supportsColor2(haveStream, { streamIsTTY, sniffFlags = true } = {}) {
674
- const noFlagForceColor = envForceColor2();
675
- if (noFlagForceColor !== void 0) {
676
- flagForceColor2 = noFlagForceColor;
677
- }
678
- const forceColor = sniffFlags ? flagForceColor2 : noFlagForceColor;
679
- if (forceColor === 0) {
680
- return 0;
681
- }
682
- if (sniffFlags) {
683
- if (hasFlag2("color=16m") || hasFlag2("color=full") || hasFlag2("color=truecolor")) {
684
- return 3;
685
- }
686
- if (hasFlag2("color=256")) {
687
- return 2;
688
- }
689
- }
690
- if ("TF_BUILD" in env2 && "AGENT_NAME" in env2) {
691
- return 1;
692
- }
693
- if (haveStream && !streamIsTTY && forceColor === void 0) {
694
- return 0;
695
- }
696
- const min = forceColor || 0;
697
- if (env2.TERM === "dumb") {
698
- return min;
626
+ // ../../node_modules/.bun/has-flag@4.0.0/node_modules/has-flag/index.js
627
+ var require_has_flag = __commonJS({
628
+ "../../node_modules/.bun/has-flag@4.0.0/node_modules/has-flag/index.js"(exports, module) {
629
+ "use strict";
630
+ module.exports = (flag2, argv = process.argv) => {
631
+ const prefix = flag2.startsWith("-") ? "" : flag2.length === 1 ? "-" : "--";
632
+ const position = argv.indexOf(prefix + flag2);
633
+ const terminatorPosition = argv.indexOf("--");
634
+ return position !== -1 && (terminatorPosition === -1 || position < terminatorPosition);
635
+ };
699
636
  }
700
- if (process3.platform === "win32") {
701
- const osRelease = os2.release().split(".");
702
- if (Number(osRelease[0]) >= 10 && Number(osRelease[2]) >= 10586) {
703
- return Number(osRelease[2]) >= 14931 ? 3 : 2;
637
+ });
638
+
639
+ // ../../node_modules/.bun/supports-color@7.2.0/node_modules/supports-color/index.js
640
+ var require_supports_color = __commonJS({
641
+ "../../node_modules/.bun/supports-color@7.2.0/node_modules/supports-color/index.js"(exports, module) {
642
+ "use strict";
643
+ var os2 = __require("os");
644
+ var tty2 = __require("tty");
645
+ var hasFlag2 = require_has_flag();
646
+ var { env: env2 } = process;
647
+ var forceColor;
648
+ if (hasFlag2("no-color") || hasFlag2("no-colors") || hasFlag2("color=false") || hasFlag2("color=never")) {
649
+ forceColor = 0;
650
+ } else if (hasFlag2("color") || hasFlag2("colors") || hasFlag2("color=true") || hasFlag2("color=always")) {
651
+ forceColor = 1;
704
652
  }
705
- return 1;
706
- }
707
- if ("CI" in env2) {
708
- if (["GITHUB_ACTIONS", "GITEA_ACTIONS", "CIRCLECI"].some((key) => key in env2)) {
709
- return 3;
653
+ if ("FORCE_COLOR" in env2) {
654
+ if (env2.FORCE_COLOR === "true") {
655
+ forceColor = 1;
656
+ } else if (env2.FORCE_COLOR === "false") {
657
+ forceColor = 0;
658
+ } else {
659
+ forceColor = env2.FORCE_COLOR.length === 0 ? 1 : Math.min(parseInt(env2.FORCE_COLOR, 10), 3);
660
+ }
710
661
  }
711
- if (["TRAVIS", "APPVEYOR", "GITLAB_CI", "BUILDKITE", "DRONE"].some((sign) => sign in env2) || env2.CI_NAME === "codeship") {
712
- return 1;
662
+ function translateLevel2(level) {
663
+ if (level === 0) {
664
+ return false;
665
+ }
666
+ return {
667
+ level,
668
+ hasBasic: true,
669
+ has256: level >= 2,
670
+ has16m: level >= 3
671
+ };
713
672
  }
714
- return min;
715
- }
716
- if ("TEAMCITY_VERSION" in env2) {
717
- return /^(9\.(0*[1-9]\d*)\.|\d{2,}\.)/.test(env2.TEAMCITY_VERSION) ? 1 : 0;
718
- }
719
- if (env2.COLORTERM === "truecolor") {
720
- return 3;
721
- }
722
- if (env2.TERM === "xterm-kitty") {
723
- return 3;
724
- }
725
- if (env2.TERM === "xterm-ghostty") {
726
- return 3;
727
- }
728
- if (env2.TERM === "wezterm") {
729
- return 3;
730
- }
731
- if ("TERM_PROGRAM" in env2) {
732
- const version = Number.parseInt((env2.TERM_PROGRAM_VERSION || "").split(".")[0], 10);
733
- switch (env2.TERM_PROGRAM) {
734
- case "iTerm.app": {
735
- return version >= 3 ? 3 : 2;
673
+ function supportsColor2(haveStream, streamIsTTY) {
674
+ if (forceColor === 0) {
675
+ return 0;
736
676
  }
737
- case "Apple_Terminal": {
677
+ if (hasFlag2("color=16m") || hasFlag2("color=full") || hasFlag2("color=truecolor")) {
678
+ return 3;
679
+ }
680
+ if (hasFlag2("color=256")) {
681
+ return 2;
682
+ }
683
+ if (haveStream && !streamIsTTY && forceColor === void 0) {
684
+ return 0;
685
+ }
686
+ const min = forceColor || 0;
687
+ if (env2.TERM === "dumb") {
688
+ return min;
689
+ }
690
+ if (process.platform === "win32") {
691
+ const osRelease = os2.release().split(".");
692
+ if (Number(osRelease[0]) >= 10 && Number(osRelease[2]) >= 10586) {
693
+ return Number(osRelease[2]) >= 14931 ? 3 : 2;
694
+ }
695
+ return 1;
696
+ }
697
+ if ("CI" in env2) {
698
+ if (["TRAVIS", "CIRCLECI", "APPVEYOR", "GITLAB_CI", "GITHUB_ACTIONS", "BUILDKITE"].some((sign) => sign in env2) || env2.CI_NAME === "codeship") {
699
+ return 1;
700
+ }
701
+ return min;
702
+ }
703
+ if ("TEAMCITY_VERSION" in env2) {
704
+ return /^(9\.(0*[1-9]\d*)\.|\d{2,}\.)/.test(env2.TEAMCITY_VERSION) ? 1 : 0;
705
+ }
706
+ if (env2.COLORTERM === "truecolor") {
707
+ return 3;
708
+ }
709
+ if ("TERM_PROGRAM" in env2) {
710
+ const version = parseInt((env2.TERM_PROGRAM_VERSION || "").split(".")[0], 10);
711
+ switch (env2.TERM_PROGRAM) {
712
+ case "iTerm.app":
713
+ return version >= 3 ? 3 : 2;
714
+ case "Apple_Terminal":
715
+ return 2;
716
+ }
717
+ }
718
+ if (/-256(color)?$/i.test(env2.TERM)) {
738
719
  return 2;
739
720
  }
721
+ if (/^screen|^xterm|^vt100|^vt220|^rxvt|color|ansi|cygwin|linux/i.test(env2.TERM)) {
722
+ return 1;
723
+ }
724
+ if ("COLORTERM" in env2) {
725
+ return 1;
726
+ }
727
+ return min;
740
728
  }
741
- }
742
- if (/-256(color)?$/i.test(env2.TERM)) {
743
- return 2;
744
- }
745
- if (/^screen|^xterm|^vt100|^vt220|^rxvt|color|ansi|cygwin|linux/i.test(env2.TERM)) {
746
- return 1;
747
- }
748
- if ("COLORTERM" in env2) {
749
- return 1;
750
- }
751
- return min;
752
- }
753
- function createSupportsColor2(stream, options = {}) {
754
- const level = _supportsColor2(stream, {
755
- streamIsTTY: stream && stream.isTTY,
756
- ...options
757
- });
758
- return translateLevel2(level);
759
- }
760
- var env2, flagForceColor2, supportsColor2, supports_color_default2;
761
- var init_supports_color = __esm({
762
- "../../node_modules/.bun/supports-color@10.2.2/node_modules/supports-color/index.js"() {
763
- "use strict";
764
- ({ env: env2 } = process3);
765
- if (hasFlag2("no-color") || hasFlag2("no-colors") || hasFlag2("color=false") || hasFlag2("color=never")) {
766
- flagForceColor2 = 0;
767
- } else if (hasFlag2("color") || hasFlag2("colors") || hasFlag2("color=true") || hasFlag2("color=always")) {
768
- flagForceColor2 = 1;
729
+ function getSupportLevel(stream) {
730
+ const level = supportsColor2(stream, stream && stream.isTTY);
731
+ return translateLevel2(level);
769
732
  }
770
- supportsColor2 = {
771
- stdout: createSupportsColor2({ isTTY: tty2.isatty(1) }),
772
- stderr: createSupportsColor2({ isTTY: tty2.isatty(2) })
733
+ module.exports = {
734
+ supportsColor: getSupportLevel,
735
+ stdout: translateLevel2(supportsColor2(true, tty2.isatty(1))),
736
+ stderr: translateLevel2(supportsColor2(true, tty2.isatty(2)))
773
737
  };
774
- supports_color_default2 = supportsColor2;
775
738
  }
776
739
  });
777
740
 
@@ -779,7 +742,7 @@ var init_supports_color = __esm({
779
742
  var require_node = __commonJS({
780
743
  "../../node_modules/.bun/debug@4.4.3/node_modules/debug/src/node.js"(exports, module) {
781
744
  "use strict";
782
- var tty3 = __require("tty");
745
+ var tty2 = __require("tty");
783
746
  var util = __require("util");
784
747
  exports.init = init;
785
748
  exports.log = log;
@@ -794,8 +757,8 @@ var require_node = __commonJS({
794
757
  );
795
758
  exports.colors = [6, 2, 3, 4, 5, 1];
796
759
  try {
797
- const supportsColor3 = (init_supports_color(), __toCommonJS(supports_color_exports));
798
- if (supportsColor3 && (supportsColor3.stderr || supportsColor3).level >= 2) {
760
+ const supportsColor2 = require_supports_color();
761
+ if (supportsColor2 && (supportsColor2.stderr || supportsColor2).level >= 2) {
799
762
  exports.colors = [
800
763
  20,
801
764
  21,
@@ -897,7 +860,7 @@ var require_node = __commonJS({
897
860
  return obj;
898
861
  }, {});
899
862
  function useColors() {
900
- return "colors" in exports.inspectOpts ? Boolean(exports.inspectOpts.colors) : tty3.isatty(process.stderr.fd);
863
+ return "colors" in exports.inspectOpts ? Boolean(exports.inspectOpts.colors) : tty2.isatty(process.stderr.fd);
901
864
  }
902
865
  function formatArgs(args) {
903
866
  const { namespace: name, useColors: useColors2 } = this;
@@ -1629,8 +1592,8 @@ function fullFlag(config) {
1629
1592
  }
1630
1593
  const defaults = [];
1631
1594
  if (config.env) {
1632
- const env3 = process.env[config.env] === void 0 ? "" : `=${source_default.italic(process.env[config.env])}`;
1633
- defaults.push(`env: ${config.env}${env3}`);
1595
+ const env2 = process.env[config.env] === void 0 ? "" : `=${source_default.italic(process.env[config.env])}`;
1596
+ defaults.push(`env: ${config.env}${env2}`);
1634
1597
  }
1635
1598
  if (config.defaultValue) {
1636
1599
  try {
@@ -2247,8 +2210,8 @@ function fullOption(config) {
2247
2210
  usage += ` <${displayName}>`;
2248
2211
  const defaults = [];
2249
2212
  if (config.env) {
2250
- const env3 = process.env[config.env] === void 0 ? "" : `=${source_default.italic(process.env[config.env])}`;
2251
- defaults.push(`env: ${config.env}${env3}`);
2213
+ const env2 = process.env[config.env] === void 0 ? "" : `=${source_default.italic(process.env[config.env])}`;
2214
+ defaults.push(`env: ${config.env}${env2}`);
2252
2215
  }
2253
2216
  if (config.defaultValue) {
2254
2217
  try {
@@ -3721,228 +3684,6 @@ var createCommand = subcommands({
3721
3684
  }
3722
3685
  });
3723
3686
 
3724
- // src/commands/eval/commands/prompt/accessors.ts
3725
- async function listPromptEvalTestIds(evalPath) {
3726
- const repoRoot = await findRepoRoot(process.cwd());
3727
- const tests = await loadTests(evalPath, repoRoot);
3728
- return {
3729
- eval_path: evalPath,
3730
- test_ids: tests.map((test) => test.id).sort()
3731
- };
3732
- }
3733
- async function getPromptEvalInput(evalPath, testId) {
3734
- const repoRoot = await findRepoRoot(process.cwd());
3735
- const evalCase = await loadTestById(evalPath, repoRoot, testId);
3736
- const fileMap = buildFileMap(evalCase.input, evalCase.file_paths);
3737
- return {
3738
- test_id: evalCase.id,
3739
- input: resolveMessages(evalCase.input, fileMap),
3740
- criteria: evalCase.criteria
3741
- };
3742
- }
3743
- async function getPromptEvalExpectedOutput(evalPath, testId) {
3744
- const repoRoot = await findRepoRoot(process.cwd());
3745
- const evalCase = await loadTestById(evalPath, repoRoot, testId);
3746
- return {
3747
- test_id: evalCase.id,
3748
- criteria: evalCase.criteria,
3749
- expected_output: evalCase.expected_output,
3750
- reference_answer: evalCase.reference_answer,
3751
- assertions: evalCase.assertions ?? []
3752
- };
3753
- }
3754
- async function getPromptEvalGradingBrief(evalPath, testId) {
3755
- const repoRoot = await findRepoRoot(process.cwd());
3756
- const evalCase = await loadTestById(evalPath, repoRoot, testId);
3757
- const fileMap = buildFileMap(evalCase.input, evalCase.file_paths);
3758
- const resolvedInput = resolveMessages(evalCase.input, fileMap);
3759
- const lines = [];
3760
- const inputText = extractTextFromMessages(resolvedInput);
3761
- if (inputText) {
3762
- lines.push(`Input: "${inputText}"`);
3763
- }
3764
- if (evalCase.file_paths.length > 0) {
3765
- lines.push(`Files: ${evalCase.file_paths.join(", ")}`);
3766
- }
3767
- if (evalCase.reference_answer) {
3768
- lines.push(`Expected: "${evalCase.reference_answer}"`);
3769
- }
3770
- const criteria = [];
3771
- if (evalCase.criteria) {
3772
- criteria.push(evalCase.criteria);
3773
- }
3774
- for (const assertion of evalCase.assertions ?? []) {
3775
- const entry = assertion;
3776
- const type = entry.type;
3777
- const bag = entry.config ?? {};
3778
- if (type === "contains") {
3779
- criteria.push(`Output contains '${entry.value}'`);
3780
- } else if (type === "rubrics") {
3781
- const items = entry.criteria ?? bag.criteria;
3782
- if (Array.isArray(items)) {
3783
- for (const item of items) {
3784
- if (item.outcome) criteria.push(item.outcome);
3785
- }
3786
- }
3787
- } else if (type === "llm-grader" || type === "llm_grader") {
3788
- const prompt = entry.prompt ?? bag.prompt ?? bag.criteria;
3789
- criteria.push(`[llm-grader] ${typeof prompt === "string" ? prompt : ""}`);
3790
- } else if (type === "code-grader" || type === "code_grader") {
3791
- const name = entry.name ?? type;
3792
- const desc = bag.description ?? entry.description;
3793
- criteria.push(`[code-grader] ${name}${desc ? `: ${desc}` : ""}`);
3794
- } else if (type === "skill-trigger") {
3795
- const trigger = entry.should_trigger !== false;
3796
- criteria.push(`[skill-trigger] should_trigger: ${trigger} for ${entry.skill}`);
3797
- } else if (type) {
3798
- criteria.push(`[${type}] ${entry.value ?? bag.criteria ?? bag.prompt ?? ""}`);
3799
- }
3800
- }
3801
- if (criteria.length > 0) {
3802
- lines.push("Criteria:");
3803
- for (const c3 of criteria) {
3804
- lines.push(` - ${c3}`);
3805
- }
3806
- }
3807
- return lines.join("\n");
3808
- }
3809
- function extractTextFromMessages(messages) {
3810
- for (const msg of messages) {
3811
- if (msg.role !== "user") continue;
3812
- if (typeof msg.content === "string") return msg.content;
3813
- if (Array.isArray(msg.content)) {
3814
- const textBlocks = msg.content.filter((b) => b.type === "text").map((b) => b.value);
3815
- if (textBlocks.length > 0) return textBlocks.join(" ");
3816
- }
3817
- }
3818
- return "";
3819
- }
3820
- function buildFileMap(inputMessages, allFilePaths) {
3821
- const map = /* @__PURE__ */ new Map();
3822
- for (const message of inputMessages) {
3823
- if (!Array.isArray(message.content)) {
3824
- continue;
3825
- }
3826
- for (const segment of message.content) {
3827
- registerResolvedFileSegment(map, segment);
3828
- }
3829
- }
3830
- return {
3831
- get(key) {
3832
- const direct = map.get(key);
3833
- if (direct) return direct;
3834
- return allFilePaths.find((filePath) => filePath.endsWith(`/${key}`) || filePath === key);
3835
- },
3836
- has(key) {
3837
- return this.get(key) !== void 0;
3838
- }
3839
- };
3840
- }
3841
- function registerResolvedFileSegment(map, segment) {
3842
- if (segment.type !== "file" || typeof segment.resolvedPath !== "string") {
3843
- return;
3844
- }
3845
- const aliases = [segment.value, segment.path].filter(
3846
- (alias) => typeof alias === "string"
3847
- );
3848
- for (const alias of aliases) {
3849
- map.set(alias, segment.resolvedPath);
3850
- }
3851
- }
3852
- function resolveMessages(messages, fileMap) {
3853
- return messages.map((message) => {
3854
- if (typeof message.content === "string") {
3855
- return { role: message.role, content: message.content };
3856
- }
3857
- if (!Array.isArray(message.content)) {
3858
- return { role: message.role, content: message.content };
3859
- }
3860
- const resolvedContent = [];
3861
- for (const segment of message.content) {
3862
- if (typeof segment === "string") {
3863
- resolvedContent.push({ type: "text", value: segment });
3864
- continue;
3865
- }
3866
- const obj = segment;
3867
- if (obj.type === "file" && typeof obj.value === "string") {
3868
- const resolved = fileMap.get(obj.value);
3869
- resolvedContent.push({
3870
- type: "file",
3871
- path: resolved ?? obj.value
3872
- });
3873
- } else {
3874
- resolvedContent.push(obj);
3875
- }
3876
- }
3877
- return { role: message.role, content: resolvedContent };
3878
- });
3879
- }
3880
-
3881
- // src/commands/eval/commands/prompt/index.ts
3882
- var evalPromptEvalSubcommand = command({
3883
- name: "eval",
3884
- description: "Extract eval prompt data for agents",
3885
- args: {
3886
- list: flag({
3887
- long: "list",
3888
- description: "List available test IDs"
3889
- }),
3890
- input: flag({
3891
- long: "input",
3892
- description: "Extract the test input payload for a single test"
3893
- }),
3894
- expectedOutput: flag({
3895
- long: "expected-output",
3896
- description: "Extract expected output and grading context for a single test"
3897
- }),
3898
- gradingBrief: flag({
3899
- long: "grading-brief",
3900
- description: "Output human-readable grading brief with typed criteria"
3901
- }),
3902
- testId: option({
3903
- type: optional(string),
3904
- long: "test-id",
3905
- description: "Test ID (required for --input and --expected-output)"
3906
- }),
3907
- evalPath: positional({
3908
- type: string,
3909
- displayName: "eval-path",
3910
- description: "Path to evaluation .yaml, .json, or .jsonl file"
3911
- })
3912
- },
3913
- handler: async ({ evalPath, expectedOutput, gradingBrief, input, list, testId }) => {
3914
- const selectedModes = [list, input, expectedOutput, gradingBrief].filter(Boolean).length;
3915
- if (selectedModes !== 1) {
3916
- throw new Error(
3917
- "Specify exactly one of --list, --input, --expected-output, or --grading-brief."
3918
- );
3919
- }
3920
- if (gradingBrief) {
3921
- if (!testId) {
3922
- throw new Error("--test-id is required with --grading-brief.");
3923
- }
3924
- const brief = await getPromptEvalGradingBrief(evalPath, testId);
3925
- process.stdout.write(brief);
3926
- process.stdout.write("\n");
3927
- return;
3928
- }
3929
- if ((input || expectedOutput) && !testId) {
3930
- throw new Error("--test-id is required with --input and --expected-output.");
3931
- }
3932
- const requiredTestId = testId ?? "";
3933
- const output = list ? await listPromptEvalTestIds(evalPath) : input ? await getPromptEvalInput(evalPath, requiredTestId) : await getPromptEvalExpectedOutput(evalPath, requiredTestId);
3934
- process.stdout.write(JSON.stringify(output, null, 2));
3935
- process.stdout.write("\n");
3936
- }
3937
- });
3938
- var evalPromptCommand = subcommands({
3939
- name: "prompt",
3940
- description: "Prompt commands",
3941
- cmds: {
3942
- eval: evalPromptEvalSubcommand
3943
- }
3944
- });
3945
-
3946
3687
  // src/commands/eval/commands/assert.ts
3947
3688
  import { readFileSync as readFileSync2 } from "node:fs";
3948
3689
  import path3 from "node:path";
@@ -4218,12 +3959,12 @@ var evalRunCommand = command({
4218
3959
  threshold: option({
4219
3960
  type: optional(number),
4220
3961
  long: "threshold",
4221
- description: "Suite-level quality gate: exit 1 if mean score falls below this value (0-1)"
3962
+ description: "Per-test score threshold (0-1, default 0.8). Exit 1 if any test scores below this value"
4222
3963
  })
4223
3964
  },
4224
3965
  handler: async (args) => {
4225
3966
  if (args.evalPaths.length === 0 && process.stdin.isTTY) {
4226
- const { launchInteractiveWizard } = await import("./interactive-VJP2AEPT.js");
3967
+ const { launchInteractiveWizard } = await import("./interactive-YAOW6DCC.js");
4227
3968
  await launchInteractiveWizard();
4228
3969
  return;
4229
3970
  }
@@ -4275,36 +4016,173 @@ var evalCommand = subcommands({
4275
4016
  description: "Evaluation commands",
4276
4017
  cmds: {
4277
4018
  run: evalRunCommand,
4278
- prompt: evalPromptCommand,
4279
4019
  assert: evalAssertCommand
4280
4020
  }
4281
4021
  });
4282
4022
 
4023
+ // src/commands/import/claude.ts
4024
+ import { mkdir as mkdir2, writeFile as writeFile2 } from "node:fs/promises";
4025
+ import path4 from "node:path";
4026
/**
 * `agentv import claude` — copies a Claude Code session transcript into a
 * JSONL file (one JSON-serialized message per line) for offline grading.
 *
 * Modes, checked in this order:
 *   --list               print up to 20 discovered sessions and return
 *   --session-id <uuid>  import the session with that id
 *   --discover latest    import the most recently discovered session
 *
 * `--project-path` and `--projects-dir` are forwarded unchanged to
 * `discoverClaudeSessions` in every mode. The process exits with status 1
 * when no session can be selected.
 *
 * Kept as `var` to match the bundler's top-level declaration style.
 */
var importClaudeCommand = command({
  name: "claude",
  description: "Import a Claude Code session transcript for offline grading",
  args: {
    sessionId: option({
      type: optional(string),
      long: "session-id",
      description: "UUID of the Claude Code session to import"
    }),
    discover: option({
      type: optional(string),
      long: "discover",
      description: 'Discovery mode: "latest" to import the most recent session'
    }),
    projectPath: option({
      type: optional(string),
      long: "project-path",
      description: "Filter sessions by project path"
    }),
    output: option({
      type: optional(string),
      long: "output",
      short: "o",
      description: "Output file path (default: .agentv/transcripts/claude-<session-id-short>.jsonl)"
    }),
    projectsDir: option({
      type: optional(string),
      long: "projects-dir",
      description: "Override the default ~/.claude/projects directory"
    }),
    list: flag({
      long: "list",
      description: "List available sessions instead of importing"
    })
  },
  handler: async ({ sessionId, discover, projectPath, output, projectsDir, list }) => {
    if (list) {
      const sessions = await discoverClaudeSessions({
        projectPath,
        projectsDir,
        limit: 20
      });
      if (sessions.length === 0) {
        console.log("No Claude Code sessions found.");
        return;
      }
      console.log(`Found ${sessions.length} session(s):\n`);
      for (const session of sessions) {
        const age = formatAge(session.updatedAt);
        console.log(` ${session.sessionId} ${age} ${session.projectDir}`);
      }
      return;
    }

    // Track the id actually imported in a local instead of reassigning the
    // destructured `sessionId` argument.
    let resolvedSessionId = sessionId;
    let sessionFilePath;
    if (sessionId) {
      const sessions = await discoverClaudeSessions({
        sessionId,
        projectPath,
        projectsDir,
        limit: 1
      });
      if (sessions.length === 0) {
        console.error(`Error: session ${sessionId} not found.`);
        process.exit(1);
      }
      sessionFilePath = sessions[0].filePath;
    } else if (discover === "latest") {
      const sessions = await discoverClaudeSessions({
        projectPath,
        projectsDir,
        latest: true
      });
      if (sessions.length === 0) {
        console.error("Error: no Claude Code sessions found.");
        process.exit(1);
      }
      sessionFilePath = sessions[0].filePath;
      resolvedSessionId = sessions[0].sessionId;
      console.log(`Discovered latest session: ${resolvedSessionId}`);
    } else if (discover !== void 0) {
      // A value was supplied but is not a supported mode: name it, rather than
      // falling through to the generic "specify ..." hint below.
      console.error(`Error: unsupported --discover mode "${discover}" (expected "latest").`);
      process.exit(1);
    } else {
      console.error("Error: specify --session-id <uuid> or --discover latest to select a session.");
      process.exit(1);
    }

    const rawJsonl = await readTranscriptFile(sessionFilePath);
    const transcript = parseClaudeSession(rawJsonl);

    // The default output name uses the first 8 characters of the session id.
    const shortId = (resolvedSessionId ?? transcript.source.sessionId).slice(0, 8);
    const outputPath = output ?? path4.join(".agentv", "transcripts", `claude-${shortId}.jsonl`);
    await mkdir2(path4.dirname(outputPath), { recursive: true });

    // One JSON document per line, with a trailing newline.
    const outputLines = transcript.messages.map((msg) => JSON.stringify(msg));
    await writeFile2(outputPath, `${outputLines.join("\n")}\n`, "utf8");

    const msgCount = transcript.messages.length;
    const toolCount = transcript.messages.reduce((sum, m) => sum + (m.toolCalls?.length ?? 0), 0);
    console.log(`Imported ${msgCount} messages (${toolCount} tool calls) \u2192 ${outputPath}`);
    if (transcript.source.model) {
      console.log(` Model: ${transcript.source.model}`);
    }
    if (transcript.durationMs !== void 0) {
      console.log(` Duration: ${formatDurationMs(transcript.durationMs)}`);
    }
    if (transcript.tokenUsage) {
      console.log(
        ` Tokens: ${transcript.tokenUsage.input} in / ${transcript.tokenUsage.output} out`
      );
    }
  }
});
4134
/**
 * Render how long ago `date` was as a compact relative label.
 *
 * Whole minutes are used below one hour ("12m ago"), whole hours below one
 * day ("5h ago"), and whole days beyond that ("3d ago"). Each unit is floored.
 *
 * @param {Date} date - Timestamp to compare against the current time.
 * @returns {string} Relative age label, or "unknown" for an invalid Date.
 */
function formatAge(date) {
  const elapsedMs = Date.now() - date.getTime();
  // An invalid Date produces NaN, which would otherwise print "NaNd ago".
  if (Number.isNaN(elapsedMs)) return "unknown";
  // A timestamp in the future (clock skew, future mtime) would otherwise print
  // a negative age such as "-3m ago"; treat it as "just now".
  const minutes = Math.max(0, Math.floor(elapsedMs / 6e4));
  if (minutes < 60) return `${minutes}m ago`;
  const hours = Math.floor(minutes / 60);
  if (hours < 24) return `${hours}h ago`;
  const days = Math.floor(hours / 24);
  return `${days}d ago`;
}
4143
/**
 * Format a duration in milliseconds as a short human-readable string.
 *
 *   under 1 second  -> "<ms>ms"
 *   under 1 minute  -> "<s>s"
 *   under 1 hour    -> "<m>m <s>s"
 *   1 hour or more  -> "<h>h <m>m <s>s"
 *
 * Seconds, minutes and hours are floored. The hour tier keeps long sessions
 * readable instead of printing an unbounded minute count such as "185m 12s".
 *
 * @param {number} ms - Duration in milliseconds.
 * @returns {string} Formatted duration.
 */
function formatDurationMs(ms) {
  if (ms < 1e3) return `${ms}ms`;
  const totalSeconds = Math.floor(ms / 1e3);
  if (totalSeconds < 60) return `${totalSeconds}s`;
  const totalMinutes = Math.floor(totalSeconds / 60);
  const seconds = totalSeconds % 60;
  if (totalMinutes < 60) return `${totalMinutes}m ${seconds}s`;
  const hours = Math.floor(totalMinutes / 60);
  return `${hours}h ${totalMinutes % 60}m ${seconds}s`;
}
4151
+
4152
+ // src/commands/import/index.ts
4153
+ var importCommand = subcommands({
4154
+ name: "import",
4155
+ description: "Import agent session transcripts for offline grading",
4156
+ cmds: {
4157
+ claude: importClaudeCommand
4158
+ }
4159
+ });
4160
+
4283
4161
  // src/commands/init/index.ts
4284
4162
  import { existsSync, mkdirSync, writeFileSync as writeFileSync2 } from "node:fs";
4285
- import path5 from "node:path";
4163
+ import path6 from "node:path";
4286
4164
  import * as readline from "node:readline/promises";
4287
4165
 
4288
4166
  // src/templates/index.ts
4289
4167
  import { readFileSync as readFileSync3, readdirSync, statSync } from "node:fs";
4290
- import path4 from "node:path";
4168
+ import path5 from "node:path";
4291
4169
  import { fileURLToPath } from "node:url";
4292
4170
  function getAgentvTemplates() {
4293
4171
  return getTemplatesFromDir(".agentv");
4294
4172
  }
4295
4173
  function getEnvExampleTemplate() {
4296
- const currentDir = path4.dirname(fileURLToPath(import.meta.url));
4297
- const templatesBase = currentDir.includes(`${path4.sep}dist`) ? path4.join(currentDir, "templates") : currentDir;
4298
- const content = readFileSync3(path4.join(templatesBase, ".env.example"), "utf-8");
4174
+ const currentDir = path5.dirname(fileURLToPath(import.meta.url));
4175
+ const templatesBase = currentDir.includes(`${path5.sep}dist`) ? path5.join(currentDir, "templates") : currentDir;
4176
+ const content = readFileSync3(path5.join(templatesBase, ".env.example"), "utf-8");
4299
4177
  return { path: ".env.example", content };
4300
4178
  }
4301
4179
  function getTemplatesFromDir(subdir) {
4302
- const currentDir = path4.dirname(fileURLToPath(import.meta.url));
4180
+ const currentDir = path5.dirname(fileURLToPath(import.meta.url));
4303
4181
  let templatesDir;
4304
- if (currentDir.includes(`${path4.sep}dist`)) {
4305
- templatesDir = path4.join(currentDir, "templates", subdir);
4182
+ if (currentDir.includes(`${path5.sep}dist`)) {
4183
+ templatesDir = path5.join(currentDir, "templates", subdir);
4306
4184
  } else {
4307
- templatesDir = path4.join(currentDir, subdir);
4185
+ templatesDir = path5.join(currentDir, subdir);
4308
4186
  }
4309
4187
  return readTemplatesRecursively(templatesDir, "");
4310
4188
  }
@@ -4312,15 +4190,15 @@ function readTemplatesRecursively(dir, relativePath) {
4312
4190
  const templates = [];
4313
4191
  const entries2 = readdirSync(dir);
4314
4192
  for (const entry of entries2) {
4315
- const fullPath = path4.join(dir, entry);
4193
+ const fullPath = path5.join(dir, entry);
4316
4194
  const stat3 = statSync(fullPath);
4317
- const entryRelativePath = relativePath ? path4.join(relativePath, entry) : entry;
4195
+ const entryRelativePath = relativePath ? path5.join(relativePath, entry) : entry;
4318
4196
  if (stat3.isDirectory()) {
4319
4197
  templates.push(...readTemplatesRecursively(fullPath, entryRelativePath));
4320
4198
  } else {
4321
4199
  const content = readFileSync3(fullPath, "utf-8");
4322
4200
  templates.push({
4323
- path: entryRelativePath.split(path4.sep).join("/"),
4201
+ path: entryRelativePath.split(path5.sep).join("/"),
4324
4202
  // Normalize to forward slashes
4325
4203
  content
4326
4204
  });
@@ -4349,22 +4227,22 @@ async function promptYesNo(message) {
4349
4227
  }
4350
4228
  }
4351
4229
  async function initCommand(options = {}) {
4352
- const targetPath = path5.resolve(options.targetPath ?? ".");
4353
- const agentvDir = path5.join(targetPath, ".agentv");
4230
+ const targetPath = path6.resolve(options.targetPath ?? ".");
4231
+ const agentvDir = path6.join(targetPath, ".agentv");
4354
4232
  const otherAgentvTemplates = getAgentvTemplates();
4355
4233
  const envTemplate = getEnvExampleTemplate();
4356
4234
  const existingFiles = [];
4357
4235
  if (envTemplate) {
4358
- const envFilePath = path5.join(targetPath, ".env.example");
4236
+ const envFilePath = path6.join(targetPath, ".env.example");
4359
4237
  if (existsSync(envFilePath)) {
4360
4238
  existingFiles.push(".env.example");
4361
4239
  }
4362
4240
  }
4363
4241
  if (existsSync(agentvDir)) {
4364
4242
  for (const template of otherAgentvTemplates) {
4365
- const targetFilePath = path5.join(agentvDir, template.path);
4243
+ const targetFilePath = path6.join(agentvDir, template.path);
4366
4244
  if (existsSync(targetFilePath)) {
4367
- existingFiles.push(path5.relative(targetPath, targetFilePath));
4245
+ existingFiles.push(path6.relative(targetPath, targetFilePath));
4368
4246
  }
4369
4247
  }
4370
4248
  }
@@ -4386,18 +4264,18 @@ async function initCommand(options = {}) {
4386
4264
  mkdirSync(agentvDir, { recursive: true });
4387
4265
  }
4388
4266
  if (envTemplate) {
4389
- const envFilePath = path5.join(targetPath, ".env.example");
4267
+ const envFilePath = path6.join(targetPath, ".env.example");
4390
4268
  writeFileSync2(envFilePath, envTemplate.content, "utf-8");
4391
4269
  console.log("Created .env.example");
4392
4270
  }
4393
4271
  for (const template of otherAgentvTemplates) {
4394
- const targetFilePath = path5.join(agentvDir, template.path);
4395
- const targetDirPath = path5.dirname(targetFilePath);
4272
+ const targetFilePath = path6.join(agentvDir, template.path);
4273
+ const targetDirPath = path6.dirname(targetFilePath);
4396
4274
  if (!existsSync(targetDirPath)) {
4397
4275
  mkdirSync(targetDirPath, { recursive: true });
4398
4276
  }
4399
4277
  writeFileSync2(targetFilePath, template.content, "utf-8");
4400
- console.log(`Created ${path5.relative(targetPath, targetFilePath)}`);
4278
+ console.log(`Created ${path6.relative(targetPath, targetFilePath)}`);
4401
4279
  }
4402
4280
  console.log("\nAgentV initialized successfully!");
4403
4281
  console.log("\nFiles installed to root:");
@@ -4405,7 +4283,7 @@ async function initCommand(options = {}) {
4405
4283
  console.log(" - .env.example");
4406
4284
  }
4407
4285
  console.log(`
4408
- Files installed to ${path5.relative(targetPath, agentvDir)}:`);
4286
+ Files installed to ${path6.relative(targetPath, agentvDir)}:`);
4409
4287
  for (const t of otherAgentvTemplates) {
4410
4288
  console.log(` - ${t.path}`);
4411
4289
  }
@@ -4437,7 +4315,7 @@ var initCmdTsCommand = command({
4437
4315
 
4438
4316
  // src/commands/pipeline/bench.ts
4439
4317
  import { existsSync as existsSync2 } from "node:fs";
4440
- import { readFile, readdir, writeFile as writeFile2 } from "node:fs/promises";
4318
+ import { readFile, readdir, writeFile as writeFile3 } from "node:fs/promises";
4441
4319
  import { join } from "node:path";
4442
4320
  var evalBenchCommand = command({
4443
4321
  name: "bench",
@@ -4527,7 +4405,7 @@ var evalBenchCommand = command({
4527
4405
  weight: e.weight
4528
4406
  }))
4529
4407
  };
4530
- await writeFile2(
4408
+ await writeFile3(
4531
4409
  join(testDir, "grading.json"),
4532
4410
  `${JSON.stringify(grading, null, 2)}
4533
4411
  `,
@@ -4573,7 +4451,7 @@ var evalBenchCommand = command({
4573
4451
  })
4574
4452
  );
4575
4453
  }
4576
- await writeFile2(
4454
+ await writeFile3(
4577
4455
  join(exportDir, "index.jsonl"),
4578
4456
  indexLines.length > 0 ? `${indexLines.join("\n")}
4579
4457
  ` : "",
@@ -4597,7 +4475,7 @@ var evalBenchCommand = command({
4597
4475
  },
4598
4476
  notes: []
4599
4477
  };
4600
- await writeFile2(
4478
+ await writeFile3(
4601
4479
  join(exportDir, "benchmark.json"),
4602
4480
  `${JSON.stringify(benchmark, null, 2)}
4603
4481
  `,
@@ -4617,7 +4495,7 @@ function computeStats(values) {
4617
4495
  }
4618
4496
 
4619
4497
  // src/commands/pipeline/grade.ts
4620
- import { mkdir as mkdir2, readFile as readFile2, readdir as readdir2, writeFile as writeFile3 } from "node:fs/promises";
4498
+ import { mkdir as mkdir3, readFile as readFile2, readdir as readdir2, writeFile as writeFile4 } from "node:fs/promises";
4621
4499
  import { join as join2 } from "node:path";
4622
4500
  var DEFAULT_CONCURRENCY = 10;
4623
4501
  function extractInputText(input) {
@@ -4684,7 +4562,7 @@ async function runCodeGraders(tasks, concurrency) {
4684
4562
  assertions,
4685
4563
  details: parsed.details ?? {}
4686
4564
  };
4687
- await writeFile3(
4565
+ await writeFile4(
4688
4566
  join2(resultsDir, `${graderName}.json`),
4689
4567
  `${JSON.stringify(result, null, 2)}
4690
4568
  `,
@@ -4705,7 +4583,7 @@ async function runCodeGraders(tasks, concurrency) {
4705
4583
  assertions: [{ text: `Error: ${message}`, passed: false }],
4706
4584
  details: { error: message }
4707
4585
  };
4708
- await writeFile3(
4586
+ await writeFile4(
4709
4587
  join2(resultsDir, `${graderName}.json`),
4710
4588
  `${JSON.stringify(errorResult, null, 2)}
4711
4589
  `,
@@ -4767,7 +4645,7 @@ var evalGradeCommand = command({
4767
4645
  continue;
4768
4646
  }
4769
4647
  if (graderFiles.length === 0) continue;
4770
- await mkdir2(resultsDir, { recursive: true });
4648
+ await mkdir3(resultsDir, { recursive: true });
4771
4649
  const responseText = await readFile2(join2(testDir, "response.md"), "utf8");
4772
4650
  const inputData = JSON.parse(await readFile2(join2(testDir, "input.json"), "utf8"));
4773
4651
  for (const graderFile of graderFiles) {
@@ -4781,7 +4659,7 @@ var evalGradeCommand = command({
4781
4659
 
4782
4660
  // src/commands/pipeline/input.ts
4783
4661
  import { readFile as readFile3 } from "node:fs/promises";
4784
- import { mkdir as mkdir3, writeFile as writeFile4 } from "node:fs/promises";
4662
+ import { mkdir as mkdir4, writeFile as writeFile5 } from "node:fs/promises";
4785
4663
  import { dirname, join as join3, relative, resolve } from "node:path";
4786
4664
  var evalInputCommand = command({
4787
4665
  name: "input",
@@ -4854,7 +4732,7 @@ var evalInputCommand = command({
4854
4732
  for (const test of tests) {
4855
4733
  const subpath = safeEvalSet ? [safeEvalSet, test.id] : [test.id];
4856
4734
  const testDir = join3(outDir, ...subpath);
4857
- await mkdir3(testDir, { recursive: true });
4735
+ await mkdir4(testDir, { recursive: true });
4858
4736
  testIds.push(test.id);
4859
4737
  const inputMessages = test.input.map((m) => ({
4860
4738
  role: m.role,
@@ -4879,7 +4757,7 @@ var evalInputCommand = command({
4879
4757
  instructions: "Execute this task in the current workspace. The agent IS the target."
4880
4758
  });
4881
4759
  }
4882
- await writeFile4(join3(testDir, "criteria.md"), test.criteria ?? "", "utf8");
4760
+ await writeFile5(join3(testDir, "criteria.md"), test.criteria ?? "", "utf8");
4883
4761
  if (test.expected_output.length > 0 || test.reference_answer !== void 0 && test.reference_answer !== "") {
4884
4762
  await writeJson(join3(testDir, "expected_output.json"), {
4885
4763
  expected_output: test.expected_output,
@@ -4911,7 +4789,7 @@ async function writeGraderConfigs(testDir, assertions, evalDir) {
4911
4789
  for (const assertion of assertions) {
4912
4790
  if (assertion.type === "code-grader") {
4913
4791
  if (!hasCodeGraders) {
4914
- await mkdir3(codeGradersDir, { recursive: true });
4792
+ await mkdir4(codeGradersDir, { recursive: true });
4915
4793
  hasCodeGraders = true;
4916
4794
  }
4917
4795
  const config = assertion;
@@ -4924,7 +4802,7 @@ async function writeGraderConfigs(testDir, assertions, evalDir) {
4924
4802
  });
4925
4803
  } else if (assertion.type === "llm-grader") {
4926
4804
  if (!hasLlmGraders) {
4927
- await mkdir3(llmGradersDir, { recursive: true });
4805
+ await mkdir4(llmGradersDir, { recursive: true });
4928
4806
  hasLlmGraders = true;
4929
4807
  }
4930
4808
  const config = assertion;
@@ -4949,14 +4827,14 @@ async function writeGraderConfigs(testDir, assertions, evalDir) {
4949
4827
  }
4950
4828
  }
4951
4829
  async function writeJson(filePath, data) {
4952
- await writeFile4(filePath, `${JSON.stringify(data, null, 2)}
4830
+ await writeFile5(filePath, `${JSON.stringify(data, null, 2)}
4953
4831
  `, "utf8");
4954
4832
  }
4955
4833
 
4956
4834
  // src/commands/pipeline/run.ts
4957
4835
  import { exec } from "node:child_process";
4958
4836
  import { existsSync as existsSync3, readFileSync as readFileSync4, unlinkSync } from "node:fs";
4959
- import { mkdir as mkdir4, readFile as readFile4, readdir as readdir3, writeFile as writeFile5 } from "node:fs/promises";
4837
+ import { mkdir as mkdir5, readFile as readFile4, readdir as readdir3, writeFile as writeFile6 } from "node:fs/promises";
4960
4838
  import { tmpdir } from "node:os";
4961
4839
  import { dirname as dirname2, join as join4, relative as relative2, resolve as resolve2 } from "node:path";
4962
4840
  function extractInputText2(input) {
@@ -4970,15 +4848,15 @@ function loadEnvFile(dir) {
4970
4848
  while (true) {
4971
4849
  const candidate = join4(current, ".env");
4972
4850
  if (existsSync3(candidate)) {
4973
- const env3 = {};
4851
+ const env2 = {};
4974
4852
  for (const line of readFileSync4(candidate, "utf8").split("\n")) {
4975
4853
  const trimmed = line.trim();
4976
4854
  if (!trimmed || trimmed.startsWith("#")) continue;
4977
4855
  const eqIdx = trimmed.indexOf("=");
4978
4856
  if (eqIdx === -1) continue;
4979
- env3[trimmed.slice(0, eqIdx).trim()] = trimmed.slice(eqIdx + 1).trim();
4857
+ env2[trimmed.slice(0, eqIdx).trim()] = trimmed.slice(eqIdx + 1).trim();
4980
4858
  }
4981
- return env3;
4859
+ return env2;
4982
4860
  }
4983
4861
  const parent = dirname2(current);
4984
4862
  if (parent === current) break;
@@ -5063,7 +4941,7 @@ var evalRunCommand2 = command({
5063
4941
  for (const test of tests) {
5064
4942
  const subpath = safeEvalSet ? [safeEvalSet, test.id] : [test.id];
5065
4943
  const testDir = join4(outDir, ...subpath);
5066
- await mkdir4(testDir, { recursive: true });
4944
+ await mkdir5(testDir, { recursive: true });
5067
4945
  testIds.push(test.id);
5068
4946
  const inputMessages = test.input.map((m) => ({
5069
4947
  role: m.role,
@@ -5088,7 +4966,7 @@ var evalRunCommand2 = command({
5088
4966
  instructions: "Execute this task in the current workspace. The agent IS the target."
5089
4967
  });
5090
4968
  }
5091
- await writeFile5(join4(testDir, "criteria.md"), test.criteria ?? "", "utf8");
4969
+ await writeFile6(join4(testDir, "criteria.md"), test.criteria ?? "", "utf8");
5092
4970
  if (test.expected_output.length > 0 || test.reference_answer !== void 0 && test.reference_answer !== "") {
5093
4971
  await writeJson2(join4(testDir, "expected_output.json"), {
5094
4972
  expected_output: test.expected_output,
@@ -5132,7 +5010,7 @@ var evalRunCommand2 = command({
5132
5010
  const promptFile = join4(tmpdir(), `agentv-prompt-${testId}-${Date.now()}.txt`);
5133
5011
  const outputFile = join4(tmpdir(), `agentv-output-${testId}-${Date.now()}.txt`);
5134
5012
  const inputText = extractInputText2(inputData.input);
5135
- await writeFile5(promptFile, inputText, "utf8");
5013
+ await writeFile6(promptFile, inputText, "utf8");
5136
5014
  let rendered = template;
5137
5015
  rendered = rendered.replace("{PROMPT_FILE}", promptFile);
5138
5016
  rendered = rendered.replace("{OUTPUT_FILE}", outputFile);
@@ -5161,7 +5039,7 @@ var evalRunCommand2 = command({
5161
5039
  } else {
5162
5040
  response = "ERROR: No output file generated";
5163
5041
  }
5164
- await writeFile5(join4(testDir, "response.md"), response, "utf8");
5042
+ await writeFile6(join4(testDir, "response.md"), response, "utf8");
5165
5043
  await writeJson2(join4(testDir, "timing.json"), {
5166
5044
  duration_ms: durationMs,
5167
5045
  total_duration_seconds: Math.round(durationMs / 10) / 100,
@@ -5174,7 +5052,7 @@ var evalRunCommand2 = command({
5174
5052
  const durationMs = Math.round(performance.now() - start);
5175
5053
  const message = error instanceof Error ? error.message : String(error);
5176
5054
  const response = `ERROR: target failed \u2014 ${message}`;
5177
- await writeFile5(join4(testDir, "response.md"), response, "utf8");
5055
+ await writeFile6(join4(testDir, "response.md"), response, "utf8");
5178
5056
  await writeJson2(join4(testDir, "timing.json"), {
5179
5057
  duration_ms: durationMs,
5180
5058
  total_duration_seconds: Math.round(durationMs / 10) / 100,
@@ -5231,7 +5109,7 @@ Done. Results in ${outDir}`);
5231
5109
  continue;
5232
5110
  }
5233
5111
  if (graderFiles.length === 0) continue;
5234
- await mkdir4(resultsDir, { recursive: true });
5112
+ await mkdir5(resultsDir, { recursive: true });
5235
5113
  const responseText = await readFile4(join4(testDir, "response.md"), "utf8");
5236
5114
  const inputData = JSON.parse(await readFile4(join4(testDir, "input.json"), "utf8"));
5237
5115
  for (const graderFile of graderFiles) {
@@ -5246,7 +5124,7 @@ Done. Agent can now perform LLM grading on responses in ${outDir}`);
5246
5124
  }
5247
5125
  });
5248
5126
  async function writeJson2(filePath, data) {
5249
- await writeFile5(filePath, `${JSON.stringify(data, null, 2)}
5127
+ await writeFile6(filePath, `${JSON.stringify(data, null, 2)}
5250
5128
  `, "utf8");
5251
5129
  }
5252
5130
  async function writeGraderConfigs2(testDir, assertions, evalDir) {
@@ -5257,7 +5135,7 @@ async function writeGraderConfigs2(testDir, assertions, evalDir) {
5257
5135
  for (const assertion of assertions) {
5258
5136
  if (assertion.type === "code-grader") {
5259
5137
  if (!hasCodeGraders) {
5260
- await mkdir4(codeGradersDir, { recursive: true });
5138
+ await mkdir5(codeGradersDir, { recursive: true });
5261
5139
  hasCodeGraders = true;
5262
5140
  }
5263
5141
  const config = assertion;
@@ -5270,7 +5148,7 @@ async function writeGraderConfigs2(testDir, assertions, evalDir) {
5270
5148
  });
5271
5149
  } else if (assertion.type === "llm-grader") {
5272
5150
  if (!hasLlmGraders) {
5273
- await mkdir4(llmGradersDir, { recursive: true });
5151
+ await mkdir5(llmGradersDir, { recursive: true });
5274
5152
  hasLlmGraders = true;
5275
5153
  }
5276
5154
  const config = assertion;
@@ -5308,14 +5186,14 @@ var pipelineCommand = subcommands({
5308
5186
  });
5309
5187
 
5310
5188
  // src/commands/results/export.ts
5311
- import path7 from "node:path";
5189
+ import path8 from "node:path";
5312
5190
 
5313
5191
  // src/commands/results/shared.ts
5314
5192
  import { existsSync as existsSync4 } from "node:fs";
5315
5193
 
5316
5194
  // src/commands/trace/utils.ts
5317
5195
  import { readFileSync as readFileSync5, readdirSync as readdirSync2, statSync as statSync2 } from "node:fs";
5318
- import path6 from "node:path";
5196
+ import path7 from "node:path";
5319
5197
  var colors2 = {
5320
5198
  reset: "\x1B[0m",
5321
5199
  bold: "\x1B[1m",
@@ -5342,10 +5220,10 @@ function padLeft2(str, len) {
5342
5220
  }
5343
5221
  function loadResultFile(filePath) {
5344
5222
  const resolvedFilePath = resolveTraceResultPath(filePath);
5345
- if (path6.extname(resolvedFilePath) === ".json") {
5223
+ if (path7.extname(resolvedFilePath) === ".json") {
5346
5224
  return loadOtlpTraceFile(resolvedFilePath);
5347
5225
  }
5348
- if (path6.basename(resolvedFilePath) === RESULT_INDEX_FILENAME) {
5226
+ if (path7.basename(resolvedFilePath) === RESULT_INDEX_FILENAME) {
5349
5227
  return loadManifestAsRawResults(resolvedFilePath);
5350
5228
  }
5351
5229
  return loadJsonlRecords(resolvedFilePath);
@@ -5624,14 +5502,14 @@ function toTraceSummary(result) {
5624
5502
  return toCamelCaseDeep(rawTrace);
5625
5503
  }
5626
5504
  function listResultFiles(cwd, limit) {
5627
- const baseDir = path6.join(cwd, ".agentv", "results");
5628
- const runsDir = path6.join(baseDir, RESULT_RUNS_DIRNAME);
5505
+ const baseDir = path7.join(cwd, ".agentv", "results");
5506
+ const runsDir = path7.join(baseDir, RESULT_RUNS_DIRNAME);
5629
5507
  const files = [];
5630
5508
  try {
5631
5509
  const entries2 = readdirSync2(runsDir, { withFileTypes: true });
5632
5510
  for (const entry of entries2) {
5633
5511
  if (entry.isDirectory()) {
5634
- const primaryPath = resolveExistingRunPrimaryPath(path6.join(runsDir, entry.name));
5512
+ const primaryPath = resolveExistingRunPrimaryPath(path7.join(runsDir, entry.name));
5635
5513
  if (primaryPath) {
5636
5514
  files.push({ filePath: primaryPath, displayName: entry.name });
5637
5515
  }
@@ -5639,7 +5517,7 @@ function listResultFiles(cwd, limit) {
5639
5517
  }
5640
5518
  for (const entry of entries2) {
5641
5519
  if (!entry.isDirectory() && entry.name.endsWith(".jsonl")) {
5642
- files.push({ filePath: path6.join(runsDir, entry.name), displayName: entry.name });
5520
+ files.push({ filePath: path7.join(runsDir, entry.name), displayName: entry.name });
5643
5521
  }
5644
5522
  }
5645
5523
  } catch {
@@ -5647,7 +5525,7 @@ function listResultFiles(cwd, limit) {
5647
5525
  try {
5648
5526
  const entries2 = readdirSync2(baseDir).filter((f) => f.endsWith(".jsonl"));
5649
5527
  for (const entry of entries2) {
5650
- files.push({ filePath: path6.join(baseDir, entry), displayName: entry });
5528
+ files.push({ filePath: path7.join(baseDir, entry), displayName: entry });
5651
5529
  }
5652
5530
  } catch {
5653
5531
  }
@@ -5766,16 +5644,16 @@ function patchTestIds(results) {
5766
5644
 
5767
5645
  // src/commands/results/export.ts
5768
5646
  function deriveOutputDir(cwd, sourceFile) {
5769
- const parentDir = path7.basename(path7.dirname(sourceFile));
5647
+ const parentDir = path8.basename(path8.dirname(sourceFile));
5770
5648
  if (/^\d{4}-\d{2}-\d{2}T/.test(parentDir)) {
5771
- return path7.join(cwd, ".agentv", "results", "export", parentDir);
5649
+ return path8.join(cwd, ".agentv", "results", "export", parentDir);
5772
5650
  }
5773
5651
  if (parentDir.startsWith("eval_")) {
5774
- return path7.join(cwd, ".agentv", "results", "export", parentDir.slice(5));
5652
+ return path8.join(cwd, ".agentv", "results", "export", parentDir.slice(5));
5775
5653
  }
5776
- const basename = path7.basename(sourceFile, ".jsonl");
5654
+ const basename = path8.basename(sourceFile, ".jsonl");
5777
5655
  const dirName = basename.startsWith("eval_") ? basename.slice(5) : basename;
5778
- return path7.join(cwd, ".agentv", "results", "export", dirName);
5656
+ return path8.join(cwd, ".agentv", "results", "export", dirName);
5779
5657
  }
5780
5658
  var resultsExportCommand = command({
5781
5659
  name: "export",
@@ -5804,7 +5682,7 @@ var resultsExportCommand = command({
5804
5682
  try {
5805
5683
  const { sourceFile } = await resolveSourceFile(source, cwd);
5806
5684
  const { results } = await loadResults(source, cwd);
5807
- const outputDir = out ? path7.isAbsolute(out) ? out : path7.resolve(cwd, out) : deriveOutputDir(cwd, sourceFile);
5685
+ const outputDir = out ? path8.isAbsolute(out) ? out : path8.resolve(cwd, out) : deriveOutputDir(cwd, sourceFile);
5808
5686
  await writeArtifactsFromResults(results, outputDir, {
5809
5687
  evalFile: sourceFile
5810
5688
  });
@@ -6013,10 +5891,10 @@ var resultsSummaryCommand = command({
6013
5891
 
6014
5892
  // src/commands/results/validate.ts
6015
5893
  import { existsSync as existsSync6, readFileSync as readFileSync7, statSync as statSync3 } from "node:fs";
6016
- import path8 from "node:path";
5894
+ import path9 from "node:path";
6017
5895
  function checkDirectoryNaming(runDir) {
6018
- const dirName = path8.basename(runDir);
6019
- const parentName = path8.basename(path8.dirname(runDir));
5896
+ const dirName = path9.basename(runDir);
5897
+ const parentName = path9.basename(path9.dirname(runDir));
6020
5898
  const diagnostics = [];
6021
5899
  if (parentName !== "runs") {
6022
5900
  diagnostics.push({
@@ -6035,7 +5913,7 @@ function checkDirectoryNaming(runDir) {
6035
5913
  return diagnostics;
6036
5914
  }
6037
5915
  function checkIndexJsonl(runDir) {
6038
- const indexPath = path8.join(runDir, "index.jsonl");
5916
+ const indexPath = path9.join(runDir, "index.jsonl");
6039
5917
  const diagnostics = [];
6040
5918
  const entries2 = [];
6041
5919
  if (!existsSync6(indexPath)) {
@@ -6134,7 +6012,7 @@ function checkArtifactFiles(runDir, entries2) {
6134
6012
  for (const entry of entries2) {
6135
6013
  const testId = entry.test_id ?? "?";
6136
6014
  if (entry.grading_path) {
6137
- const gradingPath = path8.join(runDir, entry.grading_path);
6015
+ const gradingPath = path9.join(runDir, entry.grading_path);
6138
6016
  if (!existsSync6(gradingPath)) {
6139
6017
  diagnostics.push({
6140
6018
  severity: "error",
@@ -6164,7 +6042,7 @@ function checkArtifactFiles(runDir, entries2) {
6164
6042
  }
6165
6043
  }
6166
6044
  if (entry.timing_path) {
6167
- const timingPath = path8.join(runDir, entry.timing_path);
6045
+ const timingPath = path9.join(runDir, entry.timing_path);
6168
6046
  if (!existsSync6(timingPath)) {
6169
6047
  diagnostics.push({
6170
6048
  severity: "warning",
@@ -6173,7 +6051,7 @@ function checkArtifactFiles(runDir, entries2) {
6173
6051
  }
6174
6052
  }
6175
6053
  }
6176
- const benchmarkPath = path8.join(runDir, "benchmark.json");
6054
+ const benchmarkPath = path9.join(runDir, "benchmark.json");
6177
6055
  if (!existsSync6(benchmarkPath)) {
6178
6056
  diagnostics.push({ severity: "warning", message: "benchmark.json is missing" });
6179
6057
  }
@@ -6190,7 +6068,7 @@ var resultsValidateCommand = command({
6190
6068
  })
6191
6069
  },
6192
6070
  handler: async ({ runDir }) => {
6193
- const resolvedDir = path8.resolve(runDir);
6071
+ const resolvedDir = path9.resolve(runDir);
6194
6072
  if (!existsSync6(resolvedDir) || !statSync3(resolvedDir).isDirectory()) {
6195
6073
  console.error(`Error: '${runDir}' is not a directory`);
6196
6074
  process.exit(1);
@@ -6239,19 +6117,19 @@ var resultsCommand = subcommands({
6239
6117
 
6240
6118
  // src/commands/results/serve.ts
6241
6119
  import { existsSync as existsSync8, readFileSync as readFileSync9, readdirSync as readdirSync3, statSync as statSync4, writeFileSync as writeFileSync4 } from "node:fs";
6242
- import path10 from "node:path";
6120
+ import path11 from "node:path";
6243
6121
  import { fileURLToPath as fileURLToPath2 } from "node:url";
6244
6122
  import { Hono } from "hono";
6245
6123
 
6246
6124
  // src/commands/results/studio-config.ts
6247
6125
  import { existsSync as existsSync7, mkdirSync as mkdirSync2, readFileSync as readFileSync8, writeFileSync as writeFileSync3 } from "node:fs";
6248
- import path9 from "node:path";
6126
+ import path10 from "node:path";
6249
6127
  import { parse as parseYaml, stringify as stringifyYaml2 } from "yaml";
6250
6128
  var DEFAULTS = {
6251
6129
  pass_threshold: PASS_THRESHOLD
6252
6130
  };
6253
6131
  function loadStudioConfig(agentvDir) {
6254
- const configPath = path9.join(agentvDir, "config.yaml");
6132
+ const configPath = path10.join(agentvDir, "config.yaml");
6255
6133
  if (!existsSync7(configPath)) {
6256
6134
  return { ...DEFAULTS };
6257
6135
  }
@@ -6278,7 +6156,7 @@ function saveStudioConfig(agentvDir, config) {
6278
6156
  if (!existsSync7(agentvDir)) {
6279
6157
  mkdirSync2(agentvDir, { recursive: true });
6280
6158
  }
6281
- const configPath = path9.join(agentvDir, "config.yaml");
6159
+ const configPath = path10.join(agentvDir, "config.yaml");
6282
6160
  let existing = {};
6283
6161
  if (existsSync7(configPath)) {
6284
6162
  const raw = readFileSync8(configPath, "utf-8");
@@ -6296,7 +6174,7 @@ function saveStudioConfig(agentvDir, config) {
6296
6174
 
6297
6175
  // src/commands/results/serve.ts
6298
6176
  function feedbackPath(resultDir) {
6299
- return path10.join(resultDir, "feedback.json");
6177
+ return path11.join(resultDir, "feedback.json");
6300
6178
  }
6301
6179
  function readFeedback(cwd) {
6302
6180
  const fp = feedbackPath(cwd);
@@ -6323,8 +6201,8 @@ function buildFileTree(dirPath, relativeTo) {
6323
6201
  if (a.isDirectory() !== b.isDirectory()) return a.isDirectory() ? -1 : 1;
6324
6202
  return a.name.localeCompare(b.name);
6325
6203
  }).map((entry) => {
6326
- const fullPath = path10.join(dirPath, entry.name);
6327
- const relPath = path10.relative(relativeTo, fullPath);
6204
+ const fullPath = path11.join(dirPath, entry.name);
6205
+ const relPath = path11.relative(relativeTo, fullPath);
6328
6206
  if (entry.isDirectory()) {
6329
6207
  return {
6330
6208
  name: entry.name,
@@ -6337,7 +6215,7 @@ function buildFileTree(dirPath, relativeTo) {
6337
6215
  });
6338
6216
  }
6339
6217
  function inferLanguage(filePath) {
6340
- const ext = path10.extname(filePath).toLowerCase();
6218
+ const ext = path11.extname(filePath).toLowerCase();
6341
6219
  const langMap = {
6342
6220
  ".json": "json",
6343
6221
  ".jsonl": "json",
@@ -6531,7 +6409,7 @@ function handleEvalFiles(c3, { searchDir }) {
6531
6409
  const records = parseResultManifest(content);
6532
6410
  const record = records.find((r) => (r.test_id ?? r.eval_id) === evalId);
6533
6411
  if (!record) return c3.json({ error: "Eval not found" }, 404);
6534
- const baseDir = path10.dirname(meta.path);
6412
+ const baseDir = path11.dirname(meta.path);
6535
6413
  const knownPaths = [
6536
6414
  record.grading_path,
6537
6415
  record.timing_path,
@@ -6540,14 +6418,14 @@ function handleEvalFiles(c3, { searchDir }) {
6540
6418
  record.response_path
6541
6419
  ].filter((p) => !!p);
6542
6420
  if (knownPaths.length === 0) return c3.json({ files: [] });
6543
- const artifactDirs = knownPaths.map((p) => path10.dirname(p));
6421
+ const artifactDirs = knownPaths.map((p) => path11.dirname(p));
6544
6422
  let commonDir = artifactDirs[0];
6545
6423
  for (const dir of artifactDirs) {
6546
6424
  while (!dir.startsWith(commonDir)) {
6547
- commonDir = path10.dirname(commonDir);
6425
+ commonDir = path11.dirname(commonDir);
6548
6426
  }
6549
6427
  }
6550
- const artifactAbsDir = path10.join(baseDir, commonDir);
6428
+ const artifactAbsDir = path11.join(baseDir, commonDir);
6551
6429
  const files = buildFileTree(artifactAbsDir, baseDir);
6552
6430
  return c3.json({ files });
6553
6431
  } catch {
@@ -6563,9 +6441,9 @@ function handleEvalFileContent(c3, { searchDir }) {
6563
6441
  const markerIdx = c3.req.path.indexOf(marker);
6564
6442
  const filePath = markerIdx >= 0 ? c3.req.path.slice(markerIdx + marker.length) : "";
6565
6443
  if (!filePath) return c3.json({ error: "No file path specified" }, 400);
6566
- const baseDir = path10.dirname(meta.path);
6567
- const absolutePath = path10.resolve(baseDir, filePath);
6568
- if (!absolutePath.startsWith(path10.resolve(baseDir) + path10.sep) && absolutePath !== path10.resolve(baseDir)) {
6444
+ const baseDir = path11.dirname(meta.path);
6445
+ const absolutePath = path11.resolve(baseDir, filePath);
6446
+ if (!absolutePath.startsWith(path11.resolve(baseDir) + path11.sep) && absolutePath !== path11.resolve(baseDir)) {
6569
6447
  return c3.json({ error: "Path traversal not allowed" }, 403);
6570
6448
  }
6571
6449
  if (!existsSync8(absolutePath) || !statSync4(absolutePath).isFile()) {
@@ -6656,12 +6534,12 @@ function handleConfig(c3, { agentvDir }) {
6656
6534
  return c3.json(loadStudioConfig(agentvDir));
6657
6535
  }
6658
6536
  function handleFeedbackRead(c3, { searchDir }) {
6659
- const resultsDir = path10.join(searchDir, ".agentv", "results");
6537
+ const resultsDir = path11.join(searchDir, ".agentv", "results");
6660
6538
  return c3.json(readFeedback(existsSync8(resultsDir) ? resultsDir : searchDir));
6661
6539
  }
6662
6540
  function createApp(results, resultDir, cwd, sourceFile, options) {
6663
6541
  const searchDir = cwd ?? resultDir;
6664
- const agentvDir = path10.join(searchDir, ".agentv");
6542
+ const agentvDir = path11.join(searchDir, ".agentv");
6665
6543
  const defaultCtx = { searchDir, agentvDir };
6666
6544
  const app2 = new Hono();
6667
6545
  function withProject(c3, handler) {
@@ -6671,7 +6549,7 @@ function createApp(results, resultDir, cwd, sourceFile, options) {
6671
6549
  }
6672
6550
  return handler(c3, {
6673
6551
  searchDir: project.path,
6674
- agentvDir: path10.join(project.path, ".agentv")
6552
+ agentvDir: path11.join(project.path, ".agentv")
6675
6553
  });
6676
6554
  }
6677
6555
  app2.post("/api/config", async (c3) => {
@@ -6912,20 +6790,20 @@ function createApp(results, resultDir, cwd, sourceFile, options) {
6912
6790
  app2.get("/api/projects/:projectId/targets", (c3) => withProject(c3, handleTargets));
6913
6791
  app2.get("/api/projects/:projectId/feedback", (c3) => withProject(c3, handleFeedbackRead));
6914
6792
  const studioDistPath = options?.studioDir ?? resolveStudioDistDir();
6915
- if (!studioDistPath || !existsSync8(path10.join(studioDistPath, "index.html"))) {
6793
+ if (!studioDistPath || !existsSync8(path11.join(studioDistPath, "index.html"))) {
6916
6794
  throw new Error('Studio dist not found. Run "bun run build" in apps/studio/ to build the SPA.');
6917
6795
  }
6918
6796
  app2.get("/", (c3) => {
6919
- const indexPath = path10.join(studioDistPath, "index.html");
6797
+ const indexPath = path11.join(studioDistPath, "index.html");
6920
6798
  if (existsSync8(indexPath)) return c3.html(readFileSync9(indexPath, "utf8"));
6921
6799
  return c3.notFound();
6922
6800
  });
6923
6801
  app2.get("/assets/*", (c3) => {
6924
6802
  const assetPath = c3.req.path;
6925
- const filePath = path10.join(studioDistPath, assetPath);
6803
+ const filePath = path11.join(studioDistPath, assetPath);
6926
6804
  if (!existsSync8(filePath)) return c3.notFound();
6927
6805
  const content = readFileSync9(filePath);
6928
- const ext = path10.extname(filePath);
6806
+ const ext = path11.extname(filePath);
6929
6807
  const mimeTypes = {
6930
6808
  ".js": "application/javascript",
6931
6809
  ".css": "text/css",
@@ -6946,26 +6824,26 @@ function createApp(results, resultDir, cwd, sourceFile, options) {
6946
6824
  });
6947
6825
  app2.get("*", (c3) => {
6948
6826
  if (c3.req.path.startsWith("/api/")) return c3.json({ error: "Not found" }, 404);
6949
- const indexPath = path10.join(studioDistPath, "index.html");
6827
+ const indexPath = path11.join(studioDistPath, "index.html");
6950
6828
  if (existsSync8(indexPath)) return c3.html(readFileSync9(indexPath, "utf8"));
6951
6829
  return c3.notFound();
6952
6830
  });
6953
6831
  return app2;
6954
6832
  }
6955
6833
  function resolveStudioDistDir() {
6956
- const currentDir = typeof __dirname !== "undefined" ? __dirname : path10.dirname(fileURLToPath2(import.meta.url));
6834
+ const currentDir = typeof __dirname !== "undefined" ? __dirname : path11.dirname(fileURLToPath2(import.meta.url));
6957
6835
  const candidates = [
6958
6836
  // From src/commands/results/ → sibling apps/studio/dist
6959
- path10.resolve(currentDir, "../../../../studio/dist"),
6837
+ path11.resolve(currentDir, "../../../../studio/dist"),
6960
6838
  // From dist/ → sibling apps/studio/dist (monorepo dev)
6961
- path10.resolve(currentDir, "../../studio/dist"),
6839
+ path11.resolve(currentDir, "../../studio/dist"),
6962
6840
  // Bundled inside CLI dist (published package: dist/studio/)
6963
- path10.resolve(currentDir, "studio"),
6841
+ path11.resolve(currentDir, "studio"),
6964
6842
  // From dist/ in monorepo root context
6965
- path10.resolve(currentDir, "../../../apps/studio/dist")
6843
+ path11.resolve(currentDir, "../../../apps/studio/dist")
6966
6844
  ];
6967
6845
  for (const candidate of candidates) {
6968
- if (existsSync8(candidate) && existsSync8(path10.join(candidate, "index.html"))) {
6846
+ if (existsSync8(candidate) && existsSync8(path11.join(candidate, "index.html"))) {
6969
6847
  return candidate;
6970
6848
  }
6971
6849
  }
@@ -7076,7 +6954,7 @@ Discovered ${discovered.length} project(s).`);
7076
6954
  }
7077
6955
  }
7078
6956
  }
7079
- const resultDir = sourceFile ? path10.dirname(path10.resolve(sourceFile)) : cwd;
6957
+ const resultDir = sourceFile ? path11.dirname(path11.resolve(sourceFile)) : cwd;
7080
6958
  const app2 = createApp(results, resultDir, cwd, sourceFile);
7081
6959
  if (isMultiProject) {
7082
6960
  console.log(`Multi-project mode: ${registry.projects.length} project(s) registered`);
@@ -7970,7 +7848,7 @@ var traceCommand = subcommands({
7970
7848
 
7971
7849
  // src/commands/transpile/index.ts
7972
7850
  import { writeFileSync as writeFileSync5 } from "node:fs";
7973
- import path11 from "node:path";
7851
+ import path12 from "node:path";
7974
7852
  var transpileCommand = command({
7975
7853
  name: "transpile",
7976
7854
  description: "Convert an EVAL.yaml file to Agent Skills evals.json format",
@@ -7994,7 +7872,7 @@ var transpileCommand = command({
7994
7872
  handler: async ({ input, outDir, stdout }) => {
7995
7873
  let result;
7996
7874
  try {
7997
- result = transpileEvalYamlFile(path11.resolve(input));
7875
+ result = transpileEvalYamlFile(path12.resolve(input));
7998
7876
  } catch (error) {
7999
7877
  console.error(`Error: ${error.message}`);
8000
7878
  process.exit(1);
@@ -8018,11 +7896,11 @@ var transpileCommand = command({
8018
7896
  process.stdout.write("\n");
8019
7897
  return;
8020
7898
  }
8021
- const outputDir = outDir ? path11.resolve(outDir) : path11.dirname(path11.resolve(input));
7899
+ const outputDir = outDir ? path12.resolve(outDir) : path12.dirname(path12.resolve(input));
8022
7900
  const fileNames = getOutputFilenames(result);
8023
7901
  for (const [skill, evalsJson] of result.files) {
8024
7902
  const fileName = fileNames.get(skill) ?? "evals.json";
8025
- const outputPath = path11.join(outputDir, fileName);
7903
+ const outputPath = path12.join(outputDir, fileName);
8026
7904
  writeFileSync5(outputPath, `${JSON.stringify(evalsJson, null, 2)}
8027
7905
  `);
8028
7906
  console.log(`Transpiled to ${outputPath}`);
@@ -8157,7 +8035,7 @@ function isTTY() {
8157
8035
  // src/commands/validate/validate-files.ts
8158
8036
  import { constants } from "node:fs";
8159
8037
  import { access, readdir as readdir4, stat } from "node:fs/promises";
8160
- import path12 from "node:path";
8038
+ import path13 from "node:path";
8161
8039
  import fg2 from "fast-glob";
8162
8040
  async function validateFiles(paths) {
8163
8041
  const filePaths = await expandPaths(paths);
@@ -8176,7 +8054,7 @@ async function validateFiles(paths) {
8176
8054
  };
8177
8055
  }
8178
8056
  async function validateSingleFile(filePath) {
8179
- const absolutePath = path12.resolve(filePath);
8057
+ const absolutePath = path13.resolve(filePath);
8180
8058
  const fileType = await detectFileType(absolutePath);
8181
8059
  let result;
8182
8060
  if (fileType === "eval") {
@@ -8201,7 +8079,7 @@ async function validateSingleFile(filePath) {
8201
8079
  async function expandPaths(paths) {
8202
8080
  const expanded = /* @__PURE__ */ new Set();
8203
8081
  for (const inputPath of paths) {
8204
- const absolutePath = path12.resolve(inputPath);
8082
+ const absolutePath = path13.resolve(inputPath);
8205
8083
  try {
8206
8084
  await access(absolutePath, constants.F_OK);
8207
8085
  const stats = await stat(absolutePath);
@@ -8229,7 +8107,7 @@ async function expandPaths(paths) {
8229
8107
  if (yamlMatches.length === 0) {
8230
8108
  console.warn(`Warning: No YAML files matched pattern: ${inputPath}`);
8231
8109
  }
8232
- for (const f of yamlMatches) expanded.add(path12.normalize(f));
8110
+ for (const f of yamlMatches) expanded.add(path13.normalize(f));
8233
8111
  }
8234
8112
  const sorted = Array.from(expanded);
8235
8113
  sorted.sort();
@@ -8240,7 +8118,7 @@ async function findYamlFiles(dirPath) {
8240
8118
  try {
8241
8119
  const entries2 = await readdir4(dirPath, { withFileTypes: true });
8242
8120
  for (const entry of entries2) {
8243
- const fullPath = path12.join(dirPath, entry.name);
8121
+ const fullPath = path13.join(dirPath, entry.name);
8244
8122
  if (entry.isDirectory()) {
8245
8123
  if (entry.name === "node_modules" || entry.name.startsWith(".")) {
8246
8124
  continue;
@@ -8257,7 +8135,7 @@ async function findYamlFiles(dirPath) {
8257
8135
  return results;
8258
8136
  }
8259
8137
  function isYamlFile(filePath) {
8260
- const ext = path12.extname(filePath).toLowerCase();
8138
+ const ext = path13.extname(filePath).toLowerCase();
8261
8139
  return ext === ".yaml" || ext === ".yml";
8262
8140
  }
8263
8141
 
@@ -8314,7 +8192,7 @@ var validateCommand = command({
8314
8192
  // src/commands/workspace/clean.ts
8315
8193
  import { existsSync as existsSync9 } from "node:fs";
8316
8194
  import { readFile as readFile5, readdir as readdir5, rm } from "node:fs/promises";
8317
- import path13 from "node:path";
8195
+ import path14 from "node:path";
8318
8196
  async function confirm(message) {
8319
8197
  const readline2 = await import("node:readline");
8320
8198
  const rl = readline2.createInterface({ input: process.stdin, output: process.stdout });
@@ -8350,8 +8228,8 @@ var cleanCommand = command({
8350
8228
  const poolDirs = entries2.filter((e) => e.isDirectory());
8351
8229
  const matchingDirs = [];
8352
8230
  for (const dir of poolDirs) {
8353
- const poolDir = path13.join(poolRoot, dir.name);
8354
- const metadataPath = path13.join(poolDir, "metadata.json");
8231
+ const poolDir = path14.join(poolRoot, dir.name);
8232
+ const metadataPath = path14.join(poolDir, "metadata.json");
8355
8233
  try {
8356
8234
  const raw = await readFile5(metadataPath, "utf-8");
8357
8235
  const metadata = JSON.parse(raw);
@@ -8382,7 +8260,7 @@ var cleanCommand = command({
8382
8260
  }
8383
8261
  for (const dir of matchingDirs) {
8384
8262
  await rm(dir, { recursive: true, force: true });
8385
- console.log(`Removed: ${path13.basename(dir).slice(0, 12)}...`);
8263
+ console.log(`Removed: ${path14.basename(dir).slice(0, 12)}...`);
8386
8264
  }
8387
8265
  console.log("Done.");
8388
8266
  } else {
@@ -8402,13 +8280,13 @@ var cleanCommand = command({
8402
8280
  // src/commands/workspace/list.ts
8403
8281
  import { existsSync as existsSync10 } from "node:fs";
8404
8282
  import { readFile as readFile6, readdir as readdir6, stat as stat2 } from "node:fs/promises";
8405
- import path14 from "node:path";
8283
+ import path15 from "node:path";
8406
8284
  async function getDirectorySize(dirPath) {
8407
8285
  let totalSize = 0;
8408
8286
  try {
8409
8287
  const entries2 = await readdir6(dirPath, { withFileTypes: true });
8410
8288
  for (const entry of entries2) {
8411
- const fullPath = path14.join(dirPath, entry.name);
8289
+ const fullPath = path15.join(dirPath, entry.name);
8412
8290
  if (entry.isDirectory()) {
8413
8291
  totalSize += await getDirectorySize(fullPath);
8414
8292
  } else {
@@ -8443,11 +8321,11 @@ var listCommand = command({
8443
8321
  return;
8444
8322
  }
8445
8323
  for (const dir of poolDirs) {
8446
- const poolDir = path14.join(poolRoot, dir.name);
8324
+ const poolDir = path15.join(poolRoot, dir.name);
8447
8325
  const fingerprint = dir.name;
8448
8326
  const poolEntries = await readdir6(poolDir, { withFileTypes: true });
8449
8327
  const slots = poolEntries.filter((e) => e.isDirectory() && e.name.startsWith("slot-"));
8450
- const metadataPath = path14.join(poolDir, "metadata.json");
8328
+ const metadataPath = path15.join(poolDir, "metadata.json");
8451
8329
  let metadata = null;
8452
8330
  try {
8453
8331
  const raw = await readFile6(metadataPath, "utf-8");
@@ -8493,8 +8371,8 @@ var CHECK_INTERVAL_MS = 24 * 60 * 60 * 1e3;
8493
8371
  var AGENTV_DIR = getAgentvHome();
8494
8372
  var CACHE_FILE = "version-check.json";
8495
8373
  var NPM_REGISTRY_URL = "https://registry.npmjs.org/agentv/latest";
8496
- async function getCachedUpdateInfo(path15) {
8497
- const filePath = path15 ?? join5(AGENTV_DIR, CACHE_FILE);
8374
+ async function getCachedUpdateInfo(path16) {
8375
+ const filePath = path16 ?? join5(AGENTV_DIR, CACHE_FILE);
8498
8376
  try {
8499
8377
  const raw = await readFile7(filePath, "utf-8");
8500
8378
  const data = JSON.parse(raw);
@@ -8578,7 +8456,7 @@ var app = subcommands({
8578
8456
  version: package_default.version,
8579
8457
  cmds: {
8580
8458
  eval: evalCommand,
8581
- prompt: evalPromptCommand,
8459
+ import: importCommand,
8582
8460
  compare: compareCommand,
8583
8461
  convert: convertCommand,
8584
8462
  create: createCommand,
@@ -8594,9 +8472,9 @@ var app = subcommands({
8594
8472
  workspace: workspaceCommand
8595
8473
  }
8596
8474
  });
8597
- var EVAL_SUBCOMMANDS = /* @__PURE__ */ new Set(["run", "prompt", "assert"]);
8475
+ var EVAL_SUBCOMMANDS = /* @__PURE__ */ new Set(["run", "assert"]);
8598
8476
  var TOP_LEVEL_COMMANDS = /* @__PURE__ */ new Set([
8599
- "prompt",
8477
+ "import",
8600
8478
  "compare",
8601
8479
  "convert",
8602
8480
  "create",
@@ -8651,4 +8529,4 @@ export {
8651
8529
  preprocessArgv,
8652
8530
  runCli
8653
8531
  };
8654
- //# sourceMappingURL=chunk-4WMLJHW5.js.map
8532
+ //# sourceMappingURL=chunk-D7HJ5HME.js.map