agentv 4.40.1 → 4.41.0-next.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{artifact-writer-GIAIMGPQ.js → artifact-writer-AMV64TWV.js} +4 -4
- package/dist/{chunk-B7CT3J2W.js → chunk-6FXICR66.js} +899 -300
- package/dist/chunk-6FXICR66.js.map +1 -0
- package/dist/{chunk-TWQP7JYQ.js → chunk-A4J456KS.js} +2 -2
- package/dist/{chunk-A36XLUI5.js → chunk-CF5RCUWH.js} +12 -10
- package/dist/chunk-CF5RCUWH.js.map +1 -0
- package/dist/{chunk-BLXYBUU4.js → chunk-ENHX2CCS.js} +1485 -943
- package/dist/chunk-ENHX2CCS.js.map +1 -0
- package/dist/{chunk-I3SC4FOT.js → chunk-Z45FKRMJ.js} +212 -58
- package/dist/chunk-Z45FKRMJ.js.map +1 -0
- package/dist/cli.js +5 -5
- package/dist/{dist-6Z4OSITR.js → dist-X5P5IR65.js} +7 -3
- package/dist/index.js +5 -5
- package/dist/{interactive-Q575M3A7.js → interactive-4JKJTY3G.js} +5 -5
- package/dist/skills/agentv-bench/references/eval-yaml-spec.md +4 -4
- package/dist/skills/agentv-eval-writer/references/custom-evaluators.md +14 -14
- package/dist/skills/agentv-eval-writer/references/python-helpers.md +47 -0
- package/dist/{ts-eval-loader-NWH3B4HG-UXXCZKLP.js → ts-eval-loader-ZVL6CGTE-TZYZX3QS.js} +2 -2
- package/package.json +1 -1
- package/dist/chunk-A36XLUI5.js.map +0 -1
- package/dist/chunk-B7CT3J2W.js.map +0 -1
- package/dist/chunk-BLXYBUU4.js.map +0 -1
- package/dist/chunk-I3SC4FOT.js.map +0 -1
- /package/dist/{artifact-writer-GIAIMGPQ.js.map → artifact-writer-AMV64TWV.js.map} +0 -0
- /package/dist/{chunk-TWQP7JYQ.js.map → chunk-A4J456KS.js.map} +0 -0
- /package/dist/{dist-6Z4OSITR.js.map → dist-X5P5IR65.js.map} +0 -0
- /package/dist/{interactive-Q575M3A7.js.map → interactive-4JKJTY3G.js.map} +0 -0
- /package/dist/{ts-eval-loader-NWH3B4HG-UXXCZKLP.js.map → ts-eval-loader-ZVL6CGTE-TZYZX3QS.js.map} +0 -0
|
@@ -25,6 +25,7 @@ import {
|
|
|
25
25
|
isRemoteRunId,
|
|
26
26
|
listMergedResultFiles,
|
|
27
27
|
listResultFiles,
|
|
28
|
+
loadEnvFromHierarchy,
|
|
28
29
|
loadLightweightResults,
|
|
29
30
|
loadManifestResults,
|
|
30
31
|
loadResultFile,
|
|
@@ -41,6 +42,7 @@ import {
|
|
|
41
42
|
resolveRunCacheFile,
|
|
42
43
|
resolveRunManifestPath,
|
|
43
44
|
runEvalCommand,
|
|
45
|
+
selectMultipleTargets,
|
|
44
46
|
selectTarget,
|
|
45
47
|
setRemoteRunTags,
|
|
46
48
|
syncRemoteResults,
|
|
@@ -52,11 +54,11 @@ import {
|
|
|
52
54
|
validateTargetsFile,
|
|
53
55
|
validateWorkspacePaths,
|
|
54
56
|
writeRunTags
|
|
55
|
-
} from "./chunk-
|
|
57
|
+
} from "./chunk-CF5RCUWH.js";
|
|
56
58
|
import {
|
|
57
59
|
toSnakeCaseDeep as toSnakeCaseDeep2,
|
|
58
|
-
writeArtifactsFromResults
|
|
59
|
-
} from "./chunk-
|
|
60
|
+
writeArtifactsFromResults as writeArtifactsFromResults2
|
|
61
|
+
} from "./chunk-A4J456KS.js";
|
|
60
62
|
import {
|
|
61
63
|
DEFAULT_CATEGORY,
|
|
62
64
|
deriveCategory,
|
|
@@ -65,12 +67,13 @@ import {
|
|
|
65
67
|
getOutputFilenames,
|
|
66
68
|
parseClaudeSession,
|
|
67
69
|
parseCodexSession,
|
|
70
|
+
prepareEvalWorkspace,
|
|
68
71
|
runBeforeSessionHook,
|
|
69
72
|
scanRepoDeps,
|
|
70
73
|
syncProjects,
|
|
71
74
|
transpileEvalYamlFile,
|
|
72
75
|
trimBaselineResult
|
|
73
|
-
} from "./chunk-
|
|
76
|
+
} from "./chunk-Z45FKRMJ.js";
|
|
74
77
|
import {
|
|
75
78
|
DEFAULT_THRESHOLD,
|
|
76
79
|
addProject,
|
|
@@ -85,6 +88,7 @@ import {
|
|
|
85
88
|
getAgentvConfigDir,
|
|
86
89
|
getProject,
|
|
87
90
|
getWorkspacePoolRoot,
|
|
91
|
+
gradePreparedEvalCase,
|
|
88
92
|
isAgentSkillsFormat,
|
|
89
93
|
listTargetNames,
|
|
90
94
|
loadConfig,
|
|
@@ -112,8 +116,9 @@ import {
|
|
|
112
116
|
toCamelCaseDeep,
|
|
113
117
|
toSnakeCaseDeep,
|
|
114
118
|
toTranscriptJsonLines,
|
|
115
|
-
touchProject
|
|
116
|
-
|
|
119
|
+
touchProject,
|
|
120
|
+
writeArtifactsFromResults
|
|
121
|
+
} from "./chunk-ENHX2CCS.js";
|
|
117
122
|
import {
|
|
118
123
|
__commonJS,
|
|
119
124
|
__require,
|
|
@@ -874,7 +879,7 @@ var require_src = __commonJS({
|
|
|
874
879
|
});
|
|
875
880
|
|
|
876
881
|
// src/index.ts
|
|
877
|
-
import
|
|
882
|
+
import path34 from "node:path";
|
|
878
883
|
|
|
879
884
|
// ../../node_modules/.bun/chalk@5.6.2/node_modules/chalk/source/vendor/ansi-styles/index.js
|
|
880
885
|
var ANSI_BACKGROUND_OFFSET = 10;
|
|
@@ -4597,7 +4602,7 @@ var evalRunCommand = command({
|
|
|
4597
4602
|
},
|
|
4598
4603
|
handler: async (args) => {
|
|
4599
4604
|
if (args.evalPaths.length === 0 && process.stdin.isTTY) {
|
|
4600
|
-
const { launchInteractiveWizard } = await import("./interactive-
|
|
4605
|
+
const { launchInteractiveWizard } = await import("./interactive-4JKJTY3G.js");
|
|
4601
4606
|
await launchInteractiveWizard();
|
|
4602
4607
|
return;
|
|
4603
4608
|
}
|
|
@@ -4684,9 +4689,351 @@ var evalCommand = subcommands({
|
|
|
4684
4689
|
}
|
|
4685
4690
|
});
|
|
4686
4691
|
|
|
4692
|
+
// src/commands/grade/index.ts
|
|
4693
|
+
import { readFile, stat } from "node:fs/promises";
|
|
4694
|
+
import path7 from "node:path";
|
|
4695
|
+
/**
 * Reports whether `value` is a non-null object that is not an array.
 *
 * @param {unknown} value - Candidate to inspect.
 * @returns {boolean} `true` for object values other than `null` and arrays.
 */
function isRecord(value) {
  if (value === null || Array.isArray(value)) {
    return false;
  }
  return typeof value === "object";
}
|
|
4698
|
+
/**
 * Builds (but does not throw) the error used for every prepared-manifest
 * validation failure.
 *
 * @param {string} manifestPath - Path of the manifest being validated.
 * @param {string} message - Description of what is wrong with the manifest.
 * @returns {Error} Error whose message names the manifest path and the problem.
 */
function invalidManifest(manifestPath, message) {
  const detail = `Invalid prepared manifest at ${manifestPath}: ${message}`;
  return new Error(detail);
}
|
|
4701
|
+
/**
 * Reads `record[key]` and requires it to be a string with non-whitespace content.
 *
 * @param {object} record - Object to read the field from.
 * @param {string} key - Field name to read.
 * @param {string} manifestPath - Manifest path, used only in the error message.
 * @returns {string} The field value, unmodified (not trimmed).
 * @throws {Error} When the field is not a string or is blank after trimming.
 */
function expectString(record, key, manifestPath) {
  const candidate = record[key];
  const isUsable = typeof candidate === "string" && candidate.trim().length > 0;
  if (isUsable) {
    return candidate;
  }
  throw invalidManifest(manifestPath, `missing non-empty string field '${key}'`);
}
|
|
4708
|
+
/**
 * Reads `record[key]` and requires it to be an array.
 *
 * @param {object} record - Object to read the field from.
 * @param {string} key - Field name to read.
 * @param {string} manifestPath - Manifest path, used only in the error message.
 * @returns {Array} The same array instance stored on the record.
 * @throws {Error} When the field is not an array.
 */
function expectArray(record, key, manifestPath) {
  const candidate = record[key];
  if (Array.isArray(candidate)) {
    return candidate;
  }
  throw invalidManifest(manifestPath, `missing array field '${key}'`);
}
|
|
4715
|
+
/**
 * Validates the manifest's `baseline` object and returns a normalized copy.
 *
 * Rules enforced, in order:
 *   1. `value` must be an object;
 *   2. `status` must be 'initialized' or 'unavailable';
 *   3. `commit`, when present, must be a string;
 *   4. `commit` must be non-blank when `status` is 'initialized'.
 *
 * @param {unknown} value - Raw `baseline` field from the manifest.
 * @param {string} manifestPath - Manifest path, used only in error messages.
 * @returns {{status: string, commit?: string}} `commit` is included only when
 *   it is a non-blank string.
 * @throws {Error} When any of the rules above is violated.
 */
function expectBaseline(value, manifestPath) {
  if (!isRecord(value)) {
    throw invalidManifest(manifestPath, "missing object field 'baseline'");
  }

  const status = value.status;
  const allowedStatuses = ["initialized", "unavailable"];
  if (!allowedStatuses.includes(status)) {
    throw invalidManifest(
      manifestPath,
      "field 'baseline.status' must be 'initialized' or 'unavailable'"
    );
  }

  const commit = value.commit;
  const commitIsString = typeof commit === "string";
  if (commit !== undefined && !commitIsString) {
    throw invalidManifest(manifestPath, "field 'baseline.commit' must be a string");
  }

  // A blank or whitespace-only commit is treated the same as an absent one.
  const hasCommit = commitIsString && commit.trim().length > 0;
  if (status === "initialized" && !hasCommit) {
    throw invalidManifest(
      manifestPath,
      "field 'baseline.commit' is required when baseline.status is 'initialized'"
    );
  }

  return hasCommit ? { status, commit } : { status };
}
|
|
4741
|
+
/**
 * Converts the parsed snake_case manifest JSON into the camelCase in-memory
 * manifest, validating each field as it goes.
 *
 * Relative `eval_path`, `workspace_path` and `prompt_path` values are resolved
 * against the directory that contains the manifest file.
 *
 * @param {unknown} value - Parsed JSON content of the manifest.
 * @param {string} manifestPath - Path the manifest was read from.
 * @returns {object} Manifest with `schemaVersion`, `evalPath`, `testId`,
 *   `target`, `workspacePath`, `promptPath`, `setupStatus`, `setupSteps`,
 *   `repoPins`, `baseline`, `createdAt`, `manifestPath` and `preparedDir`.
 * @throws {Error} When the value is not an object, `schema_version` is not 1,
 *   `setup_status` is not 'ok', or any required field fails validation.
 */
function fromManifestWire(value, manifestPath) {
  if (!isRecord(value)) {
    throw invalidManifest(manifestPath, "expected a JSON object");
  }
  if (value.schema_version !== 1) {
    throw invalidManifest(manifestPath, "field 'schema_version' must be 1");
  }
  const setupStatus = value.setup_status;
  if (setupStatus !== "ok") {
    throw invalidManifest(manifestPath, "field 'setup_status' must be 'ok'");
  }

  const preparedDir = path7.dirname(manifestPath);
  const requireString = (key) => expectString(value, key, manifestPath);
  const requireArray = (key) => expectArray(value, key, manifestPath);
  const requirePath = (key) => {
    const rawPath = requireString(key);
    if (path7.isAbsolute(rawPath)) {
      return rawPath;
    }
    return path7.resolve(preparedDir, rawPath);
  };

  // Property order below is also the order in which fields are validated,
  // so it determines which error is reported first for a bad manifest.
  return {
    schemaVersion: 1,
    evalPath: requirePath("eval_path"),
    testId: requireString("test_id"),
    target: requireString("target"),
    workspacePath: requirePath("workspace_path"),
    promptPath: requirePath("prompt_path"),
    setupStatus,
    setupSteps: requireArray("setup_steps"),
    repoPins: requireArray("repo_pins"),
    baseline: expectBaseline(value.baseline, manifestPath),
    createdAt: requireString("created_at"),
    manifestPath,
    preparedDir
  };
}
|
|
4770
|
+
/**
 * Turns the `--prepared` argument into the path of an `agentv_prepare.json` file.
 *
 * - An existing directory maps to `<dir>/agentv_prepare.json`.
 * - Any other existing path is returned as-is (after being made absolute).
 * - When the path cannot be stat'ed, it is returned as-is if its basename is
 *   already `agentv_prepare.json`; otherwise it is treated as a directory.
 *
 * @param {string} preparedPath - Directory or manifest file path.
 * @returns {Promise<string>} Absolute manifest path (existence is not guaranteed).
 */
async function resolvePreparedManifestPath(preparedPath) {
  const manifestName = "agentv_prepare.json";
  const absolutePath = path7.resolve(preparedPath);
  const manifestInside = () => path7.join(absolutePath, manifestName);

  let isDirectory;
  try {
    isDirectory = (await stat(absolutePath)).isDirectory();
  } catch {
    // Best effort: the path could not be inspected, so decide from its name alone.
    if (path7.basename(absolutePath) === manifestName) {
      return absolutePath;
    }
    return manifestInside();
  }

  return isDirectory ? manifestInside() : absolutePath;
}
|
|
4779
|
+
/**
 * Locates, reads, parses and validates a prepared-attempt manifest.
 *
 * @param {string} preparedPath - Prepared-attempt directory or manifest path.
 * @returns {Promise<object>} The validated manifest produced by `fromManifestWire`.
 * @throws {Error} With a "not found" message when the manifest file does not
 *   exist, or an "Invalid prepared manifest JSON" message when it cannot be
 *   parsed; both carry the underlying error as `cause`. Other read errors are
 *   rethrown unchanged, and validation errors come from `fromManifestWire`.
 */
async function readPreparedManifest(preparedPath) {
  const manifestPath = await resolvePreparedManifestPath(preparedPath);

  let raw;
  try {
    raw = await readFile(manifestPath, "utf8");
  } catch (error) {
    // `?.` guards against a non-object rejection value.
    if (error?.code === "ENOENT") {
      throw new Error(
        `Prepared manifest not found at ${manifestPath}. Run agentv prepare first and pass --prepared <dir>.`,
        { cause: error }
      );
    }
    throw error;
  }

  let parsed;
  try {
    parsed = JSON.parse(raw);
  } catch (error) {
    const message = error instanceof Error ? error.message : String(error);
    throw new Error(`Invalid prepared manifest JSON at ${manifestPath}: ${message}`, {
      cause: error
    });
  }

  return fromManifestWire(parsed, manifestPath);
}
|
|
4801
|
+
/**
 * Verifies that `dirPath` exists and is a directory.
 *
 * Only the `stat` call is guarded, so the "not a directory" error raised here
 * no longer passes through the filesystem-error handler.
 *
 * @param {string} dirPath - Path to check.
 * @param {string} description - Human-readable label used in error messages.
 * @returns {Promise<void>}
 * @throws {Error} "<description> not found" (with the stat error as `cause`)
 *   when the path does not exist; "<description> is not a directory" when it
 *   exists but is something else. Other stat errors are rethrown unchanged.
 */
async function ensureDirectoryExists(dirPath, description) {
  let stats;
  try {
    stats = await stat(dirPath);
  } catch (error) {
    if (error?.code === "ENOENT") {
      throw new Error(`${description} not found: ${dirPath}`, { cause: error });
    }
    throw error;
  }
  if (!stats.isDirectory()) {
    throw new Error(`${description} is not a directory: ${dirPath}`);
  }
}
|
|
4814
|
+
/**
 * Verifies that `filePath` exists and is a regular file.
 *
 * Only the `stat` call is guarded, so the "not a file" error raised here no
 * longer passes through the filesystem-error handler.
 *
 * @param {string} filePath - Path to check.
 * @param {string} description - Human-readable label used in error messages.
 * @returns {Promise<void>}
 * @throws {Error} "<description> not found" (with the stat error as `cause`)
 *   when the path does not exist; "<description> is not a file" when it exists
 *   but is something else. Other stat errors are rethrown unchanged.
 */
async function ensureFileExists(filePath, description) {
  let stats;
  try {
    stats = await stat(filePath);
  } catch (error) {
    if (error?.code === "ENOENT") {
      throw new Error(`${description} not found: ${filePath}`, { cause: error });
    }
    throw error;
  }
  if (!stats.isFile()) {
    throw new Error(`${description} is not a file: ${filePath}`);
  }
}
|
|
4827
|
+
/**
 * Checks that the command-line arguments agree with the prepared manifest and
 * returns the test ID to grade.
 *
 * The "was a test ID supplied?" decision is made once and used for both the
 * mismatch check and the return value. Previously the check used truthiness
 * while the return used `??`, so an explicit empty `--test-id ""` skipped the
 * check yet was still returned as the test ID.
 *
 * @param {object} options
 * @param {{evalPath: string, testId: string}} options.manifest - Prepared manifest.
 * @param {string} options.evalPath - Eval file path given on the command line.
 * @param {string} [options.testId] - Optional `--test-id` value.
 * @returns {string} `options.testId` when supplied, otherwise the manifest's test ID.
 * @throws {Error} When the eval paths differ after resolution, or when a
 *   supplied test ID differs from the manifest's.
 */
function assertMatchesManifest(options) {
  const { manifest, testId: requestedTestId } = options;

  const commandEvalPath = path7.resolve(options.evalPath);
  if (path7.resolve(manifest.evalPath) !== commandEvalPath) {
    throw new Error(
      `Prepared manifest eval_path does not match command eval path: ${manifest.evalPath} !== ${commandEvalPath}`
    );
  }

  const hasRequestedTestId = requestedTestId !== undefined && requestedTestId !== null;
  if (hasRequestedTestId && requestedTestId !== manifest.testId) {
    throw new Error(
      `Prepared manifest test_id '${manifest.testId}' does not match --test-id '${requestedTestId}'`
    );
  }

  return hasRequestedTestId ? requestedTestId : manifest.testId;
}
|
|
4841
|
+
/**
 * Projects a prepared manifest onto the metadata record passed to grading as
 * `preparedAttempt`.
 *
 * @param {object} manifest - Validated prepared manifest.
 * @returns {object} Metadata with `source: "manual"`, the manifest's paths,
 *   target, timestamps and statuses; `baselineCommit` is present only when the
 *   manifest's baseline has a `commit`.
 */
function toPreparedAttemptMetadata(manifest) {
  const { baseline } = manifest;
  const metadata = {
    source: "manual",
    manifestPath: manifest.manifestPath,
    preparedDir: manifest.preparedDir,
    workspacePath: manifest.workspacePath,
    promptPath: manifest.promptPath,
    target: manifest.target,
    preparedAt: manifest.createdAt,
    setupStatus: manifest.setupStatus,
    baselineStatus: baseline.status
  };
  if (baseline.commit !== undefined) {
    metadata.baselineCommit = baseline.commit;
  }
  return metadata;
}
|
|
4855
|
+
/**
 * Converts the camelCase grading summary into the snake_case object printed
 * by `--format json`.
 *
 * @param {object} result - Summary returned by `gradePreparedAttempt`.
 * @returns {object} The same values under snake_case keys.
 */
function toCommandOutputWire(result) {
  const {
    testId,
    target,
    score,
    executionStatus,
    workspacePath,
    manifestPath,
    outputDir,
    indexPath
  } = result;
  return {
    test_id: testId,
    target,
    score,
    execution_status: executionStatus,
    workspace_path: workspacePath,
    manifest_path: manifestPath,
    output_dir: outputDir,
    index_path: indexPath
  };
}
|
|
4867
|
+
/**
 * Prints the grading summary to stdout as six labelled lines (text format).
 * The score is shown with three decimal places.
 *
 * @param {object} result - Summary returned by `gradePreparedAttempt`.
 * @returns {void}
 */
function printHumanOutput(result) {
  const { testId, target, score, executionStatus } = result;
  console.log(`Graded prepared attempt for ${testId} (${target})`);
  const formattedScore = score.toFixed(3);
  console.log(`Score: ${formattedScore} (${executionStatus})`);
  console.log(`Workspace: ${result.workspacePath}`);
  console.log(`Manifest: ${result.manifestPath}`);
  console.log(`Artifact workspace: ${result.outputDir}`);
  console.log(`Index: ${result.indexPath}`);
}
|
|
4875
|
+
/**
 * Grades a previously prepared attempt and writes run artifacts for it.
 *
 * Steps, in order:
 *   1. read the prepared manifest and check it against the command arguments;
 *   2. verify the prepared workspace directory and prompt file exist;
 *   3. load environment files and the eval suite, then find the test by ID;
 *   4. resolve the manifest's target via `selectMultipleTargets`;
 *   5. optionally read the response text file;
 *   6. call `gradePreparedEvalCase` and write artifacts for the single result.
 *
 * @param {object} options
 * @param {string} options.preparedPath - Prepared directory or manifest path.
 * @param {string} options.evalPath - Path to the eval file.
 * @param {string} [options.testId] - Test ID; defaults to the manifest's.
 * @param {string} [options.outputDir] - Run artifact directory; when absent,
 *   `buildDefaultRunDir(process.cwd(), options.experiment)` is used.
 * @param {string} [options.responsePath] - File holding the final response text.
 * @param {string} [options.experiment] - Experiment label.
 * @param {string} [options.graderTarget] - Grader target override.
 * @param {string} [options.model] - Grader model override.
 * @param {number} [options.threshold] - Score threshold; falls back to the suite's.
 * @param {boolean} [options.verbose] - Verbose flag forwarded to helpers.
 * @returns {Promise<object>} Summary with `testId`, `target`, `score`,
 *   `executionStatus`, `workspacePath`, `manifestPath`, `outputDir`, `indexPath`.
 * @throws {Error} When the manifest is invalid or mismatched, prepared paths
 *   are missing, the test ID is not in the suite, or the target cannot be resolved.
 */
async function gradePreparedAttempt(options) {
  const manifest = await readPreparedManifest(options.preparedPath);
  const evalPath = path7.resolve(options.evalPath);
  const testId = assertMatchesManifest({ manifest, evalPath, testId: options.testId });
  const { workspacePath, promptPath, target: targetName } = manifest;

  await ensureDirectoryExists(workspacePath, "Prepared workspace");
  await ensureFileExists(promptPath, "Prepared prompt");

  const repoRoot = await findRepoRoot(path7.dirname(evalPath));
  await loadEnvFromHierarchy({
    testFilePath: evalPath,
    repoRoot,
    verbose: Boolean(options.verbose)
  });

  const relativeEvalPath = path7.relative(process.cwd(), evalPath);
  const suite = await loadTestSuite(evalPath, repoRoot, {
    category: deriveCategory(relativeEvalPath)
  });
  const evalCase = suite.tests.find(({ id }) => id === testId);
  if (!evalCase) {
    throw new Error(`Test ID '${testId}' not found in ${evalPath}`);
  }

  // Only the manifest's target is requested, so the first selection is the one to use.
  const selections = await selectMultipleTargets({
    testFilePath: evalPath,
    repoRoot,
    cwd: process.cwd(),
    dryRun: false,
    dryRunDelay: 0,
    dryRunDelayMin: 0,
    dryRunDelayMax: 0,
    env: process.env,
    targetNames: [targetName],
    targetRefs: suite.targetRefs
  });
  const selection = selections[0];
  if (!selection) {
    throw new Error(`Target '${targetName}' could not be resolved`);
  }
  // The resolved target is reported under the name recorded in the manifest.
  const target = { ...selection.resolvedTarget, name: targetName };

  let response;
  if (options.responsePath !== undefined) {
    response = await readFile(path7.resolve(options.responsePath), "utf8");
  }

  const runDir = path7.resolve(
    options.outputDir ?? buildDefaultRunDir(process.cwd(), options.experiment)
  );

  const graded = await gradePreparedEvalCase({
    evalCase,
    target,
    targets: selection.definitions,
    env: process.env,
    evalFilePath: evalPath,
    workspacePath,
    baselineCommit: manifest.baseline.commit,
    response,
    verbose: options.verbose,
    graderTarget: options.graderTarget,
    model: options.model,
    threshold: options.threshold ?? suite.threshold,
    preparedAttempt: toPreparedAttemptMetadata(manifest)
  });

  const { indexPath } = await writeArtifactsFromResults([graded], runDir, {
    evalFile: evalPath,
    experiment: options.experiment,
    plannedTestCount: 1,
    sourceTests: [evalCase]
  });

  return {
    testId,
    target: targetName,
    score: graded.score,
    executionStatus: graded.executionStatus,
    workspacePath,
    manifestPath: manifest.manifestPath,
    outputDir: runDir,
    indexPath
  };
}
|
|
4946
|
+
// CLI definition for `grade`: grades an already prepared workspace attempt.
// The handler forwards the parsed arguments to `gradePreparedAttempt`
// (always with `verbose: false`) and prints the summary either as JSON
// (`--format json`) or as human-readable text (any other format value).
var gradeCommand = command({
  name: "grade",
  description: "Grade a prepared workspace attempt without running the target provider",
  args: {
    evalPath: positional({
      type: string,
      displayName: "eval",
      description: "Path to an eval file"
    }),
    testId: option({
      type: optional(string),
      long: "test-id",
      description: "Exact test ID to grade; defaults to agentv_prepare.json test_id"
    }),
    prepared: option({
      type: string,
      long: "prepared",
      description: "Prepared-attempt directory or agentv_prepare.json path"
    }),
    output: option({
      type: optional(string),
      long: "output",
      short: "o",
      description: "Run artifact directory (writes index.jsonl and per-test artifacts)"
    }),
    response: option({
      type: optional(string),
      long: "response",
      description: "Optional final response text file from the human or external agent"
    }),
    experiment: option({
      type: optional(string),
      long: "experiment",
      description: "Experiment label for canonical run output (default: default)"
    }),
    graderTarget: option({
      type: optional(string),
      long: "grader-target",
      description: 'Override grader target for all evaluators (e.g., "agentv", or a target name from targets.yaml)'
    }),
    model: option({
      type: optional(string),
      long: "model",
      description: 'Override model for the grader target (e.g., "openai:gpt-5-mini")'
    }),
    threshold: option({
      type: optional(number),
      long: "threshold",
      description: "Per-test score threshold (0-1, default 0.8 or suite threshold)"
    }),
    format: option({
      type: optional(oneOf(["text", "json"])),
      long: "format",
      description: "Output format: text (default) or json"
    })
  },
  handler: async (args) => {
    const summary = await gradePreparedAttempt({
      evalPath: args.evalPath,
      testId: args.testId,
      preparedPath: args.prepared,
      outputDir: args.output,
      responsePath: args.response,
      experiment: args.experiment,
      graderTarget: args.graderTarget,
      model: args.model,
      threshold: args.threshold,
      verbose: false
    });
    if (args.format === "json") {
      console.log(JSON.stringify(toCommandOutputWire(summary), null, 2));
    } else {
      printHumanOutput(summary);
    }
  }
});
|
|
5033
|
+
|
|
4687
5034
|
// src/commands/import/claude.ts
|
|
4688
5035
|
import { mkdir as mkdir3, writeFile as writeFile3 } from "node:fs/promises";
|
|
4689
|
-
import
|
|
5036
|
+
import path8 from "node:path";
|
|
4690
5037
|
var importClaudeCommand = command({
|
|
4691
5038
|
name: "claude",
|
|
4692
5039
|
description: "Import a Claude Code session transcript for offline grading",
|
|
@@ -4758,8 +5105,8 @@ var importClaudeCommand = command({
|
|
|
4758
5105
|
const rawJsonl = await readTranscriptFile(sessionFilePath);
|
|
4759
5106
|
const transcript = parseClaudeSession(rawJsonl);
|
|
4760
5107
|
const shortId = (sessionId ?? transcript.source.sessionId).slice(0, 8);
|
|
4761
|
-
const outputPath = output ??
|
|
4762
|
-
await mkdir3(
|
|
5108
|
+
const outputPath = output ?? path8.join(".agentv", "transcripts", `claude-${shortId}.jsonl`);
|
|
5109
|
+
await mkdir3(path8.dirname(outputPath), { recursive: true });
|
|
4763
5110
|
const jsonLines = toTranscriptJsonLines(transcript);
|
|
4764
5111
|
await writeFile3(
|
|
4765
5112
|
outputPath,
|
|
@@ -4803,7 +5150,7 @@ function formatDurationMs(ms) {
|
|
|
4803
5150
|
|
|
4804
5151
|
// src/commands/import/codex.ts
|
|
4805
5152
|
import { mkdir as mkdir4, writeFile as writeFile4 } from "node:fs/promises";
|
|
4806
|
-
import
|
|
5153
|
+
import path9 from "node:path";
|
|
4807
5154
|
var importCodexCommand = command({
|
|
4808
5155
|
name: "codex",
|
|
4809
5156
|
description: "Import a Codex CLI session transcript for offline grading",
|
|
@@ -4875,8 +5222,8 @@ var importCodexCommand = command({
|
|
|
4875
5222
|
const rawJsonl = await readTranscriptFile(session.filePath);
|
|
4876
5223
|
const transcript = parseCodexSession(rawJsonl);
|
|
4877
5224
|
const shortId = session.sessionId.slice(0, 8);
|
|
4878
|
-
const outputPath = output ??
|
|
4879
|
-
await mkdir4(
|
|
5225
|
+
const outputPath = output ?? path9.join(".agentv", "transcripts", `codex-${shortId}.jsonl`);
|
|
5226
|
+
await mkdir4(path9.dirname(outputPath), { recursive: true });
|
|
4880
5227
|
const jsonLines = toTranscriptJsonLines(transcript);
|
|
4881
5228
|
await writeFile4(
|
|
4882
5229
|
outputPath,
|
|
@@ -4914,8 +5261,8 @@ function formatDurationMs2(ms) {
|
|
|
4914
5261
|
}
|
|
4915
5262
|
|
|
4916
5263
|
// src/commands/import/copilot.ts
|
|
4917
|
-
import { mkdir as mkdir5, readFile, writeFile as writeFile5 } from "node:fs/promises";
|
|
4918
|
-
import
|
|
5264
|
+
import { mkdir as mkdir5, readFile as readFile2, writeFile as writeFile5 } from "node:fs/promises";
|
|
5265
|
+
import path10 from "node:path";
|
|
4919
5266
|
var importCopilotCommand = command({
|
|
4920
5267
|
name: "copilot",
|
|
4921
5268
|
description: "Import a Copilot CLI session transcript for offline grading",
|
|
@@ -4980,8 +5327,8 @@ var importCopilotCommand = command({
|
|
|
4980
5327
|
);
|
|
4981
5328
|
process.exit(1);
|
|
4982
5329
|
}
|
|
4983
|
-
const eventsPath =
|
|
4984
|
-
const rawJsonl = await
|
|
5330
|
+
const eventsPath = path10.join(sessionDir, "events.jsonl");
|
|
5331
|
+
const rawJsonl = await readFile2(eventsPath, "utf8");
|
|
4985
5332
|
const parsed = parseCopilotEvents(rawJsonl);
|
|
4986
5333
|
const transcript = {
|
|
4987
5334
|
messages: parsed.messages,
|
|
@@ -4997,8 +5344,8 @@ var importCopilotCommand = command({
|
|
|
4997
5344
|
costUsd: null
|
|
4998
5345
|
};
|
|
4999
5346
|
const shortId = resolvedSessionId.slice(0, 8);
|
|
5000
|
-
const outputPath = output ??
|
|
5001
|
-
await mkdir5(
|
|
5347
|
+
const outputPath = output ?? path10.join(".agentv", "transcripts", `copilot-${shortId}.jsonl`);
|
|
5348
|
+
await mkdir5(path10.dirname(outputPath), { recursive: true });
|
|
5002
5349
|
const jsonLines = toTranscriptJsonLines(transcript);
|
|
5003
5350
|
await writeFile5(
|
|
5004
5351
|
outputPath,
|
|
@@ -5043,12 +5390,12 @@ function formatDurationMs3(ms) {
|
|
|
5043
5390
|
// src/commands/import/huggingface.ts
|
|
5044
5391
|
import { execFile } from "node:child_process";
|
|
5045
5392
|
import { existsSync as existsSync2 } from "node:fs";
|
|
5046
|
-
import
|
|
5393
|
+
import path11 from "node:path";
|
|
5047
5394
|
function findScript() {
|
|
5048
5395
|
const candidates = [
|
|
5049
|
-
|
|
5050
|
-
|
|
5051
|
-
|
|
5396
|
+
path11.resolve(__dirname, "..", "..", "..", "..", "..", "scripts", "import-huggingface.py"),
|
|
5397
|
+
path11.resolve(__dirname, "..", "..", "..", "..", "scripts", "import-huggingface.py"),
|
|
5398
|
+
path11.resolve(process.cwd(), "scripts", "import-huggingface.py")
|
|
5052
5399
|
];
|
|
5053
5400
|
for (const candidate of candidates) {
|
|
5054
5401
|
if (existsSync2(candidate)) return candidate;
|
|
@@ -5147,18 +5494,18 @@ Imported ${summary.files_created} eval(s) from ${summary.dataset} \u2192 ${summa
|
|
|
5147
5494
|
});
|
|
5148
5495
|
|
|
5149
5496
|
// src/commands/import/promptfoo.ts
|
|
5150
|
-
import { mkdir as mkdir6, readFile as
|
|
5151
|
-
import
|
|
5497
|
+
import { mkdir as mkdir6, readFile as readFile3, writeFile as writeFile6 } from "node:fs/promises";
|
|
5498
|
+
import path12 from "node:path";
|
|
5152
5499
|
import fg2 from "fast-glob";
|
|
5153
5500
|
import JSON5 from "json5";
|
|
5154
5501
|
import { parse as parseYaml, stringify as stringifyYaml2 } from "yaml";
|
|
5155
5502
|
var FILE_PREFIX = "file://";
|
|
5156
5503
|
var PROMPTFOO_COMMENT_PREFIX = "# Converted from promptfoo config: ";
|
|
5157
5504
|
async function convertPromptfooToAgentvSuite(options) {
|
|
5158
|
-
const absoluteInputPath =
|
|
5159
|
-
const configDir =
|
|
5505
|
+
const absoluteInputPath = path12.resolve(options.inputPath);
|
|
5506
|
+
const configDir = path12.dirname(absoluteInputPath);
|
|
5160
5507
|
const rawConfig = await loadPromptfooConfig(absoluteInputPath);
|
|
5161
|
-
const suiteName = sanitizeName(
|
|
5508
|
+
const suiteName = sanitizeName(path12.basename(absoluteInputPath, path12.extname(absoluteInputPath)));
|
|
5162
5509
|
const prompts = await loadPromptfooPrompts(rawConfig.prompts, configDir);
|
|
5163
5510
|
if (prompts.length === 0) {
|
|
5164
5511
|
throw new Error(`promptfoo import requires at least one prompt in ${absoluteInputPath}`);
|
|
@@ -5199,7 +5546,7 @@ async function convertPromptfooToAgentvYaml(inputPath) {
|
|
|
5199
5546
|
indent: 2,
|
|
5200
5547
|
lineWidth: 0
|
|
5201
5548
|
});
|
|
5202
|
-
return `${PROMPTFOO_COMMENT_PREFIX}${
|
|
5549
|
+
return `${PROMPTFOO_COMMENT_PREFIX}${path12.resolve(inputPath)}
|
|
5203
5550
|
${yaml}`;
|
|
5204
5551
|
}
|
|
5205
5552
|
var importPromptfooCommand = command({
|
|
@@ -5223,20 +5570,20 @@ var importPromptfooCommand = command({
|
|
|
5223
5570
|
})
|
|
5224
5571
|
},
|
|
5225
5572
|
handler: async ({ input, output, dryRun: dryRun2 }) => {
|
|
5226
|
-
const absoluteInput =
|
|
5573
|
+
const absoluteInput = path12.resolve(input);
|
|
5227
5574
|
const yaml = await convertPromptfooToAgentvYaml(absoluteInput);
|
|
5228
5575
|
if (dryRun2) {
|
|
5229
5576
|
process.stdout.write(yaml);
|
|
5230
5577
|
return;
|
|
5231
5578
|
}
|
|
5232
|
-
const outputPath =
|
|
5233
|
-
await mkdir6(
|
|
5579
|
+
const outputPath = path12.resolve(output ?? path12.join(path12.dirname(absoluteInput), "EVAL.yaml"));
|
|
5580
|
+
await mkdir6(path12.dirname(outputPath), { recursive: true });
|
|
5234
5581
|
await writeFile6(outputPath, yaml, "utf8");
|
|
5235
5582
|
console.log(`Imported promptfoo config \u2192 ${outputPath}`);
|
|
5236
5583
|
}
|
|
5237
5584
|
});
|
|
5238
5585
|
async function loadPromptfooConfig(filePath) {
|
|
5239
|
-
const content = await
|
|
5586
|
+
const content = await readFile3(filePath, "utf8");
|
|
5240
5587
|
const parsed = parseStructuredText(content, filePath);
|
|
5241
5588
|
if (!isJsonObject(parsed)) {
|
|
5242
5589
|
throw new Error(`promptfoo config must be an object: ${filePath}`);
|
|
@@ -5244,7 +5591,7 @@ async function loadPromptfooConfig(filePath) {
|
|
|
5244
5591
|
return parsed;
|
|
5245
5592
|
}
|
|
5246
5593
|
function parseStructuredText(content, filePath) {
|
|
5247
|
-
const ext =
|
|
5594
|
+
const ext = path12.extname(filePath).toLowerCase();
|
|
5248
5595
|
if (ext === ".json" || ext === ".json5" || ext === ".jsonc") {
|
|
5249
5596
|
return JSON5.parse(content);
|
|
5250
5597
|
}
|
|
@@ -5318,8 +5665,8 @@ async function loadPromptFromReference(reference, baseDir, identity2) {
|
|
|
5318
5665
|
const prompts = [];
|
|
5319
5666
|
for (let index = 0; index < files.length; index++) {
|
|
5320
5667
|
const filePath = files[index];
|
|
5321
|
-
const ext =
|
|
5322
|
-
const raw = await
|
|
5668
|
+
const ext = path12.extname(filePath).toLowerCase();
|
|
5669
|
+
const raw = await readFile3(filePath, "utf8");
|
|
5323
5670
|
if (ext === ".json" || ext === ".json5" || ext === ".jsonc") {
|
|
5324
5671
|
const parsed = parseStructuredText(raw, filePath);
|
|
5325
5672
|
if (!Array.isArray(parsed) || !parsed.every(isPromptMessage)) {
|
|
@@ -5373,7 +5720,7 @@ async function expandProviderEntry(rawProvider, baseDir) {
|
|
|
5373
5720
|
const files = await resolvePromptfooFileReference(rawProvider, baseDir);
|
|
5374
5721
|
const providers = [];
|
|
5375
5722
|
for (const filePath of files) {
|
|
5376
|
-
const parsed = parseStructuredText(await
|
|
5723
|
+
const parsed = parseStructuredText(await readFile3(filePath, "utf8"), filePath);
|
|
5377
5724
|
if (!isJsonObject(parsed)) {
|
|
5378
5725
|
throw new Error(`Provider file must be an object: ${filePath}`);
|
|
5379
5726
|
}
|
|
@@ -5412,7 +5759,7 @@ async function loadDefaultTest(rawDefaultTest, baseDir) {
|
|
|
5412
5759
|
if (files.length !== 1) {
|
|
5413
5760
|
throw new Error(`defaultTest must resolve to exactly one file: ${rawDefaultTest}`);
|
|
5414
5761
|
}
|
|
5415
|
-
const parsed = parseStructuredText(await
|
|
5762
|
+
const parsed = parseStructuredText(await readFile3(files[0], "utf8"), files[0]);
|
|
5416
5763
|
if (!isJsonObject(parsed)) {
|
|
5417
5764
|
throw new Error(`defaultTest file must contain an object: ${files[0]}`);
|
|
5418
5765
|
}
|
|
@@ -5461,13 +5808,13 @@ async function loadPromptfooTestsFromReference(reference, baseDir) {
|
|
|
5461
5808
|
const files = await resolvePromptfooFileReference(reference, baseDir);
|
|
5462
5809
|
const tests = [];
|
|
5463
5810
|
for (const filePath of files) {
|
|
5464
|
-
const ext =
|
|
5811
|
+
const ext = path12.extname(stripSheetSuffix(filePath)).toLowerCase();
|
|
5465
5812
|
if (ext === ".xlsx" || ext === ".xls") {
|
|
5466
5813
|
throw new Error(
|
|
5467
|
-
`Unsupported test dataset '${
|
|
5814
|
+
`Unsupported test dataset '${path12.basename(filePath)}': XLSX promptfoo datasets are not imported yet`
|
|
5468
5815
|
);
|
|
5469
5816
|
}
|
|
5470
|
-
const content = await
|
|
5817
|
+
const content = await readFile3(filePath, "utf8");
|
|
5471
5818
|
if (ext === ".csv") {
|
|
5472
5819
|
tests.push(...parseCsvPromptfooTests(content, filePath));
|
|
5473
5820
|
continue;
|
|
@@ -6062,7 +6409,7 @@ function normalizeAssertionType(rawType) {
|
|
|
6062
6409
|
async function resolvePromptfooFileReference(reference, baseDir) {
|
|
6063
6410
|
const rawPath = reference.slice(FILE_PREFIX.length);
|
|
6064
6411
|
const [pathWithoutSheet] = rawPath.split("#");
|
|
6065
|
-
const absolutePath =
|
|
6412
|
+
const absolutePath = path12.resolve(baseDir, pathWithoutSheet);
|
|
6066
6413
|
const normalizedPattern = absolutePath.replaceAll("\\", "/");
|
|
6067
6414
|
const matches = await fg2(normalizedPattern, {
|
|
6068
6415
|
onlyFiles: true,
|
|
@@ -6155,29 +6502,29 @@ var importCommand = subcommands({
|
|
|
6155
6502
|
|
|
6156
6503
|
// src/commands/init/index.ts
|
|
6157
6504
|
import { existsSync as existsSync3, mkdirSync, writeFileSync as writeFileSync2 } from "node:fs";
|
|
6158
|
-
import
|
|
6505
|
+
import path14 from "node:path";
|
|
6159
6506
|
import * as readline from "node:readline/promises";
|
|
6160
6507
|
|
|
6161
6508
|
// src/templates/index.ts
|
|
6162
6509
|
import { readFileSync as readFileSync3, readdirSync, statSync } from "node:fs";
|
|
6163
|
-
import
|
|
6510
|
+
import path13 from "node:path";
|
|
6164
6511
|
import { fileURLToPath } from "node:url";
|
|
6165
6512
|
/**
 * Loads the templates stored under the `.agentv` template subdirectory.
 *
 * @returns The result of `getTemplatesFromDir(".agentv")`.
 */
function getAgentvTemplates() {
  const agentvSubdir = ".agentv";
  return getTemplatesFromDir(agentvSubdir);
}
|
|
6168
6515
|
/**
 * Reads the `.env.example` template that ships alongside this module.
 *
 * When the module's directory path contains `<sep>dist`, the file is looked up
 * inside a `templates` subdirectory; otherwise it is read from the module's own
 * directory.
 *
 * @returns {{ path: string, content: string }} The fixed relative path
 *   `.env.example` and the UTF-8 content of the template file.
 */
function getEnvExampleTemplate() {
  const moduleDir = path13.dirname(fileURLToPath(import.meta.url));
  const runsFromDist = moduleDir.includes(`${path13.sep}dist`);
  const templateFile = runsFromDist
    ? path13.join(moduleDir, "templates", ".env.example")
    : path13.join(moduleDir, ".env.example");
  return { path: ".env.example", content: readFileSync3(templateFile, "utf-8") };
}
|
|
6174
6521
|
/**
 * Collects every template file below the given template subdirectory.
 *
 * The base directory is this module's directory; when its path contains
 * `<sep>dist`, the subdirectory is resolved under `templates/` instead of
 * directly beside the module.
 *
 * @param {string} subdir Template subdirectory name (for example ".agentv").
 * @returns The result of `readTemplatesRecursively` for the resolved directory,
 *   started with an empty relative path.
 */
function getTemplatesFromDir(subdir) {
  const moduleDir = path13.dirname(fileURLToPath(import.meta.url));
  const segments = moduleDir.includes(`${path13.sep}dist`) ? ["templates", subdir] : [subdir];
  return readTemplatesRecursively(path13.join(moduleDir, ...segments), "");
}
|
|
@@ -6185,15 +6532,15 @@ function readTemplatesRecursively(dir, relativePath) {
|
|
|
6185
6532
|
const templates = [];
|
|
6186
6533
|
const entries2 = readdirSync(dir);
|
|
6187
6534
|
for (const entry of entries2) {
|
|
6188
|
-
const fullPath =
|
|
6189
|
-
const
|
|
6190
|
-
const entryRelativePath = relativePath ?
|
|
6191
|
-
if (
|
|
6535
|
+
const fullPath = path13.join(dir, entry);
|
|
6536
|
+
const stat4 = statSync(fullPath);
|
|
6537
|
+
const entryRelativePath = relativePath ? path13.join(relativePath, entry) : entry;
|
|
6538
|
+
if (stat4.isDirectory()) {
|
|
6192
6539
|
templates.push(...readTemplatesRecursively(fullPath, entryRelativePath));
|
|
6193
6540
|
} else {
|
|
6194
6541
|
const content = readFileSync3(fullPath, "utf-8");
|
|
6195
6542
|
templates.push({
|
|
6196
|
-
path: entryRelativePath.split(
|
|
6543
|
+
path: entryRelativePath.split(path13.sep).join("/"),
|
|
6197
6544
|
// Normalize to forward slashes
|
|
6198
6545
|
content
|
|
6199
6546
|
});
|
|
@@ -6224,22 +6571,22 @@ async function promptYesNo(message) {
|
|
|
6224
6571
|
}
|
|
6225
6572
|
}
|
|
6226
6573
|
async function initCommand(options = {}) {
|
|
6227
|
-
const targetPath =
|
|
6228
|
-
const agentvDir =
|
|
6574
|
+
const targetPath = path14.resolve(options.targetPath ?? ".");
|
|
6575
|
+
const agentvDir = path14.join(targetPath, ".agentv");
|
|
6229
6576
|
const otherAgentvTemplates = getAgentvTemplates();
|
|
6230
6577
|
const envTemplate = getEnvExampleTemplate();
|
|
6231
6578
|
const existingFiles = [];
|
|
6232
6579
|
if (envTemplate) {
|
|
6233
|
-
const envFilePath =
|
|
6580
|
+
const envFilePath = path14.join(targetPath, ".env.example");
|
|
6234
6581
|
if (existsSync3(envFilePath)) {
|
|
6235
6582
|
existingFiles.push(".env.example");
|
|
6236
6583
|
}
|
|
6237
6584
|
}
|
|
6238
6585
|
if (existsSync3(agentvDir)) {
|
|
6239
6586
|
for (const template of otherAgentvTemplates) {
|
|
6240
|
-
const targetFilePath =
|
|
6587
|
+
const targetFilePath = path14.join(agentvDir, template.path);
|
|
6241
6588
|
if (existsSync3(targetFilePath)) {
|
|
6242
|
-
existingFiles.push(
|
|
6589
|
+
existingFiles.push(path14.relative(targetPath, targetFilePath));
|
|
6243
6590
|
}
|
|
6244
6591
|
}
|
|
6245
6592
|
}
|
|
@@ -6261,18 +6608,18 @@ async function initCommand(options = {}) {
|
|
|
6261
6608
|
mkdirSync(agentvDir, { recursive: true });
|
|
6262
6609
|
}
|
|
6263
6610
|
if (envTemplate) {
|
|
6264
|
-
const envFilePath =
|
|
6611
|
+
const envFilePath = path14.join(targetPath, ".env.example");
|
|
6265
6612
|
writeFileSync2(envFilePath, envTemplate.content, "utf-8");
|
|
6266
6613
|
console.log("Created .env.example");
|
|
6267
6614
|
}
|
|
6268
6615
|
for (const template of otherAgentvTemplates) {
|
|
6269
|
-
const targetFilePath =
|
|
6270
|
-
const targetDirPath =
|
|
6616
|
+
const targetFilePath = path14.join(agentvDir, template.path);
|
|
6617
|
+
const targetDirPath = path14.dirname(targetFilePath);
|
|
6271
6618
|
if (!existsSync3(targetDirPath)) {
|
|
6272
6619
|
mkdirSync(targetDirPath, { recursive: true });
|
|
6273
6620
|
}
|
|
6274
6621
|
writeFileSync2(targetFilePath, template.content, "utf-8");
|
|
6275
|
-
console.log(`Created ${
|
|
6622
|
+
console.log(`Created ${path14.relative(targetPath, targetFilePath)}`);
|
|
6276
6623
|
}
|
|
6277
6624
|
console.log("\nAgentV initialized successfully!");
|
|
6278
6625
|
console.log("\nFiles installed to root:");
|
|
@@ -6280,7 +6627,7 @@ async function initCommand(options = {}) {
|
|
|
6280
6627
|
console.log(" - .env.example");
|
|
6281
6628
|
}
|
|
6282
6629
|
console.log(`
|
|
6283
|
-
Files installed to ${
|
|
6630
|
+
Files installed to ${path14.relative(targetPath, agentvDir)}:`);
|
|
6284
6631
|
for (const t of otherAgentvTemplates) {
|
|
6285
6632
|
console.log(` - ${t.path}`);
|
|
6286
6633
|
}
|
|
@@ -6312,13 +6659,13 @@ var initCmdTsCommand = command({
|
|
|
6312
6659
|
|
|
6313
6660
|
// src/commands/inspect/filter.ts
|
|
6314
6661
|
import { existsSync as existsSync4, readFileSync as readFileSync4, readdirSync as readdirSync2, statSync as statSync2 } from "node:fs";
|
|
6315
|
-
import
|
|
6662
|
+
import path15 from "node:path";
|
|
6316
6663
|
function collectIndexFiles(dir) {
|
|
6317
6664
|
const files = [];
|
|
6318
6665
|
try {
|
|
6319
6666
|
const entries2 = readdirSync2(dir, { withFileTypes: true });
|
|
6320
6667
|
for (const entry of entries2) {
|
|
6321
|
-
const fullPath =
|
|
6668
|
+
const fullPath = path15.join(dir, entry.name);
|
|
6322
6669
|
if (entry.isDirectory()) {
|
|
6323
6670
|
files.push(...collectIndexFiles(fullPath));
|
|
6324
6671
|
} else if (entry.name === "index.jsonl") {
|
|
@@ -6371,7 +6718,7 @@ function parseFilterableRecords(filePath) {
|
|
|
6371
6718
|
raw = normalizeResultRow(raw, { lineNumber: i + 1, sourceLabel: filePath });
|
|
6372
6719
|
let experiment = typeof raw.experiment === "string" ? raw.experiment : void 0;
|
|
6373
6720
|
if (!experiment) {
|
|
6374
|
-
const parts = filePath.split(
|
|
6721
|
+
const parts = filePath.split(path15.sep);
|
|
6375
6722
|
const runsIdx = parts.indexOf("runs");
|
|
6376
6723
|
if (runsIdx !== -1 && parts.length - runsIdx >= 3) {
|
|
6377
6724
|
const candidate = parts[runsIdx + 1];
|
|
@@ -6426,7 +6773,7 @@ function buildFilterPredicate(opts) {
|
|
|
6426
6773
|
}
|
|
6427
6774
|
function discoverFilterSources(searchPath, cwd) {
|
|
6428
6775
|
if (searchPath) {
|
|
6429
|
-
const resolved =
|
|
6776
|
+
const resolved = path15.isAbsolute(searchPath) ? searchPath : path15.resolve(cwd, searchPath);
|
|
6430
6777
|
if (!existsSync4(resolved)) {
|
|
6431
6778
|
console.error(`${c.red}Error:${c.reset} Path does not exist: ${resolved}`);
|
|
6432
6779
|
process.exit(1);
|
|
@@ -6439,7 +6786,7 @@ function discoverFilterSources(searchPath, cwd) {
|
|
|
6439
6786
|
}
|
|
6440
6787
|
return [resolved];
|
|
6441
6788
|
}
|
|
6442
|
-
return collectIndexFiles(
|
|
6789
|
+
return collectIndexFiles(path15.join(cwd, ".agentv", "results", "runs"));
|
|
6443
6790
|
}
|
|
6444
6791
|
function formatFilterTable(records) {
|
|
6445
6792
|
const lines = [];
|
|
@@ -6919,13 +7266,13 @@ var traceScoreCommand = command({
|
|
|
6919
7266
|
|
|
6920
7267
|
// src/commands/inspect/search.ts
|
|
6921
7268
|
import { existsSync as existsSync5, readFileSync as readFileSync5, readdirSync as readdirSync3, statSync as statSync3 } from "node:fs";
|
|
6922
|
-
import
|
|
7269
|
+
import path16 from "node:path";
|
|
6923
7270
|
function collectJsonlFiles(dir) {
|
|
6924
7271
|
const files = [];
|
|
6925
7272
|
try {
|
|
6926
7273
|
const entries2 = readdirSync3(dir, { withFileTypes: true });
|
|
6927
7274
|
for (const entry of entries2) {
|
|
6928
|
-
const fullPath =
|
|
7275
|
+
const fullPath = path16.join(dir, entry.name);
|
|
6929
7276
|
if (entry.isDirectory()) {
|
|
6930
7277
|
files.push(...collectJsonlFiles(fullPath));
|
|
6931
7278
|
} else if (entry.name.endsWith(".jsonl")) {
|
|
@@ -6984,7 +7331,7 @@ function searchJsonlFile(filePath, regex2, targetFilter, experimentFilter) {
|
|
|
6984
7331
|
}
|
|
6985
7332
|
function discoverSources(basePath, cwd) {
|
|
6986
7333
|
if (basePath) {
|
|
6987
|
-
const resolved =
|
|
7334
|
+
const resolved = path16.isAbsolute(basePath) ? basePath : path16.resolve(cwd, basePath);
|
|
6988
7335
|
if (!existsSync5(resolved)) {
|
|
6989
7336
|
console.error(`${c.red}Error:${c.reset} Path does not exist: ${resolved}`);
|
|
6990
7337
|
process.exit(1);
|
|
@@ -6998,8 +7345,8 @@ function discoverSources(basePath, cwd) {
|
|
|
6998
7345
|
return [resolved];
|
|
6999
7346
|
}
|
|
7000
7347
|
const sources = [];
|
|
7001
|
-
sources.push(...collectJsonlFiles(
|
|
7002
|
-
sources.push(...collectJsonlFiles(
|
|
7348
|
+
sources.push(...collectJsonlFiles(path16.join(cwd, ".agentv", "results", "runs")));
|
|
7349
|
+
sources.push(...collectJsonlFiles(path16.join(cwd, ".agentv", "transcripts")));
|
|
7003
7350
|
return sources;
|
|
7004
7351
|
}
|
|
7005
7352
|
function formatSearchResults(matches, pattern) {
|
|
@@ -7547,7 +7894,7 @@ var inspectCommand = subcommands({
|
|
|
7547
7894
|
|
|
7548
7895
|
// src/commands/pipeline/bench.ts
|
|
7549
7896
|
import { existsSync as existsSync6 } from "node:fs";
|
|
7550
|
-
import { readFile as
|
|
7897
|
+
import { readFile as readFile4, readdir, writeFile as writeFile7 } from "node:fs/promises";
|
|
7551
7898
|
import { join } from "node:path";
|
|
7552
7899
|
var evalBenchCommand = command({
|
|
7553
7900
|
name: "bench",
|
|
@@ -7560,7 +7907,7 @@ var evalBenchCommand = command({
|
|
|
7560
7907
|
})
|
|
7561
7908
|
},
|
|
7562
7909
|
handler: async ({ exportDir }) => {
|
|
7563
|
-
const manifest = JSON.parse(await
|
|
7910
|
+
const manifest = JSON.parse(await readFile4(join(exportDir, "manifest.json"), "utf8"));
|
|
7564
7911
|
const testIds = manifest.test_ids;
|
|
7565
7912
|
const targetName2 = manifest.target?.name ?? "unknown";
|
|
7566
7913
|
const suiteName = manifest.suite ?? "";
|
|
@@ -7578,7 +7925,7 @@ var evalBenchCommand = command({
|
|
|
7578
7925
|
try {
|
|
7579
7926
|
const resultFiles = (await readdir(codeResultsDir)).filter((f) => f.endsWith(".json"));
|
|
7580
7927
|
for (const file of resultFiles) {
|
|
7581
|
-
const result = JSON.parse(await
|
|
7928
|
+
const result = JSON.parse(await readFile4(join(codeResultsDir, file), "utf8"));
|
|
7582
7929
|
evaluators.push({
|
|
7583
7930
|
name: result.name,
|
|
7584
7931
|
type: result.type ?? "code-grader",
|
|
@@ -7596,12 +7943,12 @@ var evalBenchCommand = command({
|
|
|
7596
7943
|
try {
|
|
7597
7944
|
const graderFiles = (await readdir(llmGradersDir)).filter((f) => f.endsWith(".json"));
|
|
7598
7945
|
for (const file of graderFiles) {
|
|
7599
|
-
const graderMeta = JSON.parse(await
|
|
7946
|
+
const graderMeta = JSON.parse(await readFile4(join(llmGradersDir, file), "utf8"));
|
|
7600
7947
|
const graderName = graderMeta.name;
|
|
7601
7948
|
const diskResultPath = join(testDir, "llm_grader_results", `${graderName}.json`);
|
|
7602
7949
|
let llmResult;
|
|
7603
7950
|
try {
|
|
7604
|
-
llmResult = JSON.parse(await
|
|
7951
|
+
llmResult = JSON.parse(await readFile4(diskResultPath, "utf8"));
|
|
7605
7952
|
} catch {
|
|
7606
7953
|
}
|
|
7607
7954
|
if (llmResult) {
|
|
@@ -7659,7 +8006,7 @@ var evalBenchCommand = command({
|
|
|
7659
8006
|
const timingPath = join(testDir, "timing.json");
|
|
7660
8007
|
if (existsSync6(timingPath)) {
|
|
7661
8008
|
try {
|
|
7662
|
-
const timing = JSON.parse(await
|
|
8009
|
+
const timing = JSON.parse(await readFile4(timingPath, "utf8"));
|
|
7663
8010
|
if (typeof timing.execution_status === "string") {
|
|
7664
8011
|
executionStatus = timing.execution_status;
|
|
7665
8012
|
}
|
|
@@ -7754,7 +8101,7 @@ function computeStats(values) {
|
|
|
7754
8101
|
}
|
|
7755
8102
|
|
|
7756
8103
|
// src/commands/pipeline/grade.ts
|
|
7757
|
-
import { mkdir as mkdir7, readFile as
|
|
8104
|
+
import { mkdir as mkdir7, readFile as readFile5, readdir as readdir2, writeFile as writeFile8 } from "node:fs/promises";
|
|
7758
8105
|
import { join as join2 } from "node:path";
|
|
7759
8106
|
var DEFAULT_CONCURRENCY = 10;
|
|
7760
8107
|
async function runCodeGraders(tasks, concurrency) {
|
|
@@ -7770,7 +8117,7 @@ async function runCodeGraders(tasks, concurrency) {
|
|
|
7770
8117
|
const executeGrader = async (task) => {
|
|
7771
8118
|
const { testDir, resultsDir, graderFile, responseText } = task;
|
|
7772
8119
|
const graderConfig = JSON.parse(
|
|
7773
|
-
await
|
|
8120
|
+
await readFile5(join2(testDir, "code_graders", graderFile), "utf8")
|
|
7774
8121
|
);
|
|
7775
8122
|
if (graderConfig.command) {
|
|
7776
8123
|
await executeCodeGrader(graderConfig, task);
|
|
@@ -7929,7 +8276,7 @@ var evalGradeCommand = command({
|
|
|
7929
8276
|
handler: async ({ exportDir, concurrency }) => {
|
|
7930
8277
|
const maxWorkers = concurrency ?? DEFAULT_CONCURRENCY;
|
|
7931
8278
|
const manifestPath = join2(exportDir, "manifest.json");
|
|
7932
|
-
const manifest = JSON.parse(await
|
|
8279
|
+
const manifest = JSON.parse(await readFile5(manifestPath, "utf8"));
|
|
7933
8280
|
const testIds = manifest.test_ids;
|
|
7934
8281
|
const suiteName = manifest.suite ?? "";
|
|
7935
8282
|
const safeSuiteName = suiteName ? suiteName.replace(/[\/\\:*?"<>|]/g, "_") : "";
|
|
@@ -7947,8 +8294,8 @@ var evalGradeCommand = command({
|
|
|
7947
8294
|
}
|
|
7948
8295
|
if (graderFiles.length === 0) continue;
|
|
7949
8296
|
await mkdir7(resultsDir, { recursive: true });
|
|
7950
|
-
const responseText = await
|
|
7951
|
-
const inputData = JSON.parse(await
|
|
8297
|
+
const responseText = await readFile5(join2(testDir, "response.md"), "utf8");
|
|
8298
|
+
const inputData = JSON.parse(await readFile5(join2(testDir, "input.json"), "utf8"));
|
|
7952
8299
|
for (const graderFile of graderFiles) {
|
|
7953
8300
|
tasks.push({ testId, testDir, resultsDir, graderFile, responseText, inputData });
|
|
7954
8301
|
}
|
|
@@ -7959,7 +8306,7 @@ var evalGradeCommand = command({
|
|
|
7959
8306
|
});
|
|
7960
8307
|
|
|
7961
8308
|
// src/commands/pipeline/input.ts
|
|
7962
|
-
import { readFile as
|
|
8309
|
+
import { readFile as readFile6 } from "node:fs/promises";
|
|
7963
8310
|
import { mkdir as mkdir8, writeFile as writeFile9 } from "node:fs/promises";
|
|
7964
8311
|
import { dirname, join as join3, relative, resolve } from "node:path";
|
|
7965
8312
|
var BUILTIN_ASSERTION_TYPES = /* @__PURE__ */ new Set([
|
|
@@ -8161,7 +8508,7 @@ async function writeGraderConfigs(testDir, assertions, evalDir) {
|
|
|
8161
8508
|
let promptContent = "";
|
|
8162
8509
|
if (config.resolvedPromptPath) {
|
|
8163
8510
|
try {
|
|
8164
|
-
promptContent = await
|
|
8511
|
+
promptContent = await readFile6(config.resolvedPromptPath, "utf8");
|
|
8165
8512
|
} catch {
|
|
8166
8513
|
promptContent = typeof config.prompt === "string" ? config.prompt : "";
|
|
8167
8514
|
}
|
|
@@ -8211,7 +8558,7 @@ async function writeJson(filePath, data) {
|
|
|
8211
8558
|
// src/commands/pipeline/run.ts
|
|
8212
8559
|
import { exec } from "node:child_process";
|
|
8213
8560
|
import { existsSync as existsSync7, readFileSync as readFileSync6, unlinkSync } from "node:fs";
|
|
8214
|
-
import { mkdir as mkdir9, readFile as
|
|
8561
|
+
import { mkdir as mkdir9, readFile as readFile7, readdir as readdir3, writeFile as writeFile10 } from "node:fs/promises";
|
|
8215
8562
|
import { tmpdir } from "node:os";
|
|
8216
8563
|
import { dirname as dirname2, join as join4, relative as relative2, resolve as resolve2 } from "node:path";
|
|
8217
8564
|
function extractInputText(input) {
|
|
@@ -8390,9 +8737,9 @@ var evalRunCommand2 = command({
|
|
|
8390
8737
|
const invokeTarget = async (testId) => {
|
|
8391
8738
|
const subpath = safeSuiteName ? [safeSuiteName, testId] : [testId];
|
|
8392
8739
|
const testDir = join4(outDir, ...subpath);
|
|
8393
|
-
const invoke = JSON.parse(await
|
|
8740
|
+
const invoke = JSON.parse(await readFile7(join4(testDir, "invoke.json"), "utf8"));
|
|
8394
8741
|
if (invoke.kind !== "cli") return;
|
|
8395
|
-
const inputData = JSON.parse(await
|
|
8742
|
+
const inputData = JSON.parse(await readFile7(join4(testDir, "input.json"), "utf8"));
|
|
8396
8743
|
const template = invoke.command;
|
|
8397
8744
|
const cwd = invoke.cwd;
|
|
8398
8745
|
const timeoutMs = invoke.timeout_ms ?? 12e4;
|
|
@@ -8531,8 +8878,8 @@ Done. Results in ${outDir}`);
|
|
|
8531
8878
|
}
|
|
8532
8879
|
if (graderFiles.length === 0) continue;
|
|
8533
8880
|
await mkdir9(resultsDir, { recursive: true });
|
|
8534
|
-
const responseText = await
|
|
8535
|
-
const inputData = JSON.parse(await
|
|
8881
|
+
const responseText = await readFile7(join4(testDir, "response.md"), "utf8");
|
|
8882
|
+
const inputData = JSON.parse(await readFile7(join4(testDir, "input.json"), "utf8"));
|
|
8536
8883
|
for (const graderFile of graderFiles) {
|
|
8537
8884
|
graderTasks.push({ testId, testDir, resultsDir, graderFile, responseText, inputData });
|
|
8538
8885
|
}
|
|
@@ -8623,6 +8970,254 @@ var pipelineCommand = subcommands({
|
|
|
8623
8970
|
}
|
|
8624
8971
|
});
|
|
8625
8972
|
|
|
8973
|
+
// src/commands/prepare/index.ts
|
|
8974
|
+
import { cp, mkdir as mkdir10, rename, rm, writeFile as writeFile11 } from "node:fs/promises";
|
|
8975
|
+
import path17 from "node:path";
|
|
8976
|
+
/**
 * Maps a hook execution status onto a setup-step status.
 *
 * @param {string} status Hook status.
 * @returns {"ok" | "skipped" | "warning"} "ok" for "success", "skipped" for
 *   "skipped", and "warning" for every other value.
 */
function setupStatusFromHook(status) {
  switch (status) {
    case "success":
      return "ok";
    case "skipped":
      return "skipped";
    default:
      return "warning";
  }
}
|
|
8985
|
+
/**
 * Builds the list of setup steps reported for a prepared workspace.
 *
 * One step is produced per hook execution (named `<scope>_<name>`, carrying the
 * hook error as `message` when present), followed by a final
 * `workspace_baseline` step derived from `prepared.baseline.status`.
 *
 * @param prepared Object exposing `hookExecutions` and `baseline`.
 * @returns {Array<{ name: string, status: string, message?: string }>}
 */
function setupStepsFromPrepared(prepared) {
  const hookSteps = prepared.hookExecutions.map((hook) => {
    const step = {
      name: `${hook.scope}_${hook.name}`,
      status: setupStatusFromHook(hook.status)
    };
    if (hook.error !== undefined) {
      step.message = hook.error;
    }
    return step;
  });

  const baselineStatus = prepared.baseline.status;
  const baselineStep = {
    name: "workspace_baseline",
    status: baselineStatus === "initialized" ? "ok" : "skipped"
  };
  if (baselineStatus === "unavailable") {
    baselineStep.message = "baseline unavailable";
  }

  return [...hookSteps, baselineStep];
}
|
|
8999
|
+
/**
 * Copies the known repo-pin fields from each pin, dropping fields whose value
 * is `undefined` and any field not in the known list.
 *
 * @param {Array<object>} pins Source pins.
 * @returns {Array<object>} New objects containing only the defined fields among
 *   path, repo, commit, baseCommit, ancestor and sparse (in that order).
 */
function toRepoPins(pins) {
  const copiedFields = ["path", "repo", "commit", "baseCommit", "ancestor", "sparse"];
  return pins.map((pin) => {
    const copy = {};
    for (const field of copiedFields) {
      if (pin[field] !== undefined) {
        copy[field] = pin[field];
      }
    }
    return copy;
  });
}
|
|
9009
|
+
/**
 * Moves a directory, falling back to copy-then-delete when a plain rename fails
 * with `EXDEV`.
 *
 * @param {string} sourcePath Directory to move.
 * @param {string} destinationPath Target location.
 * @throws Rethrows any rename error whose `code` is not "EXDEV", and any error
 *   raised by the copy or the removal in the fallback path.
 */
async function moveDirectory(sourcePath, destinationPath) {
  try {
    await rename(sourcePath, destinationPath);
    return;
  } catch (renameError) {
    const crossesDevices = renameError.code === "EXDEV";
    if (!crossesDevices) {
      throw renameError;
    }
  }
  // rename reported EXDEV: copy the tree, then remove the original.
  await cp(sourcePath, destinationPath, { recursive: true });
  await rm(sourcePath, { recursive: true, force: true });
}
|
|
9020
|
+
/**
 * Places a prepared workspace at `destinationPath`.
 *
 * - If source and destination resolve to the same path, nothing is done.
 * - Otherwise the destination's parent is created and any existing destination
 *   is removed.
 * - The workspace is moved when `prepared.cleanupPolicy.mode` is not "static"
 *   and `prepared.pool` is undefined; in every other case it is copied so the
 *   source stays in place.
 *
 * @param prepared Object exposing `workspacePath`, `cleanupPolicy.mode` and `pool`.
 * @param {string} destinationPath Where the workspace should end up.
 * @returns {Promise<string>} `destinationPath`.
 * @throws {Error} When one path is nested inside the other. Removing the
 *   destination would otherwise delete (part of) the source before it could be
 *   copied or moved.
 */
async function placePreparedWorkspace(prepared, destinationPath) {
  const sourcePath = prepared.workspacePath;
  const resolvedSource = path17.resolve(sourcePath);
  const resolvedDestination = path17.resolve(destinationPath);
  if (resolvedSource === resolvedDestination) {
    return destinationPath;
  }

  // True when `child` is a strict descendant of `parent`.
  const isInside = (parent, child) => {
    const rel = path17.relative(parent, child);
    return rel !== "" && rel !== ".." && !rel.startsWith(`..${path17.sep}`) && !path17.isAbsolute(rel);
  };
  // Guard before touching disk: the recursive removal below must never reach
  // into the source workspace.
  if (isInside(resolvedDestination, resolvedSource) || isInside(resolvedSource, resolvedDestination)) {
    throw new Error(
      `Cannot place prepared workspace: source '${sourcePath}' and destination '${destinationPath}' overlap`
    );
  }

  await mkdir10(path17.dirname(destinationPath), { recursive: true });
  await rm(destinationPath, { recursive: true, force: true });
  if (prepared.cleanupPolicy.mode !== "static" && prepared.pool === void 0) {
    await moveDirectory(sourcePath, destinationPath);
    return destinationPath;
  }
  await cp(sourcePath, destinationPath, { recursive: true });
  return destinationPath;
}
|
|
9034
|
+
/**
 * Renders the Markdown prompt text for a prepared attempt.
 *
 * @param {{ taskInput: string, workspacePath: string, target: string }} options
 *   `taskInput` is trimmed; when it is empty after trimming a placeholder line
 *   is used instead.
 * @returns {string} The prompt, ending with a single trailing newline.
 */
function renderPrompt(options) {
  const trimmedInput = options.taskInput.trim();
  const taskInput = trimmedInput === "" ? "(No text input was provided.)" : trimmedInput;

  const instructions = [
    `- Work in this workspace: ${options.workspacePath}`,
    `- Complete the task input for target: ${options.target}`,
    "- Leave the final files in the workspace for a later AgentV grading step.",
    "- Do not run AgentV graders or inspect eval answer keys, rubrics, or oracle data."
  ];

  // Sections are separated by one blank line; instruction bullets are consecutive lines.
  const sections = [
    "# AgentV Prepared Attempt",
    "## Task Input",
    taskInput,
    "## Execution Instructions",
    instructions.join("\n")
  ];
  return `${sections.join("\n\n")}\n`;
}
|
|
9052
|
+
/**
 * Converts a prepare result into its snake_case manifest representation.
 *
 * Optional fields (`message` on steps, each repo-pin field, `commit` on the
 * baseline) are emitted only when their value is not `undefined`.
 *
 * @param result Prepare result with camelCase fields.
 * @returns {object} Plain object suitable for JSON serialization.
 */
function toManifestWire(result) {
  // [source field, wire field] pairs, in output order.
  const pinFields = [
    ["path", "path"],
    ["repo", "repo"],
    ["commit", "commit"],
    ["baseCommit", "base_commit"],
    ["ancestor", "ancestor"],
    ["sparse", "sparse"]
  ];

  const stepToWire = (step) => {
    const wireStep = { name: step.name, status: step.status };
    if (step.message !== undefined) {
      wireStep.message = step.message;
    }
    return wireStep;
  };

  const pinToWire = (pin) => {
    const wirePin = {};
    for (const [sourceField, wireField] of pinFields) {
      if (pin[sourceField] !== undefined) {
        wirePin[wireField] = pin[sourceField];
      }
    }
    return wirePin;
  };

  const baselineToWire = (baseline) => {
    const wireBaseline = { status: baseline.status };
    if (baseline.commit !== undefined) {
      wireBaseline.commit = baseline.commit;
    }
    return wireBaseline;
  };

  return {
    schema_version: result.schemaVersion,
    eval_path: result.evalPath,
    test_id: result.testId,
    target: result.target,
    workspace_path: result.workspacePath,
    prompt_path: result.promptPath,
    setup_status: result.setupStatus,
    setup_steps: result.setupSteps.map(stepToWire),
    repo_pins: result.repoPins.map(pinToWire),
    baseline: baselineToWire(result.baseline),
    created_at: result.createdAt
  };
}
|
|
9081
|
+
/**
 * Builds the JSON command output: the manifest wire object plus a trailing
 * `manifest_path` field.
 *
 * @param result Prepare result, including `manifestPath`.
 * @returns {object} Manifest wire fields followed by `manifest_path`.
 */
function toCommandOutputWire2(result) {
  const manifestWire = toManifestWire(result);
  return Object.assign({}, manifestWire, { manifest_path: result.manifestPath });
}
|
|
9087
|
+
/**
 * Resolves the single target a prepared attempt is for.
 *
 * Calls `selectMultipleTargets` with the requested target as the only target
 * name, dry-run disabled, and the current process cwd and environment, then
 * uses the first returned selection.
 *
 * @param {{ evalPath: string, repoRoot: string, target: string, targetRefs: unknown }} options
 * @returns {Promise<{ resolvedTarget: object, targetHooks?: unknown }>} The
 *   resolved target with its `name` overridden by `options.target`, plus
 *   `targetHooks` when the selection defines them.
 * @throws {Error} When no selection is returned for the target.
 */
async function selectPrepareTarget(options) {
  const { evalPath, repoRoot, target, targetRefs } = options;
  const selections = await selectMultipleTargets({
    testFilePath: evalPath,
    repoRoot,
    cwd: process.cwd(),
    dryRun: false,
    dryRunDelay: 0,
    dryRunDelayMin: 0,
    dryRunDelayMax: 0,
    env: process.env,
    targetNames: [target],
    targetRefs
  });

  const selection = selections[0];
  if (!selection) {
    throw new Error(`Target '${options.target}' could not be resolved`);
  }

  const selected = {
    resolvedTarget: { ...selection.resolvedTarget, name: target }
  };
  if (selection.targetHooks !== undefined) {
    selected.targetHooks = selection.targetHooks;
  }
  return selected;
}
|
|
9112
|
+
/**
 * Prepares one eval test for an external attempt without running the target.
 *
 * Steps, in order: resolve paths, locate the repo root, load env files, load
 * the test suite, find the requested test, resolve the target, prepare the
 * eval workspace (always retained), place it at `<outDir>/workspace`, write
 * `<outDir>/prompt.md`, and write `<outDir>/agentv_prepare.json`.
 *
 * @param {{ evalPath: string, outDir: string, testId: string, target: string }} options
 * @returns {Promise<object>} The prepare result (camelCase form of the manifest
 *   plus `manifestPath`).
 * @throws {Error} When `options.testId` is not found in the suite.
 */
async function prepareAttempt(options) {
  const { testId, target } = options;
  const evalPath = path17.resolve(options.evalPath);
  const outDir = path17.resolve(options.outDir);

  const repoRoot = await findRepoRoot(path17.dirname(evalPath));
  await loadEnvFromHierarchy({ testFilePath: evalPath, repoRoot, verbose: false });

  const category = deriveCategory(path17.relative(process.cwd(), evalPath));
  const suite = await loadTestSuite(evalPath, repoRoot, { category });
  const test = suite.tests.find((candidate) => candidate.id === testId);
  if (!test) {
    throw new Error(`Test ID '${options.testId}' not found in ${evalPath}`);
  }

  const { resolvedTarget, targetHooks } = await selectPrepareTarget({
    evalPath,
    repoRoot,
    target,
    targetRefs: suite.targetRefs
  });

  // A temp workspace is requested only when the test neither pins a workspace
  // path nor asks for static mode.
  const wantsTempWorkspace = test.workspace?.path === undefined && test.workspace?.mode !== "static";
  const hookOptions = targetHooks === undefined ? {} : { targetHooks };
  const workspaceModeOptions = wantsTempWorkspace ? { workspaceMode: "temp" } : {};

  const prepared = await prepareEvalWorkspace({
    testFilePath: evalPath,
    repoRoot,
    target: resolvedTarget,
    ...hookOptions,
    evalCases: suite.tests,
    testId,
    verbose: false,
    ...workspaceModeOptions,
    retainOnSuccess: "keep",
    retainOnFailure: "keep"
  });

  await mkdir10(outDir, { recursive: true });
  const workspacePath = await placePreparedWorkspace(prepared, path17.join(outDir, "workspace"));
  const promptPath = path17.join(outDir, "prompt.md");
  const manifestPath = path17.join(outDir, "agentv_prepare.json");

  const promptText = renderPrompt({
    workspacePath,
    target,
    taskInput: prepared.promptSource.question
  });
  await writeFile11(promptPath, promptText, "utf8");

  const result = {
    schemaVersion: 1,
    evalPath,
    testId: prepared.testId,
    target,
    workspacePath,
    promptPath,
    manifestPath,
    setupStatus: "ok",
    setupSteps: setupStepsFromPrepared(prepared),
    repoPins: toRepoPins(prepared.repoPins),
    baseline: prepared.baseline,
    createdAt: prepared.createdAt
  };

  // The manifest is pretty-printed JSON terminated by a newline.
  const manifestJson = JSON.stringify(toManifestWire(result), null, 2);
  await writeFile11(manifestPath, `${manifestJson}\n`, "utf8");
  return result;
}
|
|
9170
|
+
/**
 * Prints the text-format summary of a prepared attempt, one `console.log`
 * call per line, followed by the suggested next steps.
 *
 * @param {{ testId: string, target: string, workspacePath: string, promptPath: string, manifestPath: string }} result
 */
function printHumanOutput2(result) {
  const outputLines = [
    `Prepared attempt for ${result.testId} (${result.target})`,
    `Workspace: ${result.workspacePath}`,
    `Prompt: ${result.promptPath}`,
    `Manifest: ${result.manifestPath}`,
    "",
    "Next steps:",
    " 1. Give prompt.md to the human or external agent.",
    ` 2. Run the agent in ${result.workspacePath}.`,
    " 3. Keep the final workspace for a later AgentV grading command."
  ];
  for (const line of outputLines) {
    console.log(line);
  }
}
|
|
9181
|
+
// `prepare` CLI command: prepares a single eval test workspace and prompt, then
// prints the result either as human-readable text (default) or as JSON.
var prepareCommand = command({
  name: "prepare",
  description: "Prepare one eval test workspace and safe prompt without running the target",
  args: {
    // Positional argument: the eval file to read the test from.
    evalPath: positional({
      type: string,
      displayName: "eval",
      description: "Path to an eval file"
    }),
    testId: option({
      type: string,
      long: "test-id",
      description: "Exact test ID to prepare"
    }),
    target: option({
      type: string,
      long: "target",
      description: "Target name this prepared attempt is for"
    }),
    out: option({
      type: string,
      long: "out",
      description: "Prepared-attempt output directory"
    }),
    format: option({
      type: optional(oneOf(["text", "json"])),
      long: "format",
      description: "Output format: text (default) or json"
    })
  },
  handler: async (args) => {
    const { evalPath, testId, target, out: outDir, format } = args;
    const result = await prepareAttempt({ evalPath, testId, target, outDir });
    if (format !== "json") {
      printHumanOutput2(result);
      return;
    }
    console.log(JSON.stringify(toCommandOutputWire2(result), null, 2));
  }
});
|
|
9220
|
+
|
|
8626
9221
|
// src/commands/results/combine.ts
|
|
8627
9222
|
import * as readline2 from "node:readline/promises";
|
|
8628
9223
|
|
|
@@ -8636,7 +9231,7 @@ import {
|
|
|
8636
9231
|
statSync as statSync4,
|
|
8637
9232
|
writeFileSync as writeFileSync3
|
|
8638
9233
|
} from "node:fs";
|
|
8639
|
-
import
|
|
9234
|
+
import path18 from "node:path";
|
|
8640
9235
|
var CombineDuplicateError = class extends Error {
|
|
8641
9236
|
constructor(conflicts) {
|
|
8642
9237
|
super(`Duplicate result rows found for ${conflicts.length} (test_id, target) pair(s)`);
|
|
@@ -8652,7 +9247,7 @@ function readManifestRecords(manifestPath) {
|
|
|
8652
9247
|
}
|
|
8653
9248
|
function readBenchmarkMetadata(manifestPath) {
|
|
8654
9249
|
try {
|
|
8655
|
-
const benchmarkPath =
|
|
9250
|
+
const benchmarkPath = path18.join(path18.dirname(manifestPath), "benchmark.json");
|
|
8656
9251
|
const parsed = JSON.parse(readFileSync7(benchmarkPath, "utf8"));
|
|
8657
9252
|
return {
|
|
8658
9253
|
timestamp: parsed.metadata?.timestamp,
|
|
@@ -8771,7 +9366,7 @@ function defaultDisplayName(sources) {
|
|
|
8771
9366
|
/**
 * Computes the default output directory for a combined run:
 * `<cwd>/.agentv/results/runs/combined/<timestamp>`.
 *
 * The last segment comes from `createRunDirName` when `startedAt` parses to a
 * valid date; otherwise from `sanitizePathSegment` applied to `startedAt`, or
 * to "unknown-time" when `startedAt` is null or undefined.
 *
 * @param {string} cwd Base working directory.
 * @param {string | undefined} startedAt Start timestamp of the combined run.
 * @returns {string} The run directory path.
 */
function defaultCombinedRunDir(cwd, startedAt) {
  const parsedStart = startedAt ? new Date(startedAt) : undefined;
  let timestampSegment;
  if (parsedStart && !Number.isNaN(parsedStart.getTime())) {
    timestampSegment = createRunDirName(parsedStart);
  } else {
    timestampSegment = sanitizePathSegment(startedAt ?? "unknown-time");
  }
  return path18.join(cwd, ".agentv", "results", "runs", "combined", timestampSegment);
}
|
|
8776
9371
|
function uniqueRunDir(baseDir) {
|
|
8777
9372
|
if (!existsSync8(baseDir)) return baseDir;
|
|
@@ -8782,13 +9377,13 @@ function uniqueRunDir(baseDir) {
|
|
|
8782
9377
|
return `${baseDir}-${suffix}`;
|
|
8783
9378
|
}
|
|
8784
9379
|
/**
 * Derives a run id from a run directory.
 *
 * For a directory below `<cwd>/.agentv/results/runs`, the id is the relative
 * path, with the first segment separated from the rest by `::` when there is
 * more than one segment. For the runs root itself, or any directory outside
 * it, the id is the directory's basename.
 *
 * @param {string} cwd Base working directory.
 * @param {string} runDir Run directory path.
 * @returns {string} The run id.
 */
function toRunId(cwd, runDir) {
  const runsRoot = path18.join(cwd, ".agentv", "results", "runs");
  const relativeRunDir = path18.relative(runsRoot, runDir);

  const isNotBelowRunsRoot =
    relativeRunDir === "" || relativeRunDir.startsWith("..") || path18.isAbsolute(relativeRunDir);
  if (isNotBelowRunsRoot) {
    return path18.basename(runDir);
  }

  const [head, ...rest] = relativeRunDir.split(path18.sep);
  if (rest.length === 0) {
    return relativeRunDir;
  }
  return `${head}::${rest.join(path18.sep)}`;
}
|
|
8793
9388
|
var MANIFEST_PATH_FIELDS = [
|
|
8794
9389
|
"grading_path",
|
|
@@ -8804,17 +9399,17 @@ var MANIFEST_PATH_FIELDS = [
|
|
|
8804
9399
|
];
|
|
8805
9400
|
function copyReferencedArtifact(sourceBaseDir, outputDir, sourceIndex, relativePath) {
|
|
8806
9401
|
if (!relativePath) return void 0;
|
|
8807
|
-
if (
|
|
9402
|
+
if (path18.isAbsolute(relativePath) || relativePath.split(/[\\/]+/).includes("..")) {
|
|
8808
9403
|
throw new Error(`Unsafe artifact path in source manifest: ${relativePath}`);
|
|
8809
9404
|
}
|
|
8810
|
-
const sourcePath =
|
|
9405
|
+
const sourcePath = path18.join(sourceBaseDir, relativePath);
|
|
8811
9406
|
if (!existsSync8(sourcePath)) {
|
|
8812
9407
|
return relativePath;
|
|
8813
9408
|
}
|
|
8814
|
-
const rewritten =
|
|
8815
|
-
const destPath =
|
|
9409
|
+
const rewritten = path18.posix.join(`sources/source-${sourceIndex + 1}`, relativePath);
|
|
9410
|
+
const destPath = path18.join(outputDir, rewritten);
|
|
8816
9411
|
const sourceStat = statSync4(sourcePath);
|
|
8817
|
-
mkdirSync2(
|
|
9412
|
+
mkdirSync2(path18.dirname(destPath), { recursive: true });
|
|
8818
9413
|
if (sourceStat.isDirectory()) {
|
|
8819
9414
|
cpSync(sourcePath, destPath, { recursive: true });
|
|
8820
9415
|
} else if (sourceStat.isFile()) {
|
|
@@ -8823,7 +9418,7 @@ function copyReferencedArtifact(sourceBaseDir, outputDir, sourceIndex, relativeP
|
|
|
8823
9418
|
return rewritten;
|
|
8824
9419
|
}
|
|
8825
9420
|
function rewriteAndCopyRecord(row, outputDir) {
|
|
8826
|
-
const sourceBaseDir =
|
|
9421
|
+
const sourceBaseDir = path18.dirname(row.source.manifestPath);
|
|
8827
9422
|
const rewritten = { ...row.record };
|
|
8828
9423
|
for (const field of MANIFEST_PATH_FIELDS) {
|
|
8829
9424
|
rewritten[field] = copyReferencedArtifact(
|
|
@@ -8851,8 +9446,8 @@ function buildCombineRunSources(sourcePaths, cwd, options) {
|
|
|
8851
9446
|
return sourcePaths.map((sourcePath, index) => {
|
|
8852
9447
|
const manifestPath = resolveResultSourcePath(sourcePath, cwd);
|
|
8853
9448
|
return {
|
|
8854
|
-
id: options?.ids?.[index] ??
|
|
8855
|
-
displayName: options?.displayNames?.[index] ??
|
|
9449
|
+
id: options?.ids?.[index] ?? path18.basename(path18.dirname(manifestPath)),
|
|
9450
|
+
displayName: options?.displayNames?.[index] ?? path18.basename(path18.dirname(manifestPath)),
|
|
8856
9451
|
manifestPath,
|
|
8857
9452
|
tags: options?.tags?.[index]
|
|
8858
9453
|
};
|
|
@@ -8866,7 +9461,7 @@ function combineRunSources(options) {
|
|
|
8866
9461
|
const startedAt = earliestTimestamp(loadedSources.map((source) => source.startedAt));
|
|
8867
9462
|
const displayName = options.displayName?.trim() || defaultDisplayName(loadedSources);
|
|
8868
9463
|
const runDir = uniqueRunDir(
|
|
8869
|
-
options.outputDir ?
|
|
9464
|
+
options.outputDir ? path18.resolve(options.cwd, options.outputDir) : defaultCombinedRunDir(options.cwd, startedAt)
|
|
8870
9465
|
);
|
|
8871
9466
|
const { rows, conflicts } = selectRows(
|
|
8872
9467
|
loadedSources,
|
|
@@ -8880,10 +9475,10 @@ function combineRunSources(options) {
|
|
|
8880
9475
|
});
|
|
8881
9476
|
mkdirSync2(runDir, { recursive: true });
|
|
8882
9477
|
const records = rows.map((row) => rewriteAndCopyRecord(row, runDir));
|
|
8883
|
-
const manifestPath =
|
|
9478
|
+
const manifestPath = path18.join(runDir, "index.jsonl");
|
|
8884
9479
|
writeJsonl(manifestPath, records);
|
|
8885
9480
|
const timing = buildTimingArtifact(results);
|
|
8886
|
-
const timingPath =
|
|
9481
|
+
const timingPath = path18.join(runDir, "timing.json");
|
|
8887
9482
|
writeJson3(timingPath, timing);
|
|
8888
9483
|
const benchmark = buildBenchmarkArtifact(results, "", "combined", results.length);
|
|
8889
9484
|
const benchmarkWithMetadata = {
|
|
@@ -8897,7 +9492,7 @@ function combineRunSources(options) {
|
|
|
8897
9492
|
duplicate_policy: options.duplicatePolicy
|
|
8898
9493
|
}
|
|
8899
9494
|
};
|
|
8900
|
-
const benchmarkPath =
|
|
9495
|
+
const benchmarkPath = path18.join(runDir, "benchmark.json");
|
|
8901
9496
|
writeJson3(benchmarkPath, benchmarkWithMetadata);
|
|
8902
9497
|
const tags = [...new Set(loadedSources.flatMap((source) => source.tags ?? []))].sort();
|
|
8903
9498
|
return {
|
|
@@ -9033,19 +9628,19 @@ import * as readline3 from "node:readline/promises";
|
|
|
9033
9628
|
|
|
9034
9629
|
// src/commands/results/delete-run.ts
|
|
9035
9630
|
import { existsSync as existsSync9, rmSync } from "node:fs";
|
|
9036
|
-
import
|
|
9631
|
+
import path19 from "node:path";
|
|
9037
9632
|
function localRunsRoot(cwd) {
|
|
9038
|
-
return
|
|
9633
|
+
return path19.resolve(cwd, ".agentv", "results", "runs");
|
|
9039
9634
|
}
|
|
9040
9635
|
function assertLocalRunManifest(cwd, manifestPath, runId) {
|
|
9041
|
-
const resolvedManifestPath =
|
|
9042
|
-
if (
|
|
9636
|
+
const resolvedManifestPath = path19.resolve(manifestPath);
|
|
9637
|
+
if (path19.basename(resolvedManifestPath) !== RESULT_INDEX_FILENAME) {
|
|
9043
9638
|
throw new Error("Expected a run workspace directory or index.jsonl manifest");
|
|
9044
9639
|
}
|
|
9045
|
-
const runDir =
|
|
9640
|
+
const runDir = path19.dirname(resolvedManifestPath);
|
|
9046
9641
|
const runsRoot = localRunsRoot(cwd);
|
|
9047
|
-
const relativeRunDir =
|
|
9048
|
-
if (relativeRunDir === "" || relativeRunDir.startsWith("..") ||
|
|
9642
|
+
const relativeRunDir = path19.relative(runsRoot, runDir);
|
|
9643
|
+
if (relativeRunDir === "" || relativeRunDir.startsWith("..") || path19.isAbsolute(relativeRunDir)) {
|
|
9049
9644
|
throw new Error("Run workspace is outside the local results directory");
|
|
9050
9645
|
}
|
|
9051
9646
|
if (!existsSync9(resolvedManifestPath)) {
|
|
@@ -9131,7 +9726,7 @@ var resultsDeleteCommand = command({
|
|
|
9131
9726
|
});
|
|
9132
9727
|
|
|
9133
9728
|
// src/commands/results/export.ts
|
|
9134
|
-
import
|
|
9729
|
+
import path20 from "node:path";
|
|
9135
9730
|
|
|
9136
9731
|
// src/commands/results/shared.ts
|
|
9137
9732
|
import { existsSync as existsSync10 } from "node:fs";
|
|
@@ -9178,20 +9773,20 @@ async function loadResults(source, cwd) {
|
|
|
9178
9773
|
|
|
9179
9774
|
// src/commands/results/export.ts
|
|
9180
9775
|
function deriveOutputDir(cwd, sourceFile) {
|
|
9181
|
-
if (
|
|
9776
|
+
if (path20.basename(sourceFile) !== RESULT_INDEX_FILENAME) {
|
|
9182
9777
|
throw new Error(`Expected a run manifest named ${RESULT_INDEX_FILENAME}: ${sourceFile}`);
|
|
9183
9778
|
}
|
|
9184
|
-
const runDir =
|
|
9185
|
-
const segments =
|
|
9779
|
+
const runDir = path20.dirname(sourceFile);
|
|
9780
|
+
const segments = path20.normalize(runDir).split(path20.sep).filter(Boolean);
|
|
9186
9781
|
const runsIndex = segments.lastIndexOf("runs");
|
|
9187
9782
|
if (runsIndex >= 0 && runsIndex < segments.length - 1) {
|
|
9188
|
-
return
|
|
9783
|
+
return path20.join(cwd, ".agentv", "results", "export", ...segments.slice(runsIndex + 1));
|
|
9189
9784
|
}
|
|
9190
|
-
const parentDir =
|
|
9785
|
+
const parentDir = path20.basename(runDir);
|
|
9191
9786
|
if (parentDir.startsWith("eval_")) {
|
|
9192
|
-
return
|
|
9787
|
+
return path20.join(cwd, ".agentv", "results", "export", parentDir.slice(5));
|
|
9193
9788
|
}
|
|
9194
|
-
return
|
|
9789
|
+
return path20.join(cwd, ".agentv", "results", "export", parentDir);
|
|
9195
9790
|
}
|
|
9196
9791
|
async function loadExportSource(source, cwd) {
|
|
9197
9792
|
const { sourceFile } = await resolveSourceFile(source, cwd);
|
|
@@ -9224,8 +9819,8 @@ var resultsExportCommand = command({
|
|
|
9224
9819
|
const cwd = dir ?? process.cwd();
|
|
9225
9820
|
try {
|
|
9226
9821
|
const { sourceFile, results } = await loadExportSource(source, cwd);
|
|
9227
|
-
const outputDir = out ?
|
|
9228
|
-
await
|
|
9822
|
+
const outputDir = out ? path20.isAbsolute(out) ? out : path20.resolve(cwd, out) : deriveOutputDir(cwd, sourceFile);
|
|
9823
|
+
await writeArtifactsFromResults2(results, outputDir, {
|
|
9229
9824
|
evalFile: sourceFile
|
|
9230
9825
|
});
|
|
9231
9826
|
console.log(`Exported ${results.length} test(s) to ${outputDir}`);
|
|
@@ -9285,7 +9880,7 @@ var resultsFailuresCommand = command({
|
|
|
9285
9880
|
|
|
9286
9881
|
// src/commands/results/report.ts
|
|
9287
9882
|
import { existsSync as existsSync11, mkdirSync as mkdirSync3, readFileSync as readFileSync8, writeFileSync as writeFileSync4 } from "node:fs";
|
|
9288
|
-
import
|
|
9883
|
+
import path21 from "node:path";
|
|
9289
9884
|
|
|
9290
9885
|
// src/commands/results/report-template.ts
|
|
9291
9886
|
var RESULTS_REPORT_TEMPLATE = `<!DOCTYPE html>
|
|
@@ -10909,10 +11504,10 @@ function normalizeEvalFileLabel(value) {
|
|
|
10909
11504
|
if (!trimmed) {
|
|
10910
11505
|
return void 0;
|
|
10911
11506
|
}
|
|
10912
|
-
return
|
|
11507
|
+
return path21.basename(trimmed).replace(/\.results\.jsonl$/i, "").replace(/\.eval\.ya?ml$/i, "").replace(/\.ya?ml$/i, "").replace(/\.jsonl$/i, "");
|
|
10913
11508
|
}
|
|
10914
11509
|
function readBenchmarkEvalFile(sourceFile) {
|
|
10915
|
-
const benchmarkPath =
|
|
11510
|
+
const benchmarkPath = path21.join(path21.dirname(sourceFile), "benchmark.json");
|
|
10916
11511
|
if (!existsSync11(benchmarkPath)) {
|
|
10917
11512
|
return void 0;
|
|
10918
11513
|
}
|
|
@@ -10924,10 +11519,10 @@ function readBenchmarkEvalFile(sourceFile) {
|
|
|
10924
11519
|
}
|
|
10925
11520
|
}
|
|
10926
11521
|
function deriveReportPath(sourceFile) {
|
|
10927
|
-
return
|
|
11522
|
+
return path21.join(path21.dirname(sourceFile), "report.html");
|
|
10928
11523
|
}
|
|
10929
11524
|
function serializeReportResult(result, sourceFile, manifestRecord, benchmarkEvalFile) {
|
|
10930
|
-
const fallbackEvalFile = normalizeEvalFileLabel(manifestRecord?.eval_file) ?? benchmarkEvalFile ?? normalizeEvalFileLabel(result.suite) ??
|
|
11525
|
+
const fallbackEvalFile = normalizeEvalFileLabel(manifestRecord?.eval_file) ?? benchmarkEvalFile ?? normalizeEvalFileLabel(result.suite) ?? path21.basename(path21.dirname(sourceFile));
|
|
10931
11526
|
return {
|
|
10932
11527
|
timestamp: result.timestamp,
|
|
10933
11528
|
test_id: result.testId,
|
|
@@ -10975,9 +11570,9 @@ function renderResultsReport(results, sourceFile, records, benchmarkEvalFile) {
|
|
|
10975
11570
|
}
|
|
10976
11571
|
async function writeResultsReport(source, outputPath, cwd) {
|
|
10977
11572
|
const { sourceFile, results, records, benchmarkEvalFile } = await loadReportSource(source, cwd);
|
|
10978
|
-
const resolvedOutputPath = outputPath ?
|
|
11573
|
+
const resolvedOutputPath = outputPath ? path21.isAbsolute(outputPath) ? outputPath : path21.resolve(cwd, outputPath) : deriveReportPath(sourceFile);
|
|
10979
11574
|
const html = renderResultsReport(results, sourceFile, records, benchmarkEvalFile);
|
|
10980
|
-
mkdirSync3(
|
|
11575
|
+
mkdirSync3(path21.dirname(resolvedOutputPath), { recursive: true });
|
|
10981
11576
|
writeFileSync4(resolvedOutputPath, html, "utf8");
|
|
10982
11577
|
const written = readFileSync8(resolvedOutputPath, "utf8");
|
|
10983
11578
|
if (written.includes("__DATA_PLACEHOLDER__")) {
|
|
@@ -11165,10 +11760,10 @@ var resultsSummaryCommand = command({
|
|
|
11165
11760
|
|
|
11166
11761
|
// src/commands/results/validate.ts
|
|
11167
11762
|
import { existsSync as existsSync13, readFileSync as readFileSync10, statSync as statSync5 } from "node:fs";
|
|
11168
|
-
import
|
|
11763
|
+
import path22 from "node:path";
|
|
11169
11764
|
function checkDirectoryNaming(runDir) {
|
|
11170
|
-
const dirName =
|
|
11171
|
-
const pathSegments =
|
|
11765
|
+
const dirName = path22.basename(runDir);
|
|
11766
|
+
const pathSegments = path22.normalize(runDir).split(path22.sep).filter(Boolean);
|
|
11172
11767
|
const runsIndex = pathSegments.lastIndexOf("runs");
|
|
11173
11768
|
const diagnostics = [];
|
|
11174
11769
|
if (runsIndex < 0 || runsIndex >= pathSegments.length - 1) {
|
|
@@ -11198,7 +11793,7 @@ function validateRunDirectory(runDir) {
|
|
|
11198
11793
|
return { diagnostics, entries: entries2 };
|
|
11199
11794
|
}
|
|
11200
11795
|
function checkIndexJsonl(runDir) {
|
|
11201
|
-
const indexPath =
|
|
11796
|
+
const indexPath = path22.join(runDir, "index.jsonl");
|
|
11202
11797
|
const diagnostics = [];
|
|
11203
11798
|
const entries2 = [];
|
|
11204
11799
|
if (!existsSync13(indexPath)) {
|
|
@@ -11297,7 +11892,7 @@ function checkArtifactFiles(runDir, entries2) {
|
|
|
11297
11892
|
for (const entry of entries2) {
|
|
11298
11893
|
const testId = entry.test_id ?? "?";
|
|
11299
11894
|
if (entry.grading_path) {
|
|
11300
|
-
const gradingPath =
|
|
11895
|
+
const gradingPath = path22.join(runDir, entry.grading_path);
|
|
11301
11896
|
if (!existsSync13(gradingPath)) {
|
|
11302
11897
|
diagnostics.push({
|
|
11303
11898
|
severity: "error",
|
|
@@ -11327,7 +11922,7 @@ function checkArtifactFiles(runDir, entries2) {
|
|
|
11327
11922
|
}
|
|
11328
11923
|
}
|
|
11329
11924
|
if (entry.timing_path) {
|
|
11330
|
-
const timingPath =
|
|
11925
|
+
const timingPath = path22.join(runDir, entry.timing_path);
|
|
11331
11926
|
if (!existsSync13(timingPath)) {
|
|
11332
11927
|
diagnostics.push({
|
|
11333
11928
|
severity: "warning",
|
|
@@ -11336,7 +11931,7 @@ function checkArtifactFiles(runDir, entries2) {
|
|
|
11336
11931
|
}
|
|
11337
11932
|
}
|
|
11338
11933
|
}
|
|
11339
|
-
const benchmarkPath =
|
|
11934
|
+
const benchmarkPath = path22.join(runDir, "benchmark.json");
|
|
11340
11935
|
if (!existsSync13(benchmarkPath)) {
|
|
11341
11936
|
diagnostics.push({ severity: "warning", message: "benchmark.json is missing" });
|
|
11342
11937
|
}
|
|
@@ -11353,7 +11948,7 @@ var resultsValidateCommand = command({
|
|
|
11353
11948
|
})
|
|
11354
11949
|
},
|
|
11355
11950
|
handler: async ({ runDir }) => {
|
|
11356
|
-
const resolvedDir =
|
|
11951
|
+
const resolvedDir = path22.resolve(runDir);
|
|
11357
11952
|
if (!existsSync13(resolvedDir) || !statSync5(resolvedDir).isDirectory()) {
|
|
11358
11953
|
console.error(`Error: '${runDir}' is not a directory`);
|
|
11359
11954
|
process.exit(1);
|
|
@@ -11399,14 +11994,14 @@ var resultsCommand = subcommands({
|
|
|
11399
11994
|
|
|
11400
11995
|
// src/commands/results/serve.ts
|
|
11401
11996
|
import { existsSync as existsSync16, readFileSync as readFileSync12, readdirSync as readdirSync4, statSync as statSync6, writeFileSync as writeFileSync6 } from "node:fs";
|
|
11402
|
-
import
|
|
11997
|
+
import path25 from "node:path";
|
|
11403
11998
|
import { fileURLToPath as fileURLToPath3 } from "node:url";
|
|
11404
11999
|
import { Hono } from "hono";
|
|
11405
12000
|
|
|
11406
12001
|
// src/commands/results/eval-runner.ts
|
|
11407
12002
|
import { execFileSync as execFileSync2, spawn } from "node:child_process";
|
|
11408
12003
|
import { createWriteStream, existsSync as existsSync14, mkdirSync as mkdirSync4 } from "node:fs";
|
|
11409
|
-
import
|
|
12004
|
+
import path23 from "node:path";
|
|
11410
12005
|
import { fileURLToPath as fileURLToPath2 } from "node:url";
|
|
11411
12006
|
var activeRuns = /* @__PURE__ */ new Map();
|
|
11412
12007
|
function generateRunId() {
|
|
@@ -11426,7 +12021,7 @@ function pruneFinishedRuns() {
|
|
|
11426
12021
|
}
|
|
11427
12022
|
function getActiveRunTarget(indexJsonlPath) {
|
|
11428
12023
|
for (const run2 of activeRuns.values()) {
|
|
11429
|
-
if (run2.outputDir &&
|
|
12024
|
+
if (run2.outputDir && path23.join(run2.outputDir, "index.jsonl") === indexJsonlPath) {
|
|
11430
12025
|
return run2.target;
|
|
11431
12026
|
}
|
|
11432
12027
|
}
|
|
@@ -11434,7 +12029,7 @@ function getActiveRunTarget(indexJsonlPath) {
|
|
|
11434
12029
|
}
|
|
11435
12030
|
function getActiveRunStatus(indexJsonlPath) {
|
|
11436
12031
|
for (const run2 of activeRuns.values()) {
|
|
11437
|
-
if (run2.outputDir &&
|
|
12032
|
+
if (run2.outputDir && path23.join(run2.outputDir, "index.jsonl") === indexJsonlPath) {
|
|
11438
12033
|
return run2.status;
|
|
11439
12034
|
}
|
|
11440
12035
|
}
|
|
@@ -11444,7 +12039,7 @@ async function discoverTargetsInProject(cwd) {
|
|
|
11444
12039
|
const repoRoot = await findRepoRoot(cwd) ?? cwd;
|
|
11445
12040
|
let targetsFilePath;
|
|
11446
12041
|
for (const candidate of TARGET_FILE_CANDIDATES) {
|
|
11447
|
-
const fullPath =
|
|
12042
|
+
const fullPath = path23.join(cwd, candidate);
|
|
11448
12043
|
if (existsSync14(fullPath)) {
|
|
11449
12044
|
targetsFilePath = fullPath;
|
|
11450
12045
|
break;
|
|
@@ -11452,7 +12047,7 @@ async function discoverTargetsInProject(cwd) {
|
|
|
11452
12047
|
}
|
|
11453
12048
|
if (!targetsFilePath) {
|
|
11454
12049
|
for (const candidate of TARGET_FILE_CANDIDATES) {
|
|
11455
|
-
const fullPath =
|
|
12050
|
+
const fullPath = path23.join(repoRoot, candidate);
|
|
11456
12051
|
if (existsSync14(fullPath)) {
|
|
11457
12052
|
targetsFilePath = fullPath;
|
|
11458
12053
|
break;
|
|
@@ -11526,17 +12121,17 @@ function buildCliPreview(args) {
|
|
|
11526
12121
|
}
|
|
11527
12122
|
function resolveCliPath(cwd) {
|
|
11528
12123
|
const candidates = [
|
|
11529
|
-
|
|
11530
|
-
|
|
12124
|
+
path23.join(cwd, "apps/cli/src/cli.ts"),
|
|
12125
|
+
path23.join(cwd, "apps/cli/dist/cli.js")
|
|
11531
12126
|
];
|
|
11532
12127
|
for (const c4 of candidates) {
|
|
11533
12128
|
if (existsSync14(c4)) {
|
|
11534
12129
|
return { binPath: "bun", args: [c4] };
|
|
11535
12130
|
}
|
|
11536
12131
|
}
|
|
11537
|
-
const currentDir = typeof __dirname !== "undefined" ? __dirname :
|
|
11538
|
-
const fromSrc =
|
|
11539
|
-
const fromDist =
|
|
12132
|
+
const currentDir = typeof __dirname !== "undefined" ? __dirname : path23.dirname(fileURLToPath2(import.meta.url));
|
|
12133
|
+
const fromSrc = path23.resolve(currentDir, "../../cli.ts");
|
|
12134
|
+
const fromDist = path23.resolve(currentDir, "cli.js");
|
|
11540
12135
|
if (existsSync14(fromSrc)) return { binPath: "bun", args: [fromSrc] };
|
|
11541
12136
|
if (existsSync14(fromDist)) return { binPath: "bun", args: [fromDist] };
|
|
11542
12137
|
if (isCommandAvailable("agentv")) {
|
|
@@ -11555,7 +12150,7 @@ function isCommandAvailable(cmd) {
|
|
|
11555
12150
|
function openConsoleLogStream(outputDir) {
|
|
11556
12151
|
try {
|
|
11557
12152
|
mkdirSync4(outputDir, { recursive: true });
|
|
11558
|
-
const stream = createWriteStream(
|
|
12153
|
+
const stream = createWriteStream(path23.join(outputDir, "console.log"), { flags: "w" });
|
|
11559
12154
|
stream.on("error", () => {
|
|
11560
12155
|
});
|
|
11561
12156
|
return stream;
|
|
@@ -11612,7 +12207,7 @@ function registerEvalRoutes(app2, getCwd, options) {
|
|
|
11612
12207
|
return c4.json({ error: "Cannot locate agentv CLI entry point" }, 500);
|
|
11613
12208
|
}
|
|
11614
12209
|
const args = buildCliArgs(body);
|
|
11615
|
-
const outputDir = body.output?.trim() ?
|
|
12210
|
+
const outputDir = body.output?.trim() ? path23.resolve(cwd, body.output.trim()) : buildDefaultRunDir(cwd);
|
|
11616
12211
|
if (!body.output?.trim()) {
|
|
11617
12212
|
args.push("--output", outputDir);
|
|
11618
12213
|
}
|
|
@@ -11790,7 +12385,7 @@ Process error: ${err2.message}
|
|
|
11790
12385
|
return c4.json({ error: "Cannot locate agentv CLI entry point" }, 500);
|
|
11791
12386
|
}
|
|
11792
12387
|
const args = buildCliArgs(body);
|
|
11793
|
-
const outputDir = body.output?.trim() ?
|
|
12388
|
+
const outputDir = body.output?.trim() ? path23.resolve(cwd, body.output.trim()) : buildDefaultRunDir(cwd);
|
|
11794
12389
|
if (!body.output?.trim()) {
|
|
11795
12390
|
args.push("--output", outputDir);
|
|
11796
12391
|
}
|
|
@@ -11913,17 +12508,17 @@ Process error: ${err2.message}
|
|
|
11913
12508
|
|
|
11914
12509
|
// src/commands/results/studio-config.ts
|
|
11915
12510
|
import { existsSync as existsSync15, mkdirSync as mkdirSync5, readFileSync as readFileSync11, writeFileSync as writeFileSync5 } from "node:fs";
|
|
11916
|
-
import
|
|
12511
|
+
import path24 from "node:path";
|
|
11917
12512
|
import { stringify as stringifyYaml3 } from "yaml";
|
|
11918
12513
|
var DEFAULTS = {
|
|
11919
12514
|
threshold: DEFAULT_THRESHOLD,
|
|
11920
12515
|
appName: "agentv"
|
|
11921
12516
|
};
|
|
11922
12517
|
function loadStudioConfig(agentvDir) {
|
|
11923
|
-
const localConfigPath =
|
|
11924
|
-
const globalConfigPath =
|
|
12518
|
+
const localConfigPath = path24.join(agentvDir, "config.yaml");
|
|
12519
|
+
const globalConfigPath = path24.join(getAgentvConfigDir(), "config.yaml");
|
|
11925
12520
|
const localConfig = loadParsedConfig(localConfigPath);
|
|
11926
|
-
const globalConfig =
|
|
12521
|
+
const globalConfig = path24.resolve(globalConfigPath) === path24.resolve(localConfigPath) ? void 0 : loadParsedConfig(globalConfigPath);
|
|
11927
12522
|
const threshold = [
|
|
11928
12523
|
readThreshold(localConfig?.dashboard),
|
|
11929
12524
|
readThreshold(localConfig?.studio),
|
|
@@ -11963,7 +12558,7 @@ function saveStudioConfig(agentvDir, config) {
|
|
|
11963
12558
|
if (!existsSync15(agentvDir)) {
|
|
11964
12559
|
mkdirSync5(agentvDir, { recursive: true });
|
|
11965
12560
|
}
|
|
11966
|
-
const configPath =
|
|
12561
|
+
const configPath = path24.join(agentvDir, "config.yaml");
|
|
11967
12562
|
let existing = {};
|
|
11968
12563
|
if (existsSync15(configPath)) {
|
|
11969
12564
|
const raw = readFileSync11(configPath, "utf-8");
|
|
@@ -12034,13 +12629,13 @@ function resolveDashboardMode(_projectCount, options) {
|
|
|
12034
12629
|
}
|
|
12035
12630
|
function bootstrapCurrentProject(cwd, options) {
|
|
12036
12631
|
if (options.single === true) return {};
|
|
12037
|
-
if (!existsSync16(
|
|
12632
|
+
if (!existsSync16(path25.join(cwd, ".agentv"))) return {};
|
|
12038
12633
|
const entry = addProject(cwd);
|
|
12039
12634
|
touchProject(entry.id);
|
|
12040
12635
|
return { currentProjectId: entry.id };
|
|
12041
12636
|
}
|
|
12042
12637
|
function feedbackPath(resultDir) {
|
|
12043
|
-
return
|
|
12638
|
+
return path25.join(resultDir, "feedback.json");
|
|
12044
12639
|
}
|
|
12045
12640
|
function readFeedback(cwd) {
|
|
12046
12641
|
const fp = feedbackPath(cwd);
|
|
@@ -12067,8 +12662,8 @@ function buildFileTree(dirPath, relativeTo) {
|
|
|
12067
12662
|
if (a.isDirectory() !== b.isDirectory()) return a.isDirectory() ? -1 : 1;
|
|
12068
12663
|
return a.name.localeCompare(b.name);
|
|
12069
12664
|
}).map((entry) => {
|
|
12070
|
-
const fullPath =
|
|
12071
|
-
const relPath =
|
|
12665
|
+
const fullPath = path25.join(dirPath, entry.name);
|
|
12666
|
+
const relPath = path25.relative(relativeTo, fullPath);
|
|
12072
12667
|
if (entry.isDirectory()) {
|
|
12073
12668
|
return {
|
|
12074
12669
|
name: entry.name,
|
|
@@ -12081,7 +12676,7 @@ function buildFileTree(dirPath, relativeTo) {
|
|
|
12081
12676
|
});
|
|
12082
12677
|
}
|
|
12083
12678
|
function inferLanguage(filePath) {
|
|
12084
|
-
const ext =
|
|
12679
|
+
const ext = path25.extname(filePath).toLowerCase();
|
|
12085
12680
|
const langMap = {
|
|
12086
12681
|
".json": "json",
|
|
12087
12682
|
".jsonl": "json",
|
|
@@ -12108,14 +12703,14 @@ function inferLanguage(filePath) {
|
|
|
12108
12703
|
return langMap[ext] ?? "plaintext";
|
|
12109
12704
|
}
|
|
12110
12705
|
function inferRawContentType(filePath) {
|
|
12111
|
-
const ext =
|
|
12706
|
+
const ext = path25.extname(filePath).toLowerCase();
|
|
12112
12707
|
if (ext === ".json") return "application/json; charset=utf-8";
|
|
12113
12708
|
if (ext === ".jsonl") return "text/plain; charset=utf-8";
|
|
12114
12709
|
if (ext === ".md") return "text/markdown; charset=utf-8";
|
|
12115
12710
|
return "text/plain; charset=utf-8";
|
|
12116
12711
|
}
|
|
12117
12712
|
function contentDispositionFilename(filePath) {
|
|
12118
|
-
return
|
|
12713
|
+
return path25.basename(filePath).replace(/["\\\r\n]/g, "_");
|
|
12119
12714
|
}
|
|
12120
12715
|
function stripHeavyFields(results) {
|
|
12121
12716
|
return results.map((r) => {
|
|
@@ -12335,7 +12930,7 @@ async function handleRunLog(c4, { searchDir, projectId }) {
|
|
|
12335
12930
|
if (meta.source === "remote") {
|
|
12336
12931
|
return c4.json({ error: "Run log is not available for remote runs" }, 404);
|
|
12337
12932
|
}
|
|
12338
|
-
const logPath =
|
|
12933
|
+
const logPath = path25.join(path25.dirname(meta.path), "console.log");
|
|
12339
12934
|
if (!existsSync16(logPath)) {
|
|
12340
12935
|
return c4.json({ error: "Run log not found for this run" }, 404);
|
|
12341
12936
|
}
|
|
@@ -12369,11 +12964,11 @@ async function handleRunDetail(c4, { searchDir, projectId }) {
|
|
|
12369
12964
|
}
|
|
12370
12965
|
function deriveResumeMeta(cwd, manifestPath) {
|
|
12371
12966
|
const out = {};
|
|
12372
|
-
const runDir =
|
|
12373
|
-
const relative3 =
|
|
12967
|
+
const runDir = path25.dirname(manifestPath);
|
|
12968
|
+
const relative3 = path25.relative(cwd, runDir);
|
|
12374
12969
|
out.run_dir = relative3 !== "" && !relative3.startsWith("..") ? relative3 : runDir;
|
|
12375
12970
|
try {
|
|
12376
|
-
const benchmarkPath =
|
|
12971
|
+
const benchmarkPath = path25.join(runDir, "benchmark.json");
|
|
12377
12972
|
if (existsSync16(benchmarkPath)) {
|
|
12378
12973
|
const parsed = JSON.parse(readFileSync12(benchmarkPath, "utf8"));
|
|
12379
12974
|
const evalFile = parsed.metadata?.eval_file;
|
|
@@ -12510,7 +13105,7 @@ async function handleEvalFiles(c4, { searchDir, projectId }) {
|
|
|
12510
13105
|
const records = await parseManifestForMeta(searchDir, meta, projectId);
|
|
12511
13106
|
const record = records.find((r) => r.test_id === evalId);
|
|
12512
13107
|
if (!record) return c4.json({ error: "Eval not found" }, 404);
|
|
12513
|
-
const baseDir =
|
|
13108
|
+
const baseDir = path25.dirname(meta.path);
|
|
12514
13109
|
const knownPaths = [
|
|
12515
13110
|
record.grading_path,
|
|
12516
13111
|
record.timing_path,
|
|
@@ -12526,14 +13121,14 @@ async function handleEvalFiles(c4, { searchDir, projectId }) {
|
|
|
12526
13121
|
record.graders_path
|
|
12527
13122
|
].filter((p) => !!p);
|
|
12528
13123
|
if (knownPaths.length === 0) return c4.json({ files: [] });
|
|
12529
|
-
const artifactDirs = knownPaths.map((p) =>
|
|
13124
|
+
const artifactDirs = knownPaths.map((p) => path25.dirname(p));
|
|
12530
13125
|
let commonDir = artifactDirs[0];
|
|
12531
13126
|
for (const dir of artifactDirs) {
|
|
12532
13127
|
while (!dir.startsWith(commonDir)) {
|
|
12533
|
-
commonDir =
|
|
13128
|
+
commonDir = path25.dirname(commonDir);
|
|
12534
13129
|
}
|
|
12535
13130
|
}
|
|
12536
|
-
const artifactAbsDir =
|
|
13131
|
+
const artifactAbsDir = path25.join(baseDir, commonDir);
|
|
12537
13132
|
const files = buildFileTree(artifactAbsDir, baseDir);
|
|
12538
13133
|
return c4.json({ files });
|
|
12539
13134
|
} catch {
|
|
@@ -12555,9 +13150,9 @@ async function handleEvalFileContent(c4, { searchDir, projectId }) {
|
|
|
12555
13150
|
}
|
|
12556
13151
|
if (!filePath) return c4.json({ error: "No file path specified" }, 400);
|
|
12557
13152
|
await ensureRunReadable(searchDir, meta, projectId);
|
|
12558
|
-
const baseDir =
|
|
12559
|
-
const absolutePath =
|
|
12560
|
-
if (!absolutePath.startsWith(
|
|
13153
|
+
const baseDir = path25.dirname(meta.path);
|
|
13154
|
+
const absolutePath = path25.resolve(baseDir, filePath);
|
|
13155
|
+
if (!absolutePath.startsWith(path25.resolve(baseDir) + path25.sep) && absolutePath !== path25.resolve(baseDir)) {
|
|
12561
13156
|
return c4.json({ error: "Path traversal not allowed" }, 403);
|
|
12562
13157
|
}
|
|
12563
13158
|
if (!existsSync16(absolutePath) || !statSync6(absolutePath).isFile()) {
|
|
@@ -12828,13 +13423,13 @@ function handleConfig(c4, { agentvDir, searchDir }, options) {
|
|
|
12828
13423
|
threshold: config.threshold,
|
|
12829
13424
|
app_name: config.appName,
|
|
12830
13425
|
read_only: options?.readOnly === true,
|
|
12831
|
-
project_name:
|
|
13426
|
+
project_name: path25.basename(searchDir),
|
|
12832
13427
|
project_dashboard: options?.projectDashboard === true,
|
|
12833
13428
|
...options?.currentProjectId && { current_project_id: options.currentProjectId }
|
|
12834
13429
|
});
|
|
12835
13430
|
}
|
|
12836
13431
|
function handleFeedbackRead(c4, { searchDir }) {
|
|
12837
|
-
const resultsDir =
|
|
13432
|
+
const resultsDir = path25.join(searchDir, ".agentv", "results");
|
|
12838
13433
|
return c4.json(readFeedback(existsSync16(resultsDir) ? resultsDir : searchDir));
|
|
12839
13434
|
}
|
|
12840
13435
|
async function handleRunTagsPut(c4, { searchDir, projectId }) {
|
|
@@ -12904,7 +13499,7 @@ async function handleRunDelete(c4, { searchDir, projectId }) {
|
|
|
12904
13499
|
}
|
|
12905
13500
|
}
|
|
12906
13501
|
function getLocalRunsRoot(searchDir) {
|
|
12907
|
-
return
|
|
13502
|
+
return path25.join(searchDir, ".agentv", "results", "runs");
|
|
12908
13503
|
}
|
|
12909
13504
|
function validateLocalCompletedRun(searchDir, meta, actionName = "Run combine") {
|
|
12910
13505
|
if (meta.source === "remote") {
|
|
@@ -12913,13 +13508,13 @@ function validateLocalCompletedRun(searchDir, meta, actionName = "Run combine")
|
|
|
12913
13508
|
if (getActiveRunStatus(meta.path) === "starting" || getActiveRunStatus(meta.path) === "running") {
|
|
12914
13509
|
return { error: "Run is still active", status: 409 };
|
|
12915
13510
|
}
|
|
12916
|
-
const manifestPath =
|
|
12917
|
-
if (
|
|
13511
|
+
const manifestPath = path25.resolve(meta.path);
|
|
13512
|
+
if (path25.basename(manifestPath) !== "index.jsonl") {
|
|
12918
13513
|
return { error: "Run workspace is invalid", status: 400 };
|
|
12919
13514
|
}
|
|
12920
|
-
const runDir =
|
|
12921
|
-
const runsRoot =
|
|
12922
|
-
if (runDir !== runsRoot && runDir.startsWith(`${runsRoot}${
|
|
13515
|
+
const runDir = path25.dirname(manifestPath);
|
|
13516
|
+
const runsRoot = path25.resolve(getLocalRunsRoot(searchDir));
|
|
13517
|
+
if (runDir !== runsRoot && runDir.startsWith(`${runsRoot}${path25.sep}`) && existsSync16(runDir)) {
|
|
12923
13518
|
return { ok: true };
|
|
12924
13519
|
}
|
|
12925
13520
|
return { error: "Run workspace is outside the local results directory", status: 400 };
|
|
@@ -13001,7 +13596,7 @@ async function handleRunsCombine(c4, { searchDir, projectId }) {
|
|
|
13001
13596
|
}
|
|
13002
13597
|
function createApp(results, resultDir, cwd, sourceFile, options) {
|
|
13003
13598
|
const searchDir = cwd ?? resultDir;
|
|
13004
|
-
const agentvDir =
|
|
13599
|
+
const agentvDir = path25.join(searchDir, ".agentv");
|
|
13005
13600
|
const defaultCtx = { searchDir, agentvDir, projectId: options?.currentProjectId };
|
|
13006
13601
|
const readOnly = options?.readOnly === true;
|
|
13007
13602
|
const app2 = new Hono();
|
|
@@ -13012,7 +13607,7 @@ function createApp(results, resultDir, cwd, sourceFile, options) {
|
|
|
13012
13607
|
}
|
|
13013
13608
|
return handler(c4, {
|
|
13014
13609
|
searchDir: project.path,
|
|
13015
|
-
agentvDir:
|
|
13610
|
+
agentvDir: path25.join(project.path, ".agentv"),
|
|
13016
13611
|
projectId: project.id
|
|
13017
13612
|
});
|
|
13018
13613
|
}
|
|
@@ -13044,7 +13639,7 @@ function createApp(results, resultDir, cwd, sourceFile, options) {
|
|
|
13044
13639
|
}
|
|
13045
13640
|
async function summarizeProjectRunMetas(project) {
|
|
13046
13641
|
const { runs: metas } = await listMergedResultFiles(project.path, void 0, project.id);
|
|
13047
|
-
const threshold = loadStudioConfig(
|
|
13642
|
+
const threshold = loadStudioConfig(path25.join(project.path, ".agentv")).threshold;
|
|
13048
13643
|
let passRateSum = 0;
|
|
13049
13644
|
let executionErrorCount = 0;
|
|
13050
13645
|
for (const meta of metas) {
|
|
@@ -13140,7 +13735,7 @@ function createApp(results, resultDir, cwd, sourceFile, options) {
|
|
|
13140
13735
|
if (records.length > 0) {
|
|
13141
13736
|
const qualitySummary = summarizeQualityResults(
|
|
13142
13737
|
records,
|
|
13143
|
-
loadStudioConfig(
|
|
13738
|
+
loadStudioConfig(path25.join(p.path, ".agentv")).threshold
|
|
13144
13739
|
);
|
|
13145
13740
|
target = records[0].target;
|
|
13146
13741
|
experiment = records[0].experiment ?? experiment;
|
|
@@ -13410,22 +14005,22 @@ function createApp(results, resultDir, cwd, sourceFile, options) {
|
|
|
13410
14005
|
{ readOnly }
|
|
13411
14006
|
);
|
|
13412
14007
|
const studioDistPath = options?.studioDir ?? resolveStudioDistDir();
|
|
13413
|
-
if (!studioDistPath || !existsSync16(
|
|
14008
|
+
if (!studioDistPath || !existsSync16(path25.join(studioDistPath, "index.html"))) {
|
|
13414
14009
|
throw new Error(
|
|
13415
14010
|
'Dashboard dist not found. Run "bun run build" in apps/dashboard/ to build the SPA.'
|
|
13416
14011
|
);
|
|
13417
14012
|
}
|
|
13418
14013
|
app2.get("/", (c4) => {
|
|
13419
|
-
const indexPath =
|
|
14014
|
+
const indexPath = path25.join(studioDistPath, "index.html");
|
|
13420
14015
|
if (existsSync16(indexPath)) return c4.html(readFileSync12(indexPath, "utf8"));
|
|
13421
14016
|
return c4.notFound();
|
|
13422
14017
|
});
|
|
13423
14018
|
app2.get("/assets/*", (c4) => {
|
|
13424
14019
|
const assetPath = c4.req.path;
|
|
13425
|
-
const filePath =
|
|
14020
|
+
const filePath = path25.join(studioDistPath, assetPath);
|
|
13426
14021
|
if (!existsSync16(filePath)) return c4.notFound();
|
|
13427
14022
|
const content = readFileSync12(filePath);
|
|
13428
|
-
const ext =
|
|
14023
|
+
const ext = path25.extname(filePath);
|
|
13429
14024
|
const mimeTypes = {
|
|
13430
14025
|
".js": "application/javascript",
|
|
13431
14026
|
".css": "text/css",
|
|
@@ -13446,26 +14041,26 @@ function createApp(results, resultDir, cwd, sourceFile, options) {
|
|
|
13446
14041
|
});
|
|
13447
14042
|
app2.get("*", (c4) => {
|
|
13448
14043
|
if (c4.req.path.startsWith("/api/")) return c4.json({ error: "Not found" }, 404);
|
|
13449
|
-
const indexPath =
|
|
14044
|
+
const indexPath = path25.join(studioDistPath, "index.html");
|
|
13450
14045
|
if (existsSync16(indexPath)) return c4.html(readFileSync12(indexPath, "utf8"));
|
|
13451
14046
|
return c4.notFound();
|
|
13452
14047
|
});
|
|
13453
14048
|
return app2;
|
|
13454
14049
|
}
|
|
13455
14050
|
function resolveStudioDistDir() {
|
|
13456
|
-
const currentDir = typeof __dirname !== "undefined" ? __dirname :
|
|
14051
|
+
const currentDir = typeof __dirname !== "undefined" ? __dirname : path25.dirname(fileURLToPath3(import.meta.url));
|
|
13457
14052
|
const candidates = [
|
|
13458
14053
|
// From src/commands/results/ → sibling apps/dashboard/dist
|
|
13459
|
-
|
|
14054
|
+
path25.resolve(currentDir, "../../../../dashboard/dist"),
|
|
13460
14055
|
// From dist/ → sibling apps/dashboard/dist (monorepo dev)
|
|
13461
|
-
|
|
14056
|
+
path25.resolve(currentDir, "../../dashboard/dist"),
|
|
13462
14057
|
// Bundled inside CLI dist (published package: dist/dashboard/)
|
|
13463
|
-
|
|
14058
|
+
path25.resolve(currentDir, "dashboard"),
|
|
13464
14059
|
// From dist/ in monorepo root context
|
|
13465
|
-
|
|
14060
|
+
path25.resolve(currentDir, "../../../apps/dashboard/dist")
|
|
13466
14061
|
];
|
|
13467
14062
|
for (const candidate of candidates) {
|
|
13468
|
-
if (existsSync16(candidate) && existsSync16(
|
|
14063
|
+
if (existsSync16(candidate) && existsSync16(path25.join(candidate, "index.html"))) {
|
|
13469
14064
|
return candidate;
|
|
13470
14065
|
}
|
|
13471
14066
|
}
|
|
@@ -13535,7 +14130,7 @@ var resultsServeCommand = command({
|
|
|
13535
14130
|
return;
|
|
13536
14131
|
}
|
|
13537
14132
|
const repoRoot = await findRepoRoot(cwd);
|
|
13538
|
-
const yamlConfig = await loadConfig(
|
|
14133
|
+
const yamlConfig = await loadConfig(path25.join(cwd, "_"), repoRoot);
|
|
13539
14134
|
if (yamlConfig?.required_version) {
|
|
13540
14135
|
await enforceRequiredVersion(yamlConfig.required_version);
|
|
13541
14136
|
}
|
|
@@ -13565,7 +14160,7 @@ var resultsServeCommand = command({
|
|
|
13565
14160
|
}
|
|
13566
14161
|
}
|
|
13567
14162
|
}
|
|
13568
|
-
const resultDir = sourceFile ?
|
|
14163
|
+
const resultDir = sourceFile ? path25.dirname(path25.resolve(sourceFile)) : cwd;
|
|
13569
14164
|
const app2 = createApp(results, resultDir, cwd, sourceFile, {
|
|
13570
14165
|
readOnly,
|
|
13571
14166
|
projectDashboard,
|
|
@@ -13601,26 +14196,26 @@ var resultsServeCommand = command({
|
|
|
13601
14196
|
|
|
13602
14197
|
// src/commands/runs/rerun.ts
|
|
13603
14198
|
import { constants } from "node:fs";
|
|
13604
|
-
import { access, readFile as
|
|
13605
|
-
import
|
|
14199
|
+
import { access, readFile as readFile8 } from "node:fs/promises";
|
|
14200
|
+
import path26 from "node:path";
|
|
13606
14201
|
import { config as loadDotenv } from "dotenv";
|
|
13607
14202
|
var TASK_EVAL_FILENAME = "EVAL.yaml";
|
|
13608
14203
|
var TASK_TARGETS_FILENAME = "targets.yaml";
|
|
13609
14204
|
var ENV_REF_PATTERN = /\$\{\{\s*([A-Za-z_][A-Za-z0-9_]*)\s*\}\}/g;
|
|
13610
|
-
function
|
|
14205
|
+
function isRecord2(value) {
|
|
13611
14206
|
return typeof value === "object" && value !== null && !Array.isArray(value);
|
|
13612
14207
|
}
|
|
13613
14208
|
function displayRecord(record) {
|
|
13614
14209
|
return `${record.test_id ?? "unknown"}@${record.target ?? "unknown"}`;
|
|
13615
14210
|
}
|
|
13616
14211
|
function resolveSourcePath(cwd, source) {
|
|
13617
|
-
return
|
|
14212
|
+
return path26.isAbsolute(source) ? source : path26.resolve(cwd, source);
|
|
13618
14213
|
}
|
|
13619
14214
|
function resolveRelativeRunPath(runDir, relativePath) {
|
|
13620
14215
|
if (!relativePath || relativePath.trim().length === 0) {
|
|
13621
14216
|
return void 0;
|
|
13622
14217
|
}
|
|
13623
|
-
return
|
|
14218
|
+
return path26.resolve(runDir, relativePath);
|
|
13624
14219
|
}
|
|
13625
14220
|
async function ensureFile(filePath, label) {
|
|
13626
14221
|
try {
|
|
@@ -13649,25 +14244,25 @@ function stringArray(value) {
|
|
|
13649
14244
|
return Array.isArray(value) ? value.filter((entry) => typeof entry === "string" && entry.length > 0) : [];
|
|
13650
14245
|
}
|
|
13651
14246
|
function readExecutionTarget(parsedEval) {
|
|
13652
|
-
if (!
|
|
14247
|
+
if (!isRecord2(parsedEval)) {
|
|
13653
14248
|
return void 0;
|
|
13654
14249
|
}
|
|
13655
14250
|
const execution = parsedEval.execution;
|
|
13656
|
-
if (
|
|
14251
|
+
if (isRecord2(execution) && typeof execution.target === "string" && execution.target.length > 0) {
|
|
13657
14252
|
return execution.target;
|
|
13658
14253
|
}
|
|
13659
14254
|
return typeof parsedEval.target === "string" && parsedEval.target.length > 0 ? parsedEval.target : void 0;
|
|
13660
14255
|
}
|
|
13661
14256
|
async function readTaskTarget(evalPath, fallback) {
|
|
13662
|
-
const raw = await
|
|
14257
|
+
const raw = await readFile8(evalPath, "utf8");
|
|
13663
14258
|
return readExecutionTarget(parseYamlValue(raw)) ?? fallback;
|
|
13664
14259
|
}
|
|
13665
14260
|
async function readTargetDefinitions2(targetsPath) {
|
|
13666
|
-
const parsed = parseYamlValue(await
|
|
13667
|
-
if (!
|
|
14261
|
+
const parsed = parseYamlValue(await readFile8(targetsPath, "utf8"));
|
|
14262
|
+
if (!isRecord2(parsed) || !Array.isArray(parsed.targets)) {
|
|
13668
14263
|
throw new Error(`Targets file is missing a top-level targets array: ${targetsPath}`);
|
|
13669
14264
|
}
|
|
13670
|
-
return parsed.targets.filter(
|
|
14265
|
+
return parsed.targets.filter(isRecord2);
|
|
13671
14266
|
}
|
|
13672
14267
|
function targetName(definition) {
|
|
13673
14268
|
return typeof definition.name === "string" && definition.name.trim().length > 0 ? definition.name.trim() : void 0;
|
|
@@ -13717,7 +14312,7 @@ function collectEnvRefs(value, names = /* @__PURE__ */ new Set()) {
|
|
|
13717
14312
|
}
|
|
13718
14313
|
return names;
|
|
13719
14314
|
}
|
|
13720
|
-
if (
|
|
14315
|
+
if (isRecord2(value)) {
|
|
13721
14316
|
for (const [key, entry] of Object.entries(value)) {
|
|
13722
14317
|
if (key === "required_env") {
|
|
13723
14318
|
for (const required of stringArray(entry)) {
|
|
@@ -13778,15 +14373,15 @@ async function validateTargetFile(targetsPath, targetNames, label) {
|
|
|
13778
14373
|
}
|
|
13779
14374
|
}
|
|
13780
14375
|
function isInsideOrSame(root, candidate) {
|
|
13781
|
-
const relative3 =
|
|
13782
|
-
return relative3 === "" || !relative3.startsWith("..") && !
|
|
14376
|
+
const relative3 = path26.relative(path26.resolve(root), path26.resolve(candidate));
|
|
14377
|
+
return relative3 === "" || !relative3.startsWith("..") && !path26.isAbsolute(relative3);
|
|
13783
14378
|
}
|
|
13784
14379
|
function forbiddenOutputRoots(sourceRunDir, selected) {
|
|
13785
14380
|
return [
|
|
13786
|
-
|
|
14381
|
+
path26.resolve(sourceRunDir),
|
|
13787
14382
|
...selected.flatMap((bundle) => [
|
|
13788
|
-
|
|
13789
|
-
|
|
14383
|
+
path26.resolve(bundle.artifactDir),
|
|
14384
|
+
path26.resolve(bundle.taskDir)
|
|
13790
14385
|
])
|
|
13791
14386
|
];
|
|
13792
14387
|
}
|
|
@@ -13805,13 +14400,13 @@ function defaultOutputDir(cwd, sourceRunDir, selected, experiment) {
|
|
|
13805
14400
|
if (!roots.some((root) => isInsideOrSame(root, candidate))) {
|
|
13806
14401
|
return candidate;
|
|
13807
14402
|
}
|
|
13808
|
-
return
|
|
14403
|
+
return path26.join(path26.dirname(path26.resolve(sourceRunDir)), `rerun-${createRunDirName()}`);
|
|
13809
14404
|
}
|
|
13810
14405
|
async function loadEnvFile2(envFile, cwd, verbose) {
|
|
13811
14406
|
if (!envFile) {
|
|
13812
14407
|
return;
|
|
13813
14408
|
}
|
|
13814
|
-
const resolved =
|
|
14409
|
+
const resolved = path26.isAbsolute(envFile) ? envFile : path26.resolve(cwd, envFile);
|
|
13815
14410
|
await ensureFile(resolved, "Environment file");
|
|
13816
14411
|
const loaded = loadDotenv({ path: resolved, override: false });
|
|
13817
14412
|
if (loaded.error) {
|
|
@@ -13822,7 +14417,7 @@ async function loadEnvFile2(envFile, cwd, verbose) {
|
|
|
13822
14417
|
}
|
|
13823
14418
|
}
|
|
13824
14419
|
async function loadSelectedTaskBundles(options) {
|
|
13825
|
-
const content = await
|
|
14420
|
+
const content = await readFile8(options.indexPath, "utf8");
|
|
13826
14421
|
const records = parseResultManifest(content);
|
|
13827
14422
|
if (records.length === 0) {
|
|
13828
14423
|
throw new Error(`Run manifest contains no result rows: ${options.indexPath}`);
|
|
@@ -13843,8 +14438,8 @@ async function loadSelectedTaskBundles(options) {
|
|
|
13843
14438
|
options.sourceRunDir,
|
|
13844
14439
|
record.task_dir && `${record.task_dir}/${TASK_TARGETS_FILENAME}`
|
|
13845
14440
|
);
|
|
13846
|
-
const taskDir = resolveRelativeRunPath(options.sourceRunDir, record.task_dir) ?? (evalPath ?
|
|
13847
|
-
const artifactDir = resolveRelativeRunPath(options.sourceRunDir, record.artifact_dir) ?? (taskDir ?
|
|
14441
|
+
const taskDir = resolveRelativeRunPath(options.sourceRunDir, record.task_dir) ?? (evalPath ? path26.dirname(evalPath) : void 0);
|
|
14442
|
+
const artifactDir = resolveRelativeRunPath(options.sourceRunDir, record.artifact_dir) ?? (taskDir ? path26.dirname(taskDir) : void 0);
|
|
13848
14443
|
if (!evalPath || !targetsPath || !taskDir || !artifactDir) {
|
|
13849
14444
|
throw new Error(
|
|
13850
14445
|
`Selected result ${recordLabel} is missing task bundle paths. Re-run requires task/EVAL.yaml and task/targets.yaml.`
|
|
@@ -13874,14 +14469,14 @@ async function loadSelectedTaskBundles(options) {
|
|
|
13874
14469
|
function buildSourceMetadataByEvalFile(sourceRunDir, indexPath, selected) {
|
|
13875
14470
|
return new Map(
|
|
13876
14471
|
selected.map((bundle) => [
|
|
13877
|
-
|
|
14472
|
+
path26.resolve(bundle.evalPath),
|
|
13878
14473
|
{
|
|
13879
14474
|
rerunSource: {
|
|
13880
14475
|
mode: "rerun",
|
|
13881
|
-
sourceRunDir:
|
|
13882
|
-
sourceIndexPath:
|
|
13883
|
-
sourceArtifactDir:
|
|
13884
|
-
sourceTaskDir:
|
|
14476
|
+
sourceRunDir: path26.resolve(sourceRunDir),
|
|
14477
|
+
sourceIndexPath: path26.resolve(indexPath),
|
|
14478
|
+
sourceArtifactDir: path26.resolve(bundle.artifactDir),
|
|
14479
|
+
sourceTaskDir: path26.resolve(bundle.taskDir),
|
|
13885
14480
|
sourceTestId: bundle.testId,
|
|
13886
14481
|
sourceTarget: bundle.sourceTarget,
|
|
13887
14482
|
sourceTimestamp: bundle.record.timestamp
|
|
@@ -13952,7 +14547,7 @@ var runsRerunCommand = command({
|
|
|
13952
14547
|
handler: async (args) => {
|
|
13953
14548
|
const cwd = process.cwd();
|
|
13954
14549
|
const indexPath = resolveRunManifestPath(resolveSourcePath(cwd, args.runDir));
|
|
13955
|
-
const sourceRunDir =
|
|
14550
|
+
const sourceRunDir = path26.dirname(indexPath);
|
|
13956
14551
|
await loadEnvFile2(args.envFile, cwd, args.verbose);
|
|
13957
14552
|
const selected = await loadSelectedTaskBundles({
|
|
13958
14553
|
indexPath,
|
|
@@ -13961,10 +14556,10 @@ var runsRerunCommand = command({
|
|
|
13961
14556
|
sourceTargets: args.sourceTarget
|
|
13962
14557
|
});
|
|
13963
14558
|
const targetOverrides = args.target;
|
|
13964
|
-
const outputDir = args.output ?
|
|
14559
|
+
const outputDir = args.output ? path26.resolve(cwd, args.output) : defaultOutputDir(cwd, sourceRunDir, selected, args.experiment);
|
|
13965
14560
|
assertOutputIsSeparate(outputDir, forbiddenOutputRoots(sourceRunDir, selected));
|
|
13966
14561
|
if (args.targets) {
|
|
13967
|
-
const overrideTargetsPath =
|
|
14562
|
+
const overrideTargetsPath = path26.resolve(cwd, args.targets);
|
|
13968
14563
|
await ensureFile(overrideTargetsPath, "Target override");
|
|
13969
14564
|
const targetNames = targetOverrides.length > 0 ? targetOverrides : selected.map((bundle) => bundle.taskTarget);
|
|
13970
14565
|
await validateTargetFile(overrideTargetsPath, targetNames, "Target override");
|
|
@@ -13988,7 +14583,7 @@ var runsRerunCommand = command({
|
|
|
13988
14583
|
testFiles: selected.map((bundle) => bundle.evalPath),
|
|
13989
14584
|
rawOptions: {
|
|
13990
14585
|
target: targetOverrides,
|
|
13991
|
-
targets: args.targets ?
|
|
14586
|
+
targets: args.targets ? path26.resolve(cwd, args.targets) : void 0,
|
|
13992
14587
|
output: outputDir,
|
|
13993
14588
|
experiment: args.experiment ?? "rerun",
|
|
13994
14589
|
workers: args.workers,
|
|
@@ -14219,13 +14814,13 @@ var selfCommand = subcommands({
|
|
|
14219
14814
|
|
|
14220
14815
|
// src/commands/skills/index.ts
|
|
14221
14816
|
import { existsSync as existsSync18, readFileSync as readFileSync13, readdirSync as readdirSync5 } from "node:fs";
|
|
14222
|
-
import
|
|
14817
|
+
import path27 from "node:path";
|
|
14223
14818
|
import { fileURLToPath as fileURLToPath4 } from "node:url";
|
|
14224
14819
|
function isValidSkillsDir(dir) {
|
|
14225
14820
|
if (!existsSync18(dir)) return false;
|
|
14226
14821
|
try {
|
|
14227
14822
|
return readdirSync5(dir, { withFileTypes: true }).some(
|
|
14228
|
-
(e) => e.isDirectory() && existsSync18(
|
|
14823
|
+
(e) => e.isDirectory() && existsSync18(path27.join(dir, e.name, "SKILL.md"))
|
|
14229
14824
|
);
|
|
14230
14825
|
} catch {
|
|
14231
14826
|
return false;
|
|
@@ -14233,15 +14828,15 @@ function isValidSkillsDir(dir) {
|
|
|
14233
14828
|
}
|
|
14234
14829
|
function findSkillsDir() {
|
|
14235
14830
|
const selfFile = fileURLToPath4(import.meta.url);
|
|
14236
|
-
let dir =
|
|
14831
|
+
let dir = path27.dirname(selfFile);
|
|
14237
14832
|
for (let i = 0; i < 6; i++) {
|
|
14238
|
-
const distCandidate =
|
|
14833
|
+
const distCandidate = path27.join(dir, "dist", "skills");
|
|
14239
14834
|
if (isValidSkillsDir(distCandidate)) return distCandidate;
|
|
14240
|
-
const repoRootCandidate =
|
|
14835
|
+
const repoRootCandidate = path27.join(dir, "skills-data");
|
|
14241
14836
|
if (isValidSkillsDir(repoRootCandidate)) return repoRootCandidate;
|
|
14242
|
-
const legacyCandidate =
|
|
14837
|
+
const legacyCandidate = path27.join(dir, "skills");
|
|
14243
14838
|
if (isValidSkillsDir(legacyCandidate)) return legacyCandidate;
|
|
14244
|
-
dir =
|
|
14839
|
+
dir = path27.dirname(dir);
|
|
14245
14840
|
}
|
|
14246
14841
|
return null;
|
|
14247
14842
|
}
|
|
@@ -14260,7 +14855,7 @@ function listSkillNames(skillsDir) {
|
|
|
14260
14855
|
return readdirSync5(skillsDir, { withFileTypes: true }).filter((e) => e.isDirectory()).map((e) => e.name).sort();
|
|
14261
14856
|
}
|
|
14262
14857
|
function readSkillFile(skillDir, relPath) {
|
|
14263
|
-
const full =
|
|
14858
|
+
const full = path27.join(skillDir, relPath);
|
|
14264
14859
|
if (!existsSync18(full)) return null;
|
|
14265
14860
|
return readFileSync13(full, "utf-8");
|
|
14266
14861
|
}
|
|
@@ -14270,9 +14865,9 @@ function collectDir(dir, prefix = "") {
|
|
|
14270
14865
|
for (const entry of readdirSync5(dir, { withFileTypes: true })) {
|
|
14271
14866
|
const relPath = prefix ? `${prefix}/${entry.name}` : entry.name;
|
|
14272
14867
|
if (entry.isDirectory()) {
|
|
14273
|
-
Object.assign(result, collectDir(
|
|
14868
|
+
Object.assign(result, collectDir(path27.join(dir, entry.name), relPath));
|
|
14274
14869
|
} else {
|
|
14275
|
-
result[relPath] = readFileSync13(
|
|
14870
|
+
result[relPath] = readFileSync13(path27.join(dir, entry.name), "utf-8");
|
|
14276
14871
|
}
|
|
14277
14872
|
}
|
|
14278
14873
|
return result;
|
|
@@ -14282,14 +14877,14 @@ function listSkillSubdirs(skillDir) {
|
|
|
14282
14877
|
return readdirSync5(skillDir, { withFileTypes: true }).filter((e) => e.isDirectory() && !e.name.startsWith(".")).map((e) => e.name).sort();
|
|
14283
14878
|
}
|
|
14284
14879
|
function readSkill(skillsDir, name, full) {
|
|
14285
|
-
const skillDir =
|
|
14880
|
+
const skillDir = path27.join(skillsDir, name);
|
|
14286
14881
|
if (!existsSync18(skillDir)) return null;
|
|
14287
14882
|
const content = readSkillFile(skillDir, "SKILL.md");
|
|
14288
14883
|
if (content === null) return null;
|
|
14289
14884
|
if (!full) return { name, content };
|
|
14290
14885
|
const files = {};
|
|
14291
14886
|
for (const sub of listSkillSubdirs(skillDir)) {
|
|
14292
|
-
const subDir =
|
|
14887
|
+
const subDir = path27.join(skillDir, sub);
|
|
14293
14888
|
const collected = collectDir(subDir, sub);
|
|
14294
14889
|
Object.assign(files, collected);
|
|
14295
14890
|
}
|
|
@@ -14299,14 +14894,14 @@ function findRefFile(skillDir, refName) {
|
|
|
14299
14894
|
const candidates = refName.endsWith(".md") ? [refName] : [refName, `${refName}.md`];
|
|
14300
14895
|
for (const sub of listSkillSubdirs(skillDir)) {
|
|
14301
14896
|
for (const candidate of candidates) {
|
|
14302
|
-
const filePath =
|
|
14897
|
+
const filePath = path27.join(skillDir, sub, candidate);
|
|
14303
14898
|
if (existsSync18(filePath)) {
|
|
14304
14899
|
return { relPath: `${sub}/${candidate}`, content: readFileSync13(filePath, "utf-8") };
|
|
14305
14900
|
}
|
|
14306
14901
|
}
|
|
14307
14902
|
}
|
|
14308
14903
|
for (const candidate of candidates) {
|
|
14309
|
-
const filePath =
|
|
14904
|
+
const filePath = path27.join(skillDir, candidate);
|
|
14310
14905
|
if (existsSync18(filePath)) {
|
|
14311
14906
|
return { relPath: candidate, content: readFileSync13(filePath, "utf-8") };
|
|
14312
14907
|
}
|
|
@@ -14316,7 +14911,7 @@ function findRefFile(skillDir, refName) {
|
|
|
14316
14911
|
function listRefFiles(skillDir) {
|
|
14317
14912
|
const out = [];
|
|
14318
14913
|
for (const sub of listSkillSubdirs(skillDir)) {
|
|
14319
|
-
const subDir =
|
|
14914
|
+
const subDir = path27.join(skillDir, sub);
|
|
14320
14915
|
for (const entry of readdirSync5(subDir, { withFileTypes: true })) {
|
|
14321
14916
|
if (entry.isFile()) out.push(`${sub}/${entry.name}`);
|
|
14322
14917
|
}
|
|
@@ -14400,7 +14995,7 @@ var skillsGetCommand = command({
|
|
|
14400
14995
|
}
|
|
14401
14996
|
process.exit(1);
|
|
14402
14997
|
}
|
|
14403
|
-
const skillDir =
|
|
14998
|
+
const skillDir = path27.join(skillsDir, name);
|
|
14404
14999
|
if (!existsSync18(skillDir)) {
|
|
14405
15000
|
const msg = `skill '${name}' not found`;
|
|
14406
15001
|
if (json) {
|
|
@@ -14489,7 +15084,7 @@ var skillsPathCommand = command({
|
|
|
14489
15084
|
handler: ({ name }) => {
|
|
14490
15085
|
const skillsDir = requireSkillsDir();
|
|
14491
15086
|
if (name) {
|
|
14492
|
-
const skillDir =
|
|
15087
|
+
const skillDir = path27.join(skillsDir, name);
|
|
14493
15088
|
if (!existsSync18(skillDir)) {
|
|
14494
15089
|
console.error(`Error: skill '${name}' not found`);
|
|
14495
15090
|
process.exit(1);
|
|
@@ -14512,7 +15107,7 @@ var skillsCommand = subcommands({
|
|
|
14512
15107
|
|
|
14513
15108
|
// src/commands/transpile/index.ts
|
|
14514
15109
|
import { writeFileSync as writeFileSync7 } from "node:fs";
|
|
14515
|
-
import
|
|
15110
|
+
import path28 from "node:path";
|
|
14516
15111
|
var transpileCommand = command({
|
|
14517
15112
|
name: "transpile",
|
|
14518
15113
|
description: "Convert an EVAL.yaml file to Agent Skills evals.json format",
|
|
@@ -14536,7 +15131,7 @@ var transpileCommand = command({
|
|
|
14536
15131
|
handler: async ({ input, outDir, stdout }) => {
|
|
14537
15132
|
let result;
|
|
14538
15133
|
try {
|
|
14539
|
-
result = transpileEvalYamlFile(
|
|
15134
|
+
result = transpileEvalYamlFile(path28.resolve(input));
|
|
14540
15135
|
} catch (error) {
|
|
14541
15136
|
console.error(`Error: ${error.message}`);
|
|
14542
15137
|
process.exit(1);
|
|
@@ -14560,11 +15155,11 @@ var transpileCommand = command({
|
|
|
14560
15155
|
process.stdout.write("\n");
|
|
14561
15156
|
return;
|
|
14562
15157
|
}
|
|
14563
|
-
const outputDir = outDir ?
|
|
15158
|
+
const outputDir = outDir ? path28.resolve(outDir) : path28.dirname(path28.resolve(input));
|
|
14564
15159
|
const fileNames = getOutputFilenames(result);
|
|
14565
15160
|
for (const [skill, evalsJson] of result.files) {
|
|
14566
15161
|
const fileName = fileNames.get(skill) ?? "evals.json";
|
|
14567
|
-
const outputPath =
|
|
15162
|
+
const outputPath = path28.join(outputDir, fileName);
|
|
14568
15163
|
writeFileSync7(outputPath, `${JSON.stringify(evalsJson, null, 2)}
|
|
14569
15164
|
`);
|
|
14570
15165
|
console.log(`Transpiled to ${outputPath}`);
|
|
@@ -14573,7 +15168,7 @@ var transpileCommand = command({
|
|
|
14573
15168
|
});
|
|
14574
15169
|
|
|
14575
15170
|
// src/commands/trend/index.ts
|
|
14576
|
-
import
|
|
15171
|
+
import path29 from "node:path";
|
|
14577
15172
|
var colors2 = {
|
|
14578
15173
|
reset: "\x1B[0m",
|
|
14579
15174
|
bold: "\x1B[1m",
|
|
@@ -14623,7 +15218,7 @@ function colorizeSlope(value) {
|
|
|
14623
15218
|
}
|
|
14624
15219
|
function ensureTrendIndexPath(source, cwd) {
|
|
14625
15220
|
const resolved = resolveResultSourcePath(source, cwd);
|
|
14626
|
-
if (
|
|
15221
|
+
if (path29.basename(resolved) !== RESULT_INDEX_FILENAME) {
|
|
14627
15222
|
throw new Error(
|
|
14628
15223
|
`Unsupported result source for trend: ${source}. Use a run workspace directory or ${RESULT_INDEX_FILENAME} manifest.`
|
|
14629
15224
|
);
|
|
@@ -14643,7 +15238,7 @@ function resolveTrendSources(cwd, sources, last) {
|
|
|
14643
15238
|
if (last < 2) {
|
|
14644
15239
|
throw new Error("--last must be at least 2");
|
|
14645
15240
|
}
|
|
14646
|
-
const metas = listResultFiles(cwd).filter((meta) =>
|
|
15241
|
+
const metas = listResultFiles(cwd).filter((meta) => path29.basename(meta.path) === RESULT_INDEX_FILENAME).slice(0, last);
|
|
14647
15242
|
if (metas.length < 2) {
|
|
14648
15243
|
throw new Error(
|
|
14649
15244
|
"Trend analysis requires at least 2 canonical run workspaces in .agentv/results/runs/"
|
|
@@ -14658,10 +15253,10 @@ function getRunLabel(sourcePath, timestamp) {
|
|
|
14658
15253
|
if (timestamp) {
|
|
14659
15254
|
return timestamp;
|
|
14660
15255
|
}
|
|
14661
|
-
return
|
|
15256
|
+
return path29.basename(path29.dirname(sourcePath));
|
|
14662
15257
|
}
|
|
14663
15258
|
function getRunSortKey(sourcePath, timestamp) {
|
|
14664
|
-
return timestamp ??
|
|
15259
|
+
return timestamp ?? path29.basename(path29.dirname(sourcePath));
|
|
14665
15260
|
}
|
|
14666
15261
|
function mean2(values) {
|
|
14667
15262
|
return values.reduce((sum, value) => sum + value, 0) / values.length;
|
|
@@ -15041,8 +15636,8 @@ function isTTY() {
|
|
|
15041
15636
|
|
|
15042
15637
|
// src/commands/validate/validate-files.ts
|
|
15043
15638
|
import { constants as constants2 } from "node:fs";
|
|
15044
|
-
import { access as access2, readdir as readdir4, stat } from "node:fs/promises";
|
|
15045
|
-
import
|
|
15639
|
+
import { access as access2, readdir as readdir4, stat as stat2 } from "node:fs/promises";
|
|
15640
|
+
import path30 from "node:path";
|
|
15046
15641
|
import fg3 from "fast-glob";
|
|
15047
15642
|
async function validateFiles(paths) {
|
|
15048
15643
|
const filePaths = await expandPaths(paths);
|
|
@@ -15057,7 +15652,7 @@ async function validateFiles(paths) {
|
|
|
15057
15652
|
};
|
|
15058
15653
|
}
|
|
15059
15654
|
async function validateSingleFile(filePath) {
|
|
15060
|
-
const absolutePath =
|
|
15655
|
+
const absolutePath = path30.resolve(filePath);
|
|
15061
15656
|
const fileType = await detectFileType(absolutePath);
|
|
15062
15657
|
let result;
|
|
15063
15658
|
if (fileType === "eval") {
|
|
@@ -15101,10 +15696,10 @@ async function validateSingleFile(filePath) {
|
|
|
15101
15696
|
async function expandPaths(paths) {
|
|
15102
15697
|
const expanded = /* @__PURE__ */ new Set();
|
|
15103
15698
|
for (const inputPath of paths) {
|
|
15104
|
-
const absolutePath =
|
|
15699
|
+
const absolutePath = path30.resolve(inputPath);
|
|
15105
15700
|
try {
|
|
15106
15701
|
await access2(absolutePath, constants2.F_OK);
|
|
15107
|
-
const stats = await
|
|
15702
|
+
const stats = await stat2(absolutePath);
|
|
15108
15703
|
if (stats.isFile()) {
|
|
15109
15704
|
if (isYamlFile(absolutePath)) expanded.add(absolutePath);
|
|
15110
15705
|
continue;
|
|
@@ -15129,7 +15724,7 @@ async function expandPaths(paths) {
|
|
|
15129
15724
|
if (yamlMatches.length === 0) {
|
|
15130
15725
|
console.warn(`Warning: No YAML files matched pattern: ${inputPath}`);
|
|
15131
15726
|
}
|
|
15132
|
-
for (const f of yamlMatches) expanded.add(
|
|
15727
|
+
for (const f of yamlMatches) expanded.add(path30.normalize(f));
|
|
15133
15728
|
}
|
|
15134
15729
|
const sorted = Array.from(expanded);
|
|
15135
15730
|
sorted.sort();
|
|
@@ -15140,7 +15735,7 @@ async function findYamlFiles(dirPath) {
|
|
|
15140
15735
|
try {
|
|
15141
15736
|
const entries2 = await readdir4(dirPath, { withFileTypes: true });
|
|
15142
15737
|
for (const entry of entries2) {
|
|
15143
|
-
const fullPath =
|
|
15738
|
+
const fullPath = path30.join(dirPath, entry.name);
|
|
15144
15739
|
if (entry.isDirectory()) {
|
|
15145
15740
|
if (entry.name === "node_modules" || entry.name.startsWith(".")) {
|
|
15146
15741
|
continue;
|
|
@@ -15157,11 +15752,11 @@ async function findYamlFiles(dirPath) {
|
|
|
15157
15752
|
return results;
|
|
15158
15753
|
}
|
|
15159
15754
|
function isYamlFile(filePath) {
|
|
15160
|
-
const ext =
|
|
15755
|
+
const ext = path30.extname(filePath).toLowerCase();
|
|
15161
15756
|
return ext === ".yaml" || ext === ".yml";
|
|
15162
15757
|
}
|
|
15163
15758
|
function isEvalYamlFile(filePath) {
|
|
15164
|
-
const lower =
|
|
15759
|
+
const lower = path30.basename(filePath).toLowerCase();
|
|
15165
15760
|
return lower.endsWith(".eval.yaml") || lower.endsWith(".eval.yml");
|
|
15166
15761
|
}
|
|
15167
15762
|
|
|
@@ -15217,8 +15812,8 @@ var validateCommand = command({
|
|
|
15217
15812
|
|
|
15218
15813
|
// src/commands/workspace/clean.ts
|
|
15219
15814
|
import { existsSync as existsSync19 } from "node:fs";
|
|
15220
|
-
import { readFile as
|
|
15221
|
-
import
|
|
15815
|
+
import { readFile as readFile9, readdir as readdir5, rm as rm2 } from "node:fs/promises";
|
|
15816
|
+
import path31 from "node:path";
|
|
15222
15817
|
async function confirm2(message) {
|
|
15223
15818
|
const readline4 = await import("node:readline");
|
|
15224
15819
|
const rl = readline4.createInterface({ input: process.stdin, output: process.stdout });
|
|
@@ -15254,10 +15849,10 @@ var cleanCommand = command({
|
|
|
15254
15849
|
const poolDirs = entries2.filter((e) => e.isDirectory());
|
|
15255
15850
|
const matchingDirs = [];
|
|
15256
15851
|
for (const dir of poolDirs) {
|
|
15257
|
-
const poolDir =
|
|
15258
|
-
const metadataPath =
|
|
15852
|
+
const poolDir = path31.join(poolRoot, dir.name);
|
|
15853
|
+
const metadataPath = path31.join(poolDir, "metadata.json");
|
|
15259
15854
|
try {
|
|
15260
|
-
const raw = await
|
|
15855
|
+
const raw = await readFile9(metadataPath, "utf-8");
|
|
15261
15856
|
const metadata = JSON.parse(raw);
|
|
15262
15857
|
const hasRepo = metadata.repos?.some((r) => {
|
|
15263
15858
|
const value = r.repo ?? (r.source?.type === "git" ? r.source.url : r.source?.path);
|
|
@@ -15283,8 +15878,8 @@ var cleanCommand = command({
|
|
|
15283
15878
|
}
|
|
15284
15879
|
}
|
|
15285
15880
|
for (const dir of matchingDirs) {
|
|
15286
|
-
await
|
|
15287
|
-
console.log(`Removed: ${
|
|
15881
|
+
await rm2(dir, { recursive: true, force: true });
|
|
15882
|
+
console.log(`Removed: ${path31.basename(dir).slice(0, 12)}...`);
|
|
15288
15883
|
}
|
|
15289
15884
|
console.log("Done.");
|
|
15290
15885
|
} else {
|
|
@@ -15295,14 +15890,14 @@ var cleanCommand = command({
|
|
|
15295
15890
|
return;
|
|
15296
15891
|
}
|
|
15297
15892
|
}
|
|
15298
|
-
await
|
|
15893
|
+
await rm2(poolRoot, { recursive: true, force: true });
|
|
15299
15894
|
console.log("Workspace pool cleaned.");
|
|
15300
15895
|
}
|
|
15301
15896
|
}
|
|
15302
15897
|
});
|
|
15303
15898
|
|
|
15304
15899
|
// src/commands/workspace/deps.ts
|
|
15305
|
-
import
|
|
15900
|
+
import path32 from "node:path";
|
|
15306
15901
|
var depsCommand = command({
|
|
15307
15902
|
name: "deps",
|
|
15308
15903
|
description: "Scan eval files and list git repo dependencies needed by workspaces",
|
|
@@ -15326,7 +15921,7 @@ var depsCommand = command({
|
|
|
15326
15921
|
const resolvedPaths = await resolveEvalPaths(evalPaths, cwd);
|
|
15327
15922
|
const result = await scanRepoDeps(resolvedPaths);
|
|
15328
15923
|
for (const err2 of result.errors) {
|
|
15329
|
-
console.error(`error: ${
|
|
15924
|
+
console.error(`error: ${path32.relative(cwd, err2.file)}: ${err2.message}`);
|
|
15330
15925
|
}
|
|
15331
15926
|
if (result.errors.length > 0) {
|
|
15332
15927
|
process.exit(1);
|
|
@@ -15337,7 +15932,7 @@ var depsCommand = command({
|
|
|
15337
15932
|
...r.ref !== void 0 && { ref: r.ref },
|
|
15338
15933
|
...r.sparse !== void 0 && { sparse: r.sparse },
|
|
15339
15934
|
...r.ancestor !== void 0 && { ancestor: r.ancestor },
|
|
15340
|
-
...usedBy && { used_by: r.usedBy.map((p) =>
|
|
15935
|
+
...usedBy && { used_by: r.usedBy.map((p) => path32.relative(cwd, p)) }
|
|
15341
15936
|
}))
|
|
15342
15937
|
};
|
|
15343
15938
|
console.log(JSON.stringify(output, null, 2));
|
|
@@ -15346,18 +15941,18 @@ var depsCommand = command({
|
|
|
15346
15941
|
|
|
15347
15942
|
// src/commands/workspace/list.ts
|
|
15348
15943
|
import { existsSync as existsSync20 } from "node:fs";
|
|
15349
|
-
import { readFile as
|
|
15350
|
-
import
|
|
15944
|
+
import { readFile as readFile10, readdir as readdir6, stat as stat3 } from "node:fs/promises";
|
|
15945
|
+
import path33 from "node:path";
|
|
15351
15946
|
async function getDirectorySize(dirPath) {
|
|
15352
15947
|
let totalSize = 0;
|
|
15353
15948
|
try {
|
|
15354
15949
|
const entries2 = await readdir6(dirPath, { withFileTypes: true });
|
|
15355
15950
|
for (const entry of entries2) {
|
|
15356
|
-
const fullPath =
|
|
15951
|
+
const fullPath = path33.join(dirPath, entry.name);
|
|
15357
15952
|
if (entry.isDirectory()) {
|
|
15358
15953
|
totalSize += await getDirectorySize(fullPath);
|
|
15359
15954
|
} else {
|
|
15360
|
-
const stats = await
|
|
15955
|
+
const stats = await stat3(fullPath);
|
|
15361
15956
|
totalSize += stats.size;
|
|
15362
15957
|
}
|
|
15363
15958
|
}
|
|
@@ -15388,14 +15983,14 @@ var listCommand = command({
|
|
|
15388
15983
|
return;
|
|
15389
15984
|
}
|
|
15390
15985
|
for (const dir of poolDirs) {
|
|
15391
|
-
const poolDir =
|
|
15986
|
+
const poolDir = path33.join(poolRoot, dir.name);
|
|
15392
15987
|
const fingerprint = dir.name;
|
|
15393
15988
|
const poolEntries = await readdir6(poolDir, { withFileTypes: true });
|
|
15394
15989
|
const slots = poolEntries.filter((e) => e.isDirectory() && e.name.startsWith("slot-"));
|
|
15395
|
-
const metadataPath =
|
|
15990
|
+
const metadataPath = path33.join(poolDir, "metadata.json");
|
|
15396
15991
|
let metadata = null;
|
|
15397
15992
|
try {
|
|
15398
|
-
const raw = await
|
|
15993
|
+
const raw = await readFile10(metadataPath, "utf-8");
|
|
15399
15994
|
metadata = JSON.parse(raw);
|
|
15400
15995
|
} catch {
|
|
15401
15996
|
}
|
|
@@ -15433,16 +16028,16 @@ var workspaceCommand = subcommands({
|
|
|
15433
16028
|
|
|
15434
16029
|
// src/update-check.ts
|
|
15435
16030
|
import { spawn as spawn3 } from "node:child_process";
|
|
15436
|
-
import { readFile as
|
|
16031
|
+
import { readFile as readFile11 } from "node:fs/promises";
|
|
15437
16032
|
import { join as join6 } from "node:path";
|
|
15438
16033
|
var CHECK_INTERVAL_MS = 24 * 60 * 60 * 1e3;
|
|
15439
16034
|
var CONFIG_DIR = getAgentvConfigDir();
|
|
15440
16035
|
var CACHE_FILE = "version-check.json";
|
|
15441
16036
|
var NPM_REGISTRY_BASE2 = "https://registry.npmjs.org/agentv/";
|
|
15442
|
-
async function getCachedUpdateInfo(
|
|
15443
|
-
const filePath =
|
|
16037
|
+
async function getCachedUpdateInfo(path35) {
|
|
16038
|
+
const filePath = path35 ?? join6(CONFIG_DIR, CACHE_FILE);
|
|
15444
16039
|
try {
|
|
15445
|
-
const raw = await
|
|
16040
|
+
const raw = await readFile11(filePath, "utf-8");
|
|
15446
16041
|
const data = JSON.parse(raw);
|
|
15447
16042
|
if (typeof data.latestVersion === "string" && typeof data.lastCheckedAt === "string") {
|
|
15448
16043
|
return data;
|
|
@@ -15527,6 +16122,7 @@ var app = subcommands({
|
|
|
15527
16122
|
cmds: {
|
|
15528
16123
|
dashboard: resultsServeCommand,
|
|
15529
16124
|
eval: evalCommand,
|
|
16125
|
+
grade: gradeCommand,
|
|
15530
16126
|
import: importCommand,
|
|
15531
16127
|
compare: compareCommand,
|
|
15532
16128
|
convert: convertCommand,
|
|
@@ -15534,6 +16130,7 @@ var app = subcommands({
|
|
|
15534
16130
|
doctor: doctorCommand,
|
|
15535
16131
|
init: initCmdTsCommand,
|
|
15536
16132
|
pipeline: pipelineCommand,
|
|
16133
|
+
prepare: prepareCommand,
|
|
15537
16134
|
results: resultsCommand,
|
|
15538
16135
|
runs: runsCommand,
|
|
15539
16136
|
self: selfCommand,
|
|
@@ -15556,8 +16153,10 @@ var TOP_LEVEL_COMMANDS = /* @__PURE__ */ new Set([
|
|
|
15556
16153
|
"create",
|
|
15557
16154
|
"dashboard",
|
|
15558
16155
|
"doctor",
|
|
16156
|
+
"grade",
|
|
15559
16157
|
"init",
|
|
15560
16158
|
"pipeline",
|
|
16159
|
+
"prepare",
|
|
15561
16160
|
"results",
|
|
15562
16161
|
"runs",
|
|
15563
16162
|
"self",
|
|
@@ -15617,7 +16216,7 @@ ${updateNotice}
|
|
|
15617
16216
|
}
|
|
15618
16217
|
const cwd = process.cwd();
|
|
15619
16218
|
const repoRoot = await findRepoRoot(cwd);
|
|
15620
|
-
const sessionConfig = await loadConfig(
|
|
16219
|
+
const sessionConfig = await loadConfig(path34.join(cwd, "_"), repoRoot);
|
|
15621
16220
|
const beforeSessionCommand = sessionConfig?.hooks?.before_session;
|
|
15622
16221
|
if (beforeSessionCommand) {
|
|
15623
16222
|
runBeforeSessionHook(beforeSessionCommand);
|
|
@@ -15631,4 +16230,4 @@ export {
|
|
|
15631
16230
|
preprocessArgv,
|
|
15632
16231
|
runCli
|
|
15633
16232
|
};
|
|
15634
|
-
//# sourceMappingURL=chunk-
|
|
16233
|
+
//# sourceMappingURL=chunk-6FXICR66.js.map
|