agentv 3.13.3 → 3.14.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-TGCWIHBH.js → chunk-3UW7KUQ3.js} +562 -58
- package/dist/chunk-3UW7KUQ3.js.map +1 -0
- package/dist/{chunk-PACTPWEN.js → chunk-75PQBKLR.js} +4 -4
- package/dist/{chunk-PACTPWEN.js.map → chunk-75PQBKLR.js.map} +1 -1
- package/dist/{chunk-D3LNJUUB.js → chunk-ELQEFMGO.js} +773 -339
- package/dist/chunk-ELQEFMGO.js.map +1 -0
- package/dist/cli.js +3 -3
- package/dist/{dist-KPMR7RBT.js → dist-5EEXTTC3.js} +4 -2
- package/dist/index.js +3 -3
- package/dist/{interactive-OMJAMCQP.js → interactive-Q563ULAR.js} +3 -3
- package/dist/templates/.agentv/config.yaml +4 -13
- package/dist/templates/.agentv/targets.yaml +0 -16
- package/package.json +1 -1
- package/dist/chunk-D3LNJUUB.js.map +0 -1
- package/dist/chunk-TGCWIHBH.js.map +0 -1
- package/dist/templates/.agentv/.env.example +0 -23
- /package/dist/{dist-KPMR7RBT.js.map → dist-5EEXTTC3.js.map} +0 -0
- /package/dist/{interactive-OMJAMCQP.js.map → interactive-Q563ULAR.js.map} +0 -0
|
@@ -22,7 +22,7 @@ import {
|
|
|
22
22
|
validateFileReferences,
|
|
23
23
|
validateTargetsFile,
|
|
24
24
|
writeArtifactsFromResults
|
|
25
|
-
} from "./chunk-PACTPWEN.js";
|
|
25
|
+
} from "./chunk-75PQBKLR.js";
|
|
26
26
|
import {
|
|
27
27
|
createBuiltinRegistry,
|
|
28
28
|
executeScript,
|
|
@@ -39,7 +39,7 @@ import {
|
|
|
39
39
|
toSnakeCaseDeep as toSnakeCaseDeep2,
|
|
40
40
|
transpileEvalYamlFile,
|
|
41
41
|
trimBaselineResult
|
|
42
|
-
} from "./chunk-D3LNJUUB.js";
|
|
42
|
+
} from "./chunk-ELQEFMGO.js";
|
|
43
43
|
import {
|
|
44
44
|
__commonJS,
|
|
45
45
|
__esm,
|
|
@@ -4185,7 +4185,7 @@ var evalRunCommand = command({
|
|
|
4185
4185
|
},
|
|
4186
4186
|
handler: async (args) => {
|
|
4187
4187
|
if (args.evalPaths.length === 0 && process.stdin.isTTY) {
|
|
4188
|
-
const { launchInteractiveWizard } = await import("./interactive-OMJAMCQP.js");
|
|
4188
|
+
const { launchInteractiveWizard } = await import("./interactive-Q563ULAR.js");
|
|
4189
4189
|
await launchInteractiveWizard();
|
|
4190
4190
|
return;
|
|
4191
4191
|
}
|
|
@@ -4408,13 +4408,23 @@ var evalBenchCommand = command({
|
|
|
4408
4408
|
type: string,
|
|
4409
4409
|
displayName: "export-dir",
|
|
4410
4410
|
description: "Export directory from pipeline input/grade"
|
|
4411
|
+
}),
|
|
4412
|
+
llmScores: option({
|
|
4413
|
+
type: optional(string),
|
|
4414
|
+
long: "llm-scores",
|
|
4415
|
+
description: "Path to LLM scores JSON file (reads from stdin if omitted)"
|
|
4411
4416
|
})
|
|
4412
4417
|
},
|
|
4413
|
-
handler: async ({ exportDir }) => {
|
|
4418
|
+
handler: async ({ exportDir, llmScores: llmScoresPath }) => {
|
|
4414
4419
|
const manifest = JSON.parse(await readFile(join(exportDir, "manifest.json"), "utf8"));
|
|
4415
4420
|
const testIds = manifest.test_ids;
|
|
4416
4421
|
const targetName = manifest.target?.name ?? "unknown";
|
|
4417
|
-
|
|
4422
|
+
let stdinData;
|
|
4423
|
+
if (llmScoresPath) {
|
|
4424
|
+
stdinData = await readFile(llmScoresPath, "utf8");
|
|
4425
|
+
} else {
|
|
4426
|
+
stdinData = await readStdin();
|
|
4427
|
+
}
|
|
4418
4428
|
const llmScores = stdinData ? JSON.parse(stdinData) : {};
|
|
4419
4429
|
const indexLines = [];
|
|
4420
4430
|
const allPassRates = [];
|
|
@@ -4814,6 +4824,351 @@ async function writeJson(filePath, data) {
|
|
|
4814
4824
|
`, "utf8");
|
|
4815
4825
|
}
|
|
4816
4826
|
|
|
4827
|
+
// src/commands/pipeline/run.ts
|
|
4828
|
+
import { execSync } from "node:child_process";
|
|
4829
|
+
import { existsSync as existsSync2, readFileSync as readFileSync4, unlinkSync } from "node:fs";
|
|
4830
|
+
import { mkdir as mkdir4, readFile as readFile4, readdir as readdir3, writeFile as writeFile5 } from "node:fs/promises";
|
|
4831
|
+
import { tmpdir } from "node:os";
|
|
4832
|
+
import { dirname as dirname2, join as join4, resolve as resolve2 } from "node:path";
|
|
4833
|
+
function loadEnvFile(dir) {
|
|
4834
|
+
let current = resolve2(dir);
|
|
4835
|
+
while (true) {
|
|
4836
|
+
const candidate = join4(current, ".env");
|
|
4837
|
+
if (existsSync2(candidate)) {
|
|
4838
|
+
const env3 = {};
|
|
4839
|
+
for (const line of readFileSync4(candidate, "utf8").split("\n")) {
|
|
4840
|
+
const trimmed = line.trim();
|
|
4841
|
+
if (!trimmed || trimmed.startsWith("#")) continue;
|
|
4842
|
+
const eqIdx = trimmed.indexOf("=");
|
|
4843
|
+
if (eqIdx === -1) continue;
|
|
4844
|
+
env3[trimmed.slice(0, eqIdx).trim()] = trimmed.slice(eqIdx + 1).trim();
|
|
4845
|
+
}
|
|
4846
|
+
return env3;
|
|
4847
|
+
}
|
|
4848
|
+
const parent = dirname2(current);
|
|
4849
|
+
if (parent === current) break;
|
|
4850
|
+
current = parent;
|
|
4851
|
+
}
|
|
4852
|
+
return {};
|
|
4853
|
+
}
|
|
4854
|
+
var evalRunCommand2 = command({
|
|
4855
|
+
name: "run",
|
|
4856
|
+
description: "Extract inputs, invoke CLI targets, and run code graders in one step",
|
|
4857
|
+
args: {
|
|
4858
|
+
evalPath: positional({
|
|
4859
|
+
type: string,
|
|
4860
|
+
displayName: "eval-path",
|
|
4861
|
+
description: "Path to eval YAML file"
|
|
4862
|
+
}),
|
|
4863
|
+
out: option({
|
|
4864
|
+
type: string,
|
|
4865
|
+
long: "out",
|
|
4866
|
+
description: "Output directory for results"
|
|
4867
|
+
}),
|
|
4868
|
+
workers: option({
|
|
4869
|
+
type: optional(number),
|
|
4870
|
+
long: "workers",
|
|
4871
|
+
description: "Parallel workers for target invocation (default: all tests)"
|
|
4872
|
+
})
|
|
4873
|
+
},
|
|
4874
|
+
handler: async ({ evalPath, out, workers }) => {
|
|
4875
|
+
const resolvedEvalPath = resolve2(evalPath);
|
|
4876
|
+
const outDir = resolve2(out);
|
|
4877
|
+
const repoRoot = await findRepoRoot(dirname2(resolvedEvalPath));
|
|
4878
|
+
const evalDir = dirname2(resolvedEvalPath);
|
|
4879
|
+
const suite = await loadTestSuite(resolvedEvalPath, repoRoot);
|
|
4880
|
+
const tests = suite.tests;
|
|
4881
|
+
if (tests.length === 0) {
|
|
4882
|
+
console.error("No tests found in eval file.");
|
|
4883
|
+
process.exit(1);
|
|
4884
|
+
}
|
|
4885
|
+
let targetInfo = null;
|
|
4886
|
+
let targetName = "agent";
|
|
4887
|
+
let targetKind = "agent";
|
|
4888
|
+
try {
|
|
4889
|
+
const selection = await selectTarget({
|
|
4890
|
+
testFilePath: resolvedEvalPath,
|
|
4891
|
+
repoRoot,
|
|
4892
|
+
cwd: evalDir,
|
|
4893
|
+
dryRun: false,
|
|
4894
|
+
dryRunDelay: 0,
|
|
4895
|
+
dryRunDelayMin: 0,
|
|
4896
|
+
dryRunDelayMax: 0,
|
|
4897
|
+
env: process.env
|
|
4898
|
+
});
|
|
4899
|
+
targetName = selection.targetName;
|
|
4900
|
+
if (selection.resolvedTarget.kind === "cli") {
|
|
4901
|
+
targetKind = "cli";
|
|
4902
|
+
const config = selection.resolvedTarget.config;
|
|
4903
|
+
targetInfo = {
|
|
4904
|
+
kind: "cli",
|
|
4905
|
+
command: config.command,
|
|
4906
|
+
cwd: config.cwd ?? evalDir,
|
|
4907
|
+
timeoutMs: config.timeoutMs ?? 3e4
|
|
4908
|
+
};
|
|
4909
|
+
}
|
|
4910
|
+
} catch {
|
|
4911
|
+
}
|
|
4912
|
+
const testIds = [];
|
|
4913
|
+
for (const test of tests) {
|
|
4914
|
+
const testDir = join4(outDir, test.id);
|
|
4915
|
+
await mkdir4(testDir, { recursive: true });
|
|
4916
|
+
testIds.push(test.id);
|
|
4917
|
+
const inputText = test.question;
|
|
4918
|
+
const inputMessages = test.input.map((m) => ({
|
|
4919
|
+
role: m.role,
|
|
4920
|
+
content: typeof m.content === "string" ? m.content : m.content
|
|
4921
|
+
}));
|
|
4922
|
+
await writeJson2(join4(testDir, "input.json"), {
|
|
4923
|
+
input_text: inputText,
|
|
4924
|
+
input_messages: inputMessages,
|
|
4925
|
+
file_paths: test.file_paths,
|
|
4926
|
+
metadata: test.metadata ?? {}
|
|
4927
|
+
});
|
|
4928
|
+
if (targetInfo) {
|
|
4929
|
+
await writeJson2(join4(testDir, "invoke.json"), {
|
|
4930
|
+
kind: "cli",
|
|
4931
|
+
command: targetInfo.command,
|
|
4932
|
+
cwd: targetInfo.cwd,
|
|
4933
|
+
timeout_ms: targetInfo.timeoutMs,
|
|
4934
|
+
env: {}
|
|
4935
|
+
});
|
|
4936
|
+
} else {
|
|
4937
|
+
await writeJson2(join4(testDir, "invoke.json"), {
|
|
4938
|
+
kind: "agent",
|
|
4939
|
+
instructions: "Execute this task in the current workspace. The agent IS the target."
|
|
4940
|
+
});
|
|
4941
|
+
}
|
|
4942
|
+
await writeFile5(join4(testDir, "criteria.md"), test.criteria ?? "", "utf8");
|
|
4943
|
+
if (test.expected_output.length > 0 || test.reference_answer !== void 0 && test.reference_answer !== "") {
|
|
4944
|
+
await writeJson2(join4(testDir, "expected_output.json"), {
|
|
4945
|
+
expected_output: test.expected_output,
|
|
4946
|
+
reference_answer: test.reference_answer ?? ""
|
|
4947
|
+
});
|
|
4948
|
+
}
|
|
4949
|
+
await writeGraderConfigs2(testDir, test.assertions ?? [], evalDir);
|
|
4950
|
+
}
|
|
4951
|
+
await writeJson2(join4(outDir, "manifest.json"), {
|
|
4952
|
+
eval_file: resolvedEvalPath,
|
|
4953
|
+
timestamp: (/* @__PURE__ */ new Date()).toISOString(),
|
|
4954
|
+
target: { name: targetName, kind: targetKind },
|
|
4955
|
+
test_ids: testIds
|
|
4956
|
+
});
|
|
4957
|
+
console.log(`Extracted ${testIds.length} test(s) to ${outDir}`);
|
|
4958
|
+
if (targetInfo) {
|
|
4959
|
+
const envVars = loadEnvFile(evalDir);
|
|
4960
|
+
const mergedEnv = { ...process.env, ...envVars };
|
|
4961
|
+
const maxWorkers = workers ?? testIds.length;
|
|
4962
|
+
console.log(`Invoking ${testIds.length} CLI target(s) (${maxWorkers} workers)...`);
|
|
4963
|
+
const invokeTarget = async (testId) => {
|
|
4964
|
+
const testDir = join4(outDir, testId);
|
|
4965
|
+
const invoke = JSON.parse(await readFile4(join4(testDir, "invoke.json"), "utf8"));
|
|
4966
|
+
if (invoke.kind !== "cli") return;
|
|
4967
|
+
const inputData = JSON.parse(await readFile4(join4(testDir, "input.json"), "utf8"));
|
|
4968
|
+
const template = invoke.command;
|
|
4969
|
+
const cwd = invoke.cwd;
|
|
4970
|
+
const timeoutMs = invoke.timeout_ms ?? 12e4;
|
|
4971
|
+
const promptFile = join4(tmpdir(), `agentv-prompt-${testId}-${Date.now()}.txt`);
|
|
4972
|
+
const outputFile = join4(tmpdir(), `agentv-output-${testId}-${Date.now()}.txt`);
|
|
4973
|
+
await writeFile5(promptFile, inputData.input_text, "utf8");
|
|
4974
|
+
let rendered = template;
|
|
4975
|
+
rendered = rendered.replace("{PROMPT_FILE}", promptFile);
|
|
4976
|
+
rendered = rendered.replace("{OUTPUT_FILE}", outputFile);
|
|
4977
|
+
rendered = rendered.replace("{PROMPT}", inputData.input_text);
|
|
4978
|
+
const start = performance.now();
|
|
4979
|
+
try {
|
|
4980
|
+
execSync(rendered, {
|
|
4981
|
+
cwd,
|
|
4982
|
+
timeout: timeoutMs,
|
|
4983
|
+
env: mergedEnv,
|
|
4984
|
+
stdio: ["pipe", "pipe", "pipe"],
|
|
4985
|
+
maxBuffer: 10 * 1024 * 1024
|
|
4986
|
+
});
|
|
4987
|
+
const durationMs = Math.round(performance.now() - start);
|
|
4988
|
+
let response;
|
|
4989
|
+
if (existsSync2(outputFile)) {
|
|
4990
|
+
response = readFileSync4(outputFile, "utf8");
|
|
4991
|
+
} else {
|
|
4992
|
+
response = "ERROR: No output file generated";
|
|
4993
|
+
}
|
|
4994
|
+
await writeFile5(join4(testDir, "response.md"), response, "utf8");
|
|
4995
|
+
await writeJson2(join4(testDir, "timing.json"), {
|
|
4996
|
+
duration_ms: durationMs,
|
|
4997
|
+
total_duration_seconds: Math.round(durationMs / 10) / 100
|
|
4998
|
+
});
|
|
4999
|
+
console.log(` ${testId}: OK (${durationMs}ms, ${response.length} chars)`);
|
|
5000
|
+
} catch (error) {
|
|
5001
|
+
const durationMs = Math.round(performance.now() - start);
|
|
5002
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
5003
|
+
const response = `ERROR: target failed \u2014 ${message}`;
|
|
5004
|
+
await writeFile5(join4(testDir, "response.md"), response, "utf8");
|
|
5005
|
+
await writeJson2(join4(testDir, "timing.json"), {
|
|
5006
|
+
duration_ms: durationMs,
|
|
5007
|
+
total_duration_seconds: Math.round(durationMs / 10) / 100
|
|
5008
|
+
});
|
|
5009
|
+
console.error(` ${testId}: FAILED (${durationMs}ms) \u2014 ${message.slice(0, 200)}`);
|
|
5010
|
+
} finally {
|
|
5011
|
+
try {
|
|
5012
|
+
if (existsSync2(promptFile)) unlinkSync(promptFile);
|
|
5013
|
+
if (existsSync2(outputFile)) unlinkSync(outputFile);
|
|
5014
|
+
} catch {
|
|
5015
|
+
}
|
|
5016
|
+
}
|
|
5017
|
+
};
|
|
5018
|
+
const allTasks = testIds.map((testId) => invokeTarget(testId));
|
|
5019
|
+
await Promise.all(allTasks);
|
|
5020
|
+
} else {
|
|
5021
|
+
console.log("Agent-as-target mode \u2014 skipping CLI invocation.");
|
|
5022
|
+
}
|
|
5023
|
+
let totalGraders = 0;
|
|
5024
|
+
let totalPassed = 0;
|
|
5025
|
+
for (const testId of testIds) {
|
|
5026
|
+
const testDir = join4(outDir, testId);
|
|
5027
|
+
const codeGradersDir = join4(testDir, "code_graders");
|
|
5028
|
+
const resultsDir = join4(testDir, "code_grader_results");
|
|
5029
|
+
let graderFiles;
|
|
5030
|
+
try {
|
|
5031
|
+
graderFiles = (await readdir3(codeGradersDir)).filter((f) => f.endsWith(".json"));
|
|
5032
|
+
} catch {
|
|
5033
|
+
continue;
|
|
5034
|
+
}
|
|
5035
|
+
if (graderFiles.length === 0) continue;
|
|
5036
|
+
await mkdir4(resultsDir, { recursive: true });
|
|
5037
|
+
const responseText = await readFile4(join4(testDir, "response.md"), "utf8");
|
|
5038
|
+
const inputData = JSON.parse(await readFile4(join4(testDir, "input.json"), "utf8"));
|
|
5039
|
+
for (const graderFile of graderFiles) {
|
|
5040
|
+
const graderConfig = JSON.parse(await readFile4(join4(codeGradersDir, graderFile), "utf8"));
|
|
5041
|
+
const graderName = graderConfig.name;
|
|
5042
|
+
const payload = JSON.stringify({
|
|
5043
|
+
output: [{ role: "assistant", content: responseText }],
|
|
5044
|
+
input: inputData.input_messages,
|
|
5045
|
+
question: inputData.input_text,
|
|
5046
|
+
criteria: "",
|
|
5047
|
+
expected_output: [],
|
|
5048
|
+
reference_answer: "",
|
|
5049
|
+
input_files: [],
|
|
5050
|
+
trace: null,
|
|
5051
|
+
token_usage: null,
|
|
5052
|
+
cost_usd: null,
|
|
5053
|
+
duration_ms: null,
|
|
5054
|
+
start_time: null,
|
|
5055
|
+
end_time: null,
|
|
5056
|
+
file_changes: null,
|
|
5057
|
+
workspace_path: null,
|
|
5058
|
+
config: graderConfig.config ?? null,
|
|
5059
|
+
metadata: {},
|
|
5060
|
+
input_text: inputData.input_text,
|
|
5061
|
+
output_text: responseText,
|
|
5062
|
+
expected_output_text: ""
|
|
5063
|
+
});
|
|
5064
|
+
try {
|
|
5065
|
+
const stdout = await executeScript(
|
|
5066
|
+
graderConfig.command,
|
|
5067
|
+
payload,
|
|
5068
|
+
void 0,
|
|
5069
|
+
graderConfig.cwd
|
|
5070
|
+
);
|
|
5071
|
+
const parsed = JSON.parse(stdout);
|
|
5072
|
+
const score = typeof parsed.score === "number" ? parsed.score : 0;
|
|
5073
|
+
const assertions = Array.isArray(parsed.assertions) ? parsed.assertions : [];
|
|
5074
|
+
await writeFile5(
|
|
5075
|
+
join4(resultsDir, `${graderName}.json`),
|
|
5076
|
+
`${JSON.stringify(
|
|
5077
|
+
{
|
|
5078
|
+
name: graderName,
|
|
5079
|
+
type: "code-grader",
|
|
5080
|
+
score,
|
|
5081
|
+
weight: graderConfig.weight ?? 1,
|
|
5082
|
+
assertions,
|
|
5083
|
+
details: parsed.details ?? {}
|
|
5084
|
+
},
|
|
5085
|
+
null,
|
|
5086
|
+
2
|
|
5087
|
+
)}
|
|
5088
|
+
`,
|
|
5089
|
+
"utf8"
|
|
5090
|
+
);
|
|
5091
|
+
totalGraders++;
|
|
5092
|
+
if (score >= 0.5) totalPassed++;
|
|
5093
|
+
} catch (error) {
|
|
5094
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
5095
|
+
console.error(` ${testId}/${graderName}: ERROR \u2014 ${message}`);
|
|
5096
|
+
await writeFile5(
|
|
5097
|
+
join4(resultsDir, `${graderName}.json`),
|
|
5098
|
+
`${JSON.stringify(
|
|
5099
|
+
{
|
|
5100
|
+
name: graderName,
|
|
5101
|
+
type: "code-grader",
|
|
5102
|
+
score: 0,
|
|
5103
|
+
weight: graderConfig.weight ?? 1,
|
|
5104
|
+
assertions: [{ text: `Error: ${message}`, passed: false }],
|
|
5105
|
+
details: { error: message }
|
|
5106
|
+
},
|
|
5107
|
+
null,
|
|
5108
|
+
2
|
|
5109
|
+
)}
|
|
5110
|
+
`,
|
|
5111
|
+
"utf8"
|
|
5112
|
+
);
|
|
5113
|
+
totalGraders++;
|
|
5114
|
+
}
|
|
5115
|
+
}
|
|
5116
|
+
}
|
|
5117
|
+
console.log(`Graded ${totalGraders} code-grader(s): ${totalPassed} passed`);
|
|
5118
|
+
console.log(`
|
|
5119
|
+
Done. Agent can now perform LLM grading on responses in ${outDir}`);
|
|
5120
|
+
}
|
|
5121
|
+
});
|
|
5122
|
+
async function writeJson2(filePath, data) {
|
|
5123
|
+
await writeFile5(filePath, `${JSON.stringify(data, null, 2)}
|
|
5124
|
+
`, "utf8");
|
|
5125
|
+
}
|
|
5126
|
+
async function writeGraderConfigs2(testDir, assertions, evalDir) {
|
|
5127
|
+
const codeGradersDir = join4(testDir, "code_graders");
|
|
5128
|
+
const llmGradersDir = join4(testDir, "llm_graders");
|
|
5129
|
+
let hasCodeGraders = false;
|
|
5130
|
+
let hasLlmGraders = false;
|
|
5131
|
+
for (const assertion of assertions) {
|
|
5132
|
+
if (assertion.type === "code-grader") {
|
|
5133
|
+
if (!hasCodeGraders) {
|
|
5134
|
+
await mkdir4(codeGradersDir, { recursive: true });
|
|
5135
|
+
hasCodeGraders = true;
|
|
5136
|
+
}
|
|
5137
|
+
const config = assertion;
|
|
5138
|
+
await writeJson2(join4(codeGradersDir, `${config.name}.json`), {
|
|
5139
|
+
name: config.name,
|
|
5140
|
+
command: config.command,
|
|
5141
|
+
cwd: config.resolvedCwd ?? config.cwd ?? evalDir,
|
|
5142
|
+
weight: config.weight ?? 1,
|
|
5143
|
+
config: config.config ?? {}
|
|
5144
|
+
});
|
|
5145
|
+
} else if (assertion.type === "llm-grader") {
|
|
5146
|
+
if (!hasLlmGraders) {
|
|
5147
|
+
await mkdir4(llmGradersDir, { recursive: true });
|
|
5148
|
+
hasLlmGraders = true;
|
|
5149
|
+
}
|
|
5150
|
+
const config = assertion;
|
|
5151
|
+
let promptContent = "";
|
|
5152
|
+
if (config.resolvedPromptPath) {
|
|
5153
|
+
try {
|
|
5154
|
+
promptContent = readFileSync4(config.resolvedPromptPath, "utf8");
|
|
5155
|
+
} catch {
|
|
5156
|
+
promptContent = typeof config.prompt === "string" ? config.prompt : "";
|
|
5157
|
+
}
|
|
5158
|
+
} else if (typeof config.prompt === "string") {
|
|
5159
|
+
promptContent = config.prompt;
|
|
5160
|
+
}
|
|
5161
|
+
await writeJson2(join4(llmGradersDir, `${config.name}.json`), {
|
|
5162
|
+
name: config.name,
|
|
5163
|
+
prompt_content: promptContent,
|
|
5164
|
+
weight: config.weight ?? 1,
|
|
5165
|
+
threshold: 0.5,
|
|
5166
|
+
config: {}
|
|
5167
|
+
});
|
|
5168
|
+
}
|
|
5169
|
+
}
|
|
5170
|
+
}
|
|
5171
|
+
|
|
4817
5172
|
// src/commands/pipeline/index.ts
|
|
4818
5173
|
var pipelineCommand = subcommands({
|
|
4819
5174
|
name: "pipeline",
|
|
@@ -4821,7 +5176,8 @@ var pipelineCommand = subcommands({
|
|
|
4821
5176
|
cmds: {
|
|
4822
5177
|
input: evalInputCommand,
|
|
4823
5178
|
grade: evalGradeCommand,
|
|
4824
|
-
bench: evalBenchCommand
|
|
5179
|
+
bench: evalBenchCommand,
|
|
5180
|
+
run: evalRunCommand2
|
|
4825
5181
|
}
|
|
4826
5182
|
});
|
|
4827
5183
|
|
|
@@ -4829,10 +5185,10 @@ var pipelineCommand = subcommands({
|
|
|
4829
5185
|
import path7 from "node:path";
|
|
4830
5186
|
|
|
4831
5187
|
// src/commands/results/shared.ts
|
|
4832
|
-
import { existsSync as
|
|
5188
|
+
import { existsSync as existsSync3 } from "node:fs";
|
|
4833
5189
|
|
|
4834
5190
|
// src/commands/trace/utils.ts
|
|
4835
|
-
import { readFileSync as
|
|
5191
|
+
import { readFileSync as readFileSync5, readdirSync as readdirSync2, statSync as statSync2 } from "node:fs";
|
|
4836
5192
|
import path6 from "node:path";
|
|
4837
5193
|
var colors2 = {
|
|
4838
5194
|
reset: "\x1B[0m",
|
|
@@ -4872,7 +5228,7 @@ function resolveTraceResultPath(filePath) {
|
|
|
4872
5228
|
return resolveWorkspaceOrFilePath(filePath);
|
|
4873
5229
|
}
|
|
4874
5230
|
function loadJsonlRecords(filePath) {
|
|
4875
|
-
const content =
|
|
5231
|
+
const content = readFileSync5(filePath, "utf8");
|
|
4876
5232
|
const lines = content.trim().split("\n").filter((line) => line.trim());
|
|
4877
5233
|
return lines.map((line, i) => {
|
|
4878
5234
|
const record = JSON.parse(line);
|
|
@@ -4925,7 +5281,7 @@ function toRawResult(result) {
|
|
|
4925
5281
|
};
|
|
4926
5282
|
}
|
|
4927
5283
|
function loadOtlpTraceFile(filePath) {
|
|
4928
|
-
const parsed = JSON.parse(
|
|
5284
|
+
const parsed = JSON.parse(readFileSync5(filePath, "utf8"));
|
|
4929
5285
|
const spans = parsed.resourceSpans?.flatMap((resource) => resource.scopeSpans ?? []).flatMap((scope) => scope.spans ?? []);
|
|
4930
5286
|
if (!spans || spans.length === 0) {
|
|
4931
5287
|
return [];
|
|
@@ -5243,14 +5599,14 @@ async function resolveSourceFile(source, cwd) {
|
|
|
5243
5599
|
let sourceFile;
|
|
5244
5600
|
if (source) {
|
|
5245
5601
|
sourceFile = resolveResultSourcePath(source, cwd);
|
|
5246
|
-
if (!
|
|
5602
|
+
if (!existsSync3(sourceFile)) {
|
|
5247
5603
|
console.error(`Error: File not found: ${sourceFile}`);
|
|
5248
5604
|
process.exit(1);
|
|
5249
5605
|
}
|
|
5250
5606
|
} else {
|
|
5251
5607
|
const cache = await loadRunCache(cwd);
|
|
5252
5608
|
const cachedFile = cache ? resolveRunCacheFile(cache) : "";
|
|
5253
|
-
if (cachedFile &&
|
|
5609
|
+
if (cachedFile && existsSync3(cachedFile)) {
|
|
5254
5610
|
sourceFile = cachedFile;
|
|
5255
5611
|
} else {
|
|
5256
5612
|
const metas = listResultFiles(cwd, 1);
|
|
@@ -5462,7 +5818,7 @@ var resultsShowCommand = command({
|
|
|
5462
5818
|
});
|
|
5463
5819
|
|
|
5464
5820
|
// src/commands/results/summary.ts
|
|
5465
|
-
import { existsSync as
|
|
5821
|
+
import { existsSync as existsSync4, readFileSync as readFileSync6 } from "node:fs";
|
|
5466
5822
|
function formatSummary(results, grading) {
|
|
5467
5823
|
const total = results.length;
|
|
5468
5824
|
let passed;
|
|
@@ -5513,9 +5869,9 @@ var resultsSummaryCommand = command({
|
|
|
5513
5869
|
const { results, sourceFile } = await loadResults(source, cwd);
|
|
5514
5870
|
let grading;
|
|
5515
5871
|
const gradingPath = sourceFile.replace(/\.jsonl$/, ".grading.json");
|
|
5516
|
-
if (
|
|
5872
|
+
if (existsSync4(gradingPath)) {
|
|
5517
5873
|
try {
|
|
5518
|
-
grading = JSON.parse(
|
|
5874
|
+
grading = JSON.parse(readFileSync6(gradingPath, "utf8"));
|
|
5519
5875
|
} catch {
|
|
5520
5876
|
}
|
|
5521
5877
|
}
|
|
@@ -5540,7 +5896,7 @@ var resultsCommand = subcommands({
|
|
|
5540
5896
|
});
|
|
5541
5897
|
|
|
5542
5898
|
// src/commands/results/serve.ts
|
|
5543
|
-
import { existsSync as
|
|
5899
|
+
import { existsSync as existsSync5, readFileSync as readFileSync7, writeFileSync as writeFileSync3 } from "node:fs";
|
|
5544
5900
|
import path8 from "node:path";
|
|
5545
5901
|
import { Hono } from "hono";
|
|
5546
5902
|
function feedbackPath(resultDir) {
|
|
@@ -5548,11 +5904,11 @@ function feedbackPath(resultDir) {
|
|
|
5548
5904
|
}
|
|
5549
5905
|
function readFeedback(cwd) {
|
|
5550
5906
|
const fp = feedbackPath(cwd);
|
|
5551
|
-
if (!
|
|
5907
|
+
if (!existsSync5(fp)) {
|
|
5552
5908
|
return { reviews: [] };
|
|
5553
5909
|
}
|
|
5554
5910
|
try {
|
|
5555
|
-
return JSON.parse(
|
|
5911
|
+
return JSON.parse(readFileSync7(fp, "utf8"));
|
|
5556
5912
|
} catch (err2) {
|
|
5557
5913
|
console.error(`Warning: could not parse ${fp}, starting fresh: ${err2.message}`);
|
|
5558
5914
|
return { reviews: [] };
|
|
@@ -5562,10 +5918,40 @@ function writeFeedback(cwd, data) {
|
|
|
5562
5918
|
writeFileSync3(feedbackPath(cwd), `${JSON.stringify(data, null, 2)}
|
|
5563
5919
|
`, "utf8");
|
|
5564
5920
|
}
|
|
5565
|
-
function createApp(results, resultDir) {
|
|
5921
|
+
function createApp(results, resultDir, cwd, sourceFile) {
|
|
5922
|
+
const searchDir = cwd ?? resultDir;
|
|
5566
5923
|
const app2 = new Hono();
|
|
5567
5924
|
app2.get("/", (c3) => {
|
|
5568
|
-
return c3.html(generateServeHtml(results));
|
|
5925
|
+
return c3.html(generateServeHtml(results, sourceFile));
|
|
5926
|
+
});
|
|
5927
|
+
app2.get("/api/runs", (c3) => {
|
|
5928
|
+
const metas = listResultFiles(searchDir);
|
|
5929
|
+
return c3.json({
|
|
5930
|
+
runs: metas.map((m) => ({
|
|
5931
|
+
filename: m.filename,
|
|
5932
|
+
path: m.path,
|
|
5933
|
+
timestamp: m.timestamp,
|
|
5934
|
+
test_count: m.testCount,
|
|
5935
|
+
pass_rate: m.passRate,
|
|
5936
|
+
avg_score: m.avgScore,
|
|
5937
|
+
size_bytes: m.sizeBytes
|
|
5938
|
+
}))
|
|
5939
|
+
});
|
|
5940
|
+
});
|
|
5941
|
+
app2.get("/api/runs/:filename", (c3) => {
|
|
5942
|
+
const filename = c3.req.param("filename");
|
|
5943
|
+
const metas = listResultFiles(searchDir);
|
|
5944
|
+
const meta = metas.find((m) => m.filename === filename);
|
|
5945
|
+
if (!meta) {
|
|
5946
|
+
return c3.json({ error: "Run not found" }, 404);
|
|
5947
|
+
}
|
|
5948
|
+
try {
|
|
5949
|
+
const loaded = patchTestIds(loadManifestResults(meta.path));
|
|
5950
|
+
const lightResults = stripHeavyFields(loaded);
|
|
5951
|
+
return c3.json({ results: lightResults, source: meta.filename });
|
|
5952
|
+
} catch (err2) {
|
|
5953
|
+
return c3.json({ error: "Failed to load run" }, 500);
|
|
5954
|
+
}
|
|
5569
5955
|
});
|
|
5570
5956
|
app2.get("/api/feedback", (c3) => {
|
|
5571
5957
|
const data = readFeedback(resultDir);
|
|
@@ -5611,11 +5997,8 @@ function createApp(results, resultDir) {
|
|
|
5611
5997
|
});
|
|
5612
5998
|
return app2;
|
|
5613
5999
|
}
|
|
5614
|
-
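function escapeHtml(s) {
|
|
5615
|
-
return s.replace(/&/g, "&amp;").replace(/</g, "&lt;").replace(/>/g, "&gt;").replace(/"/g, "&quot;");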
|
|
5616
|
-
}
|
|
5617
|
-
function generateServeHtml(results) {
|
|
5618
|
-
const lightResults = results.map((r) => {
|
|
6000
|
+
function stripHeavyFields(results) {
|
|
6001
|
+
return results.map((r) => {
|
|
5619
6002
|
const { requests, trace, ...rest } = r;
|
|
5620
6003
|
const toolCalls = trace?.toolCalls && Object.keys(trace.toolCalls).length > 0 ? trace.toolCalls : void 0;
|
|
5621
6004
|
const graderDurationMs = (r.scores ?? []).reduce((sum, s) => sum + (s.durationMs ?? 0), 0);
|
|
@@ -5625,6 +6008,12 @@ function generateServeHtml(results) {
|
|
|
5625
6008
|
...graderDurationMs > 0 && { _graderDurationMs: graderDurationMs }
|
|
5626
6009
|
};
|
|
5627
6010
|
});
|
|
6011
|
+
}
|
|
6012
|
+
function escapeHtml(s) {
|
|
6013
|
+
return s.replace(/&/g, "&amp;").replace(/</g, "&lt;").replace(/>/g, "&gt;").replace(/"/g, "&quot;");
|
|
6014
|
+
}
|
|
6015
|
+
function generateServeHtml(results, sourceFile) {
|
|
6016
|
+
const lightResults = stripHeavyFields(results);
|
|
5628
6017
|
const dataJson = JSON.stringify(lightResults).replace(/</g, "\\u003c").replace(/>/g, "\\u003e").replace(/\u2028/g, "\\u2028").replace(/\u2029/g, "\\u2029");
|
|
5629
6018
|
return `<!DOCTYPE html>
|
|
5630
6019
|
<html lang="en">
|
|
@@ -5642,6 +6031,11 @@ ${SERVE_STYLES}
|
|
|
5642
6031
|
<h1 class="header-title">AgentV</h1>
|
|
5643
6032
|
<span class="header-subtitle">Results Review</span>
|
|
5644
6033
|
</div>
|
|
6034
|
+
<div class="header-center">
|
|
6035
|
+
<select id="run-picker" class="run-picker" title="Switch result file">
|
|
6036
|
+
<option value="">Loading runs...</option>
|
|
6037
|
+
</select>
|
|
6038
|
+
</div>
|
|
5645
6039
|
<div class="header-right">
|
|
5646
6040
|
<span class="timestamp">${escapeHtml((/* @__PURE__ */ new Date()).toISOString())}</span>
|
|
5647
6041
|
</div>
|
|
@@ -5653,6 +6047,7 @@ ${SERVE_STYLES}
|
|
|
5653
6047
|
<main id="app"></main>
|
|
5654
6048
|
<script>
|
|
5655
6049
|
var DATA = ${dataJson};
|
|
6050
|
+
var INITIAL_SOURCE = ${sourceFile ? JSON.stringify(path8.basename(sourceFile)).replace(/</g, "\\u003c").replace(/>/g, "\\u003e") : "null"};
|
|
5656
6051
|
${SERVE_SCRIPT}
|
|
5657
6052
|
</script>
|
|
5658
6053
|
</body>
|
|
@@ -5679,6 +6074,10 @@ body{font-family:var(--font);background:var(--bg);color:var(--text);line-height:
|
|
|
5679
6074
|
.header-left{display:flex;align-items:baseline;gap:12px}
|
|
5680
6075
|
.header-title{font-size:18px;font-weight:600}
|
|
5681
6076
|
.header-subtitle{font-size:14px;color:var(--text-muted)}
|
|
6077
|
+
.header-center{flex:1;display:flex;justify-content:center;padding:0 16px}
|
|
6078
|
+
.run-picker{padding:6px 10px;border:1px solid var(--border);border-radius:var(--radius);font-size:13px;background:var(--surface);color:var(--text);font-family:var(--font);max-width:400px;width:100%;cursor:pointer}
|
|
6079
|
+
.run-picker:hover{border-color:var(--primary)}
|
|
6080
|
+
.run-picker:focus{outline:none;border-color:var(--primary);box-shadow:0 0 0 3px var(--primary-bg)}
|
|
5682
6081
|
.timestamp{font-size:12px;color:var(--text-muted);font-family:var(--mono)}
|
|
5683
6082
|
|
|
5684
6083
|
/* Tabs */
|
|
@@ -5778,6 +6177,11 @@ body{font-family:var(--font);background:var(--bg);color:var(--text);line-height:
|
|
|
5778
6177
|
.tool-tag{display:inline-block;padding:2px 10px;font-size:12px;font-family:var(--mono);background:var(--primary-bg);color:var(--primary);border:1px solid var(--border);border-radius:12px}
|
|
5779
6178
|
.empty-state{text-align:center;padding:48px 24px;color:var(--text-muted)}
|
|
5780
6179
|
.empty-state h3{font-size:16px;margin-bottom:8px;color:var(--text)}
|
|
6180
|
+
.welcome-state{text-align:center;padding:80px 24px;color:var(--text-muted)}
|
|
6181
|
+
.welcome-state h2{font-size:24px;margin-bottom:12px;color:var(--text);font-weight:600}
|
|
6182
|
+
.welcome-state p{font-size:15px;margin-bottom:8px;max-width:500px;margin-left:auto;margin-right:auto}
|
|
6183
|
+
.welcome-state code{font-family:var(--mono);background:var(--surface);border:1px solid var(--border);border-radius:3px;padding:2px 6px;font-size:13px}
|
|
6184
|
+
.welcome-state .hint{margin-top:24px;font-size:13px;color:var(--text-muted)}
|
|
5781
6185
|
|
|
5782
6186
|
/* Feedback */
|
|
5783
6187
|
.feedback-section{margin-top:16px;padding-top:16px;border-top:1px solid var(--border-light)}
|
|
@@ -5935,7 +6339,15 @@ var SERVE_SCRIPT = `
|
|
|
5935
6339
|
|
|
5936
6340
|
/* ---- render ---- */
|
|
5937
6341
|
function render(){
|
|
5938
|
-
if(DATA.length===0){
|
|
6342
|
+
if(DATA.length===0){
|
|
6343
|
+
app.innerHTML='<div class="welcome-state">'
|
|
6344
|
+
+'<h2>No results yet</h2>'
|
|
6345
|
+
+'<p>Run an evaluation or mount a results directory to see results here.</p>'
|
|
6346
|
+
+'<p><code>agentv eval &lt;eval-file&gt;</code></p>'
|
|
6347
|
+
+'<p class="hint">The dashboard will automatically detect new result files.</p>'
|
|
6348
|
+
+'</div>';
|
|
6349
|
+
return;
|
|
6350
|
+
}
|
|
5939
6351
|
if(state.tab==="overview")renderOverview();else renderTests();
|
|
5940
6352
|
}
|
|
5941
6353
|
|
|
@@ -6198,6 +6610,69 @@ var SERVE_SCRIPT = `
|
|
|
6198
6610
|
return h;
|
|
6199
6611
|
}
|
|
6200
6612
|
|
|
6613
|
+
/* ---- run picker ---- */
|
|
6614
|
+
var runPicker=document.getElementById("run-picker");
|
|
6615
|
+
var knownRunFilenames=[];
|
|
6616
|
+
|
|
6617
|
+
function refreshRunList(){
|
|
6618
|
+
fetch("/api/runs").then(function(r){return r.json();}).then(function(d){
|
|
6619
|
+
if(!d||!d.runs)return;
|
|
6620
|
+
var runs=d.runs;
|
|
6621
|
+
var newFilenames=runs.map(function(r){return r.filename;});
|
|
6622
|
+
|
|
6623
|
+
/* Detect new runs that appeared since last poll */
|
|
6624
|
+
if(knownRunFilenames.length>0){
|
|
6625
|
+
var hasNew=newFilenames.some(function(f){return knownRunFilenames.indexOf(f)===-1;});
|
|
6626
|
+
if(hasNew&&DATA.length===0){
|
|
6627
|
+
/* Auto-load the first (most recent) run when starting from empty state */
|
|
6628
|
+
loadRun(runs[0].filename);
|
|
6629
|
+
}
|
|
6630
|
+
}
|
|
6631
|
+
knownRunFilenames=newFilenames;
|
|
6632
|
+
|
|
6633
|
+
/* Rebuild picker options */
|
|
6634
|
+
var h='<option value="">Select a result file...</option>';
|
|
6635
|
+
if(runs.length===0){
|
|
6636
|
+
h='<option value="">No result files</option>';
|
|
6637
|
+
}
|
|
6638
|
+
for(var i=0;i<runs.length;i++){
|
|
6639
|
+
var r=runs[i];
|
|
6640
|
+
var label=r.filename+" ("+r.test_count+" tests, "+(r.pass_rate*100).toFixed(0)+"% pass)";
|
|
6641
|
+
h+='<option value="'+esc(r.filename)+'">'+esc(label)+"</option>";
|
|
6642
|
+
}
|
|
6643
|
+
runPicker.innerHTML=h;
|
|
6644
|
+
/* Pre-select the initially loaded run */
|
|
6645
|
+
if(INITIAL_SOURCE&&runs.length>0){
|
|
6646
|
+
runPicker.value=INITIAL_SOURCE;
|
|
6647
|
+
}
|
|
6648
|
+
}).catch(function(err){console.warn("Failed to refresh run list:",err);});
|
|
6649
|
+
}
|
|
6650
|
+
|
|
6651
|
+
/*
 * Load one result file from /api/runs/<filename> and make it the active run.
 *
 * filename: value of the chosen picker option; it is URI-encoded before being
 *           placed in the request path.
 *
 * On success the globals DATA, stats, tgtStats and tgtNames are replaced, the
 * expanded-row state and feedback cache are cleared, feedback is reloaded, the
 * dashboard is re-rendered and the picker is pointed at the loaded file.
 *
 * The payload is validated before any global is touched: a body carrying an
 * error field, or one without a results array, is logged with console.error
 * and the currently displayed run is left intact. Network and parse failures
 * are logged the same way. Nothing is thrown; returns nothing.
 */
function loadRun(filename){
  fetch("/api/runs/"+encodeURIComponent(filename)).then(function(r){return r.json();}).then(function(d){
    if(d&&d.error){console.error(d.error);return;}
    if(!d||!Array.isArray(d.results)){
      /* Refuse to overwrite DATA with a non-array: other code reads DATA.length */
      console.error("Failed to load run:",new Error("Response for "+filename+" has no results array"));
      return;
    }
    DATA=d.results;
    stats=computeStats(DATA);
    tgtStats=computeTargets(DATA);
    tgtNames=tgtStats.map(function(t){return t.target;});
    /* Per-run UI state must not leak into the newly loaded run */
    state.expanded={};
    feedbackCache={};
    loadFeedback();
    render();
    /* Update picker selection */
    runPicker.value=filename;
  }).catch(function(err){console.error("Failed to load run:",err);});
}
|
|
6666
|
+
|
|
6667
|
+
/* Choosing an entry in the picker loads that run; the empty placeholder
   option carries no filename and is ignored. */
runPicker.addEventListener("change",function(){
  var chosen=runPicker.value;
  if(!chosen)return;
  loadRun(chosen);
});

/* Fill the picker once right away, then poll for new result files every 5 seconds */
refreshRunList();
setInterval(refreshRunList,5000);
|
|
6675
|
+
|
|
6201
6676
|
/* ---- init ---- */
|
|
6202
6677
|
loadFeedback();
|
|
6203
6678
|
render();
|
|
@@ -6216,7 +6691,7 @@ var resultsServeCommand = command({
|
|
|
6216
6691
|
type: optional(number),
|
|
6217
6692
|
long: "port",
|
|
6218
6693
|
short: "p",
|
|
6219
|
-
description: "Port to listen on (
|
|
6694
|
+
description: "Port to listen on (flag \u2192 PORT env var \u2192 3117)"
|
|
6220
6695
|
}),
|
|
6221
6696
|
dir: option({
|
|
6222
6697
|
type: optional(string),
|
|
@@ -6227,14 +6702,43 @@ var resultsServeCommand = command({
|
|
|
6227
6702
|
},
|
|
6228
6703
|
handler: async ({ source, port, dir }) => {
|
|
6229
6704
|
const cwd = dir ?? process.cwd();
|
|
6230
|
-
const listenPort = port ?? 3117;
|
|
6705
|
+
const listenPort = port ?? (process.env.PORT ? Number(process.env.PORT) : 3117);
|
|
6231
6706
|
try {
|
|
6232
|
-
|
|
6233
|
-
|
|
6234
|
-
|
|
6235
|
-
|
|
6707
|
+
let results = [];
|
|
6708
|
+
let sourceFile;
|
|
6709
|
+
if (source) {
|
|
6710
|
+
const resolved = resolveResultSourcePath(source, cwd);
|
|
6711
|
+
if (!existsSync5(resolved)) {
|
|
6712
|
+
console.error(`Error: Source file not found: ${resolved}`);
|
|
6713
|
+
process.exit(1);
|
|
6714
|
+
}
|
|
6715
|
+
sourceFile = resolved;
|
|
6716
|
+
results = patchTestIds(loadManifestResults(resolved));
|
|
6717
|
+
} else {
|
|
6718
|
+
const cache = await loadRunCache(cwd);
|
|
6719
|
+
const cachedFile = cache ? resolveRunCacheFile(cache) : "";
|
|
6720
|
+
if (cachedFile && existsSync5(cachedFile)) {
|
|
6721
|
+
sourceFile = cachedFile;
|
|
6722
|
+
results = patchTestIds(loadManifestResults(cachedFile));
|
|
6723
|
+
} else {
|
|
6724
|
+
const metas = listResultFiles(cwd, 1);
|
|
6725
|
+
if (metas.length > 0) {
|
|
6726
|
+
sourceFile = metas[0].path;
|
|
6727
|
+
results = patchTestIds(loadManifestResults(metas[0].path));
|
|
6728
|
+
}
|
|
6729
|
+
}
|
|
6730
|
+
}
|
|
6731
|
+
const resultDir = sourceFile ? path8.dirname(path8.resolve(sourceFile)) : cwd;
|
|
6732
|
+
const app2 = createApp(results, resultDir, cwd, sourceFile);
|
|
6733
|
+
if (results.length > 0 && sourceFile) {
|
|
6734
|
+
console.log(`Serving ${results.length} result(s) from ${sourceFile}`);
|
|
6735
|
+
} else {
|
|
6736
|
+
console.log("No results found. Dashboard will show an empty state.");
|
|
6737
|
+
console.log("Run an evaluation to see results: agentv eval <eval-file>");
|
|
6738
|
+
}
|
|
6236
6739
|
console.log(`Dashboard: http://localhost:${listenPort}`);
|
|
6237
6740
|
console.log(`Feedback API: http://localhost:${listenPort}/api/feedback`);
|
|
6741
|
+
console.log(`Result picker API: http://localhost:${listenPort}/api/runs`);
|
|
6238
6742
|
console.log(`Feedback file: ${feedbackPath(resultDir)}`);
|
|
6239
6743
|
console.log("Press Ctrl+C to stop");
|
|
6240
6744
|
const { serve: startServer } = await import("@hono/node-server");
|
|
@@ -6263,7 +6767,7 @@ function detectPackageManager() {
|
|
|
6263
6767
|
return detectPackageManagerFromPath(process.argv[1] ?? "");
|
|
6264
6768
|
}
|
|
6265
6769
|
function runCommand(cmd, args) {
|
|
6266
|
-
return new Promise((
|
|
6770
|
+
return new Promise((resolve3, reject) => {
|
|
6267
6771
|
const child = spawn(cmd, args, { stdio: ["inherit", "pipe", "inherit"], shell: true });
|
|
6268
6772
|
let stdout = "";
|
|
6269
6773
|
child.stdout?.on("data", (data) => {
|
|
@@ -6271,7 +6775,7 @@ function runCommand(cmd, args) {
|
|
|
6271
6775
|
stdout += data.toString();
|
|
6272
6776
|
});
|
|
6273
6777
|
child.on("error", reject);
|
|
6274
|
-
child.on("close", (code) =>
|
|
6778
|
+
child.on("close", (code) => resolve3({ exitCode: code ?? 1, stdout }));
|
|
6275
6779
|
});
|
|
6276
6780
|
}
|
|
6277
6781
|
var updateCommand = command({
|
|
@@ -7179,7 +7683,7 @@ var transpileCommand = command({
|
|
|
7179
7683
|
});
|
|
7180
7684
|
|
|
7181
7685
|
// src/commands/trim/index.ts
|
|
7182
|
-
import { readFileSync as
|
|
7686
|
+
import { readFileSync as readFileSync8, writeFileSync as writeFileSync5 } from "node:fs";
|
|
7183
7687
|
var trimCommand = command({
|
|
7184
7688
|
name: "trim",
|
|
7185
7689
|
description: "Trim evaluation results for baseline storage (strips debug/audit fields)",
|
|
@@ -7198,7 +7702,7 @@ var trimCommand = command({
|
|
|
7198
7702
|
},
|
|
7199
7703
|
handler: async ({ input, out }) => {
|
|
7200
7704
|
try {
|
|
7201
|
-
const content =
|
|
7705
|
+
const content = readFileSync8(input, "utf8");
|
|
7202
7706
|
const lines = content.trim().split("\n").filter((line) => line.trim());
|
|
7203
7707
|
const trimmedLines = lines.map((line) => {
|
|
7204
7708
|
const record = JSON.parse(line);
|
|
@@ -7304,7 +7808,7 @@ function isTTY() {
|
|
|
7304
7808
|
|
|
7305
7809
|
// src/commands/validate/validate-files.ts
|
|
7306
7810
|
import { constants } from "node:fs";
|
|
7307
|
-
import { access, readdir as
|
|
7811
|
+
import { access, readdir as readdir4, stat } from "node:fs/promises";
|
|
7308
7812
|
import path10 from "node:path";
|
|
7309
7813
|
async function validateFiles(paths) {
|
|
7310
7814
|
const filePaths = await expandPaths(paths);
|
|
@@ -7370,7 +7874,7 @@ async function expandPaths(paths) {
|
|
|
7370
7874
|
async function findYamlFiles(dirPath) {
|
|
7371
7875
|
const results = [];
|
|
7372
7876
|
try {
|
|
7373
|
-
const entries2 = await
|
|
7877
|
+
const entries2 = await readdir4(dirPath, { withFileTypes: true });
|
|
7374
7878
|
for (const entry of entries2) {
|
|
7375
7879
|
const fullPath = path10.join(dirPath, entry.name);
|
|
7376
7880
|
if (entry.isDirectory()) {
|
|
@@ -7427,14 +7931,14 @@ var validateCommand = command({
|
|
|
7427
7931
|
});
|
|
7428
7932
|
|
|
7429
7933
|
// src/commands/workspace/clean.ts
|
|
7430
|
-
import { existsSync as
|
|
7431
|
-
import { readFile as
|
|
7934
|
+
import { existsSync as existsSync6 } from "node:fs";
|
|
7935
|
+
import { readFile as readFile5, readdir as readdir5, rm } from "node:fs/promises";
|
|
7432
7936
|
import path11 from "node:path";
|
|
7433
7937
|
async function confirm(message) {
|
|
7434
7938
|
const readline2 = await import("node:readline");
|
|
7435
7939
|
const rl = readline2.createInterface({ input: process.stdin, output: process.stdout });
|
|
7436
|
-
const answer = await new Promise((
|
|
7437
|
-
rl.question(`${message} [y/N] `,
|
|
7940
|
+
const answer = await new Promise((resolve3) => {
|
|
7941
|
+
rl.question(`${message} [y/N] `, resolve3);
|
|
7438
7942
|
});
|
|
7439
7943
|
rl.close();
|
|
7440
7944
|
return answer.toLowerCase() === "y";
|
|
@@ -7456,19 +7960,19 @@ var cleanCommand = command({
|
|
|
7456
7960
|
},
|
|
7457
7961
|
handler: async ({ repo, force }) => {
|
|
7458
7962
|
const poolRoot = getWorkspacePoolRoot();
|
|
7459
|
-
if (!
|
|
7963
|
+
if (!existsSync6(poolRoot)) {
|
|
7460
7964
|
console.log("No workspace pool entries found.");
|
|
7461
7965
|
return;
|
|
7462
7966
|
}
|
|
7463
7967
|
if (repo) {
|
|
7464
|
-
const entries2 = await
|
|
7968
|
+
const entries2 = await readdir5(poolRoot, { withFileTypes: true });
|
|
7465
7969
|
const poolDirs = entries2.filter((e) => e.isDirectory());
|
|
7466
7970
|
const matchingDirs = [];
|
|
7467
7971
|
for (const dir of poolDirs) {
|
|
7468
7972
|
const poolDir = path11.join(poolRoot, dir.name);
|
|
7469
7973
|
const metadataPath = path11.join(poolDir, "metadata.json");
|
|
7470
7974
|
try {
|
|
7471
|
-
const raw = await
|
|
7975
|
+
const raw = await readFile5(metadataPath, "utf-8");
|
|
7472
7976
|
const metadata = JSON.parse(raw);
|
|
7473
7977
|
const hasRepo = metadata.repos?.some((r) => {
|
|
7474
7978
|
if (r.source.type === "git" && r.source.url) {
|
|
@@ -7515,13 +8019,13 @@ var cleanCommand = command({
|
|
|
7515
8019
|
});
|
|
7516
8020
|
|
|
7517
8021
|
// src/commands/workspace/list.ts
|
|
7518
|
-
import { existsSync as
|
|
7519
|
-
import { readFile as
|
|
8022
|
+
import { existsSync as existsSync7 } from "node:fs";
|
|
8023
|
+
import { readFile as readFile6, readdir as readdir6, stat as stat2 } from "node:fs/promises";
|
|
7520
8024
|
import path12 from "node:path";
|
|
7521
8025
|
async function getDirectorySize(dirPath) {
|
|
7522
8026
|
let totalSize = 0;
|
|
7523
8027
|
try {
|
|
7524
|
-
const entries2 = await
|
|
8028
|
+
const entries2 = await readdir6(dirPath, { withFileTypes: true });
|
|
7525
8029
|
for (const entry of entries2) {
|
|
7526
8030
|
const fullPath = path12.join(dirPath, entry.name);
|
|
7527
8031
|
if (entry.isDirectory()) {
|
|
@@ -7547,11 +8051,11 @@ var listCommand = command({
|
|
|
7547
8051
|
args: {},
|
|
7548
8052
|
handler: async () => {
|
|
7549
8053
|
const poolRoot = getWorkspacePoolRoot();
|
|
7550
|
-
if (!
|
|
8054
|
+
if (!existsSync7(poolRoot)) {
|
|
7551
8055
|
console.log("No workspace pool entries found.");
|
|
7552
8056
|
return;
|
|
7553
8057
|
}
|
|
7554
|
-
const entries2 = await
|
|
8058
|
+
const entries2 = await readdir6(poolRoot, { withFileTypes: true });
|
|
7555
8059
|
const poolDirs = entries2.filter((e) => e.isDirectory());
|
|
7556
8060
|
if (poolDirs.length === 0) {
|
|
7557
8061
|
console.log("No workspace pool entries found.");
|
|
@@ -7560,12 +8064,12 @@ var listCommand = command({
|
|
|
7560
8064
|
for (const dir of poolDirs) {
|
|
7561
8065
|
const poolDir = path12.join(poolRoot, dir.name);
|
|
7562
8066
|
const fingerprint = dir.name;
|
|
7563
|
-
const poolEntries = await
|
|
8067
|
+
const poolEntries = await readdir6(poolDir, { withFileTypes: true });
|
|
7564
8068
|
const slots = poolEntries.filter((e) => e.isDirectory() && e.name.startsWith("slot-"));
|
|
7565
8069
|
const metadataPath = path12.join(poolDir, "metadata.json");
|
|
7566
8070
|
let metadata = null;
|
|
7567
8071
|
try {
|
|
7568
|
-
const raw = await
|
|
8072
|
+
const raw = await readFile6(metadataPath, "utf-8");
|
|
7569
8073
|
metadata = JSON.parse(raw);
|
|
7570
8074
|
} catch {
|
|
7571
8075
|
}
|
|
@@ -7602,16 +8106,16 @@ var workspaceCommand = subcommands({
|
|
|
7602
8106
|
|
|
7603
8107
|
// src/update-check.ts
|
|
7604
8108
|
import { spawn as spawn2 } from "node:child_process";
|
|
7605
|
-
import { readFile as
|
|
7606
|
-
import { join as
|
|
8109
|
+
import { readFile as readFile7 } from "node:fs/promises";
|
|
8110
|
+
import { join as join5 } from "node:path";
|
|
7607
8111
|
var CHECK_INTERVAL_MS = 24 * 60 * 60 * 1e3;
|
|
7608
8112
|
var AGENTV_DIR = getAgentvHome();
|
|
7609
8113
|
var CACHE_FILE = "version-check.json";
|
|
7610
8114
|
var NPM_REGISTRY_URL = "https://registry.npmjs.org/agentv/latest";
|
|
7611
8115
|
async function getCachedUpdateInfo(path13) {
|
|
7612
|
-
const filePath = path13 ??
|
|
8116
|
+
const filePath = path13 ?? join5(AGENTV_DIR, CACHE_FILE);
|
|
7613
8117
|
try {
|
|
7614
|
-
const raw = await
|
|
8118
|
+
const raw = await readFile7(filePath, "utf-8");
|
|
7615
8119
|
const data = JSON.parse(raw);
|
|
7616
8120
|
if (typeof data.latestVersion === "string" && typeof data.lastCheckedAt === "string") {
|
|
7617
8121
|
return data;
|
|
@@ -7643,7 +8147,7 @@ function buildNotice(currentVersion, latestVersion) {
|
|
|
7643
8147
|
}
|
|
7644
8148
|
function backgroundUpdateCheck() {
|
|
7645
8149
|
const dir = AGENTV_DIR;
|
|
7646
|
-
const filePath =
|
|
8150
|
+
const filePath = join5(dir, CACHE_FILE);
|
|
7647
8151
|
const script = `
|
|
7648
8152
|
const https = require('https');
|
|
7649
8153
|
const fs = require('fs');
|
|
@@ -7766,4 +8270,4 @@ export {
|
|
|
7766
8270
|
preprocessArgv,
|
|
7767
8271
|
runCli
|
|
7768
8272
|
};
|
|
7769
|
-
//# sourceMappingURL=chunk-
|
|
8273
|
+
//# sourceMappingURL=chunk-3UW7KUQ3.js.map
|