agentv 3.14.0 → 3.14.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-W6CGDNQR.js → chunk-3UW7KUQ3.js} +402 -46
- package/dist/chunk-3UW7KUQ3.js.map +1 -0
- package/dist/{chunk-YYECEMUV.js → chunk-75PQBKLR.js} +5 -8
- package/dist/chunk-75PQBKLR.js.map +1 -0
- package/dist/{chunk-3TBDSUYD.js → chunk-ELQEFMGO.js} +115 -20
- package/dist/chunk-ELQEFMGO.js.map +1 -0
- package/dist/cli.js +3 -3
- package/dist/{dist-ZGLENPVH.js → dist-5EEXTTC3.js} +2 -2
- package/dist/index.js +3 -3
- package/dist/{interactive-AI75XY3X.js → interactive-Q563ULAR.js} +3 -3
- package/package.json +2 -5
- package/dist/chunk-3TBDSUYD.js.map +0 -1
- package/dist/chunk-W6CGDNQR.js.map +0 -1
- package/dist/chunk-YYECEMUV.js.map +0 -1
- /package/dist/{dist-ZGLENPVH.js.map → dist-5EEXTTC3.js.map} +0 -0
- /package/dist/{interactive-AI75XY3X.js.map → interactive-Q563ULAR.js.map} +0 -0
|
@@ -22,7 +22,7 @@ import {
|
|
|
22
22
|
validateFileReferences,
|
|
23
23
|
validateTargetsFile,
|
|
24
24
|
writeArtifactsFromResults
|
|
25
|
-
} from "./chunk-
|
|
25
|
+
} from "./chunk-75PQBKLR.js";
|
|
26
26
|
import {
|
|
27
27
|
createBuiltinRegistry,
|
|
28
28
|
executeScript,
|
|
@@ -39,7 +39,7 @@ import {
|
|
|
39
39
|
toSnakeCaseDeep as toSnakeCaseDeep2,
|
|
40
40
|
transpileEvalYamlFile,
|
|
41
41
|
trimBaselineResult
|
|
42
|
-
} from "./chunk-
|
|
42
|
+
} from "./chunk-ELQEFMGO.js";
|
|
43
43
|
import {
|
|
44
44
|
__commonJS,
|
|
45
45
|
__esm,
|
|
@@ -4185,7 +4185,7 @@ var evalRunCommand = command({
|
|
|
4185
4185
|
},
|
|
4186
4186
|
handler: async (args) => {
|
|
4187
4187
|
if (args.evalPaths.length === 0 && process.stdin.isTTY) {
|
|
4188
|
-
const { launchInteractiveWizard } = await import("./interactive-
|
|
4188
|
+
const { launchInteractiveWizard } = await import("./interactive-Q563ULAR.js");
|
|
4189
4189
|
await launchInteractiveWizard();
|
|
4190
4190
|
return;
|
|
4191
4191
|
}
|
|
@@ -4408,13 +4408,23 @@ var evalBenchCommand = command({
|
|
|
4408
4408
|
type: string,
|
|
4409
4409
|
displayName: "export-dir",
|
|
4410
4410
|
description: "Export directory from pipeline input/grade"
|
|
4411
|
+
}),
|
|
4412
|
+
llmScores: option({
|
|
4413
|
+
type: optional(string),
|
|
4414
|
+
long: "llm-scores",
|
|
4415
|
+
description: "Path to LLM scores JSON file (reads from stdin if omitted)"
|
|
4411
4416
|
})
|
|
4412
4417
|
},
|
|
4413
|
-
handler: async ({ exportDir }) => {
|
|
4418
|
+
handler: async ({ exportDir, llmScores: llmScoresPath }) => {
|
|
4414
4419
|
const manifest = JSON.parse(await readFile(join(exportDir, "manifest.json"), "utf8"));
|
|
4415
4420
|
const testIds = manifest.test_ids;
|
|
4416
4421
|
const targetName = manifest.target?.name ?? "unknown";
|
|
4417
|
-
|
|
4422
|
+
let stdinData;
|
|
4423
|
+
if (llmScoresPath) {
|
|
4424
|
+
stdinData = await readFile(llmScoresPath, "utf8");
|
|
4425
|
+
} else {
|
|
4426
|
+
stdinData = await readStdin();
|
|
4427
|
+
}
|
|
4418
4428
|
const llmScores = stdinData ? JSON.parse(stdinData) : {};
|
|
4419
4429
|
const indexLines = [];
|
|
4420
4430
|
const allPassRates = [];
|
|
@@ -4814,6 +4824,351 @@ async function writeJson(filePath, data) {
|
|
|
4814
4824
|
`, "utf8");
|
|
4815
4825
|
}
|
|
4816
4826
|
|
|
4827
|
+
// src/commands/pipeline/run.ts
|
|
4828
|
+
import { execSync } from "node:child_process";
|
|
4829
|
+
import { existsSync as existsSync2, readFileSync as readFileSync4, unlinkSync } from "node:fs";
|
|
4830
|
+
import { mkdir as mkdir4, readFile as readFile4, readdir as readdir3, writeFile as writeFile5 } from "node:fs/promises";
|
|
4831
|
+
import { tmpdir } from "node:os";
|
|
4832
|
+
import { dirname as dirname2, join as join4, resolve as resolve2 } from "node:path";
|
|
4833
|
+
/**
 * Find the nearest `.env` file by walking from `dir` up to the filesystem
 * root, and return its variables as a plain object.
 *
 * Only the first `.env` found is read; files in ancestor directories above it
 * are ignored. When no `.env` exists on the path, an empty object is returned.
 *
 * @param {string} dir - Directory to start searching from (resolved to absolute).
 * @returns {Record<string, string>} Parsed key/value pairs.
 */
function loadEnvFile(dir) {
  let current = resolve2(dir);
  while (true) {
    const candidate = join4(current, ".env");
    if (existsSync2(candidate)) {
      return parseEnvFileContents(readFileSync4(candidate, "utf8"));
    }
    const parent = dirname2(current);
    // dirname() of the filesystem root is the root itself: nothing left to search.
    if (parent === current) return {};
    current = parent;
  }
}

/**
 * Parse the text of a `.env` file into key/value pairs.
 *
 * Blank lines, `#` comment lines and lines without `=` are skipped. A leading
 * `export ` on the key and one pair of matching surrounding quotes on the
 * value are removed, so `export KEY="a b"` yields `{ KEY: "a b" }`.
 *
 * @param {string} text - Raw file contents.
 * @returns {Record<string, string>} Parsed key/value pairs.
 */
function parseEnvFileContents(text) {
  const env = {};
  for (const rawLine of text.split("\n")) {
    // trim() also drops the "\r" left behind by CRLF line endings.
    const line = rawLine.trim();
    if (line === "" || line.startsWith("#")) continue;
    const eqIdx = line.indexOf("=");
    if (eqIdx === -1) continue;
    const key = line.slice(0, eqIdx).trim().replace(/^export\s+/, "");
    // A line such as "=value" has no usable variable name.
    if (key === "") continue;
    let value = line.slice(eqIdx + 1).trim();
    const quote = value[0];
    if (value.length >= 2 && (quote === '"' || quote === "'") && value.endsWith(quote)) {
      // Quotes are .env syntax, not part of the value handed to child processes.
      value = value.slice(1, -1);
    }
    env[key] = value;
  }
  return env;
}
|
|
4854
|
+
/**
 * `pipeline run` subcommand.
 *
 * Runs three phases against one eval YAML file:
 *   1. Extraction: writes `input.json`, `invoke.json`, `criteria.md`,
 *      optional `expected_output.json` and grader configs for every test into
 *      `<out>/<test id>/`, plus a top-level `manifest.json`.
 *   2. Invocation: when the selected target is a CLI target, renders its
 *      command template per test, runs it, and stores `response.md` and
 *      `timing.json`. Skipped for any other target kind.
 *   3. Code grading: runs every `code_graders/*.json` config against the
 *      stored response and writes `code_grader_results/<name>.json`.
 *
 * Exits the process with code 1 when the eval file contains no tests.
 */
var evalRunCommand2 = command({
  name: "run",
  description: "Extract inputs, invoke CLI targets, and run code graders in one step",
  args: {
    evalPath: positional({
      type: string,
      displayName: "eval-path",
      description: "Path to eval YAML file"
    }),
    out: option({
      type: string,
      long: "out",
      description: "Output directory for results"
    }),
    workers: option({
      type: optional(number),
      long: "workers",
      description: "Parallel workers for target invocation (default: all tests)"
    })
  },
  handler: async ({ evalPath, out, workers }) => {
    const resolvedEvalPath = resolve2(evalPath);
    const outDir = resolve2(out);
    const evalDir = dirname2(resolvedEvalPath);
    const repoRoot = await findRepoRoot(evalDir);
    const suite = await loadTestSuite(resolvedEvalPath, repoRoot);
    const tests = suite.tests;
    if (tests.length === 0) {
      console.error("No tests found in eval file.");
      process.exit(1);
    }

    // ---- Target selection -------------------------------------------------
    let targetInfo = null;
    let targetName = "agent";
    let targetKind = "agent";
    try {
      const selection = await selectTarget({
        testFilePath: resolvedEvalPath,
        repoRoot,
        cwd: evalDir,
        dryRun: false,
        dryRunDelay: 0,
        dryRunDelayMin: 0,
        dryRunDelayMax: 0,
        env: process.env
      });
      targetName = selection.targetName;
      if (selection.resolvedTarget.kind === "cli") {
        targetKind = "cli";
        const config = selection.resolvedTarget.config;
        targetInfo = {
          kind: "cli",
          command: config.command,
          cwd: config.cwd ?? evalDir,
          timeoutMs: config.timeoutMs ?? 3e4
        };
      }
    } catch {
      // Best effort: any failure to resolve a target falls back to
      // agent-as-target mode with the defaults assigned above.
      // NOTE(review): this also hides genuine target configuration errors —
      // confirm whether those should be surfaced to the user.
    }

    // ---- Phase 1: extraction ----------------------------------------------
    const testIds = [];
    for (const test of tests) {
      const testDir = join4(outDir, test.id);
      await mkdir4(testDir, { recursive: true });
      testIds.push(test.id);
      await writeJson2(join4(testDir, "input.json"), {
        input_text: test.question,
        input_messages: test.input.map((m) => ({ role: m.role, content: m.content })),
        file_paths: test.file_paths,
        metadata: test.metadata ?? {}
      });
      if (targetInfo) {
        await writeJson2(join4(testDir, "invoke.json"), {
          kind: "cli",
          command: targetInfo.command,
          cwd: targetInfo.cwd,
          timeout_ms: targetInfo.timeoutMs,
          env: {}
        });
      } else {
        await writeJson2(join4(testDir, "invoke.json"), {
          kind: "agent",
          instructions: "Execute this task in the current workspace. The agent IS the target."
        });
      }
      await writeFile5(join4(testDir, "criteria.md"), test.criteria ?? "", "utf8");
      const hasReferenceAnswer = test.reference_answer !== void 0 && test.reference_answer !== "";
      if (test.expected_output.length > 0 || hasReferenceAnswer) {
        await writeJson2(join4(testDir, "expected_output.json"), {
          expected_output: test.expected_output,
          reference_answer: test.reference_answer ?? ""
        });
      }
      await writeGraderConfigs2(testDir, test.assertions ?? [], evalDir);
    }
    await writeJson2(join4(outDir, "manifest.json"), {
      eval_file: resolvedEvalPath,
      timestamp: (/* @__PURE__ */ new Date()).toISOString(),
      target: { name: targetName, kind: targetKind },
      test_ids: testIds
    });
    console.log(`Extracted ${testIds.length} test(s) to ${outDir}`);

    // ---- Phase 2: CLI target invocation -----------------------------------
    if (targetInfo) {
      // Variables from the nearest .env override the inherited environment.
      const mergedEnv = { ...process.env, ...loadEnvFile(evalDir) };
      // Clamp the requested worker count to [1, number of tests].
      const requestedWorkers = Number.isFinite(workers) ? Math.floor(workers) : testIds.length;
      const maxWorkers = Math.max(1, Math.min(requestedWorkers, testIds.length));
      console.log(`Invoking ${testIds.length} CLI target(s) (${maxWorkers} workers)...`);

      const invokeTarget = async (testId) => {
        const testDir = join4(outDir, testId);
        const invoke = JSON.parse(await readFile4(join4(testDir, "invoke.json"), "utf8"));
        if (invoke.kind !== "cli") return;
        const inputData = JSON.parse(await readFile4(join4(testDir, "input.json"), "utf8"));
        const timeoutMs = invoke.timeout_ms ?? 12e4;
        // Test ids may contain characters that are not valid in a file name.
        const safeId = String(testId).replace(/[^\w.-]/g, "_");
        const stamp = Date.now();
        const promptFile = join4(tmpdir(), `agentv-prompt-${safeId}-${stamp}.txt`);
        const outputFile = join4(tmpdir(), `agentv-output-${safeId}-${stamp}.txt`);
        const recordTiming = (durationMs) => writeJson2(join4(testDir, "timing.json"), {
          duration_ms: durationMs,
          total_duration_seconds: Math.round(durationMs / 10) / 100
        });
        let start = performance.now();
        try {
          // Inside the try so that a failed write is recorded as a failure of
          // this test (and cleaned up) instead of aborting the whole run.
          await writeFile5(promptFile, inputData.input_text, "utf8");
          const substitutions = {
            PROMPT_FILE: promptFile,
            OUTPUT_FILE: outputFile,
            PROMPT: inputData.input_text
          };
          // A replacer function inserts the value verbatim; a string
          // replacement would interpret "$&", "$'", "$1"... found in the
          // prompt. The /g flag fills in every occurrence of a placeholder.
          // SECURITY: {PROMPT} is pasted into a shell command line without
          // quoting; templates should prefer {PROMPT_FILE} for untrusted text.
          const rendered = invoke.command.replace(
            /\{(PROMPT_FILE|OUTPUT_FILE|PROMPT)\}/g,
            (_match, key) => substitutions[key]
          );
          start = performance.now();
          execSync(rendered, {
            cwd: invoke.cwd,
            timeout: timeoutMs,
            env: mergedEnv,
            stdio: ["pipe", "pipe", "pipe"],
            maxBuffer: 10 * 1024 * 1024
          });
          const durationMs = Math.round(performance.now() - start);
          const response = existsSync2(outputFile) ? readFileSync4(outputFile, "utf8") : "ERROR: No output file generated";
          await writeFile5(join4(testDir, "response.md"), response, "utf8");
          await recordTiming(durationMs);
          console.log(`  ${testId}: OK (${durationMs}ms, ${response.length} chars)`);
        } catch (error) {
          const durationMs = Math.round(performance.now() - start);
          const message = error instanceof Error ? error.message : String(error);
          await writeFile5(join4(testDir, "response.md"), `ERROR: target failed \u2014 ${message}`, "utf8");
          await recordTiming(durationMs);
          console.error(`  ${testId}: FAILED (${durationMs}ms) \u2014 ${message.slice(0, 200)}`);
        } finally {
          try {
            if (existsSync2(promptFile)) unlinkSync(promptFile);
            if (existsSync2(outputFile)) unlinkSync(outputFile);
          } catch {
            // Temp-file cleanup is best effort; leftovers live in the OS temp dir.
          }
        }
      };

      // Bounded pool: each worker pulls the next pending test id until the
      // queue is drained.
      // NOTE: execSync blocks the event loop, so commands still execute one at
      // a time; real parallelism needs an asynchronous child-process API.
      let nextIndex = 0;
      const runWorker = async () => {
        while (nextIndex < testIds.length) {
          const testId = testIds[nextIndex++];
          await invokeTarget(testId);
        }
      };
      await Promise.all(Array.from({ length: maxWorkers }, () => runWorker()));
    } else {
      console.log("Agent-as-target mode \u2014 skipping CLI invocation.");
    }

    // ---- Phase 3: code graders --------------------------------------------
    let totalGraders = 0;
    let totalPassed = 0;
    for (const testId of testIds) {
      const testDir = join4(outDir, testId);
      const codeGradersDir = join4(testDir, "code_graders");
      const resultsDir = join4(testDir, "code_grader_results");
      let graderFiles;
      try {
        graderFiles = (await readdir3(codeGradersDir)).filter((f) => f.endsWith(".json"));
      } catch {
        // No code_graders directory: this test has no code graders.
        continue;
      }
      if (graderFiles.length === 0) continue;
      let responseText;
      try {
        responseText = await readFile4(join4(testDir, "response.md"), "utf8");
      } catch (error) {
        // In agent-as-target mode no response has been produced yet; skip the
        // test instead of aborting the whole command.
        const message = error instanceof Error ? error.message : String(error);
        console.error(`  ${testId}: cannot read response.md \u2014 skipping code graders (${message})`);
        continue;
      }
      await mkdir4(resultsDir, { recursive: true });
      const inputData = JSON.parse(await readFile4(join4(testDir, "input.json"), "utf8"));
      for (const graderFile of graderFiles) {
        const graderConfig = JSON.parse(await readFile4(join4(codeGradersDir, graderFile), "utf8"));
        const graderName = graderConfig.name;
        const resultPath = join4(resultsDir, `${graderName}.json`);
        // Payload handed to the grader script; fields this command has no
        // data for are sent as empty or null values.
        const payload = JSON.stringify({
          output: [{ role: "assistant", content: responseText }],
          input: inputData.input_messages,
          question: inputData.input_text,
          criteria: "",
          expected_output: [],
          reference_answer: "",
          input_files: [],
          trace: null,
          token_usage: null,
          cost_usd: null,
          duration_ms: null,
          start_time: null,
          end_time: null,
          file_changes: null,
          workspace_path: null,
          config: graderConfig.config ?? null,
          metadata: {},
          input_text: inputData.input_text,
          output_text: responseText,
          expected_output_text: ""
        });
        try {
          const stdout = await executeScript(
            graderConfig.command,
            payload,
            void 0,
            graderConfig.cwd
          );
          const parsed = JSON.parse(stdout);
          const score = typeof parsed.score === "number" ? parsed.score : 0;
          const assertions = Array.isArray(parsed.assertions) ? parsed.assertions : [];
          await writeJson2(resultPath, {
            name: graderName,
            type: "code-grader",
            score,
            weight: graderConfig.weight ?? 1,
            assertions,
            details: parsed.details ?? {}
          });
          totalGraders++;
          // A grader counts as passed at a score of 0.5 or above.
          if (score >= 0.5) totalPassed++;
        } catch (error) {
          // A crashing grader, or output that is not valid JSON, is recorded
          // as a zero score rather than aborting the remaining graders.
          const message = error instanceof Error ? error.message : String(error);
          console.error(`  ${testId}/${graderName}: ERROR \u2014 ${message}`);
          await writeJson2(resultPath, {
            name: graderName,
            type: "code-grader",
            score: 0,
            weight: graderConfig.weight ?? 1,
            assertions: [{ text: `Error: ${message}`, passed: false }],
            details: { error: message }
          });
          totalGraders++;
        }
      }
    }
    console.log(`Graded ${totalGraders} code-grader(s): ${totalPassed} passed`);
    console.log(`\nDone. Agent can now perform LLM grading on responses in ${outDir}`);
  }
});
|
|
5122
|
+
/**
 * Write `data` to `filePath` as UTF-8 JSON, pretty-printed with a two-space
 * indent and terminated by a single newline.
 *
 * @param {string} filePath - Destination file; its directory must already exist.
 * @param {unknown} data - Value to serialize with JSON.stringify.
 * @returns {Promise<void>}
 */
async function writeJson2(filePath, data) {
  const serialized = JSON.stringify(data, null, 2);
  await writeFile5(filePath, serialized + "\n", "utf8");
}
|
|
5126
|
+
/**
 * Write one JSON config file per grader assertion of a test.
 *
 * `code-grader` assertions go to `<testDir>/code_graders/<name>.json` and
 * `llm-grader` assertions to `<testDir>/llm_graders/<name>.json`. Each
 * directory is created only when the first assertion of its type is seen;
 * assertions of any other type are ignored.
 *
 * @param {string} testDir - Output directory of the test.
 * @param {Array<object>} assertions - Assertion entries of the test.
 * @param {string} evalDir - Working directory used for a code grader that
 *   provides neither `resolvedCwd` nor `cwd`.
 * @returns {Promise<void>}
 */
async function writeGraderConfigs2(testDir, assertions, evalDir) {
  const codeGradersDir = join4(testDir, "code_graders");
  const llmGradersDir = join4(testDir, "llm_graders");
  // Directories already created during this call, so mkdir runs once each.
  const createdDirs = new Set();
  const ensureDir = async (dirPath) => {
    if (createdDirs.has(dirPath)) return;
    await mkdir4(dirPath, { recursive: true });
    createdDirs.add(dirPath);
  };
  for (const assertion of assertions) {
    switch (assertion.type) {
      case "code-grader": {
        await ensureDir(codeGradersDir);
        await writeJson2(join4(codeGradersDir, `${assertion.name}.json`), {
          name: assertion.name,
          command: assertion.command,
          cwd: assertion.resolvedCwd ?? assertion.cwd ?? evalDir,
          weight: assertion.weight ?? 1,
          config: assertion.config ?? {}
        });
        break;
      }
      case "llm-grader": {
        await ensureDir(llmGradersDir);
        // The inline prompt is used when no prompt file is configured, and
        // as the fallback when the configured file cannot be read.
        const inlinePrompt = typeof assertion.prompt === "string" ? assertion.prompt : "";
        let promptContent = inlinePrompt;
        if (assertion.resolvedPromptPath) {
          try {
            promptContent = readFileSync4(assertion.resolvedPromptPath, "utf8");
          } catch {
            promptContent = inlinePrompt;
          }
        }
        await writeJson2(join4(llmGradersDir, `${assertion.name}.json`), {
          name: assertion.name,
          prompt_content: promptContent,
          weight: assertion.weight ?? 1,
          threshold: 0.5,
          config: {}
        });
        break;
      }
      default:
        break;
    }
  }
}
|
|
5171
|
+
|
|
4817
5172
|
// src/commands/pipeline/index.ts
|
|
4818
5173
|
var pipelineCommand = subcommands({
|
|
4819
5174
|
name: "pipeline",
|
|
@@ -4821,7 +5176,8 @@ var pipelineCommand = subcommands({
|
|
|
4821
5176
|
cmds: {
|
|
4822
5177
|
input: evalInputCommand,
|
|
4823
5178
|
grade: evalGradeCommand,
|
|
4824
|
-
bench: evalBenchCommand
|
|
5179
|
+
bench: evalBenchCommand,
|
|
5180
|
+
run: evalRunCommand2
|
|
4825
5181
|
}
|
|
4826
5182
|
});
|
|
4827
5183
|
|
|
@@ -4829,10 +5185,10 @@ var pipelineCommand = subcommands({
|
|
|
4829
5185
|
import path7 from "node:path";
|
|
4830
5186
|
|
|
4831
5187
|
// src/commands/results/shared.ts
|
|
4832
|
-
import { existsSync as
|
|
5188
|
+
import { existsSync as existsSync3 } from "node:fs";
|
|
4833
5189
|
|
|
4834
5190
|
// src/commands/trace/utils.ts
|
|
4835
|
-
import { readFileSync as
|
|
5191
|
+
import { readFileSync as readFileSync5, readdirSync as readdirSync2, statSync as statSync2 } from "node:fs";
|
|
4836
5192
|
import path6 from "node:path";
|
|
4837
5193
|
var colors2 = {
|
|
4838
5194
|
reset: "\x1B[0m",
|
|
@@ -4872,7 +5228,7 @@ function resolveTraceResultPath(filePath) {
|
|
|
4872
5228
|
return resolveWorkspaceOrFilePath(filePath);
|
|
4873
5229
|
}
|
|
4874
5230
|
function loadJsonlRecords(filePath) {
|
|
4875
|
-
const content =
|
|
5231
|
+
const content = readFileSync5(filePath, "utf8");
|
|
4876
5232
|
const lines = content.trim().split("\n").filter((line) => line.trim());
|
|
4877
5233
|
return lines.map((line, i) => {
|
|
4878
5234
|
const record = JSON.parse(line);
|
|
@@ -4925,7 +5281,7 @@ function toRawResult(result) {
|
|
|
4925
5281
|
};
|
|
4926
5282
|
}
|
|
4927
5283
|
function loadOtlpTraceFile(filePath) {
|
|
4928
|
-
const parsed = JSON.parse(
|
|
5284
|
+
const parsed = JSON.parse(readFileSync5(filePath, "utf8"));
|
|
4929
5285
|
const spans = parsed.resourceSpans?.flatMap((resource) => resource.scopeSpans ?? []).flatMap((scope) => scope.spans ?? []);
|
|
4930
5286
|
if (!spans || spans.length === 0) {
|
|
4931
5287
|
return [];
|
|
@@ -5243,14 +5599,14 @@ async function resolveSourceFile(source, cwd) {
|
|
|
5243
5599
|
let sourceFile;
|
|
5244
5600
|
if (source) {
|
|
5245
5601
|
sourceFile = resolveResultSourcePath(source, cwd);
|
|
5246
|
-
if (!
|
|
5602
|
+
if (!existsSync3(sourceFile)) {
|
|
5247
5603
|
console.error(`Error: File not found: ${sourceFile}`);
|
|
5248
5604
|
process.exit(1);
|
|
5249
5605
|
}
|
|
5250
5606
|
} else {
|
|
5251
5607
|
const cache = await loadRunCache(cwd);
|
|
5252
5608
|
const cachedFile = cache ? resolveRunCacheFile(cache) : "";
|
|
5253
|
-
if (cachedFile &&
|
|
5609
|
+
if (cachedFile && existsSync3(cachedFile)) {
|
|
5254
5610
|
sourceFile = cachedFile;
|
|
5255
5611
|
} else {
|
|
5256
5612
|
const metas = listResultFiles(cwd, 1);
|
|
@@ -5462,7 +5818,7 @@ var resultsShowCommand = command({
|
|
|
5462
5818
|
});
|
|
5463
5819
|
|
|
5464
5820
|
// src/commands/results/summary.ts
|
|
5465
|
-
import { existsSync as
|
|
5821
|
+
import { existsSync as existsSync4, readFileSync as readFileSync6 } from "node:fs";
|
|
5466
5822
|
function formatSummary(results, grading) {
|
|
5467
5823
|
const total = results.length;
|
|
5468
5824
|
let passed;
|
|
@@ -5513,9 +5869,9 @@ var resultsSummaryCommand = command({
|
|
|
5513
5869
|
const { results, sourceFile } = await loadResults(source, cwd);
|
|
5514
5870
|
let grading;
|
|
5515
5871
|
const gradingPath = sourceFile.replace(/\.jsonl$/, ".grading.json");
|
|
5516
|
-
if (
|
|
5872
|
+
if (existsSync4(gradingPath)) {
|
|
5517
5873
|
try {
|
|
5518
|
-
grading = JSON.parse(
|
|
5874
|
+
grading = JSON.parse(readFileSync6(gradingPath, "utf8"));
|
|
5519
5875
|
} catch {
|
|
5520
5876
|
}
|
|
5521
5877
|
}
|
|
@@ -5540,7 +5896,7 @@ var resultsCommand = subcommands({
|
|
|
5540
5896
|
});
|
|
5541
5897
|
|
|
5542
5898
|
// src/commands/results/serve.ts
|
|
5543
|
-
import { existsSync as
|
|
5899
|
+
import { existsSync as existsSync5, readFileSync as readFileSync7, writeFileSync as writeFileSync3 } from "node:fs";
|
|
5544
5900
|
import path8 from "node:path";
|
|
5545
5901
|
import { Hono } from "hono";
|
|
5546
5902
|
function feedbackPath(resultDir) {
|
|
@@ -5548,11 +5904,11 @@ function feedbackPath(resultDir) {
|
|
|
5548
5904
|
}
|
|
5549
5905
|
function readFeedback(cwd) {
|
|
5550
5906
|
const fp = feedbackPath(cwd);
|
|
5551
|
-
if (!
|
|
5907
|
+
if (!existsSync5(fp)) {
|
|
5552
5908
|
return { reviews: [] };
|
|
5553
5909
|
}
|
|
5554
5910
|
try {
|
|
5555
|
-
return JSON.parse(
|
|
5911
|
+
return JSON.parse(readFileSync7(fp, "utf8"));
|
|
5556
5912
|
} catch (err2) {
|
|
5557
5913
|
console.error(`Warning: could not parse ${fp}, starting fresh: ${err2.message}`);
|
|
5558
5914
|
return { reviews: [] };
|
|
@@ -6352,7 +6708,7 @@ var resultsServeCommand = command({
|
|
|
6352
6708
|
let sourceFile;
|
|
6353
6709
|
if (source) {
|
|
6354
6710
|
const resolved = resolveResultSourcePath(source, cwd);
|
|
6355
|
-
if (!
|
|
6711
|
+
if (!existsSync5(resolved)) {
|
|
6356
6712
|
console.error(`Error: Source file not found: ${resolved}`);
|
|
6357
6713
|
process.exit(1);
|
|
6358
6714
|
}
|
|
@@ -6361,7 +6717,7 @@ var resultsServeCommand = command({
|
|
|
6361
6717
|
} else {
|
|
6362
6718
|
const cache = await loadRunCache(cwd);
|
|
6363
6719
|
const cachedFile = cache ? resolveRunCacheFile(cache) : "";
|
|
6364
|
-
if (cachedFile &&
|
|
6720
|
+
if (cachedFile && existsSync5(cachedFile)) {
|
|
6365
6721
|
sourceFile = cachedFile;
|
|
6366
6722
|
results = patchTestIds(loadManifestResults(cachedFile));
|
|
6367
6723
|
} else {
|
|
@@ -6411,7 +6767,7 @@ function detectPackageManager() {
|
|
|
6411
6767
|
return detectPackageManagerFromPath(process.argv[1] ?? "");
|
|
6412
6768
|
}
|
|
6413
6769
|
function runCommand(cmd, args) {
|
|
6414
|
-
return new Promise((
|
|
6770
|
+
return new Promise((resolve3, reject) => {
|
|
6415
6771
|
const child = spawn(cmd, args, { stdio: ["inherit", "pipe", "inherit"], shell: true });
|
|
6416
6772
|
let stdout = "";
|
|
6417
6773
|
child.stdout?.on("data", (data) => {
|
|
@@ -6419,7 +6775,7 @@ function runCommand(cmd, args) {
|
|
|
6419
6775
|
stdout += data.toString();
|
|
6420
6776
|
});
|
|
6421
6777
|
child.on("error", reject);
|
|
6422
|
-
child.on("close", (code) =>
|
|
6778
|
+
child.on("close", (code) => resolve3({ exitCode: code ?? 1, stdout }));
|
|
6423
6779
|
});
|
|
6424
6780
|
}
|
|
6425
6781
|
var updateCommand = command({
|
|
@@ -7327,7 +7683,7 @@ var transpileCommand = command({
|
|
|
7327
7683
|
});
|
|
7328
7684
|
|
|
7329
7685
|
// src/commands/trim/index.ts
|
|
7330
|
-
import { readFileSync as
|
|
7686
|
+
import { readFileSync as readFileSync8, writeFileSync as writeFileSync5 } from "node:fs";
|
|
7331
7687
|
var trimCommand = command({
|
|
7332
7688
|
name: "trim",
|
|
7333
7689
|
description: "Trim evaluation results for baseline storage (strips debug/audit fields)",
|
|
@@ -7346,7 +7702,7 @@ var trimCommand = command({
|
|
|
7346
7702
|
},
|
|
7347
7703
|
handler: async ({ input, out }) => {
|
|
7348
7704
|
try {
|
|
7349
|
-
const content =
|
|
7705
|
+
const content = readFileSync8(input, "utf8");
|
|
7350
7706
|
const lines = content.trim().split("\n").filter((line) => line.trim());
|
|
7351
7707
|
const trimmedLines = lines.map((line) => {
|
|
7352
7708
|
const record = JSON.parse(line);
|
|
@@ -7452,7 +7808,7 @@ function isTTY() {
|
|
|
7452
7808
|
|
|
7453
7809
|
// src/commands/validate/validate-files.ts
|
|
7454
7810
|
import { constants } from "node:fs";
|
|
7455
|
-
import { access, readdir as
|
|
7811
|
+
import { access, readdir as readdir4, stat } from "node:fs/promises";
|
|
7456
7812
|
import path10 from "node:path";
|
|
7457
7813
|
async function validateFiles(paths) {
|
|
7458
7814
|
const filePaths = await expandPaths(paths);
|
|
@@ -7518,7 +7874,7 @@ async function expandPaths(paths) {
|
|
|
7518
7874
|
async function findYamlFiles(dirPath) {
|
|
7519
7875
|
const results = [];
|
|
7520
7876
|
try {
|
|
7521
|
-
const entries2 = await
|
|
7877
|
+
const entries2 = await readdir4(dirPath, { withFileTypes: true });
|
|
7522
7878
|
for (const entry of entries2) {
|
|
7523
7879
|
const fullPath = path10.join(dirPath, entry.name);
|
|
7524
7880
|
if (entry.isDirectory()) {
|
|
@@ -7575,14 +7931,14 @@ var validateCommand = command({
|
|
|
7575
7931
|
});
|
|
7576
7932
|
|
|
7577
7933
|
// src/commands/workspace/clean.ts
|
|
7578
|
-
import { existsSync as
|
|
7579
|
-
import { readFile as
|
|
7934
|
+
import { existsSync as existsSync6 } from "node:fs";
|
|
7935
|
+
import { readFile as readFile5, readdir as readdir5, rm } from "node:fs/promises";
|
|
7580
7936
|
import path11 from "node:path";
|
|
7581
7937
|
async function confirm(message) {
|
|
7582
7938
|
const readline2 = await import("node:readline");
|
|
7583
7939
|
const rl = readline2.createInterface({ input: process.stdin, output: process.stdout });
|
|
7584
|
-
const answer = await new Promise((
|
|
7585
|
-
rl.question(`${message} [y/N] `,
|
|
7940
|
+
const answer = await new Promise((resolve3) => {
|
|
7941
|
+
rl.question(`${message} [y/N] `, resolve3);
|
|
7586
7942
|
});
|
|
7587
7943
|
rl.close();
|
|
7588
7944
|
return answer.toLowerCase() === "y";
|
|
@@ -7604,19 +7960,19 @@ var cleanCommand = command({
|
|
|
7604
7960
|
},
|
|
7605
7961
|
handler: async ({ repo, force }) => {
|
|
7606
7962
|
const poolRoot = getWorkspacePoolRoot();
|
|
7607
|
-
if (!
|
|
7963
|
+
if (!existsSync6(poolRoot)) {
|
|
7608
7964
|
console.log("No workspace pool entries found.");
|
|
7609
7965
|
return;
|
|
7610
7966
|
}
|
|
7611
7967
|
if (repo) {
|
|
7612
|
-
const entries2 = await
|
|
7968
|
+
const entries2 = await readdir5(poolRoot, { withFileTypes: true });
|
|
7613
7969
|
const poolDirs = entries2.filter((e) => e.isDirectory());
|
|
7614
7970
|
const matchingDirs = [];
|
|
7615
7971
|
for (const dir of poolDirs) {
|
|
7616
7972
|
const poolDir = path11.join(poolRoot, dir.name);
|
|
7617
7973
|
const metadataPath = path11.join(poolDir, "metadata.json");
|
|
7618
7974
|
try {
|
|
7619
|
-
const raw = await
|
|
7975
|
+
const raw = await readFile5(metadataPath, "utf-8");
|
|
7620
7976
|
const metadata = JSON.parse(raw);
|
|
7621
7977
|
const hasRepo = metadata.repos?.some((r) => {
|
|
7622
7978
|
if (r.source.type === "git" && r.source.url) {
|
|
@@ -7663,13 +8019,13 @@ var cleanCommand = command({
|
|
|
7663
8019
|
});
|
|
7664
8020
|
|
|
7665
8021
|
// src/commands/workspace/list.ts
|
|
7666
|
-
import { existsSync as
|
|
7667
|
-
import { readFile as
|
|
8022
|
+
import { existsSync as existsSync7 } from "node:fs";
|
|
8023
|
+
import { readFile as readFile6, readdir as readdir6, stat as stat2 } from "node:fs/promises";
|
|
7668
8024
|
import path12 from "node:path";
|
|
7669
8025
|
async function getDirectorySize(dirPath) {
|
|
7670
8026
|
let totalSize = 0;
|
|
7671
8027
|
try {
|
|
7672
|
-
const entries2 = await
|
|
8028
|
+
const entries2 = await readdir6(dirPath, { withFileTypes: true });
|
|
7673
8029
|
for (const entry of entries2) {
|
|
7674
8030
|
const fullPath = path12.join(dirPath, entry.name);
|
|
7675
8031
|
if (entry.isDirectory()) {
|
|
@@ -7695,11 +8051,11 @@ var listCommand = command({
|
|
|
7695
8051
|
args: {},
|
|
7696
8052
|
handler: async () => {
|
|
7697
8053
|
const poolRoot = getWorkspacePoolRoot();
|
|
7698
|
-
if (!
|
|
8054
|
+
if (!existsSync7(poolRoot)) {
|
|
7699
8055
|
console.log("No workspace pool entries found.");
|
|
7700
8056
|
return;
|
|
7701
8057
|
}
|
|
7702
|
-
const entries2 = await
|
|
8058
|
+
const entries2 = await readdir6(poolRoot, { withFileTypes: true });
|
|
7703
8059
|
const poolDirs = entries2.filter((e) => e.isDirectory());
|
|
7704
8060
|
if (poolDirs.length === 0) {
|
|
7705
8061
|
console.log("No workspace pool entries found.");
|
|
@@ -7708,12 +8064,12 @@ var listCommand = command({
|
|
|
7708
8064
|
for (const dir of poolDirs) {
|
|
7709
8065
|
const poolDir = path12.join(poolRoot, dir.name);
|
|
7710
8066
|
const fingerprint = dir.name;
|
|
7711
|
-
const poolEntries = await
|
|
8067
|
+
const poolEntries = await readdir6(poolDir, { withFileTypes: true });
|
|
7712
8068
|
const slots = poolEntries.filter((e) => e.isDirectory() && e.name.startsWith("slot-"));
|
|
7713
8069
|
const metadataPath = path12.join(poolDir, "metadata.json");
|
|
7714
8070
|
let metadata = null;
|
|
7715
8071
|
try {
|
|
7716
|
-
const raw = await
|
|
8072
|
+
const raw = await readFile6(metadataPath, "utf-8");
|
|
7717
8073
|
metadata = JSON.parse(raw);
|
|
7718
8074
|
} catch {
|
|
7719
8075
|
}
|
|
@@ -7750,16 +8106,16 @@ var workspaceCommand = subcommands({
|
|
|
7750
8106
|
|
|
7751
8107
|
// src/update-check.ts
|
|
7752
8108
|
import { spawn as spawn2 } from "node:child_process";
|
|
7753
|
-
import { readFile as
|
|
7754
|
-
import { join as
|
|
8109
|
+
import { readFile as readFile7 } from "node:fs/promises";
|
|
8110
|
+
import { join as join5 } from "node:path";
|
|
7755
8111
|
var CHECK_INTERVAL_MS = 24 * 60 * 60 * 1e3;
|
|
7756
8112
|
var AGENTV_DIR = getAgentvHome();
|
|
7757
8113
|
var CACHE_FILE = "version-check.json";
|
|
7758
8114
|
var NPM_REGISTRY_URL = "https://registry.npmjs.org/agentv/latest";
|
|
7759
8115
|
async function getCachedUpdateInfo(path13) {
|
|
7760
|
-
const filePath = path13 ??
|
|
8116
|
+
const filePath = path13 ?? join5(AGENTV_DIR, CACHE_FILE);
|
|
7761
8117
|
try {
|
|
7762
|
-
const raw = await
|
|
8118
|
+
const raw = await readFile7(filePath, "utf-8");
|
|
7763
8119
|
const data = JSON.parse(raw);
|
|
7764
8120
|
if (typeof data.latestVersion === "string" && typeof data.lastCheckedAt === "string") {
|
|
7765
8121
|
return data;
|
|
@@ -7791,7 +8147,7 @@ function buildNotice(currentVersion, latestVersion) {
|
|
|
7791
8147
|
}
|
|
7792
8148
|
function backgroundUpdateCheck() {
|
|
7793
8149
|
const dir = AGENTV_DIR;
|
|
7794
|
-
const filePath =
|
|
8150
|
+
const filePath = join5(dir, CACHE_FILE);
|
|
7795
8151
|
const script = `
|
|
7796
8152
|
const https = require('https');
|
|
7797
8153
|
const fs = require('fs');
|
|
@@ -7914,4 +8270,4 @@ export {
|
|
|
7914
8270
|
preprocessArgv,
|
|
7915
8271
|
runCli
|
|
7916
8272
|
};
|
|
7917
|
-
//# sourceMappingURL=chunk-
|
|
8273
|
+
//# sourceMappingURL=chunk-3UW7KUQ3.js.map
|