agentv 4.1.1 → 4.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-TDY2FQN5.js → chunk-ASU5L5ZW.js} +135 -176
- package/dist/chunk-ASU5L5ZW.js.map +1 -0
- package/dist/{chunk-XEAW7OQT.js → chunk-XLM3RNN7.js} +19 -29
- package/dist/chunk-XLM3RNN7.js.map +1 -0
- package/dist/{chunk-QCKPJPYC.js → chunk-ZDJN5FSI.js} +4 -4
- package/dist/{chunk-QCKPJPYC.js.map → chunk-ZDJN5FSI.js.map} +1 -1
- package/dist/cli.js +3 -3
- package/dist/{dist-2JUUJ6PT.js → dist-VVXR6TYM.js} +4 -2
- package/dist/index.js +3 -3
- package/dist/{interactive-ASB4FU3J.js → interactive-BKK53ETJ.js} +3 -3
- package/dist/studio/assets/{index-DofvSOmX.js → index-Cir5Hc8S.js} +1 -1
- package/dist/studio/assets/{index-CDGReinH.js → index-D8LVkz9x.js} +1 -1
- package/dist/studio/index.html +1 -1
- package/package.json +1 -1
- package/dist/chunk-TDY2FQN5.js.map +0 -1
- package/dist/chunk-XEAW7OQT.js.map +0 -1
- /package/dist/{dist-2JUUJ6PT.js.map → dist-VVXR6TYM.js.map} +0 -0
- /package/dist/{interactive-ASB4FU3J.js.map → interactive-BKK53ETJ.js.map} +0 -0
|
@@ -24,7 +24,7 @@ import {
|
|
|
24
24
|
validateFileReferences,
|
|
25
25
|
validateTargetsFile,
|
|
26
26
|
writeArtifactsFromResults
|
|
27
|
-
} from "./chunk-QCKPJPYC.js";
|
|
27
|
+
} from "./chunk-ZDJN5FSI.js";
|
|
28
28
|
import {
|
|
29
29
|
DEFAULT_CATEGORY,
|
|
30
30
|
createBuiltinRegistry,
|
|
@@ -43,7 +43,7 @@ import {
|
|
|
43
43
|
toSnakeCaseDeep as toSnakeCaseDeep2,
|
|
44
44
|
transpileEvalYamlFile,
|
|
45
45
|
trimBaselineResult
|
|
46
|
-
} from "./chunk-XEAW7OQT.js";
|
|
46
|
+
} from "./chunk-XLM3RNN7.js";
|
|
47
47
|
import {
|
|
48
48
|
__commonJS,
|
|
49
49
|
__esm,
|
|
@@ -4217,7 +4217,7 @@ var evalRunCommand = command({
|
|
|
4217
4217
|
},
|
|
4218
4218
|
handler: async (args) => {
|
|
4219
4219
|
if (args.evalPaths.length === 0 && process.stdin.isTTY) {
|
|
4220
|
-
const { launchInteractiveWizard } = await import("./interactive-ASB4FU3J.js");
|
|
4220
|
+
const { launchInteractiveWizard } = await import("./interactive-BKK53ETJ.js");
|
|
4221
4221
|
await launchInteractiveWizard();
|
|
4222
4222
|
return;
|
|
4223
4223
|
}
|
|
@@ -4441,27 +4441,15 @@ var evalBenchCommand = command({
|
|
|
4441
4441
|
type: string,
|
|
4442
4442
|
displayName: "export-dir",
|
|
4443
4443
|
description: "Export directory from pipeline input/grade"
|
|
4444
|
-
}),
|
|
4445
|
-
llmScores: option({
|
|
4446
|
-
type: optional(string),
|
|
4447
|
-
long: "llm-scores",
|
|
4448
|
-
description: "Path to LLM scores JSON file (reads from stdin if omitted)"
|
|
4449
4444
|
})
|
|
4450
4445
|
},
|
|
4451
|
-
handler: async ({ exportDir, llmScores: llmScoresPath }) => {
|
|
4446
|
+
handler: async ({ exportDir }) => {
|
|
4452
4447
|
const manifest = JSON.parse(await readFile(join(exportDir, "manifest.json"), "utf8"));
|
|
4453
4448
|
const testIds = manifest.test_ids;
|
|
4454
4449
|
const targetName = manifest.target?.name ?? "unknown";
|
|
4455
4450
|
const evalSet = manifest.dataset ?? "";
|
|
4456
4451
|
const experiment = manifest.experiment;
|
|
4457
4452
|
const safeEvalSet = evalSet ? evalSet.replace(/[\/\\:*?"<>|]/g, "_") : "";
|
|
4458
|
-
let stdinData;
|
|
4459
|
-
if (llmScoresPath) {
|
|
4460
|
-
stdinData = await readFile(llmScoresPath, "utf8");
|
|
4461
|
-
} else {
|
|
4462
|
-
stdinData = await readStdin();
|
|
4463
|
-
}
|
|
4464
|
-
const llmScores = stdinData ? JSON.parse(stdinData) : {};
|
|
4465
4453
|
const indexLines = [];
|
|
4466
4454
|
const allPassRates = [];
|
|
4467
4455
|
for (const testId of testIds) {
|
|
@@ -4488,14 +4476,18 @@ var evalBenchCommand = command({
|
|
|
4488
4476
|
}
|
|
4489
4477
|
} catch {
|
|
4490
4478
|
}
|
|
4491
|
-
const testLlmScores = llmScores[testId] ?? {};
|
|
4492
4479
|
const llmGradersDir = join(testDir, "llm_graders");
|
|
4493
4480
|
try {
|
|
4494
4481
|
const graderFiles = (await readdir(llmGradersDir)).filter((f) => f.endsWith(".json"));
|
|
4495
4482
|
for (const file of graderFiles) {
|
|
4496
4483
|
const graderMeta = JSON.parse(await readFile(join(llmGradersDir, file), "utf8"));
|
|
4497
4484
|
const graderName = graderMeta.name;
|
|
4498
|
-
const llmResult = testLlmScores[graderName];
|
|
4485
|
+
const diskResultPath = join(testDir, "llm_grader_results", `${graderName}.json`);
|
|
4486
|
+
let llmResult;
|
|
4487
|
+
try {
|
|
4488
|
+
llmResult = JSON.parse(await readFile(diskResultPath, "utf8"));
|
|
4489
|
+
} catch {
|
|
4490
|
+
}
|
|
4499
4491
|
if (llmResult) {
|
|
4500
4492
|
evaluators.push({
|
|
4501
4493
|
name: graderName,
|
|
@@ -4515,7 +4507,7 @@ var evalBenchCommand = command({
|
|
|
4515
4507
|
const weightedScore = totalWeight > 0 ? evaluators.reduce((sum, e) => sum + e.score * e.weight, 0) / totalWeight : 0;
|
|
4516
4508
|
const passed = allAssertions.filter((a) => a.passed).length;
|
|
4517
4509
|
const failed = allAssertions.filter((a) => !a.passed).length;
|
|
4518
|
-
const passRate = allAssertions.length > 0 ? Math.round(passed / allAssertions.length * 1e3) / 1e3 : 0;
|
|
4510
|
+
const passRate = allAssertions.length > 0 ? Math.round(passed / allAssertions.length * 1e3) / 1e3 : weightedScore >= 0.5 ? 1 : 0;
|
|
4519
4511
|
allPassRates.push(passRate);
|
|
4520
4512
|
const grading = {
|
|
4521
4513
|
assertions: allAssertions,
|
|
@@ -4608,13 +4600,6 @@ var evalBenchCommand = command({
|
|
|
4608
4600
|
console.log(`Benchmark: ${testIds.length} test(s), pass_rate=${passRateStats.mean}`);
|
|
4609
4601
|
}
|
|
4610
4602
|
});
|
|
4611
|
-
async function readStdin() {
|
|
4612
|
-
const chunks = [];
|
|
4613
|
-
for await (const chunk of process.stdin) {
|
|
4614
|
-
chunks.push(chunk);
|
|
4615
|
-
}
|
|
4616
|
-
return Buffer.concat(chunks).toString("utf8").trim();
|
|
4617
|
-
}
|
|
4618
4603
|
function computeStats(values) {
|
|
4619
4604
|
if (values.length === 0) return { mean: 0, stddev: 0 };
|
|
4620
4605
|
const mean2 = values.reduce((sum, v) => sum + v, 0) / values.length;
|
|
@@ -4628,12 +4613,118 @@ function computeStats(values) {
|
|
|
4628
4613
|
// src/commands/pipeline/grade.ts
|
|
4629
4614
|
import { mkdir as mkdir2, readFile as readFile2, readdir as readdir2, writeFile as writeFile3 } from "node:fs/promises";
|
|
4630
4615
|
import { join as join2 } from "node:path";
|
|
4616
|
+
var DEFAULT_CONCURRENCY = 10;
|
|
4631
4617
|
function extractInputText(input) {
|
|
4632
4618
|
if (!input || input.length === 0) return "";
|
|
4633
4619
|
if (input.length === 1) return input[0].content;
|
|
4634
4620
|
return input.map((m) => `@[${m.role}]:
|
|
4635
4621
|
${m.content}`).join("\n\n");
|
|
4636
4622
|
}
|
|
4623
|
+
async function runCodeGraders(tasks, concurrency) {
|
|
4624
|
+
let totalGraders = 0;
|
|
4625
|
+
let totalPassed = 0;
|
|
4626
|
+
let completed = 0;
|
|
4627
|
+
const total = tasks.length;
|
|
4628
|
+
if (total === 0) return { totalGraders: 0, totalPassed: 0 };
|
|
4629
|
+
const writeProgress = () => {
|
|
4630
|
+
process.stderr.write(`\rGrading: ${completed}/${total} done`);
|
|
4631
|
+
};
|
|
4632
|
+
writeProgress();
|
|
4633
|
+
const executeGrader = async (task) => {
|
|
4634
|
+
const { testId, testDir, resultsDir, graderFile, responseText, inputData } = task;
|
|
4635
|
+
const graderConfig = JSON.parse(
|
|
4636
|
+
await readFile2(join2(testDir, "code_graders", graderFile), "utf8")
|
|
4637
|
+
);
|
|
4638
|
+
const graderName = graderConfig.name;
|
|
4639
|
+
const inputText = extractInputText(inputData.input);
|
|
4640
|
+
const payload = JSON.stringify({
|
|
4641
|
+
output: [{ role: "assistant", content: responseText }],
|
|
4642
|
+
input: inputData.input,
|
|
4643
|
+
criteria: "",
|
|
4644
|
+
expected_output: [],
|
|
4645
|
+
input_files: inputData.input_files ?? [],
|
|
4646
|
+
trace: null,
|
|
4647
|
+
token_usage: null,
|
|
4648
|
+
cost_usd: null,
|
|
4649
|
+
duration_ms: null,
|
|
4650
|
+
start_time: null,
|
|
4651
|
+
end_time: null,
|
|
4652
|
+
file_changes: null,
|
|
4653
|
+
workspace_path: null,
|
|
4654
|
+
config: graderConfig.config ?? null,
|
|
4655
|
+
metadata: inputData.metadata ?? {},
|
|
4656
|
+
input_text: inputText,
|
|
4657
|
+
output_text: responseText,
|
|
4658
|
+
expected_output_text: ""
|
|
4659
|
+
});
|
|
4660
|
+
try {
|
|
4661
|
+
const stdout = await executeScript(
|
|
4662
|
+
graderConfig.command,
|
|
4663
|
+
payload,
|
|
4664
|
+
void 0,
|
|
4665
|
+
graderConfig.cwd
|
|
4666
|
+
);
|
|
4667
|
+
const parsed = JSON.parse(stdout);
|
|
4668
|
+
const score = typeof parsed.score === "number" ? parsed.score : 0;
|
|
4669
|
+
const assertions = Array.isArray(parsed.assertions) && parsed.assertions.length > 0 ? parsed.assertions : [
|
|
4670
|
+
...(parsed.hits ?? []).map((h) => ({ text: h, passed: true })),
|
|
4671
|
+
...(parsed.misses ?? []).map((m) => ({ text: m, passed: false }))
|
|
4672
|
+
];
|
|
4673
|
+
const result = {
|
|
4674
|
+
name: graderName,
|
|
4675
|
+
type: "code-grader",
|
|
4676
|
+
score,
|
|
4677
|
+
weight: graderConfig.weight ?? 1,
|
|
4678
|
+
assertions,
|
|
4679
|
+
details: parsed.details ?? {}
|
|
4680
|
+
};
|
|
4681
|
+
await writeFile3(
|
|
4682
|
+
join2(resultsDir, `${graderName}.json`),
|
|
4683
|
+
`${JSON.stringify(result, null, 2)}
|
|
4684
|
+
`,
|
|
4685
|
+
"utf8"
|
|
4686
|
+
);
|
|
4687
|
+
totalGraders++;
|
|
4688
|
+
if (score >= 0.5) totalPassed++;
|
|
4689
|
+
} catch (error) {
|
|
4690
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
4691
|
+
process.stderr.write(`
|
|
4692
|
+
${testId}/${graderName}: ERROR \u2014 ${message}
|
|
4693
|
+
`);
|
|
4694
|
+
const errorResult = {
|
|
4695
|
+
name: graderName,
|
|
4696
|
+
type: "code-grader",
|
|
4697
|
+
score: 0,
|
|
4698
|
+
weight: graderConfig.weight ?? 1,
|
|
4699
|
+
assertions: [{ text: `Error: ${message}`, passed: false }],
|
|
4700
|
+
details: { error: message }
|
|
4701
|
+
};
|
|
4702
|
+
await writeFile3(
|
|
4703
|
+
join2(resultsDir, `${graderName}.json`),
|
|
4704
|
+
`${JSON.stringify(errorResult, null, 2)}
|
|
4705
|
+
`,
|
|
4706
|
+
"utf8"
|
|
4707
|
+
);
|
|
4708
|
+
totalGraders++;
|
|
4709
|
+
} finally {
|
|
4710
|
+
completed++;
|
|
4711
|
+
writeProgress();
|
|
4712
|
+
}
|
|
4713
|
+
};
|
|
4714
|
+
const pending = /* @__PURE__ */ new Set();
|
|
4715
|
+
for (const task of tasks) {
|
|
4716
|
+
const p = executeGrader(task).then(() => {
|
|
4717
|
+
pending.delete(p);
|
|
4718
|
+
});
|
|
4719
|
+
pending.add(p);
|
|
4720
|
+
if (pending.size >= concurrency) {
|
|
4721
|
+
await Promise.race(pending);
|
|
4722
|
+
}
|
|
4723
|
+
}
|
|
4724
|
+
await Promise.all(pending);
|
|
4725
|
+
process.stderr.write("\n");
|
|
4726
|
+
return { totalGraders, totalPassed };
|
|
4727
|
+
}
|
|
4637
4728
|
var evalGradeCommand = command({
|
|
4638
4729
|
name: "grade",
|
|
4639
4730
|
description: "Run code-grader assertions on responses in an export directory",
|
|
@@ -4642,16 +4733,22 @@ var evalGradeCommand = command({
|
|
|
4642
4733
|
type: string,
|
|
4643
4734
|
displayName: "export-dir",
|
|
4644
4735
|
description: "Export directory from pipeline input"
|
|
4736
|
+
}),
|
|
4737
|
+
concurrency: option({
|
|
4738
|
+
type: optional(number),
|
|
4739
|
+
long: "concurrency",
|
|
4740
|
+
short: "j",
|
|
4741
|
+
description: `Number of graders to run in parallel (default: ${DEFAULT_CONCURRENCY})`
|
|
4645
4742
|
})
|
|
4646
4743
|
},
|
|
4647
|
-
handler: async ({ exportDir }) => {
|
|
4744
|
+
handler: async ({ exportDir, concurrency }) => {
|
|
4745
|
+
const maxWorkers = concurrency ?? DEFAULT_CONCURRENCY;
|
|
4648
4746
|
const manifestPath = join2(exportDir, "manifest.json");
|
|
4649
4747
|
const manifest = JSON.parse(await readFile2(manifestPath, "utf8"));
|
|
4650
4748
|
const testIds = manifest.test_ids;
|
|
4651
4749
|
const evalSet = manifest.dataset ?? "";
|
|
4652
4750
|
const safeEvalSet = evalSet ? evalSet.replace(/[\/\\:*?"<>|]/g, "_") : "";
|
|
4653
|
-
|
|
4654
|
-
let totalPassed = 0;
|
|
4751
|
+
const tasks = [];
|
|
4655
4752
|
for (const testId of testIds) {
|
|
4656
4753
|
const subpath = safeEvalSet ? [safeEvalSet, testId] : [testId];
|
|
4657
4754
|
const testDir = join2(exportDir, ...subpath);
|
|
@@ -4668,76 +4765,10 @@ var evalGradeCommand = command({
|
|
|
4668
4765
|
const responseText = await readFile2(join2(testDir, "response.md"), "utf8");
|
|
4669
4766
|
const inputData = JSON.parse(await readFile2(join2(testDir, "input.json"), "utf8"));
|
|
4670
4767
|
for (const graderFile of graderFiles) {
|
|
4671
|
-
|
|
4672
|
-
const graderName = graderConfig.name;
|
|
4673
|
-
const inputText = extractInputText(inputData.input);
|
|
4674
|
-
const payload = JSON.stringify({
|
|
4675
|
-
output: [{ role: "assistant", content: responseText }],
|
|
4676
|
-
input: inputData.input,
|
|
4677
|
-
criteria: "",
|
|
4678
|
-
expected_output: [],
|
|
4679
|
-
input_files: inputData.input_files ?? [],
|
|
4680
|
-
trace: null,
|
|
4681
|
-
token_usage: null,
|
|
4682
|
-
cost_usd: null,
|
|
4683
|
-
duration_ms: null,
|
|
4684
|
-
start_time: null,
|
|
4685
|
-
end_time: null,
|
|
4686
|
-
file_changes: null,
|
|
4687
|
-
workspace_path: null,
|
|
4688
|
-
config: graderConfig.config ?? null,
|
|
4689
|
-
metadata: inputData.metadata ?? {},
|
|
4690
|
-
input_text: inputText,
|
|
4691
|
-
output_text: responseText,
|
|
4692
|
-
expected_output_text: ""
|
|
4693
|
-
});
|
|
4694
|
-
try {
|
|
4695
|
-
const stdout = await executeScript(
|
|
4696
|
-
graderConfig.command,
|
|
4697
|
-
payload,
|
|
4698
|
-
void 0,
|
|
4699
|
-
graderConfig.cwd
|
|
4700
|
-
);
|
|
4701
|
-
const parsed = JSON.parse(stdout);
|
|
4702
|
-
const score = typeof parsed.score === "number" ? parsed.score : 0;
|
|
4703
|
-
const assertions = Array.isArray(parsed.assertions) ? parsed.assertions : [];
|
|
4704
|
-
const result = {
|
|
4705
|
-
name: graderName,
|
|
4706
|
-
type: "code-grader",
|
|
4707
|
-
score,
|
|
4708
|
-
weight: graderConfig.weight ?? 1,
|
|
4709
|
-
assertions,
|
|
4710
|
-
details: parsed.details ?? {}
|
|
4711
|
-
};
|
|
4712
|
-
await writeFile3(
|
|
4713
|
-
join2(resultsDir, `${graderName}.json`),
|
|
4714
|
-
`${JSON.stringify(result, null, 2)}
|
|
4715
|
-
`,
|
|
4716
|
-
"utf8"
|
|
4717
|
-
);
|
|
4718
|
-
totalGraders++;
|
|
4719
|
-
if (score >= 0.5) totalPassed++;
|
|
4720
|
-
} catch (error) {
|
|
4721
|
-
const message = error instanceof Error ? error.message : String(error);
|
|
4722
|
-
console.error(` ${testId}/${graderName}: ERROR \u2014 ${message}`);
|
|
4723
|
-
const errorResult = {
|
|
4724
|
-
name: graderName,
|
|
4725
|
-
type: "code-grader",
|
|
4726
|
-
score: 0,
|
|
4727
|
-
weight: graderConfig.weight ?? 1,
|
|
4728
|
-
assertions: [{ text: `Error: ${message}`, passed: false }],
|
|
4729
|
-
details: { error: message }
|
|
4730
|
-
};
|
|
4731
|
-
await writeFile3(
|
|
4732
|
-
join2(resultsDir, `${graderName}.json`),
|
|
4733
|
-
`${JSON.stringify(errorResult, null, 2)}
|
|
4734
|
-
`,
|
|
4735
|
-
"utf8"
|
|
4736
|
-
);
|
|
4737
|
-
totalGraders++;
|
|
4738
|
-
}
|
|
4768
|
+
tasks.push({ testId, testDir, resultsDir, graderFile, responseText, inputData });
|
|
4739
4769
|
}
|
|
4740
4770
|
}
|
|
4771
|
+
const { totalGraders, totalPassed } = await runCodeGraders(tasks, maxWorkers);
|
|
4741
4772
|
console.log(`Graded ${totalGraders} code-grader(s): ${totalPassed} passed`);
|
|
4742
4773
|
}
|
|
4743
4774
|
});
|
|
@@ -5156,8 +5187,7 @@ Done. Results in ${outDir}`);
|
|
|
5156
5187
|
);
|
|
5157
5188
|
return;
|
|
5158
5189
|
}
|
|
5159
|
-
|
|
5160
|
-
let totalPassed = 0;
|
|
5190
|
+
const graderTasks = [];
|
|
5161
5191
|
for (const testId of testIds) {
|
|
5162
5192
|
const subpath = safeEvalSet ? [safeEvalSet, testId] : [testId];
|
|
5163
5193
|
const testDir = join4(outDir, ...subpath);
|
|
@@ -5174,82 +5204,11 @@ Done. Results in ${outDir}`);
|
|
|
5174
5204
|
const responseText = await readFile4(join4(testDir, "response.md"), "utf8");
|
|
5175
5205
|
const inputData = JSON.parse(await readFile4(join4(testDir, "input.json"), "utf8"));
|
|
5176
5206
|
for (const graderFile of graderFiles) {
|
|
5177
|
-
|
|
5178
|
-
const graderName = graderConfig.name;
|
|
5179
|
-
const inputText = extractInputText2(inputData.input);
|
|
5180
|
-
const payload = JSON.stringify({
|
|
5181
|
-
output: [{ role: "assistant", content: responseText }],
|
|
5182
|
-
input: inputData.input,
|
|
5183
|
-
criteria: "",
|
|
5184
|
-
expected_output: [],
|
|
5185
|
-
input_files: inputData.input_files ?? [],
|
|
5186
|
-
trace: null,
|
|
5187
|
-
token_usage: null,
|
|
5188
|
-
cost_usd: null,
|
|
5189
|
-
duration_ms: null,
|
|
5190
|
-
start_time: null,
|
|
5191
|
-
end_time: null,
|
|
5192
|
-
file_changes: null,
|
|
5193
|
-
workspace_path: null,
|
|
5194
|
-
config: graderConfig.config ?? null,
|
|
5195
|
-
metadata: inputData.metadata ?? {},
|
|
5196
|
-
input_text: inputText,
|
|
5197
|
-
output_text: responseText,
|
|
5198
|
-
expected_output_text: ""
|
|
5199
|
-
});
|
|
5200
|
-
try {
|
|
5201
|
-
const stdout = await executeScript(
|
|
5202
|
-
graderConfig.command,
|
|
5203
|
-
payload,
|
|
5204
|
-
void 0,
|
|
5205
|
-
graderConfig.cwd
|
|
5206
|
-
);
|
|
5207
|
-
const parsed = JSON.parse(stdout);
|
|
5208
|
-
const score = typeof parsed.score === "number" ? parsed.score : 0;
|
|
5209
|
-
const assertions = Array.isArray(parsed.assertions) ? parsed.assertions : [];
|
|
5210
|
-
await writeFile5(
|
|
5211
|
-
join4(resultsDir, `${graderName}.json`),
|
|
5212
|
-
`${JSON.stringify(
|
|
5213
|
-
{
|
|
5214
|
-
name: graderName,
|
|
5215
|
-
type: "code-grader",
|
|
5216
|
-
score,
|
|
5217
|
-
weight: graderConfig.weight ?? 1,
|
|
5218
|
-
assertions,
|
|
5219
|
-
details: parsed.details ?? {}
|
|
5220
|
-
},
|
|
5221
|
-
null,
|
|
5222
|
-
2
|
|
5223
|
-
)}
|
|
5224
|
-
`,
|
|
5225
|
-
"utf8"
|
|
5226
|
-
);
|
|
5227
|
-
totalGraders++;
|
|
5228
|
-
if (score >= 0.5) totalPassed++;
|
|
5229
|
-
} catch (error) {
|
|
5230
|
-
const message = error instanceof Error ? error.message : String(error);
|
|
5231
|
-
console.error(` ${testId}/${graderName}: ERROR \u2014 ${message}`);
|
|
5232
|
-
await writeFile5(
|
|
5233
|
-
join4(resultsDir, `${graderName}.json`),
|
|
5234
|
-
`${JSON.stringify(
|
|
5235
|
-
{
|
|
5236
|
-
name: graderName,
|
|
5237
|
-
type: "code-grader",
|
|
5238
|
-
score: 0,
|
|
5239
|
-
weight: graderConfig.weight ?? 1,
|
|
5240
|
-
assertions: [{ text: `Error: ${message}`, passed: false }],
|
|
5241
|
-
details: { error: message }
|
|
5242
|
-
},
|
|
5243
|
-
null,
|
|
5244
|
-
2
|
|
5245
|
-
)}
|
|
5246
|
-
`,
|
|
5247
|
-
"utf8"
|
|
5248
|
-
);
|
|
5249
|
-
totalGraders++;
|
|
5250
|
-
}
|
|
5207
|
+
graderTasks.push({ testId, testDir, resultsDir, graderFile, responseText, inputData });
|
|
5251
5208
|
}
|
|
5252
5209
|
}
|
|
5210
|
+
const graderConcurrency = workers ?? 10;
|
|
5211
|
+
const { totalGraders, totalPassed } = await runCodeGraders(graderTasks, graderConcurrency);
|
|
5253
5212
|
console.log(`Graded ${totalGraders} code-grader(s): ${totalPassed} passed`);
|
|
5254
5213
|
console.log(`
|
|
5255
5214
|
Done. Agent can now perform LLM grading on responses in ${outDir}`);
|
|
@@ -6743,8 +6702,8 @@ function resolveStudioDistDir() {
|
|
|
6743
6702
|
path9.resolve(currentDir, "../../../../studio/dist"),
|
|
6744
6703
|
// From dist/ → sibling apps/studio/dist (monorepo dev)
|
|
6745
6704
|
path9.resolve(currentDir, "../../studio/dist"),
|
|
6746
|
-
// Bundled inside CLI dist (published package)
|
|
6747
|
-
path9.resolve(currentDir, "
|
|
6705
|
+
// Bundled inside CLI dist (published package: dist/studio/)
|
|
6706
|
+
path9.resolve(currentDir, "studio"),
|
|
6748
6707
|
// From dist/ in monorepo root context
|
|
6749
6708
|
path9.resolve(currentDir, "../../../apps/studio/dist")
|
|
6750
6709
|
];
|
|
@@ -8359,4 +8318,4 @@ export {
|
|
|
8359
8318
|
preprocessArgv,
|
|
8360
8319
|
runCli
|
|
8361
8320
|
};
|
|
8362
|
-
//# sourceMappingURL=chunk-TDY2FQN5.js.map
|
|
8321
|
+
//# sourceMappingURL=chunk-ASU5L5ZW.js.map
|