@workbench-ai/workbench-built-in-adapters 0.0.72 → 0.0.74
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/execute.js +39 -39
- package/package.json +4 -4
package/dist/execute.js
CHANGED
|
@@ -180,9 +180,6 @@ async function executeTestsEngineRequest(request) {
|
|
|
180
180
|
}
|
|
181
181
|
await ensureRunSkillDirectories(request);
|
|
182
182
|
const testsRoot = requiredRequestPath(request.paths.enginePrivate, "paths.enginePrivate");
|
|
183
|
-
const verifierRoot = testsVerifierOutputDir(request.paths.output);
|
|
184
|
-
await fs.rm(verifierRoot, { recursive: true, force: true }).catch(() => undefined);
|
|
185
|
-
await fs.mkdir(verifierRoot, { recursive: true });
|
|
186
183
|
const script = await firstExistingFile([
|
|
187
184
|
path.join(testsRoot, "test.sh"),
|
|
188
185
|
path.join(testsRoot, "run.sh"),
|
|
@@ -190,17 +187,23 @@ async function executeTestsEngineRequest(request) {
|
|
|
190
187
|
if (!script) {
|
|
191
188
|
throw new Error(`Tests engine requires ${path.join(testsRoot, "test.sh")}.`);
|
|
192
189
|
}
|
|
193
|
-
await runAdapterShellCommand(`sh ${shellQuote(script)}`, request.paths.workspace, {
|
|
190
|
+
const shellFailure = await runAdapterShellCommand(`sh ${shellQuote(script)}`, request.paths.workspace, {
|
|
194
191
|
SKILL_DIR: request.paths.skill ?? path.join(request.paths.workspace, "input", "skills", "primary"),
|
|
195
192
|
SKILLS_DIR: request.paths.skills ?? path.join(request.paths.workspace, "input", "skills"),
|
|
196
193
|
CASE_DIR: request.paths.case ?? path.join(request.paths.workspace, "input", "case"),
|
|
197
194
|
OUTPUT_DIR: request.paths.output,
|
|
198
|
-
WORKBENCH_TESTS_VERIFIER_DIR: verifierRoot,
|
|
199
195
|
WORKBENCH_CASE_ID: request.context?.attempt?.caseId ?? "current",
|
|
200
|
-
});
|
|
196
|
+
}).then(() => null, (error) => error);
|
|
201
197
|
const result = await readTestsResult({
|
|
202
|
-
|
|
198
|
+
outputRoot: request.paths.output,
|
|
203
199
|
caseId: request.context?.attempt?.caseId ?? "current",
|
|
200
|
+
}).catch((error) => {
|
|
201
|
+
if (shellFailure) {
|
|
202
|
+
const shellMessage = shellFailure instanceof Error ? shellFailure.message : String(shellFailure);
|
|
203
|
+
const resultMessage = error instanceof Error ? error.message : String(error);
|
|
204
|
+
throw new Error(`${shellMessage}; ${resultMessage}`);
|
|
205
|
+
}
|
|
206
|
+
throw error;
|
|
204
207
|
});
|
|
205
208
|
await writeWorkbenchAdapterOperationResult(request.paths.output, {
|
|
206
209
|
protocol: "workbench.adapter-result.v1",
|
|
@@ -492,29 +495,12 @@ async function fileExists(filePath) {
|
|
|
492
495
|
return fs.stat(filePath).then((stat) => stat.isFile(), () => false);
|
|
493
496
|
}
|
|
494
497
|
async function readTestsResult(args) {
|
|
495
|
-
const
|
|
496
|
-
if (
|
|
497
|
-
return normalizeTestsResult(
|
|
498
|
-
}
|
|
499
|
-
const rewardText = await fs.readFile(path.join(args.verifierRoot, "reward.txt"), "utf8").catch((error) => {
|
|
500
|
-
if (error.code === "ENOENT") {
|
|
501
|
-
return null;
|
|
502
|
-
}
|
|
503
|
-
throw error;
|
|
504
|
-
});
|
|
505
|
-
if (rewardText !== null) {
|
|
506
|
-
const score = Number.parseFloat(rewardText.trim());
|
|
507
|
-
if (!Number.isFinite(score)) {
|
|
508
|
-
throw new Error("Tests engine reward.txt must contain a finite numeric reward.");
|
|
509
|
-
}
|
|
510
|
-
return normalizeTestsResult({ reward: score }, args.caseId);
|
|
498
|
+
const resultJson = await readOptionalJson(path.join(args.outputRoot, "result.json"));
|
|
499
|
+
if (resultJson) {
|
|
500
|
+
return normalizeTestsResult(resultJson, args.caseId);
|
|
511
501
|
}
|
|
512
|
-
throw new Error(
|
|
513
|
-
|
|
514
|
-
"$WORKBENCH_TESTS_VERIFIER_DIR/reward.json or $WORKBENCH_TESTS_VERIFIER_DIR/reward.txt.");
|
|
515
|
-
}
|
|
516
|
-
function testsVerifierOutputDir(outputRoot) {
|
|
517
|
-
return path.join(outputRoot, ".workbench", "internal", "verifier");
|
|
502
|
+
throw new Error(`Tests engine did not find result.json under OUTPUT_DIR (${args.outputRoot}). ` +
|
|
503
|
+
"The tests script must write a result to $OUTPUT_DIR/result.json.");
|
|
518
504
|
}
|
|
519
505
|
async function readOptionalJson(filePath) {
|
|
520
506
|
const source = await fs.readFile(filePath, "utf8").catch((error) => {
|
|
@@ -533,13 +519,20 @@ async function readOptionalJson(filePath) {
|
|
|
533
519
|
return parsed;
|
|
534
520
|
}
|
|
535
521
|
function normalizeTestsResult(record, caseId) {
|
|
522
|
+
const rawPassed = typeof record.ok === "boolean"
|
|
523
|
+
? record.ok
|
|
524
|
+
: typeof record.passed === "boolean"
|
|
525
|
+
? record.passed
|
|
526
|
+
: typeof record.pass === "boolean"
|
|
527
|
+
? record.pass
|
|
528
|
+
: undefined;
|
|
536
529
|
const rawScore = typeof record.score === "number"
|
|
537
530
|
? record.score
|
|
538
|
-
:
|
|
539
|
-
?
|
|
531
|
+
: rawPassed !== undefined
|
|
532
|
+
? rawPassed ? 1 : 0
|
|
540
533
|
: undefined;
|
|
541
534
|
if (rawScore === undefined || !Number.isFinite(rawScore)) {
|
|
542
|
-
throw new Error("Tests engine
|
|
535
|
+
throw new Error("Tests engine result must include a finite numeric score or boolean ok/passed/pass.");
|
|
543
536
|
}
|
|
544
537
|
const metrics = normalizeTestsMetrics(record, rawScore);
|
|
545
538
|
return {
|
|
@@ -547,12 +540,19 @@ function normalizeTestsResult(record, caseId) {
|
|
|
547
540
|
metrics,
|
|
548
541
|
cases: [{
|
|
549
542
|
id: caseId,
|
|
550
|
-
status: "completed",
|
|
543
|
+
status: rawPassed === false ? "error" : "completed",
|
|
551
544
|
metrics,
|
|
545
|
+
...(rawPassed === false
|
|
546
|
+
? { feedback: { message: typeof record.message === "string" ? record.message : "Test failed." } }
|
|
547
|
+
: {}),
|
|
552
548
|
}],
|
|
553
|
-
...(typeof record.summary === "string"
|
|
549
|
+
...(typeof record.summary === "string"
|
|
550
|
+
? { summary: record.summary }
|
|
551
|
+
: typeof record.message === "string"
|
|
552
|
+
? { summary: record.message }
|
|
553
|
+
: {}),
|
|
554
554
|
feedback: {
|
|
555
|
-
|
|
555
|
+
result: record,
|
|
556
556
|
},
|
|
557
557
|
};
|
|
558
558
|
}
|
|
@@ -560,10 +560,10 @@ function normalizeTestsMetrics(record, score) {
|
|
|
560
560
|
const metrics = { score };
|
|
561
561
|
const source = record.metrics && typeof record.metrics === "object" && !Array.isArray(record.metrics)
|
|
562
562
|
? record.metrics
|
|
563
|
-
:
|
|
563
|
+
: {};
|
|
564
564
|
for (const [key, value] of Object.entries(source)) {
|
|
565
565
|
if (typeof value === "number" && Number.isFinite(value)) {
|
|
566
|
-
metrics[key
|
|
566
|
+
metrics[key] = value;
|
|
567
567
|
}
|
|
568
568
|
}
|
|
569
569
|
return metrics;
|
|
@@ -1038,8 +1038,8 @@ function buildRubricCriterionJudgePrompt(workload, engine, criterion) {
|
|
|
1038
1038
|
"- The skill already ran in this same working directory.",
|
|
1039
1039
|
"- Skill outputs are available in the current working directory.",
|
|
1040
1040
|
"- Public case files are mounted at /workspace/input/case.",
|
|
1041
|
-
"-
|
|
1042
|
-
"- Score only from the current working directory, public case files,
|
|
1041
|
+
"- Private case files are mounted at /workspace/private/engine when the case provides them.",
|
|
1042
|
+
"- Score only from the current working directory, public case files, private case files, and the criterion above.",
|
|
1043
1043
|
"",
|
|
1044
1044
|
"Output:",
|
|
1045
1045
|
"Return only a JSON object. Do not wrap it in Markdown.",
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@workbench-ai/workbench-built-in-adapters",
|
|
3
|
-
"version": "0.0.72",
|
|
3
|
+
"version": "0.0.74",
|
|
4
4
|
"repository": {
|
|
5
5
|
"type": "git",
|
|
6
6
|
"url": "git+https://github.com/workbench-ai/workbench.git",
|
|
@@ -34,10 +34,10 @@
|
|
|
34
34
|
"yaml": "^2.8.2",
|
|
35
35
|
"@workbench-ai/agent-driver-anthropic-claude-code": "0.0.46",
|
|
36
36
|
"@workbench-ai/agent-driver-openai-codex": "0.0.46",
|
|
37
|
-
"@workbench-ai/workbench-core": "0.0.72",
|
|
38
37
|
"@workbench-ai/agent-driver": "0.0.46",
|
|
39
|
-
"@workbench-ai/workbench-
|
|
40
|
-
"@workbench-ai/workbench-
|
|
38
|
+
"@workbench-ai/workbench-core": "0.0.74",
|
|
39
|
+
"@workbench-ai/workbench-contract": "0.0.74",
|
|
40
|
+
"@workbench-ai/workbench-protocol": "0.0.74"
|
|
41
41
|
},
|
|
42
42
|
"devDependencies": {
|
|
43
43
|
"@types/node": "^24.3.1",
|