@workbench-ai/workbench-built-in-adapters 0.0.73 → 0.0.74

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/dist/execute.js +39 -39
  2. package/package.json +4 -4
package/dist/execute.js CHANGED
@@ -180,9 +180,6 @@ async function executeTestsEngineRequest(request) {
180
180
  }
181
181
  await ensureRunSkillDirectories(request);
182
182
  const testsRoot = requiredRequestPath(request.paths.enginePrivate, "paths.enginePrivate");
183
- const verifierRoot = testsVerifierOutputDir(request.paths.output);
184
- await fs.rm(verifierRoot, { recursive: true, force: true }).catch(() => undefined);
185
- await fs.mkdir(verifierRoot, { recursive: true });
186
183
  const script = await firstExistingFile([
187
184
  path.join(testsRoot, "test.sh"),
188
185
  path.join(testsRoot, "run.sh"),
@@ -190,17 +187,23 @@ async function executeTestsEngineRequest(request) {
190
187
  if (!script) {
191
188
  throw new Error(`Tests engine requires ${path.join(testsRoot, "test.sh")}.`);
192
189
  }
193
- await runAdapterShellCommand(`sh ${shellQuote(script)}`, request.paths.workspace, {
190
+ const shellFailure = await runAdapterShellCommand(`sh ${shellQuote(script)}`, request.paths.workspace, {
194
191
  SKILL_DIR: request.paths.skill ?? path.join(request.paths.workspace, "input", "skills", "primary"),
195
192
  SKILLS_DIR: request.paths.skills ?? path.join(request.paths.workspace, "input", "skills"),
196
193
  CASE_DIR: request.paths.case ?? path.join(request.paths.workspace, "input", "case"),
197
194
  OUTPUT_DIR: request.paths.output,
198
- WORKBENCH_TESTS_VERIFIER_DIR: verifierRoot,
199
195
  WORKBENCH_CASE_ID: request.context?.attempt?.caseId ?? "current",
200
- });
196
+ }).then(() => null, (error) => error);
201
197
  const result = await readTestsResult({
202
- verifierRoot,
198
+ outputRoot: request.paths.output,
203
199
  caseId: request.context?.attempt?.caseId ?? "current",
200
+ }).catch((error) => {
201
+ if (shellFailure) {
202
+ const shellMessage = shellFailure instanceof Error ? shellFailure.message : String(shellFailure);
203
+ const resultMessage = error instanceof Error ? error.message : String(error);
204
+ throw new Error(`${shellMessage}; ${resultMessage}`);
205
+ }
206
+ throw error;
204
207
  });
205
208
  await writeWorkbenchAdapterOperationResult(request.paths.output, {
206
209
  protocol: "workbench.adapter-result.v1",
@@ -492,29 +495,12 @@ async function fileExists(filePath) {
492
495
  return fs.stat(filePath).then((stat) => stat.isFile(), () => false);
493
496
  }
494
497
  async function readTestsResult(args) {
495
- const rewardJson = await readOptionalJson(path.join(args.verifierRoot, "reward.json"));
496
- if (rewardJson) {
497
- return normalizeTestsResult(rewardJson, args.caseId);
498
- }
499
- const rewardText = await fs.readFile(path.join(args.verifierRoot, "reward.txt"), "utf8").catch((error) => {
500
- if (error.code === "ENOENT") {
501
- return null;
502
- }
503
- throw error;
504
- });
505
- if (rewardText !== null) {
506
- const score = Number.parseFloat(rewardText.trim());
507
- if (!Number.isFinite(score)) {
508
- throw new Error("Tests engine reward.txt must contain a finite numeric reward.");
509
- }
510
- return normalizeTestsResult({ reward: score }, args.caseId);
498
+ const resultJson = await readOptionalJson(path.join(args.outputRoot, "result.json"));
499
+ if (resultJson) {
500
+ return normalizeTestsResult(resultJson, args.caseId);
511
501
  }
512
- throw new Error("Tests engine did not find reward.json or reward.txt under its verifier output directory " +
513
- `(${args.verifierRoot}). The tests script must write a reward to ` +
514
- "$WORKBENCH_TESTS_VERIFIER_DIR/reward.json or $WORKBENCH_TESTS_VERIFIER_DIR/reward.txt.");
515
- }
516
- function testsVerifierOutputDir(outputRoot) {
517
- return path.join(outputRoot, ".workbench", "internal", "verifier");
502
+ throw new Error(`Tests engine did not find result.json under OUTPUT_DIR (${args.outputRoot}). ` +
503
+ "The tests script must write a result to $OUTPUT_DIR/result.json.");
518
504
  }
519
505
  async function readOptionalJson(filePath) {
520
506
  const source = await fs.readFile(filePath, "utf8").catch((error) => {
@@ -533,13 +519,20 @@ async function readOptionalJson(filePath) {
533
519
  return parsed;
534
520
  }
535
521
  function normalizeTestsResult(record, caseId) {
522
+ const rawPassed = typeof record.ok === "boolean"
523
+ ? record.ok
524
+ : typeof record.passed === "boolean"
525
+ ? record.passed
526
+ : typeof record.pass === "boolean"
527
+ ? record.pass
528
+ : undefined;
536
529
  const rawScore = typeof record.score === "number"
537
530
  ? record.score
538
- : typeof record.reward === "number"
539
- ? record.reward
531
+ : rawPassed !== undefined
532
+ ? rawPassed ? 1 : 0
540
533
  : undefined;
541
534
  if (rawScore === undefined || !Number.isFinite(rawScore)) {
542
- throw new Error("Tests engine reward must include a finite numeric score or reward.");
535
+ throw new Error("Tests engine result must include a finite numeric score or boolean ok/passed/pass.");
543
536
  }
544
537
  const metrics = normalizeTestsMetrics(record, rawScore);
545
538
  return {
@@ -547,12 +540,19 @@ function normalizeTestsResult(record, caseId) {
547
540
  metrics,
548
541
  cases: [{
549
542
  id: caseId,
550
- status: "completed",
543
+ status: rawPassed === false ? "error" : "completed",
551
544
  metrics,
545
+ ...(rawPassed === false
546
+ ? { feedback: { message: typeof record.message === "string" ? record.message : "Test failed." } }
547
+ : {}),
552
548
  }],
553
- ...(typeof record.summary === "string" ? { summary: record.summary } : {}),
549
+ ...(typeof record.summary === "string"
550
+ ? { summary: record.summary }
551
+ : typeof record.message === "string"
552
+ ? { summary: record.message }
553
+ : {}),
554
554
  feedback: {
555
- reward: record,
555
+ result: record,
556
556
  },
557
557
  };
558
558
  }
@@ -560,10 +560,10 @@ function normalizeTestsMetrics(record, score) {
560
560
  const metrics = { score };
561
561
  const source = record.metrics && typeof record.metrics === "object" && !Array.isArray(record.metrics)
562
562
  ? record.metrics
563
- : record;
563
+ : {};
564
564
  for (const [key, value] of Object.entries(source)) {
565
565
  if (typeof value === "number" && Number.isFinite(value)) {
566
- metrics[key === "reward" ? "score" : key] = value;
566
+ metrics[key] = value;
567
567
  }
568
568
  }
569
569
  return metrics;
@@ -1038,8 +1038,8 @@ function buildRubricCriterionJudgePrompt(workload, engine, criterion) {
1038
1038
  "- The skill already ran in this same working directory.",
1039
1039
  "- Skill outputs are available in the current working directory.",
1040
1040
  "- Public case files are mounted at /workspace/input/case.",
1041
- "- Verifier-private files are mounted at /workspace/private/engine when the case provides them.",
1042
- "- Score only from the current working directory, public case files, verifier-private files, and the criterion above.",
1041
+ "- Private case files are mounted at /workspace/private/engine when the case provides them.",
1042
+ "- Score only from the current working directory, public case files, private case files, and the criterion above.",
1043
1043
  "",
1044
1044
  "Output:",
1045
1045
  "Return only a JSON object. Do not wrap it in Markdown.",
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@workbench-ai/workbench-built-in-adapters",
3
- "version": "0.0.73",
3
+ "version": "0.0.74",
4
4
  "repository": {
5
5
  "type": "git",
6
6
  "url": "git+https://github.com/workbench-ai/workbench.git",
@@ -35,9 +35,9 @@
35
35
  "@workbench-ai/agent-driver-anthropic-claude-code": "0.0.46",
36
36
  "@workbench-ai/agent-driver-openai-codex": "0.0.46",
37
37
  "@workbench-ai/agent-driver": "0.0.46",
38
- "@workbench-ai/workbench-core": "0.0.73",
39
- "@workbench-ai/workbench-protocol": "0.0.73",
40
- "@workbench-ai/workbench-contract": "0.0.73"
38
+ "@workbench-ai/workbench-core": "0.0.74",
39
+ "@workbench-ai/workbench-contract": "0.0.74",
40
+ "@workbench-ai/workbench-protocol": "0.0.74"
41
41
  },
42
42
  "devDependencies": {
43
43
  "@types/node": "^24.3.1",