npm - @workbench-ai/workbench-built-in-adapters - Versions diffs - 0.0.73 → 0.0.74 - Mend

@workbench-ai/workbench-built-in-adapters 0.0.73 → 0.0.74

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (2)

package/dist/execute.js +39 -39
package/package.json +4 -4

package/dist/execute.js CHANGED

@@ -180,9 +180,6 @@ async function executeTestsEngineRequest(request) {
     }
     await ensureRunSkillDirectories(request);
     const testsRoot = requiredRequestPath(request.paths.enginePrivate, "paths.enginePrivate");
-    const verifierRoot = testsVerifierOutputDir(request.paths.output);
-    await fs.rm(verifierRoot, { recursive: true, force: true }).catch(() => undefined);
-    await fs.mkdir(verifierRoot, { recursive: true });
     const script = await firstExistingFile([
         path.join(testsRoot, "test.sh"),
         path.join(testsRoot, "run.sh"),
@@ -190,17 +187,23 @@ async function executeTestsEngineRequest(request) {
     if (!script) {
         throw new Error(`Tests engine requires ${path.join(testsRoot, "test.sh")}.`);
     }
-    await runAdapterShellCommand(`sh ${shellQuote(script)}`, request.paths.workspace, {
+    const shellFailure = await runAdapterShellCommand(`sh ${shellQuote(script)}`, request.paths.workspace, {
         SKILL_DIR: request.paths.skill ?? path.join(request.paths.workspace, "input", "skills", "primary"),
         SKILLS_DIR: request.paths.skills ?? path.join(request.paths.workspace, "input", "skills"),
         CASE_DIR: request.paths.case ?? path.join(request.paths.workspace, "input", "case"),
         OUTPUT_DIR: request.paths.output,
-        WORKBENCH_TESTS_VERIFIER_DIR: verifierRoot,
         WORKBENCH_CASE_ID: request.context?.attempt?.caseId ?? "current",
-    });
+    }).then(() => null, (error) => error);
     const result = await readTestsResult({
-        verifierRoot,
+        outputRoot: request.paths.output,
         caseId: request.context?.attempt?.caseId ?? "current",
+    }).catch((error) => {
+        if (shellFailure) {
+            const shellMessage = shellFailure instanceof Error ? shellFailure.message : String(shellFailure);
+            const resultMessage = error instanceof Error ? error.message : String(error);
+            throw new Error(`${shellMessage}; ${resultMessage}`);
+        }
+        throw error;
     });
     await writeWorkbenchAdapterOperationResult(request.paths.output, {
         protocol: "workbench.adapter-result.v1",
@@ -492,29 +495,12 @@ async function fileExists(filePath) {
     return fs.stat(filePath).then((stat) => stat.isFile(), () => false);
 }
 async function readTestsResult(args) {
-    const rewardJson = await readOptionalJson(path.join(args.verifierRoot, "reward.json"));
-    if (rewardJson) {
-        return normalizeTestsResult(rewardJson, args.caseId);
-    }
-    const rewardText = await fs.readFile(path.join(args.verifierRoot, "reward.txt"), "utf8").catch((error) => {
-        if (error.code === "ENOENT") {
-            return null;
-        }
-        throw error;
-    });
-    if (rewardText !== null) {
-        const score = Number.parseFloat(rewardText.trim());
-        if (!Number.isFinite(score)) {
-            throw new Error("Tests engine reward.txt must contain a finite numeric reward.");
-        }
-        return normalizeTestsResult({ reward: score }, args.caseId);
+    const resultJson = await readOptionalJson(path.join(args.outputRoot, "result.json"));
+    if (resultJson) {
+        return normalizeTestsResult(resultJson, args.caseId);
     }
-    throw new Error("Tests engine did not find reward.json or reward.txt under its verifier output directory " +
-        `(${args.verifierRoot}). The tests script must write a reward to ` +
-        "$WORKBENCH_TESTS_VERIFIER_DIR/reward.json or $WORKBENCH_TESTS_VERIFIER_DIR/reward.txt.");
-}
-function testsVerifierOutputDir(outputRoot) {
-    return path.join(outputRoot, ".workbench", "internal", "verifier");
+    throw new Error(`Tests engine did not find result.json under OUTPUT_DIR (${args.outputRoot}). ` +
+        "The tests script must write a result to $OUTPUT_DIR/result.json.");
 }
 async function readOptionalJson(filePath) {
     const source = await fs.readFile(filePath, "utf8").catch((error) => {
@@ -533,13 +519,20 @@ async function readOptionalJson(filePath) {
     return parsed;
 }
 function normalizeTestsResult(record, caseId) {
+    const rawPassed = typeof record.ok === "boolean"
+        ? record.ok
+        : typeof record.passed === "boolean"
+            ? record.passed
+            : typeof record.pass === "boolean"
+                ? record.pass
+                : undefined;
     const rawScore = typeof record.score === "number"
         ? record.score
-        : typeof record.reward === "number"
-            ? record.reward
+        : rawPassed !== undefined
+            ? rawPassed ? 1 : 0
             : undefined;
     if (rawScore === undefined || !Number.isFinite(rawScore)) {
-        throw new Error("Tests engine reward must include a finite numeric score or reward.");
+        throw new Error("Tests engine result must include a finite numeric score or boolean ok/passed/pass.");
     }
     const metrics = normalizeTestsMetrics(record, rawScore);
     return {
@@ -547,12 +540,19 @@ function normalizeTestsResult(record, caseId) {
         metrics,
         cases: [{
                 id: caseId,
-                status: "completed",
+                status: rawPassed === false ? "error" : "completed",
                 metrics,
+                ...(rawPassed === false
+                    ? { feedback: { message: typeof record.message === "string" ? record.message : "Test failed." } }
+                    : {}),
             }],
-        ...(typeof record.summary === "string" ? { summary: record.summary } : {}),
+        ...(typeof record.summary === "string"
+            ? { summary: record.summary }
+            : typeof record.message === "string"
+                ? { summary: record.message }
+                : {}),
         feedback: {
-            reward: record,
+            result: record,
         },
     };
 }
@@ -560,10 +560,10 @@ function normalizeTestsMetrics(record, score) {
     const metrics = { score };
     const source = record.metrics && typeof record.metrics === "object" && !Array.isArray(record.metrics)
         ? record.metrics
-        : record;
+        : {};
     for (const [key, value] of Object.entries(source)) {
         if (typeof value === "number" && Number.isFinite(value)) {
-            metrics[key === "reward" ? "score" : key] = value;
+            metrics[key] = value;
         }
     }
     return metrics;
@@ -1038,8 +1038,8 @@ function buildRubricCriterionJudgePrompt(workload, engine, criterion) {
         "- The skill already ran in this same working directory.",
         "- Skill outputs are available in the current working directory.",
         "- Public case files are mounted at /workspace/input/case.",
-        "- Verifier-private files are mounted at /workspace/private/engine when the case provides them.",
-        "- Score only from the current working directory, public case files, verifier-private files, and the criterion above.",
+        "- Private case files are mounted at /workspace/private/engine when the case provides them.",
+        "- Score only from the current working directory, public case files, private case files, and the criterion above.",
         "",
         "Output:",
         "Return only a JSON object. Do not wrap it in Markdown.",

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "@workbench-ai/workbench-built-in-adapters",
-  "version": "0.0.73",
+  "version": "0.0.74",
   "repository": {
     "type": "git",
     "url": "git+https://github.com/workbench-ai/workbench.git",
@@ -35,9 +35,9 @@
     "@workbench-ai/agent-driver-anthropic-claude-code": "0.0.46",
     "@workbench-ai/agent-driver-openai-codex": "0.0.46",
     "@workbench-ai/agent-driver": "0.0.46",
-    "@workbench-ai/workbench-core": "0.0.73",
-    "@workbench-ai/workbench-protocol": "0.0.73",
-    "@workbench-ai/workbench-contract": "0.0.73"
+    "@workbench-ai/workbench-core": "0.0.74",
+    "@workbench-ai/workbench-contract": "0.0.74",
+    "@workbench-ai/workbench-protocol": "0.0.74"
   },
   "devDependencies": {
     "@types/node": "^24.3.1",