npm - codeharness - Versions diffs - 0.37.0 → 0.37.1 - Mend

codeharness 0.37.0 → 0.37.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5)

package/dist/{chunk-HCAPQSAZ.js → chunk-AXFKDGFF.js} +1 -1
package/dist/{docker-6GFZ4B3V.js → docker-OA5CDTQZ.js} +1 -1
package/dist/index.js +51 -193
package/package.json +1 -1
package/templates/agents/evaluator.yaml +12 -47

package/dist/{chunk-HCAPQSAZ.js → chunk-AXFKDGFF.js} RENAMED Viewed

@@ -2895,7 +2895,7 @@ function generateDockerfileTemplate(projectDir, stackOrDetections) {
 }
 // src/modules/infra/init-project.ts
-var HARNESS_VERSION = true ? "0.37.0" : "0.0.0-dev";
+var HARNESS_VERSION = true ? "0.37.1" : "0.0.0-dev";
 function failResult(opts, error) {
   return {
     status: "fail",

package/dist/{docker-6GFZ4B3V.js → docker-OA5CDTQZ.js} RENAMED Viewed

@@ -16,7 +16,7 @@ import {
   stopCollectorOnly,
   stopSharedStack,
   stopStack
-} from "./chunk-HCAPQSAZ.js";
+} from "./chunk-AXFKDGFF.js";
 export {
   checkRemoteEndpoint,
   cleanupOrphanedContainers,

package/dist/index.js CHANGED Viewed

@@ -40,7 +40,7 @@ import {
   validateDockerfile,
   warn,
   writeState
-} from "./chunk-HCAPQSAZ.js";
+} from "./chunk-AXFKDGFF.js";
 // src/index.ts
 import { Command } from "commander";
@@ -2648,179 +2648,59 @@ var DispatchError = class extends Error {
 };
 // src/lib/verdict-parser.ts
-import Ajv2 from "ajv";
-// src/schemas/verdict.schema.json
-var verdict_schema_default = {
-  $schema: "http://json-schema.org/draft-07/schema#",
-  $id: "https://codeharness.dev/schemas/verdict.schema.json",
-  title: "EvaluatorVerdict",
-  description: "Schema for evaluator verdict output (AD5)",
-  type: "object",
-  required: ["verdict", "score", "findings"],
-  additionalProperties: true,
-  properties: {
-    verdict: {
-      type: "string",
-      enum: ["pass", "fail"]
-    },
-    score: {
-      type: "object",
-      required: ["passed", "failed", "unknown", "total"],
-      additionalProperties: true,
-      properties: {
-        passed: {
-          type: "integer",
-          minimum: 0
-        },
-        failed: {
-          type: "integer",
-          minimum: 0
-        },
-        unknown: {
-          type: "integer",
-          minimum: 0
-        },
-        total: {
-          type: "integer",
-          minimum: 0
-        }
-      }
-    },
-    findings: {
-      type: "array",
-      items: {
-        type: "object",
-        required: ["ac", "description", "status", "evidence"],
-        additionalProperties: true,
-        properties: {
-          ac: {
-            type: "integer"
-          },
-          description: {
-            type: "string"
-          },
-          status: {
-            type: "string",
-            enum: ["pass", "fail", "unknown"]
-          },
-          evidence: {
-            type: "object",
-            required: ["commands_run", "output_observed", "reasoning"],
-            additionalProperties: true,
-            properties: {
-              commands_run: {
-                type: "array",
-                items: {
-                  type: "string"
-                }
-              },
-              output_observed: {
-                type: "string"
-              },
-              reasoning: {
-                type: "string"
-              }
-            }
-          }
-        }
-      }
-    },
-    evaluator_trace_id: {
-      type: "string"
-    },
-    duration_seconds: {
-      type: "number"
-    }
-  }
-};
-// src/lib/verdict-parser.ts
-var VerdictParseError = class _VerdictParseError extends Error {
-  retryable;
-  rawOutput;
-  validationErrors;
-  constructor(message, retryable, rawOutput, validationErrors) {
-    super(message);
-    Object.setPrototypeOf(this, _VerdictParseError.prototype);
-    this.name = "VerdictParseError";
-    this.retryable = retryable;
-    this.rawOutput = rawOutput;
-    this.validationErrors = validationErrors;
-  }
-};
-var ajv2 = new Ajv2({ allErrors: true });
-var validateSchema = ajv2.compile(verdict_schema_default);
-function validateVerdict(data) {
-  const valid = validateSchema(data);
-  if (valid) {
-    const verdict = JSON.parse(JSON.stringify(data));
-    return { valid: true, verdict };
-  }
-  const errors = (validateSchema.errors ?? []).map((err) => {
-    const path = err.instancePath || "/";
-    return `${path}: ${err.message ?? "unknown error"}`;
-  });
-  return { valid: false, errors };
-}
 function parseVerdict(output) {
-  let parsed;
-  try {
-    parsed = JSON.parse(output);
-  } catch {
-    throw new VerdictParseError(
-      "Failed to parse verdict: invalid JSON",
-      true,
-      output
-    );
-  }
-  const result = validateVerdict(parsed);
-  if (!result.valid) {
-    throw new VerdictParseError(
-      `Failed to parse verdict: schema validation failed`,
-      true,
-      output,
-      result.errors
-    );
+  const verdictMatch = /<verdict>(pass|fail)<\/verdict>/i.exec(output);
+  const verdictValue = verdictMatch ? verdictMatch[1].toLowerCase() : "fail";
+  const findings = [];
+  const evidenceRegex = /<evidence\s+ac="(\d+)"\s+status="(pass|fail|unknown)">([\s\S]*?)<\/evidence>/gi;
+  let evidenceMatch;
+  while ((evidenceMatch = evidenceRegex.exec(output)) !== null) {
+    findings.push({
+      ac: parseInt(evidenceMatch[1], 10),
+      description: `AC #${evidenceMatch[1]}`,
+      status: evidenceMatch[2].toLowerCase(),
+      evidence: {
+        commands_run: [],
+        output_observed: evidenceMatch[3].trim(),
+        reasoning: evidenceMatch[3].trim()
+      }
+    });
   }
-  const verdict = result.verdict;
-  let passDowngraded = false;
-  for (const finding of verdict.findings) {
-    if (finding.status === "pass" && (!finding.evidence.commands_run || finding.evidence.commands_run.length === 0)) {
-      finding.status = "unknown";
-      finding.evidence.reasoning += " [Downgraded from PASS: no commands_run evidence provided]";
-      passDowngraded = true;
+  if (findings.length === 0) {
+    const issuesMatch = /<issues>([\s\S]*?)<\/issues>/i.exec(output);
+    if (issuesMatch && verdictValue === "fail") {
+      findings.push({
+        ac: 1,
+        description: "Issues found",
+        status: "fail",
+        evidence: {
+          commands_run: [],
+          output_observed: issuesMatch[1].trim(),
+          reasoning: issuesMatch[1].trim()
+        }
+      });
     }
   }
-  if (passDowngraded) {
-    let passed = 0;
-    let failed = 0;
-    let unknown = 0;
-    for (const finding of verdict.findings) {
-      if (finding.status === "pass") passed++;
-      else if (finding.status === "fail") failed++;
-      else unknown++;
-    }
-    verdict.score = {
-      passed,
-      failed,
-      unknown,
-      total: verdict.findings.length
-    };
-    if (passed === 0) {
-      verdict.verdict = "fail";
+  let passed = 0;
+  let failed = 0;
+  let unknown = 0;
+  for (const f of findings) {
+    if (f.status === "pass") passed++;
+    else if (f.status === "fail") failed++;
+    else unknown++;
+  }
+  const total = findings.length || 1;
+  if (findings.length === 0) {
+    if (verdictValue === "pass") {
+      passed = 1;
+    } else {
+      failed = 1;
     }
   }
-  return verdict;
-}
-function parseVerdictTag(output) {
-  const match = /<verdict>(pass|fail)<\/verdict>/i.exec(output);
-  if (!match) return null;
-  const verdict = match[1].toLowerCase();
-  const issuesMatch = /<issues>([\s\S]*?)<\/issues>/i.exec(output);
   return {
-    verdict,
-    ...issuesMatch ? { issues: issuesMatch[1].trim() } : {}
+    verdict: verdictValue,
+    score: { passed, failed, unknown, total },
+    findings
   };
 }
 function extractTag(output, tag) {
@@ -6249,15 +6129,6 @@ ${formatted}
 Focus on fixing the failed criteria above.`;
 }
-function buildAllUnknownVerdict(workItems, reasoning) {
-  const findings = workItems.map((_, index) => ({
-    ac: index + 1,
-    description: `AC #${index + 1}`,
-    status: "unknown",
-    evidence: { commands_run: [], output_observed: "", reasoning }
-  }));
-  return { verdict: "fail", score: { passed: 0, failed: 0, unknown: findings.length, total: findings.length }, findings };
-}
 function getFailedItems(verdict, allItems) {
   if (!verdict) return allItems;
   if (verdict.verdict === "pass") return [];
@@ -6358,20 +6229,7 @@ var loopIterationActor = fromPromise2(async ({ input }) => {
         accumulatedCostUsd += dr.contract?.cost_usd ?? 0;
         tasksCompleted++;
         if (taskName === lastAgentTaskInLoop) {
-          let verdict = null;
-          const tagged = parseVerdictTag(dr.output);
-          if (tagged) {
-            verdict = { verdict: tagged.verdict, score: { passed: tagged.verdict === "pass" ? 1 : 0, failed: tagged.verdict === "fail" ? 1 : 0, unknown: 0, total: 1 }, findings: [] };
-          }
-          if (!verdict) {
-            try {
-              verdict = parseVerdict(dr.output);
-            } catch {
-            }
-          }
-          if (!verdict) {
-            verdict = buildAllUnknownVerdict(workItems, "No verdict tag or JSON found in output");
-          }
+          const verdict = parseVerdict(dr.output);
           lastVerdict = verdict;
           if (verdict) {
             const score = { iteration: currentState.iteration, passed: verdict.score.passed, failed: verdict.score.failed, unknown: verdict.score.unknown, total: verdict.score.total, timestamp: (/* @__PURE__ */ new Date()).toISOString() };
@@ -11125,7 +10983,7 @@ function registerTeardownCommand(program) {
     } else if (otlpMode === "remote-routed") {
       if (!options.keepDocker) {
         try {
-          const { stopCollectorOnly: stopCollectorOnly2 } = await import("./docker-6GFZ4B3V.js");
+          const { stopCollectorOnly: stopCollectorOnly2 } = await import("./docker-OA5CDTQZ.js");
           stopCollectorOnly2();
           result.docker.stopped = true;
           if (!isJson) {
@@ -11157,7 +11015,7 @@ function registerTeardownCommand(program) {
         info("Shared stack: kept running (other projects may use it)");
       }
     } else if (isLegacyStack) {
-      const { isStackRunning: isStackRunning2, stopStack } = await import("./docker-6GFZ4B3V.js");
+      const { isStackRunning: isStackRunning2, stopStack } = await import("./docker-OA5CDTQZ.js");
       let stackRunning = false;
       try {
         stackRunning = isStackRunning2(composeFile);
@@ -14142,7 +14000,7 @@ function registerDriversCommand(program) {
 }
 // src/index.ts
-var VERSION = true ? "0.37.0" : "0.0.0-dev";
+var VERSION = true ? "0.37.1" : "0.0.0-dev";
 function createProgram() {
   const program = new Command();
   program.name("codeharness").description("Makes autonomous coding agents produce software that actually works").version(VERSION).option("--json", "Output in machine-readable JSON format");

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "codeharness",
-  "version": "0.37.0",
+  "version": "0.37.1",
   "type": "module",
   "description": "CLI for codeharness — makes autonomous coding agents produce software that actually works",
   "bin": {

package/templates/agents/evaluator.yaml CHANGED Viewed

@@ -66,50 +66,15 @@ prompt_template: |
   Base your scores on what you observe through the running system, not assumptions.
-  ## Output Format
-  ```json
-  {
-    "verdict": "pass" | "fail",
-    "score": {
-      "passed": <number>,
-      "failed": <number>,
-      "unknown": <number>,
-      "total": <number>
-    },
-    "findings": [
-      {
-        "ac": <number>,
-        "description": "<AC description>",
-        "status": "pass" | "fail" | "unknown",
-        "evidence": {
-          "commands_run": ["<command>"],
-          "output_observed": "<output>",
-          "reasoning": "<why>"
-        }
-      }
-    ],
-    "quality_scores": {
-      "architecture": <1-5>,
-      "originality": <1-5>,
-      "craft": <1-5>,
-      "functionality": <1-5>
-    }
-  }
-  ```
-  Verdict is "pass" only if ALL findings have status "pass". Quality scores are informational.
-  ## XML Tags — MANDATORY
-  In addition to the JSON file output, your response MUST include these XML tags (machine-parsed):
-  Include `<verdict>pass</verdict>` or `<verdict>fail</verdict>`.
-  For each AC, include `<evidence ac="N" status="pass|fail|unknown">command, output, reasoning</evidence>`.
-  Include `<quality-scores>architecture: N, originality: N, craft: N, functionality: N</quality-scores>`.
-  ## Output Location
-  Write verdict JSON to ./verdict/verdict.json
+  ## Output Format — XML Tags (machine-parsed)
+  Your response MUST include these XML tags:
+  `<verdict>pass</verdict>` or `<verdict>fail</verdict>`
+  Verdict is "pass" only if ALL ACs have status "pass".
+  For each AC:
+  `<evidence ac="N" status="pass|fail|unknown">command run, output observed, reasoning</evidence>`
+  Quality assessment:
+  `<quality-scores>architecture: N, originality: N, craft: N, functionality: N</quality-scores>`