npm - codeharness - Versions diffs - 0.32.2 → 0.33.0 - Mend

codeharness 0.32.2 → 0.33.0

This diff shows the differences between the contents of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11)

package/dist/{chunk-IICSAAF4.js → chunk-4YPX74BX.js} +1 -1
package/dist/{docker-GLX24TXX.js → docker-Y73EO7Z4.js} +1 -1
package/dist/index.js +102 -180
package/package.json +1 -1
package/patches/dev/enforcement.md +8 -17
package/patches/retro/enforcement.md +1 -1
package/patches/review/enforcement.md +4 -26
package/patches/verify/story-verification.md +6 -30
package/templates/agents/documenter.yaml +64 -0
package/templates/agents/evaluator.yaml +16 -11
package/templates/workflows/default.yaml +7 -0

package/dist/{chunk-IICSAAF4.js → chunk-4YPX74BX.js} RENAMED Viewed

@@ -2895,7 +2895,7 @@ function generateDockerfileTemplate(projectDir, stackOrDetections) {
 }
 // src/modules/infra/init-project.ts
-var HARNESS_VERSION = true ? "0.32.2" : "0.0.0-dev";
+var HARNESS_VERSION = true ? "0.33.0" : "0.0.0-dev";
 function failResult(opts, error) {
   return {
     status: "fail",

package/dist/{docker-GLX24TXX.js → docker-Y73EO7Z4.js} RENAMED Viewed

@@ -16,7 +16,7 @@ import {
   stopCollectorOnly,
   stopSharedStack,
   stopStack
-} from "./chunk-IICSAAF4.js";
+} from "./chunk-4YPX74BX.js";
 export {
   checkRemoteEndpoint,
   cleanupOrphanedContainers,

package/dist/index.js CHANGED Viewed

@@ -40,7 +40,7 @@ import {
   validateDockerfile,
   warn,
   writeState
-} from "./chunk-IICSAAF4.js";
+} from "./chunk-4YPX74BX.js";
 // src/index.ts
 import { Command } from "commander";
@@ -2507,7 +2507,7 @@ function resolveWorkflow(options) {
 }
 // src/lib/workflow-engine.ts
-import { readFileSync as readFileSync13, existsSync as existsSync15 } from "fs";
+import { readFileSync as readFileSync13, existsSync as existsSync15, writeFileSync as writeFileSync8, mkdirSync as mkdirSync6, rmSync as rmSync2 } from "fs";
 import { join as join12 } from "path";
 import { parse as parse5 } from "yaml";
@@ -3347,7 +3347,7 @@ async function executeNullTask(task, taskName, storyKey, state, config, previous
   writeWorkflowState(updatedState, projectDir);
   return { updatedState, output: result.output ?? "", contract };
 }
-async function dispatchTaskWithResult(task, taskName, storyKey, definition, state, config, customPrompt, previousOutputContract) {
+async function dispatchTaskWithResult(task, taskName, storyKey, definition, state, config, customPrompt, previousOutputContract, storyFiles) {
   const projectDir = config.projectDir ?? process.cwd();
   const traceId = generateTraceId(config.runId, state.iteration, taskName);
   const tracePrompt = formatTracePrompt(traceId);
@@ -3361,7 +3361,7 @@ async function dispatchTaskWithResult(task, taskName, storyKey, definition, stat
   let workspace = null;
   if (task.source_access === false) {
     try {
-      workspace = await createIsolatedWorkspace({ runId: config.runId, storyFiles: [] });
+      workspace = await createIsolatedWorkspace({ runId: config.runId, storyFiles: storyFiles ?? [] });
       cwd = workspace?.toDispatchOptions()?.cwd ?? projectDir;
     } catch {
       cwd = projectDir;
@@ -3878,7 +3878,7 @@ async function executeWorkflow(config) {
   }
   if (state.phase === "error" || state.phase === "failed") {
     const errorCount = state.tasks_completed.filter((t) => t.error).length;
-    info(`Resuming from ${state.phase} state \u2014 ${errorCount} previous error(s), retrying failed tasks`);
+    if (!config.onEvent) info(`Resuming from ${state.phase} state \u2014 ${errorCount} previous error(s), retrying failed tasks`);
   }
   state = {
     ...state,
@@ -3929,13 +3929,17 @@ async function executeWorkflow(config) {
   for (const [epicId, epicItems] of epicGroups) {
     if (halted) break;
     if (config.abortSignal?.aborted) {
-      info("Execution interrupted \u2014 saving state");
+      if (!config.onEvent) info("Execution interrupted \u2014 saving state");
       state = { ...state, phase: "interrupted" };
       writeWorkflowState(state, projectDir);
       halted = true;
       break;
     }
-    info(`[epic-${epicId}] Starting epic with ${epicItems.length} stories`);
+    if (config.onEvent) {
+      config.onEvent({ type: "dispatch-start", taskName: "story_flow", storyKey: `__epic_${epicId}__` });
+    } else {
+      info(`[epic-${epicId}] Starting epic with ${epicItems.length} stories`);
+    }
     for (const step of config.workflow.epicFlow) {
       if (halted) break;
       if (config.abortSignal?.aborted) {
@@ -4026,8 +4030,27 @@ async function executeWorkflow(config) {
       }
       const epicSentinel = `__epic_${epicId}__`;
       if (isTaskCompleted(state, taskName, epicSentinel)) continue;
+      let guideFiles = [];
+      if (task.source_access === false) {
+        const guidesDir = join12(projectDir, ".codeharness", "verify-guides");
+        try {
+          mkdirSync6(guidesDir, { recursive: true });
+          for (const item of epicItems) {
+            const contractPath = join12(projectDir, ".codeharness", "contracts", `document-${item.key}.json`);
+            if (existsSync15(contractPath)) {
+              const contractData = JSON.parse(readFileSync13(contractPath, "utf-8"));
+              if (contractData.output) {
+                const guidePath = join12(guidesDir, `${item.key}-guide.md`);
+                writeFileSync8(guidePath, contractData.output, "utf-8");
+                guideFiles.push(guidePath);
+              }
+            }
+          }
+        } catch {
+        }
+      }
       try {
-        const dr = await dispatchTaskWithResult(task, taskName, epicSentinel, definition, state, config, void 0, lastOutputContract ?? void 0);
+        const dr = await dispatchTaskWithResult(task, taskName, epicSentinel, definition, state, config, void 0, lastOutputContract ?? void 0, guideFiles);
         state = dr.updatedState;
         lastOutputContract = dr.contract;
         propagateVerifyFlags(taskName, dr.contract, projectDir);
@@ -4046,10 +4069,18 @@ async function executeWorkflow(config) {
         if (err instanceof DispatchError && HALT_ERROR_CODES.has(err.code)) {
           halted = true;
         }
+      } finally {
+        if (guideFiles.length > 0) {
+          const guidesDir = join12(projectDir, ".codeharness", "verify-guides");
+          try {
+            rmSync2(guidesDir, { recursive: true, force: true });
+          } catch {
+          }
+        }
       }
     }
     if (!halted) {
-      info(`[epic-${epicId}] Epic completed`);
+      if (!config.onEvent) info(`[epic-${epicId}] Epic completed`);
     }
   }
   if (state.phase === "interrupted") {
@@ -4111,7 +4142,7 @@ import { join as join14 } from "path";
 // src/lib/cross-worktree-validator.ts
 import { exec } from "child_process";
-import { appendFileSync as appendFileSync2, mkdirSync as mkdirSync6 } from "fs";
+import { appendFileSync as appendFileSync2, mkdirSync as mkdirSync7 } from "fs";
 import { join as join13 } from "path";
 import { promisify } from "util";
 var execAsync = promisify(exec);
@@ -4147,7 +4178,7 @@ function writeMergeTelemetry(opts, result) {
       errors: result.valid ? [] : ["Test suite failed after merge"]
     };
     const dir = join13(opts.cwd, TELEMETRY_DIR2);
-    mkdirSync6(dir, { recursive: true });
+    mkdirSync7(dir, { recursive: true });
     appendFileSync2(join13(dir, TELEMETRY_FILE2), JSON.stringify(entry) + "\n");
   } catch {
   }
@@ -5548,11 +5579,11 @@ function startRenderer(options) {
   let lastStoryKey = state.sprintInfo?.storyKey ?? null;
   const pendingStoryCosts = /* @__PURE__ */ new Map();
   let cleaned = false;
+  process.stdout.write("\x1B[2J\x1B[H");
   const onQuit = options?.onQuit;
   const inkInstance = inkRender(/* @__PURE__ */ jsx9(App, { state, onCycleLane: () => cycleLane(), onQuit: onQuit ? () => onQuit() : void 0 }), {
     exitOnCtrlC: false,
-    patchConsole: false,
-    // Disable console patching to prevent flicker
+    patchConsole: !options?._forceTTY,
     maxFps: 10
   });
   function rerender() {
@@ -6195,10 +6226,11 @@ function registerRunCommand(program) {
         currentTaskName = event.taskName;
         const inLoop = inEpicPhase && epicLoopTasks.has(event.taskName) && taskStates[event.taskName] === "done";
         const stateKey = inLoop ? `loop:${event.taskName}` : event.taskName;
-        const epicId = extractEpicId2(event.storyKey);
+        const epicId = event.storyKey.startsWith("__epic_") ? event.storyKey.replace("__epic_", "").replace("__", "") : extractEpicId2(event.storyKey);
+        const displayStoryKey = event.storyKey.startsWith("__epic_") ? `Epic ${epicId}` : event.storyKey;
         const epic = epicData[epicId];
         renderer.updateSprintState({
-          storyKey: event.storyKey,
+          storyKey: displayStoryKey,
           phase: event.taskName,
           done: storiesDone,
           total: counts.total,
@@ -6239,17 +6271,17 @@ function registerRunCommand(program) {
           total: counts.total,
           totalCost: totalCostUsd
         });
-        if (storyFlowTasks.has(event.taskName)) {
-          const allStoryDone = [...storyFlowTasks].every((tn) => taskStates[tn] === "done");
-          if (allStoryDone) {
-            storiesDone++;
-            updateStoryStatus2(event.storyKey, "done");
-            const idx = storyEntries.findIndex((s) => s.key === event.storyKey);
-            if (idx >= 0) {
-              storyEntries[idx] = { ...storyEntries[idx], status: "done" };
-              renderer.updateStories([...storyEntries]);
+        if (event.taskName === "verify" && event.storyKey.startsWith("__epic_")) {
+          const epicId = event.storyKey.replace("__epic_", "").replace("__", "");
+          for (let i = 0; i < storyEntries.length; i++) {
+            const se = storyEntries[i];
+            if (se.status === "in-progress" && se.key.startsWith(`${epicId}-`)) {
+              storiesDone++;
+              updateStoryStatus2(se.key, "done");
+              storyEntries[i] = { ...se, status: "done" };
             }
           }
+          renderer.updateStories([...storyEntries]);
         }
       }
       if (event.type === "dispatch-error") {
@@ -6394,22 +6426,6 @@ import { readFileSync as readFileSync24 } from "fs";
 // src/modules/verify/proof.ts
 import { existsSync as existsSync18, readFileSync as readFileSync15 } from "fs";
-// src/modules/verify/types.ts
-var TIER_HIERARCHY = [
-  "test-provable",
-  "runtime-provable",
-  "environment-provable",
-  "escalate"
-];
-var LEGACY_TIER_MAP = {
-  "cli-verifiable": "test-provable",
-  "integration-required": "environment-provable",
-  "unit-testable": "test-provable",
-  "black-box": "environment-provable"
-};
-// src/modules/verify/proof.ts
 function classifyEvidenceCommands(proofContent) {
   const results = [];
   const codeBlockPattern = /```(?:bash|shell)\n([\s\S]*?)```/g;
@@ -6499,15 +6515,7 @@ function validateProofQuality(proofPath) {
     return emptyResult;
   }
   const content = readFileSync15(proofPath, "utf-8");
-  const allTierNames = [...TIER_HIERARCHY, ...Object.keys(LEGACY_TIER_MAP)];
-  const uniqueTierNames = [...new Set(allTierNames)];
-  const tierPattern = new RegExp(`\\*\\*Tier:\\*\\*\\s*(${uniqueTierNames.join("|")})`, "i");
-  const bbTierMatch = tierPattern.exec(content);
-  const rawTierValue = bbTierMatch ? bbTierMatch[1].toLowerCase() : null;
-  const normalizedTier = rawTierValue ? LEGACY_TIER_MAP[rawTierValue] ?? (TIER_HIERARCHY.includes(rawTierValue) ? rawTierValue : null) : null;
-  const skipDockerEnforcement = normalizedTier !== null && normalizedTier !== "environment-provable";
-  const bbRawEnforcement = checkBlackBoxEnforcement(content);
-  const bbEnforcement = skipDockerEnforcement ? { ...bbRawEnforcement, blackBoxPass: true } : bbRawEnforcement;
+  const bbEnforcement = checkBlackBoxEnforcement(content);
   function buildResult(base) {
     const basePassed = base.pending === 0 && base.verified > 0;
     return {
@@ -6647,7 +6655,7 @@ function validateProofQuality(proofPath) {
 // src/modules/verify/orchestrator.ts
 import { execFileSync } from "child_process";
-import { mkdirSync as mkdirSync8, writeFileSync as writeFileSync9 } from "fs";
+import { mkdirSync as mkdirSync9, writeFileSync as writeFileSync10 } from "fs";
 import { join as join20 } from "path";
 // src/lib/doc-health/types.ts
@@ -7109,10 +7117,10 @@ function checkAgentsMdLineCount(filePath, docPath, documents) {
 // src/lib/doc-health/report.ts
 import {
   existsSync as existsSync21,
-  mkdirSync as mkdirSync7,
+  mkdirSync as mkdirSync8,
   readFileSync as readFileSync18,
   unlinkSync as unlinkSync2,
-  writeFileSync as writeFileSync8
+  writeFileSync as writeFileSync9
 } from "fs";
 import { join as join19 } from "path";
 function printDocHealthOutput(report) {
@@ -7148,9 +7156,9 @@ function completeExecPlan(storyId, dir) {
 Completed: ${timestamp}`
   );
   const completedDir = join19(root, "docs", "exec-plans", "completed");
-  mkdirSync7(completedDir, { recursive: true });
+  mkdirSync8(completedDir, { recursive: true });
   const completedPath = join19(completedDir, `${storyId}.md`);
-  writeFileSync8(completedPath, content, "utf-8");
+  writeFileSync9(completedPath, content, "utf-8");
   try {
     unlinkSync2(activePath);
   } catch {
@@ -7192,9 +7200,9 @@ function checkPreconditions(dir, storyId) {
 function createProofDocument(storyId, _storyTitle, _acs, dir) {
   const root = dir ?? process.cwd();
   const verificationDir = join20(root, "verification");
-  mkdirSync8(verificationDir, { recursive: true });
+  mkdirSync9(verificationDir, { recursive: true });
   const proofPath = join20(verificationDir, `${storyId}-proof.md`);
-  writeFileSync9(proofPath, `# ${storyId} \u2014 Proof
+  writeFileSync10(proofPath, `# ${storyId} \u2014 Proof
 Pending: blind evaluator (Epic 6)
 `, "utf-8");
@@ -7257,87 +7265,8 @@ var DB_KEYWORDS = [
   "sql",
   "table"
 ];
-var INTEGRATION_KEYWORDS = [
-  "external system",
-  "real infrastructure",
-  "manual verification"
-];
-var ESCALATE_KEYWORDS = [
-  "physical hardware",
-  "manual human",
-  "visual inspection by human",
-  "paid external service"
-];
-var RUNTIME_PROVABLE_KEYWORDS = [
-  "cli command",
-  "api endpoint",
-  "http",
-  "server",
-  "output shows",
-  "exit code",
-  "binary",
-  "runs and produces",
-  "cli outputs",
-  "when run"
-];
-var ENVIRONMENT_PROVABLE_KEYWORDS = [
-  "docker",
-  "container",
-  "observability",
-  "telemetry",
-  "database",
-  "queue",
-  "distributed",
-  "multi-service",
-  "end-to-end",
-  "victorialogs"
-];
-var ESCALATE_TIER_KEYWORDS = [
-  "physical hardware",
-  "human visual",
-  "paid service",
-  "gpu",
-  "manual inspection",
-  "physical display"
-];
 // src/modules/verify/parser.ts
-function classifyVerifiability(description) {
-  const lower = description.toLowerCase();
-  for (const kw of INTEGRATION_KEYWORDS) {
-    if (lower.includes(kw)) return "integration-required";
-  }
-  return "cli-verifiable";
-}
-function classifyStrategy(description) {
-  const lower = description.toLowerCase();
-  for (const kw of ESCALATE_KEYWORDS) {
-    if (lower.includes(kw)) return "escalate";
-  }
-  return "docker";
-}
-function classifyTier(description) {
-  const lower = description.toLowerCase();
-  for (const kw of ESCALATE_TIER_KEYWORDS) {
-    if (lower.includes(kw)) return "escalate";
-  }
-  for (const kw of ENVIRONMENT_PROVABLE_KEYWORDS) {
-    if (lower.includes(kw)) return "environment-provable";
-  }
-  for (const kw of RUNTIME_PROVABLE_KEYWORDS) {
-    if (lower.includes(kw)) return "runtime-provable";
-  }
-  return "test-provable";
-}
-var VERIFICATION_TAG_PATTERN = /<!--\s*verification:\s*(cli-verifiable|integration-required|unit-testable|black-box|test-provable|runtime-provable|environment-provable|escalate)\s*-->/;
-function parseVerificationTag(text) {
-  const match = VERIFICATION_TAG_PATTERN.exec(text);
-  if (!match) return null;
-  const raw = match[1];
-  const mapped = LEGACY_TIER_MAP[raw] ?? raw;
-  if (!TIER_HIERARCHY.includes(mapped)) return null;
-  return mapped;
-}
 function classifyAC(description) {
   const lower = description.toLowerCase();
   for (const kw of UI_KEYWORDS) {
@@ -7387,17 +7316,10 @@ function parseStoryACs(storyFilePath) {
     if (currentId !== null && currentDesc.length > 0) {
       const description = currentDesc.join(" ").trim();
       if (description) {
-        const tag = parseVerificationTag(description);
-        const tier = tag ?? classifyTier(description);
-        const verifiability = classifyVerifiability(description);
-        const strategy = classifyStrategy(description);
         acs.push({
           id: currentId,
           description,
-          type: classifyAC(description),
-          verifiability,
-          strategy,
-          tier
+          type: classifyAC(description)
         });
       } else {
         warn(`Skipping malformed AC #${currentId}: empty description`);
@@ -7575,7 +7497,7 @@ function normalizeSeverity(severity) {
 }
 // src/modules/observability/coverage.ts
-import { readFileSync as readFileSync20, writeFileSync as writeFileSync10, renameSync as renameSync3, existsSync as existsSync24 } from "fs";
+import { readFileSync as readFileSync20, writeFileSync as writeFileSync11, renameSync as renameSync3, existsSync as existsSync24 } from "fs";
 import { join as join22 } from "path";
 var STATE_FILE2 = "sprint-state.json";
 var DEFAULT_STATIC_TARGET = 80;
@@ -7664,7 +7586,7 @@ function parseGapArray(raw) {
 }
 // src/modules/observability/runtime-coverage.ts
-import { readFileSync as readFileSync21, writeFileSync as writeFileSync11, renameSync as renameSync4, existsSync as existsSync25 } from "fs";
+import { readFileSync as readFileSync21, writeFileSync as writeFileSync12, renameSync as renameSync4, existsSync as existsSync25 } from "fs";
 import { join as join23 } from "path";
 // src/modules/observability/coverage-gate.ts
@@ -8506,7 +8428,7 @@ function getACById(id) {
 // src/modules/verify/validation-runner.ts
 import { execSync as execSync5 } from "child_process";
-import { writeFileSync as writeFileSync12, mkdirSync as mkdirSync9 } from "fs";
+import { writeFileSync as writeFileSync13, mkdirSync as mkdirSync10 } from "fs";
 import { join as join25, dirname as dirname3 } from "path";
 var MAX_VALIDATION_ATTEMPTS = 10;
 var AC_COMMAND_TIMEOUT_MS = 3e4;
@@ -8659,8 +8581,8 @@ function createFixStory(ac, error) {
       "Fix the root cause so the validation command passes.",
       ""
     ].join("\n");
-    mkdirSync9(dirname3(storyPath), { recursive: true });
-    writeFileSync12(storyPath, markdown, "utf-8");
+    mkdirSync10(dirname3(storyPath), { recursive: true });
+    writeFileSync13(storyPath, markdown, "utf-8");
     return ok2(storyKey);
   } catch (err) {
     const msg = err instanceof Error ? err.message : String(err);
@@ -8986,7 +8908,7 @@ function runValidationCycle() {
 // src/modules/verify/env.ts
 import { execFileSync as execFileSync5 } from "child_process";
-import { existsSync as existsSync27, mkdirSync as mkdirSync10, readdirSync as readdirSync7, readFileSync as readFileSync23, writeFileSync as writeFileSync13, cpSync, rmSync as rmSync2, statSync as statSync6 } from "fs";
+import { existsSync as existsSync27, mkdirSync as mkdirSync11, readdirSync as readdirSync7, readFileSync as readFileSync23, writeFileSync as writeFileSync14, cpSync, rmSync as rmSync3, statSync as statSync6 } from "fs";
 import { join as join27, basename as basename2 } from "path";
 import { createHash } from "crypto";
@@ -9135,7 +9057,7 @@ function buildNodeImage(projectDir) {
   const tarballName = basename2(lastLine);
   const tarballPath = join27("/tmp", tarballName);
   const buildContext = join27("/tmp", `codeharness-verify-build-${Date.now()}`);
-  mkdirSync10(buildContext, { recursive: true });
+  mkdirSync11(buildContext, { recursive: true });
   try {
     cpSync(tarballPath, join27(buildContext, tarballName));
     const dockerfile = generateVerifyDockerfile(projectDir) + `
@@ -9144,15 +9066,15 @@ ARG TARBALL=package.tgz
 COPY \${TARBALL} /tmp/\${TARBALL}
 RUN npm install -g /tmp/\${TARBALL} && rm /tmp/\${TARBALL}
 `;
-    writeFileSync13(join27(buildContext, "Dockerfile"), dockerfile);
+    writeFileSync14(join27(buildContext, "Dockerfile"), dockerfile);
     execFileSync5("docker", ["build", "-t", IMAGE_TAG, "--build-arg", `TARBALL=${tarballName}`, "."], {
       cwd: buildContext,
       stdio: "pipe",
       timeout: 12e4
     });
   } finally {
-    rmSync2(buildContext, { recursive: true, force: true });
-    rmSync2(tarballPath, { force: true });
+    rmSync3(buildContext, { recursive: true, force: true });
+    rmSync3(tarballPath, { force: true });
   }
 }
 function buildPythonImage(projectDir) {
@@ -9163,7 +9085,7 @@ function buildPythonImage(projectDir) {
   }
   const distFile = distFiles.filter((f) => f.endsWith(".tar.gz"))[0] ?? distFiles[0];
   const buildContext = join27("/tmp", `codeharness-verify-build-${Date.now()}`);
-  mkdirSync10(buildContext, { recursive: true });
+  mkdirSync11(buildContext, { recursive: true });
   try {
     cpSync(join27(distDir, distFile), join27(buildContext, distFile));
     const dockerfile = generateVerifyDockerfile(projectDir) + `
@@ -9171,14 +9093,14 @@ function buildPythonImage(projectDir) {
 COPY ${distFile} /tmp/${distFile}
 RUN pip install --break-system-packages /tmp/${distFile} && rm /tmp/${distFile}
 `;
-    writeFileSync13(join27(buildContext, "Dockerfile"), dockerfile);
+    writeFileSync14(join27(buildContext, "Dockerfile"), dockerfile);
     execFileSync5("docker", ["build", "-t", IMAGE_TAG, "."], {
       cwd: buildContext,
       stdio: "pipe",
       timeout: 12e4
     });
   } finally {
-    rmSync2(buildContext, { recursive: true, force: true });
+    rmSync3(buildContext, { recursive: true, force: true });
   }
 }
 function prepareVerifyWorkspace(storyKey, projectDir) {
@@ -9189,8 +9111,8 @@ function prepareVerifyWorkspace(storyKey, projectDir) {
   const storyFile = join27(root, STORY_DIR, `${storyKey}.md`);
   if (!existsSync27(storyFile)) throw new Error(`Story file not found: ${storyFile}`);
   const workspace = `${TEMP_PREFIX}${storyKey}`;
-  if (existsSync27(workspace)) rmSync2(workspace, { recursive: true, force: true });
-  mkdirSync10(workspace, { recursive: true });
+  if (existsSync27(workspace)) rmSync3(workspace, { recursive: true, force: true });
+  mkdirSync11(workspace, { recursive: true });
   cpSync(storyFile, join27(workspace, "story.md"));
   const readmePath = join27(root, "README.md");
   if (existsSync27(readmePath)) cpSync(readmePath, join27(workspace, "README.md"));
@@ -9198,7 +9120,7 @@ function prepareVerifyWorkspace(storyKey, projectDir) {
   if (existsSync27(docsDir) && statSync6(docsDir).isDirectory()) {
     cpSync(docsDir, join27(workspace, "docs"), { recursive: true });
   }
-  mkdirSync10(join27(workspace, "verification"), { recursive: true });
+  mkdirSync11(join27(workspace, "verification"), { recursive: true });
   return workspace;
 }
 function checkVerifyEnv() {
@@ -9240,7 +9162,7 @@ function cleanupVerifyEnv(storyKey) {
   }
   const workspace = `${TEMP_PREFIX}${storyKey}`;
   const containerName = `codeharness-verify-${storyKey}`;
-  if (existsSync27(workspace)) rmSync2(workspace, { recursive: true, force: true });
+  if (existsSync27(workspace)) rmSync3(workspace, { recursive: true, force: true });
   try {
     execFileSync5("docker", ["stop", containerName], { stdio: "pipe", timeout: 15e3 });
   } catch {
@@ -9252,7 +9174,7 @@ function cleanupVerifyEnv(storyKey) {
 }
 function buildPluginImage(projectDir) {
   const buildContext = join27("/tmp", `codeharness-verify-build-${Date.now()}`);
-  mkdirSync10(buildContext, { recursive: true });
+  mkdirSync11(buildContext, { recursive: true });
   try {
     const pluginDir = join27(projectDir, ".claude-plugin");
     cpSync(pluginDir, join27(buildContext, ".claude-plugin"), { recursive: true });
@@ -9262,28 +9184,28 @@ function buildPluginImage(projectDir) {
         cpSync(src, join27(buildContext, dir), { recursive: true });
       }
     }
-    writeFileSync13(join27(buildContext, "Dockerfile"), generateVerifyDockerfile(projectDir));
+    writeFileSync14(join27(buildContext, "Dockerfile"), generateVerifyDockerfile(projectDir));
     execFileSync5("docker", ["build", "-t", IMAGE_TAG, "."], {
       cwd: buildContext,
       stdio: "pipe",
       timeout: 12e4
     });
   } finally {
-    rmSync2(buildContext, { recursive: true, force: true });
+    rmSync3(buildContext, { recursive: true, force: true });
   }
 }
 function buildSimpleImage(projectDir, timeout = 12e4) {
   const buildContext = join27("/tmp", `codeharness-verify-build-${Date.now()}`);
-  mkdirSync10(buildContext, { recursive: true });
+  mkdirSync11(buildContext, { recursive: true });
   try {
-    writeFileSync13(join27(buildContext, "Dockerfile"), generateVerifyDockerfile(projectDir));
+    writeFileSync14(join27(buildContext, "Dockerfile"), generateVerifyDockerfile(projectDir));
     execFileSync5("docker", ["build", "-t", IMAGE_TAG, "."], {
       cwd: buildContext,
       stdio: "pipe",
       timeout
     });
   } finally {
-    rmSync2(buildContext, { recursive: true, force: true });
+    rmSync3(buildContext, { recursive: true, force: true });
   }
 }
 function dockerImageExists(tag) {
@@ -10881,7 +10803,7 @@ function formatAuditJson(result) {
 }
 // src/modules/audit/fix-generator.ts
-import { existsSync as existsSync34, writeFileSync as writeFileSync14, mkdirSync as mkdirSync11 } from "fs";
+import { existsSync as existsSync34, writeFileSync as writeFileSync15, mkdirSync as mkdirSync12 } from "fs";
 import { join as join33, dirname as dirname5 } from "path";
 function buildStoryKey(gap2, index) {
   const safeDimension = gap2.dimension.toLowerCase().replace(/[^a-z0-9]+/g, "-").replace(/(^-|-$)/g, "");
@@ -10936,8 +10858,8 @@ function generateFixStories(auditResult) {
           continue;
         }
         const markdown = buildStoryMarkdown(gap2, key);
-        mkdirSync11(dirname5(filePath), { recursive: true });
-        writeFileSync14(filePath, markdown, "utf-8");
+        mkdirSync12(dirname5(filePath), { recursive: true });
+        writeFileSync15(filePath, markdown, "utf-8");
         stories.push({ key, filePath, gap: gap2, skipped: false });
         created++;
       }
@@ -11113,7 +11035,7 @@ function registerOnboardCommand(program) {
 }
 // src/commands/teardown.ts
-import { existsSync as existsSync35, unlinkSync as unlinkSync3, readFileSync as readFileSync29, writeFileSync as writeFileSync15, rmSync as rmSync3 } from "fs";
+import { existsSync as existsSync35, unlinkSync as unlinkSync3, readFileSync as readFileSync29, writeFileSync as writeFileSync16, rmSync as rmSync4 } from "fs";
 import { join as join34 } from "path";
 function buildDefaultResult() {
   return {
@@ -11160,7 +11082,7 @@ function registerTeardownCommand(program) {
     } else if (otlpMode === "remote-routed") {
       if (!options.keepDocker) {
         try {
-          const { stopCollectorOnly: stopCollectorOnly2 } = await import("./docker-GLX24TXX.js");
+          const { stopCollectorOnly: stopCollectorOnly2 } = await import("./docker-Y73EO7Z4.js");
           stopCollectorOnly2();
           result.docker.stopped = true;
           if (!isJson) {
@@ -11192,7 +11114,7 @@ function registerTeardownCommand(program) {
         info("Shared stack: kept running (other projects may use it)");
       }
     } else if (isLegacyStack) {
-      const { isStackRunning: isStackRunning2, stopStack } = await import("./docker-GLX24TXX.js");
+      const { isStackRunning: isStackRunning2, stopStack } = await import("./docker-Y73EO7Z4.js");
       let stackRunning = false;
       try {
         stackRunning = isStackRunning2(composeFile);
@@ -11262,7 +11184,7 @@ function registerTeardownCommand(program) {
               for (const key of keysToRemove) {
                 delete scripts[key];
               }
-              writeFileSync15(pkgPath, JSON.stringify(pkg, null, 2) + "\n", "utf-8");
+              writeFileSync16(pkgPath, JSON.stringify(pkg, null, 2) + "\n", "utf-8");
               result.otlp_cleaned = true;
               if (!isJson) {
                 ok("OTLP: removed instrumented scripts from package.json");
@@ -11290,7 +11212,7 @@ function registerTeardownCommand(program) {
     }
     const harnessDir = join34(projectDir, ".harness");
     if (existsSync35(harnessDir)) {
-      rmSync3(harnessDir, { recursive: true, force: true });
+      rmSync4(harnessDir, { recursive: true, force: true });
       result.removed.push(".harness/");
       if (!isJson) {
         ok("Removed: .harness/");
@@ -12096,7 +12018,7 @@ function isDuplicate(newItem, existingTitles, threshold = 0.8) {
 }
 // src/lib/issue-tracker.ts
-import { existsSync as existsSync36, readFileSync as readFileSync30, writeFileSync as writeFileSync16, mkdirSync as mkdirSync12 } from "fs";
+import { existsSync as existsSync36, readFileSync as readFileSync30, writeFileSync as writeFileSync17, mkdirSync as mkdirSync13 } from "fs";
 import { join as join35 } from "path";
 import { parse as parse6, stringify as stringify3 } from "yaml";
 var VALID_PRIORITIES = /* @__PURE__ */ new Set([
@@ -12125,9 +12047,9 @@ function writeIssues(data, dir = process.cwd()) {
   const filePath = issuesPath(dir);
   const dirPath = join35(dir, ".codeharness");
   if (!existsSync36(dirPath)) {
-    mkdirSync12(dirPath, { recursive: true });
+    mkdirSync13(dirPath, { recursive: true });
   }
-  writeFileSync16(filePath, stringify3(data, { nullStr: "" }), "utf-8");
+  writeFileSync17(filePath, stringify3(data, { nullStr: "" }), "utf-8");
 }
 function nextIssueId(existing) {
   let max = 0;
@@ -13113,7 +13035,7 @@ function registerAuditCommand(program) {
 }
 // src/commands/stats.ts
-import { existsSync as existsSync39, readdirSync as readdirSync10, readFileSync as readFileSync32, writeFileSync as writeFileSync17 } from "fs";
+import { existsSync as existsSync39, readdirSync as readdirSync10, readFileSync as readFileSync32, writeFileSync as writeFileSync18 } from "fs";
 import { join as join38 } from "path";
 var RATES = {
   input: 15,
@@ -13323,7 +13245,7 @@ function registerStatsCommand(program) {
     console.log(formatted);
     if (options.save) {
       const outPath = join38(projectDir, "_bmad-output", "implementation-artifacts", "cost-report.md");
-      writeFileSync17(outPath, formatted, "utf-8");
+      writeFileSync18(outPath, formatted, "utf-8");
       ok(`Report saved to ${outPath}`);
     }
   });
@@ -14179,7 +14101,7 @@ function registerDriversCommand(program) {
 }
 // src/index.ts
-var VERSION = true ? "0.32.2" : "0.0.0-dev";
+var VERSION = true ? "0.33.0" : "0.0.0-dev";
 function createProgram() {
   const program = new Command();
   program.name("codeharness").description("Makes autonomous coding agents produce software that actually works").version(VERSION).option("--json", "Output in machine-readable JSON format");

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "codeharness",
-  "version": "0.32.2",
+  "version": "0.33.0",
   "type": "module",
   "description": "CLI for codeharness — makes autonomous coding agents produce software that actually works",
   "bin": {

package/patches/dev/enforcement.md CHANGED Viewed

@@ -3,8 +3,8 @@
 Dev agents repeatedly shipped code without reading module conventions (AGENTS.md),
 skipped observability checks, and produced features that could not be verified
 from outside the source tree. This patch enforces architecture awareness,
-observability validation, documentation hygiene, test coverage gates, and
-verification tier awareness — all operational failures observed in prior sprints.
+observability validation, documentation hygiene, and test coverage gates
+— all addressing operational failures observed in prior sprints.
 (FR33, FR34, NFR20)
 ## Codeharness Development Enforcement
@@ -35,23 +35,14 @@ After running tests, verify telemetry is flowing:
 - Coverage gate: 100% of new/changed code
 - Run `npm test` / `pytest` and verify no regressions
-### Verification Tier Awareness
+### Verification Readiness
-Write code that can be verified at the appropriate tier. The four verification tiers determine what evidence is needed to prove an AC works:
+Write code that can be verified via Docker-based blind verification. Ask yourself:
+- Are my functions testable and my outputs greppable?
+- Can I run the CLI/server and verify output?
+- Does `docker exec` work? Are logs flowing to the observability stack?
-- **`test-provable`** — Code must be testable via `npm test` / `npm run build`. Ensure functions have test coverage, outputs are greppable, and build artifacts are inspectable. No running app required.
-- **`runtime-provable`** — Code must be exercisable via CLI or local server. Ensure the binary/CLI produces verifiable stdout, exit codes, or HTTP responses without needing Docker.
-- **`environment-provable`** — Code must work in a Docker verification environment. Ensure the Dockerfile is current, services start correctly, and `docker exec` can exercise the feature. Observability queries should return expected log/trace events.
-- **`escalate`** — Reserved for ACs that genuinely cannot be automated (physical hardware, paid external APIs). This is rare — exhaust all automated approaches first.
-Ask yourself:
-- What tier is this story tagged with?
-- Does my implementation produce the evidence that tier requires?
-- If `test-provable`: are my functions testable and my outputs greppable?
-- If `runtime-provable`: can I run the CLI/server and verify output locally?
-- If `environment-provable`: does `docker exec` work? Are logs flowing to the observability stack?
-If the answer is "no", the feature has a testability gap — fix the code to be verifiable at the appropriate tier.
+If the answer to any of these is "no", the feature has a testability gap — fix the code to be verifiable.
 ### Dockerfile Maintenance

package/patches/retro/enforcement.md CHANGED Viewed

@@ -20,7 +20,7 @@ quality trends, and mandatory concrete action items with owners.
 - Did the verifier hang on permissions? (check for `--allowedTools` issues)
 - Did stories get stuck in verify→dev loops? (check `attempts` counter)
-- Were stories assigned the wrong verification tier?
+- Were stories assigned the wrong verification method?
 - Did the verify parser correctly detect `[FAIL]` verdicts?
 ### Documentation Health

package/patches/review/enforcement.md CHANGED Viewed

@@ -1,10 +1,10 @@
 ## WHY
 Review agents approved stories without verifying proof documents existed or
-checking that evidence matched the story's verification tier. Stories passed review
-with fabricated output and missing coverage data. This patch enforces proof
-existence, tier-appropriate evidence quality, and coverage delta reporting as hard
-gates before a story can leave review.
+checking that evidence was real. Stories passed review with fabricated output
+and missing coverage data. This patch enforces proof existence, evidence
+quality, and coverage delta reporting as hard gates before a story can leave
+review.
 (FR33, FR34, NFR20)
 ## Codeharness Review Gates
@@ -18,34 +18,12 @@ gates before a story can leave review.
 ### Proof Quality Checks
-The proof must pass tier-appropriate evidence enforcement. The required evidence depends on the story's verification tier:
-#### `test-provable` stories
-- Evidence comes from build output, test results, and grep/read of code or generated artifacts
-- `npm test` / `npm run build` output is the primary evidence
-- Source-level assertions (grep against `src/`) are acceptable — this IS the verification method for this tier
-- `docker exec` evidence is NOT required
-- Each AC section must show actual test output or build results
-#### `runtime-provable` stories
-- Evidence comes from running the actual binary, CLI, or server
-- Process execution output (stdout, stderr, exit codes) is the primary evidence
-- HTTP responses from a locally running server are acceptable
-- `docker exec` evidence is NOT required
-- Each AC section must show actual command execution and output
-#### `environment-provable` stories
 - Commands run via `docker exec` (not direct host access)
 - Less than 50% of evidence commands are `grep` against `src/`
 - Each AC section has at least one `docker exec`, `docker ps/logs`, or observability query
 - `[FAIL]` verdicts outside code blocks cause the proof to fail
 - `[ESCALATE]` is acceptable only when all automated approaches are exhausted
-#### `escalate` stories
-- Human judgment is required — automated evidence may be partial or absent
-- Proof document must explain why automation is not possible
-- `[ESCALATE]` verdict is expected and acceptable
 ### Observability
 Run `semgrep scan --config patches/observability/ --config patches/error-handling/ --json` against changed files and report gaps.

package/patches/verify/story-verification.md CHANGED Viewed

@@ -1,49 +1,25 @@
 ## WHY
 Stories were marked "done" with no proof artifact, or with proofs that only
-grepped source code instead of exercising the feature at the appropriate
-verification tier. This patch mandates tier-appropriate proof documents,
-verification tags per AC, and test coverage targets — preventing regressions
-from being hidden behind inadequate evidence.
+grepped source code instead of exercising the feature. This patch mandates
+proof documents with real evidence, and test coverage targets — preventing
+regressions from being hidden behind inadequate evidence.
 (FR33, FR36, NFR20)
 ## Verification Requirements
-Every story must produce a **proof document** with evidence appropriate to its verification tier.
+Every story must produce a **proof document** with real evidence from Docker-based blind verification.
 ### Proof Standard
 - Proof document at `verification/<story-key>-proof.md`
-- Each AC gets a `## AC N:` section with tier-appropriate evidence and captured output
+- Each AC gets a `## AC N:` section with evidence and captured output
 - `[FAIL]` = AC failed with evidence showing what went wrong
 - `[ESCALATE]` = AC genuinely cannot be automated (last resort — try everything first)
-**Tier-dependent evidence rules:**
-- **`test-provable`** — Evidence comes from build + test output + grep/read of code or artifacts. Run `npm test` or `npm run build`, capture results. Source-level assertions are the primary verification method. No running app or Docker required.
-- **`runtime-provable`** — Evidence comes from running the actual binary/server and interacting with it. Start the process, make requests or run commands, capture stdout/stderr/exit codes. No Docker stack required.
-- **`environment-provable`** — Evidence comes from `docker exec` commands and observability queries. Full Docker verification environment required. Each AC section needs at least one `docker exec`, `docker ps/logs`, or observability query. Evidence must come from running the installed CLI/tool in Docker, not from grepping source.
-- **`escalate`** — Human judgment required. Document why automation is not possible. `[ESCALATE]` verdict is expected.
-### Verification Tags
-For each AC, append a tag indicating its verification tier:
-- `<!-- verification: test-provable -->` — Can be verified by building and running tests. Evidence: build output, test results, grep/read of code. No running app needed.
-- `<!-- verification: runtime-provable -->` — Requires running the actual binary/CLI/server. Evidence: process output, HTTP responses, exit codes. No Docker stack needed.
-- `<!-- verification: environment-provable -->` — Requires full Docker environment with observability. Evidence: `docker exec` commands, VictoriaLogs queries, multi-service interaction.
-- `<!-- verification: escalate -->` — Cannot be automated. Requires human judgment, physical hardware, or paid external services.
-**Decision criteria:**
-1. Can you prove it with `npm test` or `npm run build` alone? → `test-provable`
-2. Do you need to run the actual binary/server locally? → `runtime-provable`
-3. Do you need Docker, external services, or observability? → `environment-provable`
-4. Have you exhausted all automated approaches? → `escalate`
-**Do not over-tag.** Most stories are `test-provable` or `runtime-provable`. Only use `environment-provable` when Docker infrastructure is genuinely needed. Only use `escalate` as a last resort.
 ### Observability Evidence
-After each `docker exec` command (applicable to `environment-provable` stories), query the observability backend for log events from the last 30 seconds.
+After each `docker exec` command, query the observability backend for log events from the last 30 seconds.
 Use the configured VictoriaLogs endpoint (default: `http://localhost:9428`):
 ```bash

package/templates/agents/documenter.yaml ADDED Viewed

@@ -0,0 +1,64 @@
+name: documenter
+role:
+  title: Verification Guide Writer
+  purpose: Read implementation and write Docker-executable verification guides for blind QA
+persona:
+  identity: |
+    Technical writer who translates source code into executable verification steps.
+    Reads what was built, understands how it works, then writes guides that a blind
+    QA agent can follow using only Docker commands.
+  communication_style: "Precise, command-oriented. Every verification step is a copy-pasteable command with expected output."
+  principles:
+    - Every AC must map to a concrete docker exec or curl command
+    - Commands must be copy-pasteable — no pseudocode, no placeholders
+    - Include the Docker container name in every command
+    - 'Expected output must be specific — not "should work" but "prints PASS: hook registered"'
+    - Include a Prerequisites section with container name and required services
+prompt_template: |
+  ## Role
+  You are writing a verification guide for a blind QA evaluator. The evaluator CANNOT see source code — it can only run Docker commands and observe output.
+  ## Process
+  1. Read the story spec to understand the acceptance criteria
+  2. Read the implementation source to understand what was built
+  3. Discover the Docker container name: run `docker ps` or read `docker-compose.yml`
+  4. For each AC, write an executable verification step
+  ## Guide Format
+  Write a markdown document with this structure:
+  ```
+  # Verification Guide: [Story Title]
+  ## Prerequisites
+  - Container: [container name from docker ps]
+  - Required services: [list any dependent services]
+  - Setup: [any one-time setup commands needed]
+  ## AC 1: [AC description]
+  ### Command
+  docker exec [container] python -c "from app.module import Class; obj = Class(); result = obj.method(args); assert result == expected; print('PASS: [description]')"
+  ### Expected Output
+  PASS: [description]
+  ### What This Proves
+  [One sentence: why this output satisfies the AC]
+  ## AC 2: [AC description]
+  ...
+  ```
+  ## Rules
+  - Every command must be copy-pasteable into a terminal
+  - No pseudocode — use real import paths, real class names, real method signatures
+  - For API features: use `curl http://localhost:PORT/endpoint` with expected response body
+  - For internal code: use `docker exec [container] python -c "..."` with assertion + print
+  - For CLI features: use `docker exec [container] command --args` with expected output
+  - If a feature cannot be verified via Docker (e.g., build-time only), state this explicitly and give the reason
+  ## Output
+  Write the complete verification guide as your response. Do not write to files — the engine captures your output.

package/templates/agents/evaluator.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 name: evaluator
 role:
   title: Adversarial QA Evaluator
-  purpose: Exercise the built artifact and determine if it actually works
+  purpose: Exercise the built artifact via Docker and determine if it actually works
 persona:
   identity: Senior QA engineer who trusts nothing without evidence. Treats every claim as unverified until proven with concrete output. Assumes code is broken until demonstrated otherwise.
   communication_style: "Blunt, evidence-first. States what was observed, not what was expected. No softening, no encouragement, no benefit of the doubt."
@@ -22,11 +22,23 @@ disallowedTools:
 prompt_template: |
   ## Role
-  You are verifying acceptance criteria for a software story. Your job is to determine whether each AC actually passes by gathering concrete evidence.
+  You are verifying acceptance criteria for an epic. Your job is to determine whether each AC actually passes by running commands and observing output.
   ## Input
-  Read acceptance criteria from ./story-files/. Each file contains the ACs to verify. Parse every AC and verify each one independently.
+  Read verification guides from ./story-files/. Each guide explains:
+  - What was built
+  - Docker container name and prerequisites
+  - For each AC: an exact command to run and expected output
+  ## Verification Method
+  Use `docker exec`, `docker logs`, `curl`, and other Docker/HTTP commands as described in the guides. Every AC must be verified by:
+  1. Running the exact command from the guide
+  2. Capturing the actual output
+  3. Comparing to expected output
+  You do NOT have access to source code. You verify by exercising the running system via Docker only.
   ## Anti-Leniency Rules
@@ -35,14 +47,7 @@ prompt_template: |
   - Every PASS requires commands_run evidence — if you cannot run a command to verify, score UNKNOWN.
   - UNKNOWN if unable to verify — never guess at outcomes.
   - Do not infer success from lack of errors. Silence is not evidence.
-  ## Tool Access
-  You have access to:
-  - Docker commands: `docker exec`, `docker logs`, `docker ps`
-  - Observability query endpoints
-  You do NOT have access to source code. Do not attempt to read, edit, or write source files. Gather all evidence through runtime observation only.
+  - If Docker is not running or the app container is not available, report ALL ACs as UNKNOWN with the reason "Docker not available".
   ## Evidence Requirements

package/templates/workflows/default.yaml CHANGED Viewed

@@ -19,6 +19,11 @@ tasks:
     session: fresh
     source_access: true
     driver: codex
+  document:
+    agent: documenter
+    session: fresh
+    source_access: true
+    model: claude-opus-4-6
   verify:
     agent: evaluator
     session: fresh
@@ -40,6 +45,7 @@ story_flow:
   - implement
   - check
   - review
+  - document
 epic_flow:
   - story_flow
@@ -48,5 +54,6 @@ epic_flow:
       - retry
       - check
       - review
+      - document
       - verify
   - retro