npm - codeharness - Versions diffs - 0.26.3 → 0.26.5 - Mend

codeharness 0.26.3 → 0.26.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10)

package/dist/{chunk-NYZZCLQG.js → chunk-F6L7CXLK.js} +24 -19
package/dist/{docker-GG765ZJT.js → docker-VHOP56YP.js} +1 -1
package/dist/index.js +193 -103
package/package.json +1 -1
package/patches/AGENTS.md +1 -1
package/patches/dev/enforcement.md +16 -7
package/patches/retro/enforcement.md +2 -2
package/patches/review/enforcement.md +24 -3
package/patches/verify/story-verification.md +25 -11
package/ralph/ralph.sh +13 -8

package/dist/{chunk-NYZZCLQG.js → chunk-F6L7CXLK.js} RENAMED

@@ -1096,25 +1096,33 @@ RUN cargo build --release
   // ── Task 16: getProjectName ───────────────────────────────────────────
   // ── getVerifyDockerfileSection ──────────────────────────────────────
   getVerifyDockerfileSection(projectDir) {
+    let needsBevy = false;
+    const cargoContent = readTextSafe(join6(projectDir, "Cargo.toml"));
+    if (cargoContent) {
+      const depsSection = getCargoDepsSection(cargoContent);
+      needsBevy = hasCargoDep(depsSection, "bevy");
+    }
+    const aptPackages = ["build-essential", "pkg-config", "libssl-dev"];
+    if (needsBevy) {
+      aptPackages.push(
+        "libudev-dev",
+        "libasound2-dev",
+        "libwayland-dev",
+        "libxkbcommon-dev",
+        "libfontconfig1-dev",
+        "libx11-dev"
+      );
+    }
     const lines = [
       "# --- Rust tooling ---",
+      "RUN apt-get update && apt-get install -y --no-install-recommends \\",
+      `    ${aptPackages.join(" ")} \\`,
+      "    && rm -rf /var/lib/apt/lists/*",
       'RUN curl --proto "=https" --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain stable',
       'ENV PATH="/root/.cargo/bin:$PATH"',
       "RUN rustup component add clippy",
       "RUN cargo install cargo-tarpaulin"
     ];
-    const cargoContent = readTextSafe(join6(projectDir, "Cargo.toml"));
-    if (cargoContent) {
-      const depsSection = getCargoDepsSection(cargoContent);
-      if (hasCargoDep(depsSection, "bevy")) {
-        lines.push(
-          "RUN apt-get update && apt-get install -y --no-install-recommends \\",
-          "    libudev-dev libasound2-dev libwayland-dev libxkbcommon-dev \\",
-          "    libfontconfig1-dev libx11-dev \\",
-          "    && rm -rf /var/lib/apt/lists/*"
-        );
-      }
-    }
     return lines.join("\n");
   }
   getProjectName(dir) {
@@ -1337,9 +1345,8 @@ function parseValue(raw) {
   if (raw === "true") return true;
   if (raw === "false") return false;
   if (raw === "null") return null;
-  const num = Number(raw);
-  if (!Number.isNaN(num) && raw.trim() !== "") return num;
-  return raw;
+  const n = Number(raw);
+  return !Number.isNaN(n) && raw.trim() !== "" ? n : raw;
 }
 // src/lib/observability/instrument.ts
@@ -1932,9 +1939,7 @@ function handleLocalShared(opts, state) {
   }
   if (!opts.isJson) {
     fail("Observability stack: failed to start");
-    if (startResult.error) {
-      info(`Error: ${startResult.error}`);
-    }
+    if (startResult.error) info(`Error: ${startResult.error}`);
   }
   const docker = {
     compose_file: sharedComposeFile,
@@ -3207,7 +3212,7 @@ function generateDockerfileTemplate(projectDir, stackOrDetections) {
 }
 // src/modules/infra/init-project.ts
-var HARNESS_VERSION = true ? "0.26.3" : "0.0.0-dev";
+var HARNESS_VERSION = true ? "0.26.5" : "0.0.0-dev";
 function failResult(opts, error) {
   return {
     status: "fail",

package/dist/{docker-GG765ZJT.js → docker-VHOP56YP.js} RENAMED

@@ -16,7 +16,7 @@ import {
   stopCollectorOnly,
   stopSharedStack,
   stopStack
-} from "./chunk-NYZZCLQG.js";
+} from "./chunk-F6L7CXLK.js";
 export {
   checkRemoteEndpoint,
   cleanupOrphanedContainers,

package/dist/index.js CHANGED

@@ -51,7 +51,7 @@ import {
   validateDockerfile,
   warn,
   writeState
-} from "./chunk-NYZZCLQG.js";
+} from "./chunk-F6L7CXLK.js";
 // src/index.ts
 import { Command } from "commander";
@@ -805,16 +805,6 @@ function defaultState() {
     actionItems: []
   };
 }
-function defaultStoryState() {
-  return {
-    status: "backlog",
-    attempts: 0,
-    lastAttempt: null,
-    lastError: null,
-    proofPath: null,
-    acResults: null
-  };
-}
 function getStoryStatusesFromState(state) {
   const result = {};
   for (const [key, story] of Object.entries(state.stories)) {
@@ -982,7 +972,10 @@ function updateStoryStatus(key, status, detail) {
     return fail2(stateResult.error);
   }
   const current = stateResult.data;
-  const existingStory = current.stories[key] ?? defaultStoryState();
+  const existingStory = current.stories[key];
+  if (!existingStory) {
+    return fail2(`Story '${key}' does not exist in sprint state \u2014 refusing to create phantom entry`);
+  }
   const isNewAttempt = status === "in-progress";
   const updatedStory = {
     ...existingStory,
@@ -1130,17 +1123,30 @@ function reconcileState() {
     }
     const updatedEpics = { ...state.epics };
     for (const [epicKey, storyKeys] of epicStories) {
+      const total = storyKeys.length;
+      const doneCount = storyKeys.filter((k) => state.stories[k].status === "done").length;
+      const failedCount = storyKeys.filter((k) => state.stories[k].status === "failed").length;
+      const computedStatus = doneCount === total ? "done" : doneCount + failedCount === total ? "done" : "in-progress";
       if (!(epicKey in updatedEpics)) {
-        const total = storyKeys.length;
-        const doneCount = storyKeys.filter((k) => state.stories[k].status === "done").length;
-        const epicStatus = doneCount === total ? "done" : "in-progress";
         updatedEpics[epicKey] = {
-          status: epicStatus,
+          status: computedStatus,
           storiesTotal: total,
           storiesDone: doneCount
         };
         changed = true;
         corrections.push(`created missing epic entry: ${epicKey}`);
+      } else {
+        const existing = updatedEpics[epicKey];
+        if (existing.storiesTotal !== total || existing.storiesDone !== doneCount || existing.status !== computedStatus) {
+          updatedEpics[epicKey] = {
+            ...existing,
+            status: computedStatus,
+            storiesTotal: total,
+            storiesDone: doneCount
+          };
+          changed = true;
+          corrections.push(`fixed epic ${epicKey}: ${existing.status}\u2192${computedStatus} (${doneCount}/${total} done, ${failedCount} failed)`);
+        }
       }
     }
     state.epics = updatedEpics;
@@ -2317,6 +2323,7 @@ var ERROR_LINE = /\[ERROR\]\s+(.+)/;
 function parseRalphMessage(rawLine) {
   const clean = rawLine.replace(ANSI_ESCAPE, "").replace(TIMESTAMP_PREFIX, "").trim();
   if (clean.length === 0) return null;
+  if (clean.startsWith("{")) return null;
   const success = SUCCESS_STORY.exec(clean);
   if (success) {
     const key = success[1];
@@ -2559,10 +2566,6 @@ function handleAgentEvent(event, rendererHandle, state) {
       rendererHandle.update(event);
       break;
     case "story-complete": {
-      const completeResult = updateStoryStatus2(event.key, "review");
-      if (!completeResult.success) {
-        info(`[WARN] Failed to update status for ${event.key}: ${completeResult.error}`);
-      }
       rendererHandle.addMessage({ type: "ok", key: event.key, message: event.details });
       break;
     }
@@ -2799,13 +2802,29 @@ function registerRunCommand(program) {
 // src/commands/verify.ts
 import { existsSync as existsSync22, readFileSync as readFileSync19 } from "fs";
-import { join as join20 } from "path";
+import { join as join21 } from "path";
 // src/modules/verify/index.ts
 import { readFileSync as readFileSync18 } from "fs";
 // src/modules/verify/proof.ts
 import { existsSync as existsSync12, readFileSync as readFileSync9 } from "fs";
+// src/modules/verify/types.ts
+var TIER_HIERARCHY = [
+  "test-provable",
+  "runtime-provable",
+  "environment-provable",
+  "escalate"
+];
+var LEGACY_TIER_MAP = {
+  "cli-verifiable": "test-provable",
+  "integration-required": "environment-provable",
+  "unit-testable": "test-provable",
+  "black-box": "environment-provable"
+};
+// src/modules/verify/proof.ts
 function classifyEvidenceCommands(proofContent) {
   const results = [];
   const codeBlockPattern = /```(?:bash|shell)\n([\s\S]*?)```/g;
@@ -2895,9 +2914,15 @@ function validateProofQuality(proofPath) {
     return emptyResult;
   }
   const content = readFileSync9(proofPath, "utf-8");
-  const bbTierMatch = /\*\*Tier:\*\*\s*(unit-testable|black-box)/i.exec(content);
-  const bbIsUnitTestable = bbTierMatch ? bbTierMatch[1].toLowerCase() === "unit-testable" : false;
-  const bbEnforcement = bbIsUnitTestable ? { blackBoxPass: true, grepSrcCount: 0, dockerExecCount: 0, observabilityCount: 0, otherCount: 0, grepRatio: 0, acsMissingDockerExec: [] } : checkBlackBoxEnforcement(content);
+  const allTierNames = [...TIER_HIERARCHY, ...Object.keys(LEGACY_TIER_MAP)];
+  const uniqueTierNames = [...new Set(allTierNames)];
+  const tierPattern = new RegExp(`\\*\\*Tier:\\*\\*\\s*(${uniqueTierNames.join("|")})`, "i");
+  const bbTierMatch = tierPattern.exec(content);
+  const rawTierValue = bbTierMatch ? bbTierMatch[1].toLowerCase() : null;
+  const normalizedTier = rawTierValue ? LEGACY_TIER_MAP[rawTierValue] ?? (TIER_HIERARCHY.includes(rawTierValue) ? rawTierValue : null) : null;
+  const skipDockerEnforcement = normalizedTier !== null && normalizedTier !== "environment-provable";
+  const bbRawEnforcement = checkBlackBoxEnforcement(content);
+  const bbEnforcement = skipDockerEnforcement ? { ...bbRawEnforcement, blackBoxPass: true } : bbRawEnforcement;
   function buildResult(base) {
     const basePassed = base.pending === 0 && base.verified > 0;
     return {
@@ -3716,6 +3741,8 @@ function closeBeadsIssue(storyId, dir) {
 // src/modules/verify/parser.ts
 import { existsSync as existsSync17, readFileSync as readFileSync13 } from "fs";
+// src/modules/verify/parser-keywords.ts
 var UI_KEYWORDS = [
   "agent-browser",
   "screenshot",
@@ -3752,6 +3779,40 @@ var ESCALATE_KEYWORDS = [
   "visual inspection by human",
   "paid external service"
 ];
+var RUNTIME_PROVABLE_KEYWORDS = [
+  "cli command",
+  "api endpoint",
+  "http",
+  "server",
+  "output shows",
+  "exit code",
+  "binary",
+  "runs and produces",
+  "cli outputs",
+  "when run"
+];
+var ENVIRONMENT_PROVABLE_KEYWORDS = [
+  "docker",
+  "container",
+  "observability",
+  "telemetry",
+  "database",
+  "queue",
+  "distributed",
+  "multi-service",
+  "end-to-end",
+  "victorialogs"
+];
+var ESCALATE_TIER_KEYWORDS = [
+  "physical hardware",
+  "human visual",
+  "paid service",
+  "gpu",
+  "manual inspection",
+  "physical display"
+];
+// src/modules/verify/parser.ts
 function classifyVerifiability(description) {
   const lower = description.toLowerCase();
   for (const kw of INTEGRATION_KEYWORDS) {
@@ -3766,10 +3827,27 @@ function classifyStrategy(description) {
   }
   return "docker";
 }
-var VERIFICATION_TAG_PATTERN = /<!--\s*verification:\s*(cli-verifiable|integration-required)\s*-->/;
+function classifyTier(description) {
+  const lower = description.toLowerCase();
+  for (const kw of ESCALATE_TIER_KEYWORDS) {
+    if (lower.includes(kw)) return "escalate";
+  }
+  for (const kw of ENVIRONMENT_PROVABLE_KEYWORDS) {
+    if (lower.includes(kw)) return "environment-provable";
+  }
+  for (const kw of RUNTIME_PROVABLE_KEYWORDS) {
+    if (lower.includes(kw)) return "runtime-provable";
+  }
+  return "test-provable";
+}
+var VERIFICATION_TAG_PATTERN = /<!--\s*verification:\s*(cli-verifiable|integration-required|unit-testable|black-box|test-provable|runtime-provable|environment-provable|escalate)\s*-->/;
 function parseVerificationTag(text) {
   const match = VERIFICATION_TAG_PATTERN.exec(text);
-  return match ? match[1] : null;
+  if (!match) return null;
+  const raw = match[1];
+  const mapped = LEGACY_TIER_MAP[raw] ?? raw;
+  if (!TIER_HIERARCHY.includes(mapped)) return null;
+  return mapped;
 }
 function classifyAC(description) {
   const lower = description.toLowerCase();
@@ -3821,14 +3899,16 @@ function parseStoryACs(storyFilePath) {
       const description = currentDesc.join(" ").trim();
       if (description) {
         const tag = parseVerificationTag(description);
-        const verifiability = tag ?? classifyVerifiability(description);
+        const tier = tag ?? classifyTier(description);
+        const verifiability = classifyVerifiability(description);
         const strategy = classifyStrategy(description);
         acs.push({
           id: currentId,
           description,
           type: classifyAC(description),
           verifiability,
-          strategy
+          strategy,
+          tier
         });
       } else {
         warn(`Skipping malformed AC #${currentId}: empty description`);
@@ -5418,14 +5498,16 @@ function runValidationCycle() {
 // src/modules/verify/env.ts
 import { execFileSync as execFileSync5 } from "child_process";
 import { existsSync as existsSync21, mkdirSync as mkdirSync6, readdirSync as readdirSync6, readFileSync as readFileSync17, writeFileSync as writeFileSync12, cpSync, rmSync, statSync as statSync5 } from "fs";
-import { join as join19, basename } from "path";
+import { join as join20, basename } from "path";
 import { createHash } from "crypto";
 // src/modules/verify/dockerfile-generator.ts
+import { join as join19 } from "path";
 function generateVerifyDockerfile(projectDir) {
   const detections = detectStacks(projectDir);
   const sections = [];
   sections.push("FROM ubuntu:22.04");
+  sections.push("ENV DEBIAN_FRONTEND=noninteractive");
   sections.push("");
   sections.push("# Common tools");
   sections.push(
@@ -5440,7 +5522,8 @@ function generateVerifyDockerfile(projectDir) {
   for (const detection of detections) {
     const provider = getStackProvider(detection.stack);
     if (!provider) continue;
-    const section = provider.getVerifyDockerfileSection(projectDir);
+    const resolvedDir = detection.dir === "." ? projectDir : join19(projectDir, detection.dir);
+    const section = provider.getVerifyDockerfileSection(resolvedDir);
     if (section) {
       sections.push(section);
       sections.push("");
@@ -5468,7 +5551,7 @@ function isValidStoryKey(storyKey) {
   return /^[a-zA-Z0-9_-]+$/.test(storyKey);
 }
 function computeDistHash(projectDir) {
-  const distDir = join19(projectDir, "dist");
+  const distDir = join20(projectDir, "dist");
   if (!existsSync21(distDir)) return null;
   const hash = createHash("sha256");
   const files = collectFiles(distDir).sort();
@@ -5481,7 +5564,7 @@ function computeDistHash(projectDir) {
 function collectFiles(dir) {
   const results = [];
   for (const entry of readdirSync6(dir, { withFileTypes: true })) {
-    const fullPath = join19(dir, entry.name);
+    const fullPath = join20(dir, entry.name);
     if (entry.isDirectory()) {
       results.push(...collectFiles(fullPath));
     } else {
@@ -5517,7 +5600,7 @@ function detectProjectType(projectDir) {
   const rootDetection = allStacks.find((s) => s.dir === ".");
   const stack = rootDetection ? rootDetection.stack : null;
   if (stack && STACK_TO_PROJECT_TYPE[stack]) return STACK_TO_PROJECT_TYPE[stack];
-  if (existsSync21(join19(projectDir, ".claude-plugin", "plugin.json"))) return "plugin";
+  if (existsSync21(join20(projectDir, ".claude-plugin", "plugin.json"))) return "plugin";
   return "generic";
 }
 function buildVerifyImage(options = {}) {
@@ -5561,18 +5644,18 @@ function buildNodeImage(projectDir) {
   const lastLine = packOutput.split("\n").pop()?.trim();
   if (!lastLine) throw new Error("npm pack produced no output \u2014 cannot determine tarball filename.");
   const tarballName = basename(lastLine);
-  const tarballPath = join19("/tmp", tarballName);
-  const buildContext = join19("/tmp", `codeharness-verify-build-${Date.now()}`);
+  const tarballPath = join20("/tmp", tarballName);
+  const buildContext = join20("/tmp", `codeharness-verify-build-${Date.now()}`);
   mkdirSync6(buildContext, { recursive: true });
   try {
-    cpSync(tarballPath, join19(buildContext, tarballName));
+    cpSync(tarballPath, join20(buildContext, tarballName));
     const dockerfile = generateVerifyDockerfile(projectDir) + `
 # Install project from tarball
 ARG TARBALL=package.tgz
 COPY \${TARBALL} /tmp/\${TARBALL}
 RUN npm install -g /tmp/\${TARBALL} && rm /tmp/\${TARBALL}
 `;
-    writeFileSync12(join19(buildContext, "Dockerfile"), dockerfile);
+    writeFileSync12(join20(buildContext, "Dockerfile"), dockerfile);
     execFileSync5("docker", ["build", "-t", IMAGE_TAG, "--build-arg", `TARBALL=${tarballName}`, "."], {
       cwd: buildContext,
       stdio: "pipe",
@@ -5584,22 +5667,22 @@ RUN npm install -g /tmp/\${TARBALL} && rm /tmp/\${TARBALL}
   }
 }
 function buildPythonImage(projectDir) {
-  const distDir = join19(projectDir, "dist");
+  const distDir = join20(projectDir, "dist");
   const distFiles = readdirSync6(distDir).filter((f) => f.endsWith(".tar.gz") || f.endsWith(".whl"));
   if (distFiles.length === 0) {
     throw new Error("No distribution files found in dist/. Run your build command first (e.g., python -m build).");
   }
   const distFile = distFiles.filter((f) => f.endsWith(".tar.gz"))[0] ?? distFiles[0];
-  const buildContext = join19("/tmp", `codeharness-verify-build-${Date.now()}`);
+  const buildContext = join20("/tmp", `codeharness-verify-build-${Date.now()}`);
   mkdirSync6(buildContext, { recursive: true });
   try {
-    cpSync(join19(distDir, distFile), join19(buildContext, distFile));
+    cpSync(join20(distDir, distFile), join20(buildContext, distFile));
     const dockerfile = generateVerifyDockerfile(projectDir) + `
 # Install project from distribution
 COPY ${distFile} /tmp/${distFile}
 RUN pip install --break-system-packages /tmp/${distFile} && rm /tmp/${distFile}
 `;
-    writeFileSync12(join19(buildContext, "Dockerfile"), dockerfile);
+    writeFileSync12(join20(buildContext, "Dockerfile"), dockerfile);
     execFileSync5("docker", ["build", "-t", IMAGE_TAG, "."], {
       cwd: buildContext,
       stdio: "pipe",
@@ -5614,19 +5697,19 @@ function prepareVerifyWorkspace(storyKey, projectDir) {
   if (!isValidStoryKey(storyKey)) {
     throw new Error(`Invalid story key: ${storyKey}. Keys must contain only alphanumeric characters, hyphens, and underscores.`);
   }
-  const storyFile = join19(root, STORY_DIR, `${storyKey}.md`);
+  const storyFile = join20(root, STORY_DIR, `${storyKey}.md`);
   if (!existsSync21(storyFile)) throw new Error(`Story file not found: ${storyFile}`);
   const workspace = `${TEMP_PREFIX}${storyKey}`;
   if (existsSync21(workspace)) rmSync(workspace, { recursive: true, force: true });
   mkdirSync6(workspace, { recursive: true });
-  cpSync(storyFile, join19(workspace, "story.md"));
-  const readmePath = join19(root, "README.md");
-  if (existsSync21(readmePath)) cpSync(readmePath, join19(workspace, "README.md"));
-  const docsDir = join19(root, "docs");
+  cpSync(storyFile, join20(workspace, "story.md"));
+  const readmePath = join20(root, "README.md");
+  if (existsSync21(readmePath)) cpSync(readmePath, join20(workspace, "README.md"));
+  const docsDir = join20(root, "docs");
   if (existsSync21(docsDir) && statSync5(docsDir).isDirectory()) {
-    cpSync(docsDir, join19(workspace, "docs"), { recursive: true });
+    cpSync(docsDir, join20(workspace, "docs"), { recursive: true });
   }
-  mkdirSync6(join19(workspace, "verification"), { recursive: true });
+  mkdirSync6(join20(workspace, "verification"), { recursive: true });
   return workspace;
 }
 function checkVerifyEnv() {
@@ -5679,18 +5762,18 @@ function cleanupVerifyEnv(storyKey) {
   }
 }
 function buildPluginImage(projectDir) {
-  const buildContext = join19("/tmp", `codeharness-verify-build-${Date.now()}`);
+  const buildContext = join20("/tmp", `codeharness-verify-build-${Date.now()}`);
   mkdirSync6(buildContext, { recursive: true });
   try {
-    const pluginDir = join19(projectDir, ".claude-plugin");
-    cpSync(pluginDir, join19(buildContext, ".claude-plugin"), { recursive: true });
+    const pluginDir = join20(projectDir, ".claude-plugin");
+    cpSync(pluginDir, join20(buildContext, ".claude-plugin"), { recursive: true });
     for (const dir of ["commands", "hooks", "knowledge", "skills"]) {
-      const src = join19(projectDir, dir);
+      const src = join20(projectDir, dir);
       if (existsSync21(src) && statSync5(src).isDirectory()) {
-        cpSync(src, join19(buildContext, dir), { recursive: true });
+        cpSync(src, join20(buildContext, dir), { recursive: true });
       }
     }
-    writeFileSync12(join19(buildContext, "Dockerfile"), generateVerifyDockerfile(projectDir));
+    writeFileSync12(join20(buildContext, "Dockerfile"), generateVerifyDockerfile(projectDir));
     execFileSync5("docker", ["build", "-t", IMAGE_TAG, "."], {
       cwd: buildContext,
       stdio: "pipe",
@@ -5701,10 +5784,10 @@ function buildPluginImage(projectDir) {
   }
 }
 function buildSimpleImage(projectDir, timeout = 12e4) {
-  const buildContext = join19("/tmp", `codeharness-verify-build-${Date.now()}`);
+  const buildContext = join20("/tmp", `codeharness-verify-build-${Date.now()}`);
   mkdirSync6(buildContext, { recursive: true });
   try {
-    writeFileSync12(join19(buildContext, "Dockerfile"), generateVerifyDockerfile(projectDir));
+    writeFileSync12(join20(buildContext, "Dockerfile"), generateVerifyDockerfile(projectDir));
     execFileSync5("docker", ["build", "-t", IMAGE_TAG, "."], {
       cwd: buildContext,
       stdio: "pipe",
@@ -5786,7 +5869,7 @@ function verifyRetro(opts, isJson, root) {
     return;
   }
   const retroFile = `epic-${epicNum}-retrospective.md`;
-  const retroPath = join20(root, STORY_DIR2, retroFile);
+  const retroPath = join21(root, STORY_DIR2, retroFile);
   if (!existsSync22(retroPath)) {
     if (isJson) {
       jsonOutput({ status: "fail", epic: epicNum, retroFile, message: `${retroFile} not found` });
@@ -5804,7 +5887,7 @@ function verifyRetro(opts, isJson, root) {
     warn(`Failed to update sprint status: ${message}`);
   }
   if (isJson) {
-    jsonOutput({ status: "ok", epic: epicNum, retroFile: join20(STORY_DIR2, retroFile) });
+    jsonOutput({ status: "ok", epic: epicNum, retroFile: join21(STORY_DIR2, retroFile) });
   } else {
     ok(`Epic ${epicNum} retrospective: marked done`);
   }
@@ -5815,7 +5898,7 @@ function verifyStory(storyId, isJson, root) {
     process.exitCode = 1;
     return;
   }
-  const readmePath = join20(root, "README.md");
+  const readmePath = join21(root, "README.md");
   if (!existsSync22(readmePath)) {
     if (isJson) {
       jsonOutput({ status: "fail", message: "No README.md found \u2014 verification requires user documentation" });
@@ -5825,7 +5908,7 @@ function verifyStory(storyId, isJson, root) {
     process.exitCode = 1;
     return;
   }
-  const storyFilePath = join20(root, STORY_DIR2, `${storyId}.md`);
+  const storyFilePath = join21(root, STORY_DIR2, `${storyId}.md`);
   if (!existsSync22(storyFilePath)) {
     fail(`Story file not found: ${storyFilePath}`, { json: isJson });
     process.exitCode = 1;
@@ -5866,7 +5949,7 @@ function verifyStory(storyId, isJson, root) {
     return;
   }
   const storyTitle = extractStoryTitle(storyFilePath);
-  const expectedProofPath = join20(root, "verification", `${storyId}-proof.md`);
+  const expectedProofPath = join21(root, "verification", `${storyId}-proof.md`);
   const proofPath = existsSync22(expectedProofPath) ? expectedProofPath : createProofDocument(storyId, storyTitle, acs, root);
   const proofQuality = validateProofQuality(proofPath);
   if (!proofQuality.passed) {
@@ -5996,8 +6079,15 @@ var ELK_ENDPOINTS = {
 function getDefaultEndpointsForBackend(backend) {
   return backend === "elk" ? ELK_ENDPOINTS : DEFAULT_ENDPOINTS;
 }
-function buildScopedEndpoints(endpoints, serviceName) {
+function buildScopedEndpoints(endpoints, serviceName, backend) {
   const encoded = encodeURIComponent(serviceName);
+  if (backend === "elk") {
+    return {
+      logs: `${endpoints.logs}/_search?q=${encodeURIComponent(`service_name:${serviceName}`)}&size=100`,
+      metrics: `${endpoints.metrics}/_search?q=${encodeURIComponent(`service_name:${serviceName}`)}&size=100`,
+      traces: `${endpoints.traces}/_search?q=${encodeURIComponent(`trace_id:* AND service_name:${serviceName}`)}&size=20`
+    };
+  }
   return {
     logs: `${endpoints.logs}/select/logsql/query?query=${encodeURIComponent(`service_name:${serviceName}`)}`,
     metrics: `${endpoints.metrics}/api/v1/query?query=${encodeURIComponent(`{service_name="${serviceName}"}`)}`,
@@ -6031,12 +6121,12 @@ function resolveEndpoints(state) {
 // src/lib/onboard-checks.ts
 import { existsSync as existsSync26 } from "fs";
-import { join as join23, dirname as dirname5 } from "path";
+import { join as join24, dirname as dirname5 } from "path";
 import { fileURLToPath } from "url";
 // src/lib/coverage/parser.ts
 import { existsSync as existsSync23, readFileSync as readFileSync20 } from "fs";
-import { join as join21 } from "path";
+import { join as join22 } from "path";
 function parseTestCounts(output) {
   const vitestMatch = /Tests\s+(\d+)\s+passed(?:\s*\|\s*(\d+)\s+failed)?/i.exec(output);
   if (vitestMatch) {
@@ -6100,7 +6190,7 @@ function parseVitestCoverage(dir) {
   }
 }
 function parsePythonCoverage(dir) {
-  const reportPath = join21(dir, "coverage.json");
+  const reportPath = join22(dir, "coverage.json");
   if (!existsSync23(reportPath)) {
     warn("Coverage report not found at coverage.json");
     return 0;
@@ -6114,7 +6204,7 @@ function parsePythonCoverage(dir) {
   }
 }
 function parseTarpaulinCoverage(dir) {
-  const reportPath = join21(dir, "coverage", "tarpaulin-report.json");
+  const reportPath = join22(dir, "coverage", "tarpaulin-report.json");
   if (!existsSync23(reportPath)) {
     warn("Tarpaulin report not found at coverage/tarpaulin-report.json");
     return 0;
@@ -6129,8 +6219,8 @@ function parseTarpaulinCoverage(dir) {
 }
 function findCoverageSummary(dir) {
   const candidates = [
-    join21(dir, "coverage", "coverage-summary.json"),
-    join21(dir, "src", "coverage", "coverage-summary.json")
+    join22(dir, "coverage", "coverage-summary.json"),
+    join22(dir, "src", "coverage", "coverage-summary.json")
   ];
   for (const p of candidates) {
     if (existsSync23(p)) return p;
@@ -6141,7 +6231,7 @@ function findCoverageSummary(dir) {
 // src/lib/coverage/runner.ts
 import { execSync as execSync6 } from "child_process";
 import { existsSync as existsSync24, readFileSync as readFileSync21 } from "fs";
-import { join as join22 } from "path";
+import { join as join23 } from "path";
 function detectCoverageTool(dir) {
   const baseDir = dir ?? process.cwd();
   const stateHint = getStateToolHint(baseDir);
@@ -6174,7 +6264,7 @@ function detectRustCoverageTool(dir) {
     warn("cargo-tarpaulin not installed \u2014 coverage detection unavailable");
     return { tool: "unknown", runCommand: "", reportFormat: "" };
   }
-  const cargoPath = join22(dir, "Cargo.toml");
+  const cargoPath = join23(dir, "Cargo.toml");
   let isWorkspace = false;
   try {
     const cargoContent = readFileSync21(cargoPath, "utf-8");
@@ -6197,8 +6287,8 @@ function getStateToolHint(dir) {
   }
 }
 function detectNodeCoverageTool(dir, stateHint) {
-  const hasVitestConfig = existsSync24(join22(dir, "vitest.config.ts")) || existsSync24(join22(dir, "vitest.config.js"));
-  const pkgPath = join22(dir, "package.json");
+  const hasVitestConfig = existsSync24(join23(dir, "vitest.config.ts")) || existsSync24(join23(dir, "vitest.config.js"));
+  const pkgPath = join23(dir, "package.json");
   let hasVitestCoverageV8 = false;
   let hasVitestCoverageIstanbul = false;
   let hasC8 = false;
@@ -6259,7 +6349,7 @@ function getNodeTestCommand(scripts, runner) {
   return "npm test";
 }
 function detectPythonCoverageTool(dir) {
-  const reqPath = join22(dir, "requirements.txt");
+  const reqPath = join23(dir, "requirements.txt");
   if (existsSync24(reqPath)) {
     try {
       const content = readFileSync21(reqPath, "utf-8");
@@ -6273,7 +6363,7 @@ function detectPythonCoverageTool(dir) {
     } catch {
     }
   }
-  const pyprojectPath = join22(dir, "pyproject.toml");
+  const pyprojectPath = join23(dir, "pyproject.toml");
   if (existsSync24(pyprojectPath)) {
     try {
       const content = readFileSync21(pyprojectPath, "utf-8");
@@ -6472,7 +6562,7 @@ function checkBmadInstalled(dir) {
 function checkHooksRegistered(dir) {
   const __filename = fileURLToPath(import.meta.url);
   const __dirname = dirname5(__filename);
-  const hooksPath = join23(__dirname, "..", "..", "hooks", "hooks.json");
+  const hooksPath = join24(__dirname, "..", "..", "hooks", "hooks.json");
   return { ok: existsSync26(hooksPath) };
 }
 function runPreconditions(dir) {
@@ -6603,7 +6693,7 @@ function handleFullStatus(isJson) {
   const serviceName = state.otlp?.service_name;
   if (serviceName) {
     const endpoints = resolveEndpoints(state);
-    const scoped = buildScopedEndpoints(endpoints, serviceName);
+    const scoped = buildScopedEndpoints(endpoints, serviceName, state.otlp?.backend);
     console.log(`  Scoped: logs=${scoped.logs} metrics=${scoped.metrics} traces=${scoped.traces}`);
   }
   printBeadsSummary();
@@ -6671,7 +6761,7 @@ function handleFullStatusJson(state) {
   }
   const endpoints = resolveEndpoints(state);
   const serviceName = state.otlp?.service_name;
-  const scoped_endpoints = serviceName ? buildScopedEndpoints(endpoints, serviceName) : void 0;
+  const scoped_endpoints = serviceName ? buildScopedEndpoints(endpoints, serviceName, state.otlp?.backend) : void 0;
   const beads = getBeadsData();
   const onboarding = getOnboardingProgressData();
   const sprint = getSprintReportData();
@@ -7112,7 +7202,7 @@ function registerStatusCommand(program) {
 // src/modules/audit/dimensions.ts
 import { existsSync as existsSync27, readdirSync as readdirSync7 } from "fs";
-import { join as join24 } from "path";
+import { join as join25 } from "path";
 function gap(dimension, description, suggestedFix) {
   return { dimension, description, suggestedFix };
 }
@@ -7224,15 +7314,15 @@ function checkDocumentation(projectDir) {
 function checkVerification(projectDir) {
   try {
     const gaps = [];
-    const sprintPath = join24(projectDir, "_bmad-output", "implementation-artifacts", "sprint-status.yaml");
+    const sprintPath = join25(projectDir, "_bmad-output", "implementation-artifacts", "sprint-status.yaml");
     if (!existsSync27(sprintPath)) return dimOk("verification", "warn", "no sprint data", [gap("verification", "No sprint-status.yaml found", "Run sprint planning to create sprint status")]);
-    const vDir = join24(projectDir, "verification");
+    const vDir = join25(projectDir, "verification");
     let proofCount = 0, totalChecked = 0;
     if (existsSync27(vDir)) {
       for (const file of readdirSafe(vDir)) {
         if (!file.endsWith("-proof.md")) continue;
         totalChecked++;
-        const r = parseProof(join24(vDir, file));
+        const r = parseProof(join25(vDir, file));
         if (isOk(r) && r.data.passed) {
           proofCount++;
         } else {
@@ -7310,7 +7400,7 @@ function formatAuditJson(result) {
 // src/modules/audit/fix-generator.ts
 import { existsSync as existsSync28, writeFileSync as writeFileSync13, mkdirSync as mkdirSync7 } from "fs";
-import { join as join25, dirname as dirname6 } from "path";
+import { join as join26, dirname as dirname6 } from "path";
 function buildStoryKey(gap2, index) {
   const safeDimension = gap2.dimension.toLowerCase().replace(/[^a-z0-9]+/g, "-").replace(/(^-|-$)/g, "");
   return `audit-fix-${safeDimension}-${index}`;
@@ -7342,7 +7432,7 @@ function generateFixStories(auditResult) {
     const stories = [];
     let created = 0;
     let skipped = 0;
-    const artifactsDir = join25(
+    const artifactsDir = join26(
       process.cwd(),
       "_bmad-output",
       "implementation-artifacts"
@@ -7351,7 +7441,7 @@ function generateFixStories(auditResult) {
       for (let i = 0; i < dimension.gaps.length; i++) {
         const gap2 = dimension.gaps[i];
         const key = buildStoryKey(gap2, i + 1);
-        const filePath = join25(artifactsDir, `${key}.md`);
+        const filePath = join26(artifactsDir, `${key}.md`);
         if (existsSync28(filePath)) {
           stories.push({
             key,
@@ -7542,7 +7632,7 @@ function registerOnboardCommand(program) {
 // src/commands/teardown.ts
 import { existsSync as existsSync29, unlinkSync as unlinkSync3, readFileSync as readFileSync23, writeFileSync as writeFileSync14, rmSync as rmSync2 } from "fs";
-import { join as join26 } from "path";
+import { join as join27 } from "path";
 function buildDefaultResult() {
   return {
     status: "ok",
@@ -7588,7 +7678,7 @@ function registerTeardownCommand(program) {
     } else if (otlpMode === "remote-routed") {
       if (!options.keepDocker) {
         try {
-          const { stopCollectorOnly: stopCollectorOnly2 } = await import("./docker-GG765ZJT.js");
+          const { stopCollectorOnly: stopCollectorOnly2 } = await import("./docker-VHOP56YP.js");
           stopCollectorOnly2();
           result.docker.stopped = true;
           if (!isJson) {
@@ -7620,7 +7710,7 @@ function registerTeardownCommand(program) {
         info("Shared stack: kept running (other projects may use it)");
       }
     } else if (isLegacyStack) {
-      const { isStackRunning: isStackRunning2, stopStack } = await import("./docker-GG765ZJT.js");
+      const { isStackRunning: isStackRunning2, stopStack } = await import("./docker-VHOP56YP.js");
       let stackRunning = false;
       try {
         stackRunning = isStackRunning2(composeFile);
@@ -7645,7 +7735,7 @@ function registerTeardownCommand(program) {
           info("Docker stack: not running, skipping");
         }
       }
-      const composeFilePath = join26(projectDir, composeFile);
+      const composeFilePath = join27(projectDir, composeFile);
       if (existsSync29(composeFilePath)) {
         unlinkSync3(composeFilePath);
         result.removed.push(composeFile);
@@ -7653,7 +7743,7 @@ function registerTeardownCommand(program) {
           ok(`Removed: ${composeFile}`);
         }
       }
-      const otelConfigPath = join26(projectDir, "otel-collector-config.yaml");
+      const otelConfigPath = join27(projectDir, "otel-collector-config.yaml");
       if (existsSync29(otelConfigPath)) {
         unlinkSync3(otelConfigPath);
         result.removed.push("otel-collector-config.yaml");
@@ -7664,7 +7754,7 @@ function registerTeardownCommand(program) {
     }
     let patchesRemoved = 0;
     for (const [patchName, relativePath] of Object.entries(PATCH_TARGETS)) {
-      const filePath = join26(projectDir, "_bmad", relativePath);
+      const filePath = join27(projectDir, "_bmad", relativePath);
       if (!existsSync29(filePath)) {
         continue;
       }
@@ -7686,7 +7776,7 @@ function registerTeardownCommand(program) {
     }
     const stacks = state.stacks ?? (state.stack ? [state.stack] : []);
     if (state.otlp?.enabled && stacks.includes("nodejs")) {
-      const pkgPath = join26(projectDir, "package.json");
+      const pkgPath = join27(projectDir, "package.json");
       if (existsSync29(pkgPath)) {
         try {
           const raw = readFileSync23(pkgPath, "utf-8");
@@ -7729,7 +7819,7 @@ function registerTeardownCommand(program) {
         }
       }
     }
-    const harnessDir = join26(projectDir, ".harness");
+    const harnessDir = join27(projectDir, ".harness");
     if (existsSync29(harnessDir)) {
       rmSync2(harnessDir, { recursive: true, force: true });
       result.removed.push(".harness/");
@@ -8488,7 +8578,7 @@ function registerQueryCommand(program) {
 // src/commands/retro-import.ts
 import { existsSync as existsSync30, readFileSync as readFileSync24 } from "fs";
-import { join as join27 } from "path";
+import { join as join28 } from "path";
 // src/lib/retro-parser.ts
 var KNOWN_TOOLS = ["showboat", "ralph", "beads", "bmad"];
@@ -8657,7 +8747,7 @@ function registerRetroImportCommand(program) {
       return;
     }
     const retroFile = `epic-${epicNum}-retrospective.md`;
-    const retroPath = join27(root, STORY_DIR3, retroFile);
+    const retroPath = join28(root, STORY_DIR3, retroFile);
     if (!existsSync30(retroPath)) {
       fail(`Retro file not found: ${retroFile}`, { json: isJson });
       process.exitCode = 1;
@@ -9046,10 +9136,10 @@ function registerVerifyEnvCommand(program) {
 }
 // src/commands/retry.ts
-import { join as join29 } from "path";
+import { join as join30 } from "path";
 // src/lib/retry-state.ts
-import { join as join28 } from "path";
+import { join as join29 } from "path";
 function mutateState(mutator) {
   const result = getSprintState2();
   if (!result.success) return;
@@ -9110,7 +9200,7 @@ function registerRetryCommand(program) {
   program.command("retry").description("Manage retry state for stories").option("--reset", "Clear retry counters and flagged stories").option("--story <key>", "Target a specific story key (used with --reset or --status)").option("--status", "Show retry status for all stories").action((_options, cmd) => {
     const opts = cmd.optsWithGlobals();
     const isJson = opts.json === true;
-    const dir = join29(process.cwd(), RALPH_SUBDIR);
+    const dir = join30(process.cwd(), RALPH_SUBDIR);
     if (opts.story && !isValidStoryKey3(opts.story)) {
       if (isJson) {
         jsonOutput({ status: "fail", message: `Invalid story key: ${opts.story}` });
@@ -9520,7 +9610,7 @@ function registerAuditCommand(program) {
 // src/commands/stats.ts
 import { existsSync as existsSync31, readdirSync as readdirSync8, readFileSync as readFileSync25, writeFileSync as writeFileSync15 } from "fs";
-import { join as join30 } from "path";
+import { join as join31 } from "path";
 var RATES = {
   input: 15,
   output: 75,
@@ -9605,8 +9695,8 @@ function parseLogFile(filePath, report) {
   }
 }
 function generateReport3(projectDir) {
-  const logsDir = join30(projectDir, "ralph", "logs");
-  const logFiles = readdirSync8(logsDir).filter((f) => f.startsWith("claude_output_") && f.endsWith(".log")).sort().map((f) => join30(logsDir, f));
+  const logsDir = join31(projectDir, "ralph", "logs");
+  const logFiles = readdirSync8(logsDir).filter((f) => f.startsWith("claude_output_") && f.endsWith(".log")).sort().map((f) => join31(logsDir, f));
   const report = {
     byPhase: /* @__PURE__ */ new Map(),
     byStory: /* @__PURE__ */ new Map(),
@@ -9705,7 +9795,7 @@ function registerStatsCommand(program) {
     const globalOpts = cmd.optsWithGlobals();
     const isJson = !!globalOpts.json;
     const projectDir = process.cwd();
-    const logsDir = join30(projectDir, "ralph", "logs");
+    const logsDir = join31(projectDir, "ralph", "logs");
     if (!existsSync31(logsDir)) {
       fail("No ralph/logs/ directory found \u2014 run codeharness run first");
       process.exitCode = 1;
@@ -9719,7 +9809,7 @@ function registerStatsCommand(program) {
     const formatted = formatReport2(report);
     console.log(formatted);
     if (options.save) {
-      const outPath = join30(projectDir, "_bmad-output", "implementation-artifacts", "cost-report.md");
+      const outPath = join31(projectDir, "_bmad-output", "implementation-artifacts", "cost-report.md");
       writeFileSync15(outPath, formatted, "utf-8");
       ok(`Report saved to ${outPath}`);
     }
@@ -9727,7 +9817,7 @@ function registerStatsCommand(program) {
 }
 // src/index.ts
-var VERSION = true ? "0.26.3" : "0.0.0-dev";
+var VERSION = true ? "0.26.5" : "0.0.0-dev";
 function createProgram() {
   const program = new Command();
   program.name("codeharness").description("Makes autonomous coding agents produce software that actually works").version(VERSION).option("--json", "Output in machine-readable JSON format");

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "codeharness",
-  "version": "0.26.3",
+  "version": "0.26.5",
   "type": "module",
   "description": "CLI for codeharness — makes autonomous coding agents produce software that actually works",
   "bin": {

package/patches/AGENTS.md CHANGED Viewed

@@ -12,7 +12,7 @@ prevent recurrence of observed failures.
 patches/
   dev/enforcement.md        — Dev agent guardrails
   review/enforcement.md     — Review gates (proof quality, coverage)
-  verify/story-verification.md — Black-box proof requirements
+  verify/story-verification.md — Tier-appropriate proof requirements
   sprint/planning.md        — Sprint planning pre-checks
   retro/enforcement.md      — Retrospective quality metrics
 ```

package/patches/dev/enforcement.md CHANGED Viewed

@@ -4,7 +4,7 @@ Dev agents repeatedly shipped code without reading module conventions (AGENTS.md
 skipped observability checks, and produced features that could not be verified
 from outside the source tree. This patch enforces architecture awareness,
 observability validation, documentation hygiene, test coverage gates, and
-black-box thinking — all operational failures observed in prior sprints.
+verification tier awareness — all operational failures observed in prior sprints.
 (FR33, FR34, NFR20)
 ## Codeharness Development Enforcement
@@ -35,14 +35,23 @@ After running tests, verify telemetry is flowing:
 - Coverage gate: 100% of new/changed code
 - Run `npm test` / `pytest` and verify no regressions
-### Black-Box Thinking
+### Verification Tier Awareness
-Write code that can be verified from the outside. Ask yourself:
-- Can a user exercise this feature from the CLI alone?
-- Is the behavior documented in README.md?
-- Would a verifier with NO source access be able to tell if this works?
+Write code that can be verified at the appropriate tier. The four verification tiers determine what evidence is needed to prove an AC works:
-If the answer is "no", the feature has a testability gap — fix the CLI/docs, not the verification process.
+- **`test-provable`** — Code must be testable via `npm test` / `npm run build`. Ensure functions have test coverage, outputs are greppable, and build artifacts are inspectable. No running app required.
+- **`runtime-provable`** — Code must be exercisable via CLI or local server. Ensure the binary/CLI produces verifiable stdout, exit codes, or HTTP responses without needing Docker.
+- **`environment-provable`** — Code must work in a Docker verification environment. Ensure the Dockerfile is current, services start correctly, and `docker exec` can exercise the feature. Observability queries should return expected log/trace events.
+- **`escalate`** — Reserved for ACs that genuinely cannot be automated (physical hardware, paid external APIs). This is rare — exhaust all automated approaches first.
+Ask yourself:
+- What tier is this story tagged with?
+- Does my implementation produce the evidence that tier requires?
+- If `test-provable`: are my functions testable and my outputs greppable?
+- If `runtime-provable`: can I run the CLI/server and verify output locally?
+- If `environment-provable`: does `docker exec` work? Are logs flowing to the observability stack?
+If the answer to any of these checks is "no", the feature has a testability gap — fix the code to be verifiable at the appropriate tier.
 ### Dockerfile Maintenance

package/patches/retro/enforcement.md CHANGED Viewed

@@ -11,7 +11,7 @@ quality trends, and mandatory concrete action items with owners.
 ### Verification Effectiveness
-- How many ACs were caught by black-box verification vs slipped through?
+- How many ACs were caught by tier-appropriate verification vs slipped through?
 - Were there false positives (proof said PASS but feature was broken)?
 - Were there false negatives (proof said FAIL but feature actually works)?
 - Time spent on verification — is it proportional to value?
@@ -20,7 +20,7 @@ quality trends, and mandatory concrete action items with owners.
 - Did the verifier hang on permissions? (check for `--allowedTools` issues)
 - Did stories get stuck in verify→dev loops? (check `attempts` counter)
-- Were stories incorrectly flagged as `integration-required`?
+- Were stories assigned the wrong verification tier?
 - Did the verify parser correctly detect `[FAIL]` verdicts?
 ### Documentation Health

package/patches/review/enforcement.md CHANGED Viewed

@@ -1,9 +1,9 @@
 ## WHY
 Review agents approved stories without verifying proof documents existed or
-checking that evidence was black-box (not source-grep). Stories passed review
+checking that evidence matched the story's verification tier. Stories passed review
 with fabricated output and missing coverage data. This patch enforces proof
-existence, black-box evidence quality, and coverage delta reporting as hard
+existence, tier-appropriate evidence quality, and coverage delta reporting as hard
 gates before a story can leave review.
 (FR33, FR34, NFR20)
@@ -18,13 +18,34 @@ gates before a story can leave review.
 ### Proof Quality Checks
-The proof must pass black-box enforcement:
+The proof must pass tier-appropriate evidence enforcement. The required evidence depends on the story's verification tier:
+#### `test-provable` stories
+- Evidence comes from build output, test results, and grep/read of code or generated artifacts
+- `npm test` / `npm run build` output is the primary evidence
+- Source-level assertions (grep against `src/`) are acceptable — this IS the verification method for this tier
+- `docker exec` evidence is NOT required
+- Each AC section must show actual test output or build results
+#### `runtime-provable` stories
+- Evidence comes from running the actual binary, CLI, or server
+- Process execution output (stdout, stderr, exit codes) is the primary evidence
+- HTTP responses from a locally running server are acceptable
+- `docker exec` evidence is NOT required
+- Each AC section must show actual command execution and output
+#### `environment-provable` stories
 - Commands run via `docker exec` (not direct host access)
 - Less than 50% of evidence commands are `grep` against `src/`
 - Each AC section has at least one `docker exec`, `docker ps/logs`, or observability query
 - `[FAIL]` verdicts outside code blocks cause the proof to fail
 - `[ESCALATE]` is acceptable only when all automated approaches are exhausted
+#### `escalate` stories
+- Human judgment is required — automated evidence may be partial or absent
+- Proof document must explain why automation is not possible
+- `[ESCALATE]` verdict is expected and acceptable
 ### Observability
 Run `semgrep scan --config patches/observability/ --config patches/error-handling/ --json` against changed files and report gaps.

package/patches/verify/story-verification.md CHANGED Viewed

@@ -1,35 +1,49 @@
 ## WHY
 Stories were marked "done" with no proof artifact, or with proofs that only
-grepped source code instead of exercising the feature from the user's
-perspective. This patch mandates black-box proof documents, docker exec evidence,
+grepped source code instead of exercising the feature at the appropriate
+verification tier. This patch mandates tier-appropriate proof documents,
 verification tags per AC, and test coverage targets — preventing regressions
-from being hidden behind source-level assertions.
+from being hidden behind inadequate evidence.
 (FR33, FR36, NFR20)
 ## Verification Requirements
-Every story must produce a **black-box proof** — evidence that the feature works from the user's perspective, NOT from reading source code.
+Every story must produce a **proof document** with evidence appropriate to its verification tier.
 ### Proof Standard
 - Proof document at `verification/<story-key>-proof.md`
-- Each AC gets a `## AC N:` section with `docker exec` commands and captured output
-- Evidence must come from running the installed CLI/tool, not from grepping source
+- Each AC gets a `## AC N:` section with tier-appropriate evidence and captured output
 - `[FAIL]` = AC failed with evidence showing what went wrong
 - `[ESCALATE]` = AC genuinely cannot be automated (last resort — try everything first)
+**Tier-dependent evidence rules:**
+- **`test-provable`** — Evidence comes from build + test output + grep/read of code or artifacts. Run `npm test` or `npm run build`, capture results. Source-level assertions are the primary verification method. No running app or Docker required.
+- **`runtime-provable`** — Evidence comes from running the actual binary/server and interacting with it. Start the process, make requests or run commands, capture stdout/stderr/exit codes. No Docker stack required.
+- **`environment-provable`** — Evidence comes from `docker exec` commands and observability queries. Full Docker verification environment required. Each AC section needs at least one `docker exec`, `docker ps/logs`, or observability query. Evidence must come from running the installed CLI/tool in Docker, not from grepping source.
+- **`escalate`** — Human judgment required. Document why automation is not possible. `[ESCALATE]` verdict is expected.
 ### Verification Tags
-For each AC, append a tag indicating verification approach:
-- `<!-- verification: cli-verifiable -->` — default. Can be verified via CLI commands in a Docker container.
-- `<!-- verification: integration-required -->` — requires external systems not available in the test environment (e.g., paid third-party APIs, physical hardware). This is rare — most things including workflows, agent sessions, and multi-step processes CAN be verified in Docker.
+For each AC, append a tag indicating its verification tier:
+- `<!-- verification: test-provable -->` — Can be verified by building and running tests. Evidence: build output, test results, grep/read of code. No running app needed.
+- `<!-- verification: runtime-provable -->` — Requires running the actual binary/CLI/server. Evidence: process output, HTTP responses, exit codes. No Docker stack needed.
+- `<!-- verification: environment-provable -->` — Requires full Docker environment with observability. Evidence: `docker exec` commands, VictoriaLogs queries, multi-service interaction.
+- `<!-- verification: escalate -->` — Cannot be automated. Requires human judgment, physical hardware, or paid external services.
+**Decision criteria:**
+1. Can you prove it with `npm test` or `npm run build` alone? → `test-provable`
+2. Do you need to run the actual binary/server locally? → `runtime-provable`
+3. Do you need Docker, external services, or observability? → `environment-provable`
+4. Have you exhausted all automated approaches? → `escalate`
-**Do not over-tag.** Workflows, sprint planning, user sessions, slash commands, and agent behavior are all verifiable via `docker exec ... claude --print`. Only tag `integration-required` when there is genuinely no automated path.
+**Do not over-tag.** Most stories are `test-provable` or `runtime-provable`. Only use `environment-provable` when Docker infrastructure is genuinely needed. Only use `escalate` as a last resort.
 ### Observability Evidence
-After each `docker exec` command, query the observability backend for log events from the last 30 seconds.
+After each `docker exec` command (applicable to `environment-provable` stories), query the observability backend for log events from the last 30 seconds.
 Use the configured VictoriaLogs endpoint (default: `http://localhost:9428`):
 ```bash

package/ralph/ralph.sh CHANGED Viewed

@@ -279,13 +279,10 @@ check_sprint_complete() {
     local done_count=0
     local flagged_count=0
-    # Load flagged stories for comparison
-    local -A flagged_map
+    # Load flagged stories into a newline-separated string for lookup
+    local flagged_list=""
     if [[ -f "$FLAGGED_STORIES_FILE" ]]; then
-        while IFS= read -r flagged_key; do
-            flagged_key=$(echo "$flagged_key" | sed 's/^[[:space:]]*//;s/[[:space:]]*$//')
-            [[ -n "$flagged_key" ]] && flagged_map["$flagged_key"]=1
-        done < "$FLAGGED_STORIES_FILE"
+        flagged_list=$(sed 's/^[[:space:]]*//;s/[[:space:]]*$//' "$FLAGGED_STORIES_FILE" | grep -v '^$')
     fi
     while IFS=: read -r key value; do
@@ -301,7 +298,7 @@ check_sprint_complete() {
             total=$((total + 1))
             if [[ "$value" == "done" ]]; then
                 done_count=$((done_count + 1))
-            elif [[ -n "${flagged_map[$key]+x}" ]]; then
+            elif [[ -n "$flagged_list" ]] && echo "$flagged_list" | grep -qxF "$key"; then
                 # Retry-exhausted/flagged stories count as "effectively done"
                 # — no autonomous work can be done on them
                 flagged_count=$((flagged_count + 1))
@@ -341,7 +338,7 @@ get_task_counts() {
         if [[ "$key" =~ ^[0-9]+-[0-9]+- ]]; then
             total=$((total + 1))
-            if [[ "$value" == "done" ]]; then
+            if [[ "$value" == "done" || "$value" == "failed" ]]; then
                 completed=$((completed + 1))
             fi
         fi
@@ -442,6 +439,14 @@ flag_story() {
     if ! is_story_flagged "$story_key"; then
         echo "$story_key" >> "$FLAGGED_STORIES_FILE"
     fi
+    # Also update sprint-status.yaml to 'failed' so reconciliation picks it up
+    # and sprint-state.json stays consistent (prevents flagged stories stuck at 'review')
+    local sprint_yaml="${SPRINT_STATUS_FILE:-}"
+    if [[ -n "$sprint_yaml" && -f "$sprint_yaml" ]]; then
+        sed -i.bak "s/^  ${story_key}: .*/  ${story_key}: failed/" "$sprint_yaml" 2>/dev/null
+        rm -f "${sprint_yaml}.bak" 2>/dev/null
+    fi
 }
 # Get list of flagged stories (newline-separated).