npm - agent-harness-kit - Versions diffs - 0.3.0 → 0.5.0 - Mend

agent-harness-kit 0.3.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12) hide show

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "agent-harness-kit",
-  "version": "0.3.0",
+  "version": "0.5.0",
   "description": "Solo-dev harness engineering kit for Claude Code. Layered architecture, structural tests, garbage-collection ritual, review subagents — without the enterprise overhead.",
   "type": "module",
   "bin": {
@@ -40,7 +40,9 @@
   "scripts": {
     "test": "node --test tests/*.test.mjs",
     "lint": "echo 'no-op (kit is plain ESM JS)'",
-    "selftest": "node bin/cli.mjs --version"
+    "selftest": "node bin/cli.mjs --version",
+    "harness:eval": "node src/templates/_adapter-typescript/harness/eval-runner.mjs",
+    "harness:check": "node scripts/kit-structural-check.mjs"
   },
   "dependencies": {
     "@inquirer/prompts": "^7.0.0",

package/src/core/render-templates.mjs CHANGED Viewed

@@ -87,16 +87,15 @@ async function* walk(dir) {
 }
 function buildContext({ projectName, preset, layers, stack, kitVersion }) {
-  const installCmd =
-    stack.language === "python"
-      ? "pip install -e '.[dev]'"
-      : stack.packageManager === "pnpm"
-        ? "pnpm install"
-        : stack.packageManager === "yarn"
-          ? "yarn"
-          : stack.packageManager === "bun"
-            ? "bun install"
-            : "npm install";
+  const installCmd = (() => {
+    if (stack.language === "python") return "pip install -e '.[dev]'";
+    if (stack.language === "go") return "go mod download";
+    if (stack.language === "rust") return "cargo build";
+    if (stack.packageManager === "pnpm") return "pnpm install";
+    if (stack.packageManager === "yarn") return "yarn";
+    if (stack.packageManager === "bun") return "bun install";
+    return "npm install";
+  })();
   const devCmd = (() => {
     switch (stack.framework) {
       case "nextjs":  return "npm run dev";
@@ -107,18 +106,28 @@ function buildContext({ projectName, preset, layers, stack, kitVersion }) {
       case "django":  return "python manage.py runserver";
       case "flask":   return "flask --app app run --debug";
       default:
-        return stack.language === "python" ? "python -m app" : "npm run dev";
+        if (stack.language === "python") return "python -m app";
+        if (stack.language === "go") return "go run ./cmd/...";
+        if (stack.language === "rust") return "cargo run";
+        return "npm run dev";
     }
   })();
   const testCmd = (() => {
     switch (stack.framework) {
       case "django": return "python manage.py test";
       default:
-        return stack.language === "python" ? "pytest -x" : "npm test";
+        if (stack.language === "python") return "pytest -x";
+        if (stack.language === "go") return "go test ./...";
+        if (stack.language === "rust") return "cargo test";
+        return "npm test";
     }
   })();
-  const lintCmd =
-    stack.language === "python" ? "ruff check ." : "npm run lint";
+  const lintCmd = (() => {
+    if (stack.language === "python") return "ruff check .";
+    if (stack.language === "go") return "go vet ./...";
+    if (stack.language === "rust") return "cargo clippy --all-targets -- -D warnings";
+    return "npm run lint";
+  })();
   return {
     projectName,
@@ -137,6 +146,8 @@ function buildContext({ projectName, preset, layers, stack, kitVersion }) {
     kitVersion,
     isTypescript: stack.language === "typescript",
     isPython: stack.language === "python",
+    isGo: stack.language === "go",
+    isRust: stack.language === "rust",
     isNextjs: stack.framework === "nextjs",
     isFastapi: stack.framework === "fastapi",
     isExpress: stack.framework === "express",
@@ -152,11 +163,27 @@ function buildContext({ projectName, preset, layers, stack, kitVersion }) {
 // into the root.
 function pathForStack(rel, stack) {
   if (rel.startsWith("_adapter-typescript/")) {
-    return stack.language === "typescript" ? rel.slice("_adapter-typescript/".length) : null;
+    const stripped = rel.slice("_adapter-typescript/".length);
+    if (stack.language === "typescript") return stripped;
+    // eval-runner.mjs is language-agnostic — share with Go/Rust users who
+    // don't ship a native runner. Avoids duplicating ~322 lines per adapter.
+    if (
+      stripped === "harness/eval-runner.mjs" &&
+      (stack.language === "go" || stack.language === "rust")
+    ) {
+      return stripped;
+    }
+    return null;
   }
   if (rel.startsWith("_adapter-python/")) {
     return stack.language === "python" ? rel.slice("_adapter-python/".length) : null;
   }
+  if (rel.startsWith("_adapter-go/")) {
+    return stack.language === "go" ? rel.slice("_adapter-go/".length) : null;
+  }
+  if (rel.startsWith("_adapter-rust/")) {
+    return stack.language === "rust" ? rel.slice("_adapter-rust/".length) : null;
+  }
   if (rel.startsWith("_preset-nextjs/")) {
     return stack.framework === "nextjs" ? rel.slice("_preset-nextjs/".length) : null;
   }

package/src/templates/.claude/agents/architecture-reviewer.md.hbs CHANGED Viewed

@@ -1,6 +1,6 @@
 ---
 name: architecture-reviewer
-description: Use this agent immediately after any change that touches multiple layers, adds a new domain, or modifies imports across module boundaries. Verifies the {{layersJoined}} rule, provider boundaries, and golden-principles.md compliance. Read-only — never modifies files.
+description: Use this agent when the Stop hook surfaces a `multi-layer-review` flag (changes span ≥2 layers in a single domain — mechanical count, not self-judgment), or when a change adds a new domain / modifies imports across module boundaries. Verifies the {{layersJoined}} rule, provider boundaries, and golden-principles.md compliance. Read-only — never modifies files.
 tools: Read, Grep, Glob, Bash({{#if isPython}}python -m harness.structural_test{{else}}npm run harness:check{{/if}}), Bash(git diff:*)
 model: sonnet
 ---

package/src/templates/.claude/skills/propose-harness-improvement/SKILL.md CHANGED Viewed

@@ -18,6 +18,12 @@ suggested-turns: 8
      invoke `/write-skill`.
    - **(d) Wrong layer / architecture** — the structure invited the
      mistake. Fix: write an ADR via `/add-adr`.
+   - **(e) Wrong instruction in prompt** — the failure traces back to a
+     skill/agent prompt that was ambiguous, misleading, or under-constrained.
+     The agent followed the prompt correctly but the prompt itself led astray.
+     Fix: edit the offending file under `.claude/skills/<name>/SKILL.md` or
+     `.claude/agents/<name>.md`. Re-run `/eval-runner` afterward to confirm
+     the regression is closed.
 3. **Append entry** to `docs/agent-failures.md` with: date, symptom, fix,
    fix-type, file modified.
 4. **Apply the fix in the right place.** NEVER paper over with a CLAUDE.md

package/src/templates/CLAUDE.md.hbs CHANGED Viewed

@@ -10,7 +10,7 @@ an encyclopedia.
 - Dev:        `{{devCmd}}`
 - Test:       `{{testCmd}}`
 - Lint:       `{{lintCmd}}`
-- Structural: `{{#if isPython}}python -m harness.structural_test{{else}}npm run harness:check{{/if}}` (must pass before any PR)
+- Structural: `{{#if isPython}}python -m harness.structural_test{{else}}{{#if isGo}}go run harness/structural_check.go{{else}}{{#if isRust}}node harness/structural-check.mjs{{else}}npm run harness:check{{/if}}{{/if}}{{/if}}` (must pass before any PR)
 ## Architecture (brief)
@@ -68,3 +68,5 @@ Full list: `docs/golden-principles.md`.
   imports across layers).
 - Don't update CLAUDE.md without proposing a harness improvement
   (`/propose-harness-improvement`).
+- Don't grow CLAUDE.md past 200 instructions — Stop hook blocks the stop on
+  overflow (HumanLayer measurement). Excess belongs in `docs/` or @-imports.

package/src/templates/_adapter-go/harness/structural_check.go.hbs ADDED Viewed

@@ -0,0 +1,263 @@
+// harness/structural_check.go — forward-only layer enforcement for Go.
+//
+// Reads harness.config.json + go.mod. For each domain, parses every .go
+// file's imports (via go/parser stdlib) and asserts that no internal
+// import goes "backward" through the layer order.
+//
+// Layer assignment: a file's layer = first path segment after `<root>/`.
+// E.g. `internal/repo/store.go` belongs to the `repo` layer when
+// `domains[0].root == "internal"`.
+//
+// External imports (stdlib, third-party modules) are ignored — only
+// imports starting with `<module-path>/<root>/` are checked.
+//
+// Exit codes:
+//   0 — clean (or only baselined violations)
+//   2 — new violations found
+//
+// Run: go run harness/structural_check.go [--file <path>]
+package main
+import (
+	"encoding/json"
+	"fmt"
+	"go/parser"
+	"go/token"
+	"os"
+	"path/filepath"
+	"regexp"
+	"sort"
+	"strings"
+)
+// Domain mirrors one entry of `domains` in harness.config.json.
+type Domain struct {
+	Name   string   `json:"name"`
+	Root   string   `json:"root"`   // path prefix (relative to the repo root) that files and imports must start with
+	Layers []string `json:"layers"` // layer names; their slice index is what the forward-only check compares
+}
+// Config is the subset of harness.config.json this check reads.
+type Config struct {
+	Domains []Domain `json:"domains"`
+}
+// Violation records one import that the layer-order check rejected.
+type Violation struct {
+	File   string // repo-relative path of the importing file
+	Line   int    // line of the import spec in File
+	From   string // layer of the importing file
+	To     string // layer of the imported package
+	Domain string // name of the domain both sides belong to
+	Key    string // "<file>::<import path>"; the identity stored in the baseline file
+}
+// readModulePath returns the module path declared in <repoRoot>/go.mod.
+// It fails when go.mod cannot be read or contains no line of the form
+// `module <path>`.
+func readModulePath(repoRoot string) (string, error) {
+	data, err := os.ReadFile(filepath.Join(repoRoot, "go.mod"))
+	if err != nil {
+		return "", err
+	}
+	// (?m) makes ^ match at every line start, so the directive is found
+	// wherever it sits in the file.
+	re := regexp.MustCompile(`(?m)^module\s+(\S+)`)
+	m := re.FindStringSubmatch(string(data))
+	if len(m) < 2 {
+		return "", fmt.Errorf("module declaration not found in go.mod")
+	}
+	return m[1], nil
+}
+// readConfig loads and decodes <repoRoot>/harness.config.json. Read and
+// JSON decoding errors are returned to the caller unchanged.
+func readConfig(repoRoot string) (*Config, error) {
+	data, err := os.ReadFile(filepath.Join(repoRoot, "harness.config.json"))
+	if err != nil {
+		return nil, err
+	}
+	var c Config
+	if err := json.Unmarshal(data, &c); err != nil {
+		return nil, err
+	}
+	return &c, nil
+}
+// readBaseline returns the set of baselined violation keys stored in
+// <repoRoot>/.harness/structural-baseline.json (a JSON array of strings).
+// A missing, unreadable, or malformed file yields an empty set rather than
+// an error.
+func readBaseline(repoRoot string) map[string]bool {
+	out := map[string]bool{}
+	data, err := os.ReadFile(filepath.Join(repoRoot, ".harness/structural-baseline.json"))
+	if err != nil {
+		return out
+	}
+	var keys []string
+	if err := json.Unmarshal(data, &keys); err != nil {
+		return out
+	}
+	for _, k := range keys {
+		out[k] = true
+	}
+	return out
+}
+// layerOf maps a repo-relative path to its layer. The path may be a file
+// path or the module-relative part of an import path.
+//
+// Returns (layerName, domainPtr), or ("", nil) when the path is not under any
+// domain root or its first segment after the root is not a configured layer.
+//
+// The path and the domain root are normalized to forward slashes before
+// matching. Without that, an OS-native Windows path such as
+// `internal\repo\store.go` passes the prefix test but is never split into
+// segments, so no layer matches and the file is silently skipped.
+func layerOf(relPath string, cfg *Config) (string, *Domain) {
+	slashPath := filepath.ToSlash(relPath)
+	for i := range cfg.Domains {
+		d := &cfg.Domains[i]
+		rootPrefix := filepath.ToSlash(d.Root) + "/"
+		if !strings.HasPrefix(slashPath, rootPrefix) {
+			continue
+		}
+		// SplitN always returns at least one element, so [0] is safe.
+		first := strings.SplitN(strings.TrimPrefix(slashPath, rootPrefix), "/", 2)[0]
+		for _, l := range d.Layers {
+			if l == first {
+				return l, d
+			}
+		}
+	}
+	return "", nil
+}
+// For an import path like "github.com/foo/bar/internal/repo/store",
+// returns ("repo", domainPtr) when domain.root == "internal" and
+// modulePath == "github.com/foo/bar". Returns ("", nil) for external imports.
+//
+// "External" here means any import path that does not start with
+// `<modulePath>/`; the remainder of an internal path is resolved by layerOf.
+func importLayer(importPath, modulePath string, cfg *Config) (string, *Domain) {
+	prefix := modulePath + "/"
+	if !strings.HasPrefix(importPath, prefix) {
+		return "", nil
+	}
+	relPath := strings.TrimPrefix(importPath, prefix)
+	return layerOf(relPath, cfg)
+}
+// indexOf returns the index of the first element of slice equal to s,
+// or -1 when s is not present.
+func indexOf(slice []string, s string) int {
+	for i, v := range slice {
+		if v == s {
+			return i
+		}
+	}
+	return -1
+}
+// main runs the structural check against the current working directory.
+//
+// Flow:
+//  1. Load harness.config.json, the module path from go.mod, and the baseline.
+//  2. Walk the tree (skipping vendor/ and dot-directories), parse the imports
+//     of every non-test .go file that belongs to a configured layer, and
+//     collect same-domain imports where the importer's layer index is lower
+//     than the imported layer's index and the key is not baselined.
+//  3. If no baseline file exists yet and violations were found, write them as
+//     the baseline and exit 0.
+//  4. Otherwise exit 0 when clean, or print each violation and exit 2.
+//
+// Exit code 1 is used for I/O and configuration failures, including a failure
+// to write the first-run baseline. Previously that failure was ignored and
+// the run still reported success.
+func main() {
+	repoRoot, err := os.Getwd()
+	if err != nil {
+		fmt.Fprintln(os.Stderr, err)
+		os.Exit(1)
+	}
+	scopedFile := ""
+	for i, a := range os.Args {
+		if a == "--file" && i+1 < len(os.Args) {
+			scopedFile = os.Args[i+1]
+		}
+	}
+	// Resolve the --file target once instead of once per walked file.
+	scopedAbs, scopedJoined := "", ""
+	if scopedFile != "" {
+		scopedAbs, _ = filepath.Abs(scopedFile)
+		scopedJoined = filepath.Join(repoRoot, scopedFile)
+	}
+	cfg, err := readConfig(repoRoot)
+	if err != nil {
+		fmt.Fprintln(os.Stderr, "read harness.config.json:", err)
+		os.Exit(1)
+	}
+	modulePath, err := readModulePath(repoRoot)
+	if err != nil {
+		fmt.Fprintln(os.Stderr, "read go.mod:", err)
+		os.Exit(1)
+	}
+	baselinePath := filepath.Join(repoRoot, ".harness/structural-baseline.json")
+	baseline := readBaseline(repoRoot)
+	// An existing but empty (or unparsable) baseline still counts as present,
+	// so it is never silently regenerated.
+	baselineExists := len(baseline) > 0 || fileExists(baselinePath)
+	violations := []Violation{}
+	fset := token.NewFileSet()
+	walkErr := filepath.Walk(repoRoot, func(path string, info os.FileInfo, err error) error {
+		if err != nil {
+			return err
+		}
+		if info.IsDir() {
+			if info.Name() == "vendor" || info.Name() == ".git" || strings.HasPrefix(info.Name(), ".") {
+				// Never skip the root itself, even if its name starts with a dot.
+				if path == repoRoot {
+					return nil
+				}
+				return filepath.SkipDir
+			}
+			return nil
+		}
+		if !strings.HasSuffix(path, ".go") || strings.HasSuffix(path, "_test.go") {
+			return nil
+		}
+		if scopedFile != "" && path != scopedAbs && path != scopedJoined {
+			return nil
+		}
+		relPath, _ := filepath.Rel(repoRoot, path)
+		srcLayer, srcDomain := layerOf(relPath, cfg)
+		if srcDomain == nil {
+			return nil
+		}
+		srcIdx := indexOf(srcDomain.Layers, srcLayer)
+		// ImportsOnly stops parsing after the import block. Files that fail
+		// to parse even that far are skipped (best-effort).
+		f, err := parser.ParseFile(fset, path, nil, parser.ImportsOnly)
+		if err != nil {
+			return nil
+		}
+		for _, imp := range f.Imports {
+			impPath := strings.Trim(imp.Path.Value, `"`)
+			tgtLayer, tgtDomain := importLayer(impPath, modulePath, cfg)
+			// Only imports within the same domain are subject to the rule.
+			if tgtDomain == nil || tgtDomain.Name != srcDomain.Name {
+				continue
+			}
+			tgtIdx := indexOf(tgtDomain.Layers, tgtLayer)
+			if srcIdx < tgtIdx {
+				key := fmt.Sprintf("%s::%s", relPath, impPath)
+				if baseline[key] {
+					continue
+				}
+				violations = append(violations, Violation{
+					File:   relPath,
+					Line:   fset.Position(imp.Pos()).Line,
+					From:   srcLayer,
+					To:     tgtLayer,
+					Domain: srcDomain.Name,
+					Key:    key,
+				})
+			}
+		}
+		return nil
+	})
+	if walkErr != nil {
+		fmt.Fprintln(os.Stderr, "walk:", walkErr)
+		os.Exit(1)
+	}
+	// First-run baseline: write current violations + exit 0.
+	if !baselineExists && len(violations) > 0 {
+		if err := os.MkdirAll(filepath.Dir(baselinePath), 0o755); err != nil {
+			fmt.Fprintln(os.Stderr, "create .harness:", err)
+			os.Exit(1)
+		}
+		keys := make([]string, len(violations))
+		for i, v := range violations {
+			keys[i] = v.Key
+		}
+		sort.Strings(keys)
+		out, err := json.MarshalIndent(keys, "", "  ")
+		if err != nil {
+			fmt.Fprintln(os.Stderr, "encode baseline:", err)
+			os.Exit(1)
+		}
+		// Claiming "baselined" when nothing was written would make the next
+		// run try to baseline again, so a failed write is fatal.
+		if err := os.WriteFile(baselinePath, append(out, '\n'), 0o644); err != nil {
+			fmt.Fprintln(os.Stderr, "write baseline:", err)
+			os.Exit(1)
+		}
+		fmt.Printf("✓ structural test: baselined %d existing violations (.harness/structural-baseline.json).\n", len(violations))
+		fmt.Println("  New violations introduced after this point will block. Existing ones can be fixed incrementally.")
+		os.Exit(0)
+	}
+	if len(violations) == 0 {
+		fmt.Println("✓ structural test passed")
+		os.Exit(0)
+	}
+	for _, v := range violations {
+		fmt.Fprintf(os.Stderr, "✖ %s:%d  layer=%s → %s  (must be forward-only)\n", v.File, v.Line, v.From, v.To)
+	}
+	fmt.Fprintf(os.Stderr, "\n%d new layer violation(s). Fix the import direction.\n", len(violations))
+	if len(cfg.Domains) > 0 {
+		fmt.Fprintf(os.Stderr, "Layer order for domain %q: %s\n", cfg.Domains[0].Name, strings.Join(cfg.Domains[0].Layers, " → "))
+	}
+	os.Exit(2)
+}
+// fileExists reports whether os.Stat succeeds for p. Any Stat error, not
+// only "does not exist", is reported as false.
+func fileExists(p string) bool {
+	_, err := os.Stat(p)
+	return err == nil
+}

package/src/templates/_adapter-rust/harness/structural-check.mjs.hbs ADDED Viewed

@@ -0,0 +1,198 @@
+// harness/structural-check.mjs — forward-only layer enforcement for Rust.
+//
+// Reads harness.config.json. For each domain, walks every .rs file under
+// the domain root (excluding target/, .git/, vendor/) and asserts no
+// `use crate::<layer>::...` import goes "backward" through the layer order.
+//
+// Layer assignment: a file's layer = first path segment after `<root>/`.
+// E.g. `src/repo/store.rs` belongs to the `repo` layer when
+// `domains[0].root == "src"`.
+//
+// Why Node + regex (not a Cargo binary):
+//   - Avoids polluting the user's Cargo workspace with a check crate.
+//   - Node is already required to install the kit (npx).
+//   - Regex over `use crate::<X>` is sufficient — we never need full
+//     parse trees because the layer rule is a syntactic property.
+//   - `super::` and `self::` are scoped to the current module, which is
+//     by definition the same layer, so we ignore them.
+//
+// Exit codes:
+//   0 — clean (or only baselined violations; or first-run baseline write)
+//   2 — new violations found
+import {
+  readFileSync,
+  writeFileSync,
+  existsSync,
+  mkdirSync,
+  readdirSync,
+} from "node:fs";
+import { join, relative, sep } from "node:path";
+const repoRoot = process.cwd();
+const SKIP_DIRS = new Set([
+  ".git",
+  "target",
+  "node_modules",
+  ".harness",
+  "vendor",
+]);
+// Loads and parses harness.config.json from the repo root (process.cwd()).
+// Throws if the file is missing or is not valid JSON.
+function readConfig() {
+  const path = join(repoRoot, "harness.config.json");
+  return JSON.parse(readFileSync(path, "utf8"));
+}
+// Reads .harness/structural-baseline.json.
+// Returns { exists, set }: `exists` says whether the file is present and
+// `set` holds the baselined violation keys. A file that is present but
+// unparsable, or not a JSON array, yields exists=true with an empty set, so
+// it is treated as an empty baseline rather than a first run.
+function readBaseline() {
+  const path = join(repoRoot, ".harness/structural-baseline.json");
+  if (!existsSync(path)) return { exists: false, set: new Set() };
+  try {
+    const arr = JSON.parse(readFileSync(path, "utf8"));
+    return { exists: true, set: new Set(Array.isArray(arr) ? arr : []) };
+  } catch {
+    return { exists: true, set: new Set() };
+  }
+}
+// Recursively yields the path of every `.rs` file under `root`.
+// Entries named in SKIP_DIRS and any entry whose name starts with "." are
+// skipped, whether they are files or directories. Yields nothing when
+// `root` does not exist.
+function* walkRustFiles(root) {
+  if (!existsSync(root)) return;
+  for (const ent of readdirSync(root, { withFileTypes: true })) {
+    if (SKIP_DIRS.has(ent.name) || ent.name.startsWith(".")) continue;
+    const full = join(root, ent.name);
+    if (ent.isDirectory()) {
+      yield* walkRustFiles(full);
+    } else if (ent.isFile() && ent.name.endsWith(".rs")) {
+      yield full;
+    }
+  }
+}
+// Resolves a repo-relative path to `{ layer, domain }`, or null.
+// A path belongs to a domain when it starts with `<root>/` (or `<root>` plus
+// the OS separator). Its layer is the first segment after that prefix, and
+// only counts if the domain lists it in `layers`.
+function layerOf(relPath, cfg) {
+  for (const domain of cfg.domains) {
+    // "/" is tried before the OS separator, as in the original lookup order.
+    const prefix = [domain.root + "/", domain.root + sep].find((candidate) =>
+      relPath.startsWith(candidate),
+    );
+    if (prefix === undefined) continue;
+    const [head] = relPath.slice(prefix.length).split(/[\/\\]/);
+    if (domain.layers.includes(head)) return { layer: head, domain };
+  }
+  return null;
+}
+// Capture the first identifier after `use crate::` (or `pub use crate::`).
+// Group 1 is the plain form (`use crate::service::X`). Group 2 is the body of
+// a grouped import (`use crate::{service::X, repo}`) up to the terminating
+// `;`. The plain-only pattern skipped grouped imports entirely, so backward
+// imports written that way were never reported.
+const USE_CRATE_RE =
+  /\b(?:pub\s+)?use\s+crate::(?:([a-zA-Z_][a-zA-Z0-9_]*)|\{([^;]*))/g;
+const LEADING_IDENT_RE = /^\s*([a-zA-Z_][a-zA-Z0-9_]*)/;
+// Splits the body of a `{ ... }` use-group into its top-level items. Commas
+// inside nested braces do not split, and scanning stops at the brace that
+// closes the group.
+function splitUseGroup(body) {
+  const items = [];
+  let depth = 0;
+  let current = "";
+  for (const ch of body) {
+    if (ch === "}" && depth === 0) break;
+    if (ch === "," && depth === 0) {
+      items.push(current);
+      current = "";
+      continue;
+    }
+    if (ch === "{") depth += 1;
+    else if (ch === "}") depth -= 1;
+    current += ch;
+  }
+  items.push(current);
+  return items;
+}
+// Returns the first path segment of every `use crate::...` on the line, in
+// source order. Grouped imports contribute one entry per top-level item.
+// Callers filter the result against the configured layers, so non-layer
+// names such as `self` are harmless. Limitation: a group whose items continue
+// on following lines is only seen up to the end of this line.
+function parseUseCrate(line) {
+  const targets = [];
+  for (const [, single, group] of line.matchAll(USE_CRATE_RE)) {
+    if (single !== undefined) {
+      targets.push(single);
+      continue;
+    }
+    for (const item of splitUseGroup(group)) {
+      const head = LEADING_IDENT_RE.exec(item);
+      if (head) targets.push(head[1]);
+    }
+  }
+  return targets;
+}
+// Reduces a source line to its code portion. Everything from the first `//`
+// outside a string is dropped, and double-quoted strings are removed whole
+// (quotes included). This stops `use crate::X` being matched inside text such
+// as `"use crate::service"` or `// use crate::service`.
+// A backslash inside a string consumes the following character, so `\"` does
+// not end the string. Block comments and char literals (single quotes — also
+// Rust lifetimes) are deliberately not handled.
+function stripCommentsAndStrings(line) {
+  const kept = [];
+  let insideString = false;
+  let pos = 0;
+  while (pos < line.length) {
+    const ch = line[pos];
+    if (insideString) {
+      if (ch === "\\" && pos + 1 < line.length) {
+        pos += 2; // skip the backslash and the character it escapes
+        continue;
+      }
+      if (ch === '"') insideString = false;
+      pos += 1;
+      continue;
+    }
+    if (ch === '"') {
+      insideString = true;
+      pos += 1;
+      continue;
+    }
+    // Start of a line comment: nothing after it is code.
+    if (ch === "/" && line[pos + 1] === "/") break;
+    kept.push(ch);
+    pos += 1;
+  }
+  return kept.join("");
+}
+// Entry point: scans every configured domain for backward `use crate::<layer>`
+// imports and exits 0 (clean, or first-run baseline written) or 2 (new
+// violations).
+function main() {
+  const cfg = readConfig();
+  const { exists: baselineExists, set: baselineSet } = readBaseline();
+  const violations = [];
+  for (const d of cfg.domains) {
+    const rootDir = join(repoRoot, d.root);
+    for (const file of walkRustFiles(rootDir)) {
+      const relPath = relative(repoRoot, file);
+      const src = layerOf(relPath, cfg);
+      // Files directly under the root, or under a non-layer directory, have
+      // no layer and are not checked.
+      if (!src) continue;
+      const srcIdx = src.domain.layers.indexOf(src.layer);
+      const content = readFileSync(file, "utf8");
+      const lines = content.split("\n");
+      for (let i = 0; i < lines.length; i++) {
+        const codeOnly = stripCommentsAndStrings(lines[i]);
+        const targets = parseUseCrate(codeOnly);
+        for (const tgtLayer of targets) {
+          // `crate::<X>` where X is not a configured layer is ignored.
+          if (!src.domain.layers.includes(tgtLayer)) continue;
+          const tgtIdx = src.domain.layers.indexOf(tgtLayer);
+          // A violation is an import of a layer that sits later in the
+          // configured order than the importing file's own layer.
+          if (srcIdx < tgtIdx) {
+            // Keyed per file + target layer (not per line), so several
+            // imports of one layer from one file share a baseline entry.
+            const key = `${relPath}::${tgtLayer}`;
+            if (baselineSet.has(key)) continue;
+            violations.push({
+              file: relPath,
+              line: i + 1,
+              from: src.layer,
+              to: tgtLayer,
+              domain: src.domain.name,
+              key,
+            });
+          }
+        }
+      }
+    }
+  }
+  // First-run baseline: no baseline file + violations → record + exit 0.
+  if (!baselineExists && violations.length > 0) {
+    mkdirSync(join(repoRoot, ".harness"), { recursive: true });
+    // De-duplicate because several lines can produce the same key.
+    const keys = [...new Set(violations.map((v) => v.key))].sort();
+    writeFileSync(
+      join(repoRoot, ".harness/structural-baseline.json"),
+      JSON.stringify(keys, null, 2) + "\n",
+    );
+    console.log(
+      `✓ structural test: baselined ${keys.length} existing violation(s) (.harness/structural-baseline.json).`,
+    );
+    console.log(
+      "  New violations introduced after this point will block. Existing ones can be fixed incrementally.",
+    );
+    process.exit(0);
+  }
+  if (violations.length === 0) {
+    console.log("✓ structural test passed");
+    process.exit(0);
+  }
+  for (const v of violations) {
+    console.error(
+      `✖ ${v.file}:${v.line}  layer=${v.from} → ${v.to}  (must be forward-only)`,
+    );
+  }
+  console.error(
+    `\n${violations.length} new layer violation(s). Fix the import direction.`,
+  );
+  // Only the first domain's layer order is printed as a hint.
+  if (cfg.domains.length > 0) {
+    console.error(
+      `Layer order for domain "${cfg.domains[0].name}": ${cfg.domains[0].layers.join(" → ")}`,
+    );
+  }
+  process.exit(2);
+}
+main();

package/src/templates/docs/agent-failures.md CHANGED Viewed

@@ -15,7 +15,7 @@ The `/propose-harness-improvement` skill appends entries here automatically.
 ```
 ### YYYY-MM-DD  <slug>
 - **Symptom:** <what went wrong>
-- **Classification:** (a) missing context | (b) missing rule | (c) missing tool/skill | (d) wrong layer
+- **Classification:** (a) missing context | (b) missing rule | (c) missing tool/skill | (d) wrong layer | (e) wrong instruction in prompt
 - **Fix applied:** <what we did>
 - **Fix lives in:** path/or/file
 ```

package/src/templates/docs/golden-principles.md.hbs CHANGED Viewed

@@ -74,6 +74,51 @@ Why: Mitchell Hashimoto's discipline. CLAUDE.md is a table of contents — it
 won't be re-read on every action.
 Enforced by: `/propose-harness-improvement` skill.
+## 8. CLAUDE.md is bounded — at most 200 instructions
+CLAUDE.md is loaded into context every session. Beyond ~150-200 instructions
+(HumanLayer measurement) agents stop following it reliably; verbose
+CLAUDE.md silently degrades behavior. Promote details to `docs/` or use
+`@-imports` to load context on demand.
+Why: closes principle 7's loop — without a hard cap, "every failure becomes
+a CLAUDE.md line" is the path of least resistance and CLAUDE.md grows
+unbounded until the agent ignores it.
+Enforced by: Stop hook (`scripts/precompletion-checklist.sh`) counts
+bullets and numbered items in `CLAUDE.md` against
+`harness.config.json` `claudeMd.maxInstructions` (default 200) and blocks
+the stop on overflow.
+## 9. Baselines are decreasing-only
+`.harness/structural-baseline.json` lists existing violations the codebase
+inherits when a new structural rule is introduced. New code must not add to
+this file — fixes only REMOVE entries.
+Why: a growing baseline silently masks structural-test failures. Without
+this guard, the path of least resistance for a violation is "append it to
+the baseline," which defeats the rule. PMD's baseline pattern works only
+because it's enforced as monotonic.
+Enforced by: pre-push hook (`scripts/pre-push.sh`) compares
+`.harness/structural-baseline.json` length to its HEAD version and blocks
+the push when the count grew.
+## 10. Reviewer subagent triggers are mechanical, not self-judged
+`architecture-reviewer` runs when changes span ≥2 layers in a single
+domain. The decision is made by counting layers off
+`harness.config.json` `domains[].layers` against the changed-file set —
+not by the agent guessing whether its diff "touches multiple layers".
+Why: self-judged triggers fail open on borderline cases. The agent that
+just shipped a layer-spanning change is the one least equipped to notice
+it. Mechanical counting closes that gap.
+Enforced by: Stop hook (`scripts/precompletion-checklist.sh`) emits a
+`multi-layer-review` failure when `git` reveals ≥2 touched layers in any
+domain. The agent reads the recommendation, invokes
+`architecture-reviewer` (or documents why review is unnecessary), and the
+loop guard (`stop_hook_active`) lets the next stop succeed.
 ---
 _Add new principles via `/structural-test-author`, which forces you to

package/src/templates/harness.config.json.hbs CHANGED Viewed

@@ -7,14 +7,14 @@
   "domains": [
     {
       "name": "default",
-      "root": "{{#if isPython}}app{{else}}src{{/if}}",
+      "root": "{{#if isPython}}app{{else}}{{#if isGo}}internal{{else}}src{{/if}}{{/if}}",
       "layers": [{{#each layers}}"{{this}}"{{#unless @last}}, {{/unless}}{{/each}}]
     }
   ],
   "providers": ["auth", "telemetry", "feature-flags"],
   "goldenPrinciples": "docs/golden-principles.md",
   "structuralTest": {
-    "engine": "{{#if isPython}}libcst{{else}}ts-morph{{/if}}",
+    "engine": "{{#if isPython}}libcst{{else}}{{#if isGo}}go-parser{{else}}{{#if isRust}}rust-regex{{else}}ts-morph{{/if}}{{/if}}{{/if}}",
     "configPath": ".harness/structural-test.config.json",
     "blockOnViolation": true
   },
@@ -28,6 +28,13 @@
     "maxFixesPerRun": 3,
     "scope": ["dead-imports", "duplicate-utils", "layer-violations", "doc-drift"]
   },
+  "claudeMd": {
+    "path": "CLAUDE.md",
+    "maxInstructions": 200
+  },
+  "recovery": {
+    "headless": false
+  },
   "models": {
     "main": "claude-sonnet-4-6",
     "reviewers": "claude-sonnet-4-6",

package/src/templates/scripts/pre-push.sh CHANGED Viewed

@@ -4,6 +4,35 @@
 # Lives in scripts/ so it ships with the repo; install via install-git-hooks.sh.
 set -e
+# Baseline monotonic guard. .harness/structural-baseline.json is decreasing-
+# only — fixes REMOVE entries; no path should ADD them. Catches the "mask
+# violations by baselining them" anti-pattern before code leaves the machine.
+# Runs first because a grown baseline silently masks structural-test failures.
+BASELINE_FILE=".harness/structural-baseline.json"
+if [ -f "$BASELINE_FILE" ] \
+   && command -v jq >/dev/null 2>&1 \
+   && git rev-parse --verify HEAD >/dev/null 2>&1 \
+   && git cat-file -e "HEAD:$BASELINE_FILE" 2>/dev/null; then
+  CURRENT_COUNT=$(jq 'length' "$BASELINE_FILE" 2>/dev/null || echo 0)
+  HEAD_COUNT=$(git show "HEAD:$BASELINE_FILE" 2>/dev/null | jq 'length' 2>/dev/null || echo 0)
+  if [ "$CURRENT_COUNT" -gt "$HEAD_COUNT" ]; then
+    {
+      echo
+      echo "[pre-push] BLOCKED: structural-baseline.json grew vs HEAD"
+      echo "  Previous: $HEAD_COUNT entries"
+      echo "  Current:  $CURRENT_COUNT entries (+$((CURRENT_COUNT - HEAD_COUNT)))"
+      echo
+      echo "Baseline is decreasing-only. New violations should be FIXED,"
+      echo "not appended to the baseline."
+      echo
+      echo "To bypass intentionally (e.g. legitimate refactor that re-baselines"
+      echo "across a domain boundary):"
+      echo "  git push --no-verify   # then document the reason in the commit"
+    } >&2
+    exit 2
+  fi
+fi
 echo "[pre-push] running structural test…"
 if [ -f harness.config.json ] && grep -q '"language": "python"' harness.config.json; then
   python -m harness.structural_test

package/src/templates/scripts/precompletion-checklist.sh.hbs CHANGED Viewed

@@ -4,8 +4,10 @@
 # failure context (not just check names) via stderr and exit 2. On second
 # stop (stop_hook_active=true), exit 0 to allow real exit.
 #
-# Optional: set AHK_HEADLESS_RECOVER=1 to spawn `claude -p` in the background
-# for one turn of recovery (costs tokens; off by default).
+# Optional headless recovery: when enabled, spawn `claude -p` in the background
+# for one turn of recovery on failure. Costs tokens; off by default. Configure
+# via harness.config.json `.recovery.headless` (persistent), or override per-run
+# with AHK_HEADLESS_RECOVER=1 (env var wins).
 set -e
 INPUT=$(cat)
@@ -53,6 +55,86 @@ elif [ -f pyproject.toml ] && command -v ruff >/dev/null 2>&1; then
   run_check ruff ruff check . || true
 fi
+# CLAUDE.md instruction cap. HumanLayer measurement: agents stop following
+# CLAUDE.md reliably beyond ~150-200 bullets/numbered items. Treat the file
+# as a table of contents; promote details to docs/ or @-imports.
+if [ -f harness.config.json ] && command -v jq >/dev/null 2>&1; then
+  CMD_PATH=$(jq -r '.claudeMd.path // "CLAUDE.md"' harness.config.json)
+  CMD_CAP=$(jq -r '.claudeMd.maxInstructions // 200' harness.config.json)
+  if [ -f "$CMD_PATH" ] && [ "$CMD_CAP" -gt 0 ] 2>/dev/null; then
+    # grep -c prints "0" AND exits 1 when nothing matches, so `|| echo 0`
+    # would capture "0\n0" and break the integer comparison below. `|| true`
+    # keeps grep's own count and stays safe under `set -e`; the :-0 default
+    # covers the case where grep itself could not read the file.
+    CMD_COUNT=$(grep -cE '^[[:space:]]*([-*]|[0-9]+\.)[[:space:]]' "$CMD_PATH" 2>/dev/null || true)
+    CMD_COUNT=${CMD_COUNT:-0}
+    if [ "$CMD_COUNT" -gt "$CMD_CAP" ]; then
+      {
+        echo "$CMD_PATH instruction count: $CMD_COUNT (cap: $CMD_CAP)"
+        echo
+        echo "HumanLayer measurement: agents stop following CLAUDE.md reliably"
+        echo "beyond ~150-200 instructions. Your file exceeds the cap."
+        echo
+        echo "Fix options:"
+        echo "  - extract sections to docs/ and link from CLAUDE.md"
+        echo "  - use @-imports to load detailed context on demand"
+        echo "  - delete obsolete rules (run /garbage-collection)"
+        echo
+        echo "Adjust the cap (with justification) in harness.config.json:"
+        echo "  .claudeMd.maxInstructions"
+      } > "$TMPDIR_HOOK/claude-md-cap.out"
+      echo "claude-md-cap" >> "$TMPDIR_HOOK/failed.list"
+    fi
+  fi
+fi
+# Multi-layer review trigger. When uncommitted/staged/untracked changes touch
+# ≥2 layers within a single domain, the `architecture-reviewer` subagent
+# should run before commit. Replaces the agent's self-judgment about "touches
+# multiple layers" (the §4.3 #3 ambiguity in the harness-techniques research)
+# with a mechanical count off `harness.config.json` `domains[].layers` /
+# `.root`. Fires once per stop; the loop guard (`stop_hook_active`) lets the
+# next stop succeed after the agent has read the recommendation.
+if [ -f harness.config.json ] && command -v jq >/dev/null 2>&1 && command -v git >/dev/null 2>&1; then
+  CHANGED=$(
+    {
+      git diff --name-only 2>/dev/null || true
+      git diff --name-only --cached 2>/dev/null || true
+      git ls-files --others --exclude-standard 2>/dev/null || true
+    } | sort -u
+  )
+  if [ -n "$CHANGED" ]; then
+    NUM_DOMAINS=$(jq '.domains | length' harness.config.json 2>/dev/null || echo 0)
+    MULTI_OUT="$TMPDIR_HOOK/multi-layer-review.out"
+    : > "$MULTI_OUT"
+    MULTI_HIT=0
+    i=0
+    while [ "$i" -lt "$NUM_DOMAINS" ]; do
+      ROOT=$(jq -r ".domains[$i].root" harness.config.json)
+      DOMAIN=$(jq -r ".domains[$i].name" harness.config.json)
+      TOUCHED_COUNT=0
+      TOUCHED_NAMES=""
+      while IFS= read -r layer; do
+        [ -z "$layer" ] && continue
+        if echo "$CHANGED" | grep -qE "^${ROOT}/${layer}(/|$)"; then
+          TOUCHED_COUNT=$((TOUCHED_COUNT + 1))
+          TOUCHED_NAMES="$TOUCHED_NAMES $layer"
+        fi
+      done < <(jq -r ".domains[$i].layers[]" harness.config.json)
+      if [ "$TOUCHED_COUNT" -ge 2 ]; then
+        echo "Domain '$DOMAIN' has changes spanning $TOUCHED_COUNT layers:$TOUCHED_NAMES" >> "$MULTI_OUT"
+        MULTI_HIT=1
+      fi
+      i=$((i + 1))
+    done
+    if [ "$MULTI_HIT" = "1" ]; then
+      {
+        echo
+        echo "Recommend invoking the 'architecture-reviewer' subagent before commit."
+        echo "Mechanical detection — replaces self-judgment about 'touches multiple layers'."
+        echo "If review is genuinely not needed (e.g. mass-rename across layers), state"
+        echo "the reason in the commit message and re-run; the loop guard will release."
+      } >> "$MULTI_OUT"
+      echo "multi-layer-review" >> "$TMPDIR_HOOK/failed.list"
+    fi
+  fi
+fi
 if [ ! -s "$TMPDIR_HOOK/failed.list" ]; then
   exit 0
 fi
@@ -83,12 +165,28 @@ fi
   echo "the task complete. Do NOT disable a check to make the hook pass."
 } >&2
-# Optional: opt-in headless recovery. Spawns a one-turn `claude -p` to
-# attempt the fix autonomously. Useful for unattended CI / cron contexts.
-# Off by default because it costs tokens.
-if [ "${AHK_HEADLESS_RECOVER:-}" = "1" ] && command -v claude >/dev/null 2>&1; then
+# Opt-in headless recovery. Spawns a one-turn `claude -p` to attempt the fix
+# autonomously. Useful for unattended CI / cron contexts. Off by default
+# because it costs tokens.
+#
+# Resolution order (first wins):
+#   1. AHK_HEADLESS_RECOVER=1   (env-var override, per-run)
+#   2. harness.config.json `.recovery.headless: true`   (persistent)
+HEADLESS_RECOVER=0
+HEADLESS_SOURCE=""
+if [ "${AHK_HEADLESS_RECOVER:-}" = "1" ]; then
+  HEADLESS_RECOVER=1
+  HEADLESS_SOURCE="AHK_HEADLESS_RECOVER"
+elif [ -f harness.config.json ] && command -v jq >/dev/null 2>&1; then
+  CFG_VAL=$(jq -r '.recovery.headless // false' harness.config.json 2>/dev/null)
+  if [ "$CFG_VAL" = "true" ]; then
+    HEADLESS_RECOVER=1
+    HEADLESS_SOURCE="harness.config.json:.recovery.headless"
+  fi
+fi
+if [ "$HEADLESS_RECOVER" = "1" ] && command -v claude >/dev/null 2>&1; then
   FAILED_LIST=$(tr '\n' ' ' < "$TMPDIR_HOOK/failed.list")
-  echo "[ahk] AHK_HEADLESS_RECOVER=1 — spawning recovery turn for: $FAILED_LIST" >&2
+  echo "[ahk] headless recovery enabled ($HEADLESS_SOURCE) — spawning recovery turn for: $FAILED_LIST" >&2
   claude -p \
     "The pre-completion checklist failed: $FAILED_LIST. Read the failure output in $TMPDIR_HOOK and apply the smallest fix. Do not disable any check." \
     --max-turns 5 \