agent-harness-kit 0.3.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "agent-harness-kit",
3
- "version": "0.3.0",
3
+ "version": "0.5.0",
4
4
  "description": "Solo-dev harness engineering kit for Claude Code. Layered architecture, structural tests, garbage-collection ritual, review subagents — without the enterprise overhead.",
5
5
  "type": "module",
6
6
  "bin": {
@@ -40,7 +40,9 @@
40
40
  "scripts": {
41
41
  "test": "node --test tests/*.test.mjs",
42
42
  "lint": "echo 'no-op (kit is plain ESM JS)'",
43
- "selftest": "node bin/cli.mjs --version"
43
+ "selftest": "node bin/cli.mjs --version",
44
+ "harness:eval": "node src/templates/_adapter-typescript/harness/eval-runner.mjs",
45
+ "harness:check": "node scripts/kit-structural-check.mjs"
44
46
  },
45
47
  "dependencies": {
46
48
  "@inquirer/prompts": "^7.0.0",
@@ -87,16 +87,15 @@ async function* walk(dir) {
87
87
  }
88
88
 
89
89
  function buildContext({ projectName, preset, layers, stack, kitVersion }) {
90
- const installCmd =
91
- stack.language === "python"
92
- ? "pip install -e '.[dev]'"
93
- : stack.packageManager === "pnpm"
94
- ? "pnpm install"
95
- : stack.packageManager === "yarn"
96
- ? "yarn"
97
- : stack.packageManager === "bun"
98
- ? "bun install"
99
- : "npm install";
90
+ const installCmd = (() => {
91
+ if (stack.language === "python") return "pip install -e '.[dev]'";
92
+ if (stack.language === "go") return "go mod download";
93
+ if (stack.language === "rust") return "cargo build";
94
+ if (stack.packageManager === "pnpm") return "pnpm install";
95
+ if (stack.packageManager === "yarn") return "yarn";
96
+ if (stack.packageManager === "bun") return "bun install";
97
+ return "npm install";
98
+ })();
100
99
  const devCmd = (() => {
101
100
  switch (stack.framework) {
102
101
  case "nextjs": return "npm run dev";
@@ -107,18 +106,28 @@ function buildContext({ projectName, preset, layers, stack, kitVersion }) {
107
106
  case "django": return "python manage.py runserver";
108
107
  case "flask": return "flask --app app run --debug";
109
108
  default:
110
- return stack.language === "python" ? "python -m app" : "npm run dev";
109
+ if (stack.language === "python") return "python -m app";
110
+ if (stack.language === "go") return "go run ./cmd/...";
111
+ if (stack.language === "rust") return "cargo run";
112
+ return "npm run dev";
111
113
  }
112
114
  })();
113
115
  const testCmd = (() => {
114
116
  switch (stack.framework) {
115
117
  case "django": return "python manage.py test";
116
118
  default:
117
- return stack.language === "python" ? "pytest -x" : "npm test";
119
+ if (stack.language === "python") return "pytest -x";
120
+ if (stack.language === "go") return "go test ./...";
121
+ if (stack.language === "rust") return "cargo test";
122
+ return "npm test";
118
123
  }
119
124
  })();
120
- const lintCmd =
121
- stack.language === "python" ? "ruff check ." : "npm run lint";
125
+ const lintCmd = (() => {
126
+ if (stack.language === "python") return "ruff check .";
127
+ if (stack.language === "go") return "go vet ./...";
128
+ if (stack.language === "rust") return "cargo clippy --all-targets -- -D warnings";
129
+ return "npm run lint";
130
+ })();
122
131
 
123
132
  return {
124
133
  projectName,
@@ -137,6 +146,8 @@ function buildContext({ projectName, preset, layers, stack, kitVersion }) {
137
146
  kitVersion,
138
147
  isTypescript: stack.language === "typescript",
139
148
  isPython: stack.language === "python",
149
+ isGo: stack.language === "go",
150
+ isRust: stack.language === "rust",
140
151
  isNextjs: stack.framework === "nextjs",
141
152
  isFastapi: stack.framework === "fastapi",
142
153
  isExpress: stack.framework === "express",
@@ -152,11 +163,27 @@ function buildContext({ projectName, preset, layers, stack, kitVersion }) {
152
163
  // into the root.
153
164
  function pathForStack(rel, stack) {
154
165
  if (rel.startsWith("_adapter-typescript/")) {
155
- return stack.language === "typescript" ? rel.slice("_adapter-typescript/".length) : null;
166
+ const stripped = rel.slice("_adapter-typescript/".length);
167
+ if (stack.language === "typescript") return stripped;
168
+ // eval-runner.mjs is language-agnostic — share with Go/Rust users who
169
+ // don't ship a native runner. Avoids duplicating ~322 lines per adapter.
170
+ if (
171
+ stripped === "harness/eval-runner.mjs" &&
172
+ (stack.language === "go" || stack.language === "rust")
173
+ ) {
174
+ return stripped;
175
+ }
176
+ return null;
156
177
  }
157
178
  if (rel.startsWith("_adapter-python/")) {
158
179
  return stack.language === "python" ? rel.slice("_adapter-python/".length) : null;
159
180
  }
181
+ if (rel.startsWith("_adapter-go/")) {
182
+ return stack.language === "go" ? rel.slice("_adapter-go/".length) : null;
183
+ }
184
+ if (rel.startsWith("_adapter-rust/")) {
185
+ return stack.language === "rust" ? rel.slice("_adapter-rust/".length) : null;
186
+ }
160
187
  if (rel.startsWith("_preset-nextjs/")) {
161
188
  return stack.framework === "nextjs" ? rel.slice("_preset-nextjs/".length) : null;
162
189
  }
@@ -1,6 +1,6 @@
1
1
  ---
2
2
  name: architecture-reviewer
3
- description: Use this agent immediately after any change that touches multiple layers, adds a new domain, or modifies imports across module boundaries. Verifies the {{layersJoined}} rule, provider boundaries, and golden-principles.md compliance. Read-only — never modifies files.
3
+ description: Use this agent when the Stop hook surfaces a `multi-layer-review` flag (changes span ≥2 layers in a single domain — mechanical count, not self-judgment), or when a change adds a new domain / modifies imports across module boundaries. Verifies the {{layersJoined}} rule, provider boundaries, and golden-principles.md compliance. Read-only — never modifies files.
4
4
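  tools: Read, Grep, Glob, Bash({{#if isPython}}python -m harness.structural_test{{else}}{{#if isGo}}go run harness/structural_check.go{{else}}{{#if isRust}}node harness/structural-check.mjs{{else}}npm run harness:check{{/if}}{{/if}}{{/if}}), Bash(git diff:*)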
5
5
  model: sonnet
6
6
  ---
@@ -18,6 +18,12 @@ suggested-turns: 8
18
18
  invoke `/write-skill`.
19
19
  - **(d) Wrong layer / architecture** — the structure invited the
20
20
  mistake. Fix: write an ADR via `/add-adr`.
21
+ - **(e) Wrong instruction in prompt** — the failure traces back to a
22
+ skill/agent prompt that was ambiguous, misleading, or under-constrained.
23
+ The agent followed the prompt correctly but the prompt itself led astray.
24
+ Fix: edit the offending file under `.claude/skills/<name>/SKILL.md` or
25
+ `.claude/agents/<name>.md`. Re-run `/eval-runner` afterward to confirm
26
+ the regression is closed.
21
27
  3. **Append entry** to `docs/agent-failures.md` with: date, symptom, fix,
22
28
  fix-type, file modified.
23
29
  4. **Apply the fix in the right place.** NEVER paper over with a CLAUDE.md
@@ -10,7 +10,7 @@ an encyclopedia.
10
10
  - Dev: `{{devCmd}}`
11
11
  - Test: `{{testCmd}}`
12
12
  - Lint: `{{lintCmd}}`
13
- - Structural: `{{#if isPython}}python -m harness.structural_test{{else}}npm run harness:check{{/if}}` (must pass before any PR)
13
+ - Structural: `{{#if isPython}}python -m harness.structural_test{{else}}{{#if isGo}}go run harness/structural_check.go{{else}}{{#if isRust}}node harness/structural-check.mjs{{else}}npm run harness:check{{/if}}{{/if}}{{/if}}` (must pass before any PR)
14
14
 
15
15
  ## Architecture (brief)
16
16
 
@@ -68,3 +68,5 @@ Full list: `docs/golden-principles.md`.
68
68
  imports across layers).
69
69
  - Don't update CLAUDE.md without proposing a harness improvement
70
70
  (`/propose-harness-improvement`).
71
+ - Don't grow CLAUDE.md past 200 instructions — Stop hook blocks the stop on
72
+ overflow (HumanLayer measurement). Excess belongs in `docs/` or @-imports.
@@ -0,0 +1,263 @@
1
+ // harness/structural_check.go — forward-only layer enforcement for Go.
2
+ //
3
+ // Reads harness.config.json + go.mod. For each domain, parses every .go
4
+ // file's imports (via go/parser stdlib) and asserts that no internal
5
+ // import goes "backward" through the layer order.
6
+ //
7
+ // Layer assignment: a file's layer = first path segment after `<root>/`.
8
+ // E.g. `internal/repo/store.go` belongs to the `repo` layer when
9
+ // `domains[0].root == "internal"`.
10
+ //
11
+ // External imports (stdlib, third-party modules) are ignored — only
12
+ // imports starting with `<module-path>/<root>/` are checked.
13
+ //
14
+ // Exit codes:
15
+ // 0 — clean (or only baselined violations)
16
+ // 2 — new violations found
17
+ //
18
+ // Run: go run harness/structural_check.go [--file <path>]
19
+
20
+ package main
21
+
22
+ import (
23
+ "encoding/json"
24
+ "fmt"
25
+ "go/parser"
26
+ "go/token"
27
+ "os"
28
+ "path/filepath"
29
+ "regexp"
30
+ "sort"
31
+ "strings"
32
+ )
33
+
34
+ type Domain struct {
35
+ Name string `json:"name"`
36
+ Root string `json:"root"`
37
+ Layers []string `json:"layers"`
38
+ }
39
+
40
+ type Config struct {
41
+ Domains []Domain `json:"domains"`
42
+ }
43
+
44
+ type Violation struct {
45
+ File string
46
+ Line int
47
+ From string
48
+ To string
49
+ Domain string
50
+ Key string
51
+ }
52
+
53
+ func readModulePath(repoRoot string) (string, error) {
54
+ data, err := os.ReadFile(filepath.Join(repoRoot, "go.mod"))
55
+ if err != nil {
56
+ return "", err
57
+ }
58
+ re := regexp.MustCompile(`(?m)^module\s+(\S+)`)
59
+ m := re.FindStringSubmatch(string(data))
60
+ if len(m) < 2 {
61
+ return "", fmt.Errorf("module declaration not found in go.mod")
62
+ }
63
+ return m[1], nil
64
+ }
65
+
66
+ func readConfig(repoRoot string) (*Config, error) {
67
+ data, err := os.ReadFile(filepath.Join(repoRoot, "harness.config.json"))
68
+ if err != nil {
69
+ return nil, err
70
+ }
71
+ var c Config
72
+ if err := json.Unmarshal(data, &c); err != nil {
73
+ return nil, err
74
+ }
75
+ return &c, nil
76
+ }
77
+
78
+ func readBaseline(repoRoot string) map[string]bool {
79
+ out := map[string]bool{}
80
+ data, err := os.ReadFile(filepath.Join(repoRoot, ".harness/structural-baseline.json"))
81
+ if err != nil {
82
+ return out
83
+ }
84
+ var keys []string
85
+ if err := json.Unmarshal(data, &keys); err != nil {
86
+ return out
87
+ }
88
+ for _, k := range keys {
89
+ out[k] = true
90
+ }
91
+ return out
92
+ }
93
+
94
+ // Returns (layerName, domainPtr) or ("", nil) if file doesn't belong to any layer.
95
+ func layerOf(relPath string, cfg *Config) (string, *Domain) {
96
+ for i := range cfg.Domains {
97
+ d := &cfg.Domains[i]
98
+ if !strings.HasPrefix(relPath, d.Root+string(os.PathSeparator)) &&
99
+ !strings.HasPrefix(relPath, d.Root+"/") {
100
+ continue
101
+ }
102
+ stripped := strings.TrimPrefix(relPath, d.Root+string(os.PathSeparator))
103
+ stripped = strings.TrimPrefix(stripped, d.Root+"/")
104
+ parts := strings.SplitN(stripped, "/", 2)
105
+ if len(parts) == 0 {
106
+ continue
107
+ }
108
+ first := parts[0]
109
+ for _, l := range d.Layers {
110
+ if l == first {
111
+ return l, d
112
+ }
113
+ }
114
+ }
115
+ return "", nil
116
+ }
117
+
118
+ // For an import path like "github.com/foo/bar/internal/repo/store",
119
+ // returns ("repo", domainPtr) when domain.root == "internal" and
120
+ // modulePath == "github.com/foo/bar". Returns ("", nil) for external imports.
121
+ func importLayer(importPath, modulePath string, cfg *Config) (string, *Domain) {
122
+ prefix := modulePath + "/"
123
+ if !strings.HasPrefix(importPath, prefix) {
124
+ return "", nil
125
+ }
126
+ relPath := strings.TrimPrefix(importPath, prefix)
127
+ return layerOf(relPath, cfg)
128
+ }
129
+
130
// indexOf returns the position of the first element of slice equal to s,
// or -1 when s is not present.
func indexOf(slice []string, s string) int {
	for pos := 0; pos < len(slice); pos++ {
		if slice[pos] == s {
			return pos
		}
	}
	return -1
}
138
+
139
+ func main() {
140
+ repoRoot, err := os.Getwd()
141
+ if err != nil {
142
+ fmt.Fprintln(os.Stderr, err)
143
+ os.Exit(1)
144
+ }
145
+
146
+ scopedFile := ""
147
+ for i, a := range os.Args {
148
+ if a == "--file" && i+1 < len(os.Args) {
149
+ scopedFile = os.Args[i+1]
150
+ }
151
+ }
152
+
153
+ cfg, err := readConfig(repoRoot)
154
+ if err != nil {
155
+ fmt.Fprintln(os.Stderr, "read harness.config.json:", err)
156
+ os.Exit(1)
157
+ }
158
+ modulePath, err := readModulePath(repoRoot)
159
+ if err != nil {
160
+ fmt.Fprintln(os.Stderr, "read go.mod:", err)
161
+ os.Exit(1)
162
+ }
163
+ baseline := readBaseline(repoRoot)
164
+ baselineExists := len(baseline) > 0 || fileExists(filepath.Join(repoRoot, ".harness/structural-baseline.json"))
165
+
166
+ violations := []Violation{}
167
+ fset := token.NewFileSet()
168
+ walkErr := filepath.Walk(repoRoot, func(path string, info os.FileInfo, err error) error {
169
+ if err != nil {
170
+ return err
171
+ }
172
+ if info.IsDir() {
173
+ if info.Name() == "vendor" || info.Name() == ".git" || strings.HasPrefix(info.Name(), ".") {
174
+ if path == repoRoot {
175
+ return nil
176
+ }
177
+ return filepath.SkipDir
178
+ }
179
+ return nil
180
+ }
181
+ if !strings.HasSuffix(path, ".go") || strings.HasSuffix(path, "_test.go") {
182
+ return nil
183
+ }
184
+ if scopedFile != "" {
185
+ abs, _ := filepath.Abs(scopedFile)
186
+ if path != abs && path != filepath.Join(repoRoot, scopedFile) {
187
+ return nil
188
+ }
189
+ }
190
+ relPath, _ := filepath.Rel(repoRoot, path)
191
+ srcLayer, srcDomain := layerOf(relPath, cfg)
192
+ if srcDomain == nil {
193
+ return nil
194
+ }
195
+ srcIdx := indexOf(srcDomain.Layers, srcLayer)
196
+
197
+ f, err := parser.ParseFile(fset, path, nil, parser.ImportsOnly)
198
+ if err != nil {
199
+ return nil
200
+ }
201
+ for _, imp := range f.Imports {
202
+ impPath := strings.Trim(imp.Path.Value, `"`)
203
+ tgtLayer, tgtDomain := importLayer(impPath, modulePath, cfg)
204
+ if tgtDomain == nil || tgtDomain.Name != srcDomain.Name {
205
+ continue
206
+ }
207
+ tgtIdx := indexOf(tgtDomain.Layers, tgtLayer)
208
+ if srcIdx < tgtIdx {
209
+ key := fmt.Sprintf("%s::%s", relPath, impPath)
210
+ if baseline[key] {
211
+ continue
212
+ }
213
+ violations = append(violations, Violation{
214
+ File: relPath,
215
+ Line: fset.Position(imp.Pos()).Line,
216
+ From: srcLayer,
217
+ To: tgtLayer,
218
+ Domain: srcDomain.Name,
219
+ Key: key,
220
+ })
221
+ }
222
+ }
223
+ return nil
224
+ })
225
+ if walkErr != nil {
226
+ fmt.Fprintln(os.Stderr, "walk:", walkErr)
227
+ os.Exit(1)
228
+ }
229
+
230
+ // First-run baseline: write current violations + exit 0.
231
+ if !baselineExists && len(violations) > 0 {
232
+ _ = os.MkdirAll(filepath.Join(repoRoot, ".harness"), 0o755)
233
+ keys := make([]string, len(violations))
234
+ for i, v := range violations {
235
+ keys[i] = v.Key
236
+ }
237
+ sort.Strings(keys)
238
+ out, _ := json.MarshalIndent(keys, "", " ")
239
+ _ = os.WriteFile(filepath.Join(repoRoot, ".harness/structural-baseline.json"), append(out, '\n'), 0o644)
240
+ fmt.Printf("✓ structural test: baselined %d existing violations (.harness/structural-baseline.json).\n", len(violations))
241
+ fmt.Println(" New violations introduced after this point will block. Existing ones can be fixed incrementally.")
242
+ os.Exit(0)
243
+ }
244
+
245
+ if len(violations) == 0 {
246
+ fmt.Println("✓ structural test passed")
247
+ os.Exit(0)
248
+ }
249
+
250
+ for _, v := range violations {
251
+ fmt.Fprintf(os.Stderr, "✖ %s:%d layer=%s → %s (must be forward-only)\n", v.File, v.Line, v.From, v.To)
252
+ }
253
+ fmt.Fprintf(os.Stderr, "\n%d new layer violation(s). Fix the import direction.\n", len(violations))
254
+ if len(cfg.Domains) > 0 {
255
+ fmt.Fprintf(os.Stderr, "Layer order for domain %q: %s\n", cfg.Domains[0].Name, strings.Join(cfg.Domains[0].Layers, " → "))
256
+ }
257
+ os.Exit(2)
258
+ }
259
+
260
+ func fileExists(p string) bool {
261
+ _, err := os.Stat(p)
262
+ return err == nil
263
+ }
@@ -0,0 +1,198 @@
1
+ // harness/structural-check.mjs — forward-only layer enforcement for Rust.
2
+ //
3
+ // Reads harness.config.json. For each domain, walks every .rs file under
4
+ // the domain root (excluding target/, .git/, vendor/) and asserts no
5
+ // `use crate::<layer>::...` import goes "backward" through the layer order.
6
+ //
7
+ // Layer assignment: a file's layer = first path segment after `<root>/`.
8
+ // E.g. `src/repo/store.rs` belongs to the `repo` layer when
9
+ // `domains[0].root == "src"`.
10
+ //
11
+ // Why Node + regex (not a Cargo binary):
12
+ // - Avoids polluting the user's Cargo workspace with a check crate.
13
+ // - Node is already required to install the kit (npx).
14
+ // - Regex over `use crate::<X>` is sufficient — we never need full
15
+ // parse trees because the layer rule is a syntactic property.
16
+ // - `super::` and `self::` are scoped to the current module, which is
17
+ // by definition the same layer, so we ignore them.
18
+ //
19
+ // Exit codes:
20
+ // 0 — clean (or only baselined violations; or first-run baseline write)
21
+ // 2 — new violations found
22
+
23
+ import {
24
+ readFileSync,
25
+ writeFileSync,
26
+ existsSync,
27
+ mkdirSync,
28
+ readdirSync,
29
+ } from "node:fs";
30
+ import { join, relative, sep } from "node:path";
31
+
32
// The check always runs against the directory it was launched from.
const repoRoot = process.cwd();

// Entry names that are never descended into while collecting .rs files.
const SKIP_DIRS = new Set([".git", "target", "node_modules", ".harness", "vendor"]);
40
+
41
/**
 * Load and parse `harness.config.json` from the repo root.
 *
 * @returns {object} the parsed configuration
 * @throws if the file cannot be read or is not valid JSON
 */
function readConfig() {
  const raw = readFileSync(join(repoRoot, "harness.config.json"), "utf8");
  return JSON.parse(raw);
}
45
+
46
/**
 * Load the set of baselined violation keys from
 * `.harness/structural-baseline.json`.
 *
 * @returns {{ exists: boolean, set: Set<string> }} `exists` says whether the
 *   file is present on disk; `set` holds its entries. A present file that is
 *   unreadable, malformed, or not a JSON array yields `exists: true` with an
 *   empty set.
 */
function readBaseline() {
  const baselineFile = join(repoRoot, ".harness/structural-baseline.json");
  if (!existsSync(baselineFile)) {
    return { exists: false, set: new Set() };
  }

  let entries = [];
  try {
    const parsed = JSON.parse(readFileSync(baselineFile, "utf8"));
    if (Array.isArray(parsed)) entries = parsed;
  } catch {
    // Best effort: an unreadable baseline is treated as present but empty.
  }
  return { exists: true, set: new Set(entries) };
}
56
+
57
/**
 * Recursively yield the path of every `.rs` file below `root`.
 *
 * Entries named in SKIP_DIRS and entries whose name starts with "." are
 * ignored. A `root` that does not exist yields nothing.
 *
 * @param {string} root directory to walk
 * @yields {string} path of a Rust source file (`root` joined with the
 *   relative location of the file)
 */
function* walkRustFiles(root) {
  if (!existsSync(root)) return;

  const entries = readdirSync(root, { withFileTypes: true });
  for (const entry of entries) {
    const { name } = entry;
    const ignored = SKIP_DIRS.has(name) || name.startsWith(".");
    if (ignored) continue;

    const entryPath = join(root, name);
    if (entry.isDirectory()) {
      yield* walkRustFiles(entryPath);
      continue;
    }
    if (entry.isFile() && name.endsWith(".rs")) {
      yield entryPath;
    }
  }
}
69
+
70
/**
 * Map a repo-relative path to the layer it belongs to.
 *
 * The layer is the first path segment after `<domain.root>/` (either "/" or
 * the platform separator is accepted), provided the domain lists it in
 * `layers`. Domains are tried in config order.
 *
 * @param {string} relPath path relative to the repo root
 * @param {{ domains: Array<{ root: string, layers: string[] }> }} cfg
 * @returns {{ layer: string, domain: object } | null} the match, or null when
 *   the path is outside every domain root or its first segment is not a layer
 */
function layerOf(relPath, cfg) {
  for (const domain of cfg.domains) {
    const candidates = [domain.root + "/", domain.root + sep];
    const prefix = candidates.find((p) => relPath.startsWith(p));
    if (prefix === undefined) continue;

    const [head] = relPath.slice(prefix.length).split(/[\/\\]/);
    if (domain.layers.includes(head)) {
      return { layer: head, domain };
    }
  }
  return null;
}
85
+
86
// Matches `use crate::` (optionally preceded by `pub`) followed either by a
// plain identifier (captured in group 1) or by the `{` that opens a grouped
// import such as `use crate::{service::Foo, repo};`.
const USE_CRATE_RE =
  /\b(?:pub\s+)?use\s+crate::(?:([a-zA-Z_][a-zA-Z0-9_]*)|\{)/g;

// First identifier of one item inside a `use crate::{ ... }` group.
const GROUP_ITEM_HEAD_RE = /^\s*([a-zA-Z_][a-zA-Z0-9_]*)/;

// Collect the leading identifier of every top-level item of a brace group
// whose opening `{` sits just before `start`. Nested groups
// (`repo::{A, B}`) contribute only their head (`repo`).
function groupedUseHeads(line, start) {
  const heads = [];
  let depth = 0;
  let item = "";
  const flush = () => {
    const head = GROUP_ITEM_HEAD_RE.exec(item);
    if (head) heads.push(head[1]);
    item = "";
  };

  for (let i = start; i < line.length; i++) {
    const c = line[i];
    if (c === "}" && depth === 0) break; // end of the outer group
    if (c === "," && depth === 0) {
      flush();
      continue;
    }
    if (c === "{") depth++;
    else if (c === "}") depth--;
    item += c;
  }
  flush();
  return heads;
}

/**
 * List the first path segment of every `use crate::…` import on one line.
 *
 * Handles both `use crate::service::Foo;` and the grouped form
 * `use crate::{service::Foo, repo};`, which a plain
 * `use crate::<ident>` pattern does not match at all. Groups are only
 * understood when they sit on the same line as `use crate::{`; items of a
 * group continued on following lines are not seen.
 *
 * @param {string} line one line of Rust code (comments/strings already removed)
 * @returns {string[]} segment names in order of appearance; may contain
 *   names that are not layers (e.g. `self`) — callers filter against the
 *   configured layer list
 */
function parseUseCrate(line) {
  const heads = [];
  for (const match of line.matchAll(USE_CRATE_RE)) {
    if (match[1] !== undefined) {
      heads.push(match[1]);
    } else {
      heads.push(...groupedUseHeads(line, match.index + match[0].length));
    }
  }
  return heads;
}
92
+
93
/**
 * Reduce one source line to its code portion.
 *
 * Everything between double quotes is dropped (quotes included, backslash
 * escapes honoured), and the line is cut at the first `//` found outside a
 * string. This keeps text such as `"use crate::service"` or
 * `// use crate::service` away from the import regex.
 *
 * Known gaps: block comments and single-quoted char literals are not
 * recognised, and string state does not carry over between lines.
 *
 * @param {string} line a single line of source text
 * @returns {string} the line without string literals and line comments
 */
function stripCommentsAndStrings(line) {
  const kept = [];
  let insideString = false;
  let pos = 0;

  while (pos < line.length) {
    const ch = line[pos];

    if (insideString) {
      if (ch === "\\" && pos + 1 < line.length) {
        pos += 2; // skip the escaped character as well
        continue;
      }
      if (ch === '"') insideString = false;
      pos += 1;
      continue;
    }

    if (ch === '"') {
      insideString = true;
      pos += 1;
      continue;
    }
    if (ch === "/" && line[pos + 1] === "/") break;

    kept.push(ch);
    pos += 1;
  }
  return kept.join("");
}
121
+
122
/**
 * Run the forward-only layer check for every configured domain and exit.
 *
 * Exit codes:
 *   0 — no new violations, or the first-run baseline was just written
 *   2 — new violations found
 */
function main() {
  const cfg = readConfig();
  const baseline = readBaseline();
  const violations = [];

  for (const domain of cfg.domains) {
    for (const file of walkRustFiles(join(repoRoot, domain.root))) {
      const relPath = relative(repoRoot, file);
      const src = layerOf(relPath, cfg);
      if (!src) continue;

      const { layers } = src.domain;
      const srcIdx = layers.indexOf(src.layer);
      const lines = readFileSync(file, "utf8").split("\n");

      for (const [lineIdx, rawLine] of lines.entries()) {
        const imported = parseUseCrate(stripCommentsAndStrings(rawLine));
        for (const tgtLayer of imported) {
          const tgtIdx = layers.indexOf(tgtLayer);
          if (tgtIdx === -1) continue; // not a layer of this domain
          if (srcIdx >= tgtIdx) continue; // allowed direction

          const key = `${relPath}::${tgtLayer}`;
          if (baseline.set.has(key)) continue;

          violations.push({
            file: relPath,
            line: lineIdx + 1,
            from: src.layer,
            to: tgtLayer,
            domain: src.domain.name,
            key,
          });
        }
      }
    }
  }

  // First run: there is no baseline file yet, so record what exists today
  // and succeed; only violations added later will fail the check.
  if (!baseline.exists && violations.length > 0) {
    const baselineDir = join(repoRoot, ".harness");
    mkdirSync(baselineDir, { recursive: true });

    const uniqueKeys = new Set(violations.map((v) => v.key));
    const keys = [...uniqueKeys].sort();
    const payload = JSON.stringify(keys, null, 2) + "\n";
    writeFileSync(join(repoRoot, ".harness/structural-baseline.json"), payload);

    console.log(
      `✓ structural test: baselined ${keys.length} existing violation(s) (.harness/structural-baseline.json).`,
    );
    console.log(
      " New violations introduced after this point will block. Existing ones can be fixed incrementally.",
    );
    process.exit(0);
  }

  if (violations.length === 0) {
    console.log("✓ structural test passed");
    process.exit(0);
  }

  for (const { file, line, from, to } of violations) {
    console.error(`✖ ${file}:${line} layer=${from} → ${to} (must be forward-only)`);
  }
  console.error(
    `\n${violations.length} new layer violation(s). Fix the import direction.`,
  );
  if (cfg.domains.length > 0) {
    const [firstDomain] = cfg.domains;
    console.error(
      `Layer order for domain "${firstDomain.name}": ${firstDomain.layers.join(" → ")}`,
    );
  }
  process.exit(2);
}

main();
@@ -15,7 +15,7 @@ The `/propose-harness-improvement` skill appends entries here automatically.
15
15
  ```
16
16
  ### YYYY-MM-DD <slug>
17
17
  - **Symptom:** <what went wrong>
18
- - **Classification:** (a) missing context | (b) missing rule | (c) missing tool/skill | (d) wrong layer
18
+ - **Classification:** (a) missing context | (b) missing rule | (c) missing tool/skill | (d) wrong layer | (e) wrong instruction in prompt
19
19
  - **Fix applied:** <what we did>
20
20
  - **Fix lives in:** path/or/file
21
21
  ```
@@ -74,6 +74,51 @@ Why: Mitchell Hashimoto's discipline. CLAUDE.md is a table of contents — it
74
74
  won't be re-read on every action.
75
75
  Enforced by: `/propose-harness-improvement` skill.
76
76
 
77
+ ## 8. CLAUDE.md is bounded — at most 200 instructions
78
+
79
+ CLAUDE.md is loaded into context every session. Beyond ~150-200 instructions
80
+ (HumanLayer measurement) agents stop following it reliably; verbose
81
+ CLAUDE.md silently degrades behavior. Promote details to `docs/` or use
82
+ `@-imports` to load context on demand.
83
+
84
+ Why: closes principle 7's loop — without a hard cap, "every failure becomes
85
+ a CLAUDE.md line" is the path of least resistance and CLAUDE.md grows
86
+ unbounded until the agent ignores it.
87
+ Enforced by: Stop hook (`scripts/precompletion-checklist.sh`) counts
88
+ bullets and numbered items in `CLAUDE.md` against
89
+ `harness.config.json` `claudeMd.maxInstructions` (default 200) and blocks
90
+ the stop on overflow.
91
+
92
+ ## 9. Baselines are decreasing-only
93
+
94
+ `.harness/structural-baseline.json` lists existing violations the codebase
95
+ inherits when a new structural rule is introduced. New code must not add to
96
+ this file — fixes only REMOVE entries.
97
+
98
+ Why: a growing baseline silently masks structural-test failures. Without
99
+ this guard, the path of least resistance for a violation is "append it to
100
+ the baseline," which defeats the rule. PMD's baseline pattern works only
101
+ because it's enforced as monotonic.
102
+ Enforced by: pre-push hook (`scripts/pre-push.sh`) compares
103
+ `.harness/structural-baseline.json` length to its HEAD version and blocks
104
+ the push when the count grew.
105
+
106
+ ## 10. Reviewer subagent triggers are mechanical, not self-judged
107
+
108
+ `architecture-reviewer` runs when changes span ≥2 layers in a single
109
+ domain. The decision is made by counting layers off
110
+ `harness.config.json` `domains[].layers` against the changed-file set —
111
+ not by the agent guessing whether its diff "touches multiple layers".
112
+
113
+ Why: self-judged triggers fail open on borderline cases. The agent that
114
+ just shipped a layer-spanning change is the one least equipped to notice
115
+ it. Mechanical counting closes that gap.
116
+ Enforced by: Stop hook (`scripts/precompletion-checklist.sh`) emits a
117
+ `multi-layer-review` failure when `git` reveals ≥2 touched layers in any
118
+ domain. The agent reads the recommendation, invokes
119
+ `architecture-reviewer` (or documents why review is unnecessary), and the
120
+ loop guard (`stop_hook_active`) lets the next stop succeed.
121
+
77
122
  ---
78
123
 
79
124
  _Add new principles via `/structural-test-author`, which forces you to
@@ -7,14 +7,14 @@
7
7
  "domains": [
8
8
  {
9
9
  "name": "default",
10
- "root": "{{#if isPython}}app{{else}}src{{/if}}",
10
+ "root": "{{#if isPython}}app{{else}}{{#if isGo}}internal{{else}}src{{/if}}{{/if}}",
11
11
  "layers": [{{#each layers}}"{{this}}"{{#unless @last}}, {{/unless}}{{/each}}]
12
12
  }
13
13
  ],
14
14
  "providers": ["auth", "telemetry", "feature-flags"],
15
15
  "goldenPrinciples": "docs/golden-principles.md",
16
16
  "structuralTest": {
17
- "engine": "{{#if isPython}}libcst{{else}}ts-morph{{/if}}",
17
+ "engine": "{{#if isPython}}libcst{{else}}{{#if isGo}}go-parser{{else}}{{#if isRust}}rust-regex{{else}}ts-morph{{/if}}{{/if}}{{/if}}",
18
18
  "configPath": ".harness/structural-test.config.json",
19
19
  "blockOnViolation": true
20
20
  },
@@ -28,6 +28,13 @@
28
28
  "maxFixesPerRun": 3,
29
29
  "scope": ["dead-imports", "duplicate-utils", "layer-violations", "doc-drift"]
30
30
  },
31
+ "claudeMd": {
32
+ "path": "CLAUDE.md",
33
+ "maxInstructions": 200
34
+ },
35
+ "recovery": {
36
+ "headless": false
37
+ },
31
38
  "models": {
32
39
  "main": "claude-sonnet-4-6",
33
40
  "reviewers": "claude-sonnet-4-6",
@@ -4,6 +4,35 @@
4
4
  # Lives in scripts/ so it ships with the repo; install via install-git-hooks.sh.
5
5
  set -e
6
6
 
7
# Baseline monotonic guard. .harness/structural-baseline.json is decreasing-
# only — fixes REMOVE entries; no path should ADD them. Catches the "mask
# violations by baselining them" anti-pattern before code leaves the machine.
# Runs first because a grown baseline silently masks structural-test failures.
#
# Reference point: the branch's upstream (@{upstream}) when it exists and
# already tracks the baseline, otherwise HEAD. By the time a push happens the
# commits being pushed are already in HEAD, so comparing against HEAD alone
# only notices UNCOMMITTED growth; comparing against the upstream also catches
# growth that was committed locally and is about to be published.
BASELINE_FILE=".harness/structural-baseline.json"
if [ -f "$BASELINE_FILE" ] \
  && command -v jq >/dev/null 2>&1 \
  && git rev-parse --verify HEAD >/dev/null 2>&1; then
  BASE_REF="HEAD"
  if git rev-parse --verify --quiet '@{upstream}' >/dev/null 2>&1 \
    && git cat-file -e "@{upstream}:$BASELINE_FILE" 2>/dev/null; then
    BASE_REF="@{upstream}"
  fi
  # No committed baseline at the reference point means this is the initial
  # baseline; there is nothing to compare against, so let it through.
  if git cat-file -e "$BASE_REF:$BASELINE_FILE" 2>/dev/null; then
    CURRENT_COUNT=$(jq 'length' "$BASELINE_FILE" 2>/dev/null || echo 0)
    BASE_COUNT=$(git show "$BASE_REF:$BASELINE_FILE" 2>/dev/null | jq 'length' 2>/dev/null || echo 0)
    if [ "$CURRENT_COUNT" -gt "$BASE_COUNT" ]; then
      {
        echo
        echo "[pre-push] BLOCKED: structural-baseline.json grew vs $BASE_REF"
        echo " Previous: $BASE_COUNT entries"
        echo " Current: $CURRENT_COUNT entries (+$((CURRENT_COUNT - BASE_COUNT)))"
        echo
        echo "Baseline is decreasing-only. New violations should be FIXED,"
        echo "not appended to the baseline."
        echo
        echo "To bypass intentionally (e.g. legitimate refactor that re-baselines"
        echo "across a domain boundary):"
        echo " git push --no-verify # then document the reason in the commit"
      } >&2
      exit 2
    fi
  fi
fi
35
+
7
36
  echo "[pre-push] running structural test…"
8
37
  if [ -f harness.config.json ] && grep -q '"language": "python"' harness.config.json; then
9
38
  python -m harness.structural_test
@@ -4,8 +4,10 @@
4
4
  # failure context (not just check names) via stderr and exit 2. On second
5
5
  # stop (stop_hook_active=true), exit 0 to allow real exit.
6
6
  #
7
- # Optional: set AHK_HEADLESS_RECOVER=1 to spawn `claude -p` in the background
8
- # for one turn of recovery (costs tokens; off by default).
7
+ # Optional headless recovery: when enabled, spawn `claude -p` in the background
8
+ # for one turn of recovery on failure. Costs tokens; off by default. Configure
9
+ # via harness.config.json `.recovery.headless` (persistent), or override per-run
10
+ # with AHK_HEADLESS_RECOVER=1 (env var wins).
9
11
  set -e
10
12
 
11
13
  INPUT=$(cat)
@@ -53,6 +55,86 @@ elif [ -f pyproject.toml ] && command -v ruff >/dev/null 2>&1; then
53
55
  run_check ruff ruff check . || true
54
56
  fi
55
57
 
58
# CLAUDE.md instruction cap. HumanLayer measurement: agents stop following
# CLAUDE.md reliably beyond ~150-200 bullets/numbered items. Treat the file
# as a table of contents; promote details to docs/ or @-imports.
#
# An "instruction" is any line that starts (after optional indentation) with
# `-`, `*`, or `<digits>.` followed by whitespace. Path and cap come from
# harness.config.json (.claudeMd.path, .claudeMd.maxInstructions), defaulting
# to CLAUDE.md and 200. A non-numeric or non-positive cap disables the check.
if [ -f harness.config.json ] && command -v jq >/dev/null 2>&1; then
  CMD_PATH=$(jq -r '.claudeMd.path // "CLAUDE.md"' harness.config.json)
  CMD_CAP=$(jq -r '.claudeMd.maxInstructions // 200' harness.config.json)
  if [ -f "$CMD_PATH" ] && [ "$CMD_CAP" -gt 0 ] 2>/dev/null; then
    # grep -c always prints the count and exits 1 when that count is 0, so
    # `|| echo 0` would append a second "0" line and hand `[ -gt ]` a
    # non-integer. Swallow the status instead and default an empty result.
    CMD_COUNT=$(grep -cE '^[[:space:]]*([-*]|[0-9]+\.)[[:space:]]' "$CMD_PATH" 2>/dev/null || true)
    CMD_COUNT=${CMD_COUNT:-0}
    if [ "$CMD_COUNT" -gt "$CMD_CAP" ]; then
      {
        echo "$CMD_PATH instruction count: $CMD_COUNT (cap: $CMD_CAP)"
        echo
        echo "HumanLayer measurement: agents stop following CLAUDE.md reliably"
        echo "beyond ~150-200 instructions. Your file exceeds the cap."
        echo
        echo "Fix options:"
        echo " - extract sections to docs/ and link from CLAUDE.md"
        echo " - use @-imports to load detailed context on demand"
        echo " - delete obsolete rules (run /garbage-collection)"
        echo
        echo "Adjust the cap (with justification) in harness.config.json:"
        echo " .claudeMd.maxInstructions"
      } > "$TMPDIR_HOOK/claude-md-cap.out"
      echo "claude-md-cap" >> "$TMPDIR_HOOK/failed.list"
    fi
  fi
fi
85
+
86
+ # Multi-layer review trigger. When uncommitted/staged/untracked changes touch
87
+ # ≥2 layers within a single domain, the `architecture-reviewer` subagent
88
+ # should run before commit. Replaces the agent's self-judgment about "touches
89
+ # multiple layers" (the §4.3 #3 ambiguity in the harness-techniques research)
90
+ # with a mechanical count off `harness.config.json` `domains[].layers` /
91
+ # `.root`. Fires once per stop; the loop guard (`stop_hook_active`) lets the
92
+ # next stop succeed after the agent has read the recommendation.
93
+ if [ -f harness.config.json ] && command -v jq >/dev/null 2>&1 && command -v git >/dev/null 2>&1; then
94
+ CHANGED=$(
95
+ {
96
+ git diff --name-only 2>/dev/null || true
97
+ git diff --name-only --cached 2>/dev/null || true
98
+ git ls-files --others --exclude-standard 2>/dev/null || true
99
+ } | sort -u
100
+ )
101
+ if [ -n "$CHANGED" ]; then
102
+ NUM_DOMAINS=$(jq '.domains | length' harness.config.json 2>/dev/null || echo 0)
103
+ MULTI_OUT="$TMPDIR_HOOK/multi-layer-review.out"
104
+ : > "$MULTI_OUT"
105
+ MULTI_HIT=0
106
+ i=0
107
+ while [ "$i" -lt "$NUM_DOMAINS" ]; do
108
+ ROOT=$(jq -r ".domains[$i].root" harness.config.json)
109
+ DOMAIN=$(jq -r ".domains[$i].name" harness.config.json)
110
+ TOUCHED_COUNT=0
111
+ TOUCHED_NAMES=""
112
+ while IFS= read -r layer; do
113
+ [ -z "$layer" ] && continue
114
+ if echo "$CHANGED" | grep -qE "^${ROOT}/${layer}(/|$)"; then
115
+ TOUCHED_COUNT=$((TOUCHED_COUNT + 1))
116
+ TOUCHED_NAMES="$TOUCHED_NAMES $layer"
117
+ fi
118
+ done < <(jq -r ".domains[$i].layers[]" harness.config.json)
119
+ if [ "$TOUCHED_COUNT" -ge 2 ]; then
120
+ echo "Domain '$DOMAIN' has changes spanning $TOUCHED_COUNT layers:$TOUCHED_NAMES" >> "$MULTI_OUT"
121
+ MULTI_HIT=1
122
+ fi
123
+ i=$((i + 1))
124
+ done
125
+ if [ "$MULTI_HIT" = "1" ]; then
126
+ {
127
+ echo
128
+ echo "Recommend invoking the 'architecture-reviewer' subagent before commit."
129
+ echo "Mechanical detection — replaces self-judgment about 'touches multiple layers'."
130
+ echo "If review is genuinely not needed (e.g. mass-rename across layers), state"
131
+ echo "the reason in the commit message and re-run; the loop guard will release."
132
+ } >> "$MULTI_OUT"
133
+ echo "multi-layer-review" >> "$TMPDIR_HOOK/failed.list"
134
+ fi
135
+ fi
136
+ fi
137
+
56
138
  if [ ! -s "$TMPDIR_HOOK/failed.list" ]; then
57
139
  exit 0
58
140
  fi
@@ -83,12 +165,28 @@ fi
83
165
  echo "the task complete. Do NOT disable a check to make the hook pass."
84
166
  } >&2
85
167
 
86
- # Optional: opt-in headless recovery. Spawns a one-turn `claude -p` to
87
- # attempt the fix autonomously. Useful for unattended CI / cron contexts.
88
- # Off by default because it costs tokens.
89
- if [ "${AHK_HEADLESS_RECOVER:-}" = "1" ] && command -v claude >/dev/null 2>&1; then
168
+ # Opt-in headless recovery. Spawns a one-turn `claude -p` to attempt the fix
169
+ # autonomously. Useful for unattended CI / cron contexts. Off by default
170
+ # because it costs tokens.
171
+ #
172
+ # Resolution order (first wins):
173
+ # 1. AHK_HEADLESS_RECOVER=1 (env-var override, per-run)
174
+ # 2. harness.config.json `.recovery.headless: true` (persistent)
175
+ HEADLESS_RECOVER=0
176
+ HEADLESS_SOURCE=""
177
+ if [ "${AHK_HEADLESS_RECOVER:-}" = "1" ]; then
178
+ HEADLESS_RECOVER=1
179
+ HEADLESS_SOURCE="AHK_HEADLESS_RECOVER"
180
+ elif [ -f harness.config.json ] && command -v jq >/dev/null 2>&1; then
181
+ CFG_VAL=$(jq -r '.recovery.headless // false' harness.config.json 2>/dev/null)
182
+ if [ "$CFG_VAL" = "true" ]; then
183
+ HEADLESS_RECOVER=1
184
+ HEADLESS_SOURCE="harness.config.json:.recovery.headless"
185
+ fi
186
+ fi
187
+ if [ "$HEADLESS_RECOVER" = "1" ] && command -v claude >/dev/null 2>&1; then
90
188
  FAILED_LIST=$(tr '\n' ' ' < "$TMPDIR_HOOK/failed.list")
91
- echo "[ahk] AHK_HEADLESS_RECOVER=1 — spawning recovery turn for: $FAILED_LIST" >&2
189
+ echo "[ahk] headless recovery enabled ($HEADLESS_SOURCE) — spawning recovery turn for: $FAILED_LIST" >&2
92
190
  claude -p \
93
191
  "The pre-completion checklist failed: $FAILED_LIST. Read the failure output in $TMPDIR_HOOK and apply the smallest fix. Do not disable any check." \
94
192
  --max-turns 5 \