agent-harness-kit 0.5.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +2 -2
- package/README.md +33 -2
- package/package.json +1 -1
- package/src/core/detect-stack.mjs +16 -0
- package/src/core/upgrade.mjs +50 -1
- package/src/templates/.claude/settings.json.hbs +3 -0
- package/src/templates/_adapter-rust/harness/structural-check.mjs.hbs +89 -8
- package/src/templates/_adapter-typescript/eslint.config.mjs +9 -7
- package/src/templates/scripts/pre-push.sh +14 -5
- package/src/templates/scripts/precompletion-checklist.sh.hbs +13 -3
- package/src/templates/scripts/structural-test-on-edit.sh.hbs +30 -1
|
@@ -11,9 +11,9 @@
|
|
|
11
11
|
"source": {
|
|
12
12
|
"source": "github",
|
|
13
13
|
"repo": "tuanle96/agent-harness-kit",
|
|
14
|
-
"ref": "v0.5.0"
|
|
14
|
+
"ref": "v0.6.0"
|
|
15
15
|
},
|
|
16
|
-
"version": "0.5.0",
|
|
16
|
+
"version": "0.6.0",
|
|
17
17
|
"description": "Solo-dev harness engineering kit — layered architecture, GC ritual, structural tests, review subagents.",
|
|
18
18
|
"category": "development",
|
|
19
19
|
"keywords": [
|
package/README.md
CHANGED
|
@@ -67,9 +67,13 @@ Option B: install as a Claude Code plugin
|
|
|
67
67
|
is ~100 lines). The kit's CLAUDE.md is 50–80 lines.
|
|
68
68
|
2. **Every agent failure becomes a permanent harness change** (Hashimoto's
|
|
69
69
|
discipline). The `/propose-harness-improvement` skill enforces this.
|
|
70
|
-
3. **Computational sensors
|
|
70
|
+
3. **Computational sensors as safety net** (Fowler/Böckeler). The TS and
|
|
71
71
|
Python adapters ship one deterministic structural test per language; LLM
|
|
72
|
-
subagents are reserved for semantic judgment.
|
|
72
|
+
subagents are reserved for semantic judgment. Note: in our 1-shot bench
|
|
73
|
+
(n=3, ts-layered), the agent already followed visible seed patterns and
|
|
74
|
+
produced 0 boundary violations without enforcement. Treat structural tests
|
|
75
|
+
as a safety net for drift in long sessions, not as a happy-path
|
|
76
|
+
differentiator — see [Honest expectations](#honest-expectations).
|
|
73
77
|
4. **Garbage collection over Friday cleanup, scaled to solo** (OpenAI's
|
|
74
78
|
ritual, shrunk to top-3 fixes per week).
|
|
75
79
|
|
|
@@ -140,6 +144,33 @@ agent-harness-kit doctor # diagnose installed kit + Claude Code env
|
|
|
140
144
|
agent-harness-kit --version
|
|
141
145
|
```
|
|
142
146
|
|
|
147
|
+
## Honest expectations
|
|
148
|
+
|
|
149
|
+
What this kit **does** differentiate from bare claude-cli (anecdotal + design-level):
|
|
150
|
+
|
|
151
|
+
- Opinionated CLAUDE.md template (50–80 lines) so context isn't blown on style
|
|
152
|
+
- 10 skills (`/add-feature`, `/garbage-collection`, `/propose-harness-improvement`, …) that codify Hashimoto/OpenAI rituals
|
|
153
|
+
- 5 read-only review subagents for cheap second-opinion passes
|
|
154
|
+
- `feature_list.json` + ADR template + GC ritual for solo-scale planning hygiene
|
|
155
|
+
- Solo-dev cost defaults (~$2/day) and per-run budget enforcement
|
|
156
|
+
|
|
157
|
+
What it does **not** measurably differentiate (5 consecutive null benches, May 2026):
|
|
158
|
+
|
|
159
|
+
- Structural enforcement on happy-path 1-shot tasks. When seed code shows the
|
|
160
|
+
layer pattern, claude-cli follows it — the boundaries lint has nothing to
|
|
161
|
+
catch. We measured 0/6 ui→repo violations across bare and kit arms on the
|
|
162
|
+
`ts-layered` fixture.
|
|
163
|
+
|
|
164
|
+
Where the structural test *might* still earn its keep (untested, listed for
|
|
165
|
+
honesty, not as a claim):
|
|
166
|
+
|
|
167
|
+
- Long multi-turn sessions where pattern context drifts
|
|
168
|
+
- Adversarial "make it fast" pressure that tempts shortcuts
|
|
169
|
+
- Greenfield code with no existing pattern to follow
|
|
170
|
+
- Weaker model substrates (haiku, gpt-4o-mini)
|
|
171
|
+
|
|
172
|
+
Use the lint as a **safety net**, not as the reason you adopted the kit.
|
|
173
|
+
|
|
143
174
|
## Token / cost expectations
|
|
144
175
|
|
|
145
176
|
A typical day with the default model split (Sonnet 4.6 main + Haiku 4.5
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "agent-harness-kit",
|
|
3
|
-
"version": "0.5.0",
|
|
3
|
+
"version": "0.6.0",
|
|
4
4
|
"description": "Solo-dev harness engineering kit for Claude Code. Layered architecture, structural tests, garbage-collection ritual, review subagents — without the enterprise overhead.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|
|
@@ -68,6 +68,22 @@ export async function detectStack(cwd) {
|
|
|
68
68
|
// primary language. Walks 1 level deep into common monorepo dirs.
|
|
69
69
|
await probePolyglot(cwd, result);
|
|
70
70
|
|
|
71
|
+
// Rust workspace — must be checked BEFORE package.json because a polyglot
|
|
72
|
+
// repo (Rust backend + Next.js frontend) typically has BOTH at the root,
|
|
73
|
+
// and we want the Rust adapter installed by default since structural-test
|
|
74
|
+
// enforcement matters more for the workspace than for the marketing site.
|
|
75
|
+
// Single-crate Cargo.toml falls through to the legacy check at the bottom.
|
|
76
|
+
const rootCargo = await readTextSafe(resolve(cwd, "Cargo.toml"));
|
|
77
|
+
if (rootCargo && /^\s*\[workspace\]/m.test(rootCargo)) {
|
|
78
|
+
result.language = "rust";
|
|
79
|
+
result.framework = "rust-workspace";
|
|
80
|
+
result.packageManager = "cargo";
|
|
81
|
+
result.monorepo = true;
|
|
82
|
+
result.suggestedPreset = "generic";
|
|
83
|
+
result.availablePresets = ["generic"];
|
|
84
|
+
return result;
|
|
85
|
+
}
|
|
86
|
+
|
|
71
87
|
// JavaScript/TypeScript.
|
|
72
88
|
const pkg = await readJsonSafe(resolve(cwd, "package.json"));
|
|
73
89
|
if (pkg) {
|
package/src/core/upgrade.mjs
CHANGED
|
@@ -67,6 +67,37 @@ export async function syncHarnessConfigVersion(cwd, kitVersion) {
|
|
|
67
67
|
return { changed: true, reason: "synced" };
|
|
68
68
|
}
|
|
69
69
|
|
|
70
|
+
// Ensure .claude/settings.json includes the critical write-tool permissions.
// Older kit versions shipped a template without Edit/Write/MultiEdit, which
// causes agents to silently no-op when they try to modify files. This patch
// adds any missing entries to the existing `permissions.allow` array without
// touching anything else the user customized.
//
// A tool counts as "already configured" when the allow list contains either
// the bare tool name (`Edit`) or a scoped rule for it (`Edit(docs/**)`). A
// scoped rule is a deliberate user restriction; prepending the bare name
// would silently widen it, so scoped tools are left alone.
//
// Exported for unit tests; called from `upgrade()` below.
/**
 * @param {string} cwd - Project root that may contain `.claude/settings.json`.
 * @returns {Promise<{changed: boolean, reason: string, added?: string[]}>}
 *   `reason` is one of "missing", "invalid-json", "no-allow-list",
 *   "already-present" or "patched"; `added` is only set when patched.
 * @throws Re-throws any read error other than "file does not exist", and any
 *   write error.
 */
export async function ensureWritePermissions(cwd) {
  const settingsPath = resolve(cwd, ".claude/settings.json");

  // Read directly instead of existsSync() + readFile(): one syscall, and no
  // window in which the file can disappear between the check and the read.
  let raw;
  try {
    raw = await readFile(settingsPath, "utf8");
  } catch (err) {
    // ENOTDIR: `.claude` exists but is a regular file — same as "missing".
    if (err?.code === "ENOENT" || err?.code === "ENOTDIR") {
      return { changed: false, reason: "missing" };
    }
    throw err;
  }

  let cfg;
  try {
    cfg = JSON.parse(raw);
  } catch {
    return { changed: false, reason: "invalid-json" };
  }

  const allow = cfg?.permissions?.allow;
  if (!Array.isArray(allow)) return { changed: false, reason: "no-allow-list" };

  // Non-string entries are ignored rather than crashing on `.startsWith`.
  const isConfigured = (tool) =>
    allow.some(
      (entry) =>
        typeof entry === "string" && (entry === tool || entry.startsWith(`${tool}(`)),
    );

  const required = ["Edit", "Write", "MultiEdit"];
  const missing = required.filter((tool) => !isConfigured(tool));
  if (missing.length === 0) return { changed: false, reason: "already-present" };

  // Prepend missing entries so they appear before other Bash(...) rules,
  // matching the template's ordering.
  cfg.permissions.allow = [...missing, ...allow];
  await writeFile(settingsPath, JSON.stringify(cfg, null, 2) + "\n");
  return { changed: true, reason: "patched", added: missing };
}
|
|
100
|
+
|
|
70
101
|
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
71
102
|
const TEMPLATES_ROOT = resolve(__dirname, "..", "templates");
|
|
72
103
|
|
|
@@ -115,11 +146,19 @@ export async function upgrade({ cwd, kitVersion, yes }) {
|
|
|
115
146
|
// older `version`/`$schema` (it's user-owned and skipped by the file walk).
|
|
116
147
|
// Sync those two fields so doctor stops flagging drift.
|
|
117
148
|
const cfgSync = await syncHarnessConfigVersion(cwd, kitVersion);
|
|
149
|
+
// Also patch settings.json if it's missing write permissions (legacy bug).
|
|
150
|
+
const permSync = await ensureWritePermissions(cwd);
|
|
118
151
|
if (cfgSync.changed) {
|
|
119
152
|
console.log(
|
|
120
153
|
pc.green(`harness.config.json version + $schema synced to v${kitVersion}.`),
|
|
121
154
|
);
|
|
122
|
-
}
|
|
155
|
+
}
|
|
156
|
+
if (permSync.changed) {
|
|
157
|
+
console.log(
|
|
158
|
+
pc.green(`.claude/settings.json patched: added ${permSync.added.join(", ")}.`),
|
|
159
|
+
);
|
|
160
|
+
}
|
|
161
|
+
if (!cfgSync.changed && !permSync.changed) {
|
|
123
162
|
console.log(pc.green(`Already on v${kitVersion}. Nothing to do.`));
|
|
124
163
|
}
|
|
125
164
|
return;
|
|
@@ -263,6 +302,16 @@ export async function upgrade({ cwd, kitVersion, yes }) {
|
|
|
263
302
|
console.log(pc.dim(` ${pc.green("~")} harness.config.json (version + $schema synced)`));
|
|
264
303
|
}
|
|
265
304
|
|
|
305
|
+
// Patch .claude/settings.json if it's missing the critical write
|
|
306
|
+
// permissions (Edit/Write/MultiEdit). Old kit versions shipped without
|
|
307
|
+
// these — agents would silently no-op. Idempotent.
|
|
308
|
+
const permSync = await ensureWritePermissions(cwd);
|
|
309
|
+
if (permSync.changed) {
|
|
310
|
+
console.log(
|
|
311
|
+
pc.dim(` ${pc.green("~")} .claude/settings.json (added ${permSync.added.join(", ")})`),
|
|
312
|
+
);
|
|
313
|
+
}
|
|
314
|
+
|
|
266
315
|
console.log(pc.bold(pc.green(`\n✓ upgrade complete (v${kitVersion}).`)));
|
|
267
316
|
if (sidecars.length > 0) {
|
|
268
317
|
console.log(
|
|
@@ -2,16 +2,26 @@
|
|
|
2
2
|
//
|
|
3
3
|
// Reads harness.config.json. For each domain, walks every .rs file under
|
|
4
4
|
// the domain root (excluding target/, .git/, vendor/) and asserts no
|
|
5
|
-
//
|
|
5
|
+
// import goes "backward" through the layer order.
|
|
6
6
|
//
|
|
7
|
-
//
|
|
8
|
-
//
|
|
9
|
-
//
|
|
7
|
+
// Two layouts are supported:
|
|
8
|
+
//
|
|
9
|
+
// * Single-crate (default): a file's layer is the first path segment
|
|
10
|
+
// after `<root>/`. Intra-crate dependencies are written as
|
|
11
|
+
// `use crate::<layer>::...`.
|
|
12
|
+
//
|
|
13
|
+
// * Workspace mode (`layerDirPattern` + `useIdentPattern` in
|
|
14
|
+
// `harness.config.json`): each layer is its own crate. The directory
|
|
15
|
+
// pattern maps layer name → folder (e.g. `unibot-{layer}` →
|
|
16
|
+
// `unibot-types/`), and the use-ident pattern maps layer name → the
|
|
17
|
+
// crate identifier in `use` statements (e.g. `unibot_{layer}` →
|
|
18
|
+
// `use unibot_types::`). `layerDirPattern` defaults to `{layer}`; omitting both keeps the
|
|
19
|
+
// legacy single-crate behavior.
|
|
10
20
|
//
|
|
11
21
|
// Why Node + regex (not a Cargo binary):
|
|
12
22
|
// - Avoids polluting the user's Cargo workspace with a check crate.
|
|
13
23
|
// - Node is already required to install the kit (npx).
|
|
14
|
-
// - Regex over `use crate
|
|
24
|
+
// - Regex over `use <crate>::<X>` is sufficient — we never need full
|
|
15
25
|
// parse trees because the layer rule is a syntactic property.
|
|
16
26
|
// - `super::` and `self::` are scoped to the current module, which is
|
|
17
27
|
// by definition the same layer, so we ignore them.
|
|
@@ -68,6 +78,12 @@ function* walkRustFiles(root) {
|
|
|
68
78
|
}
|
|
69
79
|
|
|
70
80
|
// Returns { layer, domain } or null.
|
|
81
|
+
//
|
|
82
|
+
// Resolution:
|
|
83
|
+
// 1. Strip the domain root prefix from the path.
|
|
84
|
+
// 2. The first segment is the candidate layer directory.
|
|
85
|
+
// 3. Match it against `layerDirPattern` (default `{layer}`) — if the
|
|
86
|
+
// pattern resolves a layer name, use it.
|
|
71
87
|
function layerOf(relPath, cfg) {
|
|
72
88
|
for (const d of cfg.domains) {
|
|
73
89
|
const altPrefix = d.root + "/";
|
|
@@ -78,12 +94,72 @@ function layerOf(relPath, cfg) {
|
|
|
78
94
|
stripped = relPath.slice(sepPrefix.length);
|
|
79
95
|
else continue;
|
|
80
96
|
const first = stripped.split(/[\/\\]/)[0];
|
|
81
|
-
|
|
97
|
+
const pattern = d.layerDirPattern || "{layer}";
|
|
98
|
+
const layer = resolveLayerFromDir(first, pattern, d.layers);
|
|
99
|
+
if (layer) return { layer, domain: d };
|
|
100
|
+
}
|
|
101
|
+
return null;
|
|
102
|
+
}
|
|
103
|
+
|
|
104
|
+
// Given a directory name and a pattern like `unibot-{layer}`, return the
// matching layer name from `layers` (or null). Handles the legacy
// `{layer}` pattern as the identity case.
//
// A pattern that does not contain the `{layer}` placeholder cannot encode a
// layer name, so it never matches (returns null) instead of being treated as
// a bare prefix — otherwise pattern `crates` would classify a directory
// named `cratesx` as layer `x`.
/**
 * @param {string} dirName - First path segment under the domain root.
 * @param {string} pattern - Directory pattern containing `{layer}`.
 * @param {string[]} layers - Layer names declared for the domain.
 * @returns {string|null} The layer encoded by `dirName`, or null.
 */
function resolveLayerFromDir(dirName, pattern, layers) {
  if (pattern === "{layer}") {
    return layers.includes(dirName) ? dirName : null;
  }
  if (!pattern.includes("{layer}")) return null;

  // Literal prefix/suffix comparison: same result as an anchored
  // `^prefix(.+?)suffix$` match, without building a RegExp from config text.
  const [prefix, suffix = ""] = pattern.split("{layer}");
  const fitsAroundLayer =
    dirName.length > prefix.length + suffix.length && // layer part is non-empty
    dirName.startsWith(prefix) &&
    dirName.endsWith(suffix);
  if (!fitsAroundLayer) return null;

  const candidate = dirName.slice(prefix.length, dirName.length - suffix.length);
  return layers.includes(candidate) ? candidate : null;
}
|
|
120
|
+
|
|
121
|
+
// Capture the first identifier after `use ` (or `pub use ...`). The trailing
// `::` is optional — `use demo_service;` is legal Rust even though most
// real usage is `use demo_service::foo`.
const USE_RE = /\b(?:pub\s+)?use\s+([a-zA-Z_][a-zA-Z0-9_]*)/g;

/**
 * Return the layers referenced by `use` statements on one line of Rust code.
 *
 * @param {string} line - One source line (comments/strings already stripped
 *   by the caller).
 * @param {{layers: string[], useIdentPattern?: string}} domain - Domain
 *   entry from harness.config.json.
 * @returns {string[]} Layer names, in order of appearance (may repeat).
 */
function parseUseTargets(line, domain) {
  const useIdent = domain.useIdentPattern || "crate";

  // A config that spells out `"useIdentPattern": "crate"` still reaches this
  // function (the caller switches on the option being set). Honour the
  // single-crate form `use crate::<layer>::...` here instead of returning
  // nothing, which would silently disable the check for that domain.
  if (useIdent === "crate") {
    const crateUse = /\b(?:pub\s+)?use\s+crate::([a-zA-Z_][a-zA-Z0-9_]*)/g;
    return [...line.matchAll(crateUse)]
      .map((m) => m[1])
      .filter((segment) => domain.layers.includes(segment));
  }

  const layers = [];
  for (const [, ident] of line.matchAll(USE_RE)) {
    const layer = resolveLayerFromUseIdent(ident, useIdent, domain.layers);
    if (layer) layers.push(layer);
  }
  return layers;
}

// Map a `use <ident>` to a layer. For single-crate mode this is
// `use crate::<layer>::...` — `ident == "crate"` and the layer is the
// SECOND segment. For workspace mode it is `use <crate>::...` where the
// crate name itself encodes the layer.
/**
 * @param {string} ident - First identifier after `use`.
 * @param {string} useIdentPattern - e.g. `unibot_{layer}`, `{layer}`, `crate`.
 * @param {string[]} layers - Layer names declared for the domain.
 * @returns {string|null} The layer encoded by `ident`, or null.
 */
function resolveLayerFromUseIdent(ident, useIdentPattern, layers) {
  if (useIdentPattern === "crate") {
    // Single-crate mode: the first identifier is always `crate`, which
    // carries no layer information. The layer is the segment AFTER
    // `crate::`, which this function never sees, so there is nothing to
    // resolve from `ident` alone.
    return null;
  }
  if (useIdentPattern === "{layer}") {
    return layers.includes(ident) ? ident : null;
  }
  // Without the placeholder the pattern cannot encode a layer; treating it
  // as a prefix would map unrelated crates onto layers.
  if (!useIdentPattern.includes("{layer}")) return null;

  // Literal prefix/suffix comparison: same result as an anchored
  // `^prefix(.+?)suffix$` match, without building a RegExp from config text.
  const [prefix, suffix = ""] = useIdentPattern.split("{layer}");
  const fitsAroundLayer =
    ident.length > prefix.length + suffix.length && // layer part is non-empty
    ident.startsWith(prefix) &&
    ident.endsWith(suffix);
  if (!fitsAroundLayer) return null;

  const candidate = ident.slice(prefix.length, ident.length - suffix.length);
  return layers.includes(candidate) ? candidate : null;
}
|
|
85
161
|
|
|
86
|
-
//
|
|
162
|
+
// Backwards-compat: also keep the original single-crate matcher.
|
|
87
163
|
const USE_CRATE_RE = /\b(?:pub\s+)?use\s+crate::([a-zA-Z_][a-zA-Z0-9_]*)/g;
|
|
88
164
|
|
|
89
165
|
function parseUseCrate(line) {
|
|
@@ -134,9 +210,14 @@ function main() {
|
|
|
134
210
|
|
|
135
211
|
const content = readFileSync(file, "utf8");
|
|
136
212
|
const lines = content.split("\n");
|
|
213
|
+
// Workspace mode uses `useIdentPattern` (e.g. "unibot_{layer}");
|
|
214
|
+
// single-crate mode keeps the historical `use crate::<layer>::` form.
|
|
215
|
+
const workspaceMode = !!src.domain.useIdentPattern;
|
|
137
216
|
for (let i = 0; i < lines.length; i++) {
|
|
138
217
|
const codeOnly = stripCommentsAndStrings(lines[i]);
|
|
139
|
-
const targets =
|
|
218
|
+
const targets = workspaceMode
|
|
219
|
+
? parseUseTargets(codeOnly, src.domain)
|
|
220
|
+
: parseUseCrate(codeOnly);
|
|
140
221
|
for (const tgtLayer of targets) {
|
|
141
222
|
if (!src.domain.layers.includes(tgtLayer)) continue;
|
|
142
223
|
const tgtIdx = src.domain.layers.indexOf(tgtLayer);
|
|
@@ -22,15 +22,17 @@ export default [
|
|
|
22
22
|
"boundaries/include": ["src/**/*"],
|
|
23
23
|
},
|
|
24
24
|
rules: {
|
|
25
|
-
|
|
25
|
+
// eslint-plugin-boundaries v5: rule name is `element-types`, not `dependencies`.
|
|
26
|
+
// Schema: `{ from: ["t1"], allow: ["t2", "t3"] }` — flat arrays of element-type names.
|
|
27
|
+
"boundaries/element-types": [2, {
|
|
26
28
|
default: "disallow",
|
|
27
29
|
rules: [
|
|
28
|
-
{ from:
|
|
29
|
-
{ from:
|
|
30
|
-
{ from:
|
|
31
|
-
{ from:
|
|
32
|
-
{ from:
|
|
33
|
-
{ from:
|
|
30
|
+
{ from: ["ui"], allow: ["runtime", "service", "config", "types"] },
|
|
31
|
+
{ from: ["runtime"], allow: ["service", "repo", "config", "types"] },
|
|
32
|
+
{ from: ["service"], allow: ["repo", "config", "types"] },
|
|
33
|
+
{ from: ["repo"], allow: ["config", "types"] },
|
|
34
|
+
{ from: ["config"], allow: ["types"] },
|
|
35
|
+
{ from: ["types"], disallow: ["*"] },
|
|
34
36
|
],
|
|
35
37
|
}],
|
|
36
38
|
},
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
# pre-push hook — Stripe "shift-feedback-left" pattern. Runs only the
|
|
3
3
|
# deterministic checks (structural test + linter + tests on changed files).
|
|
4
4
|
# Lives in scripts/ so it ships with the repo; install via install-git-hooks.sh.
|
|
5
|
-
set -
|
|
5
|
+
set -eo pipefail
|
|
6
6
|
|
|
7
7
|
# Baseline monotonic guard. .harness/structural-baseline.json is decreasing-
|
|
8
8
|
# only — fixes REMOVE entries; no path should ADD them. Catches the "mask
|
|
@@ -33,11 +33,20 @@ if [ -f "$BASELINE_FILE" ] \
|
|
|
33
33
|
fi
|
|
34
34
|
fi
|
|
35
35
|
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
36
|
+
# Structural test. Skipped when `structuralTest.engine` is explicitly "none"
|
|
37
|
+
# (e.g. during scaffold of a polyglot repo where the adapter is not yet
|
|
38
|
+
# wired). Without this guard the push fails silently because
|
|
39
|
+
# `npm run harness:check` has no matching script.
|
|
40
|
+
if [ -f harness.config.json ] \
|
|
41
|
+
&& grep -qE '"engine"[[:space:]]*:[[:space:]]*"none"' harness.config.json; then
|
|
42
|
+
echo "[pre-push] structural test skipped (structuralTest.engine: none)"
|
|
39
43
|
else
|
|
40
|
-
|
|
44
|
+
echo "[pre-push] running structural test…"
|
|
45
|
+
if [ -f harness.config.json ] && grep -q '"language": "python"' harness.config.json; then
|
|
46
|
+
python -m harness.structural_test
|
|
47
|
+
else
|
|
48
|
+
npm run --silent harness:check
|
|
49
|
+
fi
|
|
41
50
|
fi
|
|
42
51
|
|
|
43
52
|
echo "[pre-push] running lint…"
|
|
@@ -39,8 +39,12 @@ run_check() {
|
|
|
39
39
|
fi
|
|
40
40
|
}
|
|
41
41
|
|
|
42
|
-
# Structural test.
|
|
43
|
-
|
|
42
|
+
# Structural test. Skipped when `structuralTest.engine` is explicitly "none"
|
|
43
|
+
# (e.g. during scaffold of a polyglot repo where the adapter is not yet
|
|
44
|
+
# wired). Without this guard the check fails silently with an empty body
|
|
45
|
+
# because `npm run harness:check` has no matching script.
|
|
46
|
+
if [ -f harness.config.json ] \
|
|
47
|
+
&& ! grep -qE '"engine"[[:space:]]*:[[:space:]]*"none"' harness.config.json; then
|
|
44
48
|
if grep -q '"language": "python"' harness.config.json; then
|
|
45
49
|
run_check structural-test python -m harness.structural_test || true
|
|
46
50
|
else
|
|
@@ -107,11 +111,17 @@ if [ -f harness.config.json ] && command -v jq >/dev/null 2>&1 && command -v git
|
|
|
107
111
|
while [ "$i" -lt "$NUM_DOMAINS" ]; do
|
|
108
112
|
ROOT=$(jq -r ".domains[$i].root" harness.config.json)
|
|
109
113
|
DOMAIN=$(jq -r ".domains[$i].name" harness.config.json)
|
|
114
|
+
# Optional layerDirPattern — supports conventions where the layer
|
|
115
|
+
# directory is not literally `{layer}`. Example: a Rust workspace
|
|
116
|
+
# with crates named `unibot-types`, `unibot-crypto`, ... uses
|
|
117
|
+
# `"layerDirPattern": "unibot-{layer}"`. Defaults to `{layer}`.
|
|
118
|
+
LAYER_PATTERN=$(jq -r ".domains[$i].layerDirPattern // \"{layer}\"" harness.config.json)
|
|
110
119
|
TOUCHED_COUNT=0
|
|
111
120
|
TOUCHED_NAMES=""
|
|
112
121
|
while IFS= read -r layer; do
|
|
113
122
|
[ -z "$layer" ] && continue
|
|
114
|
-
|
|
123
|
+
LAYER_DIR=$(printf '%s' "$LAYER_PATTERN" | sed "s/{layer}/$layer/g")
|
|
124
|
+
if echo "$CHANGED" | grep -qE "^${ROOT}/${LAYER_DIR}(/|$)"; then
|
|
115
125
|
TOUCHED_COUNT=$((TOUCHED_COUNT + 1))
|
|
116
126
|
TOUCHED_NAMES="$TOUCHED_NAMES $layer"
|
|
117
127
|
fi
|
|
@@ -1,7 +1,10 @@
|
|
|
1
1
|
#!/usr/bin/env bash
|
|
2
2
|
# PostToolUse hook — runs the structural test on the file just edited.
|
|
3
3
|
# Defensive: never blocks on missing tooling. Exit code 2 = block + Claude reads stderr.
|
|
4
|
-
|
|
4
|
+
#
|
|
5
|
+
# `pipefail` is critical — without it, `cmd | tail` swallows cmd's exit code
|
|
6
|
+
# and a real structural-test failure looks clean to the agent.
|
|
7
|
+
set -eo pipefail
|
|
5
8
|
|
|
6
9
|
INPUT=$(cat)
|
|
7
10
|
if ! command -v jq >/dev/null 2>&1; then
|
|
@@ -15,6 +18,7 @@ FILE=$(echo "$INPUT" | jq -r '.tool_input.file_path // empty')
|
|
|
15
18
|
case "$FILE" in
|
|
16
19
|
*.ts|*.tsx|*.js|*.jsx|*.mjs|*.cjs) ENGINE=ts ;;
|
|
17
20
|
*.py) ENGINE=py ;;
|
|
21
|
+
*.rs) ENGINE=rust ;;
|
|
18
22
|
*) exit 0 ;;
|
|
19
23
|
esac
|
|
20
24
|
|
|
@@ -25,6 +29,14 @@ if [ "${AHK_HOOK_MODE:-}" = "warn" ]; then
|
|
|
25
29
|
exit 0
|
|
26
30
|
fi
|
|
27
31
|
|
|
32
|
+
# Skip cleanly when the structural test is explicitly disabled (polyglot
|
|
33
|
+
# scaffolds where the adapter is not yet wired). Without this guard every
|
|
34
|
+
# edit fires a failing hook that the agent can't actually fix.
|
|
35
|
+
if [ -f harness.config.json ] \
|
|
36
|
+
&& grep -qE '"engine"[[:space:]]*:[[:space:]]*"none"' harness.config.json; then
|
|
37
|
+
exit 0
|
|
38
|
+
fi
|
|
39
|
+
|
|
28
40
|
# Run the structural test scoped to this file. Capture output so we can
|
|
29
41
|
# return only the relevant lines via stderr to Claude.
|
|
30
42
|
if [ "$ENGINE" = "ts" ]; then
|
|
@@ -46,6 +58,23 @@ Structural test failed for $FILE.
|
|
|
46
58
|
Layer order: see harness.config.json.
|
|
47
59
|
Run \`python -m harness.structural_test\` for full output.
|
|
48
60
|
Fix the violation before continuing — do NOT disable the test.
|
|
61
|
+
EOF
|
|
62
|
+
exit 2
|
|
63
|
+
fi
|
|
64
|
+
elif [ "$ENGINE" = "rust" ]; then
|
|
65
|
+
# The Rust adapter is a Node script (`harness/structural-check.mjs`); it
|
|
66
|
+
# scans the whole workspace rather than a single file because the regex
|
|
67
|
+
# is cheap. If the script isn't present yet, exit 0 (graceful degrade).
|
|
68
|
+
if [ ! -f harness/structural-check.mjs ]; then
|
|
69
|
+
exit 0
|
|
70
|
+
fi
|
|
71
|
+
if ! node harness/structural-check.mjs 2>&1 | tail -50 >&2; then
|
|
72
|
+
cat >&2 <<EOF
|
|
73
|
+
|
|
74
|
+
Structural test failed (triggered by edit to $FILE).
|
|
75
|
+
Layer order: see harness.config.json.
|
|
76
|
+
Run \`node harness/structural-check.mjs\` for full output.
|
|
77
|
+
Fix the violation before continuing — do NOT disable the test.
|
|
49
78
|
EOF
|
|
50
79
|
exit 2
|
|
51
80
|
fi
|