agent-harness-kit 0.5.0 → 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -11,9 +11,9 @@
11
11
  "source": {
12
12
  "source": "github",
13
13
  "repo": "tuanle96/agent-harness-kit",
14
- "ref": "v0.3.0"
14
+ "ref": "v0.5.1"
15
15
  },
16
- "version": "0.3.0",
16
+ "version": "0.5.1",
17
17
  "description": "Solo-dev harness engineering kit — layered architecture, GC ritual, structural tests, review subagents.",
18
18
  "category": "development",
19
19
  "keywords": [
package/README.md CHANGED
@@ -67,9 +67,13 @@ Option B: install as a Claude Code plugin
67
67
  is ~100 lines). The kit's CLAUDE.md is 50–80 lines.
68
68
  2. **Every agent failure becomes a permanent harness change** (Hashimoto's
69
69
  discipline). The `/propose-harness-improvement` skill enforces this.
70
- 3. **Computational sensors before LLM sensors** (Fowler/Böckeler). The TS and
70
+ 3. **Computational sensors as safety net** (Fowler/Böckeler). The TS and
71
71
  Python adapters ship one deterministic structural test per language; LLM
72
- subagents are reserved for semantic judgment.
72
+ subagents are reserved for semantic judgment. Note: in our 1-shot bench
73
+ (n=3, ts-layered), the agent already followed visible seed patterns and
74
+ produced 0 boundary violations without enforcement. Treat structural tests
75
+ as a safety net for drift in long sessions, not as a happy-path
76
+ differentiator — see [Honest expectations](#honest-expectations).
73
77
  4. **Garbage collection over Friday cleanup, scaled to solo** (OpenAI's
74
78
  ritual, shrunk to top-3 fixes per week).
75
79
 
@@ -140,6 +144,33 @@ agent-harness-kit doctor # diagnose installed kit + Claude Code env
140
144
  agent-harness-kit --version
141
145
  ```
142
146
 
147
+ ## Honest expectations
148
+
149
+ What this kit **does** add over bare claude-cli (anecdotal + design-level evidence):
150
+
151
+ - Opinionated CLAUDE.md template (50–80 lines) so context isn't blown on style
152
+ - 10 skills (`/add-feature`, `/garbage-collection`, `/propose-harness-improvement`, …) that codify Hashimoto/OpenAI rituals
153
+ - 5 read-only review subagents for cheap second-opinion passes
154
+ - `feature_list.json` + ADR template + GC ritual for solo-scale planning hygiene
155
+ - Solo-dev cost defaults (~$2/day) and per-run budget enforcement
156
+
157
+ What it does **not** measurably improve over bare claude-cli (5 consecutive null benchmark results, May 2026):
158
+
159
+ - Structural enforcement on happy-path 1-shot tasks. When seed code shows the
160
+ layer pattern, claude-cli follows it — the boundaries lint has nothing to
161
+ catch. We measured 0/6 ui→repo violations across bare and kit arms on the
162
+ `ts-layered` fixture.
163
+
164
+ Where the structural test *might* still earn its keep (untested, listed for
165
+ honesty, not as a claim):
166
+
167
+ - Long multi-turn sessions where pattern context drifts
168
+ - Adversarial "make it fast" pressure that tempts shortcuts
169
+ - Greenfield code with no existing pattern to follow
170
+ - Weaker model substrates (haiku, gpt-4o-mini)
171
+
172
+ Use the lint as a **safety net**, not as the reason you adopted the kit.
173
+
143
174
  ## Token / cost expectations
144
175
 
145
176
  A typical day with the default model split (Sonnet 4.6 main + Haiku 4.5
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "agent-harness-kit",
3
- "version": "0.5.0",
3
+ "version": "0.5.1",
4
4
  "description": "Solo-dev harness engineering kit for Claude Code. Layered architecture, structural tests, garbage-collection ritual, review subagents — without the enterprise overhead.",
5
5
  "type": "module",
6
6
  "bin": {
@@ -67,6 +67,37 @@ export async function syncHarnessConfigVersion(cwd, kitVersion) {
67
67
  return { changed: true, reason: "synced" };
68
68
  }
69
69
 
/**
 * Ensure `.claude/settings.json` allows the file-modifying tools.
 *
 * Older kit versions shipped a settings template without Edit/Write/MultiEdit,
 * which causes agents to silently no-op when they try to modify files. This
 * patch prepends the missing tool names to the existing `permissions.allow`
 * array and leaves every other setting as the user wrote it.
 *
 * A tool counts as "already configured" (and is therefore NOT added) when the
 * user mentions it anywhere in `permissions.allow`, `permissions.deny` or
 * `permissions.ask`, either bare (`"Edit"`) or scoped (`"Edit(src/**)"`).
 * Adding the bare tool next to a scoped or restrictive rule would widen or
 * contradict a choice the user made on purpose.
 *
 * Exported for unit tests; called from `upgrade()`.
 *
 * @param {string} cwd - Project root that contains the `.claude/` directory.
 * @returns {Promise<{changed: boolean, reason: string, added?: string[]}>}
 *   `changed: false` with reason `"missing"` (no settings file),
 *   `"invalid-json"`, `"no-allow-list"` or `"already-present"`; otherwise
 *   `changed: true`, reason `"patched"` and the list of `added` tool names.
 */
export async function ensureWritePermissions(cwd) {
  const settingsPath = resolve(cwd, ".claude/settings.json");
  if (!existsSync(settingsPath)) return { changed: false, reason: "missing" };

  const raw = await readFile(settingsPath, "utf8");
  let cfg;
  try {
    cfg = JSON.parse(raw);
  } catch {
    // The file is user-owned; never overwrite something we could not parse.
    return { changed: false, reason: "invalid-json" };
  }

  const allow = cfg?.permissions?.allow;
  if (!Array.isArray(allow)) return { changed: false, reason: "no-allow-list" };

  // Every rule the user wrote for any permission list; non-array values are ignored.
  const userRules = [allow, cfg.permissions.deny, cfg.permissions.ask]
    .filter(Array.isArray)
    .flat();

  // True when some rule targets `tool`, bare ("Edit") or scoped ("Edit(...)").
  const isConfigured = (tool) =>
    userRules.some(
      (rule) =>
        typeof rule === "string" && (rule === tool || rule.startsWith(`${tool}(`)),
    );

  const required = ["Edit", "Write", "MultiEdit"];
  const missing = required.filter((tool) => !isConfigured(tool));
  if (missing.length === 0) return { changed: false, reason: "already-present" };

  // Prepend missing entries so they appear before other Bash(...) rules,
  // matching the template's ordering.
  cfg.permissions.allow = [...missing, ...allow];
  await writeFile(settingsPath, `${JSON.stringify(cfg, null, 2)}\n`);
  return { changed: true, reason: "patched", added: missing };
}
100
+
70
101
  const __dirname = dirname(fileURLToPath(import.meta.url));
71
102
  const TEMPLATES_ROOT = resolve(__dirname, "..", "templates");
72
103
 
@@ -115,11 +146,19 @@ export async function upgrade({ cwd, kitVersion, yes }) {
115
146
  // older `version`/`$schema` (it's user-owned and skipped by the file walk).
116
147
  // Sync those two fields so doctor stops flagging drift.
117
148
  const cfgSync = await syncHarnessConfigVersion(cwd, kitVersion);
149
+ // Also patch settings.json if it's missing write permissions (legacy bug).
150
+ const permSync = await ensureWritePermissions(cwd);
118
151
  if (cfgSync.changed) {
119
152
  console.log(
120
153
  pc.green(`harness.config.json version + $schema synced to v${kitVersion}.`),
121
154
  );
122
- } else {
155
+ }
156
+ if (permSync.changed) {
157
+ console.log(
158
+ pc.green(`.claude/settings.json patched: added ${permSync.added.join(", ")}.`),
159
+ );
160
+ }
161
+ if (!cfgSync.changed && !permSync.changed) {
123
162
  console.log(pc.green(`Already on v${kitVersion}. Nothing to do.`));
124
163
  }
125
164
  return;
@@ -263,6 +302,16 @@ export async function upgrade({ cwd, kitVersion, yes }) {
263
302
  console.log(pc.dim(` ${pc.green("~")} harness.config.json (version + $schema synced)`));
264
303
  }
265
304
 
305
+ // Patch .claude/settings.json if it's missing the critical write
306
+ // permissions (Edit/Write/MultiEdit). Old kit versions shipped without
307
+ // these — agents would silently no-op. Idempotent.
308
+ const permSync = await ensureWritePermissions(cwd);
309
+ if (permSync.changed) {
310
+ console.log(
311
+ pc.dim(` ${pc.green("~")} .claude/settings.json (added ${permSync.added.join(", ")})`),
312
+ );
313
+ }
314
+
266
315
  console.log(pc.bold(pc.green(`\n✓ upgrade complete (v${kitVersion}).`)));
267
316
  if (sidecars.length > 0) {
268
317
  console.log(
@@ -2,6 +2,9 @@
2
2
  "$schema": "https://json.schemastore.org/claude-code-settings.json",
3
3
  "permissions": {
4
4
  "allow": [
5
+ "Edit",
6
+ "Write",
7
+ "MultiEdit",
5
8
  "Bash(npm run harness:*)",
6
9
  "Bash(npm run lint:*)",
7
10
  "Bash(npm test:*)",
@@ -22,15 +22,17 @@ export default [
22
22
  "boundaries/include": ["src/**/*"],
23
23
  },
24
24
  rules: {
25
- "boundaries/dependencies": [2, {
25
+ // eslint-plugin-boundaries v5: rule name is `element-types`, not `dependencies`.
26
+ // Schema: `{ from: ["t1"], allow: ["t2", "t3"] }` — flat arrays of element-type names.
27
+ "boundaries/element-types": [2, {
26
28
  default: "disallow",
27
29
  rules: [
28
- { from: { type: "ui" }, allow: { to: { type: ["runtime","service","config","types"] } } },
29
- { from: { type: "runtime" }, allow: { to: { type: ["service","repo","config","types"] } } },
30
- { from: { type: "service" }, allow: { to: { type: ["repo","config","types"] } } },
31
- { from: { type: "repo" }, allow: { to: { type: ["config","types"] } } },
32
- { from: { type: "config" }, allow: { to: { type: ["types"] } } },
33
- { from: { type: "types" }, disallow: { to: { type: "*" } } },
30
+ { from: ["ui"], allow: ["runtime", "service", "config", "types"] },
31
+ { from: ["runtime"], allow: ["service", "repo", "config", "types"] },
32
+ { from: ["service"], allow: ["repo", "config", "types"] },
33
+ { from: ["repo"], allow: ["config", "types"] },
34
+ { from: ["config"], allow: ["types"] },
35
+ { from: ["types"], disallow: ["*"] },
34
36
  ],
35
37
  }],
36
38
  },