npm - agent-harness-kit - Versions diffs - 0.5.0 → 0.5.1 - Mend

agent-harness-kit 0.5.0 → 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6)

package/.claude-plugin/marketplace.json +2 -2
package/README.md +33 -2
package/package.json +1 -1
package/src/core/upgrade.mjs +50 -1
package/src/templates/.claude/settings.json.hbs +3 -0
package/src/templates/_adapter-typescript/eslint.config.mjs +9 -7

package/.claude-plugin/marketplace.json CHANGED

@@ -11,9 +11,9 @@
       "source": {
         "source": "github",
         "repo": "tuanle96/agent-harness-kit",
-        "ref": "v0.3.0"
+        "ref": "v0.5.1"
       },
-      "version": "0.3.0",
+      "version": "0.5.1",
       "description": "Solo-dev harness engineering kit — layered architecture, GC ritual, structural tests, review subagents.",
       "category": "development",
       "keywords": [

package/README.md CHANGED

@@ -67,9 +67,13 @@ Option B: install as a Claude Code plugin
    is ~100 lines). The kit's CLAUDE.md is 50–80 lines.
 2. **Every agent failure becomes a permanent harness change** (Hashimoto's
    discipline). The `/propose-harness-improvement` skill enforces this.
-3. **Computational sensors before LLM sensors** (Fowler/Böckeler). The TS and
+3. **Computational sensors as safety net** (Fowler/Böckeler). The TS and
    Python adapters ship one deterministic structural test per language; LLM
-   subagents are reserved for semantic judgment.
+   subagents are reserved for semantic judgment. Note: in our 1-shot bench
+   (n=3, ts-layered), the agent already followed visible seed patterns and
+   produced 0 boundary violations without enforcement. Treat structural tests
+   as a safety net for drift in long sessions, not as a happy-path
+   differentiator — see [Honest expectations](#honest-expectations).
 4. **Garbage collection over Friday cleanup, scaled to solo** (OpenAI's
    ritual, shrunk to top-3 fixes per week).
@@ -140,6 +144,33 @@ agent-harness-kit doctor      # diagnose installed kit + Claude Code env
 agent-harness-kit --version
 ```
+## Honest expectations
+What this kit **does** differentiate from bare claude-cli (anecdotal + design-level):
+- Opinionated CLAUDE.md template (50–80 lines) so context isn't blown on style
+- 10 skills (`/add-feature`, `/garbage-collection`, `/propose-harness-improvement`, …) that codify Hashimoto/OpenAI rituals
+- 5 read-only review subagents for cheap second-opinion passes
+- `feature_list.json` + ADR template + GC ritual for solo-scale planning hygiene
+- Solo-dev cost defaults (~$2/day) and per-run budget enforcement
+What it does **not** measurably differentiate (5 consecutive null benches, May 2026):
+- Structural enforcement on happy-path 1-shot tasks. When seed code shows the
+  layer pattern, claude-cli follows it — the boundaries lint has nothing to
+  catch. We measured 0/6 ui→repo violations across bare and kit arms on the
+  `ts-layered` fixture.
+Where the structural test *might* still earn its keep (untested, listed for
+honesty, not as a claim):
+- Long multi-turn sessions where pattern context drifts
+- Adversarial "make it fast" pressure that tempts shortcuts
+- Greenfield code with no existing pattern to follow
+- Weaker model substrates (haiku, gpt-4o-mini)
+Use the lint as a **safety net**, not as the reason you adopted the kit.
 ## Token / cost expectations
 A typical day with the default model split (Sonnet 4.6 main + Haiku 4.5

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "agent-harness-kit",
-  "version": "0.5.0",
+  "version": "0.5.1",
   "description": "Solo-dev harness engineering kit for Claude Code. Layered architecture, structural tests, garbage-collection ritual, review subagents — without the enterprise overhead.",
   "type": "module",
   "bin": {

package/src/core/upgrade.mjs CHANGED

@@ -67,6 +67,37 @@ export async function syncHarnessConfigVersion(cwd, kitVersion) {
   return { changed: true, reason: "synced" };
 }
+// Ensure .claude/settings.json includes the critical write-tool permissions.
+// Older kit versions shipped a template without Edit/Write/MultiEdit, which
+// causes agents to silently no-op when they try to modify files. This patch
+// adds any missing entries to the existing `permissions.allow` array without
+// touching anything else the user customized.
+//
+// Exported for unit tests; called from `upgrade()` below.
+export async function ensureWritePermissions(cwd) {
+  const settingsPath = resolve(cwd, ".claude/settings.json");
+  if (!existsSync(settingsPath)) return { changed: false, reason: "missing" };
+  const raw = await readFile(settingsPath, "utf8");
+  let cfg;
+  try {
+    cfg = JSON.parse(raw);
+  } catch {
+    return { changed: false, reason: "invalid-json" };
+  }
+  const allow = cfg?.permissions?.allow;
+  if (!Array.isArray(allow)) return { changed: false, reason: "no-allow-list" };
+  const required = ["Edit", "Write", "MultiEdit"];
+  const missing = required.filter((p) => !allow.includes(p));
+  if (missing.length === 0) return { changed: false, reason: "already-present" };
+  // Prepend missing entries so they appear before other Bash(...) rules,
+  // matching the template's ordering.
+  cfg.permissions.allow = [...missing, ...allow];
+  await writeFile(settingsPath, JSON.stringify(cfg, null, 2) + "\n");
+  return { changed: true, reason: "patched", added: missing };
+}
 const __dirname = dirname(fileURLToPath(import.meta.url));
 const TEMPLATES_ROOT = resolve(__dirname, "..", "templates");
@@ -115,11 +146,19 @@ export async function upgrade({ cwd, kitVersion, yes }) {
     // older `version`/`$schema` (it's user-owned and skipped by the file walk).
     // Sync those two fields so doctor stops flagging drift.
     const cfgSync = await syncHarnessConfigVersion(cwd, kitVersion);
+    // Also patch settings.json if it's missing write permissions (legacy bug).
+    const permSync = await ensureWritePermissions(cwd);
     if (cfgSync.changed) {
       console.log(
         pc.green(`harness.config.json version + $schema synced to v${kitVersion}.`),
       );
-    } else {
+    }
+    if (permSync.changed) {
+      console.log(
+        pc.green(`.claude/settings.json patched: added ${permSync.added.join(", ")}.`),
+      );
+    }
+    if (!cfgSync.changed && !permSync.changed) {
       console.log(pc.green(`Already on v${kitVersion}. Nothing to do.`));
     }
     return;
@@ -263,6 +302,16 @@ export async function upgrade({ cwd, kitVersion, yes }) {
     console.log(pc.dim(`  ${pc.green("~")} harness.config.json (version + $schema synced)`));
   }
+  // Patch .claude/settings.json if it's missing the critical write
+  // permissions (Edit/Write/MultiEdit). Old kit versions shipped without
+  // these — agents would silently no-op. Idempotent.
+  const permSync = await ensureWritePermissions(cwd);
+  if (permSync.changed) {
+    console.log(
+      pc.dim(`  ${pc.green("~")} .claude/settings.json (added ${permSync.added.join(", ")})`),
+    );
+  }
   console.log(pc.bold(pc.green(`\n✓ upgrade complete (v${kitVersion}).`)));
   if (sidecars.length > 0) {
     console.log(

package/src/templates/.claude/settings.json.hbs CHANGED

@@ -2,6 +2,9 @@
   "$schema": "https://json.schemastore.org/claude-code-settings.json",
   "permissions": {
     "allow": [
+      "Edit",
+      "Write",
+      "MultiEdit",
       "Bash(npm run harness:*)",
       "Bash(npm run lint:*)",
       "Bash(npm test:*)",

package/src/templates/_adapter-typescript/eslint.config.mjs CHANGED

@@ -22,15 +22,17 @@ export default [
       "boundaries/include": ["src/**/*"],
     },
     rules: {
-      "boundaries/dependencies": [2, {
+      // eslint-plugin-boundaries v5: rule name is `element-types`, not `dependencies`.
+      // Schema: `{ from: ["t1"], allow: ["t2", "t3"] }` — flat arrays of element-type names.
+      "boundaries/element-types": [2, {
         default: "disallow",
         rules: [
-          { from: { type: "ui" },      allow: { to: { type: ["runtime","service","config","types"] } } },
-          { from: { type: "runtime" }, allow: { to: { type: ["service","repo","config","types"] } } },
-          { from: { type: "service" }, allow: { to: { type: ["repo","config","types"] } } },
-          { from: { type: "repo" },    allow: { to: { type: ["config","types"] } } },
-          { from: { type: "config" },  allow: { to: { type: ["types"] } } },
-          { from: { type: "types" },   disallow: { to: { type: "*" } } },
+          { from: ["ui"],      allow: ["runtime", "service", "config", "types"] },
+          { from: ["runtime"], allow: ["service", "repo", "config", "types"] },
+          { from: ["service"], allow: ["repo", "config", "types"] },
+          { from: ["repo"],    allow: ["config", "types"] },
+          { from: ["config"],  allow: ["types"] },
+          { from: ["types"],   disallow: ["*"] },
         ],
       }],
     },