npm - agent-harness-kit - Versions diffs - 0.5.1 → 0.7.0 - Mend

agent-harness-kit 0.5.1 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26) hide show

package/.claude-plugin/marketplace.json +2 -2
package/.claude-plugin/plugin.json +1 -1
package/README.md +29 -0
package/bin/cli.mjs +10 -1
package/package.json +1 -1
package/src/core/detect-stack.mjs +32 -0
package/src/core/render-templates.mjs +111 -4
package/src/templates/.claude/hooks/hooks.json +87 -0
package/src/templates/CLAUDE.md.hbs +1 -1
package/src/templates/CLAUDE.md.vi.hbs +70 -0
package/src/templates/_adapter-kotlin/harness/structural-check.mjs.hbs +286 -0
package/src/templates/_adapter-rust/harness/structural-check.mjs.hbs +333 -60
package/src/templates/_adapter-swift/harness/structural-check.mjs.hbs +285 -0
package/src/templates/harness.config.json.hbs +5 -3
package/src/templates/scripts/_lib/approx-tokens.mjs +48 -0
package/src/templates/scripts/_lib/json-pick.mjs +278 -0
package/src/templates/scripts/harness-report.mjs +95 -1
package/src/templates/scripts/pre-compact.sh.hbs +121 -0
package/src/templates/scripts/pre-push.sh +42 -8
package/src/templates/scripts/precompletion-checklist.sh.hbs +143 -24
package/src/templates/scripts/pretooluse-bash-guard.sh.hbs +146 -0
package/src/templates/scripts/session-end.sh.hbs +48 -0
package/src/templates/scripts/session-start.sh.hbs +139 -0
package/src/templates/scripts/structural-test-on-edit.sh.hbs +56 -4
package/src/templates/scripts/telemetry-on-skill.sh +32 -10
package/src/templates/.claude/hooks/hooks.json.hbs +0 -39

package/src/templates/scripts/harness-report.mjs CHANGED Viewed

@@ -20,7 +20,9 @@ const ROOT = process.cwd();
 const RESULTS_DIR = resolve(ROOT, ".harness/eval/results");
 const TELEMETRY = resolve(ROOT, ".harness/telemetry.jsonl");
 const NOW = Date.now();
-const SEVEN_DAYS = 7 * 24 * 60 * 60 * 1000;
+const ONE_DAY = 24 * 60 * 60 * 1000;
+const SEVEN_DAYS = 7 * ONE_DAY;
+const FOURTEEN_DAYS = 14 * ONE_DAY;
 async function readJsonl(path) {
   if (!existsSync(path)) return [];
@@ -61,6 +63,16 @@ function recent(rows, key = "ts") {
   });
 }
+// Rows aged 7–14 days. Used as the comparator for week-over-week deltas
+// so users can spot drift instead of staring at a single-week snapshot.
+function priorWeek(rows, key = "ts") {
+  return rows.filter((r) => {
+    const t = r[key] ? new Date(r[key]).getTime() : r._mtime ?? 0;
+    const age = NOW - t;
+    return age > SEVEN_DAYS && age <= FOURTEEN_DAYS;
+  });
+}
 function tokensOf(row) {
   return (row.grades ?? [])
     .filter((g) => g.dim === "efficiency")
@@ -172,16 +184,98 @@ function driftSignals(evalRows, telemetryRows) {
   }
 }
+// Aggregate eval rows by task into { passed, total, tokens }.
+function aggregateEvals(rows) {
+  const byTask = new Map();
+  for (const r of rows) {
+    const cur = byTask.get(r.taskId) ?? { passed: 0, total: 0, tokens: 0 };
+    cur.total++;
+    if (r.passed) cur.passed++;
+    cur.tokens += tokensOf(r);
+    byTask.set(r.taskId, cur);
+  }
+  return byTask;
+}
+// Render a single delta line. signMode controls icon meaning — for pass-rate,
+// up is good; for tokens, up is bad; for skill invocations, neutral.
+function fmtDelta(now, then, signMode = "neutral", unit = "") {
+  if (then === undefined) return `(new) ${now}${unit}`;
+  const diff = now - then;
+  if (diff === 0) return `${now}${unit} → ${then}${unit}  (=)`;
+  let arrow = diff > 0 ? "↑" : "↓";
+  // Color the arrow by "is this a regression?"
+  let marker = " ";
+  if (signMode === "good-up") marker = diff > 0 ? "+" : "-";
+  else if (signMode === "good-down") marker = diff > 0 ? "-" : "+";
+  return `${now}${unit} ← ${then}${unit}  (${arrow}${marker} ${Math.abs(diff)}${unit})`;
+}
+function weekOverWeek(evalRecent, evalPrior, telRecent, telPrior) {
+  console.log(`\n### Week-over-week (last 7d vs prior 7d)`);
+  const aRecent = aggregateEvals(evalRecent);
+  const aPrior = aggregateEvals(evalPrior);
+  if (aRecent.size === 0 && aPrior.size === 0) {
+    console.log("  (no eval data in either window — run `npm run harness:eval`)");
+  } else {
+    console.log("  task                    pass-rate (now ← prior)        avg-tokens (now ← prior)");
+    console.log("  ----------------------  ----------------------------   --------------------------");
+    const taskIds = new Set([...aRecent.keys(), ...aPrior.keys()]);
+    for (const t of [...taskIds].sort()) {
+      const now = aRecent.get(t);
+      const prior = aPrior.get(t);
+      const nowRate = now ? Math.round((now.passed / now.total) * 100) : null;
+      const priorRate = prior ? Math.round((prior.passed / prior.total) * 100) : null;
+      const nowTok = now && now.total > 0 ? Math.round(now.tokens / now.total) : 0;
+      const priorTok = prior && prior.total > 0 ? Math.round(prior.tokens / prior.total) : 0;
+      const rateCell = nowRate === null
+        ? "(absent now)"
+        : priorRate === null
+          ? `${nowRate}% (new)`
+          : `${nowRate}% ← ${priorRate}%  (${nowRate - priorRate >= 0 ? "+" : ""}${nowRate - priorRate})`;
+      const tokCell = nowTok === 0 && priorTok === 0
+        ? "—"
+        : `${nowTok} ← ${priorTok}  (${nowTok - priorTok >= 0 ? "+" : ""}${nowTok - priorTok})`;
+      console.log(
+        `  ${t.padEnd(22)}  ${rateCell.padEnd(30)} ${tokCell}`,
+      );
+    }
+  }
+  // Skill invocation deltas.
+  const recentBySkill = new Map();
+  for (const r of telRecent) recentBySkill.set(r.skill, (recentBySkill.get(r.skill) ?? 0) + 1);
+  const priorBySkill = new Map();
+  for (const r of telPrior) priorBySkill.set(r.skill, (priorBySkill.get(r.skill) ?? 0) + 1);
+  const allSkills = new Set([...recentBySkill.keys(), ...priorBySkill.keys()]);
+  if (allSkills.size > 0) {
+    console.log("\n  skill                          invocations (now ← prior)");
+    console.log("  -----------------------------  -------------------------------");
+    for (const s of [...allSkills].sort()) {
+      const n = recentBySkill.get(s) ?? 0;
+      const p = priorBySkill.get(s) ?? 0;
+      const d = n - p;
+      const cell = p === 0 ? `${n}  (new)` : `${n} ← ${p}  (${d >= 0 ? "+" : ""}${d})`;
+      console.log(`  ${s.padEnd(29)}  ${cell}`);
+    }
+  }
+}
 async function main() {
   const evalAll = await loadEvalResults();
   const telemetryAll = await readJsonl(TELEMETRY);
   const evalRows = recent(evalAll);
+  const evalPrior = priorWeek(evalAll); // eval rows aged 7–14 days: the week-over-week comparator
   const telemetryRows = recent(telemetryAll);
+  const telemetryPrior = priorWeek(telemetryAll); // same 7–14 day window, for telemetry rows
   console.log("=== agent-harness-kit report ===");
   console.log(`Generated: ${new Date().toISOString()}`);
   summarizeEvals(evalRows);
   summarizeTelemetry(telemetryRows);
+  weekOverWeek(evalRows, evalPrior, telemetryRows, telemetryPrior); // prints the per-task and per-skill delta tables
   driftSignals(evalRows, telemetryRows);
   console.log("");
 }

package/src/templates/scripts/pre-compact.sh.hbs ADDED Viewed

@@ -0,0 +1,121 @@
+#!/usr/bin/env bash
+# PreCompact hook — write a small snapshot of state to
+# .harness/compaction-snapshot.json BEFORE the context compactor runs.
+# The companion SessionStart hook (matcher: compact) reads this snapshot
+# back and re-injects the salient fields so the post-compaction model
+# knows which feature it was working on, which branch, and how dirty
+# the tree was.
+#
+# This is the kit's answer to the "I lost everything after compaction"
+# failure mode that recurs in long sessions. Pair with:
+#   - SessionStart matcher compact → re-inject
+#   - PostCompact (not implemented; SessionStart does the work)
+#
+# Snapshot contents:
+#   {
+#     "compacted_at": "2026-05-16T19:00:00Z",
+#     "branch": "main",
+#     "sha": "abc1234",
+#     "uncommitted": 7,
+#     "feature": "auth-endpoint — POST /auth/login",
+#     "trigger": "manual|auto",
+#     "estimated_tokens_removed": 5000
+#   }
+#
+# The hook NEVER blocks (exit 0 always). PreCompact can technically block
+# compaction but doing so defeats the entire point.
+set -eo pipefail  # NOTE(review): errexit is active, so any unguarded failure exits non-zero before the final `exit 0`
+INPUT=$(cat)  # hook payload read from stdin; parsed below for .trigger and .estimated_tokens_removed
+SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"  # absolute directory of this script, used to locate _lib/json-pick.mjs
+have_jq() {
+  [ "${AHK_DISABLE_JQ:-}" = "1" ] && return 1
+  command -v jq >/dev/null 2>&1
+}
+have_jp() {
+  have_jq && return 0
+  command -v node >/dev/null 2>&1 && [ -f "$SCRIPT_DIR/_lib/json-pick.mjs" ] && return 0
+  return 1
+}
+jp() {
+  if have_jq; then
+    if [ -n "$2" ]; then jq -r "$1" "$2"; else jq -r "$1"; fi
+  else
+    if [ -n "$2" ]; then
+      node "$SCRIPT_DIR/_lib/json-pick.mjs" "$1" "$2"
+    else
+      node "$SCRIPT_DIR/_lib/json-pick.mjs" "$1"
+    fi
+  fi
+}
+# Gather state and write .harness/compaction-snapshot.json.
+# Every fallible step is guarded so the hook still reaches `exit 0` even
+# with errexit/pipefail active, and no gathered value is ever spliced into
+# program source: they reach node via the environment and jq via --arg.
+TRIGGER=""
+TOKENS=""
+if have_jp; then
+  TRIGGER=$(printf '%s\n' "$INPUT" | jp '.trigger // "auto"' 2>/dev/null || true)
+  TOKENS=$(printf '%s\n' "$INPUT" | jp '.estimated_tokens_removed // 0' 2>/dev/null || true)
+fi
+[ -n "$TRIGGER" ] || TRIGGER="auto"
+# TOKENS feeds `jq --argjson`, so it must be a bare non-negative integer.
+case "$TOKENS" in ''|*[!0-9]*) TOKENS=0 ;; esac
+# Without the directory there is nowhere to write; give up quietly.
+mkdir -p .harness 2>/dev/null || exit 0
+TS=$(date -u +%Y-%m-%dT%H:%M:%SZ)
+BR="(no-git)"
+SHA="(no-git)"
+COUNT=0
+if command -v git >/dev/null 2>&1 && git rev-parse --git-dir >/dev/null 2>&1; then
+  # `git branch --show-current` exits 0 and prints nothing on a detached
+  # HEAD, so the fallback has to test for empty output, not exit status.
+  BR=$(git branch --show-current 2>/dev/null || true)
+  [ -n "$BR" ] || BR="(detached)"
+  SHA=$(git rev-parse --short HEAD 2>/dev/null || echo "(none)")
+  COUNT=$(git status --short 2>/dev/null | wc -l | tr -d ' ' || true)
+fi
+# COUNT is written as a JSON number; anything non-numeric becomes 0.
+case "$COUNT" in ''|*[!0-9]*) COUNT=0 ;; esac
+# First feature in feature_list.json that is not passing yet, as "id — title".
+FEAT=""
+if [ -f feature_list.json ]; then
+  if have_jq; then
+    FEAT=$(jq -r 'first(.features[] | select(.passes == false)) | "\(.id) — \(.title)"' \
+      feature_list.json 2>/dev/null || true)
+  elif command -v node >/dev/null 2>&1; then
+    FEAT=$(node -e "
+      const f = JSON.parse(require('fs').readFileSync('feature_list.json','utf8'));
+      const o = (f.features || []).find(x => x.passes === false);
+      if (o) process.stdout.write(o.id + ' — ' + o.title);
+    " 2>/dev/null || true)
+  fi
+fi
+# Minimal JSON string escaping (backslash and double quote) for the
+# last-resort heredoc writer below.
+_ahk_json_escape() {
+  printf '%s' "$1" | sed 's/\\/\\\\/g; s/"/\\"/g'
+}
+# Compose JSON via Node when available. The script text is single-quoted and
+# constant; all data arrives through environment variables, so quotes or
+# backslashes in a branch name or hook payload cannot alter the program.
+if command -v node >/dev/null 2>&1; then
+  AHK_TS="$TS" AHK_BR="$BR" AHK_SHA="$SHA" AHK_COUNT="$COUNT" \
+  AHK_FEAT="$FEAT" AHK_TRIGGER="$TRIGGER" AHK_TOKENS="$TOKENS" \
+  node -e '
+    const fs = require("fs");
+    const env = process.env;
+    const snap = {
+      compacted_at: env.AHK_TS,
+      branch: env.AHK_BR,
+      sha: env.AHK_SHA,
+      uncommitted: parseInt(env.AHK_COUNT, 10) || 0,
+      feature: env.AHK_FEAT || "",
+      trigger: env.AHK_TRIGGER || "auto",
+      estimated_tokens_removed: parseInt(env.AHK_TOKENS, 10) || 0
+    };
+    fs.writeFileSync(".harness/compaction-snapshot.json", JSON.stringify(snap, null, 2) + "\n");
+  ' || true
+elif have_jq; then
+  jq -n --arg ts "$TS" --arg br "$BR" --arg sha "$SHA" \
+        --argjson cnt "$COUNT" --arg feat "$FEAT" \
+        --arg trig "$TRIGGER" --argjson tok "$TOKENS" \
+    '{compacted_at: $ts, branch: $br, sha: $sha, uncommitted: $cnt,
+      feature: $feat, trigger: $trig, estimated_tokens_removed: $tok}' \
+    > .harness/compaction-snapshot.json || true
+else
+  # No JSON tool available — write a minimal record. SessionStart compact
+  # branch reads fields individually so partial records still work.
+  cat > .harness/compaction-snapshot.json <<EOF || true
+{
+  "compacted_at": "$TS",
+  "branch": "$(_ahk_json_escape "$BR")",
+  "sha": "$(_ahk_json_escape "$SHA")",
+  "uncommitted": $COUNT,
+  "feature": "$(_ahk_json_escape "$FEAT")",
+  "trigger": "$(_ahk_json_escape "$TRIGGER")"
+}
+EOF
+fi
+exit 0

package/src/templates/scripts/pre-push.sh CHANGED Viewed

@@ -2,7 +2,32 @@
 # pre-push hook — Stripe "shift-feedback-left" pattern. Runs only the
 # deterministic checks (structural test + linter + tests on changed files).
 # Lives in scripts/ so it ships with the repo; install via install-git-hooks.sh.
-set -e
+set -eo pipefail
+# Resolve script dir so we can find _lib/json-pick.mjs (Node fallback for jq).
+# Without this fallback, `jq` missing on a fresh CI image silently disabled
+# the baseline-monotonic guard — a known audit hole.
+SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
+have_jq() {
+  [ "${AHK_DISABLE_JQ:-}" = "1" ] && return 1
+  command -v jq >/dev/null 2>&1
+}
+have_jp() {
+  have_jq && return 0
+  command -v node >/dev/null 2>&1 && [ -f "$SCRIPT_DIR/_lib/json-pick.mjs" ] && return 0
+  return 1
+}
+jp() {
+  if have_jq; then
+    if [ -n "$2" ]; then jq -r "$1" "$2"; else jq -r "$1"; fi
+  else
+    if [ -n "$2" ]; then
+      node "$SCRIPT_DIR/_lib/json-pick.mjs" "$1" "$2"
+    else
+      node "$SCRIPT_DIR/_lib/json-pick.mjs" "$1"
+    fi
+  fi
+}
 # Baseline monotonic guard. .harness/structural-baseline.json is decreasing-
 # only — fixes REMOVE entries; no path should ADD them. Catches the "mask
@@ -10,11 +35,11 @@ set -e
 # Runs first because a grown baseline silently masks structural-test failures.
 BASELINE_FILE=".harness/structural-baseline.json"
 if [ -f "$BASELINE_FILE" ] \
-   && command -v jq >/dev/null 2>&1 \
+   && have_jp \
    && git rev-parse --verify HEAD >/dev/null 2>&1 \
    && git cat-file -e "HEAD:$BASELINE_FILE" 2>/dev/null; then
-  CURRENT_COUNT=$(jq 'length' "$BASELINE_FILE" 2>/dev/null || echo 0)
-  HEAD_COUNT=$(git show "HEAD:$BASELINE_FILE" 2>/dev/null | jq 'length' 2>/dev/null || echo 0)
+  CURRENT_COUNT=$(jp 'length' "$BASELINE_FILE" 2>/dev/null || echo 0)
+  HEAD_COUNT=$(git show "HEAD:$BASELINE_FILE" 2>/dev/null | jp 'length' 2>/dev/null || echo 0)
   if [ "$CURRENT_COUNT" -gt "$HEAD_COUNT" ]; then
     {
       echo
@@ -33,11 +58,20 @@ if [ -f "$BASELINE_FILE" ] \
   fi
 fi
-echo "[pre-push] running structural test…"
-if [ -f harness.config.json ] && grep -q '"language": "python"' harness.config.json; then
-  python -m harness.structural_test
+# Structural test. Skipped when `structuralTest.engine` is explicitly "none"
+# (e.g. during scaffold of a polyglot repo where the adapter is not yet
+# wired). Without this guard the push fails silently because
+# `npm run harness:check` has no matching script.
+if [ -f harness.config.json ] \
+   && grep -qE '"engine"[[:space:]]*:[[:space:]]*"none"' harness.config.json; then
+  echo "[pre-push] structural test skipped (structuralTest.engine: none)"
 else
-  npm run --silent harness:check
+  echo "[pre-push] running structural test…"
+  if [ -f harness.config.json ] && grep -q '"language": "python"' harness.config.json; then
+    python -m harness.structural_test
+  else
+    npm run --silent harness:check
+  fi
 fi
 echo "[pre-push] running lint…"

package/src/templates/scripts/precompletion-checklist.sh.hbs CHANGED Viewed

@@ -12,9 +12,43 @@ set -e
 INPUT=$(cat)
+# Resolve the directory this hook lives in (used to find _lib/json-pick.mjs).
+SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
+# have_jq — env-overridable probe. AHK_DISABLE_JQ=1 forces the Node fallback,
+# used by tests to exercise the jq-less code path on machines that have jq
+# installed locally.
+have_jq() {
+  [ "${AHK_DISABLE_JQ:-}" = "1" ] && return 1
+  command -v jq >/dev/null 2>&1
+}
+# jp — JSON picker. Uses `jq` when available, else falls back to a bundled
+# Node script with a jq-subset implementation. Keeps hooks portable on
+# minimal CI / Windows where jq is not installed by default. Without this
+# fallback, the entire pre-completion check used to be a silent no-op.
+jp() {
+  if have_jq; then
+    if [ -n "$2" ]; then jq -r "$1" "$2"; else jq -r "$1"; fi
+  else
+    if [ -n "$2" ]; then
+      node "$SCRIPT_DIR/_lib/json-pick.mjs" "$1" "$2"
+    else
+      node "$SCRIPT_DIR/_lib/json-pick.mjs" "$1"
+    fi
+  fi
+}
+# Probe: do we have either jq or the Node fallback? Node is always
+# present (kit's `engines` field requires >=20), so this is just an explicit
+# probe and a fail-loud branch if even node is missing.
+have_jp() {
+  have_jq && return 0
+  command -v node >/dev/null 2>&1 && [ -f "$SCRIPT_DIR/_lib/json-pick.mjs" ] && return 0
+  return 1
+}
 # CRITICAL: avoid infinite loops. If the hook already ran, do not block again.
-if command -v jq >/dev/null 2>&1; then
-  if [ "$(echo "$INPUT" | jq -r '.stop_hook_active // false')" = "true" ]; then
+if have_jp; then
+  if [ "$(echo "$INPUT" | jp '.stop_hook_active // false')" = "true" ]; then
     exit 0
   fi
 fi
@@ -39,8 +73,12 @@ run_check() {
   fi
 }
-# Structural test.
-if [ -f harness.config.json ]; then
+# Structural test. Skipped when `structuralTest.engine` is explicitly "none"
+# (e.g. during scaffold of a polyglot repo where the adapter is not yet
+# wired). Without this guard the check fails silently with an empty body
+# because `npm run harness:check` has no matching script.
+if [ -f harness.config.json ] \
+   && ! grep -qE '"engine"[[:space:]]*:[[:space:]]*"none"' harness.config.json; then
   if grep -q '"language": "python"' harness.config.json; then
     run_check structural-test python -m harness.structural_test || true
   else
@@ -55,12 +93,17 @@ elif [ -f pyproject.toml ] && command -v ruff >/dev/null 2>&1; then
   run_check ruff ruff check . || true
 fi
-# CLAUDE.md instruction cap. HumanLayer measurement: agents stop following
-# CLAUDE.md reliably beyond ~150-200 bullets/numbered items. Treat the file
-# as a table of contents; promote details to docs/ or @-imports.
-if [ -f harness.config.json ] && command -v jq >/dev/null 2>&1; then
-  CMD_PATH=$(jq -r '.claudeMd.path // "CLAUDE.md"' harness.config.json)
-  CMD_CAP=$(jq -r '.claudeMd.maxInstructions // 200' harness.config.json)
+# CLAUDE.md size caps. Two complementary signals:
+#   - maxInstructions (default 200): bullet/numbered-item count. Suits
+#     ASCII-heavy English where a bullet ≈ a fixed token weight.
+#   - maxTokens (default 0 = off): approximate token cap. Catches drift
+#     in non-ASCII content (Vietnamese, CJK, etc.) where 200 bullets
+#     may carry 2–3× more tokens than the HumanLayer baseline measured.
+# Both checks fire independently — exceed either → block.
+if [ -f harness.config.json ] && have_jp; then
+  CMD_PATH=$(jp '.claudeMd.path // "CLAUDE.md"' harness.config.json)
+  CMD_CAP=$(jp '.claudeMd.maxInstructions // 200' harness.config.json)
+  CMD_TOK_CAP=$(jp '.claudeMd.maxTokens // 0' harness.config.json)
   if [ -f "$CMD_PATH" ] && [ "$CMD_CAP" -gt 0 ] 2>/dev/null; then
     CMD_COUNT=$(grep -cE '^[[:space:]]*([-*]|[0-9]+\.)[[:space:]]' "$CMD_PATH" 2>/dev/null || echo 0)
     if [ "$CMD_COUNT" -gt "$CMD_CAP" ]; then
@@ -81,6 +124,24 @@ if [ -f harness.config.json ] && command -v jq >/dev/null 2>&1; then
       echo "claude-md-cap" >> "$TMPDIR_HOOK/failed.list"
     fi
   fi
+  # Approximate-token cap on the CLAUDE.md file. Runs only when the cap is a
+  # positive integer and both node and _lib/approx-tokens.mjs are present.
+  if [ -f "$CMD_PATH" ] && [ "$CMD_TOK_CAP" -gt 0 ] 2>/dev/null \
+     && command -v node >/dev/null 2>&1 \
+     && [ -f "$SCRIPT_DIR/_lib/approx-tokens.mjs" ]; then
+    CMD_TOK=$(node "$SCRIPT_DIR/_lib/approx-tokens.mjs" "$CMD_PATH" 2>/dev/null || echo 0)
+    if [ "$CMD_TOK" -gt "$CMD_TOK_CAP" ]; then
+      # One argument per output line; the empty strings are the blank lines.
+      printf '%s\n' \
+        "$CMD_PATH approximate token count: $CMD_TOK (cap: $CMD_TOK_CAP)" \
+        "" \
+        "Heuristic token cap — set because instruction count alone misses" \
+        "drift in non-ASCII content (Vietnamese, CJK) where a bullet can" \
+        "carry 2-3x more tokens than the HumanLayer baseline measured." \
+        "" \
+        "Adjust the cap (with justification) in harness.config.json:" \
+        "  .claudeMd.maxTokens" \
+        > "$TMPDIR_HOOK/claude-md-tokens.out"
+      echo "claude-md-tokens" >> "$TMPDIR_HOOK/failed.list"
+    fi
+  fi
 fi
 # Multi-layer review trigger. When uncommitted/staged/untracked changes touch
@@ -90,7 +151,7 @@ fi
 # with a mechanical count off `harness.config.json` `domains[].layers` /
 # `.root`. Fires once per stop; the loop guard (`stop_hook_active`) lets the
 # next stop succeed after the agent has read the recommendation.
-if [ -f harness.config.json ] && command -v jq >/dev/null 2>&1 && command -v git >/dev/null 2>&1; then
+if [ -f harness.config.json ] && have_jp && command -v git >/dev/null 2>&1; then
   CHANGED=$(
     {
       git diff --name-only 2>/dev/null || true
@@ -99,23 +160,29 @@ if [ -f harness.config.json ] && command -v jq >/dev/null 2>&1 && command -v git
     } | sort -u
   )
   if [ -n "$CHANGED" ]; then
-    NUM_DOMAINS=$(jq '.domains | length' harness.config.json 2>/dev/null || echo 0)
+    NUM_DOMAINS=$(jp '.domains | length' harness.config.json 2>/dev/null || echo 0)
     MULTI_OUT="$TMPDIR_HOOK/multi-layer-review.out"
     : > "$MULTI_OUT"
     MULTI_HIT=0
     i=0
     while [ "$i" -lt "$NUM_DOMAINS" ]; do
-      ROOT=$(jq -r ".domains[$i].root" harness.config.json)
-      DOMAIN=$(jq -r ".domains[$i].name" harness.config.json)
+      ROOT=$(jp ".domains[$i].root" harness.config.json)
+      DOMAIN=$(jp ".domains[$i].name" harness.config.json)
+      # Optional layerDirPattern — supports conventions where the layer
+      # directory is not literally `{layer}`. Example: a Rust workspace
+      # with crates named `unibot-types`, `unibot-crypto`, ... uses
+      # `"layerDirPattern": "unibot-{layer}"`. Defaults to `{layer}`.
+      LAYER_PATTERN=$(jp ".domains[$i].layerDirPattern // \"{layer}\"" harness.config.json)
       TOUCHED_COUNT=0
       TOUCHED_NAMES=""
       while IFS= read -r layer; do
         [ -z "$layer" ] && continue
-        if echo "$CHANGED" | grep -qE "^${ROOT}/${layer}(/|$)"; then
+        # Escape the characters that are special in a sed replacement
+        # (delimiter "/", "&", and backslash) so a layer name such as
+        # "api/v1" or "r&d" is substituted literally instead of breaking
+        # or altering the expression.
+        LAYER_ESC=$(printf '%s' "$layer" | sed 's/[\/&\\]/\\&/g')
+        LAYER_DIR=$(printf '%s' "$LAYER_PATTERN" | sed "s/{layer}/$LAYER_ESC/g")
+        if echo "$CHANGED" | grep -qE "^${ROOT}/${LAYER_DIR}(/|$)"; then
           TOUCHED_COUNT=$((TOUCHED_COUNT + 1))
           TOUCHED_NAMES="$TOUCHED_NAMES $layer"
         fi
-      done < <(jq -r ".domains[$i].layers[]" harness.config.json)
+      done < <(jp ".domains[$i].layers[]" harness.config.json)
       if [ "$TOUCHED_COUNT" -ge 2 ]; then
         echo "Domain '$DOMAIN' has changes spanning $TOUCHED_COUNT layers:$TOUCHED_NAMES" >> "$MULTI_OUT"
         MULTI_HIT=1
@@ -177,8 +244,8 @@ HEADLESS_SOURCE=""
 if [ "${AHK_HEADLESS_RECOVER:-}" = "1" ]; then
   HEADLESS_RECOVER=1
   HEADLESS_SOURCE="AHK_HEADLESS_RECOVER"
-elif [ -f harness.config.json ] && command -v jq >/dev/null 2>&1; then
-  CFG_VAL=$(jq -r '.recovery.headless // false' harness.config.json 2>/dev/null)
+elif [ -f harness.config.json ] && have_jp; then
+  CFG_VAL=$(jp '.recovery.headless // false' harness.config.json 2>/dev/null)
   if [ "$CFG_VAL" = "true" ]; then
     HEADLESS_RECOVER=1
     HEADLESS_SOURCE="harness.config.json:.recovery.headless"
@@ -186,12 +253,64 @@ elif [ -f harness.config.json ] && command -v jq >/dev/null 2>&1; then
 fi
 if [ "$HEADLESS_RECOVER" = "1" ] && command -v claude >/dev/null 2>&1; then
   FAILED_LIST=$(tr '\n' ' ' < "$TMPDIR_HOOK/failed.list")
-  echo "[ahk] headless recovery enabled ($HEADLESS_SOURCE) — spawning recovery turn for: $FAILED_LIST" >&2
-  claude -p \
-    "The pre-completion checklist failed: $FAILED_LIST. Read the failure output in $TMPDIR_HOOK and apply the smallest fix. Do not disable any check." \
-    --max-turns 5 \
-    >"$TMPDIR_HOOK/recover.out" 2>&1 &
-  # Don't wait — let the next session pick up the partially-applied fix.
+  # Concurrency guard. Two Stop events in different sessions (e.g. user
+  # working in two terminals, or an unattended CI rerun firing while a
+  # previous recovery is still active) used to race and edit the same
+  # files. The lock is a directory created atomically with `mkdir`; the
+  # PID file inside lets us detect stale locks left by a crashed parent.
+  mkdir -p .harness
+  LOCK_DIR=".harness/recovery.lock"
+  LOCK_STALE_MAX_SECS=${AHK_RECOVERY_LOCK_STALE_SECS:-1800}
+  if mkdir "$LOCK_DIR" 2>/dev/null; then
+    # We won the race. Record start time and source right away: neither
+    # needs the child's PID, and writing them before the spawn means a
+    # concurrent Stop never computes the lock's age from a missing file.
+    {
+      date +%s > "$LOCK_DIR/started_at"
+      echo "$HEADLESS_SOURCE" > "$LOCK_DIR/source"
+    } 2>/dev/null || true
+    # Snapshot the failure context into the lock dir BEFORE the parent's
+    # EXIT trap deletes TMPDIR_HOOK, so the recovery turn has everything it
+    # needs (failed.list, per-check output) inside LOCK_DIR. The target
+    # directory is created explicitly rather than relying on how a given
+    # `cp` treats a missing destination.
+    mkdir -p "$LOCK_DIR/snapshot" 2>/dev/null || true
+    cp -r "$TMPDIR_HOOK/." "$LOCK_DIR/snapshot/" 2>/dev/null || true
+    (
+      # Trap removes the lock on subshell EXIT (success, failure, or signal).
+      trap 'rm -rf "$LOCK_DIR"' EXIT
+      claude -p \
+        "The pre-completion checklist failed: $FAILED_LIST. Read the failure output in $LOCK_DIR/snapshot and apply the smallest fix. Do not disable any check." \
+        --max-turns 5 \
+        >"$LOCK_DIR/recover.out" 2>&1
+    ) &
+    SUB_PID=$!
+    # If the child has already exited, its trap has removed LOCK_DIR and
+    # this write fails; that must not abort the hook before its own exit
+    # code is reported, hence the guard.
+    { echo "$SUB_PID" > "$LOCK_DIR/pid"; } 2>/dev/null || true
+    echo "[ahk] headless recovery spawned (source=$HEADLESS_SOURCE, wrapper-pid=$SUB_PID, lock=$LOCK_DIR)" >&2
+  else
+    # Lock already held. Read who holds it and decide: live → skip,
+    # stale → reclaim. We never block the user's Stop on the lock —
+    # worst case we skip a recovery turn that the next Stop can retry.
+    # Bounded wait for the pid file to materialize — closes the race
+    # window between the holder's `mkdir` and its pid write.
+    for _ in 1 2 3 4 5 6 7 8 9 10; do
+      [ -s "$LOCK_DIR/pid" ] && break
+      sleep 0.05
+    done
+    EXISTING_PID=$(cat "$LOCK_DIR/pid" 2>/dev/null || true)
+    STARTED_AT=$(cat "$LOCK_DIR/started_at" 2>/dev/null || echo 0)
+    # An empty or garbled timestamp would break the arithmetic below.
+    case "$STARTED_AT" in ''|*[!0-9]*) STARTED_AT=0 ;; esac
+    NOW=$(date +%s)
+    AGE=$((NOW - STARTED_AT))
+    if [ -n "$EXISTING_PID" ] && kill -0 "$EXISTING_PID" 2>/dev/null; then
+      echo "[ahk] headless recovery skipped — another session already running (pid=$EXISTING_PID, age=${AGE}s, lock=$LOCK_DIR)" >&2
+    elif [ "$AGE" -gt "$LOCK_STALE_MAX_SECS" ]; then
+      echo "[ahk] headless recovery: removing stale lock (pid=$EXISTING_PID, age=${AGE}s > ${LOCK_STALE_MAX_SECS}s); next stop will retry. lock=$LOCK_DIR" >&2
+      rm -rf "$LOCK_DIR"
+    else
+      echo "[ahk] headless recovery skipped — lock present with dead pid=$EXISTING_PID (age=${AGE}s, will reclaim after ${LOCK_STALE_MAX_SECS}s). lock=$LOCK_DIR" >&2
+    fi
+  fi
 fi
 exit 2