agent-harness-kit 0.5.1 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -20,7 +20,9 @@ const ROOT = process.cwd();
20
20
  const RESULTS_DIR = resolve(ROOT, ".harness/eval/results");
21
21
  const TELEMETRY = resolve(ROOT, ".harness/telemetry.jsonl");
22
22
  const NOW = Date.now();
23
- const SEVEN_DAYS = 7 * 24 * 60 * 60 * 1000;
23
+ const ONE_DAY = 24 * 60 * 60 * 1000;
24
+ const SEVEN_DAYS = 7 * ONE_DAY;
25
+ const FOURTEEN_DAYS = 14 * ONE_DAY;
24
26
 
25
27
  async function readJsonl(path) {
26
28
  if (!existsSync(path)) return [];
@@ -61,6 +63,16 @@ function recent(rows, key = "ts") {
61
63
  });
62
64
  }
63
65
 
66
+ // Rows aged 7–14 days. Used as the comparator for week-over-week deltas
67
+ // so users can spot drift instead of staring at a single-week snapshot.
68
+ function priorWeek(rows, key = "ts") {
69
+ return rows.filter((r) => {
70
+ const t = r[key] ? new Date(r[key]).getTime() : r._mtime ?? 0;
71
+ const age = NOW - t;
72
+ return age > SEVEN_DAYS && age <= FOURTEEN_DAYS;
73
+ });
74
+ }
75
+
64
76
  function tokensOf(row) {
65
77
  return (row.grades ?? [])
66
78
  .filter((g) => g.dim === "efficiency")
@@ -172,16 +184,98 @@ function driftSignals(evalRows, telemetryRows) {
172
184
  }
173
185
  }
174
186
 
187
+ // Aggregate eval rows by task into { passed, total, tokens }.
188
+ function aggregateEvals(rows) {
189
+ const byTask = new Map();
190
+ for (const r of rows) {
191
+ const cur = byTask.get(r.taskId) ?? { passed: 0, total: 0, tokens: 0 };
192
+ cur.total++;
193
+ if (r.passed) cur.passed++;
194
+ cur.tokens += tokensOf(r);
195
+ byTask.set(r.taskId, cur);
196
+ }
197
+ return byTask;
198
+ }
199
+
200
+ // Render a single delta line. signMode controls icon meaning — for pass-rate,
201
+ // up is good; for tokens, up is bad; for skill invocations, neutral.
202
+ function fmtDelta(now, then, signMode = "neutral", unit = "") {
203
+ if (then === undefined) return `(new) ${now}${unit}`;
204
+ const diff = now - then;
205
+ if (diff === 0) return `${now}${unit} → ${then}${unit} (=)`;
206
+ let arrow = diff > 0 ? "↑" : "↓";
207
+ // Tag the arrow by "is this a regression?": "+" marks an improvement, "-" a regression.
208
+ let marker = " ";
209
+ if (signMode === "good-up") marker = diff > 0 ? "+" : "-";
210
+ else if (signMode === "good-down") marker = diff > 0 ? "-" : "+";
211
+ return `${now}${unit} ← ${then}${unit} (${arrow}${marker} ${Math.abs(diff)}${unit})`;
212
+ }
213
+
214
+ function weekOverWeek(evalRecent, evalPrior, telRecent, telPrior) {
215
+ console.log(`\n### Week-over-week (last 7d vs prior 7d)`);
216
+ const aRecent = aggregateEvals(evalRecent);
217
+ const aPrior = aggregateEvals(evalPrior);
218
+
219
+ if (aRecent.size === 0 && aPrior.size === 0) {
220
+ console.log(" (no eval data in either window — run `npm run harness:eval`)");
221
+ } else {
222
+ console.log(" task pass-rate (now ← prior) avg-tokens (now ← prior)");
223
+ console.log(" ---------------------- ---------------------------- --------------------------");
224
+ const taskIds = new Set([...aRecent.keys(), ...aPrior.keys()]);
225
+ for (const t of [...taskIds].sort()) {
226
+ const now = aRecent.get(t);
227
+ const prior = aPrior.get(t);
228
+ const nowRate = now ? Math.round((now.passed / now.total) * 100) : null;
229
+ const priorRate = prior ? Math.round((prior.passed / prior.total) * 100) : null;
230
+ const nowTok = now && now.total > 0 ? Math.round(now.tokens / now.total) : 0;
231
+ const priorTok = prior && prior.total > 0 ? Math.round(prior.tokens / prior.total) : 0;
232
+ const rateCell = nowRate === null
233
+ ? "(absent now)"
234
+ : priorRate === null
235
+ ? `${nowRate}% (new)`
236
+ : `${nowRate}% ← ${priorRate}% (${nowRate - priorRate >= 0 ? "+" : ""}${nowRate - priorRate})`;
237
+ const tokCell = nowTok === 0 && priorTok === 0
238
+ ? "—"
239
+ : `${nowTok} ← ${priorTok} (${nowTok - priorTok >= 0 ? "+" : ""}${nowTok - priorTok})`;
240
+ console.log(
241
+ ` ${t.padEnd(22)} ${rateCell.padEnd(30)} ${tokCell}`,
242
+ );
243
+ }
244
+ }
245
+
246
+ // Skill invocation deltas.
247
+ const recentBySkill = new Map();
248
+ for (const r of telRecent) recentBySkill.set(r.skill, (recentBySkill.get(r.skill) ?? 0) + 1);
249
+ const priorBySkill = new Map();
250
+ for (const r of telPrior) priorBySkill.set(r.skill, (priorBySkill.get(r.skill) ?? 0) + 1);
251
+
252
+ const allSkills = new Set([...recentBySkill.keys(), ...priorBySkill.keys()]);
253
+ if (allSkills.size > 0) {
254
+ console.log("\n skill invocations (now ← prior)");
255
+ console.log(" ----------------------------- -------------------------------");
256
+ for (const s of [...allSkills].sort()) {
257
+ const n = recentBySkill.get(s) ?? 0;
258
+ const p = priorBySkill.get(s) ?? 0;
259
+ const d = n - p;
260
+ const cell = p === 0 ? `${n} (new)` : `${n} ← ${p} (${d >= 0 ? "+" : ""}${d})`;
261
+ console.log(` ${s.padEnd(29)} ${cell}`);
262
+ }
263
+ }
264
+ }
265
+
175
266
  async function main() {
176
267
  const evalAll = await loadEvalResults();
177
268
  const telemetryAll = await readJsonl(TELEMETRY);
178
269
  const evalRows = recent(evalAll);
270
+ const evalPrior = priorWeek(evalAll);
179
271
  const telemetryRows = recent(telemetryAll);
272
+ const telemetryPrior = priorWeek(telemetryAll);
180
273
 
181
274
  console.log("=== agent-harness-kit report ===");
182
275
  console.log(`Generated: ${new Date().toISOString()}`);
183
276
  summarizeEvals(evalRows);
184
277
  summarizeTelemetry(telemetryRows);
278
+ weekOverWeek(evalRows, evalPrior, telemetryRows, telemetryPrior);
185
279
  driftSignals(evalRows, telemetryRows);
186
280
  console.log("");
187
281
  }
@@ -0,0 +1,121 @@
1
+ #!/usr/bin/env bash
2
+ # PreCompact hook — write a small snapshot of state to
3
+ # .harness/compaction-snapshot.json BEFORE the context compactor runs.
4
+ # The companion SessionStart hook (matcher: compact) reads this snapshot
5
+ # back and re-injects the salient fields so the post-compaction model
6
+ # knows which feature it was working on, which branch, and how dirty
7
+ # the tree was.
8
+ #
9
+ # This is the kit's answer to the "I lost everything after compaction"
10
+ # failure mode that recurs in long sessions. Pair with:
11
+ # - SessionStart matcher compact → re-inject
12
+ # - PostCompact (not implemented; SessionStart does the work)
13
+ #
14
+ # Snapshot contents:
15
+ # {
16
+ # "compacted_at": "2026-05-16T19:00:00Z",
17
+ # "branch": "main",
18
+ # "sha": "abc1234",
19
+ # "uncommitted": 7,
20
+ # "feature": "auth-endpoint — POST /auth/login",
21
+ # "trigger": "manual|auto",
22
+ # "estimated_tokens_removed": 5000
23
+ # }
24
+ #
25
+ # The hook NEVER blocks (exit 0 always). PreCompact can technically block
26
+ # compaction but doing so defeats the entire point.
27
+ set -eo pipefail
28
+
29
+ INPUT=$(cat)
30
+ SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
31
+ have_jq() {
32
+ [ "${AHK_DISABLE_JQ:-}" = "1" ] && return 1
33
+ command -v jq >/dev/null 2>&1
34
+ }
35
+ have_jp() {
36
+ have_jq && return 0
37
+ command -v node >/dev/null 2>&1 && [ -f "$SCRIPT_DIR/_lib/json-pick.mjs" ] && return 0
38
+ return 1
39
+ }
40
+ jp() {
41
+ if have_jq; then
42
+ if [ -n "$2" ]; then jq -r "$1" "$2"; else jq -r "$1"; fi
43
+ else
44
+ if [ -n "$2" ]; then
45
+ node "$SCRIPT_DIR/_lib/json-pick.mjs" "$1" "$2"
46
+ else
47
+ node "$SCRIPT_DIR/_lib/json-pick.mjs" "$1"
48
+ fi
49
+ fi
50
+ }
51
+
52
+ TRIGGER=""
53
+ TOKENS=""
54
+ if have_jp; then
55
+ TRIGGER=$(echo "$INPUT" | jp '.trigger // "auto"' 2>/dev/null || true)
56
+ TOKENS=$(echo "$INPUT" | jp '.estimated_tokens_removed // 0' 2>/dev/null || true)
57
+ fi
58
+
59
+ mkdir -p .harness
60
+
61
+ TS=$(date -u +%Y-%m-%dT%H:%M:%SZ)
62
+ BR="(no-git)"
63
+ SHA="(no-git)"
64
+ COUNT=0
65
+ if command -v git >/dev/null 2>&1 && git rev-parse --git-dir >/dev/null 2>&1; then
66
+ BR=$(git branch --show-current 2>/dev/null || echo "(detached)")
67
+ SHA=$(git rev-parse --short HEAD 2>/dev/null || echo "(none)")
68
+ COUNT=$(git status --short 2>/dev/null | wc -l | tr -d ' ')
69
+ fi
70
+
71
+ FEAT=""
72
+ if [ -f feature_list.json ]; then
73
+ if have_jq; then
74
+ FEAT=$(jq -r 'first(.features[] | select(.passes == false)) | "\(.id) — \(.title)"' \
75
+ feature_list.json 2>/dev/null || true)
76
+ elif command -v node >/dev/null 2>&1; then
77
+ FEAT=$(node -e "
78
+ const f = JSON.parse(require('fs').readFileSync('feature_list.json','utf8'));
79
+ const o = (f.features || []).find(x => x.passes === false);
80
+ if (o) process.stdout.write(o.id + ' — ' + o.title);
81
+ " 2>/dev/null || true)
82
+ fi
83
+ fi
84
+
85
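+ # Compose JSON via Node when available — JSON.stringify escapes the output; note only FEAT is passed via argv, the other values are spliced into the script text unescaped.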
86
+ if command -v node >/dev/null 2>&1; then
87
+ node -e "
88
+ const fs = require('fs');
89
+ const snap = {
90
+ compacted_at: '$TS',
91
+ branch: '$BR',
92
+ sha: '$SHA',
93
+ uncommitted: parseInt('$COUNT', 10) || 0,
94
+ feature: process.argv[1] || '',
95
+ trigger: '$TRIGGER' || 'auto',
96
+ estimated_tokens_removed: parseInt('$TOKENS', 10) || 0
97
+ };
98
+ fs.writeFileSync('.harness/compaction-snapshot.json', JSON.stringify(snap, null, 2) + '\n');
99
+ " "$FEAT"
100
+ elif have_jq; then
101
+ jq -n --arg ts "$TS" --arg br "$BR" --arg sha "$SHA" \
102
+ --argjson cnt "$COUNT" --arg feat "$FEAT" \
103
+ --arg trig "${TRIGGER:-auto}" --argjson tok "${TOKENS:-0}" \
104
+ '{compacted_at: $ts, branch: $br, sha: $sha, uncommitted: $cnt,
105
+ feature: $feat, trigger: $trig, estimated_tokens_removed: $tok}' \
106
+ > .harness/compaction-snapshot.json
107
+ else
108
+ # No JSON tool available — write a minimal record. SessionStart compact
109
+ # branch reads fields individually so partial records still work.
110
+ cat > .harness/compaction-snapshot.json <<EOF
111
+ {
112
+ "compacted_at": "$TS",
113
+ "branch": "$BR",
114
+ "sha": "$SHA",
115
+ "uncommitted": $COUNT,
116
+ "feature": "$FEAT",
117
+ "trigger": "${TRIGGER:-auto}"
118
+ }
119
+ EOF
120
+ fi
121
+ exit 0
@@ -2,7 +2,32 @@
2
2
  # pre-push hook — Stripe "shift-feedback-left" pattern. Runs only the
3
3
  # deterministic checks (structural test + linter + tests on changed files).
4
4
  # Lives in scripts/ so it ships with the repo; install via install-git-hooks.sh.
5
- set -e
5
+ set -eo pipefail
6
+
7
+ # Resolve script dir so we can find _lib/json-pick.mjs (Node fallback for jq).
8
+ # Without this fallback, `jq` missing on a fresh CI image silently disabled
9
+ # the baseline-monotonic guard — a known audit hole.
10
+ SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
11
+ have_jq() {
12
+ [ "${AHK_DISABLE_JQ:-}" = "1" ] && return 1
13
+ command -v jq >/dev/null 2>&1
14
+ }
15
+ have_jp() {
16
+ have_jq && return 0
17
+ command -v node >/dev/null 2>&1 && [ -f "$SCRIPT_DIR/_lib/json-pick.mjs" ] && return 0
18
+ return 1
19
+ }
20
+ jp() {
21
+ if have_jq; then
22
+ if [ -n "$2" ]; then jq -r "$1" "$2"; else jq -r "$1"; fi
23
+ else
24
+ if [ -n "$2" ]; then
25
+ node "$SCRIPT_DIR/_lib/json-pick.mjs" "$1" "$2"
26
+ else
27
+ node "$SCRIPT_DIR/_lib/json-pick.mjs" "$1"
28
+ fi
29
+ fi
30
+ }
6
31
 
7
32
  # Baseline monotonic guard. .harness/structural-baseline.json is decreasing-
8
33
  # only — fixes REMOVE entries; no path should ADD them. Catches the "mask
@@ -10,11 +35,11 @@ set -e
10
35
  # Runs first because a grown baseline silently masks structural-test failures.
11
36
  BASELINE_FILE=".harness/structural-baseline.json"
12
37
  if [ -f "$BASELINE_FILE" ] \
13
- && command -v jq >/dev/null 2>&1 \
38
+ && have_jp \
14
39
  && git rev-parse --verify HEAD >/dev/null 2>&1 \
15
40
  && git cat-file -e "HEAD:$BASELINE_FILE" 2>/dev/null; then
16
- CURRENT_COUNT=$(jq 'length' "$BASELINE_FILE" 2>/dev/null || echo 0)
17
- HEAD_COUNT=$(git show "HEAD:$BASELINE_FILE" 2>/dev/null | jq 'length' 2>/dev/null || echo 0)
41
+ CURRENT_COUNT=$(jp 'length' "$BASELINE_FILE" 2>/dev/null || echo 0)
42
+ HEAD_COUNT=$(git show "HEAD:$BASELINE_FILE" 2>/dev/null | jp 'length' 2>/dev/null || echo 0)
18
43
  if [ "$CURRENT_COUNT" -gt "$HEAD_COUNT" ]; then
19
44
  {
20
45
  echo
@@ -33,11 +58,20 @@ if [ -f "$BASELINE_FILE" ] \
33
58
  fi
34
59
  fi
35
60
 
36
- echo "[pre-push] running structural test…"
37
- if [ -f harness.config.json ] && grep -q '"language": "python"' harness.config.json; then
38
- python -m harness.structural_test
61
+ # Structural test. Skipped when `structuralTest.engine` is explicitly "none"
62
+ # (e.g. during scaffold of a polyglot repo where the adapter is not yet
63
+ # wired). Without this guard the push fails silently because
64
+ # `npm run harness:check` has no matching script.
65
+ if [ -f harness.config.json ] \
66
+ && grep -qE '"engine"[[:space:]]*:[[:space:]]*"none"' harness.config.json; then
67
+ echo "[pre-push] structural test skipped (structuralTest.engine: none)"
39
68
  else
40
- npm run --silent harness:check
69
+ echo "[pre-push] running structural test…"
70
+ if [ -f harness.config.json ] && grep -q '"language": "python"' harness.config.json; then
71
+ python -m harness.structural_test
72
+ else
73
+ npm run --silent harness:check
74
+ fi
41
75
  fi
42
76
 
43
77
  echo "[pre-push] running lint…"
@@ -12,9 +12,43 @@ set -e
12
12
 
13
13
  INPUT=$(cat)
14
14
 
15
+ # Resolve the directory this hook lives in (used to find _lib/json-pick.mjs).
16
+ SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
17
+
18
+ # have_jq — env-overridable probe. AHK_DISABLE_JQ=1 forces the Node fallback,
19
+ # used by tests to exercise the jq-less code path on machines that have jq
20
+ # installed locally.
21
+ have_jq() {
22
+ [ "${AHK_DISABLE_JQ:-}" = "1" ] && return 1
23
+ command -v jq >/dev/null 2>&1
24
+ }
25
+ # jp — JSON picker. Uses `jq` when available, else falls back to a bundled
26
+ # Node script with a jq-subset implementation. Keeps hooks portable on
27
+ # minimal CI / Windows where jq is not installed by default. Without this
28
+ # fallback, the entire pre-completion check used to be a silent no-op.
29
+ jp() {
30
+ if have_jq; then
31
+ if [ -n "$2" ]; then jq -r "$1" "$2"; else jq -r "$1"; fi
32
+ else
33
+ if [ -n "$2" ]; then
34
+ node "$SCRIPT_DIR/_lib/json-pick.mjs" "$1" "$2"
35
+ else
36
+ node "$SCRIPT_DIR/_lib/json-pick.mjs" "$1"
37
+ fi
38
+ fi
39
+ }
40
+ # Probe: do we have either jq or the Node fallback? Node is always
41
+ # present (kit's `engines` field requires >=20), so this is just an explicit
42
+ # probe and a fail-loud branch if even node is missing.
43
+ have_jp() {
44
+ have_jq && return 0
45
+ command -v node >/dev/null 2>&1 && [ -f "$SCRIPT_DIR/_lib/json-pick.mjs" ] && return 0
46
+ return 1
47
+ }
48
+
15
49
  # CRITICAL: avoid infinite loops. If the hook already ran, do not block again.
16
- if command -v jq >/dev/null 2>&1; then
17
- if [ "$(echo "$INPUT" | jq -r '.stop_hook_active // false')" = "true" ]; then
50
+ if have_jp; then
51
+ if [ "$(echo "$INPUT" | jp '.stop_hook_active // false')" = "true" ]; then
18
52
  exit 0
19
53
  fi
20
54
  fi
@@ -39,8 +73,12 @@ run_check() {
39
73
  fi
40
74
  }
41
75
 
42
- # Structural test.
43
- if [ -f harness.config.json ]; then
76
+ # Structural test. Skipped when `structuralTest.engine` is explicitly "none"
77
+ # (e.g. during scaffold of a polyglot repo where the adapter is not yet
78
+ # wired). Without this guard the check fails silently with an empty body
79
+ # because `npm run harness:check` has no matching script.
80
+ if [ -f harness.config.json ] \
81
+ && ! grep -qE '"engine"[[:space:]]*:[[:space:]]*"none"' harness.config.json; then
44
82
  if grep -q '"language": "python"' harness.config.json; then
45
83
  run_check structural-test python -m harness.structural_test || true
46
84
  else
@@ -55,12 +93,17 @@ elif [ -f pyproject.toml ] && command -v ruff >/dev/null 2>&1; then
55
93
  run_check ruff ruff check . || true
56
94
  fi
57
95
 
58
- # CLAUDE.md instruction cap. HumanLayer measurement: agents stop following
59
- # CLAUDE.md reliably beyond ~150-200 bullets/numbered items. Treat the file
60
- # as a table of contents; promote details to docs/ or @-imports.
61
- if [ -f harness.config.json ] && command -v jq >/dev/null 2>&1; then
62
- CMD_PATH=$(jq -r '.claudeMd.path // "CLAUDE.md"' harness.config.json)
63
- CMD_CAP=$(jq -r '.claudeMd.maxInstructions // 200' harness.config.json)
96
+ # CLAUDE.md size caps. Two complementary signals:
97
+ # - maxInstructions (default 200): bullet/numbered-item count. Suits
98
+ # ASCII-heavy English where a bullet has a fixed token weight.
99
+ # - maxTokens (default 0 = off): approximate token cap. Catches drift
100
+ # in non-ASCII content (Vietnamese, CJK, etc.) where 200 bullets
101
+ # may carry 2–3× more tokens than the HumanLayer baseline measured.
102
+ # Both checks fire independently — exceed either → block.
103
+ if [ -f harness.config.json ] && have_jp; then
104
+ CMD_PATH=$(jp '.claudeMd.path // "CLAUDE.md"' harness.config.json)
105
+ CMD_CAP=$(jp '.claudeMd.maxInstructions // 200' harness.config.json)
106
+ CMD_TOK_CAP=$(jp '.claudeMd.maxTokens // 0' harness.config.json)
64
107
  if [ -f "$CMD_PATH" ] && [ "$CMD_CAP" -gt 0 ] 2>/dev/null; then
65
108
  CMD_COUNT=$(grep -cE '^[[:space:]]*([-*]|[0-9]+\.)[[:space:]]' "$CMD_PATH" 2>/dev/null || echo 0)
66
109
  if [ "$CMD_COUNT" -gt "$CMD_CAP" ]; then
@@ -81,6 +124,24 @@ if [ -f harness.config.json ] && command -v jq >/dev/null 2>&1; then
81
124
  echo "claude-md-cap" >> "$TMPDIR_HOOK/failed.list"
82
125
  fi
83
126
  fi
127
+ if [ -f "$CMD_PATH" ] && [ "$CMD_TOK_CAP" -gt 0 ] 2>/dev/null \
128
+ && command -v node >/dev/null 2>&1 \
129
+ && [ -f "$SCRIPT_DIR/_lib/approx-tokens.mjs" ]; then
130
+ CMD_TOK=$(node "$SCRIPT_DIR/_lib/approx-tokens.mjs" "$CMD_PATH" 2>/dev/null || echo 0)
131
+ if [ "$CMD_TOK" -gt "$CMD_TOK_CAP" ]; then
132
+ {
133
+ echo "$CMD_PATH approximate token count: $CMD_TOK (cap: $CMD_TOK_CAP)"
134
+ echo
135
+ echo "Heuristic token cap — set because instruction count alone misses"
136
+ echo "drift in non-ASCII content (Vietnamese, CJK) where a bullet can"
137
+ echo "carry 2-3x more tokens than the HumanLayer baseline measured."
138
+ echo
139
+ echo "Adjust the cap (with justification) in harness.config.json:"
140
+ echo " .claudeMd.maxTokens"
141
+ } > "$TMPDIR_HOOK/claude-md-tokens.out"
142
+ echo "claude-md-tokens" >> "$TMPDIR_HOOK/failed.list"
143
+ fi
144
+ fi
84
145
  fi
85
146
 
86
147
  # Multi-layer review trigger. When uncommitted/staged/untracked changes touch
@@ -90,7 +151,7 @@ fi
90
151
  # with a mechanical count off `harness.config.json` `domains[].layers` /
91
152
  # `.root`. Fires once per stop; the loop guard (`stop_hook_active`) lets the
92
153
  # next stop succeed after the agent has read the recommendation.
93
- if [ -f harness.config.json ] && command -v jq >/dev/null 2>&1 && command -v git >/dev/null 2>&1; then
154
+ if [ -f harness.config.json ] && have_jp && command -v git >/dev/null 2>&1; then
94
155
  CHANGED=$(
95
156
  {
96
157
  git diff --name-only 2>/dev/null || true
@@ -99,23 +160,29 @@ if [ -f harness.config.json ] && command -v jq >/dev/null 2>&1 && command -v git
99
160
  } | sort -u
100
161
  )
101
162
  if [ -n "$CHANGED" ]; then
102
- NUM_DOMAINS=$(jq '.domains | length' harness.config.json 2>/dev/null || echo 0)
163
+ NUM_DOMAINS=$(jp '.domains | length' harness.config.json 2>/dev/null || echo 0)
103
164
  MULTI_OUT="$TMPDIR_HOOK/multi-layer-review.out"
104
165
  : > "$MULTI_OUT"
105
166
  MULTI_HIT=0
106
167
  i=0
107
168
  while [ "$i" -lt "$NUM_DOMAINS" ]; do
108
- ROOT=$(jq -r ".domains[$i].root" harness.config.json)
109
- DOMAIN=$(jq -r ".domains[$i].name" harness.config.json)
169
+ ROOT=$(jp ".domains[$i].root" harness.config.json)
170
+ DOMAIN=$(jp ".domains[$i].name" harness.config.json)
171
+ # Optional layerDirPattern — supports conventions where the layer
172
+ # directory is not literally `{layer}`. Example: a Rust workspace
173
+ # with crates named `unibot-types`, `unibot-crypto`, ... uses
174
+ # `"layerDirPattern": "unibot-{layer}"`. Defaults to `{layer}`.
175
+ LAYER_PATTERN=$(jp ".domains[$i].layerDirPattern // \"{layer}\"" harness.config.json)
110
176
  TOUCHED_COUNT=0
111
177
  TOUCHED_NAMES=""
112
178
  while IFS= read -r layer; do
113
179
  [ -z "$layer" ] && continue
114
- if echo "$CHANGED" | grep -qE "^${ROOT}/${layer}(/|$)"; then
180
+ LAYER_DIR=$(printf '%s' "$LAYER_PATTERN" | sed "s/{layer}/$layer/g")
181
+ if echo "$CHANGED" | grep -qE "^${ROOT}/${LAYER_DIR}(/|$)"; then
115
182
  TOUCHED_COUNT=$((TOUCHED_COUNT + 1))
116
183
  TOUCHED_NAMES="$TOUCHED_NAMES $layer"
117
184
  fi
118
- done < <(jq -r ".domains[$i].layers[]" harness.config.json)
185
+ done < <(jp ".domains[$i].layers[]" harness.config.json)
119
186
  if [ "$TOUCHED_COUNT" -ge 2 ]; then
120
187
  echo "Domain '$DOMAIN' has changes spanning $TOUCHED_COUNT layers:$TOUCHED_NAMES" >> "$MULTI_OUT"
121
188
  MULTI_HIT=1
@@ -177,8 +244,8 @@ HEADLESS_SOURCE=""
177
244
  if [ "${AHK_HEADLESS_RECOVER:-}" = "1" ]; then
178
245
  HEADLESS_RECOVER=1
179
246
  HEADLESS_SOURCE="AHK_HEADLESS_RECOVER"
180
- elif [ -f harness.config.json ] && command -v jq >/dev/null 2>&1; then
181
- CFG_VAL=$(jq -r '.recovery.headless // false' harness.config.json 2>/dev/null)
247
+ elif [ -f harness.config.json ] && have_jp; then
248
+ CFG_VAL=$(jp '.recovery.headless // false' harness.config.json 2>/dev/null)
182
249
  if [ "$CFG_VAL" = "true" ]; then
183
250
  HEADLESS_RECOVER=1
184
251
  HEADLESS_SOURCE="harness.config.json:.recovery.headless"
@@ -186,12 +253,64 @@ elif [ -f harness.config.json ] && command -v jq >/dev/null 2>&1; then
186
253
  fi
187
254
  if [ "$HEADLESS_RECOVER" = "1" ] && command -v claude >/dev/null 2>&1; then
188
255
  FAILED_LIST=$(tr '\n' ' ' < "$TMPDIR_HOOK/failed.list")
189
- echo "[ahk] headless recovery enabled ($HEADLESS_SOURCE) — spawning recovery turn for: $FAILED_LIST" >&2
190
- claude -p \
191
- "The pre-completion checklist failed: $FAILED_LIST. Read the failure output in $TMPDIR_HOOK and apply the smallest fix. Do not disable any check." \
192
- --max-turns 5 \
193
- >"$TMPDIR_HOOK/recover.out" 2>&1 &
194
- # Don't wait let the next session pick up the partially-applied fix.
256
+
257
+ # Concurrency guard. Two Stop events in different sessions (e.g. user
258
+ # working in two terminals, or an unattended CI rerun firing while a
259
+ # previous recovery is still active) used to race and edit the same
260
+ # files. The lock is a directory created atomically with `mkdir`; the
261
+ # PID file inside lets us detect stale locks left by a crashed parent.
262
+ mkdir -p .harness
263
+ LOCK_DIR=".harness/recovery.lock"
264
+ LOCK_STALE_MAX_SECS=${AHK_RECOVERY_LOCK_STALE_SECS:-1800}
265
+
266
+ if mkdir "$LOCK_DIR" 2>/dev/null; then
267
+ # We won the race — spawn the recovery turn. Snapshot the failure
268
+ # context into the lock dir BEFORE the parent's EXIT trap deletes
269
+ # TMPDIR_HOOK; otherwise the subshell's redirect to recover.out
270
+ # races the parent's cleanup and the subshell dies before claude
271
+ # can run. Everything the recovery needs (failed.list, per-check
272
+ # output, recover.out) now lives inside LOCK_DIR — self-contained.
273
+ cp -r "$TMPDIR_HOOK/." "$LOCK_DIR/snapshot/" 2>/dev/null || true
274
+ (
275
+ # Trap removes the lock on subshell EXIT (success, failure, or signal).
276
+ trap 'rm -rf "$LOCK_DIR"' EXIT
277
+ claude -p \
278
+ "The pre-completion checklist failed: $FAILED_LIST. Read the failure output in $LOCK_DIR/snapshot and apply the smallest fix. Do not disable any check." \
279
+ --max-turns 5 \
280
+ >"$LOCK_DIR/recover.out" 2>&1
281
+ ) &
282
+ SUB_PID=$!
283
+ # Parent writes metadata SYNCHRONOUSLY before printing the "spawned"
284
+ # message so a second Stop firing immediately after never sees an
285
+ # empty pid file. Subsecond races between mkdir and these writes are
286
+ # closed by the bounded read-loop in the lock-held branch below.
287
+ echo "$SUB_PID" > "$LOCK_DIR/pid"
288
+ date +%s > "$LOCK_DIR/started_at"
289
+ echo "$HEADLESS_SOURCE" > "$LOCK_DIR/source"
290
+ echo "[ahk] headless recovery spawned (source=$HEADLESS_SOURCE, wrapper-pid=$SUB_PID, lock=$LOCK_DIR)" >&2
291
+ else
292
+ # Lock already held. Read who holds it and decide: live → skip,
293
+ # stale → reclaim. We never block the user's Stop on the lock —
294
+ # worst case we skip a recovery turn that the next Stop can retry.
295
+ # Bounded wait for the pid file to materialize — closes the race
296
+ # window between the parent's `mkdir` and its `echo $SUB_PID > pid`.
297
+ for _ in 1 2 3 4 5 6 7 8 9 10; do
298
+ [ -s "$LOCK_DIR/pid" ] && break
299
+ sleep 0.05
300
+ done
301
+ EXISTING_PID=$(cat "$LOCK_DIR/pid" 2>/dev/null || true)
302
+ STARTED_AT=$(cat "$LOCK_DIR/started_at" 2>/dev/null || echo 0)
303
+ NOW=$(date +%s)
304
+ AGE=$((NOW - STARTED_AT))
305
+ if [ -n "$EXISTING_PID" ] && kill -0 "$EXISTING_PID" 2>/dev/null; then
306
+ echo "[ahk] headless recovery skipped — another session already running (pid=$EXISTING_PID, age=${AGE}s, lock=$LOCK_DIR)" >&2
307
+ elif [ "$AGE" -gt "$LOCK_STALE_MAX_SECS" ]; then
308
+ echo "[ahk] headless recovery: removing stale lock (pid=$EXISTING_PID, age=${AGE}s > ${LOCK_STALE_MAX_SECS}s); next stop will retry. lock=$LOCK_DIR" >&2
309
+ rm -rf "$LOCK_DIR"
310
+ else
311
+ echo "[ahk] headless recovery skipped — lock present with dead pid=$EXISTING_PID (age=${AGE}s, will reclaim after ${LOCK_STALE_MAX_SECS}s). lock=$LOCK_DIR" >&2
312
+ fi
313
+ fi
195
314
  fi
196
315
 
197
316
  exit 2