agent-harness-kit 0.6.0 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. package/.claude-plugin/marketplace.json +2 -2
  2. package/.claude-plugin/plugin.json +1 -1
  3. package/README.md +29 -0
  4. package/bin/cli.mjs +15 -1
  5. package/package.json +1 -1
  6. package/src/core/detect-stack.mjs +16 -0
  7. package/src/core/doctor.mjs +23 -0
  8. package/src/core/render-templates.mjs +198 -6
  9. package/src/templates/.claude/hooks/hooks.json +111 -0
  10. package/src/templates/.claude/settings.json.hbs +1 -1
  11. package/src/templates/.claude/skills/doc-drift-scan/SKILL.md +15 -10
  12. package/src/templates/.claude/skills/doc-drift-scan/scripts/scan-paths.mjs +64 -0
  13. package/src/templates/.claude/skills/garbage-collection/SKILL.md.hbs +14 -5
  14. package/src/templates/.claude/skills/garbage-collection/scripts/gc-classify.mjs +77 -0
  15. package/src/templates/.claude/skills/inspect-module/SKILL.md.hbs +17 -14
  16. package/src/templates/.claude/skills/inspect-module/scripts/module-summary.mjs +144 -0
  17. package/src/templates/CLAUDE.md.hbs +10 -6
  18. package/src/templates/CLAUDE.md.vi.hbs +74 -0
  19. package/src/templates/_adapter-kotlin/harness/structural-check.mjs.hbs +286 -0
  20. package/src/templates/_adapter-rust/harness/structural-check.mjs.hbs +292 -100
  21. package/src/templates/_adapter-swift/harness/structural-check.mjs.hbs +285 -0
  22. package/src/templates/harness.config.json.hbs +5 -3
  23. package/src/templates/scripts/_lib/approx-tokens.mjs +48 -0
  24. package/src/templates/scripts/_lib/json-pick.mjs +278 -0
  25. package/src/templates/scripts/harness-report.mjs +95 -1
  26. package/src/templates/scripts/notify-on-block.sh.hbs +73 -0
  27. package/src/templates/scripts/pre-compact.sh.hbs +121 -0
  28. package/src/templates/scripts/pre-push.sh +28 -3
  29. package/src/templates/scripts/precompletion-checklist.sh.hbs +131 -22
  30. package/src/templates/scripts/pretooluse-bash-guard.sh.hbs +146 -0
  31. package/src/templates/scripts/session-end.sh.hbs +48 -0
  32. package/src/templates/scripts/session-start.sh.hbs +139 -0
  33. package/src/templates/scripts/statusline.mjs +63 -0
  34. package/src/templates/scripts/structural-test-on-edit.sh.hbs +31 -8
  35. package/src/templates/scripts/telemetry-on-skill.sh +32 -10
  36. package/src/templates/scripts/userprompt-guard.sh.hbs +100 -0
  37. package/src/templates/.claude/hooks/hooks.json.hbs +0 -39
@@ -20,7 +20,9 @@ const ROOT = process.cwd();
20
20
  const RESULTS_DIR = resolve(ROOT, ".harness/eval/results");
21
21
  const TELEMETRY = resolve(ROOT, ".harness/telemetry.jsonl");
22
22
  const NOW = Date.now();
23
- const SEVEN_DAYS = 7 * 24 * 60 * 60 * 1000;
23
+ const ONE_DAY = 24 * 60 * 60 * 1000;
24
+ const SEVEN_DAYS = 7 * ONE_DAY;
25
+ const FOURTEEN_DAYS = 14 * ONE_DAY;
24
26
 
25
27
  async function readJsonl(path) {
26
28
  if (!existsSync(path)) return [];
@@ -61,6 +63,16 @@ function recent(rows, key = "ts") {
61
63
  });
62
64
  }
63
65
 
66
+ // Rows aged 7–14 days. Used as the comparator for week-over-week deltas
67
+ // so users can spot drift instead of staring at a single-week snapshot.
68
+ function priorWeek(rows, key = "ts") {
69
+ return rows.filter((r) => {
70
+ const t = r[key] ? new Date(r[key]).getTime() : r._mtime ?? 0;
71
+ const age = NOW - t;
72
+ return age > SEVEN_DAYS && age <= FOURTEEN_DAYS;
73
+ });
74
+ }
75
+
64
76
  function tokensOf(row) {
65
77
  return (row.grades ?? [])
66
78
  .filter((g) => g.dim === "efficiency")
@@ -172,16 +184,98 @@ function driftSignals(evalRows, telemetryRows) {
172
184
  }
173
185
  }
174
186
 
187
+ // Aggregate eval rows by task into { passed, total, tokens }.
188
+ function aggregateEvals(rows) {
189
+ const byTask = new Map();
190
+ for (const r of rows) {
191
+ const cur = byTask.get(r.taskId) ?? { passed: 0, total: 0, tokens: 0 };
192
+ cur.total++;
193
+ if (r.passed) cur.passed++;
194
+ cur.tokens += tokensOf(r);
195
+ byTask.set(r.taskId, cur);
196
+ }
197
+ return byTask;
198
+ }
199
+
200
+ // Render a single delta line. signMode controls icon meaning — for pass-rate,
201
+ // up is good; for tokens, up is bad; for skill invocations, neutral.
202
+ function fmtDelta(now, then, signMode = "neutral", unit = "") {
203
+ if (then === undefined) return `(new) ${now}${unit}`;
204
+ const diff = now - then;
205
+ if (diff === 0) return `${now}${unit} → ${then}${unit} (=)`;
206
+ let arrow = diff > 0 ? "↑" : "↓";
207
+ // Color the arrow by "is this a regression?"
208
+ let marker = " ";
209
+ if (signMode === "good-up") marker = diff > 0 ? "+" : "-";
210
+ else if (signMode === "good-down") marker = diff > 0 ? "-" : "+";
211
+ return `${now}${unit} ← ${then}${unit} (${arrow}${marker} ${Math.abs(diff)}${unit})`;
212
+ }
213
+
214
+ function weekOverWeek(evalRecent, evalPrior, telRecent, telPrior) {
215
+ console.log(`\n### Week-over-week (last 7d vs prior 7d)`);
216
+ const aRecent = aggregateEvals(evalRecent);
217
+ const aPrior = aggregateEvals(evalPrior);
218
+
219
+ if (aRecent.size === 0 && aPrior.size === 0) {
220
+ console.log(" (no eval data in either window — run `npm run harness:eval`)");
221
+ } else {
222
+ console.log(" task pass-rate (now ← prior) avg-tokens (now ← prior)");
223
+ console.log(" ---------------------- ---------------------------- --------------------------");
224
+ const taskIds = new Set([...aRecent.keys(), ...aPrior.keys()]);
225
+ for (const t of [...taskIds].sort()) {
226
+ const now = aRecent.get(t);
227
+ const prior = aPrior.get(t);
228
+ const nowRate = now ? Math.round((now.passed / now.total) * 100) : null;
229
+ const priorRate = prior ? Math.round((prior.passed / prior.total) * 100) : null;
230
+ const nowTok = now && now.total > 0 ? Math.round(now.tokens / now.total) : 0;
231
+ const priorTok = prior && prior.total > 0 ? Math.round(prior.tokens / prior.total) : 0;
232
+ const rateCell = nowRate === null
233
+ ? "(absent now)"
234
+ : priorRate === null
235
+ ? `${nowRate}% (new)`
236
+ : `${nowRate}% ← ${priorRate}% (${nowRate - priorRate >= 0 ? "+" : ""}${nowRate - priorRate})`;
237
+ const tokCell = nowTok === 0 && priorTok === 0
238
+ ? "—"
239
+ : `${nowTok} ← ${priorTok} (${nowTok - priorTok >= 0 ? "+" : ""}${nowTok - priorTok})`;
240
+ console.log(
241
+ ` ${t.padEnd(22)} ${rateCell.padEnd(30)} ${tokCell}`,
242
+ );
243
+ }
244
+ }
245
+
246
+ // Skill invocation deltas.
247
+ const recentBySkill = new Map();
248
+ for (const r of telRecent) recentBySkill.set(r.skill, (recentBySkill.get(r.skill) ?? 0) + 1);
249
+ const priorBySkill = new Map();
250
+ for (const r of telPrior) priorBySkill.set(r.skill, (priorBySkill.get(r.skill) ?? 0) + 1);
251
+
252
+ const allSkills = new Set([...recentBySkill.keys(), ...priorBySkill.keys()]);
253
+ if (allSkills.size > 0) {
254
+ console.log("\n skill invocations (now ← prior)");
255
+ console.log(" ----------------------------- -------------------------------");
256
+ for (const s of [...allSkills].sort()) {
257
+ const n = recentBySkill.get(s) ?? 0;
258
+ const p = priorBySkill.get(s) ?? 0;
259
+ const d = n - p;
260
+ const cell = p === 0 ? `${n} (new)` : `${n} ← ${p} (${d >= 0 ? "+" : ""}${d})`;
261
+ console.log(` ${s.padEnd(29)} ${cell}`);
262
+ }
263
+ }
264
+ }
265
+
175
266
  async function main() {
176
267
  const evalAll = await loadEvalResults();
177
268
  const telemetryAll = await readJsonl(TELEMETRY);
178
269
  const evalRows = recent(evalAll);
270
+ const evalPrior = priorWeek(evalAll);
179
271
  const telemetryRows = recent(telemetryAll);
272
+ const telemetryPrior = priorWeek(telemetryAll);
180
273
 
181
274
  console.log("=== agent-harness-kit report ===");
182
275
  console.log(`Generated: ${new Date().toISOString()}`);
183
276
  summarizeEvals(evalRows);
184
277
  summarizeTelemetry(telemetryRows);
278
+ weekOverWeek(evalRows, evalPrior, telemetryRows, telemetryPrior);
185
279
  driftSignals(evalRows, telemetryRows);
186
280
  console.log("");
187
281
  }
@@ -0,0 +1,73 @@
1
+ #!/usr/bin/env bash
2
+ # Notification hook — OS-native notification when Claude wants attention.
3
+ # macOS osascript / Linux notify-send / Windows skip.
4
+ # Never blocks. Always exits 0. Opt-out: AHK_DISABLE_NOTIFY=1.
5
+ set -eo pipefail
6
+
7
+ INPUT=$(cat)
8
+ SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
9
+ have_jq() {
10
+ [ "${AHK_DISABLE_JQ:-}" = "1" ] && return 1
11
+ command -v jq >/dev/null 2>&1
12
+ }
13
+ have_jp() {
14
+ have_jq && return 0
15
+ command -v node >/dev/null 2>&1 && [ -f "$SCRIPT_DIR/_lib/json-pick.mjs" ] && return 0
16
+ return 1
17
+ }
18
+ jp() {
19
+ if have_jq; then
20
+ if [ -n "$2" ]; then jq -r "$1" "$2"; else jq -r "$1"; fi
21
+ else
22
+ if [ -n "$2" ]; then
23
+ node "$SCRIPT_DIR/_lib/json-pick.mjs" "$1" "$2"
24
+ else
25
+ node "$SCRIPT_DIR/_lib/json-pick.mjs" "$1"
26
+ fi
27
+ fi
28
+ }
29
+
30
+ if [ "${AHK_DISABLE_NOTIFY:-}" = "1" ]; then
31
+ exit 0
32
+ fi
33
+
34
+ TYPE=""
35
+ TITLE=""
36
+ BODY=""
37
+ if have_jp; then
38
+ TYPE=$(echo "$INPUT" | jp '.notification.type // empty')
39
+ TITLE=$(echo "$INPUT" | jp '.notification.title // empty')
40
+ BODY=$(echo "$INPUT" | jp '.notification.body // empty')
41
+ fi
42
+
43
+ [ -z "$TITLE" ] && TITLE="Claude Code"
44
+ if [ -n "$TYPE" ]; then
45
+ BODY="[$TYPE] ${BODY}"
46
+ fi
47
+ [ -z "$BODY" ] && BODY="Claude Code wants your attention."
48
+
49
+ mkdir -p .harness
50
+ TS=$(date -u +%Y-%m-%dT%H:%M:%SZ)
51
+ ESCAPED_TITLE=${TITLE//\"/\\\"}
52
+ ESCAPED_BODY=${BODY//\"/\\\"}
53
+ printf '{"ts":"%s","hook":"Notification","type":"%s","title":"%s","body":"%s"}\n' \
54
+ "$TS" "$TYPE" "$ESCAPED_TITLE" "$ESCAPED_BODY" >> .harness/telemetry.jsonl
55
+
56
+ OS_KIND=$(uname -s 2>/dev/null || echo "Unknown")
57
+ case "$OS_KIND" in
58
+ Darwin)
59
+ OSA_TITLE=${TITLE//\"/\\\"}
60
+ OSA_BODY=${BODY//\"/\\\"}
61
+ osascript -e "display notification \"$OSA_BODY\" with title \"$OSA_TITLE\"" >/dev/null 2>&1 || true
62
+ ;;
63
+ Linux)
64
+ if command -v notify-send >/dev/null 2>&1; then
65
+ notify-send -a "Claude Code" "$TITLE" "$BODY" >/dev/null 2>&1 || true
66
+ fi
67
+ ;;
68
+ *)
69
+ :
70
+ ;;
71
+ esac
72
+
73
+ exit 0
@@ -0,0 +1,121 @@
1
+ #!/usr/bin/env bash
2
+ # PreCompact hook — write a small snapshot of state to
3
+ # .harness/compaction-snapshot.json BEFORE the context compactor runs.
4
+ # The companion SessionStart hook (matcher: compact) reads this snapshot
5
+ # back and re-injects the salient fields so the post-compaction model
6
+ # knows which feature it was working on, which branch, and how dirty
7
+ # the tree was.
8
+ #
9
+ # This is the kit's answer to the "I lost everything after compaction"
10
+ # failure mode that recurs in long sessions. Pair with:
11
+ # - SessionStart matcher compact → re-inject
12
+ # - PostCompact (not implemented; SessionStart does the work)
13
+ #
14
+ # Snapshot contents:
15
+ # {
16
+ # "compacted_at": "2026-05-16T19:00:00Z",
17
+ # "branch": "main",
18
+ # "sha": "abc1234",
19
+ # "uncommitted": 7,
20
+ # "feature": "auth-endpoint — POST /auth/login",
21
+ # "trigger": "manual|auto",
22
+ # "estimated_tokens_removed": 5000
23
+ # }
24
+ #
25
+ # The hook NEVER blocks (exit 0 always). PreCompact can technically block
26
+ # compaction but doing so defeats the entire point.
27
+ set -eo pipefail
28
+
29
+ INPUT=$(cat)
30
+ SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
31
+ have_jq() {
32
+ [ "${AHK_DISABLE_JQ:-}" = "1" ] && return 1
33
+ command -v jq >/dev/null 2>&1
34
+ }
35
+ have_jp() {
36
+ have_jq && return 0
37
+ command -v node >/dev/null 2>&1 && [ -f "$SCRIPT_DIR/_lib/json-pick.mjs" ] && return 0
38
+ return 1
39
+ }
40
+ jp() {
41
+ if have_jq; then
42
+ if [ -n "$2" ]; then jq -r "$1" "$2"; else jq -r "$1"; fi
43
+ else
44
+ if [ -n "$2" ]; then
45
+ node "$SCRIPT_DIR/_lib/json-pick.mjs" "$1" "$2"
46
+ else
47
+ node "$SCRIPT_DIR/_lib/json-pick.mjs" "$1"
48
+ fi
49
+ fi
50
+ }
51
+
52
+ TRIGGER=""
53
+ TOKENS=""
54
+ if have_jp; then
55
+ TRIGGER=$(echo "$INPUT" | jp '.trigger // "auto"' 2>/dev/null || true)
56
+ TOKENS=$(echo "$INPUT" | jp '.estimated_tokens_removed // 0' 2>/dev/null || true)
57
+ fi
58
+
59
+ mkdir -p .harness
60
+
61
+ TS=$(date -u +%Y-%m-%dT%H:%M:%SZ)
62
+ BR="(no-git)"
63
+ SHA="(no-git)"
64
+ COUNT=0
65
+ if command -v git >/dev/null 2>&1 && git rev-parse --git-dir >/dev/null 2>&1; then
66
+ BR=$(git branch --show-current 2>/dev/null || echo "(detached)")
67
+ SHA=$(git rev-parse --short HEAD 2>/dev/null || echo "(none)")
68
+ COUNT=$(git status --short 2>/dev/null | wc -l | tr -d ' ')
69
+ fi
70
+
71
+ FEAT=""
72
+ if [ -f feature_list.json ]; then
73
+ if have_jq; then
74
+ FEAT=$(jq -r 'first(.features[] | select(.passes == false)) | "\(.id) — \(.title)"' \
75
+ feature_list.json 2>/dev/null || true)
76
+ elif command -v node >/dev/null 2>&1; then
77
+ FEAT=$(node -e "
78
+ const f = JSON.parse(require('fs').readFileSync('feature_list.json','utf8'));
79
+ const o = (f.features || []).find(x => x.passes === false);
80
+ if (o) process.stdout.write(o.id + ' — ' + o.title);
81
+ " 2>/dev/null || true)
82
+ fi
83
+ fi
84
+
85
+ # Compose JSON via Node when available — handles escaping right.
86
+ if command -v node >/dev/null 2>&1; then
87
+ node -e "
88
+ const fs = require('fs');
89
+ const snap = {
90
+ compacted_at: '$TS',
91
+ branch: '$BR',
92
+ sha: '$SHA',
93
+ uncommitted: parseInt('$COUNT', 10) || 0,
94
+ feature: process.argv[1] || '',
95
+ trigger: '$TRIGGER' || 'auto',
96
+ estimated_tokens_removed: parseInt('$TOKENS', 10) || 0
97
+ };
98
+ fs.writeFileSync('.harness/compaction-snapshot.json', JSON.stringify(snap, null, 2) + '\n');
99
+ " "$FEAT"
100
+ elif have_jq; then
101
+ jq -n --arg ts "$TS" --arg br "$BR" --arg sha "$SHA" \
102
+ --argjson cnt "$COUNT" --arg feat "$FEAT" \
103
+ --arg trig "${TRIGGER:-auto}" --argjson tok "${TOKENS:-0}" \
104
+ '{compacted_at: $ts, branch: $br, sha: $sha, uncommitted: $cnt,
105
+ feature: $feat, trigger: $trig, estimated_tokens_removed: $tok}' \
106
+ > .harness/compaction-snapshot.json
107
+ else
108
+ # No JSON tool available — write a minimal record. SessionStart compact
109
+ # branch reads fields individually so partial records still work.
110
+ cat > .harness/compaction-snapshot.json <<EOF
111
+ {
112
+ "compacted_at": "$TS",
113
+ "branch": "$BR",
114
+ "sha": "$SHA",
115
+ "uncommitted": $COUNT,
116
+ "feature": "$FEAT",
117
+ "trigger": "${TRIGGER:-auto}"
118
+ }
119
+ EOF
120
+ fi
121
+ exit 0
@@ -4,17 +4,42 @@
4
4
  # Lives in scripts/ so it ships with the repo; install via install-git-hooks.sh.
5
5
  set -eo pipefail
6
6
 
7
+ # Resolve script dir so we can find _lib/json-pick.mjs (Node fallback for jq).
8
+ # Without this fallback, `jq` missing on a fresh CI image silently disabled
9
+ # the baseline-monotonic guard — a known audit hole.
10
+ SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
11
+ have_jq() {
12
+ [ "${AHK_DISABLE_JQ:-}" = "1" ] && return 1
13
+ command -v jq >/dev/null 2>&1
14
+ }
15
+ have_jp() {
16
+ have_jq && return 0
17
+ command -v node >/dev/null 2>&1 && [ -f "$SCRIPT_DIR/_lib/json-pick.mjs" ] && return 0
18
+ return 1
19
+ }
20
+ jp() {
21
+ if have_jq; then
22
+ if [ -n "$2" ]; then jq -r "$1" "$2"; else jq -r "$1"; fi
23
+ else
24
+ if [ -n "$2" ]; then
25
+ node "$SCRIPT_DIR/_lib/json-pick.mjs" "$1" "$2"
26
+ else
27
+ node "$SCRIPT_DIR/_lib/json-pick.mjs" "$1"
28
+ fi
29
+ fi
30
+ }
31
+
7
32
  # Baseline monotonic guard. .harness/structural-baseline.json is decreasing-
8
33
  # only — fixes REMOVE entries; no path should ADD them. Catches the "mask
9
34
  # violations by baselining them" anti-pattern before code leaves the machine.
10
35
  # Runs first because a grown baseline silently masks structural-test failures.
11
36
  BASELINE_FILE=".harness/structural-baseline.json"
12
37
  if [ -f "$BASELINE_FILE" ] \
13
- && command -v jq >/dev/null 2>&1 \
38
+ && have_jp \
14
39
  && git rev-parse --verify HEAD >/dev/null 2>&1 \
15
40
  && git cat-file -e "HEAD:$BASELINE_FILE" 2>/dev/null; then
16
- CURRENT_COUNT=$(jq 'length' "$BASELINE_FILE" 2>/dev/null || echo 0)
17
- HEAD_COUNT=$(git show "HEAD:$BASELINE_FILE" 2>/dev/null | jq 'length' 2>/dev/null || echo 0)
41
+ CURRENT_COUNT=$(jp 'length' "$BASELINE_FILE" 2>/dev/null || echo 0)
42
+ HEAD_COUNT=$(git show "HEAD:$BASELINE_FILE" 2>/dev/null | jp 'length' 2>/dev/null || echo 0)
18
43
  if [ "$CURRENT_COUNT" -gt "$HEAD_COUNT" ]; then
19
44
  {
20
45
  echo
@@ -12,9 +12,43 @@ set -e
12
12
 
13
13
  INPUT=$(cat)
14
14
 
15
+ # Resolve the directory this hook lives in (used to find _lib/json-pick.mjs).
16
+ SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
17
+
18
+ # have_jq — env-overridable probe. AHK_DISABLE_JQ=1 forces the Node fallback,
19
+ # used by tests to exercise the jq-less code path on machines that have jq
20
+ # installed locally.
21
+ have_jq() {
22
+ [ "${AHK_DISABLE_JQ:-}" = "1" ] && return 1
23
+ command -v jq >/dev/null 2>&1
24
+ }
25
+ # jp — JSON picker. Uses `jq` when available, else falls back to a bundled
26
+ # Node script with a jq-subset implementation. Keeps hooks portable on
27
+ # minimal CI / Windows where jq is not installed by default. Without this
28
+ # fallback, the entire pre-completion check used to be a silent no-op.
29
+ jp() {
30
+ if have_jq; then
31
+ if [ -n "$2" ]; then jq -r "$1" "$2"; else jq -r "$1"; fi
32
+ else
33
+ if [ -n "$2" ]; then
34
+ node "$SCRIPT_DIR/_lib/json-pick.mjs" "$1" "$2"
35
+ else
36
+ node "$SCRIPT_DIR/_lib/json-pick.mjs" "$1"
37
+ fi
38
+ fi
39
+ }
40
+ # Probe: do we have either jq or the Node fallback? Node is always
41
+ # present (kit's `engines` field requires >=20), so this is just an explicit
42
+ # probe and a fail-loud branch if even node is missing.
43
+ have_jp() {
44
+ have_jq && return 0
45
+ command -v node >/dev/null 2>&1 && [ -f "$SCRIPT_DIR/_lib/json-pick.mjs" ] && return 0
46
+ return 1
47
+ }
48
+
15
49
  # CRITICAL: avoid infinite loops. If the hook already ran, do not block again.
16
- if command -v jq >/dev/null 2>&1; then
17
- if [ "$(echo "$INPUT" | jq -r '.stop_hook_active // false')" = "true" ]; then
50
+ if have_jp; then
51
+ if [ "$(echo "$INPUT" | jp '.stop_hook_active // false')" = "true" ]; then
18
52
  exit 0
19
53
  fi
20
54
  fi
@@ -59,12 +93,17 @@ elif [ -f pyproject.toml ] && command -v ruff >/dev/null 2>&1; then
59
93
  run_check ruff ruff check . || true
60
94
  fi
61
95
 
62
- # CLAUDE.md instruction cap. HumanLayer measurement: agents stop following
63
- # CLAUDE.md reliably beyond ~150-200 bullets/numbered items. Treat the file
64
- # as a table of contents; promote details to docs/ or @-imports.
65
- if [ -f harness.config.json ] && command -v jq >/dev/null 2>&1; then
66
- CMD_PATH=$(jq -r '.claudeMd.path // "CLAUDE.md"' harness.config.json)
67
- CMD_CAP=$(jq -r '.claudeMd.maxInstructions // 200' harness.config.json)
96
+ # CLAUDE.md size caps. Two complementary signals:
97
+ # - maxInstructions (default 200): bullet/numbered-item count. Suits
98
+ #     ASCII-heavy English where a bullet carries a fixed token weight.
99
+ # - maxTokens (default 0 = off): approximate token cap. Catches drift
100
+ # in non-ASCII content (Vietnamese, CJK, etc.) where 200 bullets
101
+ # may carry 2–3× more tokens than the HumanLayer baseline measured.
102
+ # Both checks fire independently — exceed either → block.
103
+ if [ -f harness.config.json ] && have_jp; then
104
+ CMD_PATH=$(jp '.claudeMd.path // "CLAUDE.md"' harness.config.json)
105
+ CMD_CAP=$(jp '.claudeMd.maxInstructions // 200' harness.config.json)
106
+ CMD_TOK_CAP=$(jp '.claudeMd.maxTokens // 0' harness.config.json)
68
107
  if [ -f "$CMD_PATH" ] && [ "$CMD_CAP" -gt 0 ] 2>/dev/null; then
69
108
  CMD_COUNT=$(grep -cE '^[[:space:]]*([-*]|[0-9]+\.)[[:space:]]' "$CMD_PATH" 2>/dev/null || echo 0)
70
109
  if [ "$CMD_COUNT" -gt "$CMD_CAP" ]; then
@@ -85,6 +124,24 @@ if [ -f harness.config.json ] && command -v jq >/dev/null 2>&1; then
85
124
  echo "claude-md-cap" >> "$TMPDIR_HOOK/failed.list"
86
125
  fi
87
126
  fi
127
+ if [ -f "$CMD_PATH" ] && [ "$CMD_TOK_CAP" -gt 0 ] 2>/dev/null \
128
+ && command -v node >/dev/null 2>&1 \
129
+ && [ -f "$SCRIPT_DIR/_lib/approx-tokens.mjs" ]; then
130
+ CMD_TOK=$(node "$SCRIPT_DIR/_lib/approx-tokens.mjs" "$CMD_PATH" 2>/dev/null || echo 0)
131
+ if [ "$CMD_TOK" -gt "$CMD_TOK_CAP" ]; then
132
+ {
133
+ echo "$CMD_PATH approximate token count: $CMD_TOK (cap: $CMD_TOK_CAP)"
134
+ echo
135
+ echo "Heuristic token cap — set because instruction count alone misses"
136
+ echo "drift in non-ASCII content (Vietnamese, CJK) where a bullet can"
137
+ echo "carry 2-3x more tokens than the HumanLayer baseline measured."
138
+ echo
139
+ echo "Adjust the cap (with justification) in harness.config.json:"
140
+ echo " .claudeMd.maxTokens"
141
+ } > "$TMPDIR_HOOK/claude-md-tokens.out"
142
+ echo "claude-md-tokens" >> "$TMPDIR_HOOK/failed.list"
143
+ fi
144
+ fi
88
145
  fi
89
146
 
90
147
  # Multi-layer review trigger. When uncommitted/staged/untracked changes touch
@@ -94,7 +151,7 @@ fi
94
151
  # with a mechanical count off `harness.config.json` `domains[].layers` /
95
152
  # `.root`. Fires once per stop; the loop guard (`stop_hook_active`) lets the
96
153
  # next stop succeed after the agent has read the recommendation.
97
- if [ -f harness.config.json ] && command -v jq >/dev/null 2>&1 && command -v git >/dev/null 2>&1; then
154
+ if [ -f harness.config.json ] && have_jp && command -v git >/dev/null 2>&1; then
98
155
  CHANGED=$(
99
156
  {
100
157
  git diff --name-only 2>/dev/null || true
@@ -103,19 +160,19 @@ if [ -f harness.config.json ] && command -v jq >/dev/null 2>&1 && command -v git
103
160
  } | sort -u
104
161
  )
105
162
  if [ -n "$CHANGED" ]; then
106
- NUM_DOMAINS=$(jq '.domains | length' harness.config.json 2>/dev/null || echo 0)
163
+ NUM_DOMAINS=$(jp '.domains | length' harness.config.json 2>/dev/null || echo 0)
107
164
  MULTI_OUT="$TMPDIR_HOOK/multi-layer-review.out"
108
165
  : > "$MULTI_OUT"
109
166
  MULTI_HIT=0
110
167
  i=0
111
168
  while [ "$i" -lt "$NUM_DOMAINS" ]; do
112
- ROOT=$(jq -r ".domains[$i].root" harness.config.json)
113
- DOMAIN=$(jq -r ".domains[$i].name" harness.config.json)
169
+ ROOT=$(jp ".domains[$i].root" harness.config.json)
170
+ DOMAIN=$(jp ".domains[$i].name" harness.config.json)
114
171
  # Optional layerDirPattern — supports conventions where the layer
115
172
  # directory is not literally `{layer}`. Example: a Rust workspace
116
173
  # with crates named `unibot-types`, `unibot-crypto`, ... uses
117
174
  # `"layerDirPattern": "unibot-{layer}"`. Defaults to `{layer}`.
118
- LAYER_PATTERN=$(jq -r ".domains[$i].layerDirPattern // \"{layer}\"" harness.config.json)
175
+ LAYER_PATTERN=$(jp ".domains[$i].layerDirPattern // \"{layer}\"" harness.config.json)
119
176
  TOUCHED_COUNT=0
120
177
  TOUCHED_NAMES=""
121
178
  while IFS= read -r layer; do
@@ -125,7 +182,7 @@ if [ -f harness.config.json ] && command -v jq >/dev/null 2>&1 && command -v git
125
182
  TOUCHED_COUNT=$((TOUCHED_COUNT + 1))
126
183
  TOUCHED_NAMES="$TOUCHED_NAMES $layer"
127
184
  fi
128
- done < <(jq -r ".domains[$i].layers[]" harness.config.json)
185
+ done < <(jp ".domains[$i].layers[]" harness.config.json)
129
186
  if [ "$TOUCHED_COUNT" -ge 2 ]; then
130
187
  echo "Domain '$DOMAIN' has changes spanning $TOUCHED_COUNT layers:$TOUCHED_NAMES" >> "$MULTI_OUT"
131
188
  MULTI_HIT=1
@@ -187,8 +244,8 @@ HEADLESS_SOURCE=""
187
244
  if [ "${AHK_HEADLESS_RECOVER:-}" = "1" ]; then
188
245
  HEADLESS_RECOVER=1
189
246
  HEADLESS_SOURCE="AHK_HEADLESS_RECOVER"
190
- elif [ -f harness.config.json ] && command -v jq >/dev/null 2>&1; then
191
- CFG_VAL=$(jq -r '.recovery.headless // false' harness.config.json 2>/dev/null)
247
+ elif [ -f harness.config.json ] && have_jp; then
248
+ CFG_VAL=$(jp '.recovery.headless // false' harness.config.json 2>/dev/null)
192
249
  if [ "$CFG_VAL" = "true" ]; then
193
250
  HEADLESS_RECOVER=1
194
251
  HEADLESS_SOURCE="harness.config.json:.recovery.headless"
@@ -196,12 +253,64 @@ elif [ -f harness.config.json ] && command -v jq >/dev/null 2>&1; then
196
253
  fi
197
254
  if [ "$HEADLESS_RECOVER" = "1" ] && command -v claude >/dev/null 2>&1; then
198
255
  FAILED_LIST=$(tr '\n' ' ' < "$TMPDIR_HOOK/failed.list")
199
- echo "[ahk] headless recovery enabled ($HEADLESS_SOURCE) — spawning recovery turn for: $FAILED_LIST" >&2
200
- claude -p \
201
- "The pre-completion checklist failed: $FAILED_LIST. Read the failure output in $TMPDIR_HOOK and apply the smallest fix. Do not disable any check." \
202
- --max-turns 5 \
203
- >"$TMPDIR_HOOK/recover.out" 2>&1 &
204
- # Don't wait let the next session pick up the partially-applied fix.
256
+
257
+ # Concurrency guard. Two Stop events in different sessions (e.g. user
258
+ # working in two terminals, or an unattended CI rerun firing while a
259
+ # previous recovery is still active) used to race and edit the same
260
+ # files. The lock is a directory created atomically with `mkdir`; the
261
+ # PID file inside lets us detect stale locks left by a crashed parent.
262
+ mkdir -p .harness
263
+ LOCK_DIR=".harness/recovery.lock"
264
+ LOCK_STALE_MAX_SECS=${AHK_RECOVERY_LOCK_STALE_SECS:-1800}
265
+
266
+ if mkdir "$LOCK_DIR" 2>/dev/null; then
267
+ # We won the race — spawn the recovery turn. Snapshot the failure
268
+ # context into the lock dir BEFORE the parent's EXIT trap deletes
269
+ # TMPDIR_HOOK; otherwise the subshell's redirect to recover.out
270
+ # races the parent's cleanup and the subshell dies before claude
271
+ # can run. Everything the recovery needs (failed.list, per-check
272
+ # output, recover.out) now lives inside LOCK_DIR — self-contained.
273
+ cp -r "$TMPDIR_HOOK/." "$LOCK_DIR/snapshot/" 2>/dev/null || true
274
+ (
275
+ # Trap removes the lock on subshell EXIT (success, failure, or signal).
276
+ trap 'rm -rf "$LOCK_DIR"' EXIT
277
+ claude -p \
278
+ "The pre-completion checklist failed: $FAILED_LIST. Read the failure output in $LOCK_DIR/snapshot and apply the smallest fix. Do not disable any check." \
279
+ --max-turns 5 \
280
+ >"$LOCK_DIR/recover.out" 2>&1
281
+ ) &
282
+ SUB_PID=$!
283
+ # Parent writes metadata SYNCHRONOUSLY before printing the "spawned"
284
+ # message so a second Stop firing immediately after never sees an
285
+ # empty pid file. Subsecond races between mkdir and these writes are
286
+ # closed by the bounded read-loop in the lock-held branch below.
287
+ echo "$SUB_PID" > "$LOCK_DIR/pid"
288
+ date +%s > "$LOCK_DIR/started_at"
289
+ echo "$HEADLESS_SOURCE" > "$LOCK_DIR/source"
290
+ echo "[ahk] headless recovery spawned (source=$HEADLESS_SOURCE, wrapper-pid=$SUB_PID, lock=$LOCK_DIR)" >&2
291
+ else
292
+ # Lock already held. Read who holds it and decide: live → skip,
293
+ # stale → reclaim. We never block the user's Stop on the lock —
294
+ # worst case we skip a recovery turn that the next Stop can retry.
295
+ # Bounded wait for the pid file to materialize — closes the race
296
+ # window between the parent's `mkdir` and its `echo $SUB_PID > pid`.
297
+ for _ in 1 2 3 4 5 6 7 8 9 10; do
298
+ [ -s "$LOCK_DIR/pid" ] && break
299
+ sleep 0.05
300
+ done
301
+ EXISTING_PID=$(cat "$LOCK_DIR/pid" 2>/dev/null || true)
302
+ STARTED_AT=$(cat "$LOCK_DIR/started_at" 2>/dev/null || echo 0)
303
+ NOW=$(date +%s)
304
+ AGE=$((NOW - STARTED_AT))
305
+ if [ -n "$EXISTING_PID" ] && kill -0 "$EXISTING_PID" 2>/dev/null; then
306
+ echo "[ahk] headless recovery skipped — another session already running (pid=$EXISTING_PID, age=${AGE}s, lock=$LOCK_DIR)" >&2
307
+ elif [ "$AGE" -gt "$LOCK_STALE_MAX_SECS" ]; then
308
+ echo "[ahk] headless recovery: removing stale lock (pid=$EXISTING_PID, age=${AGE}s > ${LOCK_STALE_MAX_SECS}s); next stop will retry. lock=$LOCK_DIR" >&2
309
+ rm -rf "$LOCK_DIR"
310
+ else
311
+ echo "[ahk] headless recovery skipped — lock present with dead pid=$EXISTING_PID (age=${AGE}s, will reclaim after ${LOCK_STALE_MAX_SECS}s). lock=$LOCK_DIR" >&2
312
+ fi
313
+ fi
205
314
  fi
206
315
 
207
316
  exit 2