agent-harness-kit 0.6.0 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +2 -2
- package/.claude-plugin/plugin.json +1 -1
- package/README.md +29 -0
- package/bin/cli.mjs +15 -1
- package/package.json +1 -1
- package/src/core/detect-stack.mjs +16 -0
- package/src/core/doctor.mjs +23 -0
- package/src/core/render-templates.mjs +198 -6
- package/src/templates/.claude/hooks/hooks.json +111 -0
- package/src/templates/.claude/settings.json.hbs +1 -1
- package/src/templates/.claude/skills/doc-drift-scan/SKILL.md +15 -10
- package/src/templates/.claude/skills/doc-drift-scan/scripts/scan-paths.mjs +64 -0
- package/src/templates/.claude/skills/garbage-collection/SKILL.md.hbs +14 -5
- package/src/templates/.claude/skills/garbage-collection/scripts/gc-classify.mjs +77 -0
- package/src/templates/.claude/skills/inspect-module/SKILL.md.hbs +17 -14
- package/src/templates/.claude/skills/inspect-module/scripts/module-summary.mjs +144 -0
- package/src/templates/CLAUDE.md.hbs +10 -6
- package/src/templates/CLAUDE.md.vi.hbs +74 -0
- package/src/templates/_adapter-kotlin/harness/structural-check.mjs.hbs +286 -0
- package/src/templates/_adapter-rust/harness/structural-check.mjs.hbs +292 -100
- package/src/templates/_adapter-swift/harness/structural-check.mjs.hbs +285 -0
- package/src/templates/harness.config.json.hbs +5 -3
- package/src/templates/scripts/_lib/approx-tokens.mjs +48 -0
- package/src/templates/scripts/_lib/json-pick.mjs +278 -0
- package/src/templates/scripts/harness-report.mjs +95 -1
- package/src/templates/scripts/notify-on-block.sh.hbs +73 -0
- package/src/templates/scripts/pre-compact.sh.hbs +121 -0
- package/src/templates/scripts/pre-push.sh +28 -3
- package/src/templates/scripts/precompletion-checklist.sh.hbs +131 -22
- package/src/templates/scripts/pretooluse-bash-guard.sh.hbs +146 -0
- package/src/templates/scripts/session-end.sh.hbs +48 -0
- package/src/templates/scripts/session-start.sh.hbs +139 -0
- package/src/templates/scripts/statusline.mjs +63 -0
- package/src/templates/scripts/structural-test-on-edit.sh.hbs +31 -8
- package/src/templates/scripts/telemetry-on-skill.sh +32 -10
- package/src/templates/scripts/userprompt-guard.sh.hbs +100 -0
- package/src/templates/.claude/hooks/hooks.json.hbs +0 -39
|
@@ -20,7 +20,9 @@ const ROOT = process.cwd();
|
|
|
20
20
|
const RESULTS_DIR = resolve(ROOT, ".harness/eval/results");
|
|
21
21
|
const TELEMETRY = resolve(ROOT, ".harness/telemetry.jsonl");
|
|
22
22
|
const NOW = Date.now();
|
|
23
|
-
const
|
|
23
|
+
const ONE_DAY = 24 * 60 * 60 * 1000;
|
|
24
|
+
const SEVEN_DAYS = 7 * ONE_DAY;
|
|
25
|
+
const FOURTEEN_DAYS = 14 * ONE_DAY;
|
|
24
26
|
|
|
25
27
|
async function readJsonl(path) {
|
|
26
28
|
if (!existsSync(path)) return [];
|
|
@@ -61,6 +63,16 @@ function recent(rows, key = "ts") {
|
|
|
61
63
|
});
|
|
62
64
|
}
|
|
63
65
|
|
|
66
|
+
/**
 * Select the rows that belong to the "prior week" window: strictly older
 * than seven days and at most fourteen days old, measured against NOW.
 *
 * A row's timestamp is taken from `row[key]` (parsed with `Date`) when that
 * field is truthy, otherwise from `row._mtime`, otherwise 0. Rows whose
 * timestamp cannot be parsed produce a NaN age and are therefore left out.
 *
 * @param {Array<object>} rows - Candidate rows.
 * @param {string} [key="ts"] - Name of the timestamp field on each row.
 * @returns {Array<object>} The rows aged within (7 days, 14 days].
 */
function priorWeek(rows, key = "ts") {
  const timestampOf = (row) => {
    if (row[key]) {
      return new Date(row[key]).getTime();
    }
    return row._mtime ?? 0;
  };

  const isInPriorWindow = (row) => {
    const elapsed = NOW - timestampOf(row);
    // Kept as two positive comparisons so a NaN age is rejected.
    return elapsed > SEVEN_DAYS && elapsed <= FOURTEEN_DAYS;
  };

  return rows.filter((row) => isInPriorWindow(row));
}
|
|
75
|
+
|
|
64
76
|
function tokensOf(row) {
|
|
65
77
|
return (row.grades ?? [])
|
|
66
78
|
.filter((g) => g.dim === "efficiency")
|
|
@@ -172,16 +184,98 @@ function driftSignals(evalRows, telemetryRows) {
|
|
|
172
184
|
}
|
|
173
185
|
}
|
|
174
186
|
|
|
187
|
+
/**
 * Roll eval rows up per task.
 *
 * @param {Array<object>} rows - Eval rows; each is grouped by its `taskId`.
 * @returns {Map<*, {passed: number, total: number, tokens: number}>}
 *   One entry per distinct `taskId`, in first-seen order: how many rows
 *   were seen, how many had a truthy `passed`, and the sum of `tokensOf(row)`.
 */
function aggregateEvals(rows) {
  const totals = new Map();
  for (const row of rows) {
    const { taskId } = row;
    if (!totals.has(taskId)) {
      totals.set(taskId, { passed: 0, total: 0, tokens: 0 });
    }
    const bucket = totals.get(taskId);
    bucket.total += 1;
    if (row.passed) {
      bucket.passed += 1;
    }
    bucket.tokens += tokensOf(row);
  }
  return totals;
}
|
|
199
|
+
|
|
200
|
+
/**
 * Render a single "now ← prior" delta cell.
 *
 * @param {number} now - Current-window value.
 * @param {number|undefined|null} then - Prior-window value; `undefined` or
 *   `null` means there is nothing to compare against.
 * @param {"neutral"|"good-up"|"good-down"} [signMode="neutral"] - How to
 *   judge the direction of change: for "good-up" (e.g. pass-rate) a rise is
 *   marked "+" and a fall "-"; for "good-down" (e.g. tokens) the marks are
 *   inverted; "neutral" (e.g. skill invocations) prints a blank marker.
 * @param {string} [unit=""] - Suffix appended to every number (e.g. "%").
 * @returns {string} The formatted cell.
 */
function fmtDelta(now, then, signMode = "neutral", unit = "") {
  // No comparator at all: a null prior must not be coerced to 0 by `now - then`.
  if (then == null) return `(new) ${now}${unit}`;

  const diff = now - then;
  // Same "now ← prior" orientation as every other branch and the table headers.
  if (diff === 0) return `${now}${unit} ← ${then}${unit} (=)`;

  const arrow = diff > 0 ? "↑" : "↓";
  // The marker answers "is this an improvement (+) or a regression (-)?".
  let marker = " ";
  if (signMode === "good-up") marker = diff > 0 ? "+" : "-";
  else if (signMode === "good-down") marker = diff > 0 ? "-" : "+";

  return `${now}${unit} ← ${then}${unit} (${arrow}${marker} ${Math.abs(diff)}${unit})`;
}
|
|
213
|
+
|
|
214
|
+
/**
 * Print the week-over-week section of the report to stdout.
 *
 * Two tables are written: per-task pass-rate and average tokens (from the
 * eval rows), then per-skill invocation counts (from the telemetry rows).
 *
 * Telemetry rows without a non-empty string `skill` are ignored: the
 * telemetry file is shared with hooks that log other event shapes, and such
 * rows used to crash the report on `undefined.padEnd`. Eval rows without a
 * `taskId` are shown under "(unknown)" for the same reason.
 *
 * @param {Array<object>} evalRecent - Eval rows from the last 7 days.
 * @param {Array<object>} evalPrior - Eval rows aged 7–14 days.
 * @param {Array<object>} telRecent - Telemetry rows from the last 7 days.
 * @param {Array<object>} telPrior - Telemetry rows aged 7–14 days.
 * @returns {void}
 */
function weekOverWeek(evalRecent, evalPrior, telRecent, telPrior) {
  const signed = (delta) => `${delta >= 0 ? "+" : ""}${delta}`;
  const label = (value) => (value == null ? "(unknown)" : String(value));
  const passRate = (agg) =>
    agg && agg.total > 0 ? Math.round((agg.passed / agg.total) * 100) : null;
  const avgTokens = (agg) =>
    agg && agg.total > 0 ? Math.round(agg.tokens / agg.total) : 0;

  console.log(`\n### Week-over-week (last 7d vs prior 7d)`);
  const aRecent = aggregateEvals(evalRecent);
  const aPrior = aggregateEvals(evalPrior);

  if (aRecent.size === 0 && aPrior.size === 0) {
    console.log(" (no eval data in either window — run `npm run harness:eval`)");
  } else {
    console.log(" task pass-rate (now ← prior) avg-tokens (now ← prior)");
    console.log(" ---------------------- ---------------------------- --------------------------");
    const taskIds = new Set([...aRecent.keys(), ...aPrior.keys()]);
    for (const t of [...taskIds].sort()) {
      const now = aRecent.get(t);
      const prior = aPrior.get(t);
      const nowRate = passRate(now);
      const priorRate = passRate(prior);
      const nowTok = avgTokens(now);
      const priorTok = avgTokens(prior);

      let rateCell;
      if (nowRate === null) rateCell = "(absent now)";
      else if (priorRate === null) rateCell = `${nowRate}% (new)`;
      else rateCell = `${nowRate}% ← ${priorRate}% (${signed(nowRate - priorRate)})`;

      // Only print a token delta when both windows have data; a delta
      // against an implicit 0 reads as a 100% swing that never happened.
      let tokCell;
      if (!now) tokCell = "(absent now)";
      else if (!prior) tokCell = nowTok === 0 ? "—" : `${nowTok} (new)`;
      else if (nowTok === 0 && priorTok === 0) tokCell = "—";
      else tokCell = `${nowTok} ← ${priorTok} (${signed(nowTok - priorTok)})`;

      console.log(
        ` ${label(t).padEnd(22)} ${rateCell.padEnd(30)} ${tokCell}`,
      );
    }
  }

  // Skill invocation deltas.
  const countBySkill = (rows) => {
    const counts = new Map();
    for (const r of rows) {
      if (typeof r?.skill !== "string" || r.skill === "") continue;
      counts.set(r.skill, (counts.get(r.skill) ?? 0) + 1);
    }
    return counts;
  };
  const recentBySkill = countBySkill(telRecent);
  const priorBySkill = countBySkill(telPrior);

  const allSkills = new Set([...recentBySkill.keys(), ...priorBySkill.keys()]);
  if (allSkills.size > 0) {
    console.log("\n skill invocations (now ← prior)");
    console.log(" ----------------------------- -------------------------------");
    for (const s of [...allSkills].sort()) {
      const n = recentBySkill.get(s) ?? 0;
      const p = priorBySkill.get(s) ?? 0;
      const cell = p === 0 ? `${n} (new)` : `${n} ← ${p} (${signed(n - p)})`;
      console.log(` ${s.padEnd(29)} ${cell}`);
    }
  }
}
|
|
265
|
+
|
|
175
266
|
/**
 * Entry point: load eval results and telemetry, split each into the
 * current and prior windows, then print every report section in order.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const allEvals = await loadEvalResults();
  const allTelemetry = await readJsonl(TELEMETRY);

  const evals = { current: recent(allEvals), previous: priorWeek(allEvals) };
  const telemetry = { current: recent(allTelemetry), previous: priorWeek(allTelemetry) };

  const banner = [
    "=== agent-harness-kit report ===",
    `Generated: ${new Date().toISOString()}`,
  ];
  for (const line of banner) {
    console.log(line);
  }

  summarizeEvals(evals.current);
  summarizeTelemetry(telemetry.current);
  weekOverWeek(evals.current, evals.previous, telemetry.current, telemetry.previous);
  driftSignals(evals.current, telemetry.current);
  console.log("");
}
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
#!/usr/bin/env bash
# Notification hook — OS-native notification when Claude wants attention.
# macOS: osascript. Linux: notify-send (when installed). Anything else: no-op.
# Opt-out: AHK_DISABLE_NOTIFY=1.
#
# Contract: never blocks, always exits 0. For that reason this script does
# NOT use `set -e` — every step is best-effort and individually guarded, so
# a malformed payload, a missing JSON tool, or an unwritable .harness/ can
# never turn a notification into a hook failure.
#
# NOTE(review): the fields read below (.notification.type/.title/.body) are
# taken as-is from the previous version of this hook; confirm them against
# the payload the Notification event actually delivers.
set -o pipefail

INPUT=$(cat)
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"

# have_jq — succeeds when jq is on PATH and not disabled via AHK_DISABLE_JQ=1.
have_jq() {
  [ "${AHK_DISABLE_JQ:-}" = "1" ] && return 1
  command -v jq >/dev/null 2>&1
}

# have_jp — succeeds when some JSON picker is usable: jq, or node together
# with the bundled _lib/json-pick.mjs.
have_jp() {
  have_jq && return 0
  command -v node >/dev/null 2>&1 && [ -f "$SCRIPT_DIR/_lib/json-pick.mjs" ] && return 0
  return 1
}

# jp FILTER [FILE] — evaluate FILTER against FILE (or stdin when FILE is
# omitted) with jq, falling back to the bundled Node picker.
jp() {
  if have_jq; then
    if [ -n "$2" ]; then jq -r "$1" "$2"; else jq -r "$1"; fi
  else
    if [ -n "$2" ]; then
      node "$SCRIPT_DIR/_lib/json-pick.mjs" "$1" "$2"
    else
      node "$SCRIPT_DIR/_lib/json-pick.mjs" "$1"
    fi
  fi
}

# json_escape STRING — print STRING escaped for use inside a JSON string
# literal. Backslashes go first so the escapes added afterwards are not
# themselves re-escaped. Covers backslash, double quote, newline, carriage
# return and tab; other control characters are passed through unchanged.
json_escape() {
  local s=$1
  s=${s//\\/\\\\}
  s=${s//\"/\\\"}
  s=${s//$'\n'/\\n}
  s=${s//$'\r'/\\r}
  s=${s//$'\t'/\\t}
  printf '%s' "$s"
}

if [ "${AHK_DISABLE_NOTIFY:-}" = "1" ]; then
  exit 0
fi

TYPE=""
TITLE=""
BODY=""
if have_jp; then
  # A payload that fails to parse leaves the field empty instead of aborting.
  TYPE=$(printf '%s\n' "$INPUT" | jp '.notification.type // empty' 2>/dev/null || true)
  TITLE=$(printf '%s\n' "$INPUT" | jp '.notification.title // empty' 2>/dev/null || true)
  BODY=$(printf '%s\n' "$INPUT" | jp '.notification.body // empty' 2>/dev/null || true)
fi

# Defaults are applied BEFORE the type prefix; otherwise a typed notification
# with no body would be shown as just "[type] ".
[ -n "$TITLE" ] || TITLE="Claude Code"
[ -n "$BODY" ] || BODY="Claude Code wants your attention."
if [ -n "$TYPE" ]; then
  BODY="[$TYPE] $BODY"
fi

# Telemetry record (one JSON object per line). Every interpolated value is
# escaped so the file stays parseable line by line.
if mkdir -p .harness 2>/dev/null; then
  TS=$(date -u +%Y-%m-%dT%H:%M:%SZ)
  {
    printf '{"ts":"%s","hook":"Notification","type":"%s","title":"%s","body":"%s"}\n' \
      "$TS" "$(json_escape "$TYPE")" "$(json_escape "$TITLE")" "$(json_escape "$BODY")" \
      >> .harness/telemetry.jsonl
  } 2>/dev/null || true
fi

OS_KIND=$(uname -s 2>/dev/null || echo "Unknown")
case "$OS_KIND" in
  Darwin)
    # Pass the text as script arguments rather than splicing it into the
    # AppleScript source, so quotes/backslashes in the payload cannot break
    # (or inject into) the script.
    osascript \
      -e 'on run argv' \
      -e 'display notification (item 1 of argv) with title (item 2 of argv)' \
      -e 'end run' \
      "$BODY" "$TITLE" >/dev/null 2>&1 || true
    ;;
  Linux)
    if command -v notify-send >/dev/null 2>&1; then
      # `--` keeps a title/body that starts with "-" from being read as an option.
      notify-send -a "Claude Code" -- "$TITLE" "$BODY" >/dev/null 2>&1 || true
    fi
    ;;
  *)
    :
    ;;
esac

exit 0
|
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
#!/usr/bin/env bash
# PreCompact hook — write a small snapshot of state to
# .harness/compaction-snapshot.json BEFORE the context compactor runs.
# The companion SessionStart hook (matcher: compact) reads this snapshot
# back and re-injects the salient fields so the post-compaction model
# knows which feature it was working on, which branch, and how dirty
# the tree was.
#
# This is the kit's answer to the "I lost everything after compaction"
# failure mode that recurs in long sessions. Pair with:
#   - SessionStart matcher compact → re-inject
#   - PostCompact (not implemented; SessionStart does the work)
#
# Snapshot contents:
#   {
#     "compacted_at": "2026-05-16T19:00:00Z",
#     "branch": "main",
#     "sha": "abc1234",
#     "uncommitted": 7,
#     "feature": "auth-endpoint — POST /auth/login",
#     "trigger": "manual|auto",
#     "estimated_tokens_removed": 5000
#   }
#
# The hook NEVER blocks (exit 0 always). PreCompact can technically block
# compaction but doing so defeats the entire point. To honour that, this
# script does not use `set -e`: each step is best-effort and a failure to
# write the snapshot must not stop compaction.
set -o pipefail

INPUT=$(cat)
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"

# have_jq — succeeds when jq is on PATH and not disabled via AHK_DISABLE_JQ=1.
have_jq() {
  [ "${AHK_DISABLE_JQ:-}" = "1" ] && return 1
  command -v jq >/dev/null 2>&1
}

# have_jp — succeeds when some JSON picker is usable: jq, or node together
# with the bundled _lib/json-pick.mjs.
have_jp() {
  have_jq && return 0
  command -v node >/dev/null 2>&1 && [ -f "$SCRIPT_DIR/_lib/json-pick.mjs" ] && return 0
  return 1
}

# jp FILTER [FILE] — evaluate FILTER against FILE (or stdin when FILE is
# omitted) with jq, falling back to the bundled Node picker.
jp() {
  if have_jq; then
    if [ -n "$2" ]; then jq -r "$1" "$2"; else jq -r "$1"; fi
  else
    if [ -n "$2" ]; then
      node "$SCRIPT_DIR/_lib/json-pick.mjs" "$1" "$2"
    else
      node "$SCRIPT_DIR/_lib/json-pick.mjs" "$1"
    fi
  fi
}

# json_escape STRING — print STRING escaped for a JSON string literal
# (backslash first, then quote/newline/CR/tab). Used only by the last-resort
# writer below, when neither node nor jq is available.
json_escape() {
  local s=$1
  s=${s//\\/\\\\}
  s=${s//\"/\\\"}
  s=${s//$'\n'/\\n}
  s=${s//$'\r'/\\r}
  s=${s//$'\t'/\\t}
  printf '%s' "$s"
}

TRIGGER=""
TOKENS=""
if have_jp; then
  TRIGGER=$(printf '%s\n' "$INPUT" | jp '.trigger // "auto"' 2>/dev/null || true)
  TOKENS=$(printf '%s\n' "$INPUT" | jp '.estimated_tokens_removed // 0' 2>/dev/null || true)
fi
[ -n "$TRIGGER" ] || TRIGGER="auto"

mkdir -p .harness 2>/dev/null || true

TS=$(date -u +%Y-%m-%dT%H:%M:%SZ)
BR="(no-git)"
SHA="(no-git)"
COUNT=0
if command -v git >/dev/null 2>&1 && git rev-parse --git-dir >/dev/null 2>&1; then
  # `git branch --show-current` succeeds with EMPTY output on a detached
  # HEAD, so the fallback has to test the value, not the exit status.
  BR=$(git branch --show-current 2>/dev/null || true)
  [ -n "$BR" ] || BR="(detached)"
  SHA=$(git rev-parse --short HEAD 2>/dev/null || echo "(none)")
  COUNT=$(git status --short 2>/dev/null | wc -l | tr -d ' ')
fi

FEAT=""
if [ -f feature_list.json ]; then
  if have_jq; then
    FEAT=$(jq -r 'first(.features[] | select(.passes == false)) | "\(.id) — \(.title)"' \
      feature_list.json 2>/dev/null || true)
  elif command -v node >/dev/null 2>&1; then
    FEAT=$(node -e '
      const f = JSON.parse(require("fs").readFileSync("feature_list.json", "utf8"));
      const o = (f.features || []).find((x) => x.passes === false);
      if (o) process.stdout.write(o.id + " — " + o.title);
    ' 2>/dev/null || true)
  fi
fi

# Integer-only copies for the writers that embed numbers verbatim (jq
# --argjson and the heredoc). Anything that is not a plain non-negative
# integer becomes 0 so those writers cannot fail or emit invalid JSON.
COUNT_NUM=$COUNT
case "$COUNT_NUM" in ''|*[!0-9]*) COUNT_NUM=0 ;; esac
TOKENS_NUM=$TOKENS
case "$TOKENS_NUM" in ''|*[!0-9]*) TOKENS_NUM=0 ;; esac

# Compose JSON via Node when available. Values travel through the
# environment, never through the script text: branch names and hook input
# are untrusted, and splicing them into JS source would let a quote or
# backslash break (or inject into) the program.
if command -v node >/dev/null 2>&1; then
  AHK_SNAP_TS="$TS" AHK_SNAP_BRANCH="$BR" AHK_SNAP_SHA="$SHA" \
  AHK_SNAP_COUNT="$COUNT" AHK_SNAP_FEATURE="$FEAT" \
  AHK_SNAP_TRIGGER="$TRIGGER" AHK_SNAP_TOKENS="$TOKENS" \
  node -e '
    const fs = require("fs");
    const env = process.env;
    const snap = {
      compacted_at: env.AHK_SNAP_TS || "",
      branch: env.AHK_SNAP_BRANCH || "",
      sha: env.AHK_SNAP_SHA || "",
      uncommitted: parseInt(env.AHK_SNAP_COUNT, 10) || 0,
      feature: env.AHK_SNAP_FEATURE || "",
      trigger: env.AHK_SNAP_TRIGGER || "auto",
      estimated_tokens_removed: parseInt(env.AHK_SNAP_TOKENS, 10) || 0
    };
    fs.writeFileSync(".harness/compaction-snapshot.json", JSON.stringify(snap, null, 2) + "\n");
  ' 2>/dev/null || true
elif have_jq; then
  {
    jq -n --arg ts "$TS" --arg br "$BR" --arg sha "$SHA" \
      --argjson cnt "$COUNT_NUM" --arg feat "$FEAT" \
      --arg trig "$TRIGGER" --argjson tok "$TOKENS_NUM" \
      '{compacted_at: $ts, branch: $br, sha: $sha, uncommitted: $cnt,
        feature: $feat, trigger: $trig, estimated_tokens_removed: $tok}' \
      > .harness/compaction-snapshot.json
  } 2>/dev/null || true
else
  # No JSON tool available — write a minimal record. SessionStart compact
  # branch reads fields individually so partial records still work.
  {
    cat > .harness/compaction-snapshot.json <<EOF
{
  "compacted_at": "$(json_escape "$TS")",
  "branch": "$(json_escape "$BR")",
  "sha": "$(json_escape "$SHA")",
  "uncommitted": $COUNT_NUM,
  "feature": "$(json_escape "$FEAT")",
  "trigger": "$(json_escape "$TRIGGER")"
}
EOF
  } 2>/dev/null || true
fi
exit 0
|
|
@@ -4,17 +4,42 @@
|
|
|
4
4
|
# Lives in scripts/ so it ships with the repo; install via install-git-hooks.sh.
|
|
5
5
|
set -eo pipefail
|
|
6
6
|
|
|
7
|
+
# Resolve script dir so we can find _lib/json-pick.mjs (Node fallback for jq).
|
|
8
|
+
# Without this fallback, `jq` missing on a fresh CI image silently disabled
|
|
9
|
+
# the baseline-monotonic guard — a known audit hole.
|
|
10
|
+
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
|
|
11
|
+
# have_jq — report whether jq may be used.
# Fails when AHK_DISABLE_JQ=1 is set; otherwise succeeds only if `jq` is
# found on PATH.
have_jq() {
  if [ "${AHK_DISABLE_JQ:-}" = "1" ]; then
    return 1
  fi
  command -v jq >/dev/null 2>&1
}
|
|
15
|
+
# have_jp — report whether any JSON picker is usable: jq, or node together
# with the bundled $SCRIPT_DIR/_lib/json-pick.mjs.
have_jp() {
  if have_jq; then
    return 0
  fi
  if command -v node >/dev/null 2>&1 && [ -f "$SCRIPT_DIR/_lib/json-pick.mjs" ]; then
    return 0
  fi
  return 1
}
|
|
20
|
+
# jp FILTER [FILE] — evaluate FILTER with jq when have_jq succeeds, else with
# the bundled Node picker. With a non-empty FILE the input is read from that
# file; otherwise it is read from stdin.
jp() {
  if [ -z "$2" ]; then
    # stdin mode
    if have_jq; then
      jq -r "$1"
    else
      node "$SCRIPT_DIR/_lib/json-pick.mjs" "$1"
    fi
  else
    # file mode
    if have_jq; then
      jq -r "$1" "$2"
    else
      node "$SCRIPT_DIR/_lib/json-pick.mjs" "$1" "$2"
    fi
  fi
}
|
|
31
|
+
|
|
7
32
|
# Baseline monotonic guard. .harness/structural-baseline.json is decreasing-
|
|
8
33
|
# only — fixes REMOVE entries; no path should ADD them. Catches the "mask
|
|
9
34
|
# violations by baselining them" anti-pattern before code leaves the machine.
|
|
10
35
|
# Runs first because a grown baseline silently masks structural-test failures.
|
|
11
36
|
BASELINE_FILE=".harness/structural-baseline.json"
|
|
12
37
|
if [ -f "$BASELINE_FILE" ] \
|
|
13
|
-
&&
|
|
38
|
+
&& have_jp \
|
|
14
39
|
&& git rev-parse --verify HEAD >/dev/null 2>&1 \
|
|
15
40
|
&& git cat-file -e "HEAD:$BASELINE_FILE" 2>/dev/null; then
|
|
16
|
-
CURRENT_COUNT=$(
|
|
17
|
-
HEAD_COUNT=$(git show "HEAD:$BASELINE_FILE" 2>/dev/null |
|
|
41
|
+
CURRENT_COUNT=$(jp 'length' "$BASELINE_FILE" 2>/dev/null || echo 0)
|
|
42
|
+
HEAD_COUNT=$(git show "HEAD:$BASELINE_FILE" 2>/dev/null | jp 'length' 2>/dev/null || echo 0)
|
|
18
43
|
if [ "$CURRENT_COUNT" -gt "$HEAD_COUNT" ]; then
|
|
19
44
|
{
|
|
20
45
|
echo
|
|
@@ -12,9 +12,43 @@ set -e
|
|
|
12
12
|
|
|
13
13
|
INPUT=$(cat)
|
|
14
14
|
|
|
15
|
+
# Resolve the directory this hook lives in (used to find _lib/json-pick.mjs).
|
|
16
|
+
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
|
|
17
|
+
|
|
18
|
+
# have_jq — probe for jq, with an environment override.
# AHK_DISABLE_JQ=1 makes the probe fail even when jq is installed, which is
# how tests drive the Node fallback on machines that do have jq.
have_jq() {
  if [ "${AHK_DISABLE_JQ:-}" = "1" ]; then
    return 1
  fi
  command -v jq >/dev/null 2>&1
}
|
|
25
|
+
# jp FILTER [FILE] — JSON picker.
# Runs FILTER through `jq -r` when have_jq succeeds; otherwise through the
# bundled Node script (_lib/json-pick.mjs, a jq-subset implementation). This
# keeps the hooks usable on minimal CI images and Windows, where jq is not
# installed by default — without it the pre-completion check silently did
# nothing. With a non-empty FILE the input comes from that file, otherwise
# from stdin.
jp() {
  if [ -z "$2" ]; then
    # stdin mode
    if have_jq; then
      jq -r "$1"
    else
      node "$SCRIPT_DIR/_lib/json-pick.mjs" "$1"
    fi
  else
    # file mode
    if have_jq; then
      jq -r "$1" "$2"
    else
      node "$SCRIPT_DIR/_lib/json-pick.mjs" "$1" "$2"
    fi
  fi
}
|
|
40
|
+
# have_jp — is a JSON picker available at all?
# Succeeds when have_jq does, or when node is on PATH and the bundled
# _lib/json-pick.mjs exists next to this hook. Returns 1 otherwise; callers
# decide what to do when no picker is available.
have_jp() {
  if have_jq; then
    return 0
  fi
  if command -v node >/dev/null 2>&1 && [ -f "$SCRIPT_DIR/_lib/json-pick.mjs" ]; then
    return 0
  fi
  return 1
}
|
|
48
|
+
|
|
15
49
|
# CRITICAL: avoid infinite loops. If the hook already ran, do not block again.
|
|
16
|
-
if
|
|
17
|
-
if [ "$(echo "$INPUT" |
|
|
50
|
+
if have_jp; then
|
|
51
|
+
if [ "$(echo "$INPUT" | jp '.stop_hook_active // false')" = "true" ]; then
|
|
18
52
|
exit 0
|
|
19
53
|
fi
|
|
20
54
|
fi
|
|
@@ -59,12 +93,17 @@ elif [ -f pyproject.toml ] && command -v ruff >/dev/null 2>&1; then
|
|
|
59
93
|
run_check ruff ruff check . || true
|
|
60
94
|
fi
|
|
61
95
|
|
|
62
|
-
# CLAUDE.md
|
|
63
|
-
#
|
|
64
|
-
#
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
96
|
+
# CLAUDE.md size caps. Two complementary signals:
|
|
97
|
+
# - maxInstructions (default 200): bullet/numbered-item count. Suits
|
|
98
|
+
# ASCII-heavy English where a bullet ≈ a fixed token weight.
|
|
99
|
+
# - maxTokens (default 0 = off): approximate token cap. Catches drift
|
|
100
|
+
# in non-ASCII content (Vietnamese, CJK, etc.) where 200 bullets
|
|
101
|
+
# may carry 2–3× more tokens than the HumanLayer baseline measured.
|
|
102
|
+
# Both checks fire independently — exceed either → block.
|
|
103
|
+
if [ -f harness.config.json ] && have_jp; then
|
|
104
|
+
CMD_PATH=$(jp '.claudeMd.path // "CLAUDE.md"' harness.config.json)
|
|
105
|
+
CMD_CAP=$(jp '.claudeMd.maxInstructions // 200' harness.config.json)
|
|
106
|
+
CMD_TOK_CAP=$(jp '.claudeMd.maxTokens // 0' harness.config.json)
|
|
68
107
|
if [ -f "$CMD_PATH" ] && [ "$CMD_CAP" -gt 0 ] 2>/dev/null; then
|
|
69
108
|
CMD_COUNT=$(grep -cE '^[[:space:]]*([-*]|[0-9]+\.)[[:space:]]' "$CMD_PATH" 2>/dev/null || echo 0)
|
|
70
109
|
if [ "$CMD_COUNT" -gt "$CMD_CAP" ]; then
|
|
@@ -85,6 +124,24 @@ if [ -f harness.config.json ] && command -v jq >/dev/null 2>&1; then
|
|
|
85
124
|
echo "claude-md-cap" >> "$TMPDIR_HOOK/failed.list"
|
|
86
125
|
fi
|
|
87
126
|
fi
|
|
127
|
+
if [ -f "$CMD_PATH" ] && [ "$CMD_TOK_CAP" -gt 0 ] 2>/dev/null \
|
|
128
|
+
&& command -v node >/dev/null 2>&1 \
|
|
129
|
+
&& [ -f "$SCRIPT_DIR/_lib/approx-tokens.mjs" ]; then
|
|
130
|
+
CMD_TOK=$(node "$SCRIPT_DIR/_lib/approx-tokens.mjs" "$CMD_PATH" 2>/dev/null || echo 0)
|
|
131
|
+
if [ "$CMD_TOK" -gt "$CMD_TOK_CAP" ]; then
|
|
132
|
+
{
|
|
133
|
+
echo "$CMD_PATH approximate token count: $CMD_TOK (cap: $CMD_TOK_CAP)"
|
|
134
|
+
echo
|
|
135
|
+
echo "Heuristic token cap — set because instruction count alone misses"
|
|
136
|
+
echo "drift in non-ASCII content (Vietnamese, CJK) where a bullet can"
|
|
137
|
+
echo "carry 2-3x more tokens than the HumanLayer baseline measured."
|
|
138
|
+
echo
|
|
139
|
+
echo "Adjust the cap (with justification) in harness.config.json:"
|
|
140
|
+
echo " .claudeMd.maxTokens"
|
|
141
|
+
} > "$TMPDIR_HOOK/claude-md-tokens.out"
|
|
142
|
+
echo "claude-md-tokens" >> "$TMPDIR_HOOK/failed.list"
|
|
143
|
+
fi
|
|
144
|
+
fi
|
|
88
145
|
fi
|
|
89
146
|
|
|
90
147
|
# Multi-layer review trigger. When uncommitted/staged/untracked changes touch
|
|
@@ -94,7 +151,7 @@ fi
|
|
|
94
151
|
# with a mechanical count off `harness.config.json` `domains[].layers` /
|
|
95
152
|
# `.root`. Fires once per stop; the loop guard (`stop_hook_active`) lets the
|
|
96
153
|
# next stop succeed after the agent has read the recommendation.
|
|
97
|
-
if [ -f harness.config.json ] &&
|
|
154
|
+
if [ -f harness.config.json ] && have_jp && command -v git >/dev/null 2>&1; then
|
|
98
155
|
CHANGED=$(
|
|
99
156
|
{
|
|
100
157
|
git diff --name-only 2>/dev/null || true
|
|
@@ -103,19 +160,19 @@ if [ -f harness.config.json ] && command -v jq >/dev/null 2>&1 && command -v git
|
|
|
103
160
|
} | sort -u
|
|
104
161
|
)
|
|
105
162
|
if [ -n "$CHANGED" ]; then
|
|
106
|
-
NUM_DOMAINS=$(
|
|
163
|
+
NUM_DOMAINS=$(jp '.domains | length' harness.config.json 2>/dev/null || echo 0)
|
|
107
164
|
MULTI_OUT="$TMPDIR_HOOK/multi-layer-review.out"
|
|
108
165
|
: > "$MULTI_OUT"
|
|
109
166
|
MULTI_HIT=0
|
|
110
167
|
i=0
|
|
111
168
|
while [ "$i" -lt "$NUM_DOMAINS" ]; do
|
|
112
|
-
ROOT=$(
|
|
113
|
-
DOMAIN=$(
|
|
169
|
+
ROOT=$(jp ".domains[$i].root" harness.config.json)
|
|
170
|
+
DOMAIN=$(jp ".domains[$i].name" harness.config.json)
|
|
114
171
|
# Optional layerDirPattern — supports conventions where the layer
|
|
115
172
|
# directory is not literally `{layer}`. Example: a Rust workspace
|
|
116
173
|
# with crates named `unibot-types`, `unibot-crypto`, ... uses
|
|
117
174
|
# `"layerDirPattern": "unibot-{layer}"`. Defaults to `{layer}`.
|
|
118
|
-
LAYER_PATTERN=$(
|
|
175
|
+
LAYER_PATTERN=$(jp ".domains[$i].layerDirPattern // \"{layer}\"" harness.config.json)
|
|
119
176
|
TOUCHED_COUNT=0
|
|
120
177
|
TOUCHED_NAMES=""
|
|
121
178
|
while IFS= read -r layer; do
|
|
@@ -125,7 +182,7 @@ if [ -f harness.config.json ] && command -v jq >/dev/null 2>&1 && command -v git
|
|
|
125
182
|
TOUCHED_COUNT=$((TOUCHED_COUNT + 1))
|
|
126
183
|
TOUCHED_NAMES="$TOUCHED_NAMES $layer"
|
|
127
184
|
fi
|
|
128
|
-
done < <(
|
|
185
|
+
done < <(jp ".domains[$i].layers[]" harness.config.json)
|
|
129
186
|
if [ "$TOUCHED_COUNT" -ge 2 ]; then
|
|
130
187
|
echo "Domain '$DOMAIN' has changes spanning $TOUCHED_COUNT layers:$TOUCHED_NAMES" >> "$MULTI_OUT"
|
|
131
188
|
MULTI_HIT=1
|
|
@@ -187,8 +244,8 @@ HEADLESS_SOURCE=""
|
|
|
187
244
|
if [ "${AHK_HEADLESS_RECOVER:-}" = "1" ]; then
|
|
188
245
|
HEADLESS_RECOVER=1
|
|
189
246
|
HEADLESS_SOURCE="AHK_HEADLESS_RECOVER"
|
|
190
|
-
elif [ -f harness.config.json ] &&
|
|
191
|
-
CFG_VAL=$(
|
|
247
|
+
elif [ -f harness.config.json ] && have_jp; then
|
|
248
|
+
CFG_VAL=$(jp '.recovery.headless // false' harness.config.json 2>/dev/null)
|
|
192
249
|
if [ "$CFG_VAL" = "true" ]; then
|
|
193
250
|
HEADLESS_RECOVER=1
|
|
194
251
|
HEADLESS_SOURCE="harness.config.json:.recovery.headless"
|
|
@@ -196,12 +253,64 @@ elif [ -f harness.config.json ] && command -v jq >/dev/null 2>&1; then
|
|
|
196
253
|
fi
|
|
197
254
|
if [ "$HEADLESS_RECOVER" = "1" ] && command -v claude >/dev/null 2>&1; then
|
|
198
255
|
FAILED_LIST=$(tr '\n' ' ' < "$TMPDIR_HOOK/failed.list")
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
#
|
|
256
|
+
|
|
257
|
+
# Concurrency guard. Two Stop events in different sessions (e.g. user
|
|
258
|
+
# working in two terminals, or an unattended CI rerun firing while a
|
|
259
|
+
# previous recovery is still active) used to race and edit the same
|
|
260
|
+
# files. The lock is a directory created atomically with `mkdir`; the
|
|
261
|
+
# PID file inside lets us detect stale locks left by a crashed parent.
|
|
262
|
+
mkdir -p .harness
|
|
263
|
+
LOCK_DIR=".harness/recovery.lock"
|
|
264
|
+
LOCK_STALE_MAX_SECS=${AHK_RECOVERY_LOCK_STALE_SECS:-1800}
|
|
265
|
+
|
|
266
|
+
if mkdir "$LOCK_DIR" 2>/dev/null; then
|
|
267
|
+
# We won the race — spawn the recovery turn. Snapshot the failure
|
|
268
|
+
# context into the lock dir BEFORE the parent's EXIT trap deletes
|
|
269
|
+
# TMPDIR_HOOK; otherwise the subshell's redirect to recover.out
|
|
270
|
+
# races the parent's cleanup and the subshell dies before claude
|
|
271
|
+
# can run. Everything the recovery needs (failed.list, per-check
|
|
272
|
+
# output, recover.out) now lives inside LOCK_DIR — self-contained.
|
|
273
|
+
cp -r "$TMPDIR_HOOK/." "$LOCK_DIR/snapshot/" 2>/dev/null || true
|
|
274
|
+
(
|
|
275
|
+
# Trap removes the lock on subshell EXIT (success, failure, or signal).
|
|
276
|
+
trap 'rm -rf "$LOCK_DIR"' EXIT
|
|
277
|
+
claude -p \
|
|
278
|
+
"The pre-completion checklist failed: $FAILED_LIST. Read the failure output in $LOCK_DIR/snapshot and apply the smallest fix. Do not disable any check." \
|
|
279
|
+
--max-turns 5 \
|
|
280
|
+
>"$LOCK_DIR/recover.out" 2>&1
|
|
281
|
+
) &
|
|
282
|
+
SUB_PID=$!
|
|
283
|
+
# Parent writes metadata SYNCHRONOUSLY before printing the "spawned"
|
|
284
|
+
# message so a second Stop firing immediately after never sees an
|
|
285
|
+
# empty pid file. Subsecond races between mkdir and these writes are
|
|
286
|
+
# closed by the bounded read-loop in the lock-held branch below.
|
|
287
|
+
echo "$SUB_PID" > "$LOCK_DIR/pid"
|
|
288
|
+
date +%s > "$LOCK_DIR/started_at"
|
|
289
|
+
echo "$HEADLESS_SOURCE" > "$LOCK_DIR/source"
|
|
290
|
+
echo "[ahk] headless recovery spawned (source=$HEADLESS_SOURCE, wrapper-pid=$SUB_PID, lock=$LOCK_DIR)" >&2
|
|
291
|
+
else
|
|
292
|
+
# Lock already held. Read who holds it and decide: live → skip,
|
|
293
|
+
# stale → reclaim. We never block the user's Stop on the lock —
|
|
294
|
+
# worst case we skip a recovery turn that the next Stop can retry.
|
|
295
|
+
# Bounded wait for the pid file to materialize — closes the race
|
|
296
|
+
# window between the parent's `mkdir` and its `echo $SUB_PID > pid`.
|
|
297
|
+
for _ in 1 2 3 4 5 6 7 8 9 10; do
|
|
298
|
+
[ -s "$LOCK_DIR/pid" ] && break
|
|
299
|
+
sleep 0.05
|
|
300
|
+
done
|
|
301
|
+
EXISTING_PID=$(cat "$LOCK_DIR/pid" 2>/dev/null || true)
|
|
302
|
+
STARTED_AT=$(cat "$LOCK_DIR/started_at" 2>/dev/null || echo 0)
|
|
303
|
+
NOW=$(date +%s)
|
|
304
|
+
AGE=$((NOW - STARTED_AT))
|
|
305
|
+
if [ -n "$EXISTING_PID" ] && kill -0 "$EXISTING_PID" 2>/dev/null; then
|
|
306
|
+
echo "[ahk] headless recovery skipped — another session already running (pid=$EXISTING_PID, age=${AGE}s, lock=$LOCK_DIR)" >&2
|
|
307
|
+
elif [ "$AGE" -gt "$LOCK_STALE_MAX_SECS" ]; then
|
|
308
|
+
echo "[ahk] headless recovery: removing stale lock (pid=$EXISTING_PID, age=${AGE}s > ${LOCK_STALE_MAX_SECS}s); next stop will retry. lock=$LOCK_DIR" >&2
|
|
309
|
+
rm -rf "$LOCK_DIR"
|
|
310
|
+
else
|
|
311
|
+
echo "[ahk] headless recovery skipped — lock present with dead pid=$EXISTING_PID (age=${AGE}s, will reclaim after ${LOCK_STALE_MAX_SECS}s). lock=$LOCK_DIR" >&2
|
|
312
|
+
fi
|
|
313
|
+
fi
|
|
205
314
|
fi
|
|
206
315
|
|
|
207
316
|
exit 2
|