agent-harness-kit 0.6.0 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +2 -2
- package/.claude-plugin/plugin.json +1 -1
- package/README.md +29 -0
- package/bin/cli.mjs +10 -1
- package/package.json +1 -1
- package/src/core/detect-stack.mjs +16 -0
- package/src/core/render-templates.mjs +111 -4
- package/src/templates/.claude/hooks/hooks.json +87 -0
- package/src/templates/CLAUDE.md.hbs +1 -1
- package/src/templates/CLAUDE.md.vi.hbs +70 -0
- package/src/templates/_adapter-kotlin/harness/structural-check.mjs.hbs +286 -0
- package/src/templates/_adapter-rust/harness/structural-check.mjs.hbs +292 -100
- package/src/templates/_adapter-swift/harness/structural-check.mjs.hbs +285 -0
- package/src/templates/harness.config.json.hbs +5 -3
- package/src/templates/scripts/_lib/approx-tokens.mjs +48 -0
- package/src/templates/scripts/_lib/json-pick.mjs +278 -0
- package/src/templates/scripts/harness-report.mjs +95 -1
- package/src/templates/scripts/pre-compact.sh.hbs +121 -0
- package/src/templates/scripts/pre-push.sh +28 -3
- package/src/templates/scripts/precompletion-checklist.sh.hbs +131 -22
- package/src/templates/scripts/pretooluse-bash-guard.sh.hbs +146 -0
- package/src/templates/scripts/session-end.sh.hbs +48 -0
- package/src/templates/scripts/session-start.sh.hbs +139 -0
- package/src/templates/scripts/structural-test-on-edit.sh.hbs +31 -8
- package/src/templates/scripts/telemetry-on-skill.sh +32 -10
- package/src/templates/.claude/hooks/hooks.json.hbs +0 -39
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# SessionEnd hook — append a single observability line to PROGRESS.md when
|
|
3
|
+
# the session terminates. Never blocks (SessionEnd is cleanup-only per
|
|
4
|
+
# Claude Code docs).
|
|
5
|
+
#
|
|
6
|
+
# Output line shape:
|
|
7
|
+
# YYYY-MM-DD HH:MM | session_end | <reason> | <branch> | <sha>
|
|
8
|
+
#
|
|
9
|
+
# Example:
|
|
10
|
+
# 2026-05-16 19:00 | session_end | clear | main | abc1234
|
|
11
|
+
#
|
|
12
|
+
# Reasons (per Claude Code docs): clear, resume, logout, prompt_input_exit,
|
|
13
|
+
# bypass_permissions_disabled, other.
|
|
14
|
+
# Strict mode: stop on the first failing command, including failures that
# happen inside a pipeline.
set -e -o pipefail

# The hook payload arrives on stdin; read it once so later steps can reuse it.
INPUT="$(cat)"
# Absolute directory containing this script (used to find _lib/json-pick.mjs).
SCRIPT_DIR=$(cd "$(dirname "$0")" && pwd)
|
|
18
|
+
# have_jq — report whether jq may be used.
# Returns: 1 when AHK_DISABLE_JQ=1 (explicit opt-out), otherwise the status
#          of looking jq up on PATH.
have_jq() {
  if [ "${AHK_DISABLE_JQ:-}" = "1" ]; then
    return 1
  fi
  command -v jq >/dev/null 2>&1
}
|
|
22
|
+
# have_jp — report whether any JSON picker is usable.
# Returns: 0 when jq is usable, or when node is on PATH and the bundled
#          _lib/json-pick.mjs exists next to this script; 1 otherwise.
have_jp() {
  if have_jq; then
    return 0
  fi
  if command -v node >/dev/null 2>&1 && [ -f "$SCRIPT_DIR/_lib/json-pick.mjs" ]; then
    return 0
  fi
  return 1
}
|
|
27
|
+
# jp — apply filter $1 to the JSON on stdin and print the raw result.
# Uses jq when have_jq succeeds, otherwise the Node-based json-pick.mjs.
jp() {
  if ! have_jq; then
    node "$SCRIPT_DIR/_lib/json-pick.mjs" "$1"
    return
  fi
  jq -r "$1"
}
|
|
32
|
+
|
|
33
|
+
# Resolve why the session ended. Claude Code's SessionEnd payload carries the
# value in `reason`; `.end_reason` is still consulted as a fallback so older
# or alternative payloads keep working. Any parse failure degrades to
# "unknown" instead of aborting the hook.
REASON=""
if have_jp; then
  REASON=$(printf '%s' "$INPUT" | jp '.reason // empty' 2>/dev/null || true)
  if [ -z "$REASON" ]; then
    REASON=$(printf '%s' "$INPUT" | jp '.end_reason // empty' 2>/dev/null || true)
  fi
fi
[ -n "$REASON" ] || REASON="unknown"

# Branch and short SHA, with placeholders when git is unavailable.
BR="(no-git)"
SHA="(no-git)"
if command -v git >/dev/null 2>&1 && git rev-parse --git-dir >/dev/null 2>&1; then
  # `git branch --show-current` prints nothing (and still exits 0) on a
  # detached HEAD, so the fallback must test for empty output, not status.
  BR=$(git branch --show-current 2>/dev/null || true)
  [ -n "$BR" ] || BR="(detached)"
  SHA=$(git rev-parse --short HEAD 2>/dev/null || echo "(none)")
fi

# Append the observability line. This hook is cleanup-only, so a read-only
# or missing work tree must not turn into a non-zero exit: logging is
# deliberately best-effort.
if mkdir -p .harness 2>/dev/null; then
  TS=$(date +"%Y-%m-%d %H:%M")
  printf '%s | session_end | %s | %s | %s\n' "$TS" "$REASON" "$BR" "$SHA" \
    >> .harness/PROGRESS.md || true
fi
exit 0
|
|
@@ -0,0 +1,139 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# SessionStart hook — inject a compact, deterministic context block when
|
|
3
|
+
# a session begins, resumes, or comes back from compaction. Output goes
|
|
4
|
+
# via JSON stdout `hookSpecificOutput.additionalContext`, which Claude
|
|
5
|
+
# Code feeds into the conversation context before the first turn.
|
|
6
|
+
#
|
|
7
|
+
# Three matchers fire this hook:
|
|
8
|
+
# startup → fresh session. Inject branch + uncommitted summary +
|
|
9
|
+
# current feature (from feature_list.json) + golden-principles
|
|
10
|
+
# cap reminder. ~10-20 lines of structured state.
|
|
11
|
+
# resume → user ran --resume / --continue. Same payload as startup,
|
|
12
|
+
# plus tail of PROGRESS.md so the model picks up where the
|
|
13
|
+
# last session stopped.
|
|
14
|
+
# compact → context was just compacted (mid-session). Pull the snapshot
|
|
15
|
+
# written by the PreCompact hook (.harness/compaction-snapshot.json)
|
|
16
|
+
# and re-inject it. Without this, the model loses everything
|
|
17
|
+
# that mattered about the current feature mid-compaction.
|
|
18
|
+
#
|
|
19
|
+
# The hook never blocks. Exit 0 + JSON to stdout is the *only* control
|
|
20
|
+
# path that Claude reads.
|
|
21
|
+
# Strict mode: stop on the first failing command, including failures that
# happen inside a pipeline.
set -e -o pipefail

# The hook payload arrives on stdin; read it once so later steps can reuse it.
INPUT="$(cat)"
# Absolute directory containing this script (used to find _lib/json-pick.mjs).
SCRIPT_DIR=$(cd "$(dirname "$0")" && pwd)
|
|
25
|
+
# have_jq — succeed only when jq is installed and not opted out.
# AHK_DISABLE_JQ=1 forces the non-jq path even if jq is on PATH.
have_jq() {
  case "${AHK_DISABLE_JQ:-}" in
    1) return 1 ;;
  esac
  command -v jq >/dev/null 2>&1
}
|
|
29
|
+
# have_jp — succeed when some JSON picker is available: jq, or node together
# with the bundled _lib/json-pick.mjs next to this script.
have_jp() {
  if have_jq; then
    return 0
  fi
  if command -v node >/dev/null 2>&1 && [ -f "$SCRIPT_DIR/_lib/json-pick.mjs" ]; then
    return 0
  fi
  return 1
}
|
|
34
|
+
# jp — apply filter $1 and print the raw result.
# Arguments: $1 - filter expression
#            $2 - optional JSON file; when empty or absent, stdin is read
# Uses jq when have_jq succeeds, otherwise the Node-based json-pick.mjs.
jp() {
  # ${2:+"$2"} expands to the file argument only when one was supplied.
  if have_jq; then
    jq -r "$1" ${2:+"$2"}
  else
    node "$SCRIPT_DIR/_lib/json-pick.mjs" "$1" ${2:+"$2"}
  fi
}
|
|
45
|
+
|
|
46
|
+
# Which matcher fired this hook (startup / resume / compact). Defaults to
# "startup" when the payload has no `source`, when no JSON picker exists, or
# when the payload cannot be parsed. The `|| true` matters: under
# `set -eo pipefail` a failing picker would otherwise abort the whole hook
# before any context is emitted.
SOURCE="startup"
if have_jp; then
  SOURCE=$(printf '%s' "$INPUT" | jp '.source // "startup"' 2>/dev/null || true)
  [ -n "$SOURCE" ] || SOURCE="startup"
fi
|
|
50
|
+
|
|
51
|
+
# Build the additionalContext payload as plain text first, then JSON-escape
|
|
52
|
+
# the whole thing at the end. Plain text is easier to read while iterating
|
|
53
|
+
# on the hook, and Claude renders it as-is in the conversation view.
|
|
54
|
+
# Accumulator for the plain-text context; JSON-escaped only when emitted.
CTX=""

# 1. Branch + uncommitted count (whenever git works in the current directory).
if command -v git >/dev/null 2>&1 && git rev-parse --git-dir >/dev/null 2>&1; then
  # `git branch --show-current` prints nothing (and still exits 0) on a
  # detached HEAD, so the fallback must test for empty output, not status.
  BR=$(git branch --show-current 2>/dev/null || true)
  [ -n "$BR" ] || BR="(detached)"
  # `|| true`: with pipefail a failing `git status` would abort the hook.
  COUNT=$(git status --short 2>/dev/null | wc -l | tr -d ' ' || true)
  CTX+="[harness] git: branch=$BR, uncommitted=$COUNT file(s)"$'\n'
fi
|
|
62
|
+
|
|
63
|
+
# 2. Current feature (from feature_list.json) — the first entry with
# passes=false, so the model resumes in-flight work rather than a finished
# item. Skipped when the file is missing or no JSON picker is available.
# Both extraction paths swallow errors (`|| true`): a malformed feature list
# must not abort the hook.
if [ -f feature_list.json ] && have_jp; then
  FEAT=""
  if have_jq; then
    FEAT=$(jq -r 'first(.features[] | select(.passes == false)) | "[harness] feature: \(.id) — \(.title)"' \
      feature_list.json 2>/dev/null || true)
  else
    # Node fallback path: emit the same "id — title" line via a one-liner.
    FEAT=$(node -e "
      const f = JSON.parse(require('fs').readFileSync('feature_list.json', 'utf8'));
      const open = (f.features || []).find(x => x.passes === false);
      if (open) process.stdout.write('[harness] feature: ' + open.id + ' — ' + open.title);
    " 2>/dev/null || true)
  fi
  if [ -n "$FEAT" ]; then
    CTX+="$FEAT"$'\n'
  fi
fi
|
|
84
|
+
|
|
85
|
+
# 3. On resume only, quote the last three lines of .harness/PROGRESS.md
# (indented) so the model can see where the previous session stopped.
if [ "$SOURCE" = "resume" ] && [ -f .harness/PROGRESS.md ]; then
  TAIL=$(tail -n 3 .harness/PROGRESS.md 2>/dev/null | sed 's/^/ /')
  [ -z "$TAIL" ] || CTX+="[harness] PROGRESS.md tail:"$'\n'"$TAIL"$'\n'
fi
|
|
92
|
+
|
|
93
|
+
# 4. After a compaction (`source: compact`), read back
# .harness/compaction-snapshot.json and inline its branch / sha / feature /
# timestamp fields so the post-compaction model knows where it was.
# Each field is optional; a failed lookup yields an empty string.
if [ "$SOURCE" = "compact" ] && [ -f .harness/compaction-snapshot.json ] && have_jp; then
  SNAP_TS=$(jp '.compacted_at // empty' .harness/compaction-snapshot.json 2>/dev/null || true)
  SNAP_BRANCH=$(jp '.branch // empty' .harness/compaction-snapshot.json 2>/dev/null || true)
  SNAP_SHA=$(jp '.sha // empty' .harness/compaction-snapshot.json 2>/dev/null || true)
  SNAP_FEAT=$(jp '.feature // empty' .harness/compaction-snapshot.json 2>/dev/null || true)
  CTX+="[harness] post-compaction snapshot (taken $SNAP_TS):"$'\n'
  if [ -n "$SNAP_BRANCH" ]; then
    CTX+=" branch=$SNAP_BRANCH"$'\n'
  fi
  if [ -n "$SNAP_SHA" ]; then
    CTX+=" sha=$SNAP_SHA"$'\n'
  fi
  if [ -n "$SNAP_FEAT" ]; then
    CTX+=" current-feature=$SNAP_FEAT"$'\n'
  fi
fi
|
|
107
|
+
|
|
108
|
+
# Join newline-separated layer names read from stdin into "a → b → c".
# Blank lines are skipped; a final line without a trailing newline is kept.
# Joining per line (rather than on spaces) keeps names that contain spaces
# intact.
_ahk_join_layers() {
  local joined="" layer
  while IFS= read -r layer || [ -n "$layer" ]; do
    [ -n "$layer" ] || continue
    joined+="${joined:+ → }$layer"
  done
  printf '%s' "$joined"
}

# 5. Layer rule reminder (short, deterministic). Lets the model re-establish
# the forward-only rule without reading CLAUDE.md again. Uses the layers of
# the first domain in harness.config.json.
if [ -f harness.config.json ] && have_jp; then
  # `|| true`: if the config has no .domains[0].layers the picker fails, and
  # with `set -eo pipefail` that would abort the hook and drop the context
  # collected so far. A missing layer list just means no reminder.
  LAYERS=$({ jp '.domains[0].layers[]' harness.config.json 2>/dev/null || true; } | _ahk_join_layers)
  if [ -n "$LAYERS" ]; then
    CTX+="[harness] layer rule (forward-only): $LAYERS"$'\n'
  fi
fi
|
|
117
|
+
|
|
118
|
+
# Nothing gathered: finish cleanly with empty stdout (the original note says
# Claude treats this as "hook ran but had nothing to say").
[ -n "$CTX" ] || exit 0

# Emit the JSON envelope. Escaping is delegated to a real JSON serializer —
# Node first, jq second — so newlines and quotes in CTX are never hand-escaped.
if command -v node >/dev/null 2>&1; then
  node -e "
    process.stdout.write(JSON.stringify({
      hookSpecificOutput: { hookEventName: 'SessionStart', additionalContext: process.argv[1] }
    }));
  " "$CTX"
elif have_jq; then
  jq -nc --arg ctx "$CTX" '{hookSpecificOutput: {hookEventName: "SessionStart", additionalContext: $ctx}}'
else
  # Last resort: raw text on stdout. Per the original note, Claude Code also
  # treats plain SessionStart output as additional context.
  printf '%s' "$CTX"
fi
exit 0
|
|
@@ -7,18 +7,41 @@
|
|
|
7
7
|
set -eo pipefail
|
|
8
8
|
|
|
9
9
|
INPUT=$(cat)
|
|
10
|
-
|
|
11
|
-
|
|
10
|
+
|
|
11
|
+
# Resolve where this hook lives so we can find _lib/json-pick.mjs (Node-based
|
|
12
|
+
# jq fallback). Pure-Node fallback removes the previous fail-open behaviour
|
|
13
|
+
# when jq is missing — silently skipping the structural check on jq-less
|
|
14
|
+
# environments (minimal CI, Windows without WSL+brew) was a known audit hole.
|
|
15
|
+
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
|
|
16
|
+
# have_jq — report whether jq may be used.
# Returns: 1 when AHK_DISABLE_JQ=1 (explicit opt-out), otherwise the status
#          of looking jq up on PATH.
have_jq() {
  if [ "${AHK_DISABLE_JQ:-}" = "1" ]; then
    return 1
  fi
  command -v jq >/dev/null 2>&1
}
|
|
20
|
+
# have_jp — succeed when some JSON picker is available: jq, or node together
# with the bundled _lib/json-pick.mjs next to this script.
have_jp() {
  if have_jq; then
    return 0
  fi
  if command -v node >/dev/null 2>&1 && [ -f "$SCRIPT_DIR/_lib/json-pick.mjs" ]; then
    return 0
  fi
  return 1
}
|
|
25
|
+
# jp — apply filter $1 to the JSON on stdin and print the raw result.
# Uses jq when have_jq succeeds, otherwise the Node-based json-pick.mjs.
jp() {
  if ! have_jq; then
    node "$SCRIPT_DIR/_lib/json-pick.mjs" "$1"
    return
  fi
  jq -r "$1"
}
|
|
30
|
+
if ! have_jp; then
|
|
31
|
+
echo "[ahk] structural-test-on-edit: no JSON parser available (need jq OR node + scripts/_lib/json-pick.mjs)." >&2
|
|
32
|
+
exit 0
|
|
12
33
|
fi
|
|
13
34
|
|
|
14
|
-
FILE=$(echo "$INPUT" |
|
|
35
|
+
FILE=$(echo "$INPUT" | jp '.tool_input.file_path // empty')
|
|
15
36
|
[ -z "$FILE" ] && exit 0
|
|
16
37
|
|
|
17
38
|
# Only run on source files, and only inside the configured roots.
|
|
18
39
|
case "$FILE" in
|
|
19
40
|
*.ts|*.tsx|*.js|*.jsx|*.mjs|*.cjs) ENGINE=ts ;;
|
|
20
41
|
*.py) ENGINE=py ;;
|
|
21
|
-
*.rs) ENGINE=
|
|
42
|
+
*.rs) ENGINE=node ;;
|
|
43
|
+
*.swift) ENGINE=node ;;
|
|
44
|
+
*.kt|*.kts) ENGINE=node ;;
|
|
22
45
|
*) exit 0 ;;
|
|
23
46
|
esac
|
|
24
47
|
|
|
@@ -61,10 +84,10 @@ Fix the violation before continuing — do NOT disable the test.
|
|
|
61
84
|
EOF
|
|
62
85
|
exit 2
|
|
63
86
|
fi
|
|
64
|
-
elif [ "$ENGINE" = "
|
|
65
|
-
#
|
|
66
|
-
#
|
|
67
|
-
# is cheap.
|
|
87
|
+
elif [ "$ENGINE" = "node" ]; then
|
|
88
|
+
# Node-based adapters (Rust / Swift / Kotlin). All ship the same
|
|
89
|
+
# harness/structural-check.mjs entry point. Workspace-wide scan because
|
|
90
|
+
# the regex is cheap. Missing script → graceful degrade.
|
|
68
91
|
if [ ! -f harness/structural-check.mjs ]; then
|
|
69
92
|
exit 0
|
|
70
93
|
fi
|
|
@@ -4,23 +4,45 @@
|
|
|
4
4
|
#
|
|
5
5
|
# Used by harness:report to compute per-skill success rate, average duration,
|
|
6
6
|
# and to surface drift over time.
|
|
7
|
+
#
|
|
8
|
+
# v0.7: migrated from `command -v jq` fail-open gate to the kit's jp() helper
|
|
9
|
+
# so the telemetry record still gets written on jq-less CI / Windows. Without
|
|
10
|
+
# the migration, telemetry quietly went dark anywhere jq wasn't installed.
|
|
7
11
|
set -e
|
|
8
12
|
|
|
9
13
|
INPUT=$(cat)
|
|
10
|
-
if ! command -v jq >/dev/null 2>&1; then
|
|
11
|
-
exit 0 # jq missing — skip silently rather than spuriously blocking
|
|
12
|
-
fi
|
|
13
14
|
|
|
14
|
-
|
|
15
|
+
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
|
|
16
|
+
# have_jq — succeed only when jq is installed and not opted out.
# AHK_DISABLE_JQ=1 forces the non-jq path even if jq is on PATH.
have_jq() {
  case "${AHK_DISABLE_JQ:-}" in
    1) return 1 ;;
  esac
  command -v jq >/dev/null 2>&1
}
|
|
20
|
+
# have_jp — report whether any JSON picker is usable.
# Returns: 0 when jq is usable, or when node is on PATH and the bundled
#          _lib/json-pick.mjs exists next to this script; 1 otherwise.
have_jp() {
  if have_jq; then
    return 0
  fi
  if command -v node >/dev/null 2>&1 && [ -f "$SCRIPT_DIR/_lib/json-pick.mjs" ]; then
    return 0
  fi
  return 1
}
|
|
25
|
+
# jp — apply filter $1 to the JSON on stdin and print the raw result.
# Uses jq when have_jq succeeds, otherwise the Node-based json-pick.mjs.
jp() {
  if ! have_jq; then
    node "$SCRIPT_DIR/_lib/json-pick.mjs" "$1"
    return
  fi
  jq -r "$1"
}
|
|
30
|
+
if ! have_jp; then exit 0; fi
|
|
31
|
+
|
|
32
|
+
TOOL=$(echo "$INPUT" | jp '.tool_name // empty')
|
|
15
33
|
[ "$TOOL" = "Skill" ] || exit 0
|
|
16
34
|
|
|
17
|
-
SKILL=$(echo "$INPUT" |
|
|
35
|
+
SKILL=$(echo "$INPUT" | jp '.tool_input.skill // empty')
|
|
18
36
|
[ -z "$SKILL" ] && exit 0
|
|
19
37
|
|
|
20
38
|
mkdir -p .harness
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
39
|
+
# Escape backslashes and double quotes in $1 so it can sit inside a JSON
# string literal. Control characters are not handled; the values written
# below are not expected to contain any.
_ahk_json_escape() {
  local s=$1
  s=${s//\\/\\\\}
  s=${s//\"/\\\"}
  printf '%s' "$s"
}

TS=$(date -u +%Y-%m-%dT%H:%M:%SZ)
SHA=$(git rev-parse --short HEAD 2>/dev/null || echo 'no-git')

# Compose the JSONL line by hand — same shape as the previous jq-built record.
# Skill names are expected to match `[a-z0-9-]+`, but the value comes from the
# hook payload, so it is escaped defensively: one stray quote or backslash
# would otherwise leave an unparseable line in telemetry.jsonl.
printf '{"ts":"%s","event":"skill_invoked","skill":"%s","sha":"%s"}\n' \
  "$TS" "$(_ahk_json_escape "$SKILL")" "$SHA" >> .harness/telemetry.jsonl
|
|
26
48
|
exit 0
|
|
@@ -1,39 +0,0 @@
|
|
|
1
|
-
{
|
|
2
|
-
"$schema": "https://json.schemastore.org/claude-code-hooks.json",
|
|
3
|
-
"hooks": {
|
|
4
|
-
"PostToolUse": [
|
|
5
|
-
{
|
|
6
|
-
"matcher": "Write|Edit|MultiEdit",
|
|
7
|
-
"hooks": [
|
|
8
|
-
{
|
|
9
|
-
"type": "command",
|
|
10
|
-
"command": "bash scripts/structural-test-on-edit.sh",
|
|
11
|
-
"timeout": 30
|
|
12
|
-
}
|
|
13
|
-
]
|
|
14
|
-
},
|
|
15
|
-
{
|
|
16
|
-
"matcher": "Skill",
|
|
17
|
-
"hooks": [
|
|
18
|
-
{
|
|
19
|
-
"type": "command",
|
|
20
|
-
"command": "bash scripts/telemetry-on-skill.sh",
|
|
21
|
-
"timeout": 5
|
|
22
|
-
}
|
|
23
|
-
]
|
|
24
|
-
}
|
|
25
|
-
],
|
|
26
|
-
"Stop": [
|
|
27
|
-
{
|
|
28
|
-
"matcher": "",
|
|
29
|
-
"hooks": [
|
|
30
|
-
{
|
|
31
|
-
"type": "command",
|
|
32
|
-
"command": "bash scripts/precompletion-checklist.sh",
|
|
33
|
-
"timeout": 20
|
|
34
|
-
}
|
|
35
|
-
]
|
|
36
|
-
}
|
|
37
|
-
]
|
|
38
|
-
}
|
|
39
|
-
}
|