agent-harness-kit 0.8.0 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +2 -2
- package/.claude-plugin/plugin.json +1 -1
- package/README.md +11 -1
- package/bin/cli.mjs +21 -0
- package/package.json +1 -1
- package/src/core/doctor.mjs +24 -0
- package/src/core/render-templates.mjs +29 -0
- package/src/core/upgrade.mjs +81 -60
- package/src/templates/.claude/agents/api-consistency-reviewer.md.vi +37 -0
- package/src/templates/.claude/agents/architecture-reviewer.md.vi.hbs +45 -0
- package/src/templates/.claude/agents/performance-reviewer.md.vi +39 -0
- package/src/templates/.claude/agents/reliability-reviewer.md.vi +42 -0
- package/src/templates/.claude/agents/security-reviewer.md.vi +43 -0
- package/src/templates/.claude/hooks/hooks.json +22 -0
- package/src/templates/.claude/output-styles/harness-terse.md +42 -0
- package/src/templates/.claude/settings.json.hbs +1 -0
- package/src/templates/.claude/skills/add-adr/SKILL.md.vi +64 -0
- package/src/templates/.claude/skills/add-feature/SKILL.md.vi.hbs +50 -0
- package/src/templates/.claude/skills/debug-flow/SKILL.md.vi.hbs +42 -0
- package/src/templates/.claude/skills/deliver-html/SKILL.md.hbs +96 -0
- package/src/templates/.claude/skills/deliver-html/SKILL.md.vi.hbs +89 -0
- package/src/templates/.claude/skills/deliver-html/assets/report.css +233 -0
- package/src/templates/.claude/skills/deliver-html/scripts/wrap-html.mjs +0 -0
- package/src/templates/.claude/skills/deliver-html/templates/audit-report.html.tmpl +29 -0
- package/src/templates/.claude/skills/deliver-html/templates/decision-doc.html.tmpl +29 -0
- package/src/templates/.claude/skills/deliver-html/templates/status-report.html.tmpl +29 -0
- package/src/templates/.claude/skills/doc-drift-scan/SKILL.md.vi +52 -0
- package/src/templates/.claude/skills/eval-runner/SKILL.md.vi +59 -0
- package/src/templates/.claude/skills/garbage-collection/SKILL.md.vi.hbs +58 -0
- package/src/templates/.claude/skills/i18n-add-locale/SKILL.md +52 -0
- package/src/templates/.claude/skills/i18n-add-locale/SKILL.md.vi +56 -0
- package/src/templates/.claude/skills/i18n-add-locale/scripts/locale-scaffold.mjs +120 -0
- package/src/templates/.claude/skills/inspect-app/SKILL.md.vi +61 -0
- package/src/templates/.claude/skills/inspect-module/SKILL.md.vi.hbs +57 -0
- package/src/templates/.claude/skills/map-domain/SKILL.md +42 -0
- package/src/templates/.claude/skills/map-domain/SKILL.md.vi +42 -0
- package/src/templates/.claude/skills/map-domain/scripts/domain-map.mjs +145 -0
- package/src/templates/.claude/skills/propose-harness-improvement/SKILL.md.vi +49 -0
- package/src/templates/.claude/skills/propose-harness-improvement/scripts/improvement-bundle.mjs +172 -0
- package/src/templates/.claude/skills/refactor-feature/SKILL.md +60 -0
- package/src/templates/.claude/skills/refactor-feature/SKILL.md.vi +64 -0
- package/src/templates/.claude/skills/refactor-feature/scripts/feature-diff.mjs +146 -0
- package/src/templates/.claude/skills/review-this-pr/SKILL.md +59 -0
- package/src/templates/.claude/skills/review-this-pr/SKILL.md.vi +63 -0
- package/src/templates/.claude/skills/review-this-pr/scripts/pr-review-driver.mjs +152 -0
- package/src/templates/.claude/skills/structural-test-author/SKILL.md.vi.hbs +50 -0
- package/src/templates/.claude/skills/write-skill/SKILL.md.vi +43 -0
- package/src/templates/.harness/eval/rubrics/feature-step-done.mjs +148 -0
- package/src/templates/.harness/eval/tasks/feature-step-done.answer.md +53 -0
- package/src/templates/.harness/eval/tasks/feature-step-done.json +10 -0
- package/src/templates/.harness/eval/tasks/feature-step-done.prompt.md +43 -0
- package/src/templates/.mcp.json.example +35 -0
- package/src/templates/CLAUDE.md.hbs +1 -0
- package/src/templates/CLAUDE.md.vi.hbs +1 -0
- package/src/templates/docs/adr/0002-html-first-for-humans.md.hbs +116 -0
- package/src/templates/docs/golden-principles.md.hbs +32 -0
- package/src/templates/scripts/precompletion-checklist.sh.hbs +43 -0
- package/src/templates/scripts/pretooluse-edit-guard.sh.hbs +115 -0
- package/src/templates/scripts/session-end.sh.hbs +6 -0
- package/src/templates/scripts/session-rollup.mjs +96 -0
- package/src/templates/scripts/session-start.sh.hbs +25 -0
- package/src/templates/scripts/subagent-stop.sh.hbs +76 -0
|
@@ -47,6 +47,7 @@ luôn gọn.
|
|
|
47
47
|
- `/structural-test-author <layer>` khi thêm rule kiến trúc mới.
|
|
48
48
|
- `/garbage-collection` mỗi thứ Sáu hoặc trước khi tag release.
|
|
49
49
|
- `/eval-runner` trước khi merge bất kỳ thay đổi nào ở skill / agent file.
|
|
50
|
+
- `/deliver-html` khi user cần analysis / audit / plan / decision doc / next-actions — HTML cho human, MD giữ cho agent file (principle #11).
|
|
50
51
|
|
|
51
52
|
## Subagents nên ủy thác (KHÔNG inline review)
|
|
52
53
|
|
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
# ADR 0002 — HTML for human deliverables, Markdown for agent files
|
|
2
|
+
|
|
3
|
+
- **Status:** accepted
|
|
4
|
+
- **Date:** {{now "yyyy-MM-dd"}}
|
|
5
|
+
- **Deciders:** project owner
|
|
6
|
+
|
|
7
|
+
## Context
|
|
8
|
+
|
|
9
|
+
The kit produces two distinct kinds of long-form output:
|
|
10
|
+
|
|
11
|
+
1. **Files an agent reads-and-edits.** `CLAUDE.md`, `SKILL.md`,
|
|
12
|
+
`.claude/agents/*.md`, `docs/architecture.md`, ADR notes, structural
|
|
13
|
+
reports written to stdout. These are line-oriented, diffable, and
|
|
14
|
+
typically loaded into the LLM context window.
|
|
15
|
+
2. **Documents a HUMAN reads-and-decides.** Audit reports, analyses, plans,
|
|
16
|
+
"next actions" reviews, status snapshots, decision docs. These are
|
|
17
|
+
self-contained artefacts that travel via email / Slack / PR attachments
|
|
18
|
+
and exist to surface a recommendation the human signs off on.
|
|
19
|
+
|
|
20
|
+
Anthropic's long-running-agent guide and the `agent-harness-kit` golden
|
|
21
|
+
principles both confirm Markdown is the right format for category 1: the
|
|
22
|
+
LLM tokenizes it cheaply, structural editing tools (`Edit`, `Write`) treat
|
|
23
|
+
it as native, and grep / sed / awk handle it without ceremony. Category 1
|
|
24
|
+
should remain Markdown.
|
|
25
|
+
|
|
26
|
+
Category 2 is where pain accumulates. A 500–800-line Markdown audit forces
|
|
27
|
+
the reader to:
|
|
28
|
+
|
|
29
|
+
- Scroll past sections that lack visual contrast.
|
|
30
|
+
- Render the file (terminal pager, GitHub preview, VS Code preview) before
|
|
31
|
+
it is readable at all.
|
|
32
|
+
- Skim and miss conclusions because every heading and bullet looks alike —
|
|
33
|
+
no severity badges, no border-left callouts, no grid layout.
|
|
34
|
+
|
|
35
|
+
The observed failure mode in this kit's own past sessions: the human reads
|
|
36
|
+
the Markdown report, asks the agent a follow-up that was answered on line
|
|
37
|
+
347, and burns another turn. That clarification turn costs more in tokens
|
|
38
|
+
(input replay + new output) than the +30-50% markup overhead of HTML.
|
|
39
|
+
|
|
40
|
+
## Decision
|
|
41
|
+
|
|
42
|
+
Adopt the rule documented as `docs/golden-principles.md` principle #11:
|
|
43
|
+
|
|
44
|
+
- **Human-facing deliverables ship as a single self-contained HTML file**
|
|
45
|
+
at repo root, produced by the `/deliver-html` skill against the shared
|
|
46
|
+
CSS at `.claude/skills/deliver-html/assets/report.css`.
|
|
47
|
+
- **Agent-facing files stay Markdown.** No exception.
|
|
48
|
+
|
|
49
|
+
Implementation details:
|
|
50
|
+
|
|
51
|
+
1. `/deliver-html` triggers on user intent: "analyze", "audit", "review",
|
|
52
|
+
"phân tích", "báo cáo", "plan", "proposal", "decision doc",
|
|
53
|
+
"next actions", and any similar prompt that calls for a long-form
|
|
54
|
+
deliverable.
|
|
55
|
+
2. The agent writes the body in Markdown (cheap tokens, easy reasoning).
|
|
56
|
+
The side-car `scripts/wrap-html.mjs` converts MD → HTML with three
|
|
57
|
+
templates (`decision-doc` | `audit-report` | `status-report`) and
|
|
58
|
+
inlines the shared CSS. No npm dependency: the converter is a
|
|
59
|
+
self-rolled subset (headings, paragraphs, lists, fenced code, tables,
|
|
60
|
+
blockquotes, inline formatting, links).
|
|
61
|
+
3. The Stop hook (`scripts/precompletion-checklist.sh`) emits a
|
|
62
|
+
non-blocking nudge when the session added or modified deliverable-shaped
|
|
63
|
+
`.md` files at repo root and no root-level `.html` file.
|
|
64
|
+
4. Locale: the `<html lang="…">` attribute is read from
|
|
65
|
+
`harness.config.json` `.claudeMd.humanLanguage`. CSS is locale-agnostic.
|
|
66
|
+
|
|
67
|
+
## Consequences
|
|
68
|
+
|
|
69
|
+
Positive
|
|
70
|
+
|
|
71
|
+
- One canonical look for every audit, plan, and decision doc. Less drift
|
|
72
|
+
across reports.
|
|
73
|
+
- Human reads once, decides once. Measured benefit: each saved
|
|
74
|
+
clarification turn ≈ 2-5k output tokens + cached input replay; offsets
|
|
75
|
+
HTML markup overhead easily.
|
|
76
|
+
- Self-contained HTML — emailable, Slack-attachable, PR-comment-attachable
|
|
77
|
+
without a build step.
|
|
78
|
+
- Existing 5 HTML reports at repo root (`NEXT_ACTIONS.html`,
|
|
79
|
+
`PHAN_TICH.html`, `E2E_REPORT.html`, `E2E_CI_REPORT.html`,
|
|
80
|
+
`HOOK_AUDIT.html`) validate the pattern in practice — `/deliver-html`
|
|
81
|
+
formalises it.
|
|
82
|
+
|
|
83
|
+
Negative
|
|
84
|
+
|
|
85
|
+
- HTML output is ~30-50% larger in token count than the equivalent MD body.
|
|
86
|
+
Mitigation: the LLM writes MD; only the deterministic side-car emits
|
|
87
|
+
HTML, so the LLM token budget is not affected.
|
|
88
|
+
- HTML diffs are noisy in GitHub. Mitigation: deliverables are artefacts,
|
|
89
|
+
not source. Source-of-truth lives in the conversation / commit message;
|
|
90
|
+
the HTML file is a build output. CI can ignore `*.html` at repo root.
|
|
91
|
+
- Two formats to teach. Mitigation: the rule is "agent reads → MD,
|
|
92
|
+
human reads → HTML"; reviewers learn it on first encounter.
|
|
93
|
+
|
|
94
|
+
## Alternatives considered
|
|
95
|
+
|
|
96
|
+
- **Always Markdown.** Rejected: the failure mode this ADR closes is
|
|
97
|
+
exactly the "scrolling, miss-the-conclusion" loop that Markdown
|
|
98
|
+
invites for long deliverables. README / CHANGELOG remain MD because
|
|
99
|
+
npm/GitHub renders them and the install snippet must be copy-paste-able.
|
|
100
|
+
- **Generate PDF instead.** Rejected: solo-dev kit, no print pipeline,
|
|
101
|
+
PDFs are write-only on common review tools. HTML is editable in 90
|
|
102
|
+
seconds when a reviewer wants to amend.
|
|
103
|
+
- **Render Markdown server-side (Docusaurus / mdBook / GitHub Pages).**
|
|
104
|
+
Rejected: requires CI + deploy step for every report. HTML at repo root
|
|
105
|
+
opens with one click — zero friction.
|
|
106
|
+
- **Inline a renderer in the IDE.** Rejected: not portable when sending the
|
|
107
|
+
artefact to someone who is not running the kit.
|
|
108
|
+
|
|
109
|
+
## Out of scope
|
|
110
|
+
|
|
111
|
+
- Existing HTML reports at repo root keep their inline CSS for now.
|
|
112
|
+
Self-contained shipping artefacts trump DRY at solo scale. A future
|
|
113
|
+
cleanup may reference the shared CSS file by relative path — tracked in
|
|
114
|
+
`docs/tech-debt-tracker.md` if/when it becomes load-bearing.
|
|
115
|
+
- Localizing the CSS itself. Style is locale-agnostic by design; only the
|
|
116
|
+
`lang` attribute and body copy differ between locales.
|
|
@@ -119,6 +119,38 @@ domain. The agent reads the recommendation, invokes
|
|
|
119
119
|
`architecture-reviewer` (or documents why review is unnecessary), and the
|
|
120
120
|
loop guard (`stop_hook_active`) lets the next stop succeed.
|
|
121
121
|
|
|
122
|
+
## 11. HTML for human deliverables, Markdown for agent files
|
|
123
|
+
|
|
124
|
+
Files an agent reads-and-edits (`CLAUDE.md`, `.claude/skills/*/SKILL.md`,
|
|
125
|
+
`.claude/agents/*.md`, `docs/architecture.md`, `docs/adr/*.md`, ADR notes,
|
|
126
|
+
inline review output) stay as Markdown. Files a HUMAN reads-and-decides
|
|
127
|
+
(audit reports, analyses, plans, decision docs, next-actions reviews,
|
|
128
|
+
status snapshots) ship as self-contained HTML, written by the
|
|
129
|
+
`/deliver-html` skill against the shared dark-theme CSS.
|
|
130
|
+
|
|
131
|
+
Why: a 700-line Markdown deliverable forces the human to scroll, miss the
|
|
132
|
+
conclusion, and ask the agent to clarify — a wasted turn that costs more
|
|
133
|
+
tokens than the HTML overhead it was meant to avoid. HTML deliverables are
|
|
134
|
+
"read once, decide once." Markdown has no visual hierarchy strong enough to
|
|
135
|
+
support decision-grade reading at length.
|
|
136
|
+
Enforced by:
|
|
137
|
+
|
|
138
|
+
- `/deliver-html` skill triggers on user intent ("analyze", "audit",
|
|
139
|
+
"review", "phân tích", "báo cáo", "plan", "proposal", "decision doc",
|
|
140
|
+
"next actions") and writes `<slug>.html` at repo root.
|
|
141
|
+
- Stop hook nudge: when the session added or modified deliverable-shaped
|
|
142
|
+
`.md` files at repo root but no root-level `.html`, the agent is reminded to invoke
|
|
143
|
+
`/deliver-html`. Non-blocking.
|
|
144
|
+
- ADR-0002 documents the trade-off (token cost +30-50% on the rendered
|
|
145
|
+
output, paid back by saving ≥1 clarification turn).
|
|
146
|
+
|
|
147
|
+
Counter-rules — when Markdown is still correct:
|
|
148
|
+
|
|
149
|
+
- `README.md`, `CHANGELOG.md` — npm/GitHub renders them; human installs/diffs.
|
|
150
|
+
- Stdout from `/review-this-pr`, `/garbage-collection`, structural reports —
|
|
151
|
+
agent consumes the output.
|
|
152
|
+
- Short summaries (< 30 lines) — answer inline, no file.
|
|
153
|
+
|
|
122
154
|
---
|
|
123
155
|
|
|
124
156
|
_Add new principles via `/structural-test-author`, which forces you to
|
|
@@ -202,6 +202,49 @@ if [ -f harness.config.json ] && have_jp && command -v git >/dev/null 2>&1; then
|
|
|
202
202
|
fi
|
|
203
203
|
fi
|
|
204
204
|
|
|
205
|
+
# Non-blocking nudge: HTML-for-humans (golden principle #11 / ADR-0002).
|
|
206
|
+
# When the session produced one or more deliverable-shaped .md files at repo
|
|
207
|
+
# root (e.g. not CLAUDE.md / AGENTS.md / README.md / CHANGELOG.md), suggest
|
|
208
|
+
# `/deliver-html`. Pure heuristic — never blocks the stop. Skip with
|
|
209
|
+
# `AHK_DISABLE_HTML_NUDGE=1`.
|
|
210
|
+
if [ "${AHK_DISABLE_HTML_NUDGE:-0}" != "1" ] && command -v git >/dev/null 2>&1; then
|
|
211
|
+
KIT_MDS="CLAUDE.md|AGENTS.md|README.md|CHANGELOG.md|LICENSE.md|CONTRIBUTING.md|CODE_OF_CONDUCT.md|SECURITY.md"
|
|
212
|
+
NEW_MD=$(
|
|
213
|
+
{
|
|
214
|
+
git ls-files --others --exclude-standard 2>/dev/null
|
|
215
|
+
git diff --name-only 2>/dev/null
|
|
216
|
+
git diff --name-only --cached 2>/dev/null
|
|
217
|
+
} \
|
|
218
|
+
| sort -u \
|
|
219
|
+
| grep -E '^[^/]+\.md$' \
|
|
220
|
+
| grep -Ev "^(${KIT_MDS})$" \
|
|
221
|
+
|| true
|
|
222
|
+
)
|
|
223
|
+
if [ -n "$NEW_MD" ]; then
|
|
224
|
+
NEW_HTML=$(
|
|
225
|
+
{
|
|
226
|
+
git ls-files --others --exclude-standard 2>/dev/null
|
|
227
|
+
git diff --name-only 2>/dev/null
|
|
228
|
+
git diff --name-only --cached 2>/dev/null
|
|
229
|
+
} \
|
|
230
|
+
| sort -u \
|
|
231
|
+
| grep -E '^[^/]+\.html$' \
|
|
232
|
+
|| true
|
|
233
|
+
)
|
|
234
|
+
if [ -z "$NEW_HTML" ]; then
|
|
235
|
+
{
|
|
236
|
+
echo
|
|
237
|
+
echo "[nudge] Repo root has new .md file(s) that look like human deliverables:"
|
|
238
|
+
echo "$NEW_MD" | sed 's/^/ - /'
|
|
239
|
+
echo
|
|
240
|
+
echo "Golden principle #11: HTML for human deliverables, MD for agent files."
|
|
241
|
+
echo "If these are reports/audits/plans/decision-docs, ship them via /deliver-html"
|
|
242
|
+
echo "instead. Non-blocking — suppress with AHK_DISABLE_HTML_NUDGE=1."
|
|
243
|
+
} >&2
|
|
244
|
+
fi
|
|
245
|
+
fi
|
|
246
|
+
fi
|
|
247
|
+
|
|
205
248
|
if [ ! -s "$TMPDIR_HOOK/failed.list" ]; then
|
|
206
249
|
exit 0
|
|
207
250
|
fi
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# PreToolUse hook (matcher: Edit|Write|MultiEdit) — denies direct edits to
|
|
3
|
+
# protected paths. Catches the failure mode where the agent decides to
|
|
4
|
+
# "just fix" a baseline file or .claude/ template instead of going through
|
|
5
|
+
# the proper /garbage-collection or scaffold-refresh paths.
|
|
6
|
+
#
|
|
7
|
+
# Protected paths (and why):
|
|
8
|
+
# 1. .claude/ — skills, agents, hooks, settings.
|
|
9
|
+
# Use /upgrade flow or edit the source
|
|
10
|
+
# template in src/templates/.
|
|
11
|
+
# 2. node_modules/ — package state, regenerated by install.
|
|
12
|
+
# 3. .git/ — repo internals, never hand-edited.
|
|
13
|
+
# 4. .harness/structural-baseline.json — bypasses monotonic guard. Use the
|
|
14
|
+
# /garbage-collection skill.
|
|
15
|
+
# 5. .harness/installed.json — kit lockfile, derived from render.
|
|
16
|
+
# Hand edits cause spurious "drift"
|
|
17
|
+
# warnings on next upgrade.
|
|
18
|
+
#
|
|
19
|
+
# Escape hatches:
|
|
20
|
+
# - AHK_ALLOW_BYPASS=1 → log + allow (audit trail in .harness/bypass.log).
|
|
21
|
+
# - AHK_HOOK_MODE=warn → log only, never deny.
|
|
22
|
+
set -eo pipefail
|
|
23
|
+
|
|
24
|
+
INPUT=$(cat)
|
|
25
|
+
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
|
|
26
|
+
# have_jq — succeeds when jq may be used: it is on PATH and has not been
# switched off with AHK_DISABLE_JQ=1.
have_jq() {
  if [ "${AHK_DISABLE_JQ:-}" = "1" ]; then
    return 1
  fi
  command -v jq >/dev/null 2>&1
}
|
|
30
|
+
# have_jp — succeeds when some JSON picker is usable: either jq (see have_jq)
# or node together with the bundled _lib/json-pick.mjs next to this script.
have_jp() {
  if have_jq; then
    return 0
  fi
  if command -v node >/dev/null 2>&1 && [ -f "$SCRIPT_DIR/_lib/json-pick.mjs" ]; then
    return 0
  fi
  return 1
}
|
|
35
|
+
# jp FILTER — apply FILTER to the JSON document on stdin and print the raw
# result. Uses jq when available, otherwise the Node json-pick fallback.
jp() {
  local filter=$1
  if have_jq; then
    jq -r "$filter"
  else
    node "$SCRIPT_DIR/_lib/json-pick.mjs" "$filter"
  fi
}
|
|
40
|
+
# Without jq or the Node fallback the payload cannot be parsed, so the guard
# fails open and allows the edit.
if ! have_jp; then exit 0; fi

# Resolve the target file from the hook payload captured in $INPUT: prefer
# .tool_input.file_path and fall back to .tool_input.path. The value may be an
# absolute or a repo-relative path.
FILE=$(printf '%s\n' "$INPUT" | jp '.tool_input.file_path // .tool_input.path // empty')
if [ -z "$FILE" ]; then
  exit 0
fi

# Normalise to a path relative to CWD when possible; otherwise keep absolute.
# Pure shell: strip the "$PWD/" prefix and any leading "./".
REL_FILE="$FILE"
if [ -n "$PWD" ] && [[ "$FILE" == "$PWD"/* ]]; then
  REL_FILE="${FILE#"$PWD"/}"
fi
REL_FILE="${REL_FILE#./}"

# User-level overrides live in ~/.claude/, which the .claude/ deny message
# below recommends as the alternative. A path that is still absolute here is
# outside the project, so let ~/.claude/ through instead of having the
# '*/.claude/*' pattern deny the very location the message points to.
if [ -n "${HOME:-}" ] && [[ "$REL_FILE" == /* ]] && [[ "$REL_FILE" == "$HOME"/.claude/* ]]; then
  exit 0
fi

# Map the path onto a protected area; REASON stays empty for unprotected paths.
REASON=""
case "$REL_FILE" in
  .claude/*|*/.claude/*)
    REASON=".claude/ is owned by the kit's scaffold. To change a skill/agent/hook, edit src/templates/.claude/ in the kit source and re-run 'agent-harness-kit upgrade', or override at the user level (~/.claude/)."
    ;;
  node_modules/*|*/node_modules/*)
    REASON="node_modules/ is regenerated by the package manager. Edit package.json or the upstream package; never hand-edit installed files."
    ;;
  .git/*|*/.git/*)
    REASON=".git/ contains repo internals. Use git commands ('git config', 'git update-ref', etc.) — never hand-edit."
    ;;
  .harness/structural-baseline.json)
    REASON="Direct edits to .harness/structural-baseline.json bypass the baseline-monotonic guard. Use the /garbage-collection skill or fix the underlying violation."
    ;;
  .harness/installed.json)
    REASON=".harness/installed.json is the kit lockfile, regenerated by 'agent-harness-kit init/upgrade'. Hand edits cause spurious drift warnings."
    ;;
esac

if [ -z "$REASON" ]; then
  exit 0
fi

# Warn-only mode: report what would have been denied, then allow.
if [ "${AHK_HOOK_MODE:-}" = "warn" ]; then
  echo "[ahk] pretooluse-edit-guard (warn): would deny edit to $REL_FILE — $REASON" >&2
  exit 0
fi

# Bypass with audit log: allow the edit but append a JSONL record.
if [ "${AHK_ALLOW_BYPASS:-}" = "1" ]; then
  mkdir -p .harness
  TS=$(date -u +%Y-%m-%dT%H:%M:%SZ)
  SHA=$(git rev-parse --short HEAD 2>/dev/null || echo 'no-git')
  # Escape backslashes first, then double quotes, so the record stays valid
  # JSON even for paths such as C:\repo\.claude\x.
  ESCAPED=${REL_FILE//\\/\\\\}
  ESCAPED=${ESCAPED//\"/\\\"}
  printf '{"ts":"%s","sha":"%s","bypass":"AHK_ALLOW_BYPASS","file":"%s","rule":"pretooluse-edit-guard"}\n' \
    "$TS" "$SHA" "$ESCAPED" >> .harness/bypass.log
  exit 0
fi

# Deny via JSON on stdout. Prefer node, then jq, so the reason is serialised by
# a real JSON encoder; with neither available fall back to stderr + exit 2.
if command -v node >/dev/null 2>&1; then
  node -e "
    const reason = process.argv[1];
    const out = {
      hookSpecificOutput: {
        hookEventName: 'PreToolUse',
        permissionDecision: 'deny',
        permissionDecisionReason: reason
      }
    };
    process.stdout.write(JSON.stringify(out));
  " "$REASON"
elif have_jq; then
  jq -nc --arg r "$REASON" \
    '{hookSpecificOutput: {hookEventName: "PreToolUse", permissionDecision: "deny", permissionDecisionReason: $r}}'
else
  echo "$REASON" >&2
  exit 2
fi
exit 0
|
|
@@ -45,4 +45,10 @@ fi
|
|
|
45
45
|
mkdir -p .harness
|
|
46
46
|
TS=$(date +"%Y-%m-%d %H:%M")
|
|
47
47
|
echo "$TS | session_end | $REASON | $BR | $SHA" >> .harness/PROGRESS.md
|
|
48
|
+
|
|
49
|
+
# Rollup side-car — writes a JSONL record to .harness/telemetry.jsonl.
|
|
50
|
+
# Best-effort: never blocks the cleanup-only SessionEnd contract.
|
|
51
|
+
if command -v node >/dev/null 2>&1 && [ -f scripts/session-rollup.mjs ]; then
|
|
52
|
+
printf '%s' "$INPUT" | node scripts/session-rollup.mjs 2>/dev/null || true
|
|
53
|
+
fi
|
|
48
54
|
exit 0
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
// session-rollup.mjs — deterministic SessionEnd side-car. Writes a single
|
|
3
|
+
// JSONL record summarising the session into .harness/telemetry.jsonl. Pure
|
|
4
|
+
// Node (no jq dependency).
|
|
5
|
+
//
|
|
6
|
+
// Record shape:
|
|
7
|
+
// { ts, event: "session_rollup", reason, branch, sha, uncommitted,
|
|
8
|
+
// skills_invoked: [...], session_id }
|
|
9
|
+
//
|
|
10
|
+
// Called from session-end.sh after the human-readable PROGRESS.md line is
|
|
11
|
+
// written, so a single session contributes one PROGRESS.md line + one
|
|
12
|
+
// telemetry rollup record.
|
|
13
|
+
|
|
14
|
+
import { readFileSync, existsSync, mkdirSync, appendFileSync } from "node:fs";
|
|
15
|
+
import { resolve } from "node:path";
|
|
16
|
+
import { spawnSync } from "node:child_process";
|
|
17
|
+
|
|
18
|
+
const ROOT = process.env.CLAUDE_PROJECT_DIR || process.cwd();
|
|
19
|
+
|
|
20
|
+
/**
 * Read everything available on stdin (file descriptor 0) as UTF-8 text.
 * Any read error is swallowed and reported as an empty string.
 */
function readStdinSync() {
  let payload = "";
  try {
    payload = readFileSync(0, "utf8");
  } catch {
    // stdin unreadable — fall through with the empty payload.
  }
  return payload;
}
|
|
28
|
+
|
|
29
|
+
/**
 * Parse `s` as JSON without ever throwing.
 *
 * Returns the parsed value when it is an object (or array). Returns `{}` for
 * empty input, malformed JSON, and for valid JSON that is not an object
 * (`null`, numbers, strings, booleans), so callers can always read properties
 * off the result.
 */
function safeJSON(s) {
  if (!s) return {};
  let parsed;
  try {
    parsed = JSON.parse(s);
  } catch {
    return {};
  }
  // `JSON.parse("null")` succeeds; property access on it would throw.
  return parsed !== null && typeof parsed === "object" ? parsed : {};
}
|
|
33
|
+
|
|
34
|
+
/**
 * Run `git <args>` in ROOT and return its trimmed stdout.
 *
 * @param {string[]} args  Arguments passed to git.
 * @param {string} [def=""] Value returned when git exits non-zero, cannot be
 *   spawned, or prints nothing.
 * @returns {string}
 */
function git(args, def = "") {
  const r = spawnSync("git", args, { cwd: ROOT, encoding: "utf8" });
  if (r.status !== 0) return def;
  const out = (r.stdout || "").trim();
  // Some commands succeed with empty output (e.g. `git branch --show-current`
  // on a detached HEAD); treat that like "no answer" and use the default.
  return out || def;
}
|
|
39
|
+
|
|
40
|
+
/**
 * Collect the `skill` values of `skill_invoked` records that appear in
 * `.harness/telemetry.jsonl` after the most recent `session_rollup` record
 * (or in the whole file when there is none). Lines that fail to parse are
 * ignored. At most the 50 newest entries are returned, oldest first.
 */
function recentSkillInvocations() {
  const telemetryFile = resolve(ROOT, ".harness/telemetry.jsonl");
  if (!existsSync(telemetryFile)) return [];

  const records = readFileSync(telemetryFile, "utf8").split("\n").filter(Boolean);

  // Walk backwards from the end of the file and stop at the previous rollup,
  // gathering skills newest-first on the way.
  const newestFirst = [];
  for (let idx = records.length - 1; idx >= 0; idx -= 1) {
    try {
      const rec = JSON.parse(records[idx]);
      if (rec.event === "session_rollup") break;
      if (rec.event === "skill_invoked" && rec.skill) newestFirst.push(rec.skill);
    } catch {
      /* skip malformed */
    }
  }

  return newestFirst.reverse().slice(-50);
}
|
|
68
|
+
|
|
69
|
+
/**
 * Build one `session_rollup` record from the hook payload on stdin plus the
 * current git state, and append it as a JSON line to
 * `.harness/telemetry.jsonl` under ROOT (creating `.harness/` if needed).
 */
function main() {
  const input = safeJSON(readStdinSync());
  // The SessionEnd hook payload carries the end cause in `reason`;
  // `end_reason` is still honoured so any caller that sends it keeps working.
  const reason = input.reason || input.end_reason || "unknown";
  const sessionId = input.session_id || "";

  const branch = git(["branch", "--show-current"], "(detached)");
  const sha = git(["rev-parse", "--short", "HEAD"], "(no-git)");
  // One line of `git status --short` per changed path.
  const uncommittedRaw = git(["status", "--short"], "");
  const uncommitted = uncommittedRaw ? uncommittedRaw.split("\n").filter(Boolean).length : 0;
  const skills = recentSkillInvocations();

  const record = {
    ts: new Date().toISOString(),
    event: "session_rollup",
    reason,
    session_id: sessionId,
    branch,
    sha,
    uncommitted,
    skills_invoked: skills,
  };

  const outPath = resolve(ROOT, ".harness/telemetry.jsonl");
  mkdirSync(resolve(ROOT, ".harness"), { recursive: true });
  appendFileSync(outPath, JSON.stringify(record) + "\n");
}
|
|
95
|
+
|
|
96
|
+
main();
|
|
@@ -60,6 +60,31 @@ if command -v git >/dev/null 2>&1 && git rev-parse --git-dir >/dev/null 2>&1; th
|
|
|
60
60
|
CTX+="[harness] git: branch=$BR, uncommitted=$COUNT file(s)"$'\n'
|
|
61
61
|
fi
|
|
62
62
|
|
|
63
|
+
# 1b. One-shot daily pill (harness version + date).
|
|
64
|
+
# `mkdir -p .harness/state` then check the stamp file. Today's pill fires
|
|
65
|
+
# once per UTC day per project; subsequent SessionStarts that day stay
|
|
66
|
+
# silent on this line so the model doesn't see the same banner thirty
|
|
67
|
+
# times per coding day.
|
|
68
|
+
mkdir -p .harness/state 2>/dev/null || true
|
|
69
|
+
STAMP_FILE=".harness/state/session-pill.stamp"
|
|
70
|
+
TODAY=$(date -u +%Y-%m-%d)
|
|
71
|
+
LAST=""
|
|
72
|
+
[ -f "$STAMP_FILE" ] && LAST=$(cat "$STAMP_FILE" 2>/dev/null || echo "")
|
|
73
|
+
if [ "$LAST" != "$TODAY" ]; then
|
|
74
|
+
HARNESS_VER=""
|
|
75
|
+
if [ -f harness.config.json ] && have_jp; then
|
|
76
|
+
HARNESS_VER=$(jp '.version // empty' harness.config.json 2>/dev/null || echo "")
|
|
77
|
+
fi
|
|
78
|
+
if [ -z "$HARNESS_VER" ] && [ -f .harness/installed.json ] && have_jp; then
|
|
79
|
+
HARNESS_VER=$(jp '.version // empty' .harness/installed.json 2>/dev/null || echo "")
|
|
80
|
+
fi
|
|
81
|
+
if [ -z "$HARNESS_VER" ]; then
|
|
82
|
+
HARNESS_VER="unknown"
|
|
83
|
+
fi
|
|
84
|
+
CTX+="[harness] pill (one/day): kit=$HARNESS_VER · date=$TODAY"$'\n'
|
|
85
|
+
printf '%s' "$TODAY" > "$STAMP_FILE" 2>/dev/null || true
|
|
86
|
+
fi
|
|
87
|
+
|
|
63
88
|
# 2. Current feature (from feature_list.json) — picks the first entry with
|
|
64
89
|
# passes=false so the model resumes the in-flight work, not a finished
|
|
65
90
|
# one. Skipped if file missing or jp unavailable.
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# SubagentStop hook — fires when a subagent finishes its turn (Task tool).
|
|
3
|
+
# Triggers the same structural-test that PostToolUse(Edit) runs, because a
|
|
4
|
+
# subagent can edit files in batches that individually pass but jointly drift
|
|
5
|
+
# off-layer. Running the check at subagent boundary catches that drift early.
|
|
6
|
+
#
|
|
7
|
+
# Contract:
|
|
8
|
+
# - Never blocks (exit 0 even on failure — the parent Stop hook handles the
|
|
9
|
+
# final gate). We only emit a stderr summary that Claude reads.
|
|
10
|
+
# - Telemetry append to .harness/telemetry.jsonl as {event:"subagent_stop"}.
|
|
11
|
+
# - Skipped when harness.config.json#structuralTest.engine === "none" (the
|
|
12
|
+
# "structural test not yet wired" escape hatch used by polyglot scaffolds).
|
|
13
|
+
set -eo pipefail
|
|
14
|
+
|
|
15
|
+
INPUT=$(cat)
|
|
16
|
+
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
|
|
17
|
+
# have_jq — true when jq is installed and AHK_DISABLE_JQ is not set to 1.
have_jq() {
  if [ "${AHK_DISABLE_JQ:-}" = "1" ]; then
    return 1
  fi
  command -v jq >/dev/null 2>&1
}
|
|
21
|
+
# have_jp — true when a JSON picker is available: jq (per have_jq), or node
# plus the _lib/json-pick.mjs helper located beside this script.
have_jp() {
  if have_jq; then
    return 0
  fi
  if command -v node >/dev/null 2>&1 && [ -f "$SCRIPT_DIR/_lib/json-pick.mjs" ]; then
    return 0
  fi
  return 1
}
|
|
26
|
+
# jp FILTER — run FILTER over the JSON on stdin and print the raw result,
# via jq when usable and via the Node json-pick helper otherwise.
jp() {
  local filter=$1
  if have_jq; then
    jq -r "$filter"
  else
    node "$SCRIPT_DIR/_lib/json-pick.mjs" "$filter"
  fi
}
|
|
31
|
+
|
|
32
|
+
# Identify the subagent from the hook payload in $INPUT. Falls back to a
# placeholder when no JSON reader is available or the lookup fails.
SUBAGENT="(unknown)"
if have_jp; then
  SUBAGENT=$(echo "$INPUT" | jp '.subagent // .session_id // "unknown"' 2>/dev/null || echo "unknown")
fi

# Telemetry first so we record every subagent boundary, even if the
# structural-test bails below. Best-effort: this hook must never fail, so a
# read-only or missing .harness/ directory is tolerated instead of tripping
# `set -e`.
TS=$(date -u +%Y-%m-%dT%H:%M:%SZ)
SHA=$(git rev-parse --short HEAD 2>/dev/null || echo 'no-git')
# Escape backslashes, then double quotes, so the JSONL record stays parseable.
SUBAGENT_JSON=${SUBAGENT//\\/\\\\}
SUBAGENT_JSON=${SUBAGENT_JSON//\"/\\\"}
if mkdir -p .harness 2>/dev/null; then
  {
    printf '{"ts":"%s","event":"subagent_stop","subagent":"%s","sha":"%s"}\n' \
      "$TS" "$SUBAGENT_JSON" "$SHA" >> .harness/telemetry.jsonl
  } 2>/dev/null || true
fi

# Skip if structural test disabled. NOTE(review): this matches any
# "engine": "none" pair in harness.config.json, not only structuralTest.engine.
if [ -f harness.config.json ] \
  && grep -qE '"engine"[[:space:]]*:[[:space:]]*"none"' harness.config.json; then
  exit 0
fi

# AHK_HOOK_MODE=warn → telemetry only, don't run the check.
if [ "${AHK_HOOK_MODE:-}" = "warn" ]; then
  exit 0
fi

# Run structural test workspace-wide. Subagents typically touch multiple
# files; per-file scoping would miss the cross-file drift case. Cap output
# to 30 lines on stderr so the parent agent sees the summary without flood.
# `pipefail` makes the pipeline status reflect the checker, not `tail`.
if [ -f harness/structural-check.mjs ] && command -v node >/dev/null 2>&1; then
  if ! node harness/structural-check.mjs 2>&1 | tail -30 >&2; then
    echo "[ahk] subagent_stop: structural-test reported violations (see above). Continuing — parent Stop hook will gate." >&2
  fi
elif command -v npm >/dev/null 2>&1 && [ -f package.json ] \
  && grep -q '"harness:check"' package.json 2>/dev/null; then
  if ! npm run --silent harness:check 2>&1 | tail -30 >&2; then
    echo "[ahk] subagent_stop: structural-test reported violations (see above). Continuing — parent Stop hook will gate." >&2
  fi
fi

# No entry point found, or the check ran: either way this hook never blocks.
exit 0
|