agent-harness-kit 0.8.0 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. package/.claude-plugin/marketplace.json +2 -2
  2. package/.claude-plugin/plugin.json +1 -1
  3. package/README.md +11 -1
  4. package/bin/cli.mjs +21 -0
  5. package/package.json +1 -1
  6. package/src/core/doctor.mjs +24 -0
  7. package/src/core/render-templates.mjs +29 -0
  8. package/src/core/upgrade.mjs +81 -60
  9. package/src/templates/.claude/agents/api-consistency-reviewer.md.vi +37 -0
  10. package/src/templates/.claude/agents/architecture-reviewer.md.vi.hbs +45 -0
  11. package/src/templates/.claude/agents/performance-reviewer.md.vi +39 -0
  12. package/src/templates/.claude/agents/reliability-reviewer.md.vi +42 -0
  13. package/src/templates/.claude/agents/security-reviewer.md.vi +43 -0
  14. package/src/templates/.claude/hooks/hooks.json +22 -0
  15. package/src/templates/.claude/output-styles/harness-terse.md +42 -0
  16. package/src/templates/.claude/settings.json.hbs +1 -0
  17. package/src/templates/.claude/skills/add-adr/SKILL.md.vi +64 -0
  18. package/src/templates/.claude/skills/add-feature/SKILL.md.vi.hbs +50 -0
  19. package/src/templates/.claude/skills/debug-flow/SKILL.md.vi.hbs +42 -0
  20. package/src/templates/.claude/skills/deliver-html/SKILL.md.hbs +96 -0
  21. package/src/templates/.claude/skills/deliver-html/SKILL.md.vi.hbs +89 -0
  22. package/src/templates/.claude/skills/deliver-html/assets/report.css +233 -0
  23. package/src/templates/.claude/skills/deliver-html/scripts/wrap-html.mjs +0 -0
  24. package/src/templates/.claude/skills/deliver-html/templates/audit-report.html.tmpl +29 -0
  25. package/src/templates/.claude/skills/deliver-html/templates/decision-doc.html.tmpl +29 -0
  26. package/src/templates/.claude/skills/deliver-html/templates/status-report.html.tmpl +29 -0
  27. package/src/templates/.claude/skills/doc-drift-scan/SKILL.md.vi +52 -0
  28. package/src/templates/.claude/skills/eval-runner/SKILL.md.vi +59 -0
  29. package/src/templates/.claude/skills/garbage-collection/SKILL.md.vi.hbs +58 -0
  30. package/src/templates/.claude/skills/i18n-add-locale/SKILL.md +52 -0
  31. package/src/templates/.claude/skills/i18n-add-locale/SKILL.md.vi +56 -0
  32. package/src/templates/.claude/skills/i18n-add-locale/scripts/locale-scaffold.mjs +120 -0
  33. package/src/templates/.claude/skills/inspect-app/SKILL.md.vi +61 -0
  34. package/src/templates/.claude/skills/inspect-module/SKILL.md.vi.hbs +57 -0
  35. package/src/templates/.claude/skills/map-domain/SKILL.md +42 -0
  36. package/src/templates/.claude/skills/map-domain/SKILL.md.vi +42 -0
  37. package/src/templates/.claude/skills/map-domain/scripts/domain-map.mjs +145 -0
  38. package/src/templates/.claude/skills/propose-harness-improvement/SKILL.md.vi +49 -0
  39. package/src/templates/.claude/skills/propose-harness-improvement/scripts/improvement-bundle.mjs +172 -0
  40. package/src/templates/.claude/skills/refactor-feature/SKILL.md +60 -0
  41. package/src/templates/.claude/skills/refactor-feature/SKILL.md.vi +64 -0
  42. package/src/templates/.claude/skills/refactor-feature/scripts/feature-diff.mjs +146 -0
  43. package/src/templates/.claude/skills/review-this-pr/SKILL.md +59 -0
  44. package/src/templates/.claude/skills/review-this-pr/SKILL.md.vi +63 -0
  45. package/src/templates/.claude/skills/review-this-pr/scripts/pr-review-driver.mjs +152 -0
  46. package/src/templates/.claude/skills/structural-test-author/SKILL.md.vi.hbs +50 -0
  47. package/src/templates/.claude/skills/write-skill/SKILL.md.vi +43 -0
  48. package/src/templates/.harness/eval/rubrics/feature-step-done.mjs +148 -0
  49. package/src/templates/.harness/eval/tasks/feature-step-done.answer.md +53 -0
  50. package/src/templates/.harness/eval/tasks/feature-step-done.json +10 -0
  51. package/src/templates/.harness/eval/tasks/feature-step-done.prompt.md +43 -0
  52. package/src/templates/.mcp.json.example +35 -0
  53. package/src/templates/CLAUDE.md.hbs +1 -0
  54. package/src/templates/CLAUDE.md.vi.hbs +1 -0
  55. package/src/templates/docs/adr/0002-html-first-for-humans.md.hbs +116 -0
  56. package/src/templates/docs/golden-principles.md.hbs +32 -0
  57. package/src/templates/scripts/precompletion-checklist.sh.hbs +43 -0
  58. package/src/templates/scripts/pretooluse-edit-guard.sh.hbs +115 -0
  59. package/src/templates/scripts/session-end.sh.hbs +6 -0
  60. package/src/templates/scripts/session-rollup.mjs +96 -0
  61. package/src/templates/scripts/session-start.sh.hbs +25 -0
  62. package/src/templates/scripts/subagent-stop.sh.hbs +76 -0
@@ -47,6 +47,7 @@ luôn gọn.
47
47
  - `/structural-test-author <layer>` khi thêm rule kiến trúc mới.
48
48
  - `/garbage-collection` mỗi thứ Sáu hoặc trước khi tag release.
49
49
  - `/eval-runner` trước khi merge bất kỳ thay đổi nào ở skill / agent file.
50
+ - `/deliver-html` khi user cần analysis / audit / plan / decision doc / next-actions — HTML cho human, MD giữ cho agent file (principle #11).
50
51
 
51
52
  ## Subagents nên ủy thác (KHÔNG inline review)
52
53
 
@@ -0,0 +1,116 @@
1
+ # ADR 0002 — HTML for human deliverables, Markdown for agent files
2
+
3
+ - **Status:** accepted
4
+ - **Date:** {{now "yyyy-MM-dd"}}
5
+ - **Deciders:** project owner
6
+
7
+ ## Context
8
+
9
+ The kit produces two distinct kinds of long-form output:
10
+
11
+ 1. **Files an agent reads-and-edits.** `CLAUDE.md`, `SKILL.md`,
12
+ `.claude/agents/*.md`, `docs/architecture.md`, ADR notes, structural
13
+ reports written to stdout. These are line-oriented, diffable, and
14
+ typically loaded into the LLM context window.
15
+ 2. **Documents a HUMAN reads-and-decides.** Audit reports, analyses, plans,
16
+ "next actions" reviews, status snapshots, decision docs. These are
17
+ self-contained artefacts that travel via email / Slack / PR attachments
18
+ and exist to surface a recommendation the human signs off on.
19
+
20
+ Anthropic's long-running-agent guide and the `agent-harness-kit` golden
21
+ principles both confirm Markdown is the right format for category 1: the
22
+ LLM tokenizes it cheaply, structural editing tools (`Edit`, `Write`) treat
23
+ it as native, and grep / sed / awk handle it without ceremony. Category 1
24
+ should remain Markdown.
25
+
26
+ Category 2 is where pain accumulates. A 500–800-line Markdown audit forces
27
+ the reader to:
28
+
29
+ - Scroll past sections that lack visual contrast.
30
+ - Render the file (terminal pager, GitHub preview, VS Code preview) before
31
+ it is readable at all.
32
+ - Skim and miss conclusions because every heading and bullet looks alike —
33
+ no severity badges, no border-left callouts, no grid layout.
34
+
35
+ The observed failure mode in this kit's own past sessions: the human reads
36
+ the Markdown report, asks the agent a follow-up that was answered on line
37
+ 347, and burns another turn. That clarification turn costs more in tokens
38
+ (input replay + new output) than the +30-50% markup overhead of HTML.
39
+
40
+ ## Decision
41
+
42
+ Adopt the rule documented as `docs/golden-principles.md` principle #11:
43
+
44
+ - **Human-facing deliverables ship as a single self-contained HTML file**
45
+ at repo root, produced by the `/deliver-html` skill against the shared
46
+ CSS at `.claude/skills/deliver-html/assets/report.css`.
47
+ - **Agent-facing files stay Markdown.** No exception.
48
+
49
+ Implementation details:
50
+
51
+ 1. `/deliver-html` triggers on user intent: "analyze", "audit", "review",
52
+ "phân tích", "báo cáo", "plan", "proposal", "decision doc",
53
+ "next actions", and any similar prompt that calls for a long-form
54
+ deliverable.
55
+ 2. The agent writes the body in Markdown (cheap tokens, easy reasoning).
56
+ The side-car `scripts/wrap-html.mjs` converts MD → HTML with three
57
+ templates (`decision-doc` | `audit-report` | `status-report`) and
58
+ inlines the shared CSS. No npm dependency: the converter is a
59
+ self-rolled subset (headings, paragraphs, lists, fenced code, tables,
60
+ blockquotes, inline formatting, links).
61
+ 3. The Stop hook (`scripts/precompletion-checklist.sh`) emits a
62
+ non-blocking nudge when the working tree has new or changed
63
+ deliverable-shaped `.md` files at repo root but no `.html` file there.
64
+ 4. Locale: the `<html lang="…">` attribute is read from
65
+ `harness.config.json` `.claudeMd.humanLanguage`. CSS is locale-agnostic.
66
+
67
+ ## Consequences
68
+
69
+ Positive
70
+
71
+ - One canonical look for every audit, plan, and decision doc. Less drift
72
+ across reports.
73
+ - Human reads once, decides once. Measured benefit: each saved
74
+ clarification turn ≈ 2-5k output tokens + cached input replay; offsets
75
+ HTML markup overhead easily.
76
+ - Self-contained HTML — emailable, Slack-attachable, PR-comment-attachable
77
+ without a build step.
78
+ - Existing 5 HTML reports at repo root (`NEXT_ACTIONS.html`,
79
+ `PHAN_TICH.html`, `E2E_REPORT.html`, `E2E_CI_REPORT.html`,
80
+ `HOOK_AUDIT.html`) validate the pattern in practice — `/deliver-html`
81
+ formalises it.
82
+
83
+ Negative
84
+
85
+ - HTML output is ~30-50% larger in token count than the equivalent MD body.
86
+ Mitigation: the LLM writes MD; only the deterministic side-car emits
87
+ HTML, so the LLM token budget is not affected.
88
+ - HTML diffs are noisy in GitHub. Mitigation: deliverables are artefacts,
89
+ not source. Source-of-truth lives in the conversation / commit message;
90
+ the HTML file is a build output. CI can ignore `*.html` at repo root.
91
+ - Two formats to teach. Mitigation: the rule is "agent reads → MD,
92
+ human reads → HTML"; reviewers learn it on first encounter.
93
+
94
+ ## Alternatives considered
95
+
96
+ - **Always Markdown.** Rejected: the failure mode this ADR closes is
97
+ exactly the "scrolling, miss-the-conclusion" loop that Markdown
98
+ invites for long deliverables. README / CHANGELOG remain MD because
99
+ npm/GitHub renders them and the install snippet must be copy-paste-able.
100
+ - **Generate PDF instead.** Rejected: solo-dev kit, no print pipeline,
101
+ PDFs are effectively read-only in common review tools. HTML is editable in 90
102
+ seconds when a reviewer wants to amend.
103
+ - **Render Markdown server-side (Docusaurus / mdBook / GitHub Pages).**
104
+ Rejected: requires CI + deploy step for every report. HTML at repo root
105
+ opens with one click — zero friction.
106
+ - **Inline a renderer in the IDE.** Rejected: not portable when sending the
107
+ artefact to someone who is not running the kit.
108
+
109
+ ## Out of scope
110
+
111
+ - Existing HTML reports at repo root keep their inline CSS for now.
112
+ Self-contained shipping artefacts trump DRY at solo scale. A future
113
+ cleanup may reference the shared CSS file by relative path — tracked in
114
+ `docs/tech-debt-tracker.md` if/when it becomes load-bearing.
115
+ - Localizing the CSS itself. Style is locale-agnostic by design; only the
116
+ `lang` attribute and body copy differ between locales.
@@ -119,6 +119,38 @@ domain. The agent reads the recommendation, invokes
119
119
  `architecture-reviewer` (or documents why review is unnecessary), and the
120
120
  loop guard (`stop_hook_active`) lets the next stop succeed.
121
121
 
122
+ ## 11. HTML for human deliverables, Markdown for agent files
123
+
124
+ Files an agent reads-and-edits (`CLAUDE.md`, `.claude/skills/*/SKILL.md`,
125
+ `.claude/agents/*.md`, `docs/architecture.md`, `docs/adr/*.md`, ADR notes,
126
+ inline review output) stay as Markdown. Files a HUMAN reads-and-decides
127
+ (audit reports, analyses, plans, decision docs, next-actions reviews,
128
+ status snapshots) ship as self-contained HTML, written by the
129
+ `/deliver-html` skill against the shared dark-theme CSS.
130
+
131
+ Why: a 700-line Markdown deliverable forces the human to scroll, miss the
132
+ conclusion, and ask the agent to clarify — a wasted turn that costs more
133
+ tokens than the HTML overhead it was meant to avoid. HTML deliverables are
134
+ "read once, decide once." Markdown has no visual hierarchy strong enough to
135
+ support decision-grade reading at length.
136
+ Enforced by:
137
+
138
+ - `/deliver-html` skill triggers on user intent ("analyze", "audit",
139
+ "review", "phân tích", "báo cáo", "plan", "proposal", "decision doc",
140
+ "next actions") and writes `<slug>.html` at repo root.
141
+ - Stop hook nudge: when the working tree has new or changed
142
+ deliverable-shaped `.md` files at repo root and no `.html` file there,
143
+ the agent is reminded to invoke `/deliver-html`. Non-blocking.
144
+ - ADR-0002 documents the trade-off (token cost +30-50% on the rendered
145
+ output, paid back by saving ≥1 clarification turn).
146
+
147
+ Counter-rules — when Markdown is still correct:
148
+
149
+ - `README.md`, `CHANGELOG.md` — npm/GitHub renders them; human installs/diffs.
150
+ - Stdout from `/review-this-pr`, `/garbage-collection`, structural reports —
151
+ agent consumes the output.
152
+ - Short summaries (< 30 lines) — answer inline, no file.
153
+
122
154
  ---
123
155
 
124
156
  _Add new principles via `/structural-test-author`, which forces you to
@@ -202,6 +202,49 @@ if [ -f harness.config.json ] && have_jp && command -v git >/dev/null 2>&1; then
202
202
  fi
203
203
  fi
204
204
 
205
# Non-blocking nudge: HTML-for-humans (golden principle #11 / ADR-0002).
# Looks at the untracked, modified, and staged paths git reports. When that
# set contains one or more deliverable-shaped .md files at repo root (i.e.
# not CLAUDE.md / AGENTS.md / README.md / CHANGELOG.md or the other names in
# KIT_MDS) and no root-level .html file, suggest `/deliver-html` on stderr.
# Pure heuristic — never blocks the stop. Skip with
# `AHK_DISABLE_HTML_NUDGE=1`.
if [ "${AHK_DISABLE_HTML_NUDGE:-0}" != "1" ] && command -v git >/dev/null 2>&1; then
  KIT_MDS="CLAUDE.md|AGENTS.md|README.md|CHANGELOG.md|LICENSE.md|CONTRIBUTING.md|CODE_OF_CONDUCT.md|SECURITY.md"

  # Collect the changed-path list once; the .md and .html filters below both
  # reuse it instead of re-running the three git commands.
  CHANGED_PATHS=$(
    {
      git ls-files --others --exclude-standard 2>/dev/null
      git diff --name-only 2>/dev/null
      git diff --name-only --cached 2>/dev/null
    } \
      | sort -u \
      || true
  )

  # Root-level .md files (no slash in the path) that are not kit-owned docs.
  # `|| true` keeps a no-match grep from failing the hook.
  NEW_MD=$(
    printf '%s\n' "$CHANGED_PATHS" \
      | grep -E '^[^/]+\.md$' \
      | grep -Ev "^(${KIT_MDS})$" \
      || true
  )

  if [ -n "$NEW_MD" ]; then
    # Any changed root-level .html counts as "a deliverable was shipped".
    NEW_HTML=$(
      printf '%s\n' "$CHANGED_PATHS" \
        | grep -E '^[^/]+\.html$' \
        || true
    )
    if [ -z "$NEW_HTML" ]; then
      {
        echo
        echo "[nudge] Repo root has new .md file(s) that look like human deliverables:"
        echo "$NEW_MD" | sed 's/^/ - /'
        echo
        echo "Golden principle #11: HTML for human deliverables, MD for agent files."
        echo "If these are reports/audits/plans/decision-docs, ship them via /deliver-html"
        echo "instead. Non-blocking — suppress with AHK_DISABLE_HTML_NUDGE=1."
      } >&2
    fi
  fi
fi
247
+
205
248
  if [ ! -s "$TMPDIR_HOOK/failed.list" ]; then
206
249
  exit 0
207
250
  fi
@@ -0,0 +1,115 @@
1
#!/usr/bin/env bash
# PreToolUse hook (matcher: Edit|Write|MultiEdit) — denies direct edits to
# protected paths. Catches the failure mode where the agent decides to
# "just fix" a baseline file or .claude/ template instead of going through
# the proper /garbage-collection or scaffold-refresh paths.
#
# Protected paths (and why):
#   1. .claude/                           — skills, agents, hooks, settings.
#                                           Use /upgrade flow or edit the source
#                                           template in src/templates/.
#   2. node_modules/                      — package state, regenerated by install.
#   3. .git/                              — repo internals, never hand-edited.
#   4. .harness/structural-baseline.json  — bypasses monotonic guard. Use the
#                                           /garbage-collection skill.
#   5. .harness/installed.json            — kit lockfile, derived from render.
#                                           Hand edits cause spurious "drift"
#                                           warnings on next upgrade.
#
# Input:  the hook payload as JSON on stdin.
# Output: on deny, a PreToolUse permissionDecision JSON object on stdout
#         (or, with neither node nor jq available, the reason on stderr and
#         exit status 2).
#
# Escape hatches:
#   - AHK_ALLOW_BYPASS=1 → log + allow (audit trail in .harness/bypass.log).
#   - AHK_HOOK_MODE=warn → log only, never deny.
set -eo pipefail

INPUT=$(cat)
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"

# Succeeds when jq is on PATH and has not been disabled via AHK_DISABLE_JQ=1.
have_jq() {
  [ "${AHK_DISABLE_JQ:-}" = "1" ] && return 1
  command -v jq >/dev/null 2>&1
}

# Succeeds when some JSON picker is usable: jq, or node plus the bundled
# _lib/json-pick.mjs next to this script.
have_jp() {
  have_jq && return 0
  command -v node >/dev/null 2>&1 && [ -f "$SCRIPT_DIR/_lib/json-pick.mjs" ] && return 0
  return 1
}

# Apply filter $1 to the JSON on stdin and print the raw result.
jp() {
  if have_jq; then
    jq -r "$1"
  else
    node "$SCRIPT_DIR/_lib/json-pick.mjs" "$1"
  fi
}

# Without a JSON picker the payload cannot be inspected; allow the edit.
if ! have_jp; then exit 0; fi

# Resolve the target file from .tool_input.file_path, falling back to
# .tool_input.path. The value may be absolute or repo-relative. A payload the
# picker cannot parse is treated like "no path": under `set -e` the failed
# substitution would otherwise abort the hook with a non-zero status, which
# is inconsistent with the fail-open behaviour above.
FILE=$(printf '%s\n' "$INPUT" | jp '.tool_input.file_path // .tool_input.path // empty') || FILE=""
if [ -z "$FILE" ]; then
  exit 0
fi

# Normalise to a path relative to CWD when possible; otherwise keep absolute.
# Pure parameter expansion: strip a leading "$PWD/" and then a leading "./".
REL_FILE="$FILE"
if [ -n "$PWD" ] && [[ "$FILE" == "$PWD"/* ]]; then
  REL_FILE="${FILE#"$PWD"/}"
fi
REL_FILE="${REL_FILE#./}"

# Every arm matches both the repo-relative form and the same suffix under any
# parent directory, so an absolute path that is not under $PWD is still caught.
REASON=""
case "$REL_FILE" in
  .claude/*|*/.claude/*)
    REASON=".claude/ is owned by the kit's scaffold. To change a skill/agent/hook, edit src/templates/.claude/ in the kit source and re-run 'agent-harness-kit upgrade', or override at the user level (~/.claude/)."
    ;;
  node_modules/*|*/node_modules/*)
    REASON="node_modules/ is regenerated by the package manager. Edit package.json or the upstream package; never hand-edit installed files."
    ;;
  .git/*|*/.git/*)
    REASON=".git/ contains repo internals. Use git commands ('git config', 'git update-ref', etc.) — never hand-edit."
    ;;
  .harness/structural-baseline.json|*/.harness/structural-baseline.json)
    REASON="Direct edits to .harness/structural-baseline.json bypass the baseline-monotonic guard. Use the /garbage-collection skill or fix the underlying violation."
    ;;
  .harness/installed.json|*/.harness/installed.json)
    REASON=".harness/installed.json is the kit lockfile, regenerated by 'agent-harness-kit init/upgrade'. Hand edits cause spurious drift warnings."
    ;;
esac

# Not a protected path — allow.
if [ -z "$REASON" ]; then
  exit 0
fi

# Warn-only mode.
if [ "${AHK_HOOK_MODE:-}" = "warn" ]; then
  echo "[ahk] pretooluse-edit-guard (warn): would deny edit to $REL_FILE — $REASON" >&2
  exit 0
fi

# Bypass with audit log.
if [ "${AHK_ALLOW_BYPASS:-}" = "1" ]; then
  mkdir -p .harness
  TS=$(date -u +%Y-%m-%dT%H:%M:%SZ)
  SHA=$(git rev-parse --short HEAD 2>/dev/null || echo 'no-git')
  # JSON-escape the path: backslashes first, then double quotes, so the
  # backslashes introduced by the second step are not doubled again.
  ESCAPED=${REL_FILE//\\/\\\\}
  ESCAPED=${ESCAPED//\"/\\\"}
  printf '{"ts":"%s","sha":"%s","bypass":"AHK_ALLOW_BYPASS","file":"%s","rule":"pretooluse-edit-guard"}\n' \
    "$TS" "$SHA" "$ESCAPED" >> .harness/bypass.log
  exit 0
fi

# Deny via JSON. Prefer node, then jq; with neither, fall back to stderr plus
# exit status 2.
if command -v node >/dev/null 2>&1; then
  node -e "
    const reason = process.argv[1];
    const out = {
      hookSpecificOutput: {
        hookEventName: 'PreToolUse',
        permissionDecision: 'deny',
        permissionDecisionReason: reason
      }
    };
    process.stdout.write(JSON.stringify(out));
  " "$REASON"
elif have_jq; then
  jq -nc --arg r "$REASON" \
    '{hookSpecificOutput: {hookEventName: "PreToolUse", permissionDecision: "deny", permissionDecisionReason: $r}}'
else
  echo "$REASON" >&2
  exit 2
fi
exit 0
@@ -45,4 +45,10 @@ fi
45
45
  mkdir -p .harness
46
46
  TS=$(date +"%Y-%m-%d %H:%M")
47
47
  echo "$TS | session_end | $REASON | $BR | $SHA" >> .harness/PROGRESS.md
48
+
49
# Rollup side-car: hand the hook payload to scripts/session-rollup.mjs, which
# appends a JSONL record to .harness/telemetry.jsonl. Runs only when both the
# script and node are present. Best-effort: stderr is discarded and a failing
# side-car never breaks the cleanup-only SessionEnd contract.
if [ -f scripts/session-rollup.mjs ]; then
  if command -v node >/dev/null 2>&1; then
    printf '%s' "$INPUT" | node scripts/session-rollup.mjs 2>/dev/null || true
  fi
fi
48
54
  exit 0
@@ -0,0 +1,96 @@
1
+ #!/usr/bin/env node
2
+ // session-rollup.mjs — deterministic SessionEnd side-car. Writes a single
3
+ // JSONL record summarising the session into .harness/telemetry.jsonl. Pure
4
+ // Node (no jq dependency).
5
+ //
6
+ // Record shape:
7
+ // { ts, event: "session_rollup", reason, branch, sha, uncommitted,
8
+ // skills_invoked: [...], session_id }
9
+ //
10
+ // Called from session-end.sh after the human-readable PROGRESS.md line is
11
+ // written, so a single session contributes one PROGRESS.md line + one
12
+ // telemetry rollup record.
13
+
14
+ import { readFileSync, existsSync, mkdirSync, appendFileSync } from "node:fs";
15
+ import { resolve } from "node:path";
16
+ import { spawnSync } from "node:child_process";
17
+
18
+ const ROOT = process.env.CLAUDE_PROJECT_DIR || process.cwd();
19
+
20
// Read everything available on stdin (fd 0) as UTF-8 text. SessionEnd hooks
// pass their JSON payload this way. Any read error yields "" so the caller
// can fall back to defaults.
function readStdinSync() {
  let payload = "";
  try {
    payload = readFileSync(0, "utf8");
  } catch {
    // Keep the empty default.
  }
  return payload;
}
28
+
29
// Parse `s` as JSON. Returns {} when `s` is empty/falsy or is not valid
// JSON; otherwise returns whatever JSON.parse produced.
function safeJSON(s) {
  if (s) {
    try {
      return JSON.parse(s);
    } catch {
      // Invalid JSON: fall through to the empty-object default.
    }
  }
  return {};
}
33
+
34
// Run `git <args>` in ROOT and return its trimmed stdout.
//
// Returns `def` when the command cannot be run or exits non-zero, and also
// when it succeeds with empty output. The empty-output case matters for
// `git branch --show-current`, which exits 0 and prints nothing on a
// detached HEAD; without it the caller's "(detached)" default is never used.
function git(args, def = "") {
  const r = spawnSync("git", args, { cwd: ROOT, encoding: "utf8" });
  if (r.status !== 0) return def;
  const out = (r.stdout || "").trim();
  return out === "" ? def : out;
}
39
+
40
// Collect the `skill` values of the skill_invoked records that follow the
// most recent session_rollup record in .harness/telemetry.jsonl (or all of
// them when there is no prior rollup). Lines that are blank or fail to parse
// are skipped. At most the last 50 entries are returned; a missing file
// yields [].
function recentSkillInvocations() {
  const telemetryPath = resolve(ROOT, ".harness/telemetry.jsonl");
  if (!existsSync(telemetryPath)) return [];

  const entries = readFileSync(telemetryPath, "utf8").split("\n").filter(Boolean);

  // Single forward pass: every session_rollup record resets the collection,
  // so what remains at the end is exactly the window after the last rollup.
  let collected = [];
  for (const entry of entries) {
    try {
      const rec = JSON.parse(entry);
      if (rec.event === "session_rollup") {
        collected = [];
      } else if (rec.event === "skill_invoked" && rec.skill) {
        collected.push(rec.skill);
      }
    } catch {
      /* skip malformed */
    }
  }
  return collected.slice(-50);
}
68
+
69
// Entry point: read the hook payload from stdin, gather git state and the
// skills invoked since the previous rollup, and append one session_rollup
// record to .harness/telemetry.jsonl under ROOT (creating .harness if
// needed).
function main() {
  const parsed = safeJSON(readStdinSync());
  // Guard against a payload that parses to a non-object (e.g. `null`), which
  // would otherwise throw on property access below.
  const input = parsed && typeof parsed === "object" ? parsed : {};

  // The Claude Code SessionEnd payload names the end cause `reason`;
  // `end_reason` is kept as a fallback for any caller using the earlier name.
  const reason = input.reason || input.end_reason || "unknown";
  const sessionId = input.session_id || "";

  const branch = git(["branch", "--show-current"], "(detached)");
  const sha = git(["rev-parse", "--short", "HEAD"], "(no-git)");
  // One line of `git status --short` per changed path.
  const uncommittedRaw = git(["status", "--short"], "");
  const uncommitted = uncommittedRaw ? uncommittedRaw.split("\n").filter(Boolean).length : 0;
  const skills = recentSkillInvocations();

  const record = {
    ts: new Date().toISOString(),
    event: "session_rollup",
    reason,
    session_id: sessionId,
    branch,
    sha,
    uncommitted,
    skills_invoked: skills,
  };

  const outPath = resolve(ROOT, ".harness/telemetry.jsonl");
  mkdirSync(resolve(ROOT, ".harness"), { recursive: true });
  appendFileSync(outPath, JSON.stringify(record) + "\n");
}

main();
@@ -60,6 +60,31 @@ if command -v git >/dev/null 2>&1 && git rev-parse --git-dir >/dev/null 2>&1; th
60
60
  CTX+="[harness] git: branch=$BR, uncommitted=$COUNT file(s)"$'\n'
61
61
  fi
62
62
 
63
# 1b. One-shot daily pill (harness version + date).
# `mkdir -p .harness/state` then check the stamp file. Today's pill fires
# once per UTC day per project; subsequent SessionStarts that day stay
# silent on this line so the model doesn't see the same banner thirty
# times per coding day.
mkdir -p .harness/state 2>/dev/null || true
STAMP_FILE=".harness/state/session-pill.stamp"
TODAY=$(date -u +%Y-%m-%d)
LAST=""
if [ -f "$STAMP_FILE" ]; then
  LAST=$(cat "$STAMP_FILE" 2>/dev/null || echo "")
fi
if [ "$LAST" != "$TODAY" ]; then
  # Version lookup: harness.config.json first, then .harness/installed.json,
  # then the literal "unknown".
  # NOTE(review): the file is passed both as an argument and on stdin. The jp
  # helpers in the sibling hook scripts forward only "$1" and read stdin, in
  # which case the bare file argument is ignored and the lookup always comes
  # back empty. Supplying both works for either helper shape — confirm
  # against this script's own jp definition and drop whichever is redundant.
  HARNESS_VER=""
  if [ -f harness.config.json ] && have_jp; then
    HARNESS_VER=$(jp '.version // empty' harness.config.json < harness.config.json 2>/dev/null || echo "")
  fi
  if [ -z "$HARNESS_VER" ] && [ -f .harness/installed.json ] && have_jp; then
    HARNESS_VER=$(jp '.version // empty' .harness/installed.json < .harness/installed.json 2>/dev/null || echo "")
  fi
  if [ -z "$HARNESS_VER" ]; then
    HARNESS_VER="unknown"
  fi
  CTX+="[harness] pill (one/day): kit=$HARNESS_VER · date=$TODAY"$'\n'
  # Best-effort stamp write; if it fails the pill simply shows again.
  printf '%s' "$TODAY" > "$STAMP_FILE" 2>/dev/null || true
fi
87
+
63
88
  # 2. Current feature (from feature_list.json) — picks the first entry with
64
89
  # passes=false so the model resumes the in-flight work, not a finished
65
90
  # one. Skipped if file missing or jp unavailable.
@@ -0,0 +1,76 @@
1
#!/usr/bin/env bash
# SubagentStop hook — fires when a subagent finishes its turn (Task tool).
# Triggers the same structural-test that PostToolUse(Edit) runs, because a
# subagent can edit files in batches that individually pass but jointly drift
# off-layer. Running the check at subagent boundary catches that drift early.
#
# Contract:
#   - Never blocks (exit 0 even on failure — the parent Stop hook handles the
#     final gate). We only emit a stderr summary that Claude reads.
#   - Telemetry append to .harness/telemetry.jsonl as {event:"subagent_stop"}.
#   - Skipped when harness.config.json#structuralTest.engine === "none" (the
#     "structural test not yet wired" escape hatch used by polyglot scaffolds).
set -eo pipefail

INPUT=$(cat)
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"

# Succeeds when jq is on PATH and has not been disabled via AHK_DISABLE_JQ=1.
have_jq() {
  [ "${AHK_DISABLE_JQ:-}" = "1" ] && return 1
  command -v jq >/dev/null 2>&1
}

# Succeeds when some JSON picker is usable: jq, or node plus the bundled
# _lib/json-pick.mjs next to this script.
have_jp() {
  have_jq && return 0
  command -v node >/dev/null 2>&1 && [ -f "$SCRIPT_DIR/_lib/json-pick.mjs" ] && return 0
  return 1
}

# Apply filter $1 to the JSON on stdin and print the raw result.
jp() {
  if have_jq; then
    jq -r "$1"
  else
    node "$SCRIPT_DIR/_lib/json-pick.mjs" "$1"
  fi
}

# Label for the telemetry record: .subagent, else .session_id, else "unknown".
SUBAGENT="(unknown)"
if have_jp; then
  SUBAGENT=$(echo "$INPUT" | jp '.subagent // .session_id // "unknown"' 2>/dev/null || echo "unknown")
fi

# JSON-escape the label (backslashes first, then double quotes) and flatten
# newlines, so an unusual value cannot produce a malformed JSONL line.
SUBAGENT_JSON=${SUBAGENT//\\/\\\\}
SUBAGENT_JSON=${SUBAGENT_JSON//\"/\\\"}
SUBAGENT_JSON=${SUBAGENT_JSON//$'\n'/ }

# Telemetry first so we record every subagent boundary, even if the
# structural-test bails below. Best-effort: under `set -e` a failed mkdir or
# append would otherwise exit non-zero and break the never-blocks contract.
mkdir -p .harness || true
TS=$(date -u +%Y-%m-%dT%H:%M:%SZ)
SHA=$(git rev-parse --short HEAD 2>/dev/null || echo 'no-git')
printf '{"ts":"%s","event":"subagent_stop","subagent":"%s","sha":"%s"}\n' \
  "$TS" "$SUBAGENT_JSON" "$SHA" >> .harness/telemetry.jsonl || true

# Skip if structural test disabled.
if [ -f harness.config.json ] \
  && grep -qE '"engine"[[:space:]]*:[[:space:]]*"none"' harness.config.json; then
  exit 0
fi

# AHK_HOOK_MODE=warn → log only, don't run.
if [ "${AHK_HOOK_MODE:-}" = "warn" ]; then
  exit 0
fi

# Run structural test workspace-wide. Subagents typically touch multiple
# files; per-file scoping would miss the cross-file drift case. Cap output
# to 30 lines on stderr so the parent agent sees the summary without flood.
# `pipefail` makes the pipeline report the checker's failure through `tail`.
# When neither entry point exists the check is skipped silently — the
# boundary is already logged in telemetry.
if [ -f harness/structural-check.mjs ] && command -v node >/dev/null 2>&1; then
  if ! node harness/structural-check.mjs 2>&1 | tail -30 >&2; then
    echo "[ahk] subagent_stop: structural-test reported violations (see above). Continuing — parent Stop hook will gate." >&2
  fi
elif command -v npm >/dev/null 2>&1 && [ -f package.json ] \
  && grep -q '"harness:check"' package.json 2>/dev/null; then
  if ! npm run --silent harness:check 2>&1 | tail -30 >&2; then
    echo "[ahk] subagent_stop: structural-test reported violations (see above). Continuing — parent Stop hook will gate." >&2
  fi
fi
exit 0