agent-harness-kit 0.7.0 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. package/.claude-plugin/marketplace.json +2 -2
  2. package/.claude-plugin/plugin.json +1 -1
  3. package/bin/cli.mjs +26 -0
  4. package/package.json +1 -1
  5. package/src/core/doctor.mjs +47 -0
  6. package/src/core/render-templates.mjs +119 -5
  7. package/src/core/upgrade.mjs +81 -60
  8. package/src/templates/.claude/agents/api-consistency-reviewer.md.vi +37 -0
  9. package/src/templates/.claude/agents/architecture-reviewer.md.vi.hbs +45 -0
  10. package/src/templates/.claude/agents/performance-reviewer.md.vi +39 -0
  11. package/src/templates/.claude/agents/reliability-reviewer.md.vi +42 -0
  12. package/src/templates/.claude/agents/security-reviewer.md.vi +43 -0
  13. package/src/templates/.claude/hooks/hooks.json +46 -0
  14. package/src/templates/.claude/output-styles/harness-terse.md +42 -0
  15. package/src/templates/.claude/settings.json.hbs +2 -1
  16. package/src/templates/.claude/skills/add-adr/SKILL.md.vi +64 -0
  17. package/src/templates/.claude/skills/add-feature/SKILL.md.vi.hbs +50 -0
  18. package/src/templates/.claude/skills/debug-flow/SKILL.md.vi.hbs +42 -0
  19. package/src/templates/.claude/skills/doc-drift-scan/SKILL.md +15 -10
  20. package/src/templates/.claude/skills/doc-drift-scan/SKILL.md.vi +52 -0
  21. package/src/templates/.claude/skills/doc-drift-scan/scripts/scan-paths.mjs +64 -0
  22. package/src/templates/.claude/skills/eval-runner/SKILL.md.vi +59 -0
  23. package/src/templates/.claude/skills/garbage-collection/SKILL.md.hbs +14 -5
  24. package/src/templates/.claude/skills/garbage-collection/SKILL.md.vi.hbs +58 -0
  25. package/src/templates/.claude/skills/garbage-collection/scripts/gc-classify.mjs +77 -0
  26. package/src/templates/.claude/skills/i18n-add-locale/SKILL.md +52 -0
  27. package/src/templates/.claude/skills/i18n-add-locale/SKILL.md.vi +56 -0
  28. package/src/templates/.claude/skills/i18n-add-locale/scripts/locale-scaffold.mjs +120 -0
  29. package/src/templates/.claude/skills/inspect-app/SKILL.md.vi +61 -0
  30. package/src/templates/.claude/skills/inspect-module/SKILL.md.hbs +17 -14
  31. package/src/templates/.claude/skills/inspect-module/SKILL.md.vi.hbs +57 -0
  32. package/src/templates/.claude/skills/inspect-module/scripts/module-summary.mjs +144 -0
  33. package/src/templates/.claude/skills/map-domain/SKILL.md +42 -0
  34. package/src/templates/.claude/skills/map-domain/SKILL.md.vi +42 -0
  35. package/src/templates/.claude/skills/map-domain/scripts/domain-map.mjs +145 -0
  36. package/src/templates/.claude/skills/propose-harness-improvement/SKILL.md.vi +49 -0
  37. package/src/templates/.claude/skills/propose-harness-improvement/scripts/improvement-bundle.mjs +172 -0
  38. package/src/templates/.claude/skills/refactor-feature/SKILL.md +60 -0
  39. package/src/templates/.claude/skills/refactor-feature/SKILL.md.vi +64 -0
  40. package/src/templates/.claude/skills/refactor-feature/scripts/feature-diff.mjs +146 -0
  41. package/src/templates/.claude/skills/review-this-pr/SKILL.md +59 -0
  42. package/src/templates/.claude/skills/review-this-pr/SKILL.md.vi +63 -0
  43. package/src/templates/.claude/skills/review-this-pr/scripts/pr-review-driver.mjs +152 -0
  44. package/src/templates/.claude/skills/structural-test-author/SKILL.md.vi.hbs +50 -0
  45. package/src/templates/.claude/skills/write-skill/SKILL.md.vi +43 -0
  46. package/src/templates/.harness/eval/rubrics/feature-step-done.mjs +148 -0
  47. package/src/templates/.harness/eval/tasks/feature-step-done.answer.md +53 -0
  48. package/src/templates/.harness/eval/tasks/feature-step-done.json +10 -0
  49. package/src/templates/.harness/eval/tasks/feature-step-done.prompt.md +43 -0
  50. package/src/templates/.mcp.json.example +35 -0
  51. package/src/templates/CLAUDE.md.hbs +9 -5
  52. package/src/templates/CLAUDE.md.vi.hbs +9 -5
  53. package/src/templates/scripts/notify-on-block.sh.hbs +73 -0
  54. package/src/templates/scripts/pretooluse-edit-guard.sh.hbs +115 -0
  55. package/src/templates/scripts/session-end.sh.hbs +6 -0
  56. package/src/templates/scripts/session-rollup.mjs +96 -0
  57. package/src/templates/scripts/session-start.sh.hbs +25 -0
  58. package/src/templates/scripts/statusline.mjs +63 -0
  59. package/src/templates/scripts/subagent-stop.sh.hbs +76 -0
  60. package/src/templates/scripts/userprompt-guard.sh.hbs +100 -0
@@ -0,0 +1,148 @@
1
+ #!/usr/bin/env node
2
+ // feature-step-done.mjs — eval rubric for the "feature step done" task.
3
+ // Reads the agent's transcript + the final feature_list.json + the diff;
4
+ // returns a JSON verdict on the outcome / process / style / efficiency
5
+ // dimensions.
6
+ //
7
+ // Invocation (from eval-runner.mjs):
8
+ // node .harness/eval/rubrics/feature-step-done.mjs --transcript <path> --task <task.json>
9
+ //
10
+ // Exit 0 = rubric ran. The JSON tail communicates pass/fail.
11
+
12
+ import { readFileSync, existsSync } from "node:fs";
13
+ import { resolve } from "node:path";
14
+ import { spawnSync } from "node:child_process";
15
+
16
+ const ROOT = process.env.CLAUDE_PROJECT_DIR || process.cwd();
17
+
18
/**
 * Extract the `--transcript` and `--task` option values from a CLI argument list.
 *
 * @param {string[]} argv - Arguments after the script name.
 * @returns {{ transcript: (string|null|undefined), task: (string|null|undefined) }}
 *   The token following each flag; `null` when the flag never appears.
 */
function parseArgs(argv) {
  const flagToField = new Map([
    ["--transcript", "transcript"],
    ["--task", "task"],
  ]);
  const parsed = { transcript: null, task: null };
  let pos = 0;
  while (pos < argv.length) {
    const field = flagToField.get(argv[pos]);
    if (field !== undefined) {
      // The token right after a recognised flag is consumed as its value.
      pos += 1;
      parsed[field] = argv[pos];
    }
    pos += 1;
  }
  return parsed;
}
26
+
27
/**
 * Parse JSON text without throwing.
 *
 * @param {string} s - Candidate JSON text.
 * @param {*} [def=null] - Value handed back when `s` is not valid JSON.
 * @returns {*} The parsed value, or `def` on a parse error.
 */
function safeJSON(s, def = null) {
  let parsed;
  try {
    parsed = JSON.parse(s);
  } catch {
    return def;
  }
  return parsed;
}
30
+
31
/**
 * Read a file as UTF-8 text, substituting a fallback when the read fails.
 *
 * @param {string} path - File to read.
 * @param {*} [fallback=null] - Returned when the file cannot be read.
 * @returns {*} File contents, or `fallback` on any read error.
 */
function loadFile(path, fallback = null) {
  let text = fallback;
  try {
    text = readFileSync(path, "utf8");
  } catch {
    // Any read error (missing file, permissions, ...) yields the fallback.
  }
  return text;
}
34
+
35
/**
 * Load and parse `feature_list.json` from the project root.
 *
 * @returns {*} The parsed document, or `null` when the file is missing,
 *   empty, or not valid JSON.
 */
function loadFeatureList() {
  const contents = loadFile(resolve(ROOT, "feature_list.json"));
  if (!contents) return null;
  return safeJSON(contents);
}
40
+
41
/**
 * List the files changed between `HEAD~1` and `HEAD`.
 *
 * Runs `git diff --name-only HEAD~1...HEAD` in the project root. The source
 * comments state that eval-runner pins HEAD with a tag before each task, so
 * this range is meant to cover the agent's run.
 *
 * @returns {string[]} Changed paths as printed by git; empty when git exits
 *   non-zero or could not be run.
 */
function gitDiffFiles() {
  const { status, stdout } = spawnSync(
    "git",
    ["diff", "--name-only", "HEAD~1...HEAD"],
    { cwd: ROOT, encoding: "utf8" },
  );
  if (status !== 0) return [];
  const names = [];
  for (const entry of (stdout || "").split("\n")) {
    if (entry) names.push(entry);
  }
  return names;
}
50
+
51
/**
 * Collect tool invocations from a JSONL transcript.
 *
 * Two record shapes are recognised:
 *   1. flat records — `type === "tool_use"`, or a truthy `tool` / `skill` field;
 *   2. message records whose `message.content` array holds `tool_use` blocks.
 *      NOTE(review): shape 2 is how Claude Code's stream-json output appears
 *      to nest tool calls — confirm against the transcripts eval-runner writes.
 *
 * The tool name is taken from `tool`, `skill`, then `name`, and only falls
 * back to the record `type` when none is set, so a named `tool_use` block is
 * reported under its real name rather than the literal "tool_use".
 *
 * @param {string} transcriptPath - Path to the JSONL transcript.
 * @returns {{ tool: string, input: * }[]} One entry per detected call, in file
 *   order. Empty when the file is unreadable or holds no tool calls.
 */
function transcriptToolCalls(transcriptPath) {
  const body = loadFile(transcriptPath, "");
  const calls = [];
  const toCall = (rec) => ({
    tool: rec.tool || rec.skill || rec.name || rec.type,
    input: rec.input || rec.tool_input || rec.arguments || null,
  });

  for (const line of body.split("\n")) {
    if (!line.trim()) continue;
    const rec = safeJSON(line);
    // Malformed lines and bare scalars carry no tool information.
    if (!rec || typeof rec !== "object") continue;

    if (rec.type === "tool_use" || rec.tool || rec.skill) {
      calls.push(toCall(rec));
    }

    const blocks = rec.message?.content;
    if (Array.isArray(blocks)) {
      for (const block of blocks) {
        if (block?.type === "tool_use") calls.push(toCall(block));
      }
    }
  }
  return calls;
}
70
+
71
/**
 * Grade one eval run on the outcome / process / style / efficiency dimensions.
 *
 * @param {object} args
 * @param {object|null} args.task - Parsed task JSON; only
 *   `expected.filesChanged.{min,max}` is read.
 * @param {object|null} args.fl - Parsed feature_list.json after the run.
 * @param {string[]} args.diffFiles - Paths changed during the run.
 * @param {{ tool: string, input: * }[]} args.toolCalls - Tool calls from the transcript.
 * @returns {{ overall: "PASS"|"FAIL", dimensions: object, reasons: string[], diff_files: string[] }}
 *   `overall` is PASS only when both `outcome` and `process` pass; `style`
 *   and `efficiency` never fail the run on their own.
 */
function grade({ task, fl, diffFiles, toolCalls }) {
  const dims = { outcome: "fail", process: "fail", style: "warn", efficiency: "warn" };
  const reasons = [];

  // Git prints forward-slash paths with no leading "./". Normalise both sides
  // so a tests[] entry such as "./tests/x.test.ts" still matches the diff.
  const normalizePath = (p) => String(p).replace(/\\/g, "/").replace(/^\.\//, "");
  const changedPaths = new Set(diffFiles.map(normalizePath));

  // --- outcome ---
  // features[0].steps[0].passes === true AND tests[] is non-empty AND
  // at least one tests[] entry appears in diffFiles.
  const step = fl?.features?.[0]?.steps?.[0];
  if (!step) {
    reasons.push("outcome: no features[0].steps[0] found in feature_list.json after run");
  } else if (step.passes !== true) {
    reasons.push(`outcome: features[0].steps[0].passes is ${JSON.stringify(step.passes)}, want true`);
  } else if (!Array.isArray(step.tests) || step.tests.length === 0) {
    reasons.push("outcome: features[0].steps[0].tests is empty — done flipped without test reference");
  } else {
    const testInDiff = step.tests.some((t) => changedPaths.has(normalizePath(t)));
    if (!testInDiff) {
      reasons.push(`outcome: feature_list.json#tests references [${step.tests.join(", ")}] but none appear in the diff`);
    } else {
      dims.outcome = "pass";
    }
  }

  // --- process ---
  // The agent should invoke /add-feature (or /refactor-feature) AND make
  // a write to the handler + test file in the same run.
  const skillPattern = /(add-feature|refactor-feature)/i;
  const ranSkill = toolCalls.some(
    (c) => skillPattern.test(c.tool || "") || skillPattern.test(c.input?.skill || ""),
  );
  // One shared definition of "test file" keeps a *.spec.* file from being
  // counted as a handler and a test at the same time.
  const isTestFile = (f) => /test/i.test(f) || /\.spec\./.test(f);
  const isCodeFile = (f) => /\.(ts|tsx|js|mjs|py|rs|go)$/.test(f);
  const handlerWrites = diffFiles.filter((f) => isCodeFile(f) && !isTestFile(f));
  const testWrites = diffFiles.filter(isTestFile);
  if (!ranSkill) {
    reasons.push("process: agent did not invoke /add-feature or /refactor-feature");
  } else if (handlerWrites.length === 0) {
    reasons.push("process: no handler file appeared in diff");
  } else if (testWrites.length === 0) {
    reasons.push("process: no test file appeared in diff");
  } else {
    dims.process = "pass";
  }

  // --- style ---
  // PROGRESS.md should be appended (kit convention). Soft check.
  if (changedPaths.has(".harness/PROGRESS.md")) {
    dims.style = "pass";
  } else {
    reasons.push("style: .harness/PROGRESS.md not appended (soft fail)");
  }

  // --- efficiency ---
  // expected.tokensMax — actual token count comes from transcript meta.
  // Without that we can't grade hard; warn-pass if filesChanged within
  // task.expected.filesChanged bounds.
  const max = task?.expected?.filesChanged?.max ?? 99;
  const min = task?.expected?.filesChanged?.min ?? 1;
  if (diffFiles.length >= min && diffFiles.length <= max) {
    dims.efficiency = "pass";
  } else {
    reasons.push(`efficiency: ${diffFiles.length} files changed, want ${min}-${max}`);
  }

  const overall = (dims.outcome === "pass" && dims.process === "pass") ? "PASS" : "FAIL";
  return { overall, dimensions: dims, reasons, diff_files: diffFiles };
}
137
+
138
/**
 * CLI entry point: gather the task definition, the post-run feature list, the
 * git diff and the transcript's tool calls, grade them, and print the verdict
 * as pretty-printed JSON on stdout.
 */
function main() {
  const args = parseArgs(process.argv.slice(2));

  let task = null;
  if (args.task) {
    // An unreadable task file degrades to "" and therefore to a null task.
    const taskText = loadFile(resolve(ROOT, args.task)) ?? "";
    task = safeJSON(taskText, null);
  }

  const fl = loadFeatureList();
  const diffFiles = gitDiffFiles();

  let toolCalls = [];
  if (args.transcript) {
    toolCalls = transcriptToolCalls(resolve(ROOT, args.transcript));
  }

  const verdict = grade({ task, fl, diffFiles, toolCalls });
  process.stdout.write(`${JSON.stringify(verdict, null, 2)}\n`);
}
147
+
148
+ main();
@@ -0,0 +1,53 @@
1
+ # Golden answer: feature-step-done
2
+
3
+ This file accompanies the `feature-step-done.mjs` rubric as a reference for
4
+ what an acceptable agent run looks like. The rubric does not require
5
+ byte-exact match — it checks structural properties (file count, JSON
6
+ shape) rather than identical content.
7
+
8
+ ## Files expected in the agent's diff (representative)
9
+
10
+ - `src/runtime/health.ts` (or equivalent path for the project's stack)
11
+ - `tests/health.test.ts` (or equivalent test path)
12
+ - `feature_list.json` (modified in place)
13
+ - `.harness/PROGRESS.md` (appended)
14
+
15
+ ## feature_list.json shape after the agent's edit
16
+
17
+ ```json
18
+ {
19
+ "features": [
20
+ {
21
+ "id": "health-endpoint",
22
+ "title": "GET /health returns 200",
23
+ "passes": true,
24
+ "steps": [
25
+ {
26
+ "id": "s1",
27
+ "passes": true,
28
+ "tests": ["tests/health.test.ts"]
29
+ }
30
+ ]
31
+ }
32
+ ]
33
+ }
34
+ ```
35
+
36
+ Key invariants the rubric checks:
37
+
38
+ 1. `features[0].steps[0].passes === true`
39
+ 2. `features[0].steps[0].tests` is a non-empty array
40
+ 3. At least one path in `tests` exists in the agent's file diff
41
+ 4. `features.length` is unchanged from setup (no new features mid-session) — intended invariant; the current rubric script does not yet enforce it
42
+
43
+ ## Transcript shape expected
44
+
45
+ The transcript should include:
46
+
47
+ - A call to `/add-feature` (or equivalent) early in the run.
48
+ - At least one Write/Edit on the handler file.
49
+ - At least one Write/Edit on a test file matching the `tests[]` array.
50
+ - An Edit on `feature_list.json` flipping `passes: true`.
51
+
52
+ The rubric does not require exact tool-call order — only that all four
53
+ events appear in the transcript.
@@ -0,0 +1,10 @@
1
+ {
2
+ "id": "feature-step-done",
3
+ "description": "Verifies that when an agent implements a feature step, it flips passes:false→true in feature_list.json AND adds a tests[] reference (or testCommit). Catches the 'mark done without tests' anti-pattern that the kit's golden principles forbid. Graded by .harness/eval/rubrics/feature-step-done.mjs.",
4
+ "input": "feature_list.json has one feature `health-endpoint` with step `s1: GET /health returns 200`, passes:false. Implement the endpoint, write a smoke test that hits it, then update feature_list.json#features[0].steps[0] with passes:true AND tests:[<test_file_path>]. Do not delete or reorder other entries.",
5
+ "expected": {
6
+ "filesChanged": { "min": 2, "max": 5 },
7
+ "tokensMax": 25000,
8
+ "rubric": ".harness/eval/rubrics/feature-step-done.mjs"
9
+ }
10
+ }
@@ -0,0 +1,43 @@
1
+ # Eval task: feature-step-done
2
+
3
+ ## What the harness is testing
4
+
5
+ The kit's "no done without proof" rule: an agent that flips a feature
6
+ step from `passes: false` to `passes: true` MUST also commit a test
7
+ covering the new behavior. This eval gives the agent a one-step feature,
8
+ asks it to implement, and grades whether the test landed alongside the
9
+ flip.
10
+
11
+ ## Prompt given to the agent
12
+
13
+ ```
14
+ feature_list.json has one feature `health-endpoint` with step
15
+ `s1: GET /health returns 200`, passes:false. Implement the endpoint,
16
+ write a smoke test that hits it, then update feature_list.json#features[0].steps[0]
17
+ with passes:true AND tests:[<test_file_path>]. Do not delete or
18
+ reorder other entries.
19
+ ```
20
+
21
+ ## What "good" looks like
22
+
23
+ 1. The agent invokes `/add-feature` (or `/refactor-feature` for a re-shape).
24
+ 2. A handler file appears (e.g. `src/runtime/health.ts`).
25
+ 3. A test file appears (e.g. `tests/health.test.ts`).
26
+ 4. `feature_list.json` is edited in-place:
27
+ - `features[0].steps[0].passes` is now `true`.
28
+ - `features[0].steps[0].tests` includes the new test path.
29
+ 5. PROGRESS.md gets a one-line append (kit convention).
30
+
31
+ ## What "bad" looks like
32
+
33
+ - Passes flipped to true with no test file in the diff. (Hard fail.)
34
+ - New feature added to feature_list.json mid-session. (Hard fail.)
35
+ - Step entry deleted or reordered. (Hard fail.)
36
+ - Refactor of unrelated code in the same commit. (Soft fail.)
37
+
38
+ ## Why this matters
39
+
40
+ Without enforcement, the most common agent failure is "looks done"
41
+ (passes:true) without test coverage. The kit's `refactor-feature`
42
+ side-car gates this at edit time; the eval rubric confirms the gate
43
+ holds against an end-to-end run.
@@ -0,0 +1,35 @@
1
+ {
2
+ "$schema": "https://json.schemastore.org/claude-code-mcp.json",
3
+ "_comment": "Rename to .mcp.json or run `agent-harness-kit init --with-mcp` to enable. Each server below is OFF until uncommented + credentialed.",
4
+ "mcpServers": {
5
+ "playwright": {
6
+ "_comment": "Headless browser for /review-this-pr UI smoke checks. Requires `npx playwright install` first.",
7
+ "command": "npx",
8
+ "args": ["-y", "@playwright/mcp@latest"],
9
+ "env": {
10
+ "PLAYWRIGHT_BROWSERS_PATH": "0"
11
+ }
12
+ },
13
+ "github": {
14
+ "_comment": "Read/write GitHub issues + PRs from inside Claude Code. Needs GITHUB_PERSONAL_ACCESS_TOKEN with `repo` + `read:org` scopes.",
15
+ "command": "npx",
16
+ "args": ["-y", "@modelcontextprotocol/server-github"],
17
+ "env": {
18
+ "GITHUB_PERSONAL_ACCESS_TOKEN": "${GITHUB_PERSONAL_ACCESS_TOKEN}"
19
+ }
20
+ },
21
+ "filesystem-readonly": {
22
+ "_comment": "Read-only access to a sibling repo (docs / reference code). Adjust ALLOWED_PATHS for your layout.",
23
+ "command": "npx",
24
+ "args": ["-y", "@modelcontextprotocol/server-filesystem"],
25
+ "env": {
26
+ "ALLOWED_PATHS": "${HOME}/Dev/reference-repo"
27
+ }
28
+ }
29
+ },
30
+ "_recommended_skills": {
31
+ "playwright": "Useful for /review-this-pr when UI files changed — runs smoke against a dev server.",
32
+ "github": "Useful for /garbage-collection when proposing PRs and for /review-this-pr to read base branch.",
33
+ "filesystem-readonly": "Useful when /inspect-module needs to peek at a sibling repo without copying code in."
34
+ }
35
+ }
@@ -31,11 +31,15 @@ Full list: `docs/golden-principles.md`.
31
31
 
32
32
  ## Where to look (read on demand)
33
33
 
34
- - `docs/architecture.md` — read when adding a new module or moving code.
35
- - `docs/adr/` — read when changing public APIs.
36
- - `docs/golden-principles.md` — read before any refactor.
37
- - `feature_list.json` — read before claiming a feature is done.
38
- - `.harness/PROGRESS.md` — read at session start; write at session end.
34
+ The lines below use Claude Code 2.1+ `@`-imports — Claude loads the file
35
+ into context only when this section is referenced, keeping the working
36
+ CLAUDE.md tiny.
37
+
38
+ - @docs/architecture.md — when adding a new module or moving code.
39
+ - @docs/adr/ — when changing public APIs.
40
+ - @docs/golden-principles.md — before any refactor.
41
+ - @feature_list.json — before claiming a feature is done.
42
+ - `.harness/PROGRESS.md` — read at session start; append at session end (kit-managed, not @-imported).
39
43
 
40
44
  ## Skills you should use
41
45
 
@@ -30,11 +30,15 @@ Danh sách đầy đủ: `docs/golden-principles.md`.
30
30
 
31
31
  ## Đọc khi cần (read on demand)
32
32
 
33
- - `docs/architecture.md` — đọc khi thêm module hoặc dời code.
34
- - `docs/adr/` — đọc khi đổi public API.
35
- - `docs/golden-principles.md` — đọc trước mọi refactor.
36
- - `feature_list.json` — đọc trước khi tuyên bố một feature đã xong.
37
- - `.harness/PROGRESS.md` — đọc đầu session; ghi cuối session.
33
+ Các dòng dưới dùng cú pháp `@`-import của Claude Code 2.1+ — Claude chỉ
34
+ nạp file vào context khi section này được tham chiếu, giữ CLAUDE.md
35
+ luôn gọn.
36
+
37
+ - @docs/architecture.md — khi thêm module hoặc dời code.
38
+ - @docs/adr/ — khi đổi public API.
39
+ - @docs/golden-principles.md — trước mọi refactor.
40
+ - @feature_list.json — trước khi tuyên bố một feature đã xong.
41
+ - `.harness/PROGRESS.md` — đọc đầu session; append cuối session (kit quản lý, không @-import).
38
42
 
39
43
  ## Skills nên dùng
40
44
 
@@ -0,0 +1,73 @@
1
#!/usr/bin/env bash
# Notification hook — OS-native notification when Claude wants attention.
# macOS osascript / Linux notify-send / Windows skip.
# Never blocks. Always exits 0. Opt-out: AHK_DISABLE_NOTIFY=1.
#
# `set -e` stays on, so every step that can legitimately fail (JSON parsing,
# the telemetry write, the notifier itself) is guarded explicitly. Without
# the guards a malformed payload or a read-only checkout would abort the
# script with a non-zero status and break the "always exits 0" contract.
set -eo pipefail

INPUT=$(cat)
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"

# True when jq is installed and not disabled via AHK_DISABLE_JQ=1.
have_jq() {
  [ "${AHK_DISABLE_JQ:-}" = "1" ] && return 1
  command -v jq >/dev/null 2>&1
}

# True when some JSON picker is usable: jq, or node + the bundled json-pick.mjs.
have_jp() {
  have_jq && return 0
  command -v node >/dev/null 2>&1 && [ -f "$SCRIPT_DIR/_lib/json-pick.mjs" ] && return 0
  return 1
}

# jp <filter> [file] — evaluate a jq-style filter against stdin or a file.
jp() {
  if have_jq; then
    if [ -n "$2" ]; then jq -r "$1" "$2"; else jq -r "$1"; fi
  else
    if [ -n "$2" ]; then
      node "$SCRIPT_DIR/_lib/json-pick.mjs" "$1" "$2"
    else
      node "$SCRIPT_DIR/_lib/json-pick.mjs" "$1"
    fi
  fi
}

# pick <filter> — run a filter against the hook payload; prints nothing
# (and still succeeds) when the payload is not valid JSON.
pick() {
  printf '%s' "$INPUT" | jp "$1" 2>/dev/null || true
}

# json_escape <string> — escape a value for embedding in a JSON string
# literal. Backslash goes first so later escapes are not double-escaped.
json_escape() {
  local s=$1
  s=${s//\\/\\\\}
  s=${s//\"/\\\"}
  s=${s//$'\n'/\\n}
  s=${s//$'\r'/\\r}
  s=${s//$'\t'/\\t}
  printf '%s' "$s"
}

if [ "${AHK_DISABLE_NOTIFY:-}" = "1" ]; then
  exit 0
fi

TYPE=""
TITLE=""
BODY=""
if have_jp; then
  # NOTE(review): the nested .notification.* fields are kept first for
  # compatibility. The top-level fallbacks follow the Notification payload
  # described in the Claude Code hooks reference — confirm the field names
  # against the Claude Code version you target.
  TYPE=$(pick '.notification.type // .notification_type // empty')
  TITLE=$(pick '.notification.title // .title // empty')
  BODY=$(pick '.notification.body // .message // empty')
fi

[ -z "$TITLE" ] && TITLE="Claude Code"
if [ -n "$TYPE" ]; then
  BODY="[$TYPE] ${BODY}"
fi
[ -z "$BODY" ] && BODY="Claude Code wants your attention."

# Telemetry is best-effort: an unwritable .harness/ must not fail the hook.
TS=$(date -u +%Y-%m-%dT%H:%M:%SZ)
if mkdir -p .harness 2>/dev/null; then
  printf '{"ts":"%s","hook":"Notification","type":"%s","title":"%s","body":"%s"}\n' \
    "$TS" "$(json_escape "$TYPE")" "$(json_escape "$TITLE")" "$(json_escape "$BODY")" \
    2>/dev/null >> .harness/telemetry.jsonl || true
fi

OS_KIND=$(uname -s 2>/dev/null || echo "Unknown")
case "$OS_KIND" in
  Darwin)
    # AppleScript string literals treat both backslash and double quote as
    # special; escape the backslash first.
    OSA_TITLE=${TITLE//\\/\\\\}
    OSA_TITLE=${OSA_TITLE//\"/\\\"}
    OSA_BODY=${BODY//\\/\\\\}
    OSA_BODY=${OSA_BODY//\"/\\\"}
    osascript -e "display notification \"$OSA_BODY\" with title \"$OSA_TITLE\"" >/dev/null 2>&1 || true
    ;;
  Linux)
    if command -v notify-send >/dev/null 2>&1; then
      notify-send -a "Claude Code" "$TITLE" "$BODY" >/dev/null 2>&1 || true
    fi
    ;;
  *)
    :
    ;;
esac

exit 0
@@ -0,0 +1,115 @@
1
#!/usr/bin/env bash
# PreToolUse hook (matcher: Edit|Write|MultiEdit) — denies direct edits to
# protected paths. Catches the failure mode where the agent decides to
# "just fix" a baseline file or .claude/ template instead of going through
# the proper /garbage-collection or scaffold-refresh paths.
#
# Protected paths (and why):
#   1. .claude/                           — skills, agents, hooks, settings.
#                                           Use /upgrade flow or edit the source
#                                           template in src/templates/.
#   2. node_modules/                      — package state, regenerated by install.
#   3. .git/                              — repo internals, never hand-edited.
#   4. .harness/structural-baseline.json  — bypasses monotonic guard. Use the
#                                           /garbage-collection skill.
#   5. .harness/installed.json            — kit lockfile, derived from render.
#                                           Hand edits cause spurious "drift"
#                                           warnings on next upgrade.
#
# Escape hatches:
#   - AHK_ALLOW_BYPASS=1  → log + allow (audit trail in .harness/bypass.log).
#   - AHK_HOOK_MODE=warn  → log only, never deny.
set -eo pipefail

INPUT=$(cat)
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"

# True when jq is installed and not disabled via AHK_DISABLE_JQ=1.
have_jq() {
  [ "${AHK_DISABLE_JQ:-}" = "1" ] && return 1
  command -v jq >/dev/null 2>&1
}

# True when some JSON picker is usable: jq, or node + the bundled json-pick.mjs.
have_jp() {
  have_jq && return 0
  command -v node >/dev/null 2>&1 && [ -f "$SCRIPT_DIR/_lib/json-pick.mjs" ] && return 0
  return 1
}

# jp <filter> — evaluate a jq-style filter against JSON on stdin.
jp() {
  if have_jq; then jq -r "$1"
  else node "$SCRIPT_DIR/_lib/json-pick.mjs" "$1"
  fi
}

# Without a JSON picker the payload cannot be inspected; allow the edit.
if ! have_jp; then exit 0; fi

# Resolve the target file from .tool_input.file_path, falling back to
# .tool_input.path. The value may be absolute or repo-relative; it is
# normalised below with plain shell parameter expansion.
FILE=$(echo "$INPUT" | jp '.tool_input.file_path // .tool_input.path // empty')
[ -z "$FILE" ] && exit 0

# Normalise to a path relative to CWD when possible; otherwise keep absolute.
REL_FILE="$FILE"
if [ -n "$PWD" ] && [[ "$FILE" == "$PWD"/* ]]; then
  REL_FILE="${FILE#"$PWD"/}"
fi
REL_FILE="${REL_FILE#./}"

# Every rule matches both the repo-relative form and the same path under any
# prefix, so an absolute path that does not start with $PWD (hook launched
# from a subdirectory, symlinked checkout) is still caught.
# NOTE(review): the */.claude/* pattern also matches a user-level ~/.claude/
# path, which the deny message itself recommends as an override location —
# confirm whether that is intended.
REASON=""
case "$REL_FILE" in
  .claude/*|*/.claude/*)
    REASON=".claude/ is owned by the kit's scaffold. To change a skill/agent/hook, edit src/templates/.claude/ in the kit source and re-run 'agent-harness-kit upgrade', or override at the user level (~/.claude/)."
    ;;
  node_modules/*|*/node_modules/*)
    REASON="node_modules/ is regenerated by the package manager. Edit package.json or the upstream package; never hand-edit installed files."
    ;;
  .git/*|*/.git/*)
    REASON=".git/ contains repo internals. Use git commands ('git config', 'git update-ref', etc.) — never hand-edit."
    ;;
  .harness/structural-baseline.json|*/.harness/structural-baseline.json)
    REASON="Direct edits to .harness/structural-baseline.json bypass the baseline-monotonic guard. Use the /garbage-collection skill or fix the underlying violation."
    ;;
  .harness/installed.json|*/.harness/installed.json)
    REASON=".harness/installed.json is the kit lockfile, regenerated by 'agent-harness-kit init/upgrade'. Hand edits cause spurious drift warnings."
    ;;
esac

if [ -z "$REASON" ]; then
  exit 0
fi

# Warn-only mode.
if [ "${AHK_HOOK_MODE:-}" = "warn" ]; then
  echo "[ahk] pretooluse-edit-guard (warn): would deny edit to $REL_FILE — $REASON" >&2
  exit 0
fi

# Bypass with audit log.
if [ "${AHK_ALLOW_BYPASS:-}" = "1" ]; then
  mkdir -p .harness
  TS=$(date -u +%Y-%m-%dT%H:%M:%SZ)
  SHA=$(git rev-parse --short HEAD 2>/dev/null || echo 'no-git')
  # Escape backslashes before quotes so the log line stays valid JSON.
  ESCAPED=${REL_FILE//\\/\\\\}
  ESCAPED=${ESCAPED//\"/\\\"}
  printf '{"ts":"%s","sha":"%s","bypass":"AHK_ALLOW_BYPASS","file":"%s","rule":"pretooluse-edit-guard"}\n' \
    "$TS" "$SHA" "$ESCAPED" >> .harness/bypass.log
  exit 0
fi

# Deny via JSON. Node is preferred, jq is the fallback; with neither, the
# reason goes to stderr and the script exits 2.
if command -v node >/dev/null 2>&1; then
  node -e "
    const reason = process.argv[1];
    const out = {
      hookSpecificOutput: {
        hookEventName: 'PreToolUse',
        permissionDecision: 'deny',
        permissionDecisionReason: reason
      }
    };
    process.stdout.write(JSON.stringify(out));
  " "$REASON"
elif have_jq; then
  jq -nc --arg r "$REASON" \
    '{hookSpecificOutput: {hookEventName: "PreToolUse", permissionDecision: "deny", permissionDecisionReason: $r}}'
else
  echo "$REASON" >&2
  exit 2
fi
exit 0
@@ -45,4 +45,10 @@ fi
45
45
  mkdir -p .harness
46
46
  TS=$(date +"%Y-%m-%d %H:%M")
47
47
  echo "$TS | session_end | $REASON | $BR | $SHA" >> .harness/PROGRESS.md
48
+
49
+ # Rollup side-car — writes a JSONL record to .harness/telemetry.jsonl.
50
+ # Best-effort: never blocks the cleanup-only SessionEnd contract.
51
+ if command -v node >/dev/null 2>&1 && [ -f scripts/session-rollup.mjs ]; then
52
+ printf '%s' "$INPUT" | node scripts/session-rollup.mjs 2>/dev/null || true
53
+ fi
48
54
  exit 0
@@ -0,0 +1,96 @@
1
+ #!/usr/bin/env node
2
+ // session-rollup.mjs — deterministic SessionEnd side-car. Writes a single
3
+ // JSONL record summarising the session into .harness/telemetry.jsonl. Pure
4
+ // Node (no jq dependency).
5
+ //
6
+ // Record shape:
7
+ // { ts, event: "session_rollup", reason, branch, sha, uncommitted,
8
+ // skills_invoked: [...], session_id }
9
+ //
10
+ // Called from session-end.sh after the human-readable PROGRESS.md line is
11
+ // written, so a single session contributes one PROGRESS.md line + one
12
+ // telemetry rollup record.
13
+
14
+ import { readFileSync, existsSync, mkdirSync, appendFileSync } from "node:fs";
15
+ import { resolve } from "node:path";
16
+ import { spawnSync } from "node:child_process";
17
+
18
+ const ROOT = process.env.CLAUDE_PROJECT_DIR || process.cwd();
19
+
20
/**
 * Read everything available on stdin (fd 0) synchronously.
 *
 * @returns {string} The stdin contents as UTF-8, or "" when the read throws.
 */
function readStdinSync() {
  let payload = "";
  try {
    // SessionEnd hooks pass JSON on stdin; fd 0 is the inherited stdin.
    payload = readFileSync(0, "utf8");
  } catch {
    // A failed read leaves the payload empty.
  }
  return payload;
}
28
+
29
/**
 * Parse a JSON payload into an object without throwing.
 *
 * Callers read properties off the result directly, so anything that is not a
 * non-null object (empty input, malformed JSON, or valid scalars such as
 * `null` or `42`) is normalised to `{}`.
 *
 * @param {string} s - Raw JSON text; may be empty.
 * @returns {object} The parsed object or array, or `{}` when unusable.
 */
function safeJSON(s) {
  if (!s) return {};
  let parsed;
  try {
    parsed = JSON.parse(s);
  } catch {
    return {};
  }
  return parsed !== null && typeof parsed === "object" ? parsed : {};
}
33
+
34
/**
 * Run a git command in the project root and return its trimmed stdout.
 *
 * @param {string[]} args - Arguments passed to `git`.
 * @param {string} [def=""] - Value returned when git does not exit with 0.
 * @returns {string} Trimmed stdout on success, otherwise `def`.
 */
function git(args, def = "") {
  const { status, stdout } = spawnSync("git", args, { cwd: ROOT, encoding: "utf8" });
  return status === 0 ? (stdout || "").trim() : def;
}
39
+
40
/**
 * List the skills recorded in `.harness/telemetry.jsonl` since the most
 * recent `session_rollup` record, or across the whole file when there is
 * no earlier rollup.
 *
 * @returns {string[]} Skill names in file order, limited to the last 50.
 *   Empty when the telemetry file does not exist.
 */
function recentSkillInvocations() {
  const telemetryPath = resolve(ROOT, ".harness/telemetry.jsonl");
  if (!existsSync(telemetryPath)) return [];

  const entries = readFileSync(telemetryPath, "utf8").split("\n").filter(Boolean);

  // Walk backwards from the newest record and stop at the previous rollup;
  // everything collected on the way belongs to the current session.
  const newestFirst = [];
  for (let idx = entries.length - 1; idx >= 0; idx -= 1) {
    try {
      const rec = JSON.parse(entries[idx]);
      if (rec.event === "session_rollup") break;
      if (rec.event === "skill_invoked" && rec.skill) newestFirst.push(rec.skill);
    } catch {
      /* skip malformed */
    }
  }

  return newestFirst.reverse().slice(-50);
}
68
+
69
/**
 * Build one `session_rollup` record from the SessionEnd hook payload and
 * the git state, then append it to `.harness/telemetry.jsonl`.
 *
 * Record shape:
 *   { ts, event, reason, session_id, branch, sha, uncommitted, skills_invoked }
 */
function main() {
  // Guard against a payload that parses to null so the property reads below
  // cannot throw.
  const input = safeJSON(readStdinSync()) ?? {};
  // The Claude Code hooks reference documents the SessionEnd field as
  // `reason`; `end_reason` is still honoured for older payloads.
  const reason = input.reason || input.end_reason || "unknown";
  const sessionId = input.session_id || "";

  // `git branch --show-current` succeeds with empty output on a detached
  // HEAD, so the fallback has to cover an empty string as well as a failure.
  const branch = git(["branch", "--show-current"], "") || "(detached)";
  const sha = git(["rev-parse", "--short", "HEAD"], "(no-git)");
  const uncommittedRaw = git(["status", "--short"], "");
  const uncommitted = uncommittedRaw ? uncommittedRaw.split("\n").filter(Boolean).length : 0;
  const skills = recentSkillInvocations();

  const record = {
    ts: new Date().toISOString(),
    event: "session_rollup",
    reason,
    session_id: sessionId,
    branch,
    sha,
    uncommitted,
    skills_invoked: skills,
  };

  const outPath = resolve(ROOT, ".harness/telemetry.jsonl");
  mkdirSync(resolve(ROOT, ".harness"), { recursive: true });
  appendFileSync(outPath, JSON.stringify(record) + "\n");
}
95
+
96
+ main();
@@ -60,6 +60,31 @@ if command -v git >/dev/null 2>&1 && git rev-parse --git-dir >/dev/null 2>&1; th
60
60
  CTX+="[harness] git: branch=$BR, uncommitted=$COUNT file(s)"$'\n'
61
61
  fi
62
62
 
63
+ # 1b. One-shot daily pill (harness version + open-feature reminder).
64
+ # `mkdir -p .harness/state` then check the stamp file. Today's pill fires
65
+ # once per UTC day per project; subsequent SessionStarts that day stay
66
+ # silent on this line so the model doesn't see the same banner thirty
67
+ # times per coding day.
68
+ mkdir -p .harness/state 2>/dev/null || true
69
+ STAMP_FILE=".harness/state/session-pill.stamp"
70
+ TODAY=$(date -u +%Y-%m-%d)
71
+ LAST=""
72
+ [ -f "$STAMP_FILE" ] && LAST=$(cat "$STAMP_FILE" 2>/dev/null || echo "")
73
+ if [ "$LAST" != "$TODAY" ]; then
74
+ HARNESS_VER=""
75
+ if [ -f harness.config.json ] && have_jp; then
76
+ HARNESS_VER=$(jp '.version // empty' harness.config.json 2>/dev/null || echo "")
77
+ fi
78
+ if [ -z "$HARNESS_VER" ] && [ -f .harness/installed.json ] && have_jp; then
79
+ HARNESS_VER=$(jp '.version // empty' .harness/installed.json 2>/dev/null || echo "")
80
+ fi
81
+ if [ -z "$HARNESS_VER" ]; then
82
+ HARNESS_VER="unknown"
83
+ fi
84
+ CTX+="[harness] pill (one/day): kit=$HARNESS_VER · date=$TODAY"$'\n'
85
+ printf '%s' "$TODAY" > "$STAMP_FILE" 2>/dev/null || true
86
+ fi
87
+
63
88
  # 2. Current feature (from feature_list.json) — picks the first entry with
64
89
  # passes=false so the model resumes the in-flight work, not a finished
65
90
  # one. Skipped if file missing or jp unavailable.