agent-harness-kit 0.10.2 → 0.11.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +2 -2
- package/.claude-plugin/plugin.json +1 -1
- package/README.md +13 -5
- package/package.json +3 -2
- package/src/core/render-templates.mjs +31 -3
- package/src/templates/.claude/keybindings.json.example +20 -0
- package/src/templates/.claude/skills/deliver-html/SKILL.md.hbs +5 -1
- package/src/templates/.claude/skills/deliver-html/SKILL.md.vi.hbs +5 -1
- package/src/templates/.claude/skills/deliver-html/scripts/wrap-html.mjs +0 -0
- package/src/templates/.claude/skills/setup-nightly-eval/SKILL.md +118 -0
- package/src/templates/docs/env-vars.md +54 -0
- package/src/templates/docs/memory-cheatsheet.md +82 -0
- package/src/templates/scripts/_lib/jp.sh +53 -0
- package/src/templates/scripts/_lib/statusline-cache.mjs +57 -0
- package/src/templates/scripts/_lib/telemetry.sh +45 -0
- package/src/templates/scripts/notify-on-block.sh.hbs +6 -23
- package/src/templates/scripts/pre-compact.sh.hbs +2 -20
- package/src/templates/scripts/pre-push.sh +2 -20
- package/src/templates/scripts/precompletion-checklist.sh.hbs +5 -31
- package/src/templates/scripts/pretooluse-bash-guard.sh.hbs +2 -20
- package/src/templates/scripts/pretooluse-edit-guard.sh.hbs +2 -14
- package/src/templates/scripts/session-end.sh.hbs +2 -14
- package/src/templates/scripts/session-start.sh.hbs +2 -20
- package/src/templates/scripts/statusline.mjs +327 -36
- package/src/templates/scripts/structural-test-on-edit.sh.hbs +2 -14
- package/src/templates/scripts/subagent-stop.sh.hbs +7 -18
- package/src/templates/scripts/telemetry-on-skill.sh +14 -20
- package/src/templates/scripts/userprompt-guard.sh.hbs +2 -20
|
@@ -11,9 +11,9 @@
|
|
|
11
11
|
"source": {
|
|
12
12
|
"source": "github",
|
|
13
13
|
"repo": "tuanle96/agent-harness-kit",
|
|
14
|
-
"ref": "v0.
|
|
14
|
+
"ref": "v0.11.1"
|
|
15
15
|
},
|
|
16
|
-
"version": "0.
|
|
16
|
+
"version": "0.11.1",
|
|
17
17
|
"description": "Solo-dev harness engineering kit — layered architecture, GC ritual, structural tests, review subagents.",
|
|
18
18
|
"category": "development",
|
|
19
19
|
"keywords": [
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "agent-harness-kit",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.11.1",
|
|
4
4
|
"description": "Solo-dev harness engineering kit — layered architecture, garbage-collection ritual, structural tests, review subagents. Optimized for Claude Code 2.1+.",
|
|
5
5
|
"author": {
|
|
6
6
|
"name": "Tuan Le"
|
package/README.md
CHANGED
|
@@ -33,9 +33,11 @@ Option B: install as a Claude Code plugin
|
|
|
33
33
|
|
|
34
34
|
## What ships
|
|
35
35
|
|
|
36
|
-
-
|
|
37
|
-
`
|
|
38
|
-
`
|
|
36
|
+
- 17 skills (`add-adr`, `add-feature`, `debug-flow`, `deliver-html`,
|
|
37
|
+
`doc-drift-scan`, `eval-runner`, `garbage-collection`, `i18n-add-locale`,
|
|
38
|
+
`inspect-app`, `inspect-module`, `map-domain`,
|
|
39
|
+
`propose-harness-improvement`, `refactor-feature`, `review-this-pr`,
|
|
40
|
+
`setup-nightly-eval`, `structural-test-author`, `write-skill`)
|
|
39
41
|
- 5 read-only review subagents (`architecture-reviewer`, `security-reviewer`,
|
|
40
42
|
`reliability-reviewer`, `performance-reviewer`, `api-consistency-reviewer`)
|
|
41
43
|
- 1 PostToolUse hook (structural-test on every edit) + 1 Stop hook
|
|
@@ -60,6 +62,12 @@ Option B: install as a Claude Code plugin
|
|
|
60
62
|
| `/doc-drift-scan` | Find stale path/command references in `docs/` |
|
|
61
63
|
| `/debug-flow` | Run the failing flow before fixing it |
|
|
62
64
|
| `/deliver-html` | Ship an analysis/audit/plan as a self-contained HTML |
|
|
65
|
+
| `/i18n-add-locale <code>` | Scaffold a new translation locale for skills + CLAUDE.md |
|
|
66
|
+
| `/inspect-app` | Boot dev server + drive the failing flow before edits |
|
|
67
|
+
| `/map-domain` | Render layer config + flag config-vs-filesystem drift |
|
|
68
|
+
| `/refactor-feature` | Restructure steps in `feature_list.json` with proof gate |
|
|
69
|
+
| `/review-this-pr` | Deterministic diff review against the current base |
|
|
70
|
+
| `/setup-nightly-eval` | Enable the nightly eval GitHub Actions workflow |
|
|
63
71
|
|
|
64
72
|
## Philosophy (5 axioms)
|
|
65
73
|
|
|
@@ -104,7 +112,7 @@ your-repo/
|
|
|
104
112
|
├── harness.config.json
|
|
105
113
|
├── .claude/
|
|
106
114
|
│ ├── settings.json
|
|
107
|
-
│ ├── skills/ #
|
|
115
|
+
│ ├── skills/ # 17 skills as SKILL.md files
|
|
108
116
|
│ ├── agents/ # 5 reviewer personas
|
|
109
117
|
│ └── hooks/hooks.json
|
|
110
118
|
├── .harness/
|
|
@@ -159,7 +167,7 @@ agent-harness-kit --version
|
|
|
159
167
|
What this kit **does** differentiate from bare claude-cli (anecdotal + design-level):
|
|
160
168
|
|
|
161
169
|
- Opinionated CLAUDE.md template (50–80 lines) so context isn't blown on style
|
|
162
|
-
-
|
|
170
|
+
- 17 skills (`/add-feature`, `/garbage-collection`, `/propose-harness-improvement`, …) that codify Hashimoto/OpenAI rituals
|
|
163
171
|
- 5 read-only review subagents for cheap second-opinion passes
|
|
164
172
|
- `feature_list.json` + ADR template + GC ritual for solo-scale planning hygiene
|
|
165
173
|
- Solo-dev cost defaults (~$2/day) and per-run budget enforcement
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "agent-harness-kit",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.11.1",
|
|
4
4
|
"description": "Solo-dev harness engineering kit for Claude Code. Layered architecture, structural tests, garbage-collection ritual, review subagents — without the enterprise overhead.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|
|
@@ -42,7 +42,8 @@
|
|
|
42
42
|
"lint": "echo 'no-op (kit is plain ESM JS)'",
|
|
43
43
|
"selftest": "node bin/cli.mjs --version",
|
|
44
44
|
"harness:eval": "node src/templates/_adapter-typescript/harness/eval-runner.mjs",
|
|
45
|
-
"harness:check": "node scripts/kit-structural-check.mjs"
|
|
45
|
+
"harness:check": "node scripts/kit-structural-check.mjs",
|
|
46
|
+
"check:skill-count": "node scripts/check-skill-count.mjs"
|
|
46
47
|
},
|
|
47
48
|
"dependencies": {
|
|
48
49
|
"@inquirer/prompts": "^7.0.0",
|
|
@@ -288,10 +288,14 @@ function sha256(buf) {
|
|
|
288
288
|
}
|
|
289
289
|
|
|
290
290
|
// Inject a statusLine block into .claude/settings.json. Idempotent: if the
|
|
291
|
-
// existing statusLine already references the kit's script
|
|
292
|
-
//
|
|
291
|
+
// existing statusLine already references the kit's script with the desired
|
|
292
|
+
// padding + refreshInterval, leave it; otherwise update. Doesn't clobber a
|
|
293
293
|
// user-customised type:"command" entry that points at a different command.
|
|
294
294
|
//
|
|
295
|
+
// padding/refreshInterval are sourced from harness.config.json#statusline
|
|
296
|
+
// (with defaults) so a user can tune through one config file and the merge
|
|
297
|
+
// keeps settings.json in sync.
|
|
298
|
+
//
|
|
295
299
|
// Returns {changed, rawContent} for the lockfile bookkeeping (mirrors the
|
|
296
300
|
// mergeHooksIntoSettings contract).
|
|
297
301
|
export async function mergeStatusLineIntoSettings(cwd) {
|
|
@@ -310,9 +314,31 @@ export async function mergeStatusLineIntoSettings(cwd) {
|
|
|
310
314
|
);
|
|
311
315
|
}
|
|
312
316
|
}
|
|
317
|
+
|
|
318
|
+
// Read padding + refreshInterval from harness.config.json#statusline if
|
|
319
|
+
// present; otherwise V4 defaults (padding 1, refresh 2s for live updates
|
|
320
|
+
// during long-running turns).
|
|
321
|
+
let padding = 1;
|
|
322
|
+
let refreshInterval = 2;
|
|
323
|
+
const cfgPath = resolve(cwd, "harness.config.json");
|
|
324
|
+
if (existsSync(cfgPath)) {
|
|
325
|
+
try {
|
|
326
|
+
const cfg = JSON.parse(await readFile(cfgPath, "utf8"));
|
|
327
|
+
const sl = cfg?.statusline;
|
|
328
|
+
if (sl && typeof sl === "object") {
|
|
329
|
+
if (typeof sl.padding === "number" && sl.padding >= 0) padding = sl.padding;
|
|
330
|
+
if (typeof sl.refreshInterval === "number" && sl.refreshInterval >= 1) {
|
|
331
|
+
refreshInterval = sl.refreshInterval;
|
|
332
|
+
}
|
|
333
|
+
}
|
|
334
|
+
} catch { /* malformed config → use defaults */ }
|
|
335
|
+
}
|
|
336
|
+
|
|
313
337
|
const desired = {
|
|
314
338
|
type: "command",
|
|
315
339
|
command: "node scripts/statusline.mjs",
|
|
340
|
+
padding,
|
|
341
|
+
refreshInterval,
|
|
316
342
|
};
|
|
317
343
|
// Preserve a user-customised entry if it already points elsewhere. We only
|
|
318
344
|
// inject when statusLine is absent OR explicitly references our script.
|
|
@@ -330,7 +356,9 @@ export async function mergeStatusLineIntoSettings(cwd) {
|
|
|
330
356
|
cur &&
|
|
331
357
|
typeof cur === "object" &&
|
|
332
358
|
cur.type === desired.type &&
|
|
333
|
-
cur.command === desired.command
|
|
359
|
+
cur.command === desired.command &&
|
|
360
|
+
cur.padding === desired.padding &&
|
|
361
|
+
cur.refreshInterval === desired.refreshInterval
|
|
334
362
|
) {
|
|
335
363
|
return { changed: false, rawContent: Buffer.from(raw) };
|
|
336
364
|
}
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://www.schemastore.org/claude-code-keybindings.json",
|
|
3
|
+
"$docs": "https://code.claude.com/docs/en/keybindings",
|
|
4
|
+
"$comment": "agent-harness-kit sample. RENAME to ~/.claude/keybindings.json after editing. Claude Code's keybinding action list is FIXED — there is no action that runs a slash command (no `chat:runCommand` or similar), so we cannot bind keys to /gc, /add-feature, etc. The bindings below tune the chat workflow only. To run a slash command, type `/` and use autocomplete — that is the supported UX.",
|
|
5
|
+
"bindings": [
|
|
6
|
+
{
|
|
7
|
+
"context": "Chat",
|
|
8
|
+
"bindings": {
|
|
9
|
+
"ctrl+e": "chat:externalEditor"
|
|
10
|
+
}
|
|
11
|
+
},
|
|
12
|
+
{
|
|
13
|
+
"context": "Global",
|
|
14
|
+
"bindings": {
|
|
15
|
+
"ctrl+shift+t": "app:toggleTodos",
|
|
16
|
+
"ctrl+shift+r": "app:toggleTranscript"
|
|
17
|
+
}
|
|
18
|
+
}
|
|
19
|
+
]
|
|
20
|
+
}
|
|
@@ -60,6 +60,10 @@ Do **NOT** use for:
|
|
|
60
60
|
- Converts MD → HTML (self-rolled subset: headings, lists, code blocks,
|
|
61
61
|
tables, blockquotes, links, inline formatting — no npm dependency).
|
|
62
62
|
- Writes `<slug>.html` at the path you pass.
|
|
63
|
+
- **Auto-opens** the file in the default browser (`open`/`xdg-open`/`start`).
|
|
64
|
+
Suppress with `--no-open`, or by setting `AHK_DISABLE_HTML_OPEN=1` /
|
|
65
|
+
`CI=true` in the environment. Open failures (missing binary, headless
|
|
66
|
+
box) never fail the deliverable.
|
|
63
67
|
|
|
64
68
|
5. **Print the deliverable contract** (the script already does this — copy it
|
|
65
69
|
into your response):
|
|
@@ -69,7 +73,7 @@ Do **NOT** use for:
|
|
|
69
73
|
**File:** <path> (<size>)
|
|
70
74
|
**Template:** decision-doc | audit-report | status-report
|
|
71
75
|
**Lang:** vi | en
|
|
72
|
-
**Open:**
|
|
76
|
+
**Open:** auto-opened (or fallback hint if --no-open / CI=true)
|
|
73
77
|
```
|
|
74
78
|
|
|
75
79
|
## Output contract
|
|
@@ -59,6 +59,10 @@ Trigger keyword từ user (tiếng Việt / English):
|
|
|
59
59
|
- Convert MD → HTML (self-rolled subset: heading, list, code block, table,
|
|
60
60
|
blockquote, link, inline format — không cần npm dependency).
|
|
61
61
|
- Ghi `<slug>.html` tại path bạn truyền.
|
|
62
|
+
- **Tự mở** file trong browser mặc định (`open`/`xdg-open`/`start`).
|
|
63
|
+
Tắt bằng `--no-open`, hoặc set `AHK_DISABLE_HTML_OPEN=1` /
|
|
64
|
+
`CI=true` trong env. Lỗi mở (thiếu binary, headless) không làm
|
|
65
|
+
fail deliverable.
|
|
62
66
|
|
|
63
67
|
5. **In deliverable contract** (script tự in — bạn copy vào response):
|
|
64
68
|
|
|
@@ -67,7 +71,7 @@ Trigger keyword từ user (tiếng Việt / English):
|
|
|
67
71
|
**File:** <path> (<size>)
|
|
68
72
|
**Template:** decision-doc | audit-report | status-report
|
|
69
73
|
**Lang:** vi | en
|
|
70
|
-
**Open:**
|
|
74
|
+
**Open:** auto-opened (hoặc fallback hint khi --no-open / CI=true)
|
|
71
75
|
```
|
|
72
76
|
|
|
73
77
|
## Output contract
|
|
Binary file
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: setup-nightly-eval
|
|
3
|
+
description: Use this skill when the user wants to schedule the harness eval to run every night, asks "how do I set up nightly evals", "schedule the eval", "run evals on a cron", or "nightly regression for the harness". The kit already ships a GitHub Actions workflow at .github/workflows/eval-nightly.yml — this skill walks the user through enabling it (secret setup, smoke run via workflow_dispatch, verifying the first scheduled run). Do NOT use this skill to "remind me to run eval every night in this Claude session" — that is the /loop skill or CronCreate (which is session-only), a different concern.
|
|
4
|
+
allowed-tools: Read, Bash(gh:*), Bash(ls:*), Bash(cat:*), Bash(test:*)
|
|
5
|
+
suggested-turns: 4
|
|
6
|
+
---
|
|
7
|
+
|
|
8
|
+
## Background — why GitHub Actions, not CronCreate
|
|
9
|
+
|
|
10
|
+
A common request is "use CronCreate to run the eval every night". That
|
|
11
|
+
does not do what the user wants:
|
|
12
|
+
|
|
13
|
+
- `CronCreate` jobs live only in the **current Claude Code session**.
|
|
14
|
+
Closing the REPL deletes them, and they auto-expire after 7 days regardless.
|
|
15
|
+
- Jobs only fire while the REPL is **idle**, not when the laptop is
|
|
16
|
+
asleep or off.
|
|
17
|
+
- They run *Claude turns*, which spend tokens for every fire.
|
|
18
|
+
|
|
19
|
+
For a real nightly cadence ("runs at 6am whether I'm at the keyboard or
|
|
20
|
+
not"), the right substrate is OS-level cron / launchd / GitHub Actions.
|
|
21
|
+
This kit ships a GitHub Actions workflow as the default because:
|
|
22
|
+
|
|
23
|
+
1. No local daemon to babysit.
|
|
24
|
+
2. Free for public repos and within the free tier for most private ones.
|
|
25
|
+
3. Results land in workflow artifacts — visible from anywhere.
|
|
26
|
+
|
|
27
|
+
## When to use
|
|
28
|
+
|
|
29
|
+
Trigger phrases (English / Vietnamese):
|
|
30
|
+
|
|
31
|
+
- "set up nightly eval" / "lập lịch eval mỗi đêm"
|
|
32
|
+
- "schedule the harness eval"
|
|
33
|
+
- "make the eval run on a cron"
|
|
34
|
+
- "nightly regression for the harness"
|
|
35
|
+
|
|
36
|
+
Do **NOT** invoke for:
|
|
37
|
+
|
|
38
|
+
- One-off ad-hoc eval runs — use `/eval-runner` directly.
|
|
39
|
+
- In-session polling ("re-run every 10 min until I say stop") — that's
|
|
40
|
+
the `/loop` skill.
|
|
41
|
+
- Local-machine cron setup (launchd / crontab) — that path is on the
|
|
42
|
+
user's machine and a skill cannot install OS daemons. Print the
|
|
43
|
+
recipe and let them paste it.
|
|
44
|
+
|
|
45
|
+
## Steps
|
|
46
|
+
|
|
47
|
+
1. **Verify the workflow file exists.** It ships via `installCi: true`.
|
|
48
|
+
|
|
49
|
+
```bash
|
|
50
|
+
test -f .github/workflows/eval-nightly.yml && echo OK || echo MISSING
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
If MISSING: the user opted out of CI files at scaffold time. Tell
|
|
54
|
+
them to re-run `agent-harness-kit upgrade --ci` (or to manually copy
|
|
55
|
+
from `node_modules/agent-harness-kit/src/templates/_ci/`).
|
|
56
|
+
|
|
57
|
+
2. **Check the eval transport.** The workflow defaults to `mock`
|
|
58
|
+
transport unless `ANTHROPIC_API_KEY` is set in repo secrets. Decide
|
|
59
|
+
with the user:
|
|
60
|
+
|
|
61
|
+
- **Mock (free):** smoke-tests the eval runner shape — catches a
|
|
62
|
+
broken JSONL writer, but does not exercise the model. Good
|
|
63
|
+
default for forks / OSS.
|
|
64
|
+
- **claude-cli (real, costs tokens):** runs the actual model on
|
|
65
|
+
each task. Catches regressions caused by prompt/skill changes.
|
|
66
|
+
Costs ~$0.05–0.50/night depending on task set size.
|
|
67
|
+
|
|
68
|
+
3. **(If real transport) ensure the secret is set:**
|
|
69
|
+
|
|
70
|
+
```bash
|
|
71
|
+
gh secret list | grep ANTHROPIC_API_KEY
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
If absent, ask the user to set it via:
|
|
75
|
+
|
|
76
|
+
```bash
|
|
77
|
+
gh secret set ANTHROPIC_API_KEY
|
|
78
|
+
# paste the key when prompted (it never appears in shell history)
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
4. **Trigger a first manual run** via `workflow_dispatch` so the user
|
|
82
|
+
does not wait 24h to confirm the wiring:
|
|
83
|
+
|
|
84
|
+
```bash
|
|
85
|
+
gh workflow run eval-nightly.yml --field set=quick --field transport=mock
|
|
86
|
+
# then watch:
|
|
87
|
+
gh run watch
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
5. **Print the contract.** What the user just enabled:
|
|
91
|
+
|
|
92
|
+
```
|
|
93
|
+
### Nightly eval enabled
|
|
94
|
+
**Workflow:** .github/workflows/eval-nightly.yml
|
|
95
|
+
**Cron:** 0 6 * * * UTC (offset: see `gh run list` for actual fire times)
|
|
96
|
+
**Transport:** mock | claude-cli
|
|
97
|
+
**Set:** quick (3 tasks) | full (all tasks)
|
|
98
|
+
**Results:** uploaded as `eval-results` artifact on each run
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
## Output contract
|
|
102
|
+
|
|
103
|
+
The skill prints a single block matching the shape above. Do not edit
|
|
104
|
+
the workflow file from here — if the user wants to change the cron, the
|
|
105
|
+
task set, or the transport default, they edit
|
|
106
|
+
`.github/workflows/eval-nightly.yml` directly (it is a normal yml file,
|
|
107
|
+
not a templated artifact, after install).
|
|
108
|
+
|
|
109
|
+
## When the workflow file is owned by the kit
|
|
110
|
+
|
|
111
|
+
Re-running `agent-harness-kit upgrade` will refresh
|
|
112
|
+
`.github/workflows/eval-nightly.yml`. If the user has hand-tuned cron
|
|
113
|
+
or transport defaults, mention this — they should either:
|
|
114
|
+
|
|
115
|
+
- Move their customisation into `harness.config.json#evals` (kit reads
|
|
116
|
+
it on next render) and let the workflow stay vanilla, or
|
|
117
|
+
- Document the customisation in a comment so the next upgrade does not
|
|
118
|
+
silently overwrite it.
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
# Environment variables
|
|
2
|
+
|
|
3
|
+
Every kit hook and side-car honors one or more `AHK_*` env vars for opt-out,
|
|
4
|
+
debugging, or non-default behavior. Defaults are tuned for "just works" —
|
|
5
|
+
override only when you have a specific reason.
|
|
6
|
+
|
|
7
|
+
## Opt-out
|
|
8
|
+
|
|
9
|
+
| Var | Default | Effect |
|
|
10
|
+
| --- | --- | --- |
|
|
11
|
+
| `AHK_DISABLE_TELEMETRY` | unset | When `1`, the `telemetry-on-skill` and `subagent-stop` hooks exit before reading stdin — no `.harness/telemetry.jsonl` is created. Use when you do not want per-skill activity recorded. |
|
|
12
|
+
| `AHK_DISABLE_NOTIFY` | unset | When `1`, the `notify-on-block` hook skips the OS-native notification (osascript / notify-send). The telemetry row still logs the notification event. |
|
|
13
|
+
| `AHK_DISABLE_HTML_OPEN` | unset | When `1`, `/deliver-html` writes the HTML file but does not auto-open it in the browser. Also implied when `CI=true`. |
|
|
14
|
+
| `AHK_DISABLE_HTML_NUDGE` | unset | When `1`, suppresses the inline reminder that `/deliver-html` is available for analysis-style tasks. |
|
|
15
|
+
| `AHK_DISABLE_JQ` | unset | When `1`, hooks pretend `jq` is not on `$PATH` and use the Node fallback (`scripts/_lib/json-pick.mjs`). Used by tests to exercise the fallback path. |
|
|
16
|
+
|
|
17
|
+
## Bypass (audited)
|
|
18
|
+
|
|
19
|
+
| Var | Default | Effect |
|
|
20
|
+
| --- | --- | --- |
|
|
21
|
+
| `AHK_ALLOW_BYPASS` | unset | When `1`, `userprompt-guard`, `pretooluse-bash-guard`, and `pretooluse-edit-guard` allow the action through but append a record to `.harness/bypass.log` (timestamp + sha + reason + payload). The bypass leaves a paper trail so it cannot be silent. Use only with explicit intent — e.g. a mass-rename refactor that legitimately touches `.claude/`. |
|
|
22
|
+
| `AHK_HOOK_MODE` | unset | When `warn`, every gate hook (structural-test-on-edit, pretooluse-edit-guard, subagent-stop) logs the would-be violation to stderr but does not deny. Useful for one-off debugging; do not leave set in normal use. |
|
|
23
|
+
|
|
24
|
+
## Tuning
|
|
25
|
+
|
|
26
|
+
| Var | Default | Effect |
|
|
27
|
+
| --- | --- | --- |
|
|
28
|
+
| `AHK_TELEMETRY_MAX_LINES` | `5000` | Soft cap on `.harness/telemetry.jsonl` size. The `telemetry_append` helper rotates via `tail -n <N>` once the file grows past this number of lines. Set `0` to disable rotation entirely. Numeric only — non-numeric values fall back to the default rather than failing the hook. |
|
|
29
|
+
| `AHK_HEADLESS_RECOVER` | `0` | When `1`, the Stop hook spawns `claude -p` for one turn of recovery on failure. Costs tokens; off by default. Persistent equivalent: `harness.config.json#recovery.headless`. |
|
|
30
|
+
| `AHK_RECOVERY_LOCK_STALE_SECS` | `300` | How long the Stop-hook recovery lock is considered stale before a new recovery attempt can take it. Prevents stuck locks after a killed session. |
|
|
31
|
+
| `AHK_STATUSLINE_NO_COLOR` | unset | When `1`, the statusline emits plain text — no ANSI color escapes. Useful on terminals that do not render colors well, or when piping the output. |
|
|
32
|
+
|
|
33
|
+
## Where each variable lives
|
|
34
|
+
|
|
35
|
+
```
|
|
36
|
+
AHK_DISABLE_TELEMETRY → scripts/telemetry-on-skill.sh, scripts/subagent-stop.sh
|
|
37
|
+
AHK_DISABLE_NOTIFY → scripts/notify-on-block.sh
|
|
38
|
+
AHK_DISABLE_HTML_OPEN → .claude/skills/deliver-html/scripts/wrap-html.mjs
|
|
39
|
+
AHK_DISABLE_HTML_NUDGE → .claude/skills/deliver-html/SKILL.md
|
|
40
|
+
AHK_DISABLE_JQ → scripts/_lib/jp.sh (probed by every hook that parses JSON)
|
|
41
|
+
AHK_ALLOW_BYPASS → scripts/userprompt-guard.sh, scripts/pretooluse-*.sh
|
|
42
|
+
AHK_HOOK_MODE → scripts/structural-test-on-edit.sh, scripts/pretooluse-edit-guard.sh, scripts/subagent-stop.sh
|
|
43
|
+
AHK_TELEMETRY_MAX_LINES→ scripts/_lib/telemetry.sh (used by telemetry-on-skill, subagent-stop, notify-on-block)
|
|
44
|
+
AHK_HEADLESS_RECOVER → scripts/precompletion-checklist.sh
|
|
45
|
+
AHK_STATUSLINE_NO_COLOR→ scripts/statusline.mjs
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
## Disabling vs. removing
|
|
49
|
+
|
|
50
|
+
Prefer env-var opt-out over removing a hook from `.claude/settings.json` —
|
|
51
|
+
the kit's structural-test and version-sync checks expect every hook listed in
|
|
52
|
+
`hooks.json` to be present. Removing a hook leaves the index claiming a
|
|
53
|
+
contract the file system no longer fulfills, and `agent-harness-kit upgrade`
|
|
54
|
+
will keep re-installing it.
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
# Auto-memory cheat sheet
|
|
2
|
+
|
|
3
|
+
Claude Code maintains a per-project memory directory at
|
|
4
|
+
`~/.claude/projects/<repo-slug>/memory/`. It persists across sessions and is
|
|
5
|
+
automatically loaded into the next conversation. The kit does **not**
|
|
6
|
+
manage this — Claude Code does. This file is a cheat sheet on when to push
|
|
7
|
+
something into it and when to leave it out.
|
|
8
|
+
|
|
9
|
+
## The four types
|
|
10
|
+
|
|
11
|
+
| Type | When to write | Half-life | Example |
|
|
12
|
+
| --- | --- | --- | --- |
|
|
13
|
+
| **user** | You learn something durable about the person you're working with — role, expertise, team, preferences | months to years | "data scientist, 10 years Go, new to React" |
|
|
14
|
+
| **feedback** | The user corrects an approach ("don't X") *or* validates an unusual choice ("yes the bundled PR was right") | months | "integration tests must hit a real DB, not mocks — burned by mock/prod divergence in Q3" |
|
|
15
|
+
| **project** | A fact about the work that the codebase itself does not show — deadlines, stakeholder decisions, the *why* behind a refactor | weeks to months | "auth rewrite is driven by legal compliance, not tech debt — scope toward compliance" |
|
|
16
|
+
| **reference** | A pointer to an external system — Linear project, Grafana dashboard, on-call runbook | until that system moves | "pipeline bugs in Linear project INGEST" |
|
|
17
|
+
|
|
18
|
+
## What's actually worth saving
|
|
19
|
+
|
|
20
|
+
Save when **all three** apply:
|
|
21
|
+
|
|
22
|
+
1. **Non-obvious from the code.** If `git log` or reading the file shows
|
|
23
|
+
it, the memory is dead weight.
|
|
24
|
+
2. **Survives across sessions.** Today's in-progress task is not memory —
|
|
25
|
+
it's a task list.
|
|
26
|
+
3. **Decision-shaping.** A future-you (or future-Claude) would behave
|
|
27
|
+
differently knowing it.
|
|
28
|
+
|
|
29
|
+
Trigger words from the user that mean "save this":
|
|
30
|
+
|
|
31
|
+
- "remember that …"
|
|
32
|
+
- "next time, do X / don't do X"
|
|
33
|
+
- "this is how we always do it"
|
|
34
|
+
- "the reason is …"
|
|
35
|
+
|
|
36
|
+
## What's NOT worth saving
|
|
37
|
+
|
|
38
|
+
- Code patterns, file paths, architecture diagrams — re-read the code.
|
|
39
|
+
- Git history, "who changed X last week" — `git log` is authoritative.
|
|
40
|
+
- Today's debug recipe — the fix lives in the commit; the commit message
|
|
41
|
+
has the context.
|
|
42
|
+
- A list of files you just edited — the diff has it.
|
|
43
|
+
- Anything documented in `CLAUDE.md` — it's already loaded.
|
|
44
|
+
|
|
45
|
+
Decline to save any of these even if the user explicitly asks. Bigger memory is
|
|
46
|
+
*not* better memory — every dead entry is noise the next session will
|
|
47
|
+
have to scan past.
|
|
48
|
+
|
|
49
|
+
## Working with what's there
|
|
50
|
+
|
|
51
|
+
Claude Code loads `MEMORY.md` (the index) into every conversation. So:
|
|
52
|
+
|
|
53
|
+
- If you want a memory honored, make sure its index line is concise and
|
|
54
|
+
specific. Bad: `notes.md — stuff`. Good: `feedback_tests.md — never
|
|
55
|
+
mock the database in integration tests`.
|
|
56
|
+
- If a memory turns out wrong or stale, ask Claude to remove or update it
|
|
57
|
+
— don't let it accumulate.
|
|
58
|
+
- Memory is **not** automatically synced across machines. If your
|
|
59
|
+
workflow spans laptops, treat it as scratch, not source-of-truth.
|
|
60
|
+
|
|
61
|
+
## Privacy and scope
|
|
62
|
+
|
|
63
|
+
- Memory lives in `~/.claude/` — local-only. Nothing is uploaded.
|
|
64
|
+
- A project-scoped memory is keyed by repo slug. Cloning the repo on a
|
|
65
|
+
new machine does **not** carry memory over.
|
|
66
|
+
- If a memory contains something sensitive (credentials, customer names),
|
|
67
|
+
delete it. The kit's `userprompt-guard` hook does not see memory; the
|
|
68
|
+
burden of redaction is on you.
|
|
69
|
+
|
|
70
|
+
## Related kit features
|
|
71
|
+
|
|
72
|
+
- The **telemetry-on-skill hook** writes a JSONL row to `.harness/telemetry.jsonl` on
|
|
73
|
+
every Skill invocation — that is observability for the kit, not memory.
|
|
74
|
+
- The **SessionStart hook** injects branch + uncommitted diff + current
|
|
75
|
+
feature as `additionalContext`. That is per-session context, not
|
|
76
|
+
memory.
|
|
77
|
+
- The **PROGRESS.md** file at `.harness/PROGRESS.md` is the human-readable
|
|
78
|
+
session log appended by `SessionEnd`. Useful next-day rehydration; not
|
|
79
|
+
memory.
|
|
80
|
+
|
|
81
|
+
Memory ≠ context. Memory persists. Context is rebuilt every session from
|
|
82
|
+
files in the repo.
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# _lib/jp.sh — source-only library. DO NOT execute directly.
|
|
3
|
+
#
|
|
4
|
+
# Provides three shared helpers used by every hook script that parses Claude
|
|
5
|
+
# Code's JSON stdin:
|
|
6
|
+
#
|
|
7
|
+
# have_jq — true iff jq is on PATH AND not disabled via env
|
|
8
|
+
# have_jp — true iff EITHER jq OR (node + _lib/json-pick.mjs) is usable
|
|
9
|
+
# jp <expr> [f] — run a jq-subset expression, preferring jq when present,
|
|
10
|
+
# else the Node fallback. Accepts optional file arg (some
|
|
11
|
+
# callers pass it; most read from stdin).
|
|
12
|
+
#
|
|
13
|
+
# Why this exists: the same ~14 lines were duplicated across 12 hook scripts.
|
|
14
|
+
# Single source of truth so fixing one bug (e.g. the "json-pick.mjs only
|
|
15
|
+
# supports one `// default` per expression" footgun documented in
|
|
16
|
+
# session-end.sh.hbs) only needs one edit, not twelve.
|
|
17
|
+
#
|
|
18
|
+
# Sourcing convention — the calling script MUST set _LIB_DIR before . :
|
|
19
|
+
#
|
|
20
|
+
# SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
|
|
21
|
+
# _LIB_DIR="$SCRIPT_DIR/_lib"
|
|
22
|
+
# . "$_LIB_DIR/jp.sh"
|
|
23
|
+
#
|
|
24
|
+
# Env vars:
|
|
25
|
+
# AHK_DISABLE_JQ=1 → pretend jq is missing; forces the Node fallback path.
|
|
26
|
+
# Lets us test the fallback on machines that have jq.
|
|
27
|
+
|
|
28
|
+
# have_jq — succeed only when jq may be used: it has not been switched off
# with AHK_DISABLE_JQ=1 and a `jq` command can be resolved by the shell.
# Returns 1 when disabled; otherwise the status of `command -v jq`.
have_jq() {
  if [ "${AHK_DISABLE_JQ:-}" = "1" ]; then
    # Explicit opt-out: behave as though jq were not installed.
    return 1
  fi
  command -v jq >/dev/null 2>&1
}
|
|
32
|
+
|
|
33
|
+
# have_jp — succeed when some JSON backend is usable: either jq (per have_jq)
# or the Node fallback, which needs both a `node` command and the
# json-pick.mjs helper inside $_LIB_DIR.
# Globals:  _LIB_DIR (read) — directory holding json-pick.mjs.
# Returns:  0 when a backend is available, 1 otherwise.
have_jp() {
  have_jq && return 0
  # An unset/empty _LIB_DIR means the caller skipped the sourcing convention.
  # Report "no fallback" instead of aborting under `set -u` or probing
  # /json-pick.mjs at the filesystem root.
  [ -n "${_LIB_DIR:-}" ] || return 1
  command -v node >/dev/null 2>&1 || return 1
  [ -f "$_LIB_DIR/json-pick.mjs" ]
}
|
|
40
|
+
|
|
41
|
+
# jp <expr> [file] — evaluate a jq-subset expression.
# Uses `jq -r` when have_jq succeeds, otherwise the Node helper at
# $_LIB_DIR/json-pick.mjs. The file argument is forwarded only when it is
# non-empty; without it the backend is invoked with the expression alone.
# The exit status is that of whichever backend ran.
jp() {
  if ! have_jq; then
    # Node fallback path.
    case "${2:-}" in
      '') node "$_LIB_DIR/json-pick.mjs" "$1" ;;
      *)  node "$_LIB_DIR/json-pick.mjs" "$1" "$2" ;;
    esac
    return
  fi

  # Preferred path: real jq, raw output.
  case "${2:-}" in
    '') jq -r "$1" ;;
    *)  jq -r "$1" "$2" ;;
  esac
}
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
// statusline-cache.mjs — tiny file-based memo for statusLine segments.
|
|
2
|
+
//
|
|
3
|
+
// Why this exists: Claude Code re-spawns the statusLine command on every
|
|
4
|
+
// refresh, so in-process memoization is useless — each invocation is a
|
|
5
|
+
// fresh node process. File-based cache keyed on `session_id` (stable per
|
|
6
|
+
// Claude Code session) is the documented pattern.
|
|
7
|
+
//
|
|
8
|
+
// The cache lives under $TMPDIR. Each key gets a separate file with mtime
|
|
9
|
+
// as the freshness signal. Reads bypass the file when stale; writes are
|
|
10
|
+
// best-effort (failure to write = next call recomputes, no error surfaced).
|
|
11
|
+
//
|
|
12
|
+
// Usage:
|
|
13
|
+
// import { cached } from "./statusline-cache.mjs";
|
|
14
|
+
// const branch = cached(
|
|
15
|
+
// { sessionId, key: "git-branch", ttlMs: 5000 },
|
|
16
|
+
// () => spawnSync("git", ["branch", "--show-current"], ...).stdout.trim(),
|
|
17
|
+
// );
|
|
18
|
+
|
|
19
|
+
import { readFileSync, writeFileSync, statSync, mkdirSync } from "node:fs";
|
|
20
|
+
import { tmpdir } from "node:os";
|
|
21
|
+
import { join } from "node:path";
|
|
22
|
+
|
|
23
|
+
const CACHE_DIR = join(tmpdir(), "ahk-statusline");
|
|
24
|
+
|
|
25
|
+
// Create the cache directory (and any missing parents). Errors are
// deliberately swallowed here.
function ensureDir() {
  try {
    mkdirSync(CACHE_DIR, { recursive: true });
  } catch {
    // ignored on purpose
  }
}
|
|
28
|
+
|
|
29
|
+
// Build the cache file path for a (session, key) pair.
// Both parts are reduced to [A-Za-z0-9_-] (everything else becomes "_") and
// length-capped (64 chars for the session, 32 for the key), so no path
// separator can survive into the file name. A falsy sessionId is replaced by
// the literal "no-session".
function cachePath(sessionId, key) {
  const scrub = (raw, maxLen) =>
    String(raw).replace(/[^A-Za-z0-9_-]/g, "_").slice(0, maxLen);

  const sessionPart = scrub(sessionId || "no-session", 64);
  const keyPart = scrub(key, 32);
  return join(CACHE_DIR, `${sessionPart}-${keyPart}.cache`);
}
|
|
35
|
+
|
|
36
|
+
// cached({ sessionId, key, ttlMs }, fetchFn) → string
//
// Returns the cached text for (sessionId, key) when the cache file's mtime is
// less than ttlMs old; otherwise calls fetchFn(), stringifies the result
// (null/undefined and a throwing fetchFn both become ""), tries to store it,
// and returns it.
//
// Kept synchronous: statusline.mjs is a one-shot command whose caller blocks
// on its output, so an async API would buy nothing.
export function cached({ sessionId, key, ttlMs }, fetchFn) {
  ensureDir();
  const file = cachePath(sessionId, key);

  // Fast path: serve the file if it is still fresh. Any stat/read error is
  // treated as a cache miss.
  try {
    const { mtimeMs } = statSync(file);
    const isFresh = Date.now() - mtimeMs < ttlMs;
    if (isFresh) return readFileSync(file, "utf8");
  } catch {
    // miss
  }

  // Slow path: recompute. A throwing fetchFn yields the empty string.
  let produced;
  try {
    produced = fetchFn();
  } catch {
    produced = "";
  }
  const text = String(produced ?? "");

  // Best-effort store: a failed write just means the next call recomputes.
  try {
    writeFileSync(file, text);
  } catch {
    // best-effort
  }
  return text;
}
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# _lib/telemetry.sh — source-only library. DO NOT execute directly.
|
|
3
|
+
#
|
|
4
|
+
# Provides telemetry_append <jsonl-line> — write one line to
|
|
5
|
+
# .harness/telemetry.jsonl and rotate when the file grows past
|
|
6
|
+
# AHK_TELEMETRY_MAX_LINES (default 5000).
|
|
7
|
+
#
|
|
8
|
+
# Why centralised:
|
|
9
|
+
# - Several hooks append (telemetry-on-skill, subagent-stop, notify-on-block). Rotation logic
|
|
10
|
+
# written once = one place to fix when the file format evolves.
|
|
11
|
+
# - harness-report.mjs only ever inspects the last 14 days; older lines are
|
|
12
|
+
# pure I/O cost. Bounding lines bounds report time at O(1).
|
|
13
|
+
#
|
|
14
|
+
# Env vars:
|
|
15
|
+
# AHK_DISABLE_TELEMETRY=1 → caller is expected to early-exit; this
|
|
16
|
+
# helper does NOT re-check (avoids double
|
|
17
|
+
# work) — gate in the caller.
|
|
18
|
+
# AHK_TELEMETRY_MAX_LINES=N → cap (default 5000). Set 0 to disable
|
|
19
|
+
# rotation entirely (keep unbounded).
|
|
20
|
+
|
|
21
|
+
# telemetry_append <jsonl-line>
#
# Appends one line to .harness/telemetry.jsonl (relative to the current
# directory) and, when rotation is enabled, trims the file to its newest
# AHK_TELEMETRY_MAX_LINES lines once it grows past that many.
#
# Globals:   AHK_TELEMETRY_MAX_LINES (read) — line cap, default 5000.
#            Any all-zero value (0, 00, ...) disables rotation; non-numeric
#            or absurdly long values fall back to the default.
# Arguments: $1 - the JSONL row; an empty/missing row is a no-op.
# Returns:   0 normally; the failing status of tail/mv when a rotation
#            attempt fails (the log itself is left untouched in that case).
telemetry_append() {
  # ${1:-} so a bare call does not abort a caller running under `set -u`.
  local line="${1:-}"
  [ -z "$line" ] && return 0
  mkdir -p .harness
  printf '%s\n' "$line" >> .harness/telemetry.jsonl

  local limit="${AHK_TELEMETRY_MAX_LINES:-5000}"
  # Non-numeric → fall back to default rather than failing the hook.
  case "$limit" in
    ''|*[!0-9]*) limit=5000 ;;
  esac
  # Too many digits to be a sane integer → treat like any other bad value.
  if [ "${#limit}" -gt 18 ]; then
    limit=5000
  fi
  # Normalise as base 10 so "00" means 0 and "08" is not parsed as octal.
  limit=$((10#$limit))
  # 0 = caller opted out of rotation explicitly. Checked numerically and
  # AFTER validation: a string compare against "0" let "00" through, and
  # `tail -n 00` then emptied the log on every append.
  [ "$limit" -eq 0 ] && return 0

  # Counting lines on every append is cheap at the sizes this file reaches
  # and avoids needing a daemon.
  local lines
  lines=$(wc -l < .harness/telemetry.jsonl 2>/dev/null || echo 0)
  # Some wc implementations pad the count with spaces; keep digits only.
  lines=${lines//[!0-9]/}
  [ "${lines:-0}" -gt "$limit" ] || return 0

  # Write the trimmed copy to a per-process temp file, then rename it over
  # the log so a reader never sees a half-written file. The PID suffix keeps
  # two hooks rotating at the same moment from sharing one temp file.
  local tmp=".harness/telemetry.jsonl.tmp.$$"
  local rc=0
  {
    tail -n "$limit" .harness/telemetry.jsonl > "$tmp" \
      && mv -- "$tmp" .harness/telemetry.jsonl
  } || rc=$?
  # Do not leave a stray temp file behind when rotation failed.
  [ "$rc" -eq 0 ] || rm -f -- "$tmp"
  return "$rc"
}
|