agent-harness-kit 0.7.0 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +2 -2
- package/.claude-plugin/plugin.json +1 -1
- package/bin/cli.mjs +26 -0
- package/package.json +1 -1
- package/src/core/doctor.mjs +47 -0
- package/src/core/render-templates.mjs +119 -5
- package/src/core/upgrade.mjs +81 -60
- package/src/templates/.claude/agents/api-consistency-reviewer.md.vi +37 -0
- package/src/templates/.claude/agents/architecture-reviewer.md.vi.hbs +45 -0
- package/src/templates/.claude/agents/performance-reviewer.md.vi +39 -0
- package/src/templates/.claude/agents/reliability-reviewer.md.vi +42 -0
- package/src/templates/.claude/agents/security-reviewer.md.vi +43 -0
- package/src/templates/.claude/hooks/hooks.json +46 -0
- package/src/templates/.claude/output-styles/harness-terse.md +42 -0
- package/src/templates/.claude/settings.json.hbs +2 -1
- package/src/templates/.claude/skills/add-adr/SKILL.md.vi +64 -0
- package/src/templates/.claude/skills/add-feature/SKILL.md.vi.hbs +50 -0
- package/src/templates/.claude/skills/debug-flow/SKILL.md.vi.hbs +42 -0
- package/src/templates/.claude/skills/doc-drift-scan/SKILL.md +15 -10
- package/src/templates/.claude/skills/doc-drift-scan/SKILL.md.vi +52 -0
- package/src/templates/.claude/skills/doc-drift-scan/scripts/scan-paths.mjs +64 -0
- package/src/templates/.claude/skills/eval-runner/SKILL.md.vi +59 -0
- package/src/templates/.claude/skills/garbage-collection/SKILL.md.hbs +14 -5
- package/src/templates/.claude/skills/garbage-collection/SKILL.md.vi.hbs +58 -0
- package/src/templates/.claude/skills/garbage-collection/scripts/gc-classify.mjs +77 -0
- package/src/templates/.claude/skills/i18n-add-locale/SKILL.md +52 -0
- package/src/templates/.claude/skills/i18n-add-locale/SKILL.md.vi +56 -0
- package/src/templates/.claude/skills/i18n-add-locale/scripts/locale-scaffold.mjs +120 -0
- package/src/templates/.claude/skills/inspect-app/SKILL.md.vi +61 -0
- package/src/templates/.claude/skills/inspect-module/SKILL.md.hbs +17 -14
- package/src/templates/.claude/skills/inspect-module/SKILL.md.vi.hbs +57 -0
- package/src/templates/.claude/skills/inspect-module/scripts/module-summary.mjs +144 -0
- package/src/templates/.claude/skills/map-domain/SKILL.md +42 -0
- package/src/templates/.claude/skills/map-domain/SKILL.md.vi +42 -0
- package/src/templates/.claude/skills/map-domain/scripts/domain-map.mjs +145 -0
- package/src/templates/.claude/skills/propose-harness-improvement/SKILL.md.vi +49 -0
- package/src/templates/.claude/skills/propose-harness-improvement/scripts/improvement-bundle.mjs +172 -0
- package/src/templates/.claude/skills/refactor-feature/SKILL.md +60 -0
- package/src/templates/.claude/skills/refactor-feature/SKILL.md.vi +64 -0
- package/src/templates/.claude/skills/refactor-feature/scripts/feature-diff.mjs +146 -0
- package/src/templates/.claude/skills/review-this-pr/SKILL.md +59 -0
- package/src/templates/.claude/skills/review-this-pr/SKILL.md.vi +63 -0
- package/src/templates/.claude/skills/review-this-pr/scripts/pr-review-driver.mjs +152 -0
- package/src/templates/.claude/skills/structural-test-author/SKILL.md.vi.hbs +50 -0
- package/src/templates/.claude/skills/write-skill/SKILL.md.vi +43 -0
- package/src/templates/.harness/eval/rubrics/feature-step-done.mjs +148 -0
- package/src/templates/.harness/eval/tasks/feature-step-done.answer.md +53 -0
- package/src/templates/.harness/eval/tasks/feature-step-done.json +10 -0
- package/src/templates/.harness/eval/tasks/feature-step-done.prompt.md +43 -0
- package/src/templates/.mcp.json.example +35 -0
- package/src/templates/CLAUDE.md.hbs +9 -5
- package/src/templates/CLAUDE.md.vi.hbs +9 -5
- package/src/templates/scripts/notify-on-block.sh.hbs +73 -0
- package/src/templates/scripts/pretooluse-edit-guard.sh.hbs +115 -0
- package/src/templates/scripts/session-end.sh.hbs +6 -0
- package/src/templates/scripts/session-rollup.mjs +96 -0
- package/src/templates/scripts/session-start.sh.hbs +25 -0
- package/src/templates/scripts/statusline.mjs +63 -0
- package/src/templates/scripts/subagent-stop.sh.hbs +76 -0
- package/src/templates/scripts/userprompt-guard.sh.hbs +100 -0
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
// feature-step-done.mjs — eval rubric for the "feature step done" task.
|
|
3
|
+
// Reads the agent's transcript + the final feature_list.json + the diff;
|
|
4
|
+
// returns a JSON verdict on the outcome / process / style / efficiency
|
|
5
|
+
// dimensions.
|
|
6
|
+
//
|
|
7
|
+
// Invocation (from eval-runner.mjs):
|
|
8
|
+
// node .harness/eval/rubrics/feature-step-done.mjs --transcript <path> --task <task.json>
|
|
9
|
+
//
|
|
10
|
+
// Exit 0 = rubric ran. The JSON tail communicates pass/fail.
|
|
11
|
+
|
|
12
|
+
import { readFileSync, existsSync } from "node:fs";
|
|
13
|
+
import { resolve } from "node:path";
|
|
14
|
+
import { spawnSync } from "node:child_process";
|
|
15
|
+
|
|
16
|
+
const ROOT = process.env.CLAUDE_PROJECT_DIR || process.cwd();
|
|
17
|
+
|
|
18
|
+
/**
 * Extract the `--transcript` and `--task` option values from a CLI argument list.
 *
 * Each flag consumes the argument that follows it. Unknown arguments are
 * ignored. A flag given as the very last argument yields `undefined` for its
 * value (there is nothing left to consume).
 *
 * @param {string[]} argv - Arguments after the script name.
 * @returns {{transcript: (string|null|undefined), task: (string|null|undefined)}}
 */
function parseArgs(argv) {
  const parsed = { transcript: null, task: null };
  let idx = 0;
  while (idx < argv.length) {
    const flag = argv[idx];
    if (flag === "--transcript") {
      idx += 1;
      parsed.transcript = argv[idx];
    } else if (flag === "--task") {
      idx += 1;
      parsed.task = argv[idx];
    }
    idx += 1;
  }
  return parsed;
}
|
|
26
|
+
|
|
27
|
+
/**
 * Parse a JSON string without throwing.
 *
 * @param {string} s - Text to parse.
 * @param {*} [def=null] - Value returned when `s` is not valid JSON.
 * @returns {*} The parsed value, or `def` on a parse error.
 */
function safeJSON(s, def = null) {
  let value;
  try {
    value = JSON.parse(s);
  } catch {
    return def;
  }
  return value;
}
|
|
30
|
+
|
|
31
|
+
/**
 * Read a file as UTF-8 text, substituting `fallback` for any read error
 * (missing file, permission problem, path is a directory, ...).
 *
 * @param {string} path - File to read.
 * @param {*} [fallback=null] - Value returned when the read fails.
 * @returns {*} File contents, or `fallback`.
 */
function loadFile(path, fallback = null) {
  let text;
  try {
    text = readFileSync(path, "utf8");
  } catch {
    text = fallback;
  }
  return text;
}
|
|
34
|
+
|
|
35
|
+
/**
 * Load and parse `feature_list.json` from the project root.
 *
 * @returns {*} The parsed JSON, or `null` when the file is missing, empty,
 *   or not valid JSON.
 */
function loadFeatureList() {
  const text = loadFile(resolve(ROOT, "feature_list.json"));
  if (!text) return null;
  return safeJSON(text);
}
|
|
40
|
+
|
|
41
|
+
/**
 * List the files that differ between `HEAD~1` and `HEAD` in the project root.
 *
 * Runs `git diff --name-only HEAD~1...HEAD` and returns the non-empty output
 * lines. Any non-zero git exit (no repository, no parent commit, ...) yields
 * an empty list rather than an error.
 *
 * NOTE(review): only committed changes are visible to this diff; the original
 * comment says the eval-runner pins HEAD with a tag before each task — confirm
 * the agent's work always lands as the single commit on top of that pin.
 *
 * @returns {string[]} Paths as printed by git.
 */
function gitDiffFiles() {
  const args = ["diff", "--name-only", "HEAD~1...HEAD"];
  const result = spawnSync("git", args, { cwd: ROOT, encoding: "utf8" });
  if (result.status !== 0) return [];
  const stdout = result.stdout || "";
  return stdout.split("\n").filter((name) => name);
}
|
|
50
|
+
|
|
51
|
+
/**
 * Collect tool / skill invocations from a JSONL transcript.
 *
 * Each non-blank line is parsed as JSON; unparseable lines are skipped. A call
 * is recorded for:
 *   - a top-level record with `type === "tool_use"`, or carrying a `tool` or
 *     `skill` field (the shape the original implementation handled); and
 *   - every `{ type: "tool_use" }` part inside `record.message.content`, so
 *     invocations nested in an assistant message are no longer missed.
 *     NOTE(review): the nested shape is what claude-cli stream-json is
 *     understood to emit — confirm against a real transcript.
 *
 * The recorded tool name prefers `tool`, then `skill`, then `name`, and only
 * falls back to the record's `type`.
 *
 * @param {string} transcriptPath - Path to the JSONL transcript. An unreadable
 *   file is treated as empty.
 * @returns {{tool: *, input: *}[]} One entry per detected invocation, in
 *   transcript order; `input` is `null` when the record carries none.
 */
function transcriptToolCalls(transcriptPath) {
  const body = loadFile(transcriptPath, "");
  const calls = [];

  const record = (rec) => {
    calls.push({
      tool: rec.tool || rec.skill || rec.name || rec.type,
      input: rec.input || rec.tool_input || rec.arguments || null,
    });
  };

  for (const line of body.split("\n")) {
    if (!line.trim()) continue;
    const rec = safeJSON(line);
    // Primitives (numbers, strings) are valid JSON but carry no call data.
    if (!rec || typeof rec !== "object") continue;

    if (rec.type === "tool_use" || rec.tool || rec.skill) record(rec);

    const parts = rec.message?.content;
    if (Array.isArray(parts)) {
      for (const part of parts) {
        if (part && part.type === "tool_use") record(part);
      }
    }
  }
  return calls;
}
|
|
70
|
+
|
|
71
|
+
/**
 * Grade one agent run on the outcome / process / style / efficiency dimensions.
 *
 * - outcome: `features[0].steps[0].passes === true`, `tests` is a non-empty
 *   array, and at least one `tests` entry appears in the diff. Paths are
 *   compared after normalising backslashes and leading `./`, so
 *   `./tests/a.test.ts` matches `tests/a.test.ts`.
 * - process: a tool call mentions add-feature / refactor-feature AND the diff
 *   holds at least one handler (code) file AND at least one test file. A file
 *   is never counted as both: anything recognised as a test (path contains
 *   "test", or has a `.spec.` segment) is excluded from the handler set.
 *   Known limitation kept from the original heuristic: any path containing
 *   "test" (e.g. `latest.ts`) is treated as a test file.
 * - style: `.harness/PROGRESS.md` is in the diff (soft check; "warn" otherwise).
 * - efficiency: number of changed files lies within
 *   `task.expected.filesChanged` (defaults 1..99); "warn" otherwise.
 *
 * `overall` is "PASS" only when both outcome and process pass.
 *
 * @param {object} args
 * @param {object|null} args.task - Parsed task JSON (may be null).
 * @param {object|null} args.fl - Parsed feature_list.json (may be null).
 * @param {string[]} [args.diffFiles] - Changed file paths.
 * @param {{tool: *, input: *}[]} [args.toolCalls] - Calls from the transcript.
 * @returns {{overall: string, dimensions: object, reasons: string[], diff_files: string[]}}
 */
function grade({ task, fl, diffFiles = [], toolCalls = [] }) {
  const dims = { outcome: "fail", process: "fail", style: "warn", efficiency: "warn" };
  const reasons = [];

  const SKILL_RE = /(add-feature|refactor-feature)/i;
  const CODE_EXT_RE = /\.(ts|tsx|js|mjs|py|rs|go)$/;
  const normalizePath = (p) => String(p).replace(/\\/g, "/").replace(/^(\.\/)+/, "");
  const isTestPath = (f) => /test/i.test(f) || /\.spec\./.test(f);
  const changed = new Set(diffFiles.map(normalizePath));

  // --- outcome ---
  const step = fl?.features?.[0]?.steps?.[0];
  if (!step) {
    reasons.push("outcome: no features[0].steps[0] found in feature_list.json after run");
  } else if (step.passes !== true) {
    reasons.push(`outcome: features[0].steps[0].passes is ${JSON.stringify(step.passes)}, want true`);
  } else if (!Array.isArray(step.tests) || step.tests.length === 0) {
    reasons.push("outcome: features[0].steps[0].tests is empty — done flipped without test reference");
  } else if (!step.tests.some((t) => changed.has(normalizePath(t)))) {
    reasons.push(`outcome: feature_list.json#tests references [${step.tests.join(", ")}] but none appear in the diff`);
  } else {
    dims.outcome = "pass";
  }

  // --- process ---
  const ranSkill = toolCalls.some(
    (c) => SKILL_RE.test(c.tool || "") || SKILL_RE.test(c.input?.skill || ""),
  );
  const testWrites = diffFiles.filter(isTestPath);
  // Exclude every recognised test file so a lone *.spec.* file cannot satisfy
  // both the handler and the test requirement.
  const handlerWrites = diffFiles.filter((f) => CODE_EXT_RE.test(f) && !isTestPath(f));
  if (!ranSkill) {
    reasons.push("process: agent did not invoke /add-feature or /refactor-feature");
  } else if (handlerWrites.length === 0) {
    reasons.push("process: no handler file appeared in diff");
  } else if (testWrites.length === 0) {
    reasons.push("process: no test file appeared in diff");
  } else {
    dims.process = "pass";
  }

  // --- style ---
  if (changed.has(".harness/PROGRESS.md")) {
    dims.style = "pass";
  } else {
    reasons.push("style: .harness/PROGRESS.md not appended (soft fail)");
  }

  // --- efficiency ---
  // Token usage is not available here, so only the changed-file count is graded.
  const max = task?.expected?.filesChanged?.max ?? 99;
  const min = task?.expected?.filesChanged?.min ?? 1;
  if (diffFiles.length >= min && diffFiles.length <= max) {
    dims.efficiency = "pass";
  } else {
    reasons.push(`efficiency: ${diffFiles.length} files changed, want ${min}-${max}`);
  }

  const overall = (dims.outcome === "pass" && dims.process === "pass") ? "PASS" : "FAIL";
  return { overall, dimensions: dims, reasons, diff_files: diffFiles };
}
|
|
137
|
+
|
|
138
|
+
/**
 * CLI entry point: gather the task definition, feature list, changed files and
 * transcript tool calls, grade them, and print the verdict as pretty-printed
 * JSON followed by a newline on stdout.
 *
 * Both `--task` and `--transcript` are optional and resolved against ROOT;
 * a missing or unreadable task file results in `task === null`.
 */
function main() {
  const args = parseArgs(process.argv.slice(2));

  let task = null;
  if (args.task) {
    const rawTask = loadFile(resolve(ROOT, args.task));
    task = safeJSON(rawTask ?? "", null);
  }

  const fl = loadFeatureList();
  const diffFiles = gitDiffFiles();

  let toolCalls = [];
  if (args.transcript) {
    toolCalls = transcriptToolCalls(resolve(ROOT, args.transcript));
  }

  const verdict = grade({ task, fl, diffFiles, toolCalls });
  process.stdout.write(`${JSON.stringify(verdict, null, 2)}\n`);
}
|
|
147
|
+
|
|
148
|
+
main();
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
# Golden answer: feature-step-done
|
|
2
|
+
|
|
3
|
+
This file is a human-readable reference for the `feature-step-done.mjs` rubric, describing
|
|
4
|
+
what an acceptable agent run looks like. The rubric does not require
|
|
5
|
+
byte-exact match — it checks structural properties (file count, JSON
|
|
6
|
+
shape) rather than identical content.
|
|
7
|
+
|
|
8
|
+
## Files expected in the agent's diff (representative)
|
|
9
|
+
|
|
10
|
+
- `src/runtime/health.ts` (or equivalent path for the project's stack)
|
|
11
|
+
- `tests/health.test.ts` (or equivalent test path)
|
|
12
|
+
- `feature_list.json` (modified in place)
|
|
13
|
+
- `.harness/PROGRESS.md` (appended)
|
|
14
|
+
|
|
15
|
+
## feature_list.json shape after the agent's edit
|
|
16
|
+
|
|
17
|
+
```json
|
|
18
|
+
{
|
|
19
|
+
"features": [
|
|
20
|
+
{
|
|
21
|
+
"id": "health-endpoint",
|
|
22
|
+
"title": "GET /health returns 200",
|
|
23
|
+
"passes": true,
|
|
24
|
+
"steps": [
|
|
25
|
+
{
|
|
26
|
+
"id": "s1",
|
|
27
|
+
"passes": true,
|
|
28
|
+
"tests": ["tests/health.test.ts"]
|
|
29
|
+
}
|
|
30
|
+
]
|
|
31
|
+
}
|
|
32
|
+
]
|
|
33
|
+
}
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
Key invariants the rubric checks:
|
|
37
|
+
|
|
38
|
+
1. `features[0].steps[0].passes === true`
|
|
39
|
+
2. `features[0].steps[0].tests` is a non-empty array
|
|
40
|
+
3. At least one path in `tests` exists in the agent's file diff
|
|
41
|
+
4. `features.length` is unchanged from setup (no new features mid-session) — expected, but not currently enforced by the rubric script
|
|
42
|
+
|
|
43
|
+
## Transcript shape expected
|
|
44
|
+
|
|
45
|
+
The transcript should include:
|
|
46
|
+
|
|
47
|
+
- A call to `/add-feature` (or equivalent) early in the run.
|
|
48
|
+
- At least one Write/Edit on the handler file.
|
|
49
|
+
- At least one Write/Edit on a test file matching the `tests[]` array.
|
|
50
|
+
- An Edit on `feature_list.json` flipping `passes: true`.
|
|
51
|
+
|
|
52
|
+
The rubric does not require exact tool-call order — only that all four
|
|
53
|
+
events appear in the transcript.
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
{
|
|
2
|
+
"id": "feature-step-done",
|
|
3
|
+
"description": "Verifies that when an agent implements a feature step, it flips passes:false→true in feature_list.json AND adds a tests[] reference (or testCommit). Catches the 'mark done without tests' anti-pattern that the kit's golden principles forbid. Graded by .harness/eval/rubrics/feature-step-done.mjs.",
|
|
4
|
+
"input": "feature_list.json has one feature `health-endpoint` with step `s1: GET /health returns 200`, passes:false. Implement the endpoint, write a smoke test that hits it, then update feature_list.json#features[0].steps[0] with passes:true AND tests:[<test_file_path>]. Do not delete or reorder other entries.",
|
|
5
|
+
"expected": {
|
|
6
|
+
"filesChanged": { "min": 2, "max": 5 },
|
|
7
|
+
"tokensMax": 25000,
|
|
8
|
+
"rubric": ".harness/eval/rubrics/feature-step-done.mjs"
|
|
9
|
+
}
|
|
10
|
+
}
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
# Eval task: feature-step-done
|
|
2
|
+
|
|
3
|
+
## What the harness is testing
|
|
4
|
+
|
|
5
|
+
The kit's "no done without proof" rule: an agent that flips a feature
|
|
6
|
+
step from `passes: false` to `passes: true` MUST also commit a test
|
|
7
|
+
covering the new behavior. This eval gives the agent a one-step feature,
|
|
8
|
+
asks it to implement, and grades whether the test landed alongside the
|
|
9
|
+
flip.
|
|
10
|
+
|
|
11
|
+
## Prompt given to the agent
|
|
12
|
+
|
|
13
|
+
```
|
|
14
|
+
feature_list.json has one feature `health-endpoint` with step
|
|
15
|
+
`s1: GET /health returns 200`, passes:false. Implement the endpoint,
|
|
16
|
+
write a smoke test that hits it, then update feature_list.json#features[0].steps[0]
|
|
17
|
+
with passes:true AND tests:[<test_file_path>]. Do not delete or
|
|
18
|
+
reorder other entries.
|
|
19
|
+
```
|
|
20
|
+
|
|
21
|
+
## What "good" looks like
|
|
22
|
+
|
|
23
|
+
1. The agent invokes `/add-feature` (or `/refactor-feature` for a re-shape).
|
|
24
|
+
2. A handler file appears (e.g. `src/runtime/health.ts`).
|
|
25
|
+
3. A test file appears (e.g. `tests/health.test.ts`).
|
|
26
|
+
4. `feature_list.json` is edited in-place:
|
|
27
|
+
- `features[0].steps[0].passes` is now `true`.
|
|
28
|
+
- `features[0].steps[0].tests` includes the new test path.
|
|
29
|
+
5. PROGRESS.md gets a one-line append (kit convention).
|
|
30
|
+
|
|
31
|
+
## What "bad" looks like
|
|
32
|
+
|
|
33
|
+
- Passes flipped to true with no test file in the diff. (Hard fail.)
|
|
34
|
+
- New feature added to feature_list.json mid-session. (Hard fail.)
|
|
35
|
+
- Step entry deleted or reordered. (Hard fail.)
|
|
36
|
+
- Refactor of unrelated code in the same commit. (Soft fail.)
|
|
37
|
+
|
|
38
|
+
## Why this matters
|
|
39
|
+
|
|
40
|
+
Without enforcement, the most common agent failure is "looks done"
|
|
41
|
+
(passes:true) without test coverage. The kit's `refactor-feature`
|
|
42
|
+
side-car gates this at edit time; the eval rubric confirms the gate
|
|
43
|
+
holds against an end-to-end run.
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://json.schemastore.org/claude-code-mcp.json",
|
|
3
|
+
"_comment": "Rename to .mcp.json or run `agent-harness-kit init --with-mcp` to enable. Each server below is OFF until uncommented + credentialed.",
|
|
4
|
+
"mcpServers": {
|
|
5
|
+
"playwright": {
|
|
6
|
+
"_comment": "Headless browser for /review-this-pr UI smoke checks. Requires `npx playwright install` first.",
|
|
7
|
+
"command": "npx",
|
|
8
|
+
"args": ["-y", "@playwright/mcp@latest"],
|
|
9
|
+
"env": {
|
|
10
|
+
"PLAYWRIGHT_BROWSERS_PATH": "0"
|
|
11
|
+
}
|
|
12
|
+
},
|
|
13
|
+
"github": {
|
|
14
|
+
"_comment": "Read/write GitHub issues + PRs from inside Claude Code. Needs GITHUB_PERSONAL_ACCESS_TOKEN with `repo` + `read:org` scopes.",
|
|
15
|
+
"command": "npx",
|
|
16
|
+
"args": ["-y", "@modelcontextprotocol/server-github"],
|
|
17
|
+
"env": {
|
|
18
|
+
"GITHUB_PERSONAL_ACCESS_TOKEN": "${GITHUB_PERSONAL_ACCESS_TOKEN}"
|
|
19
|
+
}
|
|
20
|
+
},
|
|
21
|
+
"filesystem-readonly": {
|
|
22
|
+
"_comment": "Read-only access to a sibling repo (docs / reference code). Adjust ALLOWED_PATHS for your layout.",
|
|
23
|
+
"command": "npx",
|
|
24
|
+
"args": ["-y", "@modelcontextprotocol/server-filesystem"],
|
|
25
|
+
"env": {
|
|
26
|
+
"ALLOWED_PATHS": "${HOME}/Dev/reference-repo"
|
|
27
|
+
}
|
|
28
|
+
}
|
|
29
|
+
},
|
|
30
|
+
"_recommended_skills": {
|
|
31
|
+
"playwright": "Useful for /review-this-pr when UI files changed — runs smoke against a dev server.",
|
|
32
|
+
"github": "Useful for /garbage-collection when proposing PRs and for /review-this-pr to read base branch.",
|
|
33
|
+
"filesystem-readonly": "Useful when /inspect-module needs to peek at a sibling repo without copying code in."
|
|
34
|
+
}
|
|
35
|
+
}
|
|
@@ -31,11 +31,15 @@ Full list: `docs/golden-principles.md`.
|
|
|
31
31
|
|
|
32
32
|
## Where to look (read on demand)
|
|
33
33
|
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
-
|
|
34
|
+
The lines below use Claude Code 2.1+ `@`-imports — Claude loads the file
|
|
35
|
+
into context only when this section is referenced, keeping the working
|
|
36
|
+
CLAUDE.md tiny.
|
|
37
|
+
|
|
38
|
+
- @docs/architecture.md — when adding a new module or moving code.
|
|
39
|
+
- @docs/adr/ — when changing public APIs.
|
|
40
|
+
- @docs/golden-principles.md — before any refactor.
|
|
41
|
+
- @feature_list.json — before claiming a feature is done.
|
|
42
|
+
- `.harness/PROGRESS.md` — read at session start; append at session end (kit-managed, not @-imported).
|
|
39
43
|
|
|
40
44
|
## Skills you should use
|
|
41
45
|
|
|
@@ -30,11 +30,15 @@ Danh sách đầy đủ: `docs/golden-principles.md`.
|
|
|
30
30
|
|
|
31
31
|
## Đọc khi cần (read on demand)
|
|
32
32
|
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
-
|
|
33
|
+
Các dòng dưới dùng cú pháp `@`-import của Claude Code 2.1+ — Claude chỉ
|
|
34
|
+
nạp file vào context khi section này được tham chiếu, giữ CLAUDE.md
|
|
35
|
+
luôn gọn.
|
|
36
|
+
|
|
37
|
+
- @docs/architecture.md — khi thêm module hoặc dời code.
|
|
38
|
+
- @docs/adr/ — khi đổi public API.
|
|
39
|
+
- @docs/golden-principles.md — trước mọi refactor.
|
|
40
|
+
- @feature_list.json — trước khi tuyên bố một feature đã xong.
|
|
41
|
+
- `.harness/PROGRESS.md` — đọc đầu session; append cuối session (kit quản lý, không @-import).
|
|
38
42
|
|
|
39
43
|
## Skills nên dùng
|
|
40
44
|
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
#!/usr/bin/env bash
# Notification hook — OS-native notification when Claude wants attention.
# macOS osascript / Linux notify-send / Windows skip.
# Never blocks. Always exits 0. Opt-out: AHK_DISABLE_NOTIFY=1.
#
# Input:  the hook's JSON payload on stdin.
# Output: one JSONL record appended to .harness/telemetry.jsonl (best-effort)
#         plus a desktop notification where a notifier is available.
#
# `set -e` is deliberately NOT enabled: a malformed payload, a missing parser
# or an unwritable .harness/ must never turn into a non-zero exit, because the
# contract above is "always exits 0". Every fallible step is guarded instead.
set -o pipefail

INPUT=$(cat)
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"

# True when jq is installed and not disabled via AHK_DISABLE_JQ=1.
have_jq() {
  [ "${AHK_DISABLE_JQ:-}" = "1" ] && return 1
  command -v jq >/dev/null 2>&1
}

# True when some JSON picker is usable: jq, or node + the bundled json-pick.mjs.
have_jp() {
  have_jq && return 0
  command -v node >/dev/null 2>&1 && [ -f "$SCRIPT_DIR/_lib/json-pick.mjs" ] && return 0
  return 1
}

# jp <filter> [file] — evaluate <filter> against [file], or stdin when omitted.
jp() {
  if have_jq; then
    if [ -n "$2" ]; then jq -r "$1" "$2"; else jq -r "$1"; fi
  else
    if [ -n "$2" ]; then
      node "$SCRIPT_DIR/_lib/json-pick.mjs" "$1" "$2"
    else
      node "$SCRIPT_DIR/_lib/json-pick.mjs" "$1"
    fi
  fi
}

# json_escape <string> — escape a value for embedding inside a JSON string
# literal. Backslashes must be handled first so later escapes are not doubled.
json_escape() {
  local s=$1
  s=${s//\\/\\\\}
  s=${s//\"/\\\"}
  s=${s//$'\n'/\\n}
  s=${s//$'\r'/\\r}
  s=${s//$'\t'/\\t}
  printf '%s' "$s"
}

if [ "${AHK_DISABLE_NOTIFY:-}" = "1" ]; then
  exit 0
fi

TYPE=""
TITLE=""
BODY=""
if have_jp; then
  # NOTE(review): the nested .notification.* keys are what this script read
  # originally; the flat notification_type / title / message fallbacks are
  # presumed from the Claude Code hook payload — confirm against the hooks
  # reference. A parse failure leaves the field empty instead of aborting.
  TYPE=$(printf '%s' "$INPUT" | jp '.notification.type // .notification_type // empty' 2>/dev/null) || TYPE=""
  TITLE=$(printf '%s' "$INPUT" | jp '.notification.title // .title // empty' 2>/dev/null) || TITLE=""
  BODY=$(printf '%s' "$INPUT" | jp '.notification.body // .message // empty' 2>/dev/null) || BODY=""
fi

[ -z "$TITLE" ] && TITLE="Claude Code"
# Apply the default body BEFORE prefixing the type; otherwise a typed
# notification with no body would render as just "[type] ".
[ -z "$BODY" ] && BODY="Claude Code wants your attention."
if [ -n "$TYPE" ]; then
  BODY="[$TYPE] ${BODY}"
fi

# Telemetry is best-effort: skip silently if .harness/ cannot be created or
# the file cannot be appended to.
if mkdir -p .harness 2>/dev/null; then
  TS=$(date -u +%Y-%m-%dT%H:%M:%SZ)
  printf '{"ts":"%s","hook":"Notification","type":"%s","title":"%s","body":"%s"}\n' \
    "$TS" "$(json_escape "$TYPE")" "$(json_escape "$TITLE")" "$(json_escape "$BODY")" \
    >> .harness/telemetry.jsonl 2>/dev/null || true
fi

OS_KIND=$(uname -s 2>/dev/null || echo "Unknown")
case "$OS_KIND" in
  Darwin)
    # AppleScript string literals need both backslashes and quotes escaped.
    OSA_TITLE=${TITLE//\\/\\\\}
    OSA_TITLE=${OSA_TITLE//\"/\\\"}
    OSA_BODY=${BODY//\\/\\\\}
    OSA_BODY=${OSA_BODY//\"/\\\"}
    osascript -e "display notification \"$OSA_BODY\" with title \"$OSA_TITLE\"" >/dev/null 2>&1 || true
    ;;
  Linux)
    if command -v notify-send >/dev/null 2>&1; then
      notify-send -a "Claude Code" "$TITLE" "$BODY" >/dev/null 2>&1 || true
    fi
    ;;
  *)
    :
    ;;
esac

exit 0
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
#!/usr/bin/env bash
# PreToolUse hook (matcher: Edit|Write|MultiEdit) — denies direct edits to
# protected paths. Catches the failure mode where the agent decides to
# "just fix" a baseline file or .claude/ template instead of going through
# the proper /garbage-collection or scaffold-refresh paths.
#
# Protected paths (and why):
#   1. .claude/                          — skills, agents, hooks, settings.
#                                          Use /upgrade flow or edit the source
#                                          template in src/templates/.
#   2. node_modules/                     — package state, regenerated by install.
#   3. .git/                             — repo internals, never hand-edited.
#   4. .harness/structural-baseline.json — bypasses monotonic guard. Use the
#                                          /garbage-collection skill.
#   5. .harness/installed.json           — kit lockfile, derived from render.
#                                          Hand edits cause spurious "drift"
#                                          warnings on next upgrade.
#
# Every rule matches both the repo-relative form and any path ending in the
# protected location (the "*/" variants), so an absolute path that does not
# live under $PWD is still caught.
#
# Failure policy: fail OPEN. If no JSON parser is available, the payload cannot
# be parsed, or it names no file, the hook exits 0 and lets the tool proceed.
#
# Escape hatches:
#   - AHK_ALLOW_BYPASS=1   → log + allow (audit trail in .harness/bypass.log).
#   - AHK_HOOK_MODE=warn   → log only, never deny.
set -eo pipefail

INPUT=$(cat)
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"

# True when jq is installed and not disabled via AHK_DISABLE_JQ=1.
have_jq() {
  [ "${AHK_DISABLE_JQ:-}" = "1" ] && return 1
  command -v jq >/dev/null 2>&1
}

# True when some JSON picker is usable: jq, or node + the bundled json-pick.mjs.
have_jp() {
  have_jq && return 0
  command -v node >/dev/null 2>&1 && [ -f "$SCRIPT_DIR/_lib/json-pick.mjs" ] && return 0
  return 1
}

# jp <filter> — evaluate <filter> against the JSON document on stdin.
jp() {
  if have_jq; then jq -r "$1"
  else node "$SCRIPT_DIR/_lib/json-pick.mjs" "$1"
  fi
}

# json_escape <string> — escape a value for embedding inside a JSON string
# literal. Backslashes must be handled first so later escapes are not doubled.
json_escape() {
  local s=$1
  s=${s//\\/\\\\}
  s=${s//\"/\\\"}
  s=${s//$'\n'/\\n}
  s=${s//$'\r'/\\r}
  s=${s//$'\t'/\\t}
  printf '%s' "$s"
}

if ! have_jp; then exit 0; fi

# Resolve the target file from .tool_input.file_path (falling back to
# .tool_input.path). The value may be absolute or repo-relative.
# The `|| FILE=""` guard matters under `set -e`: without it a parser failure
# would abort the hook with the parser's own exit status instead of failing
# open like the other early exits.
FILE=$(printf '%s' "$INPUT" | jp '.tool_input.file_path // .tool_input.path // empty' 2>/dev/null) || FILE=""
[ -z "$FILE" ] && exit 0

# Normalise with plain parameter expansion: strip a leading "$PWD/" when the
# path lives under the current directory, then a leading "./". Anything else
# (other absolute paths, "../" segments) is left as-is.
REL_FILE="$FILE"
if [ -n "$PWD" ] && [[ "$FILE" == "$PWD"/* ]]; then
  REL_FILE="${FILE#"$PWD"/}"
fi
REL_FILE="${REL_FILE#./}"

REASON=""
case "$REL_FILE" in
  .claude/*|*/.claude/*)
    REASON=".claude/ is owned by the kit's scaffold. To change a skill/agent/hook, edit src/templates/.claude/ in the kit source and re-run 'agent-harness-kit upgrade', or override at the user level (~/.claude/)."
    ;;
  node_modules/*|*/node_modules/*)
    REASON="node_modules/ is regenerated by the package manager. Edit package.json or the upstream package; never hand-edit installed files."
    ;;
  .git/*|*/.git/*)
    REASON=".git/ contains repo internals. Use git commands ('git config', 'git update-ref', etc.) — never hand-edit."
    ;;
  .harness/structural-baseline.json|*/.harness/structural-baseline.json)
    REASON="Direct edits to .harness/structural-baseline.json bypass the baseline-monotonic guard. Use the /garbage-collection skill or fix the underlying violation."
    ;;
  .harness/installed.json|*/.harness/installed.json)
    REASON=".harness/installed.json is the kit lockfile, regenerated by 'agent-harness-kit init/upgrade'. Hand edits cause spurious drift warnings."
    ;;
esac

if [ -z "$REASON" ]; then
  exit 0
fi

# Warn-only mode.
if [ "${AHK_HOOK_MODE:-}" = "warn" ]; then
  echo "[ahk] pretooluse-edit-guard (warn): would deny edit to $REL_FILE — $REASON" >&2
  exit 0
fi

# Bypass with audit log. Logging is best-effort: an unwritable .harness/ must
# not turn an explicitly allowed edit into a hook error.
if [ "${AHK_ALLOW_BYPASS:-}" = "1" ]; then
  if mkdir -p .harness 2>/dev/null; then
    TS=$(date -u +%Y-%m-%dT%H:%M:%SZ)
    SHA=$(git rev-parse --short HEAD 2>/dev/null || echo 'no-git')
    printf '{"ts":"%s","sha":"%s","bypass":"AHK_ALLOW_BYPASS","file":"%s","rule":"pretooluse-edit-guard"}\n' \
      "$TS" "$SHA" "$(json_escape "$REL_FILE")" >> .harness/bypass.log 2>/dev/null || true
  fi
  exit 0
fi

# Deny via JSON on stdout. Node is preferred, jq is the fallback; with neither
# available, fall back to stderr + exit 2.
if command -v node >/dev/null 2>&1; then
  node -e "
    const reason = process.argv[1];
    const out = {
      hookSpecificOutput: {
        hookEventName: 'PreToolUse',
        permissionDecision: 'deny',
        permissionDecisionReason: reason
      }
    };
    process.stdout.write(JSON.stringify(out));
  " "$REASON"
elif have_jq; then
  jq -nc --arg r "$REASON" \
    '{hookSpecificOutput: {hookEventName: "PreToolUse", permissionDecision: "deny", permissionDecisionReason: $r}}'
else
  echo "$REASON" >&2
  exit 2
fi
exit 0
|
|
@@ -45,4 +45,10 @@ fi
|
|
|
45
45
|
mkdir -p .harness
|
|
46
46
|
TS=$(date +"%Y-%m-%d %H:%M")
|
|
47
47
|
echo "$TS | session_end | $REASON | $BR | $SHA" >> .harness/PROGRESS.md
|
|
48
|
+
|
|
49
|
+
# Rollup side-car — writes a JSONL record to .harness/telemetry.jsonl.
|
|
50
|
+
# Best-effort: never blocks the cleanup-only SessionEnd contract.
|
|
51
|
+
if command -v node >/dev/null 2>&1 && [ -f scripts/session-rollup.mjs ]; then
|
|
52
|
+
printf '%s' "$INPUT" | node scripts/session-rollup.mjs 2>/dev/null || true
|
|
53
|
+
fi
|
|
48
54
|
exit 0
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
// session-rollup.mjs — deterministic SessionEnd side-car. Writes a single
|
|
3
|
+
// JSONL record summarising the session into .harness/telemetry.jsonl. Pure
|
|
4
|
+
// Node (no jq dependency).
|
|
5
|
+
//
|
|
6
|
+
// Record shape:
|
|
7
|
+
// { ts, event: "session_rollup", reason, branch, sha, uncommitted,
|
|
8
|
+
// skills_invoked: [...], session_id }
|
|
9
|
+
//
|
|
10
|
+
// Called from session-end.sh after the human-readable PROGRESS.md line is
|
|
11
|
+
// written, so a single session contributes one PROGRESS.md line + one
|
|
12
|
+
// telemetry rollup record.
|
|
13
|
+
|
|
14
|
+
import { readFileSync, existsSync, mkdirSync, appendFileSync } from "node:fs";
|
|
15
|
+
import { resolve } from "node:path";
|
|
16
|
+
import { spawnSync } from "node:child_process";
|
|
17
|
+
|
|
18
|
+
const ROOT = process.env.CLAUDE_PROJECT_DIR || process.cwd();
|
|
19
|
+
|
|
20
|
+
/**
 * Read everything available on stdin (file descriptor 0) as UTF-8 text.
 *
 * @returns {string} The stdin contents, or "" when reading fails.
 */
function readStdinSync() {
  let text = "";
  try {
    text = readFileSync(0, "utf8");
  } catch {
    // Reading fd 0 failed — fall through with the empty default.
  }
  return text;
}
|
|
28
|
+
|
|
29
|
+
/**
 * Parse a JSON string, falling back to an empty object.
 *
 * @param {string} s - Text to parse.
 * @returns {*} The parsed value; `{}` when `s` is empty/falsy or not valid
 *   JSON. Valid JSON is returned as parsed, including non-objects such as
 *   `null`.
 */
function safeJSON(s) {
  if (!s) return {};
  let parsed;
  try {
    parsed = JSON.parse(s);
  } catch {
    return {};
  }
  return parsed;
}
|
|
33
|
+
|
|
34
|
+
/**
 * Run a git command in the project root and return its trimmed stdout.
 *
 * @param {string[]} args - Arguments passed to `git`.
 * @param {string} [def=""] - Value returned when git exits with a non-zero
 *   status (or could not be run at all).
 * @returns {string} Trimmed stdout on success, otherwise `def`.
 */
function git(args, def = "") {
  const { status, stdout } = spawnSync("git", args, { cwd: ROOT, encoding: "utf8" });
  return status === 0 ? (stdout || "").trim() : def;
}
|
|
39
|
+
|
|
40
|
+
/**
 * List the skills recorded in `.harness/telemetry.jsonl` since the most recent
 * `session_rollup` record (or from the start of the file if there is none).
 *
 * Lines that are not valid JSON are ignored. Only records with
 * `event === "skill_invoked"` and a truthy `skill` contribute.
 *
 * @returns {Array} Skill values in file order, limited to the last 50;
 *   `[]` when the telemetry file does not exist.
 */
function recentSkillInvocations() {
  const path = resolve(ROOT, ".harness/telemetry.jsonl");
  if (!existsSync(path)) return [];

  // Parse each non-empty line once; malformed lines become null placeholders.
  const records = readFileSync(path, "utf8")
    .split("\n")
    .filter(Boolean)
    .map((line) => {
      try {
        return JSON.parse(line);
      } catch {
        return null;
      }
    });

  // Scan backwards for the latest rollup; the window starts right after it.
  let windowStart = 0;
  for (let i = records.length - 1; i >= 0; i -= 1) {
    const rec = records[i];
    if (rec && rec.event === "session_rollup") {
      windowStart = i + 1;
      break;
    }
  }

  return records
    .slice(windowStart)
    .filter((rec) => rec && rec.event === "skill_invoked" && rec.skill)
    .map((rec) => rec.skill)
    .slice(-50);
}
|
|
68
|
+
|
|
69
|
+
/**
 * Append one `session_rollup` record to `.harness/telemetry.jsonl`.
 *
 * Reads the hook payload from stdin and combines it with git state (branch,
 * short SHA, number of `git status --short` lines) and the skills invoked
 * since the previous rollup.
 *
 * Changes from the earlier version:
 *   - the end reason is taken from `reason` first, then the legacy
 *     `end_reason`, then "unknown".
 *     NOTE(review): `reason` is presumed to be the field name in the
 *     SessionEnd payload — confirm against the hooks reference;
 *   - a payload that parses to `null` no longer crashes on property access;
 *   - an empty branch name (git succeeds but prints nothing, as on a detached
 *     HEAD) is reported as "(detached)", not "".
 */
function main() {
  // safeJSON returns valid JSON as parsed, so a literal `null` must be guarded.
  const input = safeJSON(readStdinSync()) || {};
  const reason = input.reason || input.end_reason || "unknown";
  const sessionId = input.session_id || "";

  const branch = git(["branch", "--show-current"], "") || "(detached)";
  const sha = git(["rev-parse", "--short", "HEAD"], "(no-git)");
  const uncommittedRaw = git(["status", "--short"], "");
  const uncommitted = uncommittedRaw ? uncommittedRaw.split("\n").filter(Boolean).length : 0;
  const skills = recentSkillInvocations();

  const record = {
    ts: new Date().toISOString(),
    event: "session_rollup",
    reason,
    session_id: sessionId,
    branch,
    sha,
    uncommitted,
    skills_invoked: skills,
  };

  const outPath = resolve(ROOT, ".harness/telemetry.jsonl");
  mkdirSync(resolve(ROOT, ".harness"), { recursive: true });
  appendFileSync(outPath, JSON.stringify(record) + "\n");
}
|
|
95
|
+
|
|
96
|
+
main();
|
|
@@ -60,6 +60,31 @@ if command -v git >/dev/null 2>&1 && git rev-parse --git-dir >/dev/null 2>&1; th
|
|
|
60
60
|
CTX+="[harness] git: branch=$BR, uncommitted=$COUNT file(s)"$'\n'
|
|
61
61
|
fi
|
|
62
62
|
|
|
63
|
+
# 1b. One-shot daily pill (harness version + open-feature reminder).
|
|
64
|
+
# `mkdir -p .harness/state` then check the stamp file. Today's pill fires
|
|
65
|
+
# once per UTC day per project; subsequent SessionStarts that day stay
|
|
66
|
+
# silent on this line so the model doesn't see the same banner thirty
|
|
67
|
+
# times per coding day.
|
|
68
|
+
mkdir -p .harness/state 2>/dev/null || true
|
|
69
|
+
STAMP_FILE=".harness/state/session-pill.stamp"
|
|
70
|
+
TODAY=$(date -u +%Y-%m-%d)
|
|
71
|
+
LAST=""
|
|
72
|
+
[ -f "$STAMP_FILE" ] && LAST=$(cat "$STAMP_FILE" 2>/dev/null || echo "")
|
|
73
|
+
if [ "$LAST" != "$TODAY" ]; then
|
|
74
|
+
HARNESS_VER=""
|
|
75
|
+
if [ -f harness.config.json ] && have_jp; then
|
|
76
|
+
HARNESS_VER=$(jp '.version // empty' harness.config.json 2>/dev/null || echo "")
|
|
77
|
+
fi
|
|
78
|
+
if [ -z "$HARNESS_VER" ] && [ -f .harness/installed.json ] && have_jp; then
|
|
79
|
+
HARNESS_VER=$(jp '.version // empty' .harness/installed.json 2>/dev/null || echo "")
|
|
80
|
+
fi
|
|
81
|
+
if [ -z "$HARNESS_VER" ]; then
|
|
82
|
+
HARNESS_VER="unknown"
|
|
83
|
+
fi
|
|
84
|
+
CTX+="[harness] pill (one/day): kit=$HARNESS_VER · date=$TODAY"$'\n'
|
|
85
|
+
printf '%s' "$TODAY" > "$STAMP_FILE" 2>/dev/null || true
|
|
86
|
+
fi
|
|
87
|
+
|
|
63
88
|
# 2. Current feature (from feature_list.json) — picks the first entry with
|
|
64
89
|
# passes=false so the model resumes the in-flight work, not a finished
|
|
65
90
|
# one. Skipped if file missing or jp unavailable.
|