agent-harness-kit 0.7.0 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +2 -2
- package/.claude-plugin/plugin.json +1 -1
- package/bin/cli.mjs +26 -0
- package/package.json +1 -1
- package/src/core/doctor.mjs +47 -0
- package/src/core/render-templates.mjs +119 -5
- package/src/core/upgrade.mjs +81 -60
- package/src/templates/.claude/agents/api-consistency-reviewer.md.vi +37 -0
- package/src/templates/.claude/agents/architecture-reviewer.md.vi.hbs +45 -0
- package/src/templates/.claude/agents/performance-reviewer.md.vi +39 -0
- package/src/templates/.claude/agents/reliability-reviewer.md.vi +42 -0
- package/src/templates/.claude/agents/security-reviewer.md.vi +43 -0
- package/src/templates/.claude/hooks/hooks.json +46 -0
- package/src/templates/.claude/output-styles/harness-terse.md +42 -0
- package/src/templates/.claude/settings.json.hbs +2 -1
- package/src/templates/.claude/skills/add-adr/SKILL.md.vi +64 -0
- package/src/templates/.claude/skills/add-feature/SKILL.md.vi.hbs +50 -0
- package/src/templates/.claude/skills/debug-flow/SKILL.md.vi.hbs +42 -0
- package/src/templates/.claude/skills/doc-drift-scan/SKILL.md +15 -10
- package/src/templates/.claude/skills/doc-drift-scan/SKILL.md.vi +52 -0
- package/src/templates/.claude/skills/doc-drift-scan/scripts/scan-paths.mjs +64 -0
- package/src/templates/.claude/skills/eval-runner/SKILL.md.vi +59 -0
- package/src/templates/.claude/skills/garbage-collection/SKILL.md.hbs +14 -5
- package/src/templates/.claude/skills/garbage-collection/SKILL.md.vi.hbs +58 -0
- package/src/templates/.claude/skills/garbage-collection/scripts/gc-classify.mjs +77 -0
- package/src/templates/.claude/skills/i18n-add-locale/SKILL.md +52 -0
- package/src/templates/.claude/skills/i18n-add-locale/SKILL.md.vi +56 -0
- package/src/templates/.claude/skills/i18n-add-locale/scripts/locale-scaffold.mjs +120 -0
- package/src/templates/.claude/skills/inspect-app/SKILL.md.vi +61 -0
- package/src/templates/.claude/skills/inspect-module/SKILL.md.hbs +17 -14
- package/src/templates/.claude/skills/inspect-module/SKILL.md.vi.hbs +57 -0
- package/src/templates/.claude/skills/inspect-module/scripts/module-summary.mjs +144 -0
- package/src/templates/.claude/skills/map-domain/SKILL.md +42 -0
- package/src/templates/.claude/skills/map-domain/SKILL.md.vi +42 -0
- package/src/templates/.claude/skills/map-domain/scripts/domain-map.mjs +145 -0
- package/src/templates/.claude/skills/propose-harness-improvement/SKILL.md.vi +49 -0
- package/src/templates/.claude/skills/propose-harness-improvement/scripts/improvement-bundle.mjs +172 -0
- package/src/templates/.claude/skills/refactor-feature/SKILL.md +60 -0
- package/src/templates/.claude/skills/refactor-feature/SKILL.md.vi +64 -0
- package/src/templates/.claude/skills/refactor-feature/scripts/feature-diff.mjs +146 -0
- package/src/templates/.claude/skills/review-this-pr/SKILL.md +59 -0
- package/src/templates/.claude/skills/review-this-pr/SKILL.md.vi +63 -0
- package/src/templates/.claude/skills/review-this-pr/scripts/pr-review-driver.mjs +152 -0
- package/src/templates/.claude/skills/structural-test-author/SKILL.md.vi.hbs +50 -0
- package/src/templates/.claude/skills/write-skill/SKILL.md.vi +43 -0
- package/src/templates/.harness/eval/rubrics/feature-step-done.mjs +148 -0
- package/src/templates/.harness/eval/tasks/feature-step-done.answer.md +53 -0
- package/src/templates/.harness/eval/tasks/feature-step-done.json +10 -0
- package/src/templates/.harness/eval/tasks/feature-step-done.prompt.md +43 -0
- package/src/templates/.mcp.json.example +35 -0
- package/src/templates/CLAUDE.md.hbs +9 -5
- package/src/templates/CLAUDE.md.vi.hbs +9 -5
- package/src/templates/scripts/notify-on-block.sh.hbs +73 -0
- package/src/templates/scripts/pretooluse-edit-guard.sh.hbs +115 -0
- package/src/templates/scripts/session-end.sh.hbs +6 -0
- package/src/templates/scripts/session-rollup.mjs +96 -0
- package/src/templates/scripts/session-start.sh.hbs +25 -0
- package/src/templates/scripts/statusline.mjs +63 -0
- package/src/templates/scripts/subagent-stop.sh.hbs +76 -0
- package/src/templates/scripts/userprompt-guard.sh.hbs +100 -0
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
// module-summary.mjs — deterministic step for /inspect-module.
|
|
3
|
+
// Bundles exports + outbound + inbound + layer + recent commits in JSON.
|
|
4
|
+
// Scans files in pure Node (no ripgrep/grep dependency); git is only used for recent commits.
|
|
5
|
+
|
|
6
|
+
import { readFileSync, existsSync, readdirSync, statSync } from "node:fs";
|
|
7
|
+
import { resolve, relative, join } from "node:path";
|
|
8
|
+
import { spawnSync } from "node:child_process";
|
|
9
|
+
|
|
10
|
+
const ROOT = process.env.CLAUDE_PROJECT_DIR || process.cwd();
|
|
11
|
+
|
|
12
|
+
/**
 * Abort the script with a usage/input error.
 * Prints the message to stderr, prefixed with the script name, then exits
 * the process with status 2.
 * @param {string} msg - Reason for aborting.
 */
function bail(msg) {
  const prefixed = "module-summary: " + msg;
  console.error(prefixed);
  process.exit(2);
}
|
|
16
|
+
|
|
17
|
+
// Walk a path (file or directory) and yield matching source files. Skip
|
|
18
|
+
// node_modules, .git, dist, build — folders that contain mountains of
|
|
19
|
+
// irrelevant exports and blow up the result set.
|
|
20
|
+
// File extensions (case-insensitive) that count as source files when walking.
const SOURCE_EXTS = /\.(ts|tsx|js|jsx|mjs|cjs|py|rs|go|swift|kt|kts)$/i;

// Directory names the walker never descends into.
const SKIP_DIRS = new Set([
  "node_modules",
  ".git",
  ".harness",
  "dist",
  "build",
  "target",
  ".next",
]);
|
|
22
|
+
|
|
23
|
+
/**
 * Lazily yield the absolute paths of source files under a path.
 *
 * A file path yields itself when its name matches SOURCE_EXTS. A directory is
 * walked recursively, skipping any entry whose name is in SKIP_DIRS. Paths
 * that cannot be stat'ed, or that are neither file nor directory, yield nothing.
 *
 * @param {string} absPath - Absolute path of a file or directory.
 * @yields {string} Absolute path of each matching source file.
 */
function* walkSources(absPath) {
  let info;
  try {
    info = statSync(absPath);
  } catch {
    // Vanished or unreadable path: nothing to report.
    return;
  }

  if (info.isDirectory()) {
    const children = readdirSync(absPath, { withFileTypes: true });
    for (const child of children) {
      if (!SKIP_DIRS.has(child.name)) {
        yield* walkSources(join(absPath, child.name));
      }
    }
    return;
  }

  if (info.isFile() && SOURCE_EXTS.test(absPath)) {
    yield absPath;
  }
}
|
|
39
|
+
|
|
40
|
+
// scan: read each file line-by-line, run the regex, collect matches with
|
|
41
|
+
// per-line annotation `path:line: content`. Pure Node — no external grep
|
|
42
|
+
// dependency, so the script works the same on macOS local, Linux CI,
|
|
43
|
+
// minimal Alpine, etc. (Previous shell-out to grep failed on CI with an
|
|
44
|
+
// empty result set; root cause: spawn-time differences between BSD and
|
|
45
|
+
// GNU grep when the target argument is a single file. Node fs is the
|
|
46
|
+
// portable answer.)
|
|
47
|
+
/**
 * Collect every line under `target` that matches `regex`.
 *
 * Each hit is formatted as `path:line: content`, where `path` is relative to
 * ROOT and `line` is 1-based. Files that cannot be read are skipped.
 *
 * Lines are split on LF or CRLF so that `content` never carries a trailing
 * "\r"; callers parse hits with `(.*)$`, which cannot match across a "\r".
 *
 * @param {string} target - File or directory path, resolved against ROOT.
 * @param {RegExp} regex - Pattern tested against each line's content.
 * @returns {string[]} Annotated matching lines in walk order.
 */
function scan(target, regex) {
  const lines = [];
  const absTarget = resolve(ROOT, target);
  for (const file of walkSources(absTarget)) {
    let body;
    try {
      body = readFileSync(file, "utf8");
    } catch {
      continue; // unreadable file: best-effort scan, skip it
    }
    const rel = relative(ROOT, file);
    const fileLines = body.split(/\r?\n/);
    for (let i = 0; i < fileLines.length; i++) {
      // A global/sticky regex keeps state between test() calls; always start at 0.
      regex.lastIndex = 0;
      if (regex.test(fileLines[i])) {
        lines.push(`${rel}:${i + 1}: ${fileLines[i]}`);
      }
    }
  }
  return lines;
}
|
|
63
|
+
|
|
64
|
+
/**
 * List the public declarations found under `target`.
 *
 * Two scans are combined: lines starting with `export ` (reported as the
 * declaration text, capped at 80 characters) and lines starting with `def `
 * or `class ` (reported as `<keyword> <name>`). Every entry is suffixed with
 * ` (<file>:<line>)`.
 *
 * @param {string} target - File or directory path, resolved against ROOT by scan().
 * @returns {string[]} Up to 50 unique entries in discovery order.
 */
function listExports(target) {
  const found = new Set();
  const exportShape = /^([^:]+):(\d+):\s*export\s+(.*)$/;
  const defShape = /^([^:]+):(\d+):\s*(def|class)\s+(\w+)/;

  scan(target, /^export /).forEach((hit) => {
    const parts = exportShape.exec(hit);
    if (parts === null) return;
    const [, file, lineNo, declaration] = parts;
    found.add(`${declaration.slice(0, 80)} (${file}:${lineNo})`);
  });

  scan(target, /^(def |class )/).forEach((hit) => {
    const parts = defShape.exec(hit);
    if (parts === null) return;
    const [, file, lineNo, keyword, identifier] = parts;
    found.add(`${keyword} ${identifier} (${file}:${lineNo})`);
  });

  return Array.from(found).slice(0, 50);
}
|
|
76
|
+
|
|
77
|
+
/**
 * List the import-like statements found under `target`.
 *
 * A line counts when it starts with `import `, `from ` or `use crate`. Each
 * statement is trimmed and capped at 100 characters, then de-duplicated.
 *
 * @param {string} target - File or directory path, resolved against ROOT by scan().
 * @returns {string[]} Up to 50 unique statements in discovery order.
 */
function outboundDeps(target) {
  const hitShape = /^[^:]+:\d+:\s*(.+)$/;
  const statements = scan(target, /^(import |from |use crate)/)
    .map((hit) => hitShape.exec(hit))
    .filter((parts) => parts !== null)
    .map((parts) => parts[1].trim().slice(0, 100));
  return [...new Set(statements)].slice(0, 50);
}
|
|
85
|
+
|
|
86
|
+
/**
 * List project files that reference the target module.
 *
 * The module name is the last path segment of `target` with one trailing
 * extension removed. The whole project (".") is scanned for lines that look
 * like `import … '…name'`, `from … '…name'` or `require('…name'`.
 *
 * Fixes over the previous version:
 *  - regex metacharacters in the name are escaped with a single backslash
 *    (two were inserted before, so names such as `foo.test` never matched);
 *  - the path is split on both "/" and "\\" so Windows paths give the right name;
 *  - for a directory target, files inside that directory count as
 *    self-references and are excluded.
 *
 * @param {string} target - File or directory path, resolved against ROOT.
 * @returns {string[]} Up to 30 unique ROOT-relative paths of referencing files.
 */
function inboundDeps(target) {
  const relTarget = relative(ROOT, resolve(ROOT, target));
  const name = relTarget.split(/[\\/]/).pop().replace(/\.[a-z]+$/i, "");
  if (!name) return [];

  // Escape the name so it is matched literally inside the pattern.
  const escapedName = name.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  const re = new RegExp(`(import|from|require\\().*['"][^'"]*${escapedName}`);

  // True when `file` is the target itself or lives inside a directory target.
  const isSelfReference = (file) =>
    file === relTarget ||
    file.endsWith(`/${relTarget}`) ||
    file.startsWith(`${relTarget}/`) ||
    file.startsWith(`${relTarget}\\`);

  const seen = new Set();
  for (const line of scan(".", re)) {
    const m = line.match(/^([^:]+):\d+:/);
    if (m && !isSelfReference(m[1])) seen.add(m[1]);
  }
  return [...seen].slice(0, 30);
}
|
|
100
|
+
|
|
101
|
+
/**
 * Load `harness.config.json` from ROOT.
 * Best-effort: a missing, unreadable or invalid file returns null rather
 * than throwing.
 * @returns {object|null} Parsed configuration, or null when unavailable.
 */
function readLayers() {
  try {
    const configPath = resolve(ROOT, "harness.config.json");
    const raw = readFileSync(configPath, "utf8");
    return JSON.parse(raw);
  } catch {
    return null;
  }
}
|
|
105
|
+
|
|
106
|
+
/**
 * Find which configured domain/layer contains `target`.
 *
 * Each `<root>/<layer>` directory is resolved through the same path functions
 * as the target, so config roots written as `src`, `./src` or `src/` all
 * match (a plain string prefix only matched the first form). The target
 * belongs to a layer when it is the layer directory itself or lies beneath it.
 *
 * Domains without a `root` or without an array `layers` are ignored, and a
 * missing or non-array `domains` yields null.
 *
 * @param {string} target - File or directory path, resolved against ROOT.
 * @param {object|null} cfg - Parsed harness config (may be null).
 * @returns {{domain: string, layer: *}|null} First matching layer, with the
 *   domain name defaulting to "default"; null when nothing matches.
 */
function whichLayer(target, cfg) {
  if (!Array.isArray(cfg?.domains)) return null;
  const rel = relative(ROOT, resolve(ROOT, target));
  for (const d of cfg.domains) {
    if (!d?.root || !Array.isArray(d.layers)) continue;
    for (const layer of d.layers) {
      const layerDir = relative(ROOT, resolve(ROOT, String(d.root), String(layer)));
      if (rel === layerDir) {
        return { domain: d.name || "default", layer };
      }
      // "Beneath" means the prefix is followed by a separator, so that
      // `src/app` does not claim `src/application/...`.
      const nextChar = rel.charAt(layerDir.length);
      if (rel.startsWith(layerDir) && (nextChar === "/" || nextChar === "\\")) {
        return { domain: d.name || "default", layer };
      }
    }
  }
  return null;
}
|
|
120
|
+
|
|
121
|
+
/**
 * Return the last five one-line git log entries that touch `target`.
 * Runs `git log --oneline -5 -- <target>` in ROOT; any non-zero or missing
 * exit status results in an empty list.
 * @param {string} target - Path passed to git as the pathspec.
 * @returns {string[]} Non-empty output lines, newest first as printed by git.
 */
function recentCommits(target) {
  const gitArgs = ["log", "--oneline", "-5", "--", target];
  const { status, stdout } = spawnSync("git", gitArgs, { cwd: ROOT, encoding: "utf8" });
  if (status !== 0) {
    return [];
  }
  const text = stdout || "";
  return text.split("\n").filter((entry) => entry.length > 0);
}
|
|
126
|
+
|
|
127
|
+
/**
 * CLI entry point: summarise one module as pretty-printed JSON on stdout.
 *
 * Expects the target path as the first CLI argument. Exits with status 2
 * (via bail) when the argument is missing or the path does not exist.
 * Output keys, in order: module, layer, exports, outbound, inbound, recent.
 */
function main() {
  const [, , target] = process.argv;
  if (!target) bail("missing target path argument");

  const abs = resolve(ROOT, target);
  if (!existsSync(abs)) bail(`target not found: ${target}`);

  const cfg = readLayers();
  const summary = {};
  summary.module = relative(ROOT, abs);
  summary.layer = whichLayer(target, cfg);
  summary.exports = listExports(target);
  summary.outbound = outboundDeps(target);
  summary.inbound = inboundDeps(target);
  summary.recent = recentCommits(target);

  process.stdout.write(JSON.stringify(summary, null, 2) + "\n");
}
|
|
143
|
+
|
|
144
|
+
main();
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: map-domain
|
|
3
|
+
description: Use this skill to render the harness's domain/layer config as a mermaid diagram + check for drift between harness.config.json#domains and the actual filesystem. Surfaces "the config says layers A→B→C but the repo has folders A, B, X" — drift that silently invalidates the structural-test contract.
|
|
4
|
+
allowed-tools: Read, Bash(node .claude/skills/map-domain/scripts/domain-map.mjs:*)
|
|
5
|
+
suggested-turns: 3
|
|
6
|
+
---
|
|
7
|
+
|
|
8
|
+
## When to invoke
|
|
9
|
+
|
|
10
|
+
- After editing `harness.config.json#domains`.
|
|
11
|
+
- After moving files between layer directories.
|
|
12
|
+
- During onboarding — gives a one-glance view of the kit's layer rule.
|
|
13
|
+
|
|
14
|
+
## Steps
|
|
15
|
+
|
|
16
|
+
1. **Run the side-car.**
|
|
17
|
+
```
|
|
18
|
+
node .claude/skills/map-domain/scripts/domain-map.mjs --out docs/architecture/domain-map.md
|
|
19
|
+
```
|
|
20
|
+
2. **Inspect drift.** The mermaid diagram embeds a "drift" badge per layer:
|
|
21
|
+
- `✓` — config layer name has a matching `<root>/<layer>/` directory.
|
|
22
|
+
- `✗` — directory missing.
|
|
23
|
+
- `?` — directory exists but is empty (seed it with a `README.md` or drop the layer).
|
|
24
|
+
3. **Update the README** (optional). The generated markdown is safe to
|
|
25
|
+
commit; re-running the side-car is idempotent.
|
|
26
|
+
|
|
27
|
+
## Output contract
|
|
28
|
+
|
|
29
|
+
```
|
|
30
|
+
domains: <N>
|
|
31
|
+
layers: <M>
|
|
32
|
+
drift_count: <K>
|
|
33
|
+
report: docs/architecture/domain-map.md
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
## Anti-patterns
|
|
37
|
+
|
|
38
|
+
- Don't rename a layer in the config without moving the directory at the
|
|
39
|
+
same time — the structural-test will start scanning a path that no
|
|
40
|
+
longer exists.
|
|
41
|
+
- Don't add a layer to the config without seeding it with at least a
|
|
42
|
+
`README.md` so the drift check passes.
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: map-domain
|
|
3
|
+
description: Use this skill to render the harness's domain/layer config as a mermaid diagram + check for drift between harness.config.json#domains and the actual filesystem. Surfaces "the config says layers A→B→C but the repo has folders A, B, X" — drift that silently invalidates the structural-test contract.
|
|
4
|
+
allowed-tools: Read, Bash(node .claude/skills/map-domain/scripts/domain-map.mjs:*)
|
|
5
|
+
suggested-turns: 3
|
|
6
|
+
---
|
|
7
|
+
|
|
8
|
+
## Khi nào dùng
|
|
9
|
+
|
|
10
|
+
- Sau khi sửa `harness.config.json#domains`.
|
|
11
|
+
- Sau khi di chuyển files giữa các layer directory.
|
|
12
|
+
- Trong onboarding — cho cái nhìn tổng quan nhanh về layer rule
|
|
13
|
+
của kit.
|
|
14
|
+
|
|
15
|
+
## Các bước
|
|
16
|
+
|
|
17
|
+
1. **Chạy side-car.**
|
|
18
|
+
```
|
|
19
|
+
node .claude/skills/map-domain/scripts/domain-map.mjs --out docs/architecture/domain-map.md
|
|
20
|
+
```
|
|
21
|
+
2. **Soi drift.** Mermaid diagram nhúng "drift" badge per layer:
|
|
22
|
+
- `✓` — layer name trong config có thư mục `<root>/<layer>/` tương ứng.
|
|
23
|
+
- `✗` — thư mục thiếu.
|
|
24
|
+
- `?` — thư mục tồn tại nhưng rỗng (thêm một `README.md` hoặc bỏ layer khỏi config).
|
|
25
|
+
3. **Update README** (không bắt buộc). Markdown sinh ra có thể safely
|
|
26
|
+
commit; chạy lại side-car là idempotent.
|
|
27
|
+
|
|
28
|
+
## Output contract
|
|
29
|
+
|
|
30
|
+
```
|
|
31
|
+
domains: <N>
|
|
32
|
+
layers: <M>
|
|
33
|
+
drift_count: <K>
|
|
34
|
+
report: docs/architecture/domain-map.md
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
## Anti-patterns
|
|
38
|
+
|
|
39
|
+
- Không rename một layer trong config mà không cùng lúc di chuyển
|
|
40
|
+
directory — structural-test sẽ bắt đầu scan một path không còn tồn tại.
|
|
41
|
+
- Không thêm layer vào config mà không seed nó với ít nhất một
|
|
42
|
+
`README.md` để drift check pass.
|
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
// domain-map.mjs — deterministic step for /map-domain.
|
|
3
|
+
// Renders harness.config.json#domains as a markdown doc with embedded
|
|
4
|
+
// mermaid graph + drift check against the filesystem.
|
|
5
|
+
//
|
|
6
|
+
// Usage:
|
|
7
|
+
// domain-map.mjs [--out docs/architecture/domain-map.md]
|
|
8
|
+
// domain-map.mjs --stdout
|
|
9
|
+
|
|
10
|
+
import { readFileSync, existsSync, writeFileSync, mkdirSync, statSync, readdirSync } from "node:fs";
|
|
11
|
+
import { resolve, dirname } from "node:path";
|
|
12
|
+
|
|
13
|
+
const ROOT = process.env.CLAUDE_PROJECT_DIR || process.cwd();
|
|
14
|
+
|
|
15
|
+
/**
 * Parse domain-map CLI flags.
 *
 * `--out <path>` sets the output file and `--stdout` requests printing the
 * markdown. When neither yields an output (no flags, or `--out` with no
 * value), stdout is enabled. Unknown arguments are ignored.
 *
 * @param {string[]} argv - Arguments after the script name.
 * @returns {{outPath: (string|null|undefined), stdout: boolean}} Parsed options.
 */
function parseArgs(argv) {
  const opts = { outPath: null, stdout: false };
  let pos = 0;
  while (pos < argv.length) {
    switch (argv[pos]) {
      case "--out":
        // The value is the following argument; consume it as well.
        pos += 1;
        opts.outPath = argv[pos];
        break;
      case "--stdout":
        opts.stdout = true;
        break;
      default:
        break;
    }
    pos += 1;
  }
  if (!opts.outPath && !opts.stdout) {
    opts.stdout = true;
  }
  return opts;
}
|
|
24
|
+
|
|
25
|
+
/**
 * Load and parse `harness.config.json` from ROOT.
 * Prints a diagnostic to stderr and exits with status 2 when the file is
 * missing or is not valid JSON.
 * @returns {*} The parsed configuration value.
 */
function loadConfig() {
  const configPath = resolve(ROOT, "harness.config.json");
  if (existsSync(configPath) === false) {
    console.error("domain-map: harness.config.json not found at repo root");
    process.exit(2);
  }

  let parsed;
  try {
    parsed = JSON.parse(readFileSync(configPath, "utf8"));
  } catch (err) {
    console.error(`domain-map: harness.config.json is not valid JSON: ${err.message}`);
    process.exit(2);
  }
  return parsed;
}
|
|
37
|
+
|
|
38
|
+
/**
 * Classify a layer directory on disk.
 * @param {string} abs - Absolute path to inspect.
 * @returns {"present"|"empty"|"missing"} "present" for a directory with at
 *   least one entry, "empty" for a directory with none, and "missing" when
 *   the path is absent, is not a directory, or cannot be read.
 */
function dirState(abs) {
  let entries;
  try {
    if (!statSync(abs).isDirectory()) return "missing";
    entries = readdirSync(abs);
  } catch {
    return "missing";
  }
  return entries.length > 0 ? "present" : "empty";
}
|
|
49
|
+
|
|
50
|
+
/**
 * Map a directory state to the badge shown in the diagram and drift table.
 * @param {string} state - Value produced by dirState().
 * @returns {string} "✓" for "present", "?" for "empty", "✗" for anything else.
 */
function driftBadge(state) {
  switch (state) {
    case "present":
      return "✓";
    case "empty":
      return "?";
    default:
      return "✗";
  }
}
|
|
55
|
+
|
|
56
|
+
/**
 * Turn an arbitrary value into an identifier usable as a mermaid node id.
 * The value is stringified and every character other than an ASCII letter,
 * digit or underscore is replaced by an underscore.
 * @param {*} s - Value to convert.
 * @returns {string} Sanitised identifier.
 */
function safeId(s) {
  const text = String(s);
  const unsafeChar = /[^a-zA-Z0-9_]/g;
  return text.replace(unsafeChar, "_");
}
|
|
59
|
+
|
|
60
|
+
/**
 * Render the configured domains as a fenced mermaid flowchart and collect
 * one drift row per layer.
 *
 * Each domain becomes a subgraph labelled `<name> (<root>/)`; unnamed domains
 * are called `domain<N>` by 1-based position. Each layer becomes a node
 * labelled with the layer name and its drift badge, and consecutive layers
 * are linked in config order. A non-array `layers` is treated as empty.
 *
 * @param {object[]} domains - Domain entries from harness.config.json.
 * @returns {{mermaid: string, driftRows: object[]}} The fenced diagram text and
 *   rows of `{domain, layer, root, state, badge}`.
 */
function buildMermaid(domains) {
  const out = ["```mermaid", "flowchart LR"];
  const driftRows = [];
  let position = 0;

  for (const entry of domains) {
    position += 1;
    const dName = entry.name || `domain${position}`;
    const root = entry.root || "";
    const layerList = Array.isArray(entry.layers) ? entry.layers : [];
    const dId = safeId(dName);

    out.push(` subgraph ${dId} ["${dName} (${root}/)"]`);
    // Thread the previous node id through the layers so each links to the next.
    layerList.reduce((previous, layer) => {
      const state = dirState(resolve(ROOT, root, layer));
      const badge = driftBadge(state);
      const node = `${dId}__${safeId(layer)}`;
      out.push(` ${node}["${layer} ${badge}"]`);
      if (previous) out.push(` ${previous} --> ${node}`);
      driftRows.push({ domain: dName, layer, root, state, badge });
      return node;
    }, null);
    out.push(" end");
  }

  out.push("```");
  return { mermaid: out.join("\n"), driftRows };
}
|
|
89
|
+
|
|
90
|
+
/**
 * Build the full domain-map markdown report.
 *
 * Sections, in order: title and summary counts, the mermaid diagram, a drift
 * table with one row per layer, a "Resolutions" list (only when some layer is
 * not "present"), and a trailing HTML comment with machine-readable counts.
 *
 * @param {object} cfg - Parsed harness config; a non-array `domains` is
 *   treated as empty.
 * @returns {{md: string, drift_count: number}} Report text (newline-terminated)
 *   and the number of layers whose state is not "present".
 */
function buildMarkdown(cfg) {
  const domains = Array.isArray(cfg.domains) ? cfg.domains : [];
  const { mermaid, driftRows } = buildMermaid(domains);
  const drift = driftRows.filter((row) => row.state !== "present");

  const intro = [
    `# Domain map`,
    "",
    `Generated by \`/map-domain\` from \`harness.config.json\`. Re-run after editing domain/layer config.`,
    "",
    `- domains: ${domains.length}`,
    `- layers (total): ${driftRows.length}`,
    `- drift entries: ${drift.length}`,
    "",
  ];

  const diagram = [`## Diagram`, "", mermaid, ""];

  const table = [
    `## Drift table`,
    "",
    `| domain | layer | root | state | badge |`,
    `|---|---|---|---|---|`,
    ...driftRows.map(
      (r) => `| ${r.domain} | ${r.layer} | \`${r.root}/${r.layer}/\` | ${r.state} | ${r.badge} |`,
    ),
    "",
  ];

  // Only "missing" and "empty" produce advice; any other state adds no line.
  const advice = [];
  for (const d of drift) {
    switch (d.state) {
      case "missing":
        advice.push(`- \`${d.root}/${d.layer}/\` is **missing**. Either create the directory (with a README.md) or remove "${d.layer}" from harness.config.json#domains[${d.domain}].layers.`);
        break;
      case "empty":
        advice.push(`- \`${d.root}/${d.layer}/\` exists but is empty. Add at least a README.md or seed file, or drop the layer from config.`);
        break;
      default:
        break;
    }
  }
  const resolutions = drift.length > 0 ? [`## Resolutions`, "", ...advice, ""] : [];

  const tail = { domains: domains.length, layers: driftRows.length, drift_count: drift.length };
  const footer = [`<!-- machine-tail: ${JSON.stringify(tail)} -->`];

  const md = [...intro, ...diagram, ...table, ...resolutions, ...footer];
  return { md: md.join("\n") + "\n", drift_count: drift.length };
}
|
|
131
|
+
|
|
132
|
+
/**
 * CLI entry point for domain-map.
 *
 * With `--out`, writes the report to that path (relative to ROOT, creating
 * parent directories) and prints a one-line JSON receipt
 * `{written, drift_count}`. With stdout enabled, prints the markdown itself.
 * Both may happen in the same run.
 */
function main() {
  const args = parseArgs(process.argv.slice(2));
  const cfg = loadConfig();
  const report = buildMarkdown(cfg);

  if (args.outPath) {
    const destination = resolve(ROOT, args.outPath);
    mkdirSync(dirname(destination), { recursive: true });
    writeFileSync(destination, report.md);
    const receipt = { written: args.outPath, drift_count: report.drift_count };
    process.stdout.write(JSON.stringify(receipt) + "\n");
  }

  if (args.stdout) {
    process.stdout.write(report.md);
  }
}
|
|
144
|
+
|
|
145
|
+
main();
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: propose-harness-improvement
|
|
3
|
+
description: Use this skill whenever the agent makes a mistake, the user observes an avoidable failure, a pattern recurs, or someone says "the agent keeps doing X". Files an "Engineer the Harness" entry — Mitchell Hashimoto's discipline: every failure becomes a permanent prevention mechanism. Always invoke this instead of just fixing the immediate symptom.
|
|
4
|
+
allowed-tools: Read, Edit, Write, Bash(git diff:*)
|
|
5
|
+
suggested-turns: 8
|
|
6
|
+
---
|
|
7
|
+
|
|
8
|
+
## Các bước
|
|
9
|
+
|
|
10
|
+
1. **Triage.** Hỏi: "Vừa rồi sai cái gì? Hành vi mong muốn của agent là
|
|
11
|
+
gì? Triệu chứng là gì?"
|
|
12
|
+
2. **Phân loại.** Một trong:
|
|
13
|
+
- **(a) Thiếu context** — agent không biết một điều gì đó. Fix: thêm
|
|
14
|
+
vào `docs/`.
|
|
15
|
+
- **(b) Thiếu rule** — agent làm một việc bị một unwritten rule cấm.
|
|
16
|
+
Fix: gọi `/structural-test-author`.
|
|
17
|
+
- **(c) Thiếu tool/skill** — agent với lấy sai tool. Fix: gọi
|
|
18
|
+
`/write-skill`.
|
|
19
|
+
- **(d) Wrong layer / architecture** — cấu trúc đã mời gọi sai lầm.
|
|
20
|
+
Fix: viết ADR qua `/add-adr`.
|
|
21
|
+
- **(e) Wrong instruction in prompt** — failure truy ngược về một
|
|
22
|
+
skill/agent prompt đã ambiguous, gây hiểu sai, hoặc under-constrained.
|
|
23
|
+
Agent đã làm theo prompt chính xác nhưng chính prompt đã dẫn sai.
|
|
24
|
+
Fix: edit file vi phạm tại `.claude/skills/<name>/SKILL.md` hoặc
|
|
25
|
+
`.claude/agents/<name>.md`. Sau đó chạy lại `/eval-runner` để xác
|
|
26
|
+
nhận regression đã đóng.
|
|
27
|
+
3. **Append entry** vào `docs/agent-failures.md` với: date, symptom, fix,
|
|
28
|
+
fix-type, file modified.
|
|
29
|
+
4. **Áp dụng fix tại nơi đúng.** KHÔNG BAO GIỜ dán đè bằng một câu "be
|
|
30
|
+
careful" vào CLAUDE.md trừ khi rule (a) áp dụng — và ngay cả khi đó,
|
|
31
|
+
chỉ làm pointer đến doc dài hơn.
|
|
32
|
+
5. **Update PROGRESS.** Append `harness-improvement: <slug>` vào
|
|
33
|
+
`.harness/PROGRESS.md`.
|
|
34
|
+
|
|
35
|
+
## Output contract
|
|
36
|
+
|
|
37
|
+
```
|
|
38
|
+
### Failure: <one-line summary>
|
|
39
|
+
### Classification: (a|b|c|d|e) <name>
|
|
40
|
+
### Fix applied at: <file:line>
|
|
41
|
+
### docs/agent-failures.md entry: §<n>
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
## Anti-patterns (block on these)
|
|
45
|
+
|
|
46
|
+
- Không thêm câu "be careful with X" mơ hồ vào CLAUDE.md.
|
|
47
|
+
- Không thêm rule mà enforcement của nó cũng dựa trên LLM.
|
|
48
|
+
- Không dùng skill này để log những cleanup ideas không liên quan —
|
|
49
|
+
chúng đi vào `docs/tech-debt-tracker.md`.
|
package/src/templates/.claude/skills/propose-harness-improvement/scripts/improvement-bundle.mjs
ADDED
|
@@ -0,0 +1,172 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
// improvement-bundle.mjs — deterministic step for /propose-harness-improvement.
|
|
3
|
+
// Replaces the "ask the agent to summarize recent failures" LLM turn with a
|
|
4
|
+
// mechanical sweep over telemetry + git history + bypass log.
|
|
5
|
+
//
|
|
6
|
+
// Output (JSON, stdout or --out):
|
|
7
|
+
// {
|
|
8
|
+
// window_days: <n>,
|
|
9
|
+
// recent_failures: [ {ts, event, source, detail} ],
|
|
10
|
+
// recurring_patterns: [ {pattern, count, sample_ts} ],
|
|
11
|
+
// classification: { context, rule, tool_skill, architecture, prompt },
|
|
12
|
+
// fix_targets: [ {file, why} ],
// recent_fix_commits: [ "<sha> <subject>" ]
|
|
13
|
+
// }
|
|
14
|
+
//
|
|
15
|
+
// Classification rubric mirrors the (a)-(e) buckets in the SKILL.md:
|
|
16
|
+
// (a) context — pretooluse denials referencing rules in docs/
|
|
17
|
+
// (b) rule — structural-test failures / baseline drift
|
|
18
|
+
// (c) tool/skill — bypass.log entries / missing-skill prompt-guard hits
|
|
19
|
+
// (d) architecture — layer-violation patterns appearing >=3 times
|
|
20
|
+
// (e) prompt — skill_invoked followed by failure within same session (not computed yet: this bucket is always 0)
|
|
21
|
+
//
|
|
22
|
+
// The buckets are heuristic; an LLM still makes the final call. The point is
|
|
23
|
+
// to hand it a dense, factual digest instead of forcing it to scan files
|
|
24
|
+
// blind.
|
|
25
|
+
|
|
26
|
+
import { readFileSync, existsSync, writeFileSync } from "node:fs";
|
|
27
|
+
import { resolve } from "node:path";
|
|
28
|
+
import { spawnSync } from "node:child_process";
|
|
29
|
+
|
|
30
|
+
const ROOT = process.env.CLAUDE_PROJECT_DIR || process.cwd();
|
|
31
|
+
|
|
32
|
+
/**
 * Parse improvement-bundle CLI flags.
 *
 * `--window <days>` sets the look-back window; a missing, non-numeric or zero
 * value falls back to 14. `--out <path>` sets the output file. Unknown
 * arguments are ignored.
 *
 * @param {string[]} argv - Arguments after the script name.
 * @returns {{window: number, out: (string|null|undefined)}} Parsed options.
 */
function parseArgs(argv) {
  const opts = { window: 14, out: null };
  let pos = 0;
  while (pos < argv.length) {
    const flag = argv[pos];
    if (flag === "--window") {
      pos += 1;
      const days = Number(argv[pos]);
      opts.window = days || 14;
    } else if (flag === "--out") {
      pos += 1;
      opts.out = argv[pos];
    }
    pos += 1;
  }
  return opts;
}
|
|
40
|
+
|
|
41
|
+
/**
 * Read a JSON-Lines file into an array of records.
 *
 * Blank lines and lines that are not valid JSON are skipped. Lines that parse
 * to something other than a plain object (`null`, numbers, strings, arrays)
 * are skipped as well: callers read properties such as `rec.ts` from every
 * entry, and a `null` entry would make them throw.
 *
 * @param {string} path - File to read.
 * @returns {object[]} Parsed records in file order; empty when the file does
 *   not exist.
 */
function readJsonl(path) {
  if (!existsSync(path)) return [];
  const body = readFileSync(path, "utf8");
  const out = [];
  for (const line of body.split("\n")) {
    if (!line.trim()) continue;
    let parsed;
    try {
      parsed = JSON.parse(line);
    } catch {
      continue; // malformed line: skip it, keep the rest
    }
    const isRecord = parsed !== null && typeof parsed === "object" && !Array.isArray(parsed);
    if (isRecord) out.push(parsed);
  }
  return out;
}
|
|
51
|
+
|
|
52
|
+
/**
 * Tell whether a timestamp is no older than `days` days.
 * Timestamps in the future also count as within the window.
 * @param {string} ts - Timestamp in a format accepted by Date.parse.
 * @param {number} days - Window size in days.
 * @returns {boolean} False when `ts` cannot be parsed or is older than the window.
 */
function isWithin(ts, days) {
  const MS_PER_DAY = 24 * 3600 * 1000;
  const stamp = Date.parse(ts);
  const ageMs = Date.now() - stamp;
  return Number.isFinite(stamp) && ageMs <= days * MS_PER_DAY;
}
|
|
57
|
+
|
|
58
|
+
/**
 * List recent commits whose message mentions fix, revert or hotfix.
 * Runs `git log --since=<days>.days --oneline --grep=…` in ROOT; any non-zero
 * or missing exit status results in an empty list.
 * @param {number} days - Look-back window in days.
 * @returns {string[]} At most 50 one-line log entries.
 */
function gitLogFixes(days) {
  const gitArgs = [
    "log",
    `--since=${days}.days`,
    "--oneline",
    "--grep=fix\\|revert\\|hotfix",
  ];
  const { status, stdout } = spawnSync("git", gitArgs, { cwd: ROOT, encoding: "utf8" });
  if (status !== 0) {
    return [];
  }
  const entries = (stdout || "").split("\n").filter((entry) => entry.length > 0);
  return entries.slice(0, 50);
}
|
|
66
|
+
|
|
67
|
+
/**
 * Clip a free-form detail value to at most 200 characters.
 * Non-string values (e.g. structured detail objects) are serialised as JSON
 * first so they cannot break the string handling.
 */
function clipDetail(value) {
  const text = typeof value === "string" ? value : (JSON.stringify(value) || String(value));
  return text.slice(0, 200);
}

/**
 * Build the chronological list of recent failures.
 *
 * Telemetry records are kept when their event is one of
 * structural_test_fail, precompletion_block, permission_denied or
 * userprompt_block; every bypass record is kept as event "bypass". Records
 * without a `ts`, or with a `ts` outside the window, are dropped.
 *
 * Fixes over the previous version:
 *  - entries are ordered by parsed time rather than by comparing timestamp
 *    text, so ISO strings with different UTC offsets sort correctly and a
 *    non-string `ts` no longer throws;
 *  - a non-string reason/detail/command is serialised instead of throwing.
 *
 * @param {object[]} telemetry - Records from the telemetry log.
 * @param {object[]} bypass - Records from the bypass log.
 * @param {number} windowDays - Look-back window in days.
 * @returns {{ts: *, event: string, source: *, detail: string}[]} The 40 most
 *   recent failures, oldest first.
 */
function summariseFailures(telemetry, bypass, windowDays) {
  const failureEvents = new Set([
    "structural_test_fail",
    "precompletion_block",
    "permission_denied",
    "userprompt_block",
  ]);
  const failures = [];

  for (const rec of telemetry) {
    if (!rec.ts || !isWithin(rec.ts, windowDays)) continue;
    if (!failureEvents.has(rec.event)) continue;
    failures.push({
      ts: rec.ts,
      event: rec.event,
      source: rec.source || rec.rule || "(unspecified)",
      detail: clipDetail(rec.reason || rec.detail || rec.skill || ""),
    });
  }

  for (const rec of bypass) {
    if (!rec.ts || !isWithin(rec.ts, windowDays)) continue;
    failures.push({
      ts: rec.ts,
      event: "bypass",
      source: rec.rule || rec.bypass || "(unspecified)",
      detail: clipDetail(rec.command || rec.file || ""),
    });
  }

  // Array.prototype.sort is stable, so entries at the same instant keep
  // their insertion order (telemetry before bypass).
  failures.sort((a, b) => Date.parse(a.ts) - Date.parse(b.ts));
  return failures.slice(-40);
}
|
|
93
|
+
|
|
94
|
+
/**
 * Find failure patterns that occur more than once.
 *
 * Failures are grouped by `<event>::<source>`. Groups with at least two
 * members are reported with their count and the timestamp of the first
 * member seen.
 *
 * @param {{ts: *, event: string, source: *}[]} failures - Failure entries.
 * @returns {{pattern: string, count: number, sample_ts: *}[]} Up to 20
 *   patterns, most frequent first.
 */
function recurringPatterns(failures) {
  const tally = new Map();
  for (const { event, source, ts } of failures) {
    const key = `${event}::${source}`;
    const entry = tally.get(key);
    if (entry === undefined) {
      tally.set(key, { pattern: key, count: 1, sample_ts: ts });
    } else {
      entry.count += 1;
    }
  }
  return [...tally.values()]
    .filter((entry) => entry.count >= 2)
    .sort((a, b) => b.count - a.count)
    .slice(0, 20);
}
|
|
109
|
+
|
|
110
|
+
/**
 * Count failures per heuristic classification bucket.
 *
 *  - structural_test_fail, precompletion_block → rule
 *  - permission_denied, userprompt_block       → context
 *  - bypass                                    → tool_skill
 *  - each recurring `structural_test_fail::…` pattern seen 3+ times adds one
 *    to architecture
 *
 * Nothing in this function increments `prompt`; it is always returned as 0.
 *
 * @param {{event: string}[]} failures - Failure entries.
 * @param {{pattern: string, count: number}[]} recurring - Recurring patterns.
 * @returns {{context: number, rule: number, tool_skill: number, architecture: number, prompt: number}}
 */
function classify(failures, recurring) {
  const bucketByEvent = new Map([
    ["structural_test_fail", "rule"],
    ["precompletion_block", "rule"],
    ["permission_denied", "context"],
    ["userprompt_block", "context"],
    ["bypass", "tool_skill"],
  ]);
  const buckets = { context: 0, rule: 0, tool_skill: 0, architecture: 0, prompt: 0 };

  failures.forEach(({ event }) => {
    const bucket = bucketByEvent.get(event);
    if (bucket !== undefined) buckets[bucket] += 1;
  });

  const structuralRepeats = recurring.filter(
    (r) => r.count >= 3 && r.pattern.startsWith("structural_test_fail::"),
  );
  buckets.architecture += structuralRepeats.length;

  return buckets;
}
|
|
126
|
+
|
|
127
|
+
/**
 * Suggest files to review for every non-empty classification bucket.
 * Suggestions come out in a fixed order: rule, context, tool_skill,
 * architecture, prompt.
 * @param {{context: number, rule: number, tool_skill: number, architecture: number, prompt: number}} buckets
 *   Counts produced by classify().
 * @returns {{file: string, why: string}[]} Suggested targets with a rationale.
 */
function fixTargets(buckets) {
  // [bucket, file, rationale]; row order is the output order.
  const suggestions = [
    ["rule", "harness.config.json", "structural rule lives here; consider tightening"],
    ["rule", ".harness/structural-baseline.json", "review whether baseline entries should drain"],
    ["context", "docs/golden-principles.md", "context gap surfaced via permission denials"],
    ["context", "CLAUDE.md", "consider a pointer (not a paste) to relevant doc"],
    ["tool_skill", ".claude/skills/", "missing skill or wrong skill chosen — write or edit one"],
    ["architecture", "docs/adr/", "recurring violation suggests an ADR is needed"],
    ["prompt", ".claude/skills/<name>/SKILL.md", "prompt ambiguity led the agent astray"],
  ];
  return suggestions
    .filter(([bucket]) => buckets[bucket] > 0)
    .map(([, file, why]) => ({ file, why }));
}
|
|
148
|
+
|
|
149
|
+
/**
 * CLI entry point for improvement-bundle.
 *
 * Reads `.harness/telemetry.jsonl` and `.harness/bypass.log` under ROOT,
 * derives the failure digest, and emits it as pretty-printed JSON: to the
 * `--out` path (relative to ROOT) when given, otherwise to stdout.
 */
function main() {
  const args = parseArgs(process.argv.slice(2));
  const windowDays = args.window;

  const telemetry = readJsonl(resolve(ROOT, ".harness/telemetry.jsonl"));
  const bypass = readJsonl(resolve(ROOT, ".harness/bypass.log"));

  const recentFailures = summariseFailures(telemetry, bypass, windowDays);
  const recurring = recurringPatterns(recentFailures);
  const classification = classify(recentFailures, recurring);
  const targets = fixTargets(classification);
  const fixCommits = gitLogFixes(windowDays);

  const text = JSON.stringify(
    {
      window_days: windowDays,
      recent_failures: recentFailures,
      recurring_patterns: recurring,
      classification,
      fix_targets: targets,
      recent_fix_commits: fixCommits,
    },
    null,
    2,
  );

  if (args.out) {
    writeFileSync(resolve(ROOT, args.out), text + "\n");
  } else {
    process.stdout.write(text + "\n");
  }
}
|
|
171
|
+
|
|
172
|
+
main();
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: refactor-feature
|
|
3
|
+
description: Use this skill when restructuring a feature in feature_list.json — splitting steps, merging steps, renaming, or marking a previously-failing step done. The side-car diffs feature_list.json#steps before/after and rejects the edit when a step.done transition is not accompanied by a test reference. Forces "no done without proof".
|
|
4
|
+
allowed-tools: Read, Edit, Bash(git diff:*, node .claude/skills/refactor-feature/scripts/feature-diff.mjs:*)
|
|
5
|
+
suggested-turns: 6
|
|
6
|
+
isolation: worktree
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
## When to invoke
|
|
10
|
+
|
|
11
|
+
- Re-decomposing a feature (one becomes many, or vice versa).
|
|
12
|
+
- Marking `passes: false → true` for a step that was previously WIP.
|
|
13
|
+
- Renaming feature ids (this is the dangerous case — the side-car catches
|
|
14
|
+
silent renames that orphan PROGRESS.md references).
|
|
15
|
+
|
|
16
|
+
## Pre-flight (side-car gate)
|
|
17
|
+
|
|
18
|
+
Run the diff side-car BEFORE any feature_list.json edit lands:
|
|
19
|
+
|
|
20
|
+
```
|
|
21
|
+
node .claude/skills/refactor-feature/scripts/feature-diff.mjs \
|
|
22
|
+
--before-ref HEAD --after-file feature_list.json
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
Side-car contract:
|
|
26
|
+
- Exits 0 + JSON when changes are coherent.
|
|
27
|
+
- Exits 2 + JSON with `violations: [...]` when:
|
|
28
|
+
- A step's `passes` flipped `false → true` without a test entry under
|
|
29
|
+
`step.tests` (or `step.testCommit`).
|
|
30
|
+
- A step's `id` changed without a `renamed_from` field (silent rename).
|
|
31
|
+
- A step disappeared without an entry in `step.replaced_by`.
|
|
32
|
+
|
|
33
|
+
## Steps
|
|
34
|
+
|
|
35
|
+
1. **Capture before-state.** `git show HEAD:feature_list.json > /tmp/before.json`
|
|
36
|
+
2. **Edit.** Make the refactor in your working copy.
|
|
37
|
+
3. **Run the gate.** Side-car compares HEAD vs working copy. Address any
|
|
38
|
+
violation before staging.
|
|
39
|
+
4. **Stage + test.** If `passes` flipped true, the test must exist and be
|
|
40
|
+
referenced in `step.tests`.
|
|
41
|
+
5. **Commit with a body explaining the refactor.** Use commit trailer
|
|
42
|
+
`Refactor-Feature: <feature_id>` so /review-this-pr can group changes.
|
|
43
|
+
|
|
44
|
+
## Output contract
|
|
45
|
+
|
|
46
|
+
```
|
|
47
|
+
feature_list refactor: <id>
|
|
48
|
+
steps_changed: <N>
|
|
49
|
+
done_transitions: <M> (each with a test reference)
|
|
50
|
+
renames: <list of id→id>
|
|
51
|
+
gate: passed
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
## Anti-patterns
|
|
55
|
+
|
|
56
|
+
- Don't mark `passes: true` first and "add tests later" — the side-car
|
|
57
|
+
blocks at the boundary on purpose. Flip the bit only AFTER the test
|
|
58
|
+
exists.
|
|
59
|
+
- Don't delete a step without `replaced_by` — orphaned PROGRESS.md
|
|
60
|
+
entries get out of sync with the live feature list.
|