agent-harness-kit 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +27 -0
- package/.claude-plugin/plugin.json +25 -0
- package/LICENSE +21 -0
- package/README.md +165 -0
- package/bin/cli.mjs +261 -0
- package/package.json +64 -0
- package/src/core/detect-stack.mjs +181 -0
- package/src/core/doctor.mjs +106 -0
- package/src/core/patch-package-json.mjs +53 -0
- package/src/core/render-templates.mjs +277 -0
- package/src/core/upgrade.mjs +274 -0
- package/src/templates/.claude/agents/api-consistency-reviewer.md +33 -0
- package/src/templates/.claude/agents/architecture-reviewer.md.hbs +41 -0
- package/src/templates/.claude/agents/performance-reviewer.md +35 -0
- package/src/templates/.claude/agents/reliability-reviewer.md +38 -0
- package/src/templates/.claude/agents/security-reviewer.md +39 -0
- package/src/templates/.claude/hooks/hooks.json.hbs +39 -0
- package/src/templates/.claude/settings.json.hbs +25 -0
- package/src/templates/.claude/skills/add-adr/SKILL.md +60 -0
- package/src/templates/.claude/skills/add-feature/SKILL.md.hbs +50 -0
- package/src/templates/.claude/skills/debug-flow/SKILL.md.hbs +38 -0
- package/src/templates/.claude/skills/doc-drift-scan/SKILL.md +43 -0
- package/src/templates/.claude/skills/eval-runner/SKILL.md +55 -0
- package/src/templates/.claude/skills/garbage-collection/SKILL.md.hbs +49 -0
- package/src/templates/.claude/skills/inspect-app/SKILL.md +57 -0
- package/src/templates/.claude/skills/inspect-module/SKILL.md.hbs +53 -0
- package/src/templates/.claude/skills/propose-harness-improvement/SKILL.md +43 -0
- package/src/templates/.claude/skills/structural-test-author/SKILL.md.hbs +46 -0
- package/src/templates/.claude/skills/write-skill/SKILL.md +39 -0
- package/src/templates/CLAUDE.md.hbs +70 -0
- package/src/templates/_adapter-python/.importlinter +14 -0
- package/src/templates/_adapter-python/harness/__init__.py +0 -0
- package/src/templates/_adapter-python/harness/eval_runner.py +281 -0
- package/src/templates/_adapter-python/harness/structural_test.py +195 -0
- package/src/templates/_adapter-typescript/.dependency-cruiser.cjs +27 -0
- package/src/templates/_adapter-typescript/eslint.config.mjs +38 -0
- package/src/templates/_adapter-typescript/harness/eval-runner.mjs +322 -0
- package/src/templates/_adapter-typescript/harness/structural-test.mjs +125 -0
- package/src/templates/_ci/.github/workflows/eval-nightly.yml +59 -0
- package/src/templates/_ci/.github/workflows/harness.yml +55 -0
- package/src/templates/docs/adr/0001-use-agent-harness-kit.md.hbs +56 -0
- package/src/templates/docs/agent-failures.md +25 -0
- package/src/templates/docs/architecture.md.hbs +47 -0
- package/src/templates/docs/core-beliefs.md.hbs +41 -0
- package/src/templates/docs/golden-principles.md.hbs +80 -0
- package/src/templates/docs/tech-debt-tracker.md +30 -0
- package/src/templates/feature_list.json.hbs +29 -0
- package/src/templates/harness.config.json.hbs +40 -0
- package/src/templates/scripts/dev-up.sh.hbs +51 -0
- package/src/templates/scripts/harness-report.mjs +189 -0
- package/src/templates/scripts/install-git-hooks.sh +18 -0
- package/src/templates/scripts/pre-push.sh +21 -0
- package/src/templates/scripts/precompletion-checklist.sh.hbs +99 -0
- package/src/templates/scripts/structural-test-on-edit.sh.hbs +53 -0
- package/src/templates/scripts/telemetry-on-skill.sh +26 -0
|
@@ -0,0 +1,274 @@
|
|
|
1
|
+
// upgrade.mjs — non-destructive version-aware upgrade.
|
|
2
|
+
//
|
|
3
|
+
// Strategy:
|
|
4
|
+
// 1. Read .harness/installed.json (lockfile of last-installed sha per file).
|
|
5
|
+
// 2. For each kit-managed file: if the user has not modified it (sha matches),
|
|
6
|
+
// overwrite. If modified, drop a sibling `.harness-new` so the user can diff.
|
|
7
|
+
// 3. Never touch USER_OWNED_FILES (CLAUDE.md, docs/architecture.md, etc.).
|
|
8
|
+
// 4. Print a concise summary and update the lockfile.
|
|
9
|
+
|
|
10
|
+
import { readFile, writeFile, mkdir, readdir, stat } from "node:fs/promises";
|
|
11
|
+
import { existsSync } from "node:fs";
|
|
12
|
+
import { resolve, join, relative, dirname } from "node:path";
|
|
13
|
+
import { fileURLToPath } from "node:url";
|
|
14
|
+
import { createHash } from "node:crypto";
|
|
15
|
+
import { confirm } from "@inquirer/prompts";
|
|
16
|
+
import pc from "picocolors";
|
|
17
|
+
import Handlebars from "handlebars";
|
|
18
|
+
import { registerHelpers } from "./render-templates.mjs";
|
|
19
|
+
import { detectStack } from "./detect-stack.mjs";
|
|
20
|
+
|
|
21
|
+
// Sync the two version-pinned fields in harness.config.json after a kit
|
|
22
|
+
// upgrade. Everything else in the config is user-owned and left untouched.
|
|
23
|
+
// Uses regex on the raw text instead of JSON round-tripping so user
|
|
24
|
+
// formatting (trailing zeros, key order, indentation, comments-as-strings)
|
|
25
|
+
// survives. Exported for unit tests; called from `upgrade()` below.
|
|
26
|
+
/**
 * Try every textual match of `pattern` in `text` and return the first edited
 * text that parses to the same JSON document with only the top-level `key`
 * changed to `value`. Returns `text` unchanged when no candidate qualifies.
 *
 * Verifying against the parsed document (instead of trusting the regex) is
 * what keeps nested keys and look-alike strings from being rewritten.
 *
 * @param {string} text - Raw JSON text (must already be valid JSON).
 * @param {RegExp} pattern - Global regex locating candidate spans.
 * @param {(match: RegExpMatchArray) => string} buildReplacement - Replacement text for one match.
 * @param {string} key - Top-level key that is expected to change.
 * @param {string} value - Value the top-level key must have afterwards.
 * @returns {string} The edited text, or `text` itself if no safe edit exists.
 */
function replaceTopLevelString(text, pattern, buildReplacement, key, value) {
  const expected = JSON.parse(text);
  expected[key] = value;
  const expectedJson = JSON.stringify(expected);

  for (const match of text.matchAll(pattern)) {
    const candidate =
      text.slice(0, match.index) +
      buildReplacement(match) +
      text.slice(match.index + match[0].length);
    try {
      if (JSON.stringify(JSON.parse(candidate)) === expectedJson) return candidate;
    } catch {
      // This candidate is not valid JSON — try the next occurrence.
    }
  }
  return text;
}

/**
 * Sync the top-level `version` and the kit-pinned `$schema` URL of
 * `<cwd>/harness.config.json` to `kitVersion`, editing the raw text so the
 * user's formatting is preserved.
 *
 * @param {string} cwd - Project directory containing harness.config.json.
 * @param {string} kitVersion - Version to pin, e.g. "0.3.0".
 * @returns {Promise<{changed: boolean, reason: string}>} `reason` is one of
 *   "missing", "invalid-json", "already-current", "would-corrupt" (an update
 *   was needed but no textual edit could be verified as safe) or "synced".
 */
export async function syncHarnessConfigVersion(cwd, kitVersion) {
  const cfgPath = resolve(cwd, "harness.config.json");
  if (!existsSync(cfgPath)) return { changed: false, reason: "missing" };

  const raw = await readFile(cfgPath, "utf8");
  // Validate JSON first so we never write back a corrupted file.
  let cfg;
  try {
    cfg = JSON.parse(raw);
  } catch {
    return { changed: false, reason: "invalid-json" };
  }
  // `null` and bare primitives are valid JSON but not a config object.
  if (cfg === null || typeof cfg !== "object") {
    return { changed: false, reason: "invalid-json" };
  }

  // JSON-escape whatever we splice into the raw text.
  const escapeForJson = (value) => JSON.stringify(String(value)).slice(1, -1);

  let next = raw;
  let editsWanted = 0;

  // Top-level "version": "<x.y.z>". The pattern is intentionally loose (it
  // also matches nested keys and minified files); replaceTopLevelString only
  // accepts the occurrence that changes the TOP-LEVEL key and nothing else.
  if (typeof cfg.version === "string" && cfg.version !== kitVersion) {
    editsWanted += 1;
    next = replaceTopLevelString(
      next,
      /("version"\s*:\s*")[^"]*(")/g,
      (match) => match[1] + escapeForJson(kitVersion) + match[2],
      "version",
      kitVersion,
    );
  }

  // Replace the kit's pinned $schema URL only — leaves user-forked URLs alone.
  const schemaUrlRe =
    /https:\/\/raw\.githubusercontent\.com\/tuanle96\/agent-harness-kit\/v[^/"]+\/schema\.json/;
  if (typeof cfg.$schema === "string" && schemaUrlRe.test(cfg.$schema)) {
    const newSchema = `https://raw.githubusercontent.com/tuanle96/agent-harness-kit/v${kitVersion}/schema.json`;
    // Function replacement: `$` sequences in the version are never interpreted.
    const wantedSchema = cfg.$schema.replace(schemaUrlRe, () => newSchema);
    if (wantedSchema !== cfg.$schema) {
      editsWanted += 1;
      next = replaceTopLevelString(
        next,
        new RegExp(schemaUrlRe.source, "g"),
        () => escapeForJson(newSchema),
        "$schema",
        wantedSchema,
      );
    }
  }

  if (next === raw) {
    return {
      changed: false,
      reason: editsWanted > 0 ? "would-corrupt" : "already-current",
    };
  }
  // Every accepted edit was re-parsed and compared above, so `next` is valid
  // JSON that differs from the original only in the synced fields.
  await writeFile(cfgPath, next);
  return { changed: true, reason: "synced" };
}
|
|
69
|
+
|
|
70
|
+
// Directory of this module, derived from its file URL.
const __dirname = dirname(fileURLToPath(import.meta.url));

// Root of the bundled templates: the `templates/` directory one level up.
const TEMPLATES_ROOT = resolve(__dirname, "..", "templates");

// Target-relative paths that upgrade() never overwrites; a template that
// renders to one of these is recorded as a "user-owned" skip.
const USER_OWNED_FILES = new Set([
  // Agent instruction files.
  "CLAUDE.md",
  "AGENTS.md",
  // Project documentation.
  "docs/architecture.md",
  "docs/core-beliefs.md",
  "docs/golden-principles.md",
  "docs/tech-debt-tracker.md",
  // Project state and configuration.
  "feature_list.json",
  "harness.config.json",
]);
|
|
83
|
+
|
|
84
|
+
/**
 * Hex-encoded SHA-256 digest of `buf`.
 *
 * @param {string | Buffer} buf - Data to hash.
 * @returns {string} Lowercase hexadecimal digest.
 */
function sha256(buf) {
  const hasher = createHash("sha256");
  hasher.update(buf);
  return hasher.digest("hex");
}
|
|
87
|
+
|
|
88
|
+
/**
 * Recursively yield the path of every non-directory entry under `dir`,
 * descending into each subdirectory as it is encountered.
 *
 * @param {string} dir - Directory to traverse.
 * @yields {string} `dir` joined with the entry's relative location.
 */
async function* walk(dir) {
  for (const entry of await readdir(dir, { withFileTypes: true })) {
    const childPath = join(dir, entry.name);
    if (!entry.isDirectory()) {
      yield childPath;
      continue;
    }
    yield* walk(childPath);
  }
}
|
|
99
|
+
|
|
100
|
+
/**
 * Upgrade the kit-managed files in `cwd` to `kitVersion` without clobbering
 * user edits.
 *
 * For every bundled template that applies to the detected stack:
 *   - target missing                      → write it ("new");
 *   - target equals the new content       → nothing to write, sha recorded;
 *   - target equals the last-installed sha → overwrite ("user-untouched");
 *   - otherwise                           → write `<file>.harness-new` next to it.
 * Files in USER_OWNED_FILES are never written. Finally the lockfile version is
 * bumped and harness.config.json's `version`/`$schema` are synced.
 *
 * Exits the process with status 1 when `.harness/installed.json` is missing.
 *
 * @param {object} options
 * @param {string} options.cwd - Project root to upgrade.
 * @param {string} options.kitVersion - Version of the kit being installed.
 * @param {boolean} [options.yes] - Skip the interactive confirmation.
 * @returns {Promise<void>}
 */
export async function upgrade({ cwd, kitVersion, yes }) {
  const lockPath = resolve(cwd, ".harness/installed.json");
  if (!existsSync(lockPath)) {
    console.error(
      pc.red(
        `No .harness/installed.json found. Run \`agent-harness-kit init\` first.`,
      ),
    );
    process.exit(1);
  }
  const lockfile = JSON.parse(await readFile(lockPath, "utf8"));
  const previousVersion = lockfile.version;

  if (previousVersion === kitVersion) {
    // Lockfile already current — but harness.config.json may still carry an
    // older `version`/`$schema` (it's user-owned and skipped by the file walk).
    // Sync those two fields so doctor stops flagging drift.
    const cfgSync = await syncHarnessConfigVersion(cwd, kitVersion);
    if (cfgSync.changed) {
      console.log(
        pc.green(`harness.config.json version + $schema synced to v${kitVersion}.`),
      );
    } else {
      console.log(pc.green(`Already on v${kitVersion}. Nothing to do.`));
    }
    return;
  }

  console.log(
    pc.bold(
      `\nUpgrading agent-harness-kit: ${pc.dim(previousVersion)} → ${pc.green(kitVersion)}\n`,
    ),
  );

  const stack = await detectStack(cwd);
  // NOTE(review): templates may reference variables that are not set here
  // (the debug-flow skill template uses `devCmd`, and `projectName` is a
  // placeholder) — confirm this context matches the one used at init time.
  const ctx = {
    projectName: "your-project",
    layers: ["types", "config", "repo", "service", "runtime", "ui"],
    layersJoined: "types → config → repo → service → runtime → ui",
    language: stack.language,
    framework: stack.framework,
    packageManager: stack.packageManager,
    isTypescript: stack.language === "typescript",
    isPython: stack.language === "python",
    isNextjs: stack.framework === "nextjs",
    isFastapi: stack.framework === "fastapi",
    kitVersion,
  };

  // The scan reads the sha map defensively; make sure it also exists for the
  // writes below so a lockfile without `files` cannot crash the upgrade.
  if (lockfile.files === null || typeof lockfile.files !== "object") {
    lockfile.files = {};
  }

  // Helpers only need registering once, not once per template.
  registerHelpers();

  // { rel, action: 'overwrite'|'sidecar'|'skip', reason, content?, sha? }
  // `content`/`sha` carry the rendered template into the apply phase so the
  // source never has to be re-located from the target path (which loses the
  // `_ci/`, `_adapter-*` and `_preset-*` prefixes).
  const updates = [];

  for await (const abs of walk(TEMPLATES_ROOT)) {
    const relFromTemplates = relative(TEMPLATES_ROOT, abs).split("\\").join("/");
    if (relFromTemplates.startsWith("_adapter-typescript/") && stack.language !== "typescript")
      continue;
    if (relFromTemplates.startsWith("_adapter-python/") && stack.language !== "python")
      continue;
    if (relFromTemplates.startsWith("_preset-nextjs/") && stack.framework !== "nextjs")
      continue;
    if (relFromTemplates.startsWith("_preset-fastapi/") && stack.framework !== "fastapi")
      continue;
    const stackRel = relFromTemplates
      .replace(/^_adapter-typescript\//, "")
      .replace(/^_adapter-python\//, "")
      .replace(/^_preset-nextjs\//, "")
      .replace(/^_preset-fastapi\//, "")
      .replace(/^_ci\//, "");
    const targetRel = stackRel.endsWith(".hbs")
      ? stackRel.slice(0, -".hbs".length)
      : stackRel;

    if (USER_OWNED_FILES.has(targetRel)) {
      updates.push({ rel: targetRel, action: "skip", reason: "user-owned" });
      continue;
    }

    let newContent;
    if (abs.endsWith(".hbs")) {
      const raw = await readFile(abs, "utf8");
      const tpl = Handlebars.compile(raw, { noEscape: true });
      newContent = tpl(ctx);
    } else {
      newContent = await readFile(abs);
    }

    const newSha = sha256(
      typeof newContent === "string" ? Buffer.from(newContent) : newContent,
    );
    const targetAbs = resolve(cwd, targetRel);
    const previousSha = lockfile.files[targetRel];
    const pending = { rel: targetRel, content: newContent, sha: newSha };

    if (!existsSync(targetAbs)) {
      updates.push({ ...pending, action: "overwrite", reason: "new" });
      continue;
    }
    const currentSha = sha256(await readFile(targetAbs));
    if (currentSha === newSha) {
      // Already on the new version — record the new sha but no action. The
      // lockfile is only persisted at the end, so an aborted run is unaffected.
      lockfile.files[targetRel] = newSha;
      updates.push({ rel: targetRel, action: "skip", reason: "identical" });
      continue;
    }
    if (currentSha === previousSha) {
      updates.push({ ...pending, action: "overwrite", reason: "user-untouched" });
    } else {
      updates.push({ ...pending, action: "sidecar", reason: "user-modified" });
    }
  }

  // Group + print summary.
  const overwrites = updates.filter((u) => u.action === "overwrite");
  const sidecars = updates.filter((u) => u.action === "sidecar");
  for (const u of overwrites) console.log(` ${pc.green("~")} ${u.rel} ${pc.dim("(" + u.reason + ")")}`);
  for (const u of sidecars)
    console.log(
      ` ${pc.yellow("!")} ${u.rel} ${pc.dim("user-modified — writing " + u.rel + ".harness-new for you to diff")}`,
    );

  if (overwrites.length === 0 && sidecars.length === 0) {
    console.log(pc.green(`No file changes needed. Bumping lockfile to v${kitVersion}.`));
  } else if (!yes) {
    const ok = await confirm({ message: "Apply the changes above?", default: true });
    if (!ok) {
      console.log(pc.yellow("upgrade aborted."));
      return;
    }
  }

  // Apply exactly what was announced above, using the content rendered
  // during the scan.
  for (const u of [...overwrites, ...sidecars]) {
    const targetAbs = resolve(cwd, u.action === "sidecar" ? u.rel + ".harness-new" : u.rel);
    await mkdir(dirname(targetAbs), { recursive: true });
    await writeFile(targetAbs, u.content);
    // Sidecars leave the user's file in place, so its recorded sha is kept.
    if (u.action === "overwrite") {
      lockfile.files[u.rel] = u.sha;
    }
  }

  lockfile.version = kitVersion;
  await writeFile(lockPath, JSON.stringify(lockfile, null, 2) + "\n");

  // Sync the two version-pinned fields in user-owned harness.config.json
  // (version + $schema URL). Everything else is left untouched.
  const cfgSync = await syncHarnessConfigVersion(cwd, kitVersion);
  if (cfgSync.changed) {
    console.log(pc.dim(` ${pc.green("~")} harness.config.json (version + $schema synced)`));
  }

  console.log(pc.bold(pc.green(`\n✓ upgrade complete (v${kitVersion}).`)));
  if (sidecars.length > 0) {
    console.log(
      pc.dim(
        `\nDiff each *.harness-new sidecar against the original to merge intentional changes, then delete the sidecar.`,
      ),
    );
  }
}
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: api-consistency-reviewer
|
|
3
|
+
description: Use this agent after adding or modifying any public API endpoint, exported function, CLI command, or RPC handler. Verifies naming, response shape, error format, and versioning conventions match `docs/api-conventions.md` (or the kit's defaults if that file doesn't exist). Read-only.
|
|
4
|
+
tools: Read, Grep, Glob, Bash(git diff:*)
|
|
5
|
+
model: haiku
|
|
6
|
+
---
|
|
7
|
+
|
|
8
|
+
Compare changed public surfaces against `docs/api-conventions.md` (if absent,
|
|
9
|
+
fall back to: response shape `{ data, error }`, camelCase keys for JS/TS,
|
|
10
|
+
snake_case for Python). Flag:
|
|
11
|
+
|
|
12
|
+
- response-shape drift (e.g. `{ success, data, error }` vs `{ ok, result }`)
|
|
13
|
+
- naming convention violations (camelCase vs snake_case mixing within one
|
|
14
|
+
payload)
|
|
15
|
+
- missing versioning on breaking changes (no `/v2/` prefix, no `deprecated`
|
|
16
|
+
flag)
|
|
17
|
+
- exported symbols without JSDoc / docstring on a NEW public function
|
|
18
|
+
- error response shape that doesn't match existing handlers
|
|
19
|
+
|
|
20
|
+
## Output format
|
|
21
|
+
|
|
22
|
+
```
|
|
23
|
+
PASS — public surfaces are consistent
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
or a numbered fix list:
|
|
27
|
+
|
|
28
|
+
```
|
|
29
|
+
1. <path>:<line> — <convention violated> — <fix>
|
|
30
|
+
2. ...
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
Do not modify files. Be terse.
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: architecture-reviewer
|
|
3
|
+
description: Use this agent immediately after any change that touches multiple layers, adds a new domain, or modifies imports across module boundaries. Verifies the {{layersJoined}} rule, provider boundaries, and golden-principles.md compliance. Read-only — never modifies files.
|
|
4
|
+
tools: Read, Grep, Glob, Bash({{#if isPython}}python -m harness.structural_test{{else}}npm run harness:check{{/if}}), Bash(git diff:*)
|
|
5
|
+
model: sonnet
|
|
6
|
+
---
|
|
7
|
+
|
|
8
|
+
You are a senior software architect reviewing a single PR's diff for
|
|
9
|
+
layered-architecture compliance. You are the **inferential sensor** that
|
|
10
|
+
complements the **computational sensor** (the structural test).
|
|
11
|
+
|
|
12
|
+
When invoked:
|
|
13
|
+
|
|
14
|
+
1. Run `git diff HEAD~1` (or against the PR base) to see exactly what changed.
|
|
15
|
+
2. Run {{#if isPython}}`python -m harness.structural_test`{{else}}`npm run harness:check`{{/if}} to see deterministic
|
|
16
|
+
violations first. If it fails, your job is to translate the failure into
|
|
17
|
+
a remediation plan, not duplicate it.
|
|
18
|
+
3. For each changed file: identify which layer it belongs to from
|
|
19
|
+
`harness.config.json`. Flag any cross-layer import that goes "backward"
|
|
20
|
+
or skips a layer.
|
|
21
|
+
4. Check that any new cross-cutting concern enters via the `providers/`
|
|
22
|
+
interface, not via direct import.
|
|
23
|
+
5. Check that any new public type is defined in the `types/` layer, not
|
|
24
|
+
inline in a service.
|
|
25
|
+
|
|
26
|
+
## Output format (always)
|
|
27
|
+
|
|
28
|
+
```
|
|
29
|
+
### Architecture review
|
|
30
|
+
**Verdict:** PASS | FAIL | NEEDS-DISCUSSION
|
|
31
|
+
**Layer-correct:** ✅ / ❌
|
|
32
|
+
**Provider-clean:** ✅ / ❌
|
|
33
|
+
**Findings:**
|
|
34
|
+
1. <path:line> — <description>
|
|
35
|
+
2. ...
|
|
36
|
+
**Remediation plan:**
|
|
37
|
+
- <specific edit, no rewrites>
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
Do not modify any files. Do not run tests beyond the structural test. If
|
|
41
|
+
unsure, return NEEDS-DISCUSSION with concrete questions.
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: performance-reviewer
|
|
3
|
+
description: Use this agent after adding loops over large collections, database queries, render paths, or anything in a hot path. Catches N+1 queries, missing memoization, accidental quadratic loops, and unindexed sorts. Read-only. Runs on Haiku for speed.
|
|
4
|
+
tools: Read, Grep, Glob
|
|
5
|
+
model: haiku
|
|
6
|
+
---
|
|
7
|
+
|
|
8
|
+
You are a performance reviewer. Be brief — this runs on Haiku for speed.
|
|
9
|
+
|
|
10
|
+
Check for, in order:
|
|
11
|
+
|
|
12
|
+
1. **N+1 queries.** Any `for x in xs: db.get(x.id)`-shaped pattern, or
|
|
13
|
+
`await Promise.all(xs.map(async x => db.findOne(...)))` against a database
|
|
14
|
+
with a way to batch.
|
|
15
|
+
2. **O(n²) loops.** Nested iteration over the same collection without an
|
|
16
|
+
early break or an index.
|
|
17
|
+
3. **Missing memoization** on a pure expensive function called in a render
|
|
18
|
+
hot path or per-request.
|
|
19
|
+
4. **Synchronous IO in an async/await context** (`fs.readFileSync`,
|
|
20
|
+
`db.queryBlocking`).
|
|
21
|
+
5. **Unbounded list growth.** `accumulator.push(...)` in a loop over an
|
|
22
|
+
external feed without a cap.
|
|
23
|
+
|
|
24
|
+
## Output format
|
|
25
|
+
|
|
26
|
+
For each finding, one line:
|
|
27
|
+
|
|
28
|
+
```
|
|
29
|
+
<path>:<line> — <pattern> — <suggested fix in ≤ 1 line>
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
If clean: `PASS — no obvious hot spots`.
|
|
33
|
+
|
|
34
|
+
Be terse. Do not modify files. If a finding is speculative, mark it `(maybe)`
|
|
35
|
+
and explain in ≤ 5 words.
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: reliability-reviewer
|
|
3
|
+
description: Use this agent immediately after adding any error handling, retry loop, async boundary, timeout, or external call (HTTP/DB/queue/file). Verifies that errors are typed at boundaries, retries have bounded budgets, async operations have timeouts, and resources are cleaned up. Read-only.
|
|
4
|
+
tools: Read, Grep, Glob, Bash(git diff:*)
|
|
5
|
+
model: sonnet
|
|
6
|
+
---
|
|
7
|
+
|
|
8
|
+
You are a senior reliability engineer. Focus areas, in priority order:
|
|
9
|
+
|
|
10
|
+
1. **Boundary error handling.** Every external call (HTTP, DB, file, queue)
|
|
11
|
+
must have an explicit error path. No bare `except:` (Python) or empty
|
|
12
|
+
`catch` (TS). Errors should be typed (`Result<T,E>` or tagged union).
|
|
13
|
+
2. **Retry budgets.** Every retry loop must have BOTH a max-attempts AND a
|
|
14
|
+
deadline. Reject infinite `while True` / `while (true)` over external
|
|
15
|
+
calls. Reject exponential backoff without a cap.
|
|
16
|
+
3. **Timeouts.** Every `fetch` / `httpx` / `requests` / `axios` call needs an
|
|
17
|
+
explicit timeout. `requests` and `axios` never time out by default, and the
|
|
18
|
+
other clients' defaults rarely match your latency budget.
|
|
19
|
+
4. **Idempotency.** Write operations should be idempotent or guarded with a
|
|
20
|
+
key. Flag `POST` / `INSERT` without a deduplication mechanism that runs
|
|
21
|
+
inside a retry loop.
|
|
22
|
+
5. **Resource cleanup.** Every `open()` in Python must use `with`. Every TS
|
|
23
|
+
file/socket/stream must have a `try/finally close` or `using` declaration
|
|
24
|
+
(TC39 explicit-resource-management).
|
|
25
|
+
6. **Cancellation.** Long-running async work without an `AbortSignal` /
|
|
26
|
+
`asyncio.CancelledError` handler is a leak waiting to happen.
|
|
27
|
+
|
|
28
|
+
## Output format
|
|
29
|
+
|
|
30
|
+
For each finding:
|
|
31
|
+
|
|
32
|
+
```
|
|
33
|
+
[BLOCKING|WARN] <path>:<line> — <issue> — <fix in ≤ 1 line>
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
If clean: `PASS — reliability checks satisfied`.
|
|
37
|
+
|
|
38
|
+
Do not modify files.
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: security-reviewer
|
|
3
|
+
description: Use this agent immediately after writing or modifying authentication, authorization, input handling, secret loading, network calls, or anything in `providers/auth` or runtime/api routes. Runs read-only OWASP-Top-10 + secrets scan. Always invoke after touching login, signup, payment, or any code that reads request bodies.
|
|
4
|
+
tools: Read, Grep, Glob, Bash(git diff:*)
|
|
5
|
+
model: sonnet
|
|
6
|
+
---
|
|
7
|
+
|
|
8
|
+
You are a senior application security engineer. Your role is to **find
|
|
9
|
+
vulnerabilities, not write fixes**.
|
|
10
|
+
|
|
11
|
+
When invoked:
|
|
12
|
+
|
|
13
|
+
1. `git diff HEAD~1` to see only the changed code.
|
|
14
|
+
2. Identify the highest-risk areas in the diff: auth flows, input handling,
|
|
15
|
+
data exposure, file IO, child_process, eval, dynamic imports.
|
|
16
|
+
3. Check for, in order:
|
|
17
|
+
- SQL injection (string-interpolated SQL, even with ORMs)
|
|
18
|
+
- XSS (`dangerouslySetInnerHTML`, `innerHTML`, `v-html`, `{{...|safe}}`)
|
|
19
|
+
- IDOR / missing authorization checks on a resource fetch
|
|
20
|
+
- Secrets in code (regex `(sk-|ghp_|AKIA|xox[abp]-|-----BEGIN)` — unanchored, so secrets assigned mid-line also match)
|
|
21
|
+
- Unbounded user input (no max length, no schema validation)
|
|
22
|
+
- Missing rate limit on auth-adjacent endpoints
|
|
23
|
+
- Insecure deserialization (`pickle.loads`, `JSON.parse` with reviver)
|
|
24
|
+
4. Language-specific:
|
|
25
|
+
- **Python**: `pickle.loads`, `os.system`, `eval`, `subprocess(shell=True)`, `yaml.load` without `Loader=SafeLoader`
|
|
26
|
+
- **TypeScript**: `dangerouslySetInnerHTML`, `eval`, `new Function`, `child_process.exec` with interpolation, `fetch` to untrusted URL without TLS verification
|
|
27
|
+
|
|
28
|
+
## Output format
|
|
29
|
+
|
|
30
|
+
For each finding, one line:
|
|
31
|
+
|
|
32
|
+
```
|
|
33
|
+
[CRITICAL|HIGH|MEDIUM|LOW] <path>:<line> — <brief description> — <minimal-fix suggestion ≤ 3 lines of code>
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
If clean: `PASS — no vulnerabilities found in diff`.
|
|
37
|
+
|
|
38
|
+
Do not modify files. Do not write tests. Do not propose architectural
|
|
39
|
+
rewrites — that's `architecture-reviewer`'s job.
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://json.schemastore.org/claude-code-hooks.json",
|
|
3
|
+
"hooks": {
|
|
4
|
+
"PostToolUse": [
|
|
5
|
+
{
|
|
6
|
+
"matcher": "Write|Edit|MultiEdit",
|
|
7
|
+
"hooks": [
|
|
8
|
+
{
|
|
9
|
+
"type": "command",
|
|
10
|
+
"command": "bash scripts/structural-test-on-edit.sh",
|
|
11
|
+
"timeout": 30
|
|
12
|
+
}
|
|
13
|
+
]
|
|
14
|
+
},
|
|
15
|
+
{
|
|
16
|
+
"matcher": "Skill",
|
|
17
|
+
"hooks": [
|
|
18
|
+
{
|
|
19
|
+
"type": "command",
|
|
20
|
+
"command": "bash scripts/telemetry-on-skill.sh",
|
|
21
|
+
"timeout": 5
|
|
22
|
+
}
|
|
23
|
+
]
|
|
24
|
+
}
|
|
25
|
+
],
|
|
26
|
+
"Stop": [
|
|
27
|
+
{
|
|
28
|
+
"matcher": "",
|
|
29
|
+
"hooks": [
|
|
30
|
+
{
|
|
31
|
+
"type": "command",
|
|
32
|
+
"command": "bash scripts/precompletion-checklist.sh",
|
|
33
|
+
"timeout": 20
|
|
34
|
+
}
|
|
35
|
+
]
|
|
36
|
+
}
|
|
37
|
+
]
|
|
38
|
+
}
|
|
39
|
+
}
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://json.schemastore.org/claude-code-settings.json",
|
|
3
|
+
"permissions": {
|
|
4
|
+
"allow": [
|
|
5
|
+
"Bash(npm run harness:*)",
|
|
6
|
+
"Bash(npm run lint:*)",
|
|
7
|
+
"Bash(npm test:*)",
|
|
8
|
+
"Bash(pytest:*)",
|
|
9
|
+
"Bash(ruff:*)",
|
|
10
|
+
"Bash(git status)",
|
|
11
|
+
"Bash(git diff:*)",
|
|
12
|
+
"Bash(git log:*)",
|
|
13
|
+
"Bash(git ls-tree:*)",
|
|
14
|
+
"Bash(git show:*)",
|
|
15
|
+
"Bash(tree:*)",
|
|
16
|
+
"Bash(ls:*)",
|
|
17
|
+
"Bash(test -e:*)",
|
|
18
|
+
"Bash(command -v:*)"
|
|
19
|
+
]
|
|
20
|
+
},
|
|
21
|
+
"model": "claude-sonnet-4-6",
|
|
22
|
+
"env": {
|
|
23
|
+
"AGENT_HARNESS_KIT_VERSION": "{{kitVersion}}"
|
|
24
|
+
}
|
|
25
|
+
}
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: add-adr
|
|
3
|
+
description: Use this skill whenever a decision is made about architecture, dependencies, frameworks, naming conventions, or layer order. Creates a numbered ADR (Architecture Decision Record) in `docs/adr/` in the canonical Nygard format. Always invoke this before changing layer order, adding a layer, swapping a major dependency, or introducing a new external service.
|
|
4
|
+
allowed-tools: Read, Write, Glob
|
|
5
|
+
suggested-turns: 6
|
|
6
|
+
---
|
|
7
|
+
|
|
8
|
+
## Steps
|
|
9
|
+
|
|
10
|
+
1. **Find the next number.** List `docs/adr/` and pick the highest existing
|
|
11
|
+
number + 1 (zero-padded to 4 digits).
|
|
12
|
+
2. **Generate the file.** Write `docs/adr/{NNNN}-{kebab-title}.md` with the
|
|
13
|
+
sections below.
|
|
14
|
+
3. **Update affected configs.** If the ADR changes layer order or adds a
|
|
15
|
+
layer, update `harness.config.json` AND the structural-test config in the
|
|
16
|
+
same commit as the ADR.
|
|
17
|
+
4. **Append to the index.** Add a one-line entry under "Recent decisions" in
|
|
18
|
+
`docs/architecture.md`.
|
|
19
|
+
|
|
20
|
+
## ADR template (write exactly this shape)
|
|
21
|
+
|
|
22
|
+
```markdown
|
|
23
|
+
# ADR <NNNN> — <title>
|
|
24
|
+
|
|
25
|
+
- **Status:** proposed | accepted | superseded by <link>
|
|
26
|
+
- **Date:** YYYY-MM-DD
|
|
27
|
+
- **Deciders:** <names or "project owner">
|
|
28
|
+
|
|
29
|
+
## Context
|
|
30
|
+
|
|
31
|
+
<What forces are in play? What constraints? What did we learn that triggered this?>
|
|
32
|
+
|
|
33
|
+
## Decision
|
|
34
|
+
|
|
35
|
+
<What we decided. Single sentence then a list.>
|
|
36
|
+
|
|
37
|
+
## Consequences
|
|
38
|
+
|
|
39
|
+
Positive: ...
|
|
40
|
+
Negative: ...
|
|
41
|
+
|
|
42
|
+
## Alternatives considered
|
|
43
|
+
|
|
44
|
+
- <alternative>: <why rejected>
|
|
45
|
+
- <alternative>: <why rejected>
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
## Output contract
|
|
49
|
+
|
|
50
|
+
```
|
|
51
|
+
### ADR: <NNNN>-<slug>
|
|
52
|
+
### Status: <status>
|
|
53
|
+
### Configs updated: <list or "none">
|
|
54
|
+
### docs/architecture.md updated: yes/no
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
## Anti-patterns
|
|
58
|
+
|
|
59
|
+
- Don't write an ADR for a one-line refactor — those go in commit messages.
|
|
60
|
+
- Don't change the status of an existing ADR retroactively. Supersede it.
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: add-feature
|
|
3
|
+
description: Use this skill whenever the user asks to add, implement, or build a new feature, capability, endpoint, page, command, or anything user-visible. Enforces the Anthropic two-fold harness pattern — read feature_list.json, pick exactly one feature, implement incrementally, run the structural test on every save, and never declare "done" without updating the JSON. Always invoke this skill instead of writing new feature code freehand.
|
|
4
|
+
allowed-tools: Read, Edit, Write, Bash(npm run:*), Bash(pytest:*), Bash(ruff:*), Bash(git:*), Glob, Grep
|
|
5
|
+
suggested-turns: 25
|
|
6
|
+
---
|
|
7
|
+
|
|
8
|
+
## Steps
|
|
9
|
+
|
|
10
|
+
1. **Read `feature_list.json`.** Confirm the feature exists and `passes:
|
|
11
|
+
false`. If the user described a feature not in the list, **stop**: ask
|
|
12
|
+
whether to add it via `/add-adr` first.
|
|
13
|
+
2. **Read `docs/architecture.md`** for the affected domain. Identify which
|
|
14
|
+
layers will change.
|
|
15
|
+
3. **Run `/inspect-module`** on each affected module. Do this even if you
|
|
16
|
+
think you know the area — verify, don't assume.
|
|
17
|
+
4. **Plan first.** Write a one-paragraph plan to `.harness/PLAN.md` *before
|
|
18
|
+
any code change*. (Anthropic Claude 4 prompt-guide pattern.)
|
|
19
|
+
5. **Implement smallest first.** Make the smallest change that turns one
|
|
20
|
+
`steps[]` item from failing → passing.
|
|
21
|
+
6. **Run the structural test.** {{#if isPython}}`python -m harness.structural_test`{{else}}`npm run harness:check`{{/if}}.
|
|
22
|
+
If it fails, fix the violation before continuing — never disable the test.
|
|
23
|
+
7. **Smoke test.** Run the relevant smoke test from `scripts/dev-up.sh`.
|
|
24
|
+
8. **Update `feature_list.json` ONLY** by changing the `passes` field of one
|
|
25
|
+
item. Never delete or rewrite items. (Anthropic JSON-over-Markdown rule:
|
|
26
|
+
"the model is less likely to inappropriately change or overwrite JSON
|
|
27
|
+
files compared to Markdown files.")
|
|
28
|
+
9. **Append to PROGRESS.** One line in `.harness/PROGRESS.md`:
|
|
29
|
+
`YYYY-MM-DD HH:MM | <feature_id> | done`.
|
|
30
|
+
10. **Commit.** Message: `feat(<domain>): <feature_id> - <short>`.
|
|
31
|
+
|
|
32
|
+
## Failure modes to avoid (each line below corresponds to a real observed failure)
|
|
33
|
+
|
|
34
|
+
- Don't claim a feature is done without running the smoke test.
|
|
35
|
+
- Don't mark `passes: true` if the structural test is failing.
|
|
36
|
+
- Don't add a new feature to `feature_list.json` mid-session — propose it
|
|
37
|
+
for the next session via ADR instead.
|
|
38
|
+
- Don't refactor unrelated code in the same commit.
|
|
39
|
+
|
|
40
|
+
## Output contract
|
|
41
|
+
|
|
42
|
+
After implementation, summarize:
|
|
43
|
+
|
|
44
|
+
```
|
|
45
|
+
### Feature: <id>
|
|
46
|
+
### Files changed: <list>
|
|
47
|
+
### Structural test: PASS|FAIL
|
|
48
|
+
### Smoke test: PASS|FAIL
|
|
49
|
+
### Reviewer subagents to invoke: architecture-reviewer, security-reviewer (if auth/IO touched), reliability-reviewer (if retries/timeouts touched)
|
|
50
|
+
```
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: debug-flow
|
|
3
|
+
description: Use this skill whenever the user reports a bug, unexpected output, or "this doesn't work". Runs the dev server, drives the failing flow via Playwright MCP if installed (else captures stdout/stderr), and produces a minimal repro before any fix. Mirrors the OpenAI Chrome-DevTools-Protocol-into-runtime pattern at solo scale — verify the failure before you propose a fix.
|
|
4
|
+
allowed-tools: Read, Bash({{devCmd}}), Bash(curl:*), Bash(playwright:*), Bash(scripts/dev-up.sh)
|
|
5
|
+
suggested-turns: 20
|
|
6
|
+
---
|
|
7
|
+
|
|
8
|
+
## Steps
|
|
9
|
+
|
|
10
|
+
1. **Start the dev server** via `scripts/dev-up.sh`. Wait for the readiness
|
|
11
|
+
probe.
|
|
12
|
+
2. **Drive the failing flow.**
|
|
13
|
+
- If the bug is UI: use Playwright MCP (`mcp__playwright__*`) — the
|
|
14
|
+
Anthropic claude.ai-clone pattern.
|
|
15
|
+
- If MCP unavailable: fall back to `curl -i` + screenshot via
|
|
16
|
+
`scrot`/`screencapture`/`gnome-screenshot`.
|
|
17
|
+
3. **Capture context.** Request payload (if any), response status, stderr
|
|
18
|
+
tail (last 50 lines), last 3 git commits.
|
|
19
|
+
4. **Write a minimal repro** to `.harness/repros/<date>-<slug>.md` with:
|
|
20
|
+
environment, steps, expected, actual.
|
|
21
|
+
5. **Only then propose a fix.** Run the structural test and the relevant
|
|
22
|
+
smoke test after the fix. Re-run the repro to confirm.
|
|
23
|
+
|
|
24
|
+
## Output contract
|
|
25
|
+
|
|
26
|
+
```
|
|
27
|
+
### Repro saved: .harness/repros/<filename>
|
|
28
|
+
### Failure mode: <one-line summary>
|
|
29
|
+
### Smallest failing input: <code or curl command>
|
|
30
|
+
### Proposed fix location: <file:line>
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
## Anti-patterns
|
|
34
|
+
|
|
35
|
+
- Don't propose a fix before reproducing the bug locally.
|
|
36
|
+
- Don't fix more than the user reported in the same commit.
|
|
37
|
+
- Don't add a defensive try/except over the failing call without
|
|
38
|
+
understanding why it fails.
|