agent-harness-kit 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. package/.claude-plugin/marketplace.json +27 -0
  2. package/.claude-plugin/plugin.json +25 -0
  3. package/LICENSE +21 -0
  4. package/README.md +165 -0
  5. package/bin/cli.mjs +261 -0
  6. package/package.json +64 -0
  7. package/src/core/detect-stack.mjs +181 -0
  8. package/src/core/doctor.mjs +106 -0
  9. package/src/core/patch-package-json.mjs +53 -0
  10. package/src/core/render-templates.mjs +277 -0
  11. package/src/core/upgrade.mjs +274 -0
  12. package/src/templates/.claude/agents/api-consistency-reviewer.md +33 -0
  13. package/src/templates/.claude/agents/architecture-reviewer.md.hbs +41 -0
  14. package/src/templates/.claude/agents/performance-reviewer.md +35 -0
  15. package/src/templates/.claude/agents/reliability-reviewer.md +38 -0
  16. package/src/templates/.claude/agents/security-reviewer.md +39 -0
  17. package/src/templates/.claude/hooks/hooks.json.hbs +39 -0
  18. package/src/templates/.claude/settings.json.hbs +25 -0
  19. package/src/templates/.claude/skills/add-adr/SKILL.md +60 -0
  20. package/src/templates/.claude/skills/add-feature/SKILL.md.hbs +50 -0
  21. package/src/templates/.claude/skills/debug-flow/SKILL.md.hbs +38 -0
  22. package/src/templates/.claude/skills/doc-drift-scan/SKILL.md +43 -0
  23. package/src/templates/.claude/skills/eval-runner/SKILL.md +55 -0
  24. package/src/templates/.claude/skills/garbage-collection/SKILL.md.hbs +49 -0
  25. package/src/templates/.claude/skills/inspect-app/SKILL.md +57 -0
  26. package/src/templates/.claude/skills/inspect-module/SKILL.md.hbs +53 -0
  27. package/src/templates/.claude/skills/propose-harness-improvement/SKILL.md +43 -0
  28. package/src/templates/.claude/skills/structural-test-author/SKILL.md.hbs +46 -0
  29. package/src/templates/.claude/skills/write-skill/SKILL.md +39 -0
  30. package/src/templates/CLAUDE.md.hbs +70 -0
  31. package/src/templates/_adapter-python/.importlinter +14 -0
  32. package/src/templates/_adapter-python/harness/__init__.py +0 -0
  33. package/src/templates/_adapter-python/harness/eval_runner.py +281 -0
  34. package/src/templates/_adapter-python/harness/structural_test.py +195 -0
  35. package/src/templates/_adapter-typescript/.dependency-cruiser.cjs +27 -0
  36. package/src/templates/_adapter-typescript/eslint.config.mjs +38 -0
  37. package/src/templates/_adapter-typescript/harness/eval-runner.mjs +322 -0
  38. package/src/templates/_adapter-typescript/harness/structural-test.mjs +125 -0
  39. package/src/templates/_ci/.github/workflows/eval-nightly.yml +59 -0
  40. package/src/templates/_ci/.github/workflows/harness.yml +55 -0
  41. package/src/templates/docs/adr/0001-use-agent-harness-kit.md.hbs +56 -0
  42. package/src/templates/docs/agent-failures.md +25 -0
  43. package/src/templates/docs/architecture.md.hbs +47 -0
  44. package/src/templates/docs/core-beliefs.md.hbs +41 -0
  45. package/src/templates/docs/golden-principles.md.hbs +80 -0
  46. package/src/templates/docs/tech-debt-tracker.md +30 -0
  47. package/src/templates/feature_list.json.hbs +29 -0
  48. package/src/templates/harness.config.json.hbs +40 -0
  49. package/src/templates/scripts/dev-up.sh.hbs +51 -0
  50. package/src/templates/scripts/harness-report.mjs +189 -0
  51. package/src/templates/scripts/install-git-hooks.sh +18 -0
  52. package/src/templates/scripts/pre-push.sh +21 -0
  53. package/src/templates/scripts/precompletion-checklist.sh.hbs +99 -0
  54. package/src/templates/scripts/structural-test-on-edit.sh.hbs +53 -0
  55. package/src/templates/scripts/telemetry-on-skill.sh +26 -0
@@ -0,0 +1,274 @@
1
+ // upgrade.mjs — non-destructive version-aware upgrade.
2
+ //
3
+ // Strategy:
4
+ // 1. Read .harness/installed.json (lockfile of last-installed sha per file).
5
+ // 2. For each kit-managed file: if the user has not modified it (sha matches),
6
+ // overwrite. If modified, drop a sibling `.harness-new` so the user can diff.
7
+ // 3. Never touch USER_OWNED_FILES (CLAUDE.md, docs/architecture.md, etc.), except to sync the version + $schema fields of harness.config.json.
8
+ // 4. Print a concise summary and update the lockfile.
9
+
10
+ import { readFile, writeFile, mkdir, readdir, stat } from "node:fs/promises";
11
+ import { existsSync } from "node:fs";
12
+ import { resolve, join, relative, dirname } from "node:path";
13
+ import { fileURLToPath } from "node:url";
14
+ import { createHash } from "node:crypto";
15
+ import { confirm } from "@inquirer/prompts";
16
+ import pc from "picocolors";
17
+ import Handlebars from "handlebars";
18
+ import { registerHelpers } from "./render-templates.mjs";
19
+ import { detectStack } from "./detect-stack.mjs";
20
+
21
+ // Sync the two version-pinned fields in harness.config.json after a kit
22
+ // upgrade. Everything else in the config is user-owned and left untouched.
23
+ // Uses regex on the raw text instead of JSON round-tripping so user
24
+ // formatting (trailing zeros, key order, indentation, comments-as-strings)
25
+ // survives. Exported for unit tests; called from `upgrade()` below.
26
/**
 * Return the index just past the JSON string literal that opens at `start`
 * (`text[start]` must be the opening quote). Backslash escapes are skipped.
 */
function endOfJsonString(text, start) {
  let i = start + 1;
  while (i < text.length && text[i] !== '"') {
    i += text[i] === "\\" ? 2 : 1;
  }
  return i + 1;
}

/** Return the first index at or after `from` that is not JSON whitespace. */
function skipJsonWhitespace(text, from) {
  let i = from;
  while (i < text.length && " \t\n\r".includes(text[i])) i += 1;
  return i;
}

/**
 * Locate the string value of member `key` of the ROOT object in valid JSON
 * `text`. Returns `{ start, end }` covering the value including its quotes,
 * or `null` when the root has no such member with a string value. When the
 * key is duplicated the last occurrence wins, matching `JSON.parse`.
 */
function findTopLevelStringValue(text, key) {
  let found = null;
  let depth = 0;
  let i = 0;
  while (i < text.length) {
    const ch = text[i];
    if (ch === '"') {
      const end = endOfJsonString(text, i);
      const afterName = skipJsonWhitespace(text, end);
      // Only a string followed by ":" is a member name; depth 1 means it
      // belongs to the root object rather than to a nested one.
      if (
        depth === 1 &&
        text[afterName] === ":" &&
        JSON.parse(text.slice(i, end)) === key
      ) {
        const valueStart = skipJsonWhitespace(text, afterName + 1);
        if (text[valueStart] === '"') {
          found = { start: valueStart, end: endOfJsonString(text, valueStart) };
          i = found.end;
          continue;
        }
      }
      i = end;
      continue;
    }
    if (ch === "{" || ch === "[") depth += 1;
    else if (ch === "}" || ch === "]") depth -= 1;
    i += 1;
  }
  return found;
}

/** Splice `value` (JSON-encoded) over the root-level string member `key`. */
function replaceTopLevelString(text, key, value) {
  const span = findTopLevelStringValue(text, key);
  if (span === null) return text;
  return text.slice(0, span.start) + JSON.stringify(value) + text.slice(span.end);
}

/**
 * Sync the two version-pinned fields (`version` and the kit's `$schema` URL)
 * of `<cwd>/harness.config.json` to `kitVersion`.
 *
 * The raw text is edited in place rather than re-serialized, so user
 * formatting (key order, indentation, number spelling) survives. Only members
 * of the root object are rewritten: a `"version"` key inside a nested object,
 * or a kit URL appearing in some other field, is never touched.
 *
 * @param {string} cwd - Project directory containing harness.config.json.
 * @param {string} kitVersion - Version to pin, e.g. "0.3.0".
 * @returns {Promise<{changed: boolean, reason: "missing"|"invalid-json"|"already-current"|"would-corrupt"|"synced"}>}
 *   `changed` is true only when the file was rewritten.
 */
export async function syncHarnessConfigVersion(cwd, kitVersion) {
  const cfgPath = resolve(cwd, "harness.config.json");
  if (!existsSync(cfgPath)) return { changed: false, reason: "missing" };
  const raw = await readFile(cfgPath, "utf8");

  // Validate JSON first so we never write back a corrupted file.
  let cfg;
  try {
    cfg = JSON.parse(raw);
  } catch {
    return { changed: false, reason: "invalid-json" };
  }

  // `expected` is what the edited file must parse to: the original config
  // with only the fields we deliberately changed.
  const expected = cfg !== null && typeof cfg === "object" ? { ...cfg } : cfg;
  let next = raw;

  // `cfg?.` keeps a literal `null` root from throwing; it has nothing to sync.
  if (typeof cfg?.version === "string" && cfg.version !== kitVersion) {
    next = replaceTopLevelString(next, "version", kitVersion);
    expected.version = kitVersion;
  }

  // Replace the kit's pinned $schema URL only — leaves user-forked URLs alone.
  const schemaUrlRe =
    /https:\/\/raw\.githubusercontent\.com\/tuanle96\/agent-harness-kit\/v[^/"]+\/schema\.json/;
  if (typeof cfg?.$schema === "string" && schemaUrlRe.test(cfg.$schema)) {
    const newSchema = `https://raw.githubusercontent.com/tuanle96/agent-harness-kit/v${kitVersion}/schema.json`;
    // Function replacer: `$` sequences in the version can never be read as
    // replacement patterns.
    const updatedSchema = cfg.$schema.replace(schemaUrlRe, () => newSchema);
    if (updatedSchema !== cfg.$schema) {
      next = replaceTopLevelString(next, "$schema", updatedSchema);
      expected.$schema = updatedSchema;
    }
  }

  if (next === raw) {
    return { changed: false, reason: "already-current" };
  }

  // Sanity-check: the edit must still be valid JSON AND must differ from the
  // original only in the fields we meant to change.
  let reparsed;
  try {
    reparsed = JSON.parse(next);
  } catch {
    return { changed: false, reason: "would-corrupt" };
  }
  if (JSON.stringify(reparsed) !== JSON.stringify(expected)) {
    return { changed: false, reason: "would-corrupt" };
  }

  await writeFile(cfgPath, next);
  return { changed: true, reason: "synced" };
}
69
+
70
// ES modules have no built-in __dirname; derive it from this module's URL.
const __dirname = dirname(fileURLToPath(import.meta.url));

// Directory holding the kit's template tree (sibling of this module's folder).
const TEMPLATES_ROOT = resolve(__dirname, "../templates");

// Project-relative paths the upgrade walk must never overwrite or sidecar.
const USER_OWNED_FILES = new Set([
  "CLAUDE.md",
  "AGENTS.md",
  "docs/architecture.md",
  "docs/core-beliefs.md",
  "docs/golden-principles.md",
  "docs/tech-debt-tracker.md",
  "feature_list.json",
  "harness.config.json",
]);
83
+
84
/**
 * Hex-encoded SHA-256 digest of `buf` (a string or Buffer).
 */
function sha256(buf) {
  const hasher = createHash("sha256");
  hasher.update(buf);
  return hasher.digest("hex");
}
87
+
88
/**
 * Recursively yield the path of every non-directory entry beneath `dir`,
 * descending into each subdirectory as it is encountered.
 */
async function* walk(dir) {
  for (const entry of await readdir(dir, { withFileTypes: true })) {
    const entryPath = join(dir, entry.name);
    if (!entry.isDirectory()) {
      yield entryPath;
      continue;
    }
    yield* walk(entryPath);
  }
}
99
+
100
/**
 * Upgrade the kit-managed files in a project to `kitVersion` without
 * clobbering user edits.
 *
 * Plan phase: every applicable template is rendered and compared against the
 * file on disk and the sha recorded in `.harness/installed.json`:
 *   - target missing            → write it ("new")
 *   - target equals new content → nothing to write ("identical")
 *   - target equals lockfile sha→ overwrite ("user-untouched")
 *   - anything else             → write `<file>.harness-new` ("user-modified")
 * Files listed in USER_OWNED_FILES are skipped entirely.
 *
 * Apply phase: writes exactly the content rendered during planning, so every
 * file shown in the summary is the file that gets written — including
 * templates that live under `_ci/`, `_preset-*` or an adapter directory.
 * Finally the lockfile version is bumped and harness.config.json has its
 * version/$schema fields synced.
 *
 * Exits the process with status 1 when no lockfile exists.
 *
 * @param {object} options
 * @param {string} options.cwd - Project root to upgrade.
 * @param {string} options.kitVersion - Version of the kit being installed.
 * @param {boolean} [options.yes] - Skip the interactive confirmation prompt.
 * @returns {Promise<void>}
 */
export async function upgrade({ cwd, kitVersion, yes }) {
  const lockPath = resolve(cwd, ".harness/installed.json");
  if (!existsSync(lockPath)) {
    console.error(
      pc.red(
        `No .harness/installed.json found. Run \`agent-harness-kit init\` first.`,
      ),
    );
    process.exit(1);
  }
  const lockfile = JSON.parse(await readFile(lockPath, "utf8"));
  // A lockfile without a `files` map must not make the sha bookkeeping below
  // throw; treat it as "no shas recorded yet".
  lockfile.files = lockfile.files ?? {};
  const previousVersion = lockfile.version;

  if (previousVersion === kitVersion) {
    // Lockfile already current — but harness.config.json may still carry an
    // older `version`/`$schema` (it's user-owned and skipped by the file walk).
    // Sync those two fields so doctor stops flagging drift.
    const cfgSync = await syncHarnessConfigVersion(cwd, kitVersion);
    if (cfgSync.changed) {
      console.log(
        pc.green(`harness.config.json version + $schema synced to v${kitVersion}.`),
      );
    } else {
      console.log(pc.green(`Already on v${kitVersion}. Nothing to do.`));
    }
    return;
  }

  console.log(
    pc.bold(
      `\nUpgrading agent-harness-kit: ${pc.dim(previousVersion)} → ${pc.green(kitVersion)}\n`,
    ),
  );

  const stack = await detectStack(cwd);
  // NOTE(review): projectName and layers are fixed placeholders here, and some
  // templates reference names this context does not define (e.g. `devCmd` in
  // .claude/skills/debug-flow/SKILL.md.hbs). Unless registerHelpers() supplies
  // them, those render empty on upgrade — confirm against the init context.
  const ctx = {
    projectName: "your-project",
    layers: ["types", "config", "repo", "service", "runtime", "ui"],
    layersJoined: "types → config → repo → service → runtime → ui",
    language: stack.language,
    framework: stack.framework,
    packageManager: stack.packageManager,
    isTypescript: stack.language === "typescript",
    isPython: stack.language === "python",
    isNextjs: stack.framework === "nextjs",
    isFastapi: stack.framework === "fastapi",
    kitVersion,
  };

  // Helpers only need registering once, not once per template.
  registerHelpers();

  // { rel, action: 'overwrite'|'sidecar'|'skip', reason, content?, sha? }
  const updates = [];

  for await (const abs of walk(TEMPLATES_ROOT)) {
    const relFromTemplates = relative(TEMPLATES_ROOT, abs).split("\\").join("/");
    // Stack-specific trees only apply to the detected language/framework.
    if (relFromTemplates.startsWith("_adapter-typescript/") && stack.language !== "typescript")
      continue;
    if (relFromTemplates.startsWith("_adapter-python/") && stack.language !== "python")
      continue;
    if (relFromTemplates.startsWith("_preset-nextjs/") && stack.framework !== "nextjs")
      continue;
    if (relFromTemplates.startsWith("_preset-fastapi/") && stack.framework !== "fastapi")
      continue;
    // Strip the grouping prefix and the template extension to get the path
    // the file occupies inside the user's project.
    const stackRel = relFromTemplates
      .replace(/^_adapter-typescript\//, "")
      .replace(/^_adapter-python\//, "")
      .replace(/^_preset-nextjs\//, "")
      .replace(/^_preset-fastapi\//, "")
      .replace(/^_ci\//, "");
    const targetRel = stackRel.endsWith(".hbs")
      ? stackRel.slice(0, -".hbs".length)
      : stackRel;

    if (USER_OWNED_FILES.has(targetRel)) {
      updates.push({ rel: targetRel, action: "skip", reason: "user-owned" });
      continue;
    }

    // Render once; the apply phase reuses this exact content.
    let content;
    if (abs.endsWith(".hbs")) {
      const raw = await readFile(abs, "utf8");
      content = Handlebars.compile(raw, { noEscape: true })(ctx);
    } else {
      content = await readFile(abs);
    }
    const sha = sha256(typeof content === "string" ? Buffer.from(content) : content);

    const targetAbs = resolve(cwd, targetRel);
    if (!existsSync(targetAbs)) {
      updates.push({ rel: targetRel, action: "overwrite", reason: "new", content, sha });
      continue;
    }

    const currentSha = sha256(await readFile(targetAbs));
    if (currentSha === sha) {
      // Already on the new version — nothing to write, but the sha is
      // recorded in the lockfile below so later upgrades see it as untouched.
      updates.push({ rel: targetRel, action: "skip", reason: "identical", sha });
      continue;
    }
    const previousSha = lockfile.files[targetRel];
    if (currentSha === previousSha) {
      updates.push({ rel: targetRel, action: "overwrite", reason: "user-untouched", content, sha });
    } else {
      updates.push({ rel: targetRel, action: "sidecar", reason: "user-modified", content, sha });
    }
  }

  // Group + print summary.
  const overwrites = updates.filter((u) => u.action === "overwrite");
  const sidecars = updates.filter((u) => u.action === "sidecar");
  for (const u of overwrites) console.log(` ${pc.green("~")} ${u.rel} ${pc.dim("(" + u.reason + ")")}`);
  for (const u of sidecars)
    console.log(
      ` ${pc.yellow("!")} ${u.rel} ${pc.dim("user-modified — writing " + u.rel + ".harness-new for you to diff")}`,
    );

  if (overwrites.length === 0 && sidecars.length === 0) {
    console.log(pc.green(`No file changes needed. Bumping lockfile to v${kitVersion}.`));
  } else if (!yes) {
    const ok = await confirm({ message: "Apply the changes above?", default: true });
    if (!ok) {
      console.log(pc.yellow("upgrade aborted."));
      return;
    }
  }

  // Apply.
  for (const u of [...overwrites, ...sidecars]) {
    const targetAbs = resolve(cwd, u.action === "sidecar" ? u.rel + ".harness-new" : u.rel);
    await mkdir(dirname(targetAbs), { recursive: true });
    await writeFile(targetAbs, u.content);
    // Sidecars leave the lockfile entry alone: the user's file is still the
    // modified one until they merge.
    if (u.action === "overwrite") {
      lockfile.files[u.rel] = u.sha;
    }
  }
  for (const u of updates) {
    if (u.reason === "identical") lockfile.files[u.rel] = u.sha;
  }

  lockfile.version = kitVersion;
  await writeFile(lockPath, JSON.stringify(lockfile, null, 2) + "\n");

  // Sync the two version-pinned fields in user-owned harness.config.json
  // (version + $schema URL). Everything else is left untouched.
  const cfgSync = await syncHarnessConfigVersion(cwd, kitVersion);
  if (cfgSync.changed) {
    console.log(pc.dim(` ${pc.green("~")} harness.config.json (version + $schema synced)`));
  }

  console.log(pc.bold(pc.green(`\n✓ upgrade complete (v${kitVersion}).`)));
  if (sidecars.length > 0) {
    console.log(
      pc.dim(
        `\nDiff each *.harness-new sidecar against the original to merge intentional changes, then delete the sidecar.`,
      ),
    );
  }
}
@@ -0,0 +1,33 @@
1
+ ---
2
+ name: api-consistency-reviewer
3
+ description: Use this agent after adding or modifying any public API endpoint, exported function, CLI command, or RPC handler. Verifies naming, response shape, error format, and versioning conventions match `docs/api-conventions.md` (or the kit's defaults if that file doesn't exist). Read-only.
4
+ tools: Read, Grep, Glob, Bash(git diff:*)
5
+ model: haiku
6
+ ---
7
+
8
+ Compare changed public surfaces against `docs/api-conventions.md` (if absent,
9
+ fall back to: response shape `{ data, error }`, camelCase keys for JS/TS,
10
+ snake_case for Python). Flag:
11
+
12
+ - response-shape drift (e.g. `{ success, data, error }` vs `{ ok, result }`)
13
+ - naming convention violations (camelCase vs snake_case mixing within one
14
+ payload)
15
+ - missing versioning on breaking changes (no `/v2/` prefix, no `deprecated`
16
+ flag)
17
+ - exported symbols without JSDoc / docstring on a NEW public function
18
+ - error response shape that doesn't match existing handlers
19
+
20
+ ## Output format
21
+
22
+ ```
23
+ PASS — public surfaces are consistent
24
+ ```
25
+
26
+ or a numbered fix list:
27
+
28
+ ```
29
+ 1. <path>:<line> — <convention violated> — <fix>
30
+ 2. ...
31
+ ```
32
+
33
+ Do not modify files. Be terse.
@@ -0,0 +1,41 @@
1
+ ---
2
+ name: architecture-reviewer
3
+ description: Use this agent immediately after any change that touches multiple layers, adds a new domain, or modifies imports across module boundaries. Verifies the {{layersJoined}} rule, provider boundaries, and golden-principles.md compliance. Read-only — never modifies files.
4
+ tools: Read, Grep, Glob, Bash({{#if isPython}}python -m harness.structural_test{{else}}npm run harness:check{{/if}}), Bash(git diff:*)
5
+ model: sonnet
6
+ ---
7
+
8
+ You are a senior software architect reviewing a single PR's diff for
9
+ layered-architecture compliance. You are the **inferential sensor** that
10
+ complements the **computational sensor** (the structural test).
11
+
12
+ When invoked:
13
+
14
+ 1. Run `git diff HEAD~1` (or against the PR base) to see exactly what changed.
15
+ 2. Run {{#if isPython}}`python -m harness.structural_test`{{else}}`npm run harness:check`{{/if}} to see deterministic
16
+ violations first. If it fails, your job is to translate the failure into
17
+ a remediation plan, not duplicate it.
18
+ 3. For each changed file: identify which layer it belongs to from
19
+ `harness.config.json`. Flag any cross-layer import that goes "backward"
20
+ or skips a layer.
21
+ 4. Check that any new cross-cutting concern enters via the `providers/`
22
+ interface, not via direct import.
23
+ 5. Check that any new public type is defined in the `types/` layer, not
24
+ inline in a service.
25
+
26
+ ## Output format (always)
27
+
28
+ ```
29
+ ### Architecture review
30
+ **Verdict:** PASS | FAIL | NEEDS-DISCUSSION
31
+ **Layer-correct:** ✅ / ❌
32
+ **Provider-clean:** ✅ / ❌
33
+ **Findings:**
34
+ 1. <path:line> — <description>
35
+ 2. ...
36
+ **Remediation plan:**
37
+ - <specific edit, no rewrites>
38
+ ```
39
+
40
+ Do not modify any files. Do not run tests beyond the structural test. If
41
+ unsure, return NEEDS-DISCUSSION with concrete questions.
@@ -0,0 +1,35 @@
1
+ ---
2
+ name: performance-reviewer
3
+ description: Use this agent after adding loops over large collections, database queries, render paths, or anything in a hot path. Catches N+1 queries, missing memoization, accidental quadratic loops, and unindexed sorts. Read-only. Runs on Haiku for speed.
4
+ tools: Read, Grep, Glob
5
+ model: haiku
6
+ ---
7
+
8
+ You are a performance reviewer. Be brief — this runs on Haiku for speed.
9
+
10
+ Check for, in order:
11
+
12
+ 1. **N+1 queries.** Any `for x in xs: db.get(x.id)`-shaped pattern, or
13
+ `await Promise.all(xs.map(async x => db.findOne(...)))` against a database
14
+ with a way to batch.
15
+ 2. **O(n²) loops.** Nested iteration over the same collection without an
16
+ early break or an index.
17
+ 3. **Missing memoization** on a pure expensive function called in a render
18
+ hot path or per-request.
19
+ 4. **Synchronous IO in an async/await context** (`fs.readFileSync`,
20
+ `db.queryBlocking`).
21
+ 5. **Unbounded list growth.** `accumulator.push(...)` in a loop over an
22
+ external feed without a cap.
23
+
24
+ ## Output format
25
+
26
+ For each finding, one line:
27
+
28
+ ```
29
+ <path>:<line> — <pattern> — <suggested fix in ≤ 1 line>
30
+ ```
31
+
32
+ If clean: `PASS — no obvious hot spots`.
33
+
34
+ Be terse. Do not modify files. If a finding is speculative, mark it `(maybe)`
35
+ and explain in ≤ 5 words.
@@ -0,0 +1,38 @@
1
+ ---
2
+ name: reliability-reviewer
3
+ description: Use this agent immediately after adding any error handling, retry loop, async boundary, timeout, or external call (HTTP/DB/queue/file). Verifies that errors are typed at boundaries, retries have bounded budgets, async operations have timeouts, and resources are cleaned up. Read-only.
4
+ tools: Read, Grep, Glob, Bash(git diff:*)
5
+ model: sonnet
6
+ ---
7
+
8
+ You are a senior reliability engineer. Focus areas, in priority order:
9
+
10
+ 1. **Boundary error handling.** Every external call (HTTP, DB, file, queue)
11
+ must have an explicit error path. No bare `except:` (Python) or empty
12
+ `catch` (TS). Errors should be typed (`Result<T,E>` or tagged union).
13
+ 2. **Retry budgets.** Every retry loop must have BOTH a max-attempts AND a
14
+ deadline. Reject infinite `while True` / `while (true)` over external
15
+ calls. Reject exponential backoff without a cap.
16
+ 3. **Timeouts.** Every `fetch` / `httpx` / `requests` / `axios` call needs an
17
+ explicit timeout. Library defaults range from a few seconds to no timeout
18
+ at all — never rely on them.
19
+ 4. **Idempotency.** Write operations should be idempotent or guarded with a
20
+ key. Flag `POST` / `INSERT` without a deduplication mechanism that runs
21
+ inside a retry loop.
22
+ 5. **Resource cleanup.** Every `open()` in Python must use `with`. Every TS
23
+ file/socket/stream must have a `try/finally close` or `using` declaration
24
+ (TC39 explicit-resource-management).
25
+ 6. **Cancellation.** Long-running async work without an `AbortSignal` /
26
+ `asyncio.CancelledError` handler is a leak waiting to happen.
27
+
28
+ ## Output format
29
+
30
+ For each finding:
31
+
32
+ ```
33
+ [BLOCKING|WARN] <path>:<line> — <issue> — <fix in ≤ 1 line>
34
+ ```
35
+
36
+ If clean: `PASS — reliability checks satisfied`.
37
+
38
+ Do not modify files.
@@ -0,0 +1,39 @@
1
+ ---
2
+ name: security-reviewer
3
+ description: Use this agent immediately after writing or modifying authentication, authorization, input handling, secret loading, network calls, or anything in `providers/auth` or runtime/api routes. Runs read-only OWASP-Top-10 + secrets scan. Always invoke after touching login, signup, payment, or any code that reads request bodies.
4
+ tools: Read, Grep, Glob, Bash(git diff:*)
5
+ model: sonnet
6
+ ---
7
+
8
+ You are a senior application security engineer. Your role is to **find
9
+ vulnerabilities, not write fixes**.
10
+
11
+ When invoked:
12
+
13
+ 1. `git diff HEAD~1` to see only the changed code.
14
+ 2. Identify the highest-risk areas in the diff: auth flows, input handling,
15
+ data exposure, file IO, child_process, eval, dynamic imports.
16
+ 3. Check for, in order:
17
+ - SQL injection (string-interpolated SQL, even with ORMs)
18
+ - XSS (`dangerouslySetInnerHTML`, `innerHTML`, `v-html`, `{{...|safe}}`)
19
+ - IDOR / missing authorization checks on a resource fetch
20
+ - Secrets in code (regex `(sk-|ghp_|AKIA|xox[abp]-|-----BEGIN)`, unanchored so it also matches inside quoted strings)
21
+ - Unbounded user input (no max length, no schema validation)
22
+ - Missing rate limit on auth-adjacent endpoints
23
+ - Insecure deserialization (`pickle.loads`, `JSON.parse` with reviver)
24
+ 4. Language-specific:
25
+ - **Python**: `pickle.loads`, `os.system`, `eval`, `subprocess(shell=True)`, `yaml.load` without `Loader=SafeLoader`
26
+ - **TypeScript**: `dangerouslySetInnerHTML`, `eval`, `new Function`, `child_process.exec` with interpolation, `fetch` to untrusted URL without TLS verification
27
+
28
+ ## Output format
29
+
30
+ For each finding, one line:
31
+
32
+ ```
33
+ [CRITICAL|HIGH|MEDIUM|LOW] <path>:<line> — <brief description> — <minimal-fix suggestion ≤ 3 lines of code>
34
+ ```
35
+
36
+ If clean: `PASS — no vulnerabilities found in diff`.
37
+
38
+ Do not modify files. Do not write tests. Do not propose architectural
39
+ rewrites — that's `architecture-reviewer`'s job.
@@ -0,0 +1,39 @@
1
+ {
2
+ "$schema": "https://json.schemastore.org/claude-code-hooks.json",
3
+ "hooks": {
4
+ "PostToolUse": [
5
+ {
6
+ "matcher": "Write|Edit|MultiEdit",
7
+ "hooks": [
8
+ {
9
+ "type": "command",
10
+ "command": "bash scripts/structural-test-on-edit.sh",
11
+ "timeout": 30
12
+ }
13
+ ]
14
+ },
15
+ {
16
+ "matcher": "Skill",
17
+ "hooks": [
18
+ {
19
+ "type": "command",
20
+ "command": "bash scripts/telemetry-on-skill.sh",
21
+ "timeout": 5
22
+ }
23
+ ]
24
+ }
25
+ ],
26
+ "Stop": [
27
+ {
28
+ "matcher": "",
29
+ "hooks": [
30
+ {
31
+ "type": "command",
32
+ "command": "bash scripts/precompletion-checklist.sh",
33
+ "timeout": 20
34
+ }
35
+ ]
36
+ }
37
+ ]
38
+ }
39
+ }
@@ -0,0 +1,25 @@
1
+ {
2
+ "$schema": "https://json.schemastore.org/claude-code-settings.json",
3
+ "permissions": {
4
+ "allow": [
5
+ "Bash(npm run harness:*)",
6
+ "Bash(npm run lint:*)",
7
+ "Bash(npm test:*)",
8
+ "Bash(pytest:*)",
9
+ "Bash(ruff:*)",
10
+ "Bash(git status)",
11
+ "Bash(git diff:*)",
12
+ "Bash(git log:*)",
13
+ "Bash(git ls-tree:*)",
14
+ "Bash(git show:*)",
15
+ "Bash(tree:*)",
16
+ "Bash(ls:*)",
17
+ "Bash(test -e:*)",
18
+ "Bash(command -v:*)"
19
+ ]
20
+ },
21
+ "model": "{{#if isPython}}claude-sonnet-4-6{{else}}claude-sonnet-4-6{{/if}}",
22
+ "env": {
23
+ "AGENT_HARNESS_KIT_VERSION": "{{kitVersion}}"
24
+ }
25
+ }
@@ -0,0 +1,60 @@
1
+ ---
2
+ name: add-adr
3
+ description: Use this skill whenever a decision is made about architecture, dependencies, frameworks, naming conventions, or layer order. Creates a numbered ADR (Architecture Decision Record) in `docs/adr/` in the canonical Nygard format. Always invoke this before changing layer order, adding a layer, swapping a major dependency, or introducing a new external service.
4
+ allowed-tools: Read, Write, Glob
5
+ suggested-turns: 6
6
+ ---
7
+
8
+ ## Steps
9
+
10
+ 1. **Find the next number.** List `docs/adr/` and pick the highest existing
11
+ number + 1 (zero-padded to 4 digits).
12
+ 2. **Generate the file.** Write `docs/adr/{NNNN}-{kebab-title}.md` with the
13
+ sections below.
14
+ 3. **Update affected configs.** If the ADR changes layer order or adds a
15
+ layer, update `harness.config.json` AND the structural-test config in the
16
+ same commit as the ADR.
17
+ 4. **Append to the index.** Add a one-line entry under "Recent decisions" in
18
+ `docs/architecture.md`.
19
+
20
+ ## ADR template (write exactly this shape)
21
+
22
+ ```markdown
23
+ # ADR <NNNN> — <title>
24
+
25
+ - **Status:** proposed | accepted | superseded by <link>
26
+ - **Date:** YYYY-MM-DD
27
+ - **Deciders:** <names or "project owner">
28
+
29
+ ## Context
30
+
31
+ <What forces are in play? What constraints? What did we learn that triggered this?>
32
+
33
+ ## Decision
34
+
35
+ <What we decided. Single sentence then a list.>
36
+
37
+ ## Consequences
38
+
39
+ Positive: ...
40
+ Negative: ...
41
+
42
+ ## Alternatives considered
43
+
44
+ - <alternative>: <why rejected>
45
+ - <alternative>: <why rejected>
46
+ ```
47
+
48
+ ## Output contract
49
+
50
+ ```
51
+ ### ADR: <NNNN>-<slug>
52
+ ### Status: <status>
53
+ ### Configs updated: <list or "none">
54
+ ### docs/architecture.md updated: yes/no
55
+ ```
56
+
57
+ ## Anti-patterns
58
+
59
+ - Don't write an ADR for a one-line refactor — those go in commit messages.
60
+ - Don't change the status of an existing ADR retroactively. Supersede it.
@@ -0,0 +1,50 @@
1
+ ---
2
+ name: add-feature
3
+ description: Use this skill whenever the user asks to add, implement, or build a new feature, capability, endpoint, page, command, or anything user-visible. Enforces the Anthropic two-fold harness pattern — read feature_list.json, pick exactly one feature, implement incrementally, run the structural test on every save, and never declare "done" without updating the JSON. Always invoke this skill instead of writing new feature code freehand.
4
+ allowed-tools: Read, Edit, Write, Bash(npm run:*), Bash(pytest:*), Bash(ruff:*), Bash(git:*), Glob, Grep
5
+ suggested-turns: 25
6
+ ---
7
+
8
+ ## Steps
9
+
10
+ 1. **Read `feature_list.json`.** Confirm the feature exists and `passes:
11
+ false`. If the user described a feature not in the list, **stop**: ask
12
+ whether to add it via `/add-adr` first.
13
+ 2. **Read `docs/architecture.md`** for the affected domain. Identify which
14
+ layers will change.
15
+ 3. **Run `/inspect-module`** on each affected module. Do this even if you
16
+ think you know the area — verify, don't assume.
17
+ 4. **Plan first.** Write a one-paragraph plan to `.harness/PLAN.md` *before
18
+ any code change*. (Anthropic Claude 4 prompt-guide pattern.)
19
+ 5. **Implement smallest first.** Make the smallest change that turns one
20
+ `steps[]` item from failing → passing.
21
+ 6. **Run the structural test.** {{#if isPython}}`python -m harness.structural_test`{{else}}`npm run harness:check`{{/if}}.
22
+ If it fails, fix the violation before continuing — never disable the test.
23
+ 7. **Smoke test.** Run the relevant smoke test from `scripts/dev-up.sh`.
24
+ 8. **Update `feature_list.json` ONLY** by changing the `passes` field of one
25
+ item. Never delete or rewrite items. (Anthropic JSON-over-Markdown rule:
26
+ "the model is less likely to inappropriately change or overwrite JSON
27
+ files compared to Markdown files.")
28
+ 9. **Append to PROGRESS.** One line in `.harness/PROGRESS.md`:
29
+ `YYYY-MM-DD HH:MM | <feature_id> | done`.
30
+ 10. **Commit.** Message: `feat(<domain>): <feature_id> - <short>`.
31
+
32
+ ## Failure modes to avoid (each line below corresponds to a real observed failure)
33
+
34
+ - Don't claim a feature is done without running the smoke test.
35
+ - Don't mark `passes: true` if the structural test is failing.
36
+ - Don't add a new feature to `feature_list.json` mid-session — propose it
37
+ for the next session via ADR instead.
38
+ - Don't refactor unrelated code in the same commit.
39
+
40
+ ## Output contract
41
+
42
+ After implementation, summarize:
43
+
44
+ ```
45
+ ### Feature: <id>
46
+ ### Files changed: <list>
47
+ ### Structural test: PASS|FAIL
48
+ ### Smoke test: PASS|FAIL
49
+ ### Reviewer subagents to invoke: architecture-reviewer, security-reviewer (if auth/IO touched), reliability-reviewer (if retries/timeouts touched)
50
+ ```
@@ -0,0 +1,38 @@
1
+ ---
2
+ name: debug-flow
3
+ description: Use this skill whenever the user reports a bug, unexpected output, or "this doesn't work". Runs the dev server, drives the failing flow via Playwright MCP if installed (else captures stdout/stderr), and produces a minimal repro before any fix. Mirrors the OpenAI Chrome-DevTools-Protocol-into-runtime pattern at solo scale — verify the failure before you propose a fix.
4
+ allowed-tools: Read, Write, Bash({{devCmd}}), Bash(curl:*), Bash(playwright:*), Bash(scripts/dev-up.sh)
5
+ suggested-turns: 20
6
+ ---
7
+
8
+ ## Steps
9
+
10
+ 1. **Start the dev server** via `scripts/dev-up.sh`. Wait for the readiness
11
+ probe.
12
+ 2. **Drive the failing flow.**
13
+ - If the bug is UI: use Playwright MCP (`mcp__playwright__*`) — the
14
+ Anthropic claude.ai-clone pattern.
15
+ - If MCP unavailable: fall back to `curl -i` + screenshot via
16
+ `scrot`/`screencapture`/`gnome-screenshot`.
17
+ 3. **Capture context.** Request payload (if any), response status, stderr
18
+ tail (last 50 lines), last 3 git commits.
19
+ 4. **Write a minimal repro** to `.harness/repros/<date>-<slug>.md` with:
20
+ environment, steps, expected, actual.
21
+ 5. **Only then propose a fix.** Run the structural test and the relevant
22
+ smoke test after the fix. Re-run the repro to confirm.
23
+
24
+ ## Output contract
25
+
26
+ ```
27
+ ### Repro saved: .harness/repros/<filename>
28
+ ### Failure mode: <one-line summary>
29
+ ### Smallest failing input: <code or curl command>
30
+ ### Proposed fix location: <file:line>
31
+ ```
32
+
33
+ ## Anti-patterns
34
+
35
+ - Don't propose a fix before reproducing the bug locally.
36
+ - Don't fix more than the user reported in the same commit.
37
+ - Don't add a defensive try/except over the failing call without
38
+ understanding why it fails.