cc-dev-template 0.1.103 → 0.1.105
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/scripts/statusline.js +61 -37
- package/src/skills/prevent-regression/SKILL.md +18 -0
- package/src/skills/prevent-regression/references/step-1-diagnose.md +33 -0
- package/src/skills/prevent-regression/references/step-2-triage.md +81 -0
- package/src/skills/prevent-regression/references/step-3-implement.md +133 -0
- package/src/skills/prevent-regression/references/step-4-reflect.md +32 -0
package/package.json
CHANGED
|
@@ -21,9 +21,10 @@ const { homedir } = require('os');
|
|
|
21
21
|
// Usage API cache
|
|
22
22
|
const USAGE_CACHE_PATH = join(homedir(), '.claude', '.usage-cache.json');
|
|
23
23
|
const USAGE_LOCK_PATH = join(homedir(), '.claude', '.usage-cache.lock');
|
|
24
|
-
const USAGE_CACHE_TTL =
|
|
25
|
-
const
|
|
26
|
-
const
|
|
24
|
+
const USAGE_CACHE_TTL = 30000; // 30s normal refresh interval
|
|
25
|
+
const USAGE_ERROR_TTL = 300000; // 5 min backoff on errors (rate limit, network, etc.)
|
|
26
|
+
const USAGE_LOCK_TTL = 15000; // 15s lock (curl timeout is 5s, 15s is generous)
|
|
27
|
+
const USAGE_HISTORY_MAX = 20; // ~10 min of readings at 30s intervals
|
|
27
28
|
|
|
28
29
|
// Background refresh mode: fetch usage data and write cache, then exit
|
|
29
30
|
if (process.argv.includes('--refresh')) {
|
|
@@ -307,13 +308,33 @@ function getOAuthToken() {
|
|
|
307
308
|
}
|
|
308
309
|
}
|
|
309
310
|
|
|
311
|
+
/**
 * Record an error backoff in the usage cache file.
 *
 * Sets `nextRefreshAfter` to now + USAGE_ERROR_TTL so the next refresh
 * attempt is delayed. Whatever the cache already holds (data, history,
 * timestamp) is kept when the file contains a JSON object. If the file is
 * missing, unreadable, or holds anything other than a plain JSON object,
 * a fresh entry with `data: null` and an empty history is written instead.
 *
 * The cache is read first and written exactly once, so a failed write can
 * never replace previously cached data with the empty fallback entry.
 *
 * @returns {void}
 * @throws {Error} Propagates any error raised by writeFileSync.
 */
function writeErrorBackoff() {
  const now = Date.now();
  const nextRefreshAfter = now + USAGE_ERROR_TTL;

  // Step 1: try to load the current cache. Only a plain object can carry
  // the nextRefreshAfter field through JSON.stringify; arrays and
  // primitives would silently lose it, so they are treated as "no cache".
  let entry = null;
  try {
    const parsed = JSON.parse(readFileSync(USAGE_CACHE_PATH, 'utf-8'));
    if (parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed)) {
      entry = parsed;
    }
  } catch {
    // Missing or corrupt cache file: fall through to the fresh entry below.
  }

  // Step 2: either extend the existing entry or start a new empty one.
  if (entry === null) {
    entry = {
      timestamp: now,
      nextRefreshAfter,
      data: null,
      history: [],
    };
  } else {
    entry.nextRefreshAfter = nextRefreshAfter;
  }

  // Step 3: single write, kept outside the read's try/catch on purpose.
  writeFileSync(USAGE_CACHE_PATH, JSON.stringify(entry));
}
|
|
330
|
+
|
|
310
331
|
/**
|
|
311
332
|
* Fetch usage data from API and write to cache (runs in background)
|
|
312
333
|
*/
|
|
313
334
|
function refreshUsageCache() {
|
|
314
335
|
try {
|
|
315
336
|
const token = getOAuthToken();
|
|
316
|
-
if (!token) return;
|
|
337
|
+
if (!token) { writeErrorBackoff(); return; }
|
|
317
338
|
|
|
318
339
|
const result = spawnSync('curl', [
|
|
319
340
|
'-s', '--max-time', '3',
|
|
@@ -323,59 +344,62 @@ function refreshUsageCache() {
|
|
|
323
344
|
'-H', 'Content-Type: application/json',
|
|
324
345
|
], { encoding: 'utf-8', timeout: 5000 });
|
|
325
346
|
|
|
326
|
-
if (result.status
|
|
327
|
-
const data = JSON.parse(result.stdout.trim());
|
|
328
|
-
if (data.five_hour && data.seven_day) {
|
|
329
|
-
// Load existing history and append new reading
|
|
330
|
-
let history = [];
|
|
331
|
-
try {
|
|
332
|
-
const existing = JSON.parse(readFileSync(USAGE_CACHE_PATH, 'utf-8'));
|
|
333
|
-
if (Array.isArray(existing.history)) history = existing.history;
|
|
334
|
-
} catch {}
|
|
335
|
-
|
|
336
|
-
const now = Date.now();
|
|
337
|
-
history.push({
|
|
338
|
-
t: now,
|
|
339
|
-
five_hour: data.five_hour.utilization,
|
|
340
|
-
seven_day: data.seven_day.utilization,
|
|
341
|
-
});
|
|
347
|
+
if (result.status !== 0 || !result.stdout) { writeErrorBackoff(); return; }
|
|
342
348
|
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
history = history.slice(-USAGE_HISTORY_MAX);
|
|
346
|
-
}
|
|
349
|
+
let responseData;
|
|
350
|
+
try { responseData = JSON.parse(result.stdout.trim()); } catch { writeErrorBackoff(); return; }
|
|
347
351
|
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
352
|
+
if (!responseData.five_hour || !responseData.seven_day) { writeErrorBackoff(); return; }
|
|
353
|
+
|
|
354
|
+
// --- Success path ---
|
|
355
|
+
let history = [];
|
|
356
|
+
try {
|
|
357
|
+
const existing = JSON.parse(readFileSync(USAGE_CACHE_PATH, 'utf-8'));
|
|
358
|
+
if (Array.isArray(existing.history)) history = existing.history;
|
|
359
|
+
} catch {}
|
|
360
|
+
|
|
361
|
+
const now = Date.now();
|
|
362
|
+
history.push({
|
|
363
|
+
t: now,
|
|
364
|
+
five_hour: responseData.five_hour.utilization,
|
|
365
|
+
seven_day: responseData.seven_day.utilization,
|
|
366
|
+
});
|
|
367
|
+
|
|
368
|
+
if (history.length > USAGE_HISTORY_MAX) {
|
|
369
|
+
history = history.slice(-USAGE_HISTORY_MAX);
|
|
355
370
|
}
|
|
371
|
+
|
|
372
|
+
writeFileSync(USAGE_CACHE_PATH, JSON.stringify({
|
|
373
|
+
timestamp: now,
|
|
374
|
+
nextRefreshAfter: now + USAGE_CACHE_TTL,
|
|
375
|
+
data: responseData,
|
|
376
|
+
history,
|
|
377
|
+
}));
|
|
356
378
|
} catch {
|
|
357
|
-
|
|
379
|
+
writeErrorBackoff();
|
|
380
|
+
} finally {
|
|
381
|
+
try { unlinkSync(USAGE_LOCK_PATH); } catch {}
|
|
358
382
|
}
|
|
359
383
|
}
|
|
360
384
|
|
|
361
385
|
/**
|
|
362
|
-
* Read cached usage data, trigger background refresh if
|
|
386
|
+
* Read cached usage data, trigger background refresh if scheduled time has passed.
|
|
387
|
+
* All instances share the same nextRefreshAfter timestamp — only one refresh per cycle.
|
|
363
388
|
*/
|
|
364
389
|
function getUsageData() {
|
|
365
390
|
let cacheData = null;
|
|
366
391
|
let cacheHistory = null;
|
|
367
|
-
let
|
|
392
|
+
let shouldRefresh = true; // refresh if no cache exists
|
|
368
393
|
|
|
369
394
|
try {
|
|
370
395
|
const raw = readFileSync(USAGE_CACHE_PATH, 'utf-8');
|
|
371
396
|
const cache = JSON.parse(raw);
|
|
372
397
|
cacheData = cache.data;
|
|
373
398
|
cacheHistory = cache.history || null;
|
|
374
|
-
|
|
399
|
+
shouldRefresh = Date.now() > (cache.nextRefreshAfter || 0);
|
|
375
400
|
} catch {}
|
|
376
401
|
|
|
377
|
-
|
|
378
|
-
if (cacheAge > USAGE_CACHE_TTL) {
|
|
402
|
+
if (shouldRefresh) {
|
|
379
403
|
let lockHeld = false;
|
|
380
404
|
try {
|
|
381
405
|
const lockTime = parseInt(readFileSync(USAGE_LOCK_PATH, 'utf-8'), 10);
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: prevent-regression
|
|
3
|
+
description: "Codifies an agent mistake into a permanent backpressure mechanism. Use when an agent did something wrong and you want to ensure it never happens again. User-invoked only."
|
|
4
|
+
disable-model-invocation: true
|
|
5
|
+
argument-hint: "[description of what the agent did wrong]"
|
|
6
|
+
---
|
|
7
|
+
|
|
8
|
+
# Prevent Regression
|
|
9
|
+
|
|
10
|
+
The issue to prevent: **$ARGUMENTS**
|
|
11
|
+
|
|
12
|
+
## Core Principle: Generate Backpressure
|
|
13
|
+
|
|
14
|
+
Backpressure is a deterministic feedback loop that lets the agent self-correct without human intervention. The goal of this workflow is to convert a one-time mistake into a permanent mechanism that applies automatic corrective force whenever the agent drifts toward the same error.
|
|
15
|
+
|
|
16
|
+
The key insight: **you can accidentally steer a model, but you cannot accidentally steer a type checker.** The more deterministic the check, the stronger the backpressure. Prose instructions are the weakest form — the model can misread them, ignore them, or be steered away from them by conflicting context. A lint test or a hook that exits non-zero is impossible to ignore.
|
|
17
|
+
|
|
18
|
+
Read `references/step-1-diagnose.md`.
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
# Step 1: Diagnose
|
|
2
|
+
|
|
3
|
+
Understand what went wrong and why the agent did it.
|
|
4
|
+
|
|
5
|
+
## Examine the Evidence
|
|
6
|
+
|
|
7
|
+
Read the relevant code or files involved. If a specific location was mentioned, go there. Search for the violation in the actual source.
|
|
8
|
+
|
|
9
|
+
## Find the Root Cause
|
|
10
|
+
|
|
11
|
+
Determine why the agent did this. Common causes:
|
|
12
|
+
|
|
13
|
+
| Cause | Signal |
|
|
14
|
+
|-------|--------|
|
|
15
|
+
| **Pattern-matched bad code** | Existing code elsewhere does the same wrong thing |
|
|
16
|
+
| **No guidance existed** | No CLAUDE.md or rule covered this case |
|
|
17
|
+
| **Reasonable but wrong** | Defensible on general principles — just not our convention |
|
|
18
|
+
| **Wrong hierarchy level** | Guidance existed but wasn't loaded for this file type |
|
|
19
|
+
| **Conflicting guidance** | Two instructions pointed in different directions |
|
|
20
|
+
| **Black-box assumption** | Agent assumed how an external system works without verifying |
|
|
21
|
+
|
|
22
|
+
If the agent pattern-matched on bad existing code, identify all instances of that bad code — they may need to be fixed alongside adding the prevention mechanism.
|
|
23
|
+
|
|
24
|
+
If the agent made a wrong assumption about an external system (SDK, API, CLI), note that — a learning test may be appropriate alongside the backpressure mechanism.
|
|
25
|
+
|
|
26
|
+
## Output
|
|
27
|
+
|
|
28
|
+
State:
|
|
29
|
+
1. The exact violation (one sentence)
|
|
30
|
+
2. The most likely root cause (one sentence)
|
|
31
|
+
3. Any existing bad code that should be cleaned up
|
|
32
|
+
|
|
33
|
+
**IMPORTANT: You are not done. Read `references/step-2-triage.md` now.**
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
# Step 2: Choose the Backpressure Mechanism
|
|
2
|
+
|
|
3
|
+
Evaluate each tier in order. Use the **strongest tier that can catch this violation** (Tier 1 is the strongest, Tier 4 the weakest) — stronger tiers are more deterministic and harder to ignore.
|
|
4
|
+
|
|
5
|
+
The backpressure hierarchy, from strongest to weakest:
|
|
6
|
+
|
|
7
|
+
## Tier 1: Automated Test (Strongest — Try First)
|
|
8
|
+
|
|
9
|
+
A test that runs in CI or via a `make`/`npm`/`cargo` command.
|
|
10
|
+
|
|
11
|
+
Viable when you can write a test that:
|
|
12
|
+
- Detects the violation deterministically (grep pattern, AST check, import analysis, etc.)
|
|
13
|
+
- Won't false-positive on legitimate code
|
|
14
|
+
- Can run without external dependencies (no network, no API keys)
|
|
15
|
+
|
|
16
|
+
Common patterns:
|
|
17
|
+
- **Lint/grep tests**: Walk source files and flag banned patterns (e.g., forbidden imports, wrong naming conventions, missing annotations)
|
|
18
|
+
- **Architecture tests**: Verify dependency boundaries, module structure, or export constraints
|
|
19
|
+
- **Contract tests**: Assert invariants about configuration files, schema shapes, or API surfaces
|
|
20
|
+
|
|
21
|
+
This is the strongest backpressure because it runs deterministically, produces clear error messages, and blocks merging. The agent gets immediate feedback with exact file and line numbers.
|
|
22
|
+
|
|
23
|
+
If viable → go to `references/step-3-implement.md` with **decision = Tier 1**.
|
|
24
|
+
|
|
25
|
+
## Tier 2: Claude Code Hook (Try Second)
|
|
26
|
+
|
|
27
|
+
A hook that fires when the agent writes or edits files, runs commands, or thinks it's done.
|
|
28
|
+
|
|
29
|
+
Viable when:
|
|
30
|
+
- The violation happens during code generation, not in saved code
|
|
31
|
+
- You can detect it from the tool input (file path, content, command)
|
|
32
|
+
- A test can't catch it because the wrong code looks syntactically fine
|
|
33
|
+
- You want to catch it *before* it's written, not after
|
|
34
|
+
|
|
35
|
+
Available hook events:
|
|
36
|
+
- **`PreToolUse` on `Edit|Write`** — inspect file path + new content before it's written; can block (exit 2) or advise (exit 0 + JSON context)
|
|
37
|
+
- **`PreToolUse` on `Bash`** — inspect the command about to run
|
|
38
|
+
- **`PostToolUse` on `Edit|Write`** — react after a file is written
|
|
39
|
+
- **`Stop` hook** — runs when the agent thinks it's done; deterministically run final checks and inject failures back into context
|
|
40
|
+
|
|
41
|
+
Blocking (exit 2): write reason to stderr — the agent sees it and must adjust.
|
|
42
|
+
Advising (exit 0 + JSON): output context that gets injected into the agent's window — softer but still automatic.
|
|
43
|
+
|
|
44
|
+
Stop hooks are particularly powerful: they let you run a full test suite or validation pass at the moment the agent believes it's finished, catching anything that slipped through.
|
|
45
|
+
|
|
46
|
+
If viable → go to `references/step-3-implement.md` with **decision = Tier 2**.
|
|
47
|
+
|
|
48
|
+
## Tier 3: Pre-commit Hook (Try Third)
|
|
49
|
+
|
|
50
|
+
A git pre-commit hook that blocks the commit if violations are found.
|
|
51
|
+
|
|
52
|
+
Viable when:
|
|
53
|
+
- The check should run against the full staged changeset, not individual edits
|
|
54
|
+
- You want a final gate before code lands in version control
|
|
55
|
+
- The check is fast enough to run on every commit (< 5 seconds)
|
|
56
|
+
|
|
57
|
+
Pre-commit hooks are strong backpressure because they're deterministic and the agent cannot bypass them (unless explicitly told to skip). They also protect against human mistakes, not just agent mistakes.
|
|
58
|
+
|
|
59
|
+
If viable → go to `references/step-3-implement.md` with **decision = Tier 3**.
|
|
60
|
+
|
|
61
|
+
## Tier 4: CLAUDE.md Rule (Weakest — Last Resort)
|
|
62
|
+
|
|
63
|
+
A prose instruction in a CLAUDE.md or rules file.
|
|
64
|
+
|
|
65
|
+
Use when Tiers 1–3 are not viable. This is the weakest backpressure because:
|
|
66
|
+
- The model can misread or overlook it
|
|
67
|
+
- Conflicting context can steer the model away from following it
|
|
68
|
+
- There is no deterministic enforcement — it relies entirely on the model's judgment
|
|
69
|
+
|
|
70
|
+
To maximize effectiveness of prose rules:
|
|
71
|
+
- Write imperatives, not suggestions ("Never import X from Y" not "You should avoid importing X from Y")
|
|
72
|
+
- Place the rule in the most specific file in the hierarchy (path-scoped rules files > directory CLAUDE.md > root CLAUDE.md)
|
|
73
|
+
- If possible, pair with a higher-tier mechanism — the prose explains *why*, the test/hook enforces *what*
|
|
74
|
+
|
|
75
|
+
If Tier 4 is the only option → go to `references/step-3-implement.md` with **decision = Tier 4**.
|
|
76
|
+
|
|
77
|
+
---
|
|
78
|
+
|
|
79
|
+
**Tiers are not mutually exclusive.** The best backpressure is layered: a lint test (Tier 1) catches it in CI, a hook (Tier 2) catches it at write-time, and a CLAUDE.md rule (Tier 4) explains the *why* so the agent avoids the mistake in the first place. Deterministic enforcement + prose explanation is stronger than either alone.
|
|
80
|
+
|
|
81
|
+
**IMPORTANT: You are not done. Read `references/step-3-implement.md` now.**
|
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
# Step 3: Implement
|
|
2
|
+
|
|
3
|
+
Implement the backpressure mechanism for your chosen tier, then verify it works.
|
|
4
|
+
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
## Tier 1: Add an Automated Test
|
|
8
|
+
|
|
9
|
+
Find the project's existing test infrastructure. Look for:
|
|
10
|
+
- An existing lint or architecture test file (search for patterns like `walkFiles`, `lint_test`, `architecture.test`, or grep-based test assertions)
|
|
11
|
+
- The project's test runner and how to invoke it
|
|
12
|
+
|
|
13
|
+
If there's an existing lint/architecture test file, add your check there following the established patterns. If there isn't one, create one in the appropriate test directory.
|
|
14
|
+
|
|
15
|
+
The test should:
|
|
16
|
+
1. Scan the relevant source files
|
|
17
|
+
2. Detect the violation pattern
|
|
18
|
+
3. Report the exact file, line number, and violating code
|
|
19
|
+
4. Include a clear error message explaining **why** this is wrong and **what to do instead**
|
|
20
|
+
|
|
21
|
+
The error message is critical — it's the "tokens" that give the agent (or human) feedback. A message like `"Found 3 violations"` is weak backpressure. A message like `"auth.go:42: Direct DB query in handler. Use the repository pattern instead — see docs/architecture.md"` is strong backpressure because it tells the agent exactly what's wrong and how to fix it.
|
|
22
|
+
|
|
23
|
+
After adding the test:
|
|
24
|
+
1. Run it — confirm it detects the violation (if the violating code still exists)
|
|
25
|
+
2. Fix the violating code
|
|
26
|
+
3. Run it again — confirm it passes cleanly
|
|
27
|
+
|
|
28
|
+
If you also want to add a CLAUDE.md annotation, mark it `[enforced by test]` so agents know there's a deterministic check behind the instruction.
|
|
29
|
+
|
|
30
|
+
---
|
|
31
|
+
|
|
32
|
+
## Tier 2: Add a Claude Code Hook
|
|
33
|
+
|
|
34
|
+
Add the hook to the project's `.claude/settings.json` (project-level) or `~/.claude/settings.json` (user-level).
|
|
35
|
+
|
|
36
|
+
For simple checks, use an inline command:
|
|
37
|
+
```json
|
|
38
|
+
{
|
|
39
|
+
"matcher": "Edit|Write",
|
|
40
|
+
"hooks": [
|
|
41
|
+
{
|
|
42
|
+
"type": "command",
|
|
43
|
+
"command": "bash -c 'INPUT=$(cat); FILE=$(echo \"$INPUT\" | jq -r \".tool_input.file_path // empty\"); if [[ -z \"$FILE\" ]]; then exit 0; fi; : replace this no-op with your check; exit 0'"
|
|
44
|
+
}
|
|
45
|
+
]
|
|
46
|
+
}
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
For complex logic, write a script:
|
|
50
|
+
- Save to `.claude/scripts/your-check.sh` (project-level) or `~/.claude/scripts/your-check.sh` (user-level)
|
|
51
|
+
- Make it executable: `chmod +x`
|
|
52
|
+
- Reference it: `"command": "bash .claude/scripts/your-check.sh"`
|
|
53
|
+
|
|
54
|
+
Script template for a **blocking** check (exit 2):
|
|
55
|
+
```bash
|
|
56
|
+
#!/bin/bash
|
|
57
|
+
INPUT=$(cat)
|
|
58
|
+
FILE=$(echo "$INPUT" | jq -r '.tool_input.file_path // empty')
|
|
59
|
+
CONTENT=$(echo "$INPUT" | jq -r '.tool_input.new_string // .tool_input.content // empty')
|
|
60
|
+
|
|
61
|
+
# Only check relevant files
|
|
62
|
+
if [[ "$FILE" != *.go ]]; then exit 0; fi
|
|
63
|
+
|
|
64
|
+
# Detect the violation
|
|
65
|
+
if echo "$CONTENT" | grep -q "BANNED_PATTERN"; then
|
|
66
|
+
echo "Blocked: [explain the rule and what to do instead]" >&2
|
|
67
|
+
exit 2
|
|
68
|
+
fi
|
|
69
|
+
|
|
70
|
+
exit 0
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
Script template for an **advising** check (exit 0 + context injection):
|
|
74
|
+
```bash
|
|
75
|
+
#!/bin/bash
|
|
76
|
+
INPUT=$(cat)
|
|
77
|
+
FILE=$(echo "$INPUT" | jq -r '.tool_input.file_path // empty')
|
|
78
|
+
|
|
79
|
+
# Inject a reminder into the agent's context
|
|
80
|
+
echo '{"hookSpecificOutput":{"hookEventName":"PreToolUse","permissionDecision":"allow","additionalContext":"Reminder: [your guidance here]"}}'
|
|
81
|
+
exit 0
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
For **Stop hooks** (which run when the agent thinks it's done):
|
|
85
|
+
```json
|
|
86
|
+
{
|
|
87
|
+
"matcher": "Stop",
|
|
88
|
+
"hooks": [
|
|
89
|
+
{
|
|
90
|
+
"type": "command",
|
|
91
|
+
"command": "bash -c 'cd /path/to/project && make lint 2>&1 || exit 2'"
|
|
92
|
+
}
|
|
93
|
+
]
|
|
94
|
+
}
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
Test the hook by triggering the bad action and confirming the block/advice fires.
|
|
98
|
+
|
|
99
|
+
---
|
|
100
|
+
|
|
101
|
+
## Tier 3: Add a Pre-commit Hook
|
|
102
|
+
|
|
103
|
+
Add or update the project's pre-commit hook (`.git/hooks/pre-commit` or via a framework like pre-commit, husky, or lefthook).
|
|
104
|
+
|
|
105
|
+
The hook should:
|
|
106
|
+
1. Inspect staged files (`git diff --cached --name-only`)
|
|
107
|
+
2. Check for the violation pattern
|
|
108
|
+
3. Print a clear error message explaining what's wrong
|
|
109
|
+
4. Exit non-zero to block the commit
|
|
110
|
+
|
|
111
|
+
If the project uses a pre-commit framework, add your check as a new hook entry following the existing patterns.
|
|
112
|
+
|
|
113
|
+
---
|
|
114
|
+
|
|
115
|
+
## Tier 4: Update CLAUDE.md
|
|
116
|
+
|
|
117
|
+
Find the most specific applicable file in the project's CLAUDE.md hierarchy. The more specific the file, the less noise it adds to unrelated work.
|
|
118
|
+
|
|
119
|
+
General hierarchy (most specific → least specific):
|
|
120
|
+
- Path-scoped rules files (`.claude/rules/*.md` with glob patterns)
|
|
121
|
+
- Directory-level CLAUDE.md files (`src/auth/CLAUDE.md`)
|
|
122
|
+
- Root CLAUDE.md
|
|
123
|
+
|
|
124
|
+
Write instructions, not documentation. Imperative. One or two sentences.
|
|
125
|
+
|
|
126
|
+
Good: `Never import from internal/compiler in the bridge package. This creates a circular dependency. [enforced by test]`
|
|
127
|
+
Bad: `You should consider avoiding imports from the compiler package in bridge code for architectural reasons.`
|
|
128
|
+
|
|
129
|
+
---
|
|
130
|
+
|
|
131
|
+
**IMPORTANT: You are not done. You MUST read and complete the next step. The workflow is incomplete without it.**
|
|
132
|
+
|
|
133
|
+
Read `references/step-4-reflect.md` now.
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
# Step 4: Reflect
|
|
2
|
+
|
|
3
|
+
**IMPORTANT: This step is mandatory. The prevent-regression workflow is not complete until this step is finished. Do not skip this.**
|
|
4
|
+
|
|
5
|
+
## Assess the Backpressure Quality
|
|
6
|
+
|
|
7
|
+
Answer these questions:
|
|
8
|
+
|
|
9
|
+
1. **Determinism** — Is the mechanism deterministic? Can the agent ignore it? A lint test or hook that exits non-zero is impossible to ignore. A CLAUDE.md rule is not. If you chose a weaker tier (a higher tier number), go back and check: could a stronger tier (a lower tier number) have worked?
|
|
10
|
+
|
|
11
|
+
2. **Observability** — Does the mechanism produce clear, actionable feedback? The agent needs tokens that tell it *what* went wrong and *how* to fix it. Vague error messages are weak backpressure. Specific messages with file paths, line numbers, and fix instructions are strong backpressure.
|
|
12
|
+
|
|
13
|
+
3. **Root cause fixed?** — If the agent pattern-matched bad existing code, did you clean up that code too? Leaving bad examples in the codebase while adding a rule creates conflicting signals — the code says "do this" while the rule says "don't do this." The code often wins because the model pattern-matches on what it sees.
|
|
14
|
+
|
|
15
|
+
4. **Layering** — Did you consider combining tiers? The strongest prevention is layered: a deterministic check (Tier 1/2/3) catches violations mechanically, paired with a prose explanation (Tier 4) that helps the agent avoid the mistake in the first place. Enforcement + explanation > either alone.
|
|
16
|
+
|
|
17
|
+
5. **Specificity** — Is the mechanism scoped correctly? A rule that belongs in a path-scoped rules file but landed in the root CLAUDE.md will fire for every file and add noise. A hook that checks every file when it should only check `*.go` files wastes cycles and risks false positives.
|
|
18
|
+
|
|
19
|
+
## Act
|
|
20
|
+
|
|
21
|
+
If any answer reveals a problem:
|
|
22
|
+
- Fix the prevention mechanism (wrong tier → implement the better tier; bad code still present → clean it up; vague error message → make it specific)
|
|
23
|
+
- If you found that the skill instructions were missing something for this scenario, note it in the report
|
|
24
|
+
|
|
25
|
+
## Report
|
|
26
|
+
|
|
27
|
+
Tell the user:
|
|
28
|
+
1. What tier(s) were used and why
|
|
29
|
+
2. What was implemented (test name/location, hook config, rule file, or combination)
|
|
30
|
+
3. The backpressure strength — is it deterministic (agent cannot ignore) or advisory (agent should follow)?
|
|
31
|
+
4. How to verify it works (run command, trigger scenario, etc.)
|
|
32
|
+
5. Any follow-up work (bad patterns still in codebase, related rules to add, opportunities to upgrade to a stronger tier later)
|