devflow-kit 1.0.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +30 -0
- package/README.md +13 -6
- package/dist/cli.js +5 -1
- package/dist/commands/ambient.d.ts +18 -0
- package/dist/commands/ambient.js +136 -0
- package/dist/commands/init.d.ts +2 -0
- package/dist/commands/init.js +97 -10
- package/dist/commands/memory.d.ts +22 -0
- package/dist/commands/memory.js +175 -0
- package/dist/commands/uninstall.js +72 -5
- package/dist/plugins.js +8 -1
- package/dist/utils/post-install.d.ts +12 -0
- package/dist/utils/post-install.js +82 -1
- package/dist/utils/safe-delete-install.d.ts +7 -0
- package/dist/utils/safe-delete-install.js +40 -5
- package/package.json +1 -1
- package/plugins/devflow-ambient/.claude-plugin/plugin.json +7 -0
- package/plugins/devflow-ambient/README.md +49 -0
- package/plugins/devflow-ambient/commands/ambient.md +110 -0
- package/plugins/devflow-ambient/skills/ambient-router/SKILL.md +89 -0
- package/plugins/devflow-ambient/skills/ambient-router/references/skill-catalog.md +64 -0
- package/plugins/devflow-audit-claude/.claude-plugin/plugin.json +1 -1
- package/plugins/devflow-code-review/.claude-plugin/plugin.json +1 -1
- package/plugins/devflow-core-skills/.claude-plugin/plugin.json +2 -1
- package/plugins/devflow-core-skills/skills/docs-framework/SKILL.md +10 -6
- package/plugins/devflow-core-skills/skills/test-driven-development/SKILL.md +139 -0
- package/plugins/devflow-core-skills/skills/test-driven-development/references/rationalization-prevention.md +111 -0
- package/plugins/devflow-debug/.claude-plugin/plugin.json +1 -1
- package/plugins/devflow-implement/.claude-plugin/plugin.json +1 -1
- package/plugins/devflow-resolve/.claude-plugin/plugin.json +1 -1
- package/plugins/devflow-self-review/.claude-plugin/plugin.json +1 -1
- package/plugins/devflow-specify/.claude-plugin/plugin.json +1 -1
- package/scripts/hooks/ambient-prompt.sh +48 -0
- package/scripts/hooks/background-memory-update.sh +49 -8
- package/scripts/hooks/ensure-memory-gitignore.sh +17 -0
- package/scripts/hooks/pre-compact-memory.sh +12 -6
- package/scripts/hooks/session-start-memory.sh +50 -8
- package/scripts/hooks/stop-update-memory.sh +10 -6
- package/shared/skills/ambient-router/SKILL.md +89 -0
- package/shared/skills/ambient-router/references/skill-catalog.md +64 -0
- package/shared/skills/docs-framework/SKILL.md +10 -6
- package/shared/skills/test-driven-development/SKILL.md +139 -0
- package/shared/skills/test-driven-development/references/rationalization-prevention.md +111 -0
- package/src/templates/managed-settings.json +14 -0
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
#!/bin/bash
|
|
2
2
|
|
|
3
3
|
# Working Memory: SessionStart Hook
|
|
4
|
-
# Reads .
|
|
4
|
+
# Reads .memory/WORKING-MEMORY.md and injects it as additionalContext for the new session.
|
|
5
5
|
# Also captures fresh git state so Claude knows what's changed since the memory was written.
|
|
6
6
|
# Adds staleness warning if memory is >1 hour old.
|
|
7
7
|
|
|
@@ -17,12 +17,7 @@ if [ -z "$CWD" ]; then
|
|
|
17
17
|
exit 0
|
|
18
18
|
fi
|
|
19
19
|
|
|
20
|
-
|
|
21
|
-
if [ ! -d "$CWD/.docs" ]; then
|
|
22
|
-
exit 0
|
|
23
|
-
fi
|
|
24
|
-
|
|
25
|
-
MEMORY_FILE="$CWD/.docs/WORKING-MEMORY.md"
|
|
20
|
+
MEMORY_FILE="$CWD/.memory/WORKING-MEMORY.md"
|
|
26
21
|
|
|
27
22
|
# No memory file = nothing to restore (fresh project or first session)
|
|
28
23
|
if [ ! -f "$MEMORY_FILE" ]; then
|
|
@@ -31,6 +26,13 @@ fi
|
|
|
31
26
|
|
|
32
27
|
MEMORY_CONTENT=$(cat "$MEMORY_FILE")
|
|
33
28
|
|
|
29
|
+
# Read accumulated patterns if they exist
|
|
30
|
+
PATTERNS_FILE="$CWD/.memory/PROJECT-PATTERNS.md"
|
|
31
|
+
PATTERNS_CONTENT=""
|
|
32
|
+
if [ -f "$PATTERNS_FILE" ]; then
|
|
33
|
+
PATTERNS_CONTENT=$(cat "$PATTERNS_FILE")
|
|
34
|
+
fi
|
|
35
|
+
|
|
34
36
|
# Compute staleness warning
|
|
35
37
|
if stat --version &>/dev/null 2>&1; then
|
|
36
38
|
FILE_MTIME=$(stat -c %Y "$MEMORY_FILE")
|
|
@@ -40,6 +42,30 @@ fi
|
|
|
40
42
|
NOW=$(date +%s)
|
|
41
43
|
AGE=$(( NOW - FILE_MTIME ))
|
|
42
44
|
|
|
45
|
+
# Check for pre-compact memory snapshot (compaction recovery)
|
|
46
|
+
BACKUP_FILE="$CWD/.memory/backup.json"
|
|
47
|
+
COMPACT_NOTE=""
|
|
48
|
+
if [ -f "$BACKUP_FILE" ]; then
|
|
49
|
+
BACKUP_MEMORY=$(jq -r '.memory_snapshot // ""' "$BACKUP_FILE" 2>/dev/null)
|
|
50
|
+
if [ -n "$BACKUP_MEMORY" ]; then
|
|
51
|
+
BACKUP_TS=$(jq -r '.timestamp // ""' "$BACKUP_FILE" 2>/dev/null)
|
|
52
|
+
BACKUP_EPOCH=0
|
|
53
|
+
if [ -n "$BACKUP_TS" ]; then
|
|
54
|
+
BACKUP_EPOCH=$(date -j -f "%Y-%m-%dT%H:%M:%SZ" "$BACKUP_TS" +%s 2>/dev/null \
|
|
55
|
+
|| date -d "$BACKUP_TS" +%s 2>/dev/null \
|
|
56
|
+
|| echo "0")
|
|
57
|
+
fi
|
|
58
|
+
if [ "$BACKUP_EPOCH" -gt "$FILE_MTIME" ]; then
|
|
59
|
+
COMPACT_NOTE="
|
|
60
|
+
--- PRE-COMPACT SNAPSHOT ($BACKUP_TS) ---
|
|
61
|
+
Context was compacted. This snapshot may contain decisions or progress not yet in working memory.
|
|
62
|
+
|
|
63
|
+
$BACKUP_MEMORY
|
|
64
|
+
"
|
|
65
|
+
fi
|
|
66
|
+
fi
|
|
67
|
+
fi
|
|
68
|
+
|
|
43
69
|
STALE_WARNING=""
|
|
44
70
|
if [ "$AGE" -gt 3600 ]; then
|
|
45
71
|
HOURS=$(( AGE / 3600 ))
|
|
@@ -62,7 +88,18 @@ fi
|
|
|
62
88
|
# Build context string
|
|
63
89
|
CONTEXT="${STALE_WARNING}--- WORKING MEMORY (from previous session) ---
|
|
64
90
|
|
|
65
|
-
${MEMORY_CONTENT}
|
|
91
|
+
${MEMORY_CONTENT}"
|
|
92
|
+
|
|
93
|
+
# Insert accumulated patterns between working memory and git state
|
|
94
|
+
if [ -n "$PATTERNS_CONTENT" ]; then
|
|
95
|
+
CONTEXT="${CONTEXT}
|
|
96
|
+
|
|
97
|
+
--- PROJECT PATTERNS (accumulated) ---
|
|
98
|
+
|
|
99
|
+
${PATTERNS_CONTENT}"
|
|
100
|
+
fi
|
|
101
|
+
|
|
102
|
+
CONTEXT="${CONTEXT}
|
|
66
103
|
|
|
67
104
|
--- CURRENT GIT STATE ---
|
|
68
105
|
Branch: ${GIT_BRANCH}
|
|
@@ -75,6 +112,11 @@ Uncommitted changes:
|
|
|
75
112
|
${GIT_STATUS}"
|
|
76
113
|
fi
|
|
77
114
|
|
|
115
|
+
if [ -n "$COMPACT_NOTE" ]; then
|
|
116
|
+
CONTEXT="${CONTEXT}
|
|
117
|
+
${COMPACT_NOTE}"
|
|
118
|
+
fi
|
|
119
|
+
|
|
78
120
|
# Output as additionalContext JSON envelope (Claude sees it as system context, not user-visible)
|
|
79
121
|
jq -n --arg ctx "$CONTEXT" '{
|
|
80
122
|
"hookSpecificOutput": {
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
#!/bin/bash
|
|
2
2
|
|
|
3
3
|
# Working Memory: Stop Hook
|
|
4
|
-
# Spawns a background process to update .
|
|
4
|
+
# Spawns a background process to update .memory/WORKING-MEMORY.md asynchronously.
|
|
5
5
|
# The session ends immediately — no visible edit in the TUI.
|
|
6
6
|
# On failure: does nothing (stale memory is better than fake data).
|
|
7
7
|
|
|
@@ -16,21 +16,25 @@ if ! command -v jq &>/dev/null; then exit 0; fi
|
|
|
16
16
|
|
|
17
17
|
INPUT=$(cat)
|
|
18
18
|
|
|
19
|
-
#
|
|
19
|
+
# Resolve project directory — bail if missing
|
|
20
20
|
CWD=$(echo "$INPUT" | jq -r '.cwd // ""' 2>/dev/null)
|
|
21
|
-
if [ -z "$CWD" ]
|
|
21
|
+
if [ -z "$CWD" ]; then
|
|
22
22
|
exit 0
|
|
23
23
|
fi
|
|
24
24
|
|
|
25
|
+
# Auto-create .memory/ and ensure .gitignore entries (idempotent after first run)
|
|
26
|
+
SCRIPT_DIR_EARLY="$(cd "$(dirname "$0")" && pwd)"
|
|
27
|
+
source "$SCRIPT_DIR_EARLY/ensure-memory-gitignore.sh" "$CWD" || exit 0
|
|
28
|
+
|
|
25
29
|
# Logging (shared log file with background updater; [stop-hook] prefix distinguishes)
|
|
26
|
-
MEMORY_FILE="$CWD/.
|
|
27
|
-
LOG_FILE="$CWD/.
|
|
30
|
+
MEMORY_FILE="$CWD/.memory/WORKING-MEMORY.md"
|
|
31
|
+
LOG_FILE="$CWD/.memory/.working-memory-update.log"
|
|
28
32
|
# Append a single log line to $LOG_FILE, prefixed with the current UTC
# timestamp (ISO-8601, "Z" suffix) and the [stop-hook] tag.
#   $1 - message text to record
log() {
  printf '[%s] [stop-hook] %s\n' "$(date -u '+%Y-%m-%dT%H:%M:%SZ')" "$1" >> "$LOG_FILE"
}
|
|
29
33
|
|
|
30
34
|
# Throttle: skip if stop hook was triggered within the last 2 minutes
|
|
31
35
|
# Uses a marker file touched BEFORE spawning the updater — prevents race condition
|
|
32
36
|
# where multiple hooks see stale WORKING-MEMORY.md mtime and all bypass throttle.
|
|
33
|
-
TRIGGER_MARKER="$CWD/.
|
|
37
|
+
TRIGGER_MARKER="$CWD/.memory/.working-memory-last-trigger"
|
|
34
38
|
if [ -f "$TRIGGER_MARKER" ]; then
|
|
35
39
|
if stat --version &>/dev/null 2>&1; then
|
|
36
40
|
MARKER_MTIME=$(stat -c %Y "$TRIGGER_MARKER")
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: ambient-router
|
|
3
|
+
description: >-
|
|
4
|
+
Classify user intent and response depth for ambient mode. Auto-loads relevant
|
|
5
|
+
skills without explicit command invocation. Used by /ambient command and
|
|
6
|
+
always-on UserPromptSubmit hook.
|
|
7
|
+
user-invocable: false
|
|
8
|
+
allowed-tools: Read, Grep, Glob
|
|
9
|
+
---
|
|
10
|
+
|
|
11
|
+
# Ambient Router
|
|
12
|
+
|
|
13
|
+
Classify user intent and auto-load relevant skills. Zero overhead for simple requests, skill injection for substantive work, workflow nudges for complex tasks.
|
|
14
|
+
|
|
15
|
+
## Iron Law
|
|
16
|
+
|
|
17
|
+
> **PROPORTIONAL RESPONSE**
|
|
18
|
+
>
|
|
19
|
+
> Match effort to intent. Never apply heavyweight processes to lightweight requests.
|
|
20
|
+
> A chat question gets zero overhead. A 3-file feature gets 2-3 skills. A system
|
|
21
|
+
> refactor gets a nudge toward `/implement`. Misclassification in either direction
|
|
22
|
+
> is a failure.
|
|
23
|
+
|
|
24
|
+
---
|
|
25
|
+
|
|
26
|
+
## Step 1: Classify Intent
|
|
27
|
+
|
|
28
|
+
Determine what the user is trying to do from their prompt.
|
|
29
|
+
|
|
30
|
+
| Intent | Signal Words / Patterns | Examples |
|
|
31
|
+
|--------|------------------------|---------|
|
|
32
|
+
| **BUILD** | "add", "create", "implement", "build", "write", "make" | "add a login form", "create an API endpoint" |
|
|
33
|
+
| **DEBUG** | "fix", "bug", "broken", "failing", "error", "why does" | "fix the auth error", "why is this test failing" |
|
|
34
|
+
| **REVIEW** | "check", "look at", "review", "is this ok", "any issues" | "check this function", "any issues with this?" |
|
|
35
|
+
| **PLAN** | "how should", "design", "architecture", "approach", "strategy" | "how should I structure auth?", "what's the approach for caching?" |
|
|
36
|
+
| **EXPLORE** | "what is", "where is", "find", "show me", "explain", "how does" | "where is the config?", "explain this function" |
|
|
37
|
+
| **CHAT** | greetings, meta-questions, confirmations, short responses | "thanks", "yes", "what can you do?" |
|
|
38
|
+
|
|
39
|
+
**Ambiguous prompts:** Default to the lowest-overhead classification. "Update the README" → BUILD/STANDARD. Git operations like "commit this" → QUICK.
|
|
40
|
+
|
|
41
|
+
## Step 2: Classify Depth
|
|
42
|
+
|
|
43
|
+
Determine how much enforcement the prompt warrants.
|
|
44
|
+
|
|
45
|
+
| Depth | Criteria | Action |
|
|
46
|
+
|-------|----------|--------|
|
|
47
|
+
| **QUICK** | CHAT intent. EXPLORE with no analytical depth ("where is X?"). Git/devops operations (commit, push, merge, branch, pr, deploy, reinstall). Single-word continuations. | Respond normally. Zero overhead. Do not state classification. |
|
|
48
|
+
| **STANDARD** | BUILD/DEBUG/REVIEW/PLAN intent (any word count). EXPLORE with analytical depth ("analyze our X", "discuss how Y works"). | Read and apply 2-3 relevant skills from the selection matrix below. State classification briefly. |
|
|
49
|
+
| **ESCALATE** | Multi-file architectural change, system-wide scope, > 5 files. Detailed implementation plan (100+ words with plan structure). | Respond at best effort + recommend: "This looks like it would benefit from `/implement` for full lifecycle management." |
|
|
50
|
+
|
|
51
|
+
## Step 3: Select Skills (STANDARD depth only)
|
|
52
|
+
|
|
53
|
+
Based on classified intent, read the following skills to inform your response.
|
|
54
|
+
|
|
55
|
+
| Intent | Primary Skills | Secondary (if file type matches) |
|
|
56
|
+
|--------|---------------|----------------------------------|
|
|
57
|
+
| **BUILD** | test-driven-development, implementation-patterns | typescript (.ts), react (.tsx/.jsx), frontend-design (CSS/UI), input-validation (forms/API), security-patterns (auth/crypto) |
|
|
58
|
+
| **DEBUG** | test-patterns, core-patterns | git-safety (if git operations involved) |
|
|
59
|
+
| **REVIEW** | self-review, core-patterns | test-patterns |
|
|
60
|
+
| **PLAN** | implementation-patterns | core-patterns |
|
|
61
|
+
|
|
62
|
+
**Excluded from ambient** (review-command-only): review-methodology, complexity-patterns, consistency-patterns, database-patterns, dependencies-patterns, documentation-patterns, regression-patterns, architecture-patterns, accessibility, performance-patterns.
|
|
63
|
+
|
|
64
|
+
See `references/skill-catalog.md` for the full skill-to-intent mapping with file pattern triggers.
|
|
65
|
+
|
|
66
|
+
## Step 4: Apply
|
|
67
|
+
|
|
68
|
+
- **QUICK:** Respond directly. No preamble, no classification statement.
|
|
69
|
+
- **STANDARD:** State classification briefly: `Ambient: BUILD/STANDARD. Loading: test-driven-development, implementation-patterns.` Then read the selected skills and apply their patterns to your response. For BUILD intent, enforce RED-GREEN-REFACTOR from test-driven-development.
|
|
70
|
+
- **ESCALATE:** Respond with your best effort, then append: `> This task spans multiple files/systems. Consider \`/implement\` for full lifecycle (exploration → planning → implementation → review).`
|
|
71
|
+
|
|
72
|
+
---
|
|
73
|
+
|
|
74
|
+
## Transparency Rules
|
|
75
|
+
|
|
76
|
+
1. **QUICK → silent.** No classification output.
|
|
77
|
+
2. **STANDARD → brief statement.** One line: intent, depth, skills loaded.
|
|
78
|
+
3. **ESCALATE → recommendation.** Best-effort response + workflow nudge.
|
|
79
|
+
4. **Never lie about classification.** If uncertain, say so.
|
|
80
|
+
5. **Never over-classify.** When in doubt, go one tier lower.
|
|
81
|
+
|
|
82
|
+
## Edge Cases
|
|
83
|
+
|
|
84
|
+
| Case | Handling |
|
|
85
|
+
|------|----------|
|
|
86
|
+
| Mixed intent ("fix this bug and add a test") | Use the higher-overhead intent (BUILD > DEBUG) |
|
|
87
|
+
| Continuation of previous conversation | Inherit previous classification unless prompt clearly shifts |
|
|
88
|
+
| User explicitly requests no enforcement | Respect immediately — classify as QUICK |
|
|
89
|
+
| Prompt references specific DevFlow command | Skip ambient — the command has its own orchestration |
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
# Ambient Router — Skill Catalog
|
|
2
|
+
|
|
3
|
+
Full mapping of DevFlow skills to ambient intents and file-type triggers. The ambient-router SKILL.md references this for detailed selection logic.
|
|
4
|
+
|
|
5
|
+
## Skills Available for Ambient Loading
|
|
6
|
+
|
|
7
|
+
These skills may be loaded during STANDARD-depth ambient routing.
|
|
8
|
+
|
|
9
|
+
### BUILD Intent
|
|
10
|
+
|
|
11
|
+
| Skill | When to Load | File Patterns |
|
|
12
|
+
|-------|-------------|---------------|
|
|
13
|
+
| test-driven-development | Always for BUILD | `*.ts`, `*.tsx`, `*.js`, `*.jsx`, `*.py` |
|
|
14
|
+
| implementation-patterns | Always for BUILD | Any code file |
|
|
15
|
+
| typescript | TypeScript files in scope | `*.ts`, `*.tsx` |
|
|
16
|
+
| react | React components in scope | `*.tsx`, `*.jsx` |
|
|
17
|
+
| frontend-design | UI/styling work | `*.css`, `*.scss`, `*.tsx` with styling keywords |
|
|
18
|
+
| input-validation | Forms, APIs, user input | Files with form/input/validation keywords |
|
|
19
|
+
| security-patterns | Auth, crypto, secrets | Files with auth/token/crypto/password keywords |
|
|
20
|
+
|
|
21
|
+
### DEBUG Intent
|
|
22
|
+
|
|
23
|
+
| Skill | When to Load | File Patterns |
|
|
24
|
+
|-------|-------------|---------------|
|
|
25
|
+
| test-patterns | Always for DEBUG | Any test-related context |
|
|
26
|
+
| core-patterns | Always for DEBUG | Any code file |
|
|
27
|
+
| git-safety | Git operations involved | User mentions git, rebase, merge, etc. |
|
|
28
|
+
|
|
29
|
+
### REVIEW Intent
|
|
30
|
+
|
|
31
|
+
| Skill | When to Load | File Patterns |
|
|
32
|
+
|-------|-------------|---------------|
|
|
33
|
+
| self-review | Always for REVIEW | Any code file |
|
|
34
|
+
| core-patterns | Always for REVIEW | Any code file |
|
|
35
|
+
| test-patterns | Test files in scope | `*.test.*`, `*.spec.*` |
|
|
36
|
+
|
|
37
|
+
### PLAN Intent
|
|
38
|
+
|
|
39
|
+
| Skill | When to Load | File Patterns |
|
|
40
|
+
|-------|-------------|---------------|
|
|
41
|
+
| implementation-patterns | Always for PLAN | Any planning context |
|
|
42
|
+
| core-patterns | Architectural planning | System design discussions |
|
|
43
|
+
|
|
44
|
+
## Skills Excluded from Ambient
|
|
45
|
+
|
|
46
|
+
These skills are loaded only by explicit DevFlow commands (primarily `/code-review`):
|
|
47
|
+
|
|
48
|
+
- review-methodology — Full review process (6-step, 3-category classification)
|
|
49
|
+
- complexity-patterns — Cyclomatic complexity, deep nesting analysis
|
|
50
|
+
- consistency-patterns — Naming convention, pattern deviation detection
|
|
51
|
+
- database-patterns — Index analysis, query optimization, migration safety
|
|
52
|
+
- dependencies-patterns — CVE detection, license audit, outdated packages
|
|
53
|
+
- documentation-patterns — Doc drift, stale comments, missing API docs
|
|
54
|
+
- regression-patterns — Lost functionality, broken exports, behavioral changes
|
|
55
|
+
- architecture-patterns — SOLID analysis, coupling detection, layering issues
|
|
56
|
+
- accessibility — WCAG compliance, ARIA roles, keyboard navigation
|
|
57
|
+
- performance-patterns — N+1 queries, memory leaks, caching opportunities
|
|
58
|
+
|
|
59
|
+
## Selection Limits
|
|
60
|
+
|
|
61
|
+
- **Maximum 3 skills** per ambient response (primary skills first; secondary skills fill any remaining slots)
|
|
62
|
+
- **Primary skills** are always loaded for the classified intent
|
|
63
|
+
- **Secondary skills** are loaded only when file patterns match conversation context
|
|
64
|
+
- If more than 3 skills seem relevant, this is an ESCALATE signal
|
|
@@ -32,10 +32,14 @@ All generated documentation lives under `.docs/` in the project root:
|
|
|
32
32
|
│ ├── {timestamp}.md
|
|
33
33
|
│ ├── compact/{timestamp}.md
|
|
34
34
|
│ └── INDEX.md
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
35
|
+
└── swarm/ # Swarm operation state
|
|
36
|
+
├── state.json
|
|
37
|
+
└── plans/
|
|
38
|
+
|
|
39
|
+
.memory/
|
|
40
|
+
├── WORKING-MEMORY.md # Auto-maintained by Stop hook (overwritten)
|
|
41
|
+
├── PROJECT-PATTERNS.md # Accumulated patterns (merged across sessions)
|
|
42
|
+
└── backup.json # Pre-compact snapshot (git state + working memory)
|
|
39
43
|
```
|
|
40
44
|
|
|
41
45
|
---
|
|
@@ -92,7 +96,7 @@ source .devflow/scripts/docs-helpers.sh 2>/dev/null || {
|
|
|
92
96
|
| Agent | Output Location | Behavior |
|
|
93
97
|
|-------|-----------------|----------|
|
|
94
98
|
| Reviewer | `.docs/reviews/{branch-slug}/{type}-report.{timestamp}.md` | Creates new |
|
|
95
|
-
| Working Memory | `.
|
|
99
|
+
| Working Memory | `.memory/WORKING-MEMORY.md` | Overwrites (auto-maintained by Stop hook) |
|
|
96
100
|
|
|
97
101
|
### Agents That Don't Persist
|
|
98
102
|
|
|
@@ -120,7 +124,7 @@ When creating or modifying persisting agents:
|
|
|
120
124
|
|
|
121
125
|
This framework is used by:
|
|
122
126
|
- **Review agents**: Creates review reports
|
|
123
|
-
- **Working Memory hooks**: Auto-maintains `.
|
|
127
|
+
- **Working Memory hooks**: Auto-maintains `.memory/WORKING-MEMORY.md`
|
|
124
128
|
|
|
125
129
|
All persisting agents should load this skill to ensure consistent documentation.
|
|
126
130
|
|
|
@@ -0,0 +1,139 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: test-driven-development
|
|
3
|
+
description: >-
|
|
4
|
+
Enforce RED-GREEN-REFACTOR cycle during implementation. Write failing tests before
|
|
5
|
+
production code. Distinct from test-patterns (which reviews test quality) — this
|
|
6
|
+
skill enforces the TDD workflow during code generation.
|
|
7
|
+
user-invocable: false
|
|
8
|
+
allowed-tools: Read, Grep, Glob
|
|
9
|
+
activation:
|
|
10
|
+
file-patterns:
|
|
11
|
+
- "**/*.ts"
|
|
12
|
+
- "**/*.tsx"
|
|
13
|
+
- "**/*.js"
|
|
14
|
+
- "**/*.jsx"
|
|
15
|
+
- "**/*.py"
|
|
16
|
+
exclude:
|
|
17
|
+
- "node_modules/**"
|
|
18
|
+
- "dist/**"
|
|
19
|
+
- "**/*.test.*"
|
|
20
|
+
- "**/*.spec.*"
|
|
21
|
+
---
|
|
22
|
+
|
|
23
|
+
# Test-Driven Development
|
|
24
|
+
|
|
25
|
+
Enforce the RED-GREEN-REFACTOR cycle for all implementation work. Tests define the design. Code satisfies the tests. Refactoring improves the design without changing behavior.
|
|
26
|
+
|
|
27
|
+
## Iron Law
|
|
28
|
+
|
|
29
|
+
> **TESTS FIRST, ALWAYS**
|
|
30
|
+
>
|
|
31
|
+
> Write the failing test before the production code. No exceptions. If you catch
|
|
32
|
+
> yourself writing production code without a failing test, stop immediately, delete
|
|
33
|
+
> the production code, write the test, watch it fail, then write the minimum code
|
|
34
|
+
> to make it pass. The test IS the specification.
|
|
35
|
+
|
|
36
|
+
---
|
|
37
|
+
|
|
38
|
+
## The Cycle
|
|
39
|
+
|
|
40
|
+
### Step 1: RED — Write a Failing Test
|
|
41
|
+
|
|
42
|
+
Write a test that describes the behavior you want. Run it. Watch it fail. The failure message IS your specification.
|
|
43
|
+
|
|
44
|
+
```
|
|
45
|
+
Describe what the code SHOULD do, not how it does it.
|
|
46
|
+
One behavior per test. One assertion per test (ideally).
|
|
47
|
+
Name tests as sentences: "returns error when email is invalid"
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
**Checkpoint:** The test MUST fail before proceeding. A test that passes immediately proves nothing.
|
|
51
|
+
|
|
52
|
+
### Step 2: GREEN — Write Minimum Code to Pass
|
|
53
|
+
|
|
54
|
+
Write the simplest production code that makes the failing test pass. No more, no less.
|
|
55
|
+
|
|
56
|
+
```
|
|
57
|
+
Hardcode first if that's simplest. Generalize when the next test forces it.
|
|
58
|
+
Don't write code "you'll need later." Write code the test demands NOW.
|
|
59
|
+
Don't optimize. Don't refactor. Don't clean up. Just pass the test.
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
**Checkpoint:** All tests pass. If any test fails, fix it before moving on.
|
|
63
|
+
|
|
64
|
+
### Step 3: REFACTOR — Improve Without Changing Behavior
|
|
65
|
+
|
|
66
|
+
Now clean up. Extract helpers, rename variables, simplify logic. Tests stay green throughout.
|
|
67
|
+
|
|
68
|
+
```
|
|
69
|
+
Run tests after every refactoring step.
|
|
70
|
+
If a test breaks during refactor, undo immediately — you changed behavior.
|
|
71
|
+
Apply DRY, extract patterns, improve readability.
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
**Checkpoint:** All tests still pass. Code is clean. Repeat from Step 1 for next behavior.
|
|
75
|
+
|
|
76
|
+
---
|
|
77
|
+
|
|
78
|
+
## Rationalization Prevention
|
|
79
|
+
|
|
80
|
+
These are the excuses developers use to skip TDD. Recognize and reject them.
|
|
81
|
+
|
|
82
|
+
| Excuse | Why It Feels Right | Why It's Wrong | Correct Action |
|
|
83
|
+
|--------|-------------------|---------------|----------------|
|
|
84
|
+
| "I'll write tests after" | Need to see the shape first | Tests ARE the shape — they define the interface before implementation exists | Write the test first |
|
|
85
|
+
| "Too simple to test" | It's just a getter/setter | Getters break, defaults change, edge cases hide in "simple" code | Write it — takes 30 seconds |
|
|
86
|
+
| "I'll refactor later" | Just get it working now | "Later" never comes; technical debt compounds silently | Refactor now in Step 3 |
|
|
87
|
+
| "Test is too hard to write" | Setup is complex, mocking is painful | Hard-to-test code = bad design; the test is telling you the interface is wrong | Simplify the interface first |
|
|
88
|
+
| "Need to see the whole picture" | Can't test what I haven't designed yet | TDD IS design; each test reveals the next piece of the interface | Let the test guide the design |
|
|
89
|
+
| "Tests slow me down" | Faster to just write the code | Faster until the first regression; TDD is faster for anything > 50 lines | Trust the cycle |
|
|
90
|
+
|
|
91
|
+
See `references/rationalization-prevention.md` for extended examples with code.
|
|
92
|
+
|
|
93
|
+
---
|
|
94
|
+
|
|
95
|
+
## Process Enforcement
|
|
96
|
+
|
|
97
|
+
When implementing any feature under ambient BUILD/STANDARD:
|
|
98
|
+
|
|
99
|
+
1. **Identify the first behavior** — What is the simplest thing this feature must do?
|
|
100
|
+
2. **Write the test** — Describe that behavior as a failing test
|
|
101
|
+
3. **Run the test** — Confirm it fails (RED)
|
|
102
|
+
4. **Write minimum code** — Just enough to pass (GREEN)
|
|
103
|
+
5. **Refactor** — Clean up while tests stay green (REFACTOR)
|
|
104
|
+
6. **Repeat** — Next behavior, next test, next cycle
|
|
105
|
+
|
|
106
|
+
### File Organization
|
|
107
|
+
|
|
108
|
+
- Test file lives next to production file: `user.ts` → `user.test.ts`
|
|
109
|
+
- Follow project's existing test conventions (Jest, Vitest, pytest, etc.)
|
|
110
|
+
- Import the module under test, not internal helpers
|
|
111
|
+
|
|
112
|
+
### What to Test
|
|
113
|
+
|
|
114
|
+
| Test | Don't Test |
|
|
115
|
+
|------|-----------|
|
|
116
|
+
| Public API behavior | Private implementation details |
|
|
117
|
+
| Error conditions and edge cases | Framework internals |
|
|
118
|
+
| Integration points (boundaries) | Third-party library correctness |
|
|
119
|
+
| State transitions | Getter/setter plumbing (unless non-trivial) |
|
|
120
|
+
|
|
121
|
+
---
|
|
122
|
+
|
|
123
|
+
## When TDD Does Not Apply
|
|
124
|
+
|
|
125
|
+
- **QUICK depth** — Ambient classified as QUICK (chat, exploration, trivial edits)
|
|
126
|
+
- **Non-code tasks** — Documentation, configuration, CI changes
|
|
127
|
+
- **Exploratory prototyping** — User explicitly says "just spike this" or "prototype"
|
|
128
|
+
- **Existing test suite changes** — Modifying tests themselves (test-patterns skill applies instead)
|
|
129
|
+
|
|
130
|
+
When skipping TDD, never rationalize. State clearly: "Skipping TDD because: [specific reason from list above]."
|
|
131
|
+
|
|
132
|
+
---
|
|
133
|
+
|
|
134
|
+
## Integration with Ambient Mode
|
|
135
|
+
|
|
136
|
+
- **BUILD/STANDARD** → TDD enforced. Every new function/method gets test-first treatment.
|
|
137
|
+
- **BUILD/QUICK** → TDD skipped (trivial single-file edit).
|
|
138
|
+
- **BUILD/ESCALATE** → TDD mentioned in nudge toward `/implement`.
|
|
139
|
+
- **DEBUG/STANDARD** → TDD applies to the fix: write a test that reproduces the bug first, then fix.
|
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
# TDD Rationalization Prevention — Extended Examples
|
|
2
|
+
|
|
3
|
+
Detailed code examples showing how each rationalization leads to worse outcomes.
|
|
4
|
+
|
|
5
|
+
## "I'll write tests after"
|
|
6
|
+
|
|
7
|
+
### What happens:
|
|
8
|
+
|
|
9
|
+
```typescript
|
|
10
|
+
// Developer writes production code first
|
|
11
|
+
function calculateDiscount(price: number, tier: string): number {
|
|
12
|
+
if (tier === 'gold') return price * 0.8;
|
|
13
|
+
if (tier === 'silver') return price * 0.9;
|
|
14
|
+
return price;
|
|
15
|
+
}
|
|
16
|
+
|
|
17
|
+
// Then "writes tests after" — but only for the happy path they remember
|
|
18
|
+
test('gold tier gets 20% off', () => {
|
|
19
|
+
expect(calculateDiscount(100, 'gold')).toBe(80);
|
|
20
|
+
});
|
|
21
|
+
// Missing: negative prices, unknown tiers, zero prices, NaN handling
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
### What TDD would have caught:
|
|
25
|
+
|
|
26
|
+
```typescript
|
|
27
|
+
// Test first — forces you to think about the contract
|
|
28
|
+
test('returns error for negative price', () => {
|
|
29
|
+
expect(calculateDiscount(-100, 'gold')).toEqual({ ok: false, error: 'NEGATIVE_PRICE' });
|
|
30
|
+
});
|
|
31
|
+
// Now the interface includes error handling from the start
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
## "Too simple to test"
|
|
35
|
+
|
|
36
|
+
### What happens:
|
|
37
|
+
|
|
38
|
+
```typescript
|
|
39
|
+
// "It's just a config getter, no test needed"
|
|
40
|
+
function getMaxRetries(): number {
|
|
41
|
+
return parseInt(process.env.MAX_RETRIES || '3');
|
|
42
|
+
}
|
|
43
|
+
// 6 months later: someone sets MAX_RETRIES="three" and prod crashes with NaN retries
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
### What TDD would have caught:
|
|
47
|
+
|
|
48
|
+
```typescript
|
|
49
|
+
test('returns default when env var is not a number', () => {
|
|
50
|
+
process.env.MAX_RETRIES = 'three';
|
|
51
|
+
expect(getMaxRetries()).toBe(3); // Forces validation logic
|
|
52
|
+
});
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
## "Test is too hard to write"
|
|
56
|
+
|
|
57
|
+
### What happens:
|
|
58
|
+
|
|
59
|
+
```typescript
|
|
60
|
+
// "I can't test this easily because it needs database + email + filesystem"
|
|
61
|
+
async function processOrder(orderId: string) {
|
|
62
|
+
const db = new Database();
|
|
63
|
+
const order = await db.find(orderId);
|
|
64
|
+
await sendEmail(order.customerEmail, 'Your order is processing');
|
|
65
|
+
await fs.writeFile(`/invoices/${orderId}.pdf`, generateInvoice(order));
|
|
66
|
+
await db.update(orderId, { status: 'processing' });
|
|
67
|
+
}
|
|
68
|
+
// Result: untestable monolith, test would need real DB + email + filesystem
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
### What TDD forces:
|
|
72
|
+
|
|
73
|
+
```typescript
|
|
74
|
+
// Hard-to-test = bad design. TDD forces dependency injection:
|
|
75
|
+
async function processOrder(
|
|
76
|
+
orderId: string,
|
|
77
|
+
deps: { db: OrderRepository; emailer: Emailer; invoices: InvoiceStore }
|
|
78
|
+
): Promise<Result<void, OrderError>> {
|
|
79
|
+
// Now trivially testable with mocks
|
|
80
|
+
}
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
## "I'll refactor later"
|
|
84
|
+
|
|
85
|
+
### What happens:
|
|
86
|
+
|
|
87
|
+
```typescript
|
|
88
|
+
// Sprint 1: "just get it working"
|
|
89
|
+
function handleRequest(req: any) {
|
|
90
|
+
if (req.type === 'create') { /* 50 lines */ }
|
|
91
|
+
else if (req.type === 'update') { /* 50 lines */ }
|
|
92
|
+
else if (req.type === 'delete') { /* 30 lines */ }
|
|
93
|
+
// Sprint 2-10: more conditions added, function grows to 500 lines
|
|
94
|
+
// "Refactor later" never comes because nobody wants to touch it
|
|
95
|
+
}
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
### What TDD enforces:
|
|
99
|
+
|
|
100
|
+
Step 3 (REFACTOR) happens every cycle. The function never grows beyond what's clean because you clean it every 5-10 minutes.
|
|
101
|
+
|
|
102
|
+
## "Tests slow me down"
|
|
103
|
+
|
|
104
|
+
### The math:
|
|
105
|
+
|
|
106
|
+
| Approach | Time to write | Time to first bug | Time to fix bug | Total (1 month) |
|
|
107
|
+
|----------|:---:|:---:|:---:|:---:|
|
|
108
|
+
| No TDD | 2h | 4h | 3h (no repro test) | 9h+ |
|
|
109
|
+
| TDD | 3h | Caught in test | 15min (test pinpoints) | 3h 15min |
|
|
110
|
+
|
|
111
|
+
TDD is slower for the first 30 minutes. It's faster for everything after that.
|
|
@@ -5,6 +5,15 @@
|
|
|
5
5
|
"Bash(rm -rf ~*)",
|
|
6
6
|
"Bash(rm -rf .*)",
|
|
7
7
|
"Bash(* rm -rf /*)",
|
|
8
|
+
"Bash(rm -r /*)",
|
|
9
|
+
"Bash(rm -r ~*)",
|
|
10
|
+
"Bash(rm -r .*)",
|
|
11
|
+
"Bash(rm -fr /*)",
|
|
12
|
+
"Bash(rm -fr ~*)",
|
|
13
|
+
"Bash(rm -fr .*)",
|
|
14
|
+
"Bash(rm -f /*)",
|
|
15
|
+
"Bash(rm -f ~*)",
|
|
16
|
+
"Bash(rm -f .*)",
|
|
8
17
|
"Bash(dd if=*)",
|
|
9
18
|
"Bash(dd*of=/dev/*)",
|
|
10
19
|
"Bash(mkfs*)",
|
|
@@ -85,12 +94,17 @@
|
|
|
85
94
|
"Bash(crontab*)",
|
|
86
95
|
"Bash(rm /var/log*)",
|
|
87
96
|
"Bash(rm -rf /var/log*)",
|
|
97
|
+
"Bash(rm -r /var/log*)",
|
|
98
|
+
"Bash(rm -f /var/log*)",
|
|
99
|
+
"Bash(rm -fr /var/log*)",
|
|
88
100
|
"Bash(> /var/log*)",
|
|
89
101
|
"Bash(truncate /var/log*)",
|
|
90
102
|
"Bash(history -c*)",
|
|
91
103
|
"Bash(history -w*)",
|
|
92
104
|
"Bash(rm ~/.bash_history*)",
|
|
105
|
+
"Bash(rm -f ~/.bash_history*)",
|
|
93
106
|
"Bash(rm ~/.zsh_history*)",
|
|
107
|
+
"Bash(rm -f ~/.zsh_history*)",
|
|
94
108
|
"Bash(unset HISTFILE*)",
|
|
95
109
|
"Bash(curl 169.254.169.254*)",
|
|
96
110
|
"Bash(wget 169.254.169.254*)",
|