@tianhai/pi-workflow-kit 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +22 -0
- package/README.md +509 -0
- package/ROADMAP.md +16 -0
- package/agents/code-reviewer.md +18 -0
- package/agents/config.ts +5 -0
- package/agents/implementer.md +26 -0
- package/agents/spec-reviewer.md +13 -0
- package/agents/worker.md +17 -0
- package/banner.jpg +0 -0
- package/docs/developer-usage-guide.md +463 -0
- package/docs/oversight-model.md +49 -0
- package/docs/workflow-phases.md +71 -0
- package/extensions/constants.ts +9 -0
- package/extensions/lib/logging.ts +138 -0
- package/extensions/plan-tracker.ts +496 -0
- package/extensions/subagent/agents.ts +144 -0
- package/extensions/subagent/concurrency.ts +52 -0
- package/extensions/subagent/env.ts +47 -0
- package/extensions/subagent/index.ts +1116 -0
- package/extensions/subagent/lifecycle.ts +25 -0
- package/extensions/subagent/timeout.ts +13 -0
- package/extensions/workflow-monitor/debug-monitor.ts +98 -0
- package/extensions/workflow-monitor/git.ts +31 -0
- package/extensions/workflow-monitor/heuristics.ts +58 -0
- package/extensions/workflow-monitor/investigation.ts +52 -0
- package/extensions/workflow-monitor/reference-tool.ts +42 -0
- package/extensions/workflow-monitor/skip-confirmation.ts +19 -0
- package/extensions/workflow-monitor/tdd-monitor.ts +137 -0
- package/extensions/workflow-monitor/test-runner.ts +37 -0
- package/extensions/workflow-monitor/verification-monitor.ts +61 -0
- package/extensions/workflow-monitor/warnings.ts +81 -0
- package/extensions/workflow-monitor/workflow-handler.ts +358 -0
- package/extensions/workflow-monitor/workflow-tracker.ts +231 -0
- package/extensions/workflow-monitor/workflow-transitions.ts +55 -0
- package/extensions/workflow-monitor.ts +885 -0
- package/package.json +49 -0
- package/skills/brainstorming/SKILL.md +70 -0
- package/skills/dispatching-parallel-agents/SKILL.md +194 -0
- package/skills/executing-tasks/SKILL.md +247 -0
- package/skills/receiving-code-review/SKILL.md +196 -0
- package/skills/systematic-debugging/SKILL.md +170 -0
- package/skills/systematic-debugging/condition-based-waiting-example.ts +158 -0
- package/skills/systematic-debugging/condition-based-waiting.md +115 -0
- package/skills/systematic-debugging/defense-in-depth.md +122 -0
- package/skills/systematic-debugging/find-polluter.sh +63 -0
- package/skills/systematic-debugging/reference/rationalizations.md +61 -0
- package/skills/systematic-debugging/root-cause-tracing.md +169 -0
- package/skills/test-driven-development/SKILL.md +266 -0
- package/skills/test-driven-development/reference/examples.md +101 -0
- package/skills/test-driven-development/reference/rationalizations.md +67 -0
- package/skills/test-driven-development/reference/when-stuck.md +33 -0
- package/skills/test-driven-development/testing-anti-patterns.md +299 -0
- package/skills/using-git-worktrees/SKILL.md +231 -0
- package/skills/writing-plans/SKILL.md +149 -0
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
# Defense-in-Depth Validation
|
|
2
|
+
|
|
3
|
+
## Overview
|
|
4
|
+
|
|
5
|
+
When you fix a bug caused by invalid data, adding validation at one place feels sufficient. But that single check can be bypassed by different code paths, refactoring, or mocks.
|
|
6
|
+
|
|
7
|
+
**Core principle:** Validate at EVERY layer data passes through. Make the bug structurally impossible.
|
|
8
|
+
|
|
9
|
+
## Why Multiple Layers
|
|
10
|
+
|
|
11
|
+
Single validation: "We fixed the bug"
|
|
12
|
+
Multiple layers: "We made the bug impossible"
|
|
13
|
+
|
|
14
|
+
Different layers catch different cases:
|
|
15
|
+
- Entry validation catches most bugs
|
|
16
|
+
- Business logic catches edge cases
|
|
17
|
+
- Environment guards prevent context-specific dangers
|
|
18
|
+
- Debug logging helps when other layers fail
|
|
19
|
+
|
|
20
|
+
## The Four Layers
|
|
21
|
+
|
|
22
|
+
### Layer 1: Entry Point Validation
|
|
23
|
+
**Purpose:** Reject obviously invalid input at API boundary
|
|
24
|
+
|
|
25
|
+
```typescript
|
|
26
|
+
function createProject(name: string, workingDirectory: string) {
|
|
27
|
+
if (!workingDirectory || workingDirectory.trim() === '') {
|
|
28
|
+
throw new Error('workingDirectory cannot be empty');
|
|
29
|
+
}
|
|
30
|
+
if (!existsSync(workingDirectory)) {
|
|
31
|
+
throw new Error(`workingDirectory does not exist: ${workingDirectory}`);
|
|
32
|
+
}
|
|
33
|
+
if (!statSync(workingDirectory).isDirectory()) {
|
|
34
|
+
throw new Error(`workingDirectory is not a directory: ${workingDirectory}`);
|
|
35
|
+
}
|
|
36
|
+
// ... proceed
|
|
37
|
+
}
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
### Layer 2: Business Logic Validation
|
|
41
|
+
**Purpose:** Ensure data makes sense for this operation
|
|
42
|
+
|
|
43
|
+
```typescript
|
|
44
|
+
function initializeWorkspace(projectDir: string, sessionId: string) {
|
|
45
|
+
if (!projectDir) {
|
|
46
|
+
throw new Error('projectDir required for workspace initialization');
|
|
47
|
+
}
|
|
48
|
+
// ... proceed
|
|
49
|
+
}
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
### Layer 3: Environment Guards
|
|
53
|
+
**Purpose:** Prevent dangerous operations in specific contexts
|
|
54
|
+
|
|
55
|
+
```typescript
|
|
56
|
+
async function gitInit(directory: string) {
|
|
57
|
+
// In tests, refuse git init outside temp directories
|
|
58
|
+
if (process.env.NODE_ENV === 'test') {
|
|
59
|
+
const normalized = normalize(resolve(directory));
|
|
60
|
+
const tmpDir = normalize(resolve(tmpdir()));
|
|
61
|
+
|
|
62
|
+
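// Compare whole path segments: a bare prefix test would also accept siblings like "/tmp-evil"
if (normalized !== tmpDir && !normalized.startsWith(normalize(tmpDir + '/'))) {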
|
|
63
|
+
throw new Error(
|
|
64
|
+
`Refusing git init outside temp dir during tests: ${directory}`
|
|
65
|
+
);
|
|
66
|
+
}
|
|
67
|
+
}
|
|
68
|
+
// ... proceed
|
|
69
|
+
}
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
### Layer 4: Debug Instrumentation
|
|
73
|
+
**Purpose:** Capture context for forensics
|
|
74
|
+
|
|
75
|
+
```typescript
|
|
76
|
+
async function gitInit(directory: string) {
|
|
77
|
+
const stack = new Error().stack;
|
|
78
|
+
logger.debug('About to git init', {
|
|
79
|
+
directory,
|
|
80
|
+
cwd: process.cwd(),
|
|
81
|
+
stack,
|
|
82
|
+
});
|
|
83
|
+
// ... proceed
|
|
84
|
+
}
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
## Applying the Pattern
|
|
88
|
+
|
|
89
|
+
When you find a bug:
|
|
90
|
+
|
|
91
|
+
1. **Trace the data flow** - Where does the bad value originate? Where is it used?
|
|
92
|
+
2. **Map all checkpoints** - List every point data passes through
|
|
93
|
+
3. **Add validation at each layer** - Entry, business, environment, debug
|
|
94
|
+
4. **Test each layer** - Try to bypass layer 1, verify layer 2 catches it
|
|
95
|
+
|
|
96
|
+
## Example from Session
|
|
97
|
+
|
|
98
|
+
Bug: Empty `projectDir` caused `git init` in source code
|
|
99
|
+
|
|
100
|
+
**Data flow:**
|
|
101
|
+
1. Test setup → empty string
|
|
102
|
+
2. `Project.create(name, '')`
|
|
103
|
+
3. `WorkspaceManager.createWorkspace('')`
|
|
104
|
+
4. `git init` runs in `process.cwd()`
|
|
105
|
+
|
|
106
|
+
**Four layers added:**
|
|
107
|
+
- Layer 1: `Project.create()` validates not empty/exists/is a directory
|
|
108
|
+
- Layer 2: `WorkspaceManager` validates projectDir not empty
|
|
109
|
+
- Layer 3: `WorktreeManager` refuses git init outside tmpdir in tests
|
|
110
|
+
- Layer 4: Stack trace logging before git init
|
|
111
|
+
|
|
112
|
+
**Result:** All 1847 tests passed, bug impossible to reproduce
|
|
113
|
+
|
|
114
|
+
## Key Insight
|
|
115
|
+
|
|
116
|
+
All four layers were necessary. During testing, each layer caught bugs the others missed:
|
|
117
|
+
- Different code paths bypassed entry validation
|
|
118
|
+
- Mocks bypassed business logic checks
|
|
119
|
+
- Edge cases on different platforms needed environment guards
|
|
120
|
+
- Debug logging identified structural misuse
|
|
121
|
+
|
|
122
|
+
**Don't stop at one validation point.** Add checks at every layer.
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
#!/usr/bin/env bash
# Bisection script to find which test creates unwanted files/state.
#
# Runs every test file matching <test_pattern> one at a time (via `npm test`)
# and stops at the first one after which <file_or_dir_to_check> exists.
#
# Usage:   ./find-polluter.sh <file_or_dir_to_check> <test_pattern>
# Example: ./find-polluter.sh '.git' 'src/**/*.test.ts'
#
# Exit codes:
#   0 - every matched test ran and none created the pollution
#   1 - usage error, or a polluting test was found (details are printed)
#   2 - the pattern matched no test files, so nothing was checked

set -e

if [ $# -ne 2 ]; then
  echo "Usage: $0 <file_to_check> <test_pattern>"
  echo "Example: $0 '.git' 'src/**/*.test.ts'"
  exit 1
fi

POLLUTION_CHECK="$1"
TEST_PATTERN="$2"

echo "🔍 Searching for test that creates: $POLLUTION_CHECK"
echo "Test pattern: $TEST_PATTERN"
echo ""

# `find .` prints every path with a leading "./" and -path matches the WHOLE
# path, so a pattern such as 'src/**/*.test.ts' would never match as given.
# Anchor it with exactly one "./" whether or not the caller supplied one.
FIND_PATTERN="./${TEST_PATTERN#./}"

# Get list of test files.
# Read line by line into an array so paths containing spaces stay intact
# (a plain `for f in $LIST` would word-split them). A while/read loop is used
# instead of `mapfile` so the script also runs on bash 3.2.
TEST_FILES=()
while IFS= read -r FOUND_FILE; do
  TEST_FILES+=("$FOUND_FILE")
done < <(find . -type f -path "$FIND_PATTERN" | sort)

# Count array elements; piping an empty list through `wc -l` would report 1.
TOTAL=${#TEST_FILES[@]}

echo "Found $TOTAL test files"
echo ""

# Without this guard an unmatched pattern would fall through to the
# "all tests clean" message even though no test was ever run.
if [ "$TOTAL" -eq 0 ]; then
  echo "No test files matched pattern: $TEST_PATTERN" >&2
  exit 2
fi

COUNT=0
for TEST_FILE in "${TEST_FILES[@]}"; do
  COUNT=$((COUNT + 1))

  # Skip if pollution already exists
  if [ -e "$POLLUTION_CHECK" ]; then
    echo "⚠️ Pollution already exists before test $COUNT/$TOTAL"
    echo " Skipping: $TEST_FILE"
    continue
  fi

  echo "[$COUNT/$TOTAL] Testing: $TEST_FILE"

  # Run the test. Its pass/fail status is irrelevant here (only the side
  # effect matters), so `|| true` keeps `set -e` from aborting the bisection.
  npm test "$TEST_FILE" > /dev/null 2>&1 || true

  # Check if pollution appeared
  if [ -e "$POLLUTION_CHECK" ]; then
    echo ""
    echo "🎯 FOUND POLLUTER!"
    echo " Test: $TEST_FILE"
    echo " Created: $POLLUTION_CHECK"
    echo ""
    echo "Pollution details:"
    ls -la "$POLLUTION_CHECK"
    echo ""
    echo "To investigate:"
    echo " npm test $TEST_FILE # Run just this test"
    echo " cat $TEST_FILE # Review test code"
    exit 1
  fi
done

echo ""
echo "✅ No polluter found - all tests clean!"
exit 0
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
# Debugging Rationalizations & Red Flags
|
|
2
|
+
|
|
3
|
+
## Common Rationalizations
|
|
4
|
+
|
|
5
|
+
| Excuse | Reality |
|
|
6
|
+
|--------|---------|
|
|
7
|
+
| "Issue is simple, don't need process" | Simple issues have root causes too. Process is fast for simple bugs. |
|
|
8
|
+
| "Emergency, no time for process" | Systematic debugging is FASTER than guess-and-check thrashing. |
|
|
9
|
+
| "Just try this first, then investigate" | First fix sets the pattern. Do it right from the start. |
|
|
10
|
+
| "I'll write test after confirming fix works" | Untested fixes don't stick. Test first proves it. |
|
|
11
|
+
| "Multiple fixes at once saves time" | Can't isolate what worked. Causes new bugs. |
|
|
12
|
+
| "Reference too long, I'll adapt the pattern" | Partial understanding guarantees bugs. Read it completely. |
|
|
13
|
+
| "I see the problem, let me fix it" | Seeing symptoms ≠ understanding root cause. |
|
|
14
|
+
| "One more fix attempt" (after 2+ failures) | 3+ failures = architectural problem. Question pattern, don't fix again. |
|
|
15
|
+
|
|
16
|
+
## Red Flags — STOP and Follow Process
|
|
17
|
+
|
|
18
|
+
If you catch yourself thinking:
|
|
19
|
+
- "Quick fix for now, investigate later"
|
|
20
|
+
- "Just try changing X and see if it works"
|
|
21
|
+
- "Add multiple changes, run tests"
|
|
22
|
+
- "Skip the test, I'll manually verify"
|
|
23
|
+
- "It's probably X, let me fix that"
|
|
24
|
+
- "I don't fully understand but this might work"
|
|
25
|
+
- "Pattern says X but I'll adapt it differently"
|
|
26
|
+
- "Here are the main problems: [lists fixes without investigation]"
|
|
27
|
+
- Proposing solutions before tracing data flow
|
|
28
|
+
- "One more fix attempt" (when already tried 2+)
|
|
29
|
+
- Each fix reveals new problem in different place
|
|
30
|
+
|
|
31
|
+
**ALL of these mean: STOP. Return to Phase 1.**
|
|
32
|
+
|
|
33
|
+
**If 3+ fixes failed:** Question the architecture (see Phase 4.5)
|
|
34
|
+
|
|
35
|
+
## Your Human Partner's Signals You're Doing It Wrong
|
|
36
|
+
|
|
37
|
+
**Watch for these redirections:**
|
|
38
|
+
- "Is that not happening?" — You assumed without verifying
|
|
39
|
+
- "Will it show us...?" — You should have added evidence gathering
|
|
40
|
+
- "Stop guessing" — You're proposing fixes without understanding
|
|
41
|
+
- "Ultrathink this" — Question fundamentals, not just symptoms
|
|
42
|
+
- "We're stuck?" (frustrated) — Your approach isn't working
|
|
43
|
+
|
|
44
|
+
**When you see these:** STOP. Return to Phase 1.
|
|
45
|
+
|
|
46
|
+
## Quick Reference
|
|
47
|
+
|
|
48
|
+
| Phase | Key Activities | Success Criteria |
|
|
49
|
+
|-------|---------------|------------------|
|
|
50
|
+
| **1. Root Cause** | Read errors, reproduce, check changes, gather evidence | Understand WHAT and WHY |
|
|
51
|
+
| **2. Pattern** | Find working examples, compare | Identify differences |
|
|
52
|
+
| **3. Hypothesis** | Form theory, test minimally | Confirmed or new hypothesis |
|
|
53
|
+
| **4. Implementation** | Create test, fix, verify | Bug resolved, tests pass |
|
|
54
|
+
|
|
55
|
+
## Real-World Impact
|
|
56
|
+
|
|
57
|
+
From debugging sessions:
|
|
58
|
+
- Systematic approach: 15-30 minutes to fix
|
|
59
|
+
- Random fixes approach: 2-3 hours of thrashing
|
|
60
|
+
- First-time fix rate: 95% (systematic) vs 40% (random fixes)
|
|
61
|
+
- New bugs introduced: near zero (systematic) vs common (random fixes)
|
|
@@ -0,0 +1,169 @@
|
|
|
1
|
+
# Root Cause Tracing
|
|
2
|
+
|
|
3
|
+
## Overview
|
|
4
|
+
|
|
5
|
+
Bugs often manifest deep in the call stack (git init in wrong directory, file created in wrong location, database opened with wrong path). Your instinct is to fix where the error appears, but that's treating a symptom.
|
|
6
|
+
|
|
7
|
+
**Core principle:** Trace backward through the call chain until you find the original trigger, then fix at the source.
|
|
8
|
+
|
|
9
|
+
## When to Use
|
|
10
|
+
|
|
11
|
+
```dot
|
|
12
|
+
digraph when_to_use {
|
|
13
|
+
"Bug appears deep in stack?" [shape=diamond];
|
|
14
|
+
"Can trace backwards?" [shape=diamond];
|
|
15
|
+
"Fix at symptom point" [shape=box];
|
|
16
|
+
"Trace to original trigger" [shape=box];
|
|
17
|
+
"BETTER: Also add defense-in-depth" [shape=box];
|
|
18
|
+
|
|
19
|
+
"Bug appears deep in stack?" -> "Can trace backwards?" [label="yes"];
|
|
20
|
+
"Can trace backwards?" -> "Trace to original trigger" [label="yes"];
|
|
21
|
+
"Can trace backwards?" -> "Fix at symptom point" [label="no - dead end"];
|
|
22
|
+
"Trace to original trigger" -> "BETTER: Also add defense-in-depth";
|
|
23
|
+
}
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
**Use when:**
|
|
27
|
+
- Error happens deep in execution (not at entry point)
|
|
28
|
+
- Stack trace shows long call chain
|
|
29
|
+
- Unclear where invalid data originated
|
|
30
|
+
- Need to find which test/code triggers the problem
|
|
31
|
+
|
|
32
|
+
## The Tracing Process
|
|
33
|
+
|
|
34
|
+
### 1. Observe the Symptom
|
|
35
|
+
```
|
|
36
|
+
Error: git init failed in /Users/jesse/project/packages/core
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
### 2. Find Immediate Cause
|
|
40
|
+
**What code directly causes this?**
|
|
41
|
+
```typescript
|
|
42
|
+
await execFileAsync('git', ['init'], { cwd: projectDir });
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
### 3. Ask: What Called This?
|
|
46
|
+
```typescript
|
|
47
|
+
WorktreeManager.createSessionWorktree(projectDir, sessionId)
|
|
48
|
+
→ called by Session.initializeWorkspace()
|
|
49
|
+
→ called by Session.create()
|
|
50
|
+
→ called by test at Project.create()
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
### 4. Keep Tracing Up
|
|
54
|
+
**What value was passed?**
|
|
55
|
+
- `projectDir = ''` (empty string!)
|
|
56
|
+
- Empty string as `cwd` resolves to `process.cwd()`
|
|
57
|
+
- That's the source code directory!
|
|
58
|
+
|
|
59
|
+
### 5. Find Original Trigger
|
|
60
|
+
**Where did empty string come from?**
|
|
61
|
+
```typescript
|
|
62
|
+
const context = setupCoreTest(); // Returns { tempDir: '' }
|
|
63
|
+
Project.create('name', context.tempDir); // Accessed before beforeEach!
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
## Adding Stack Traces
|
|
67
|
+
|
|
68
|
+
When you can't trace manually, add instrumentation:
|
|
69
|
+
|
|
70
|
+
```typescript
|
|
71
|
+
// Before the problematic operation
|
|
72
|
+
async function gitInit(directory: string) {
|
|
73
|
+
const stack = new Error().stack;
|
|
74
|
+
console.error('DEBUG git init:', {
|
|
75
|
+
directory,
|
|
76
|
+
cwd: process.cwd(),
|
|
77
|
+
nodeEnv: process.env.NODE_ENV,
|
|
78
|
+
stack,
|
|
79
|
+
});
|
|
80
|
+
|
|
81
|
+
await execFileAsync('git', ['init'], { cwd: directory });
|
|
82
|
+
}
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
**Critical:** Use `console.error()` in tests, not the logger — logger output may be suppressed and never shown
|
|
86
|
+
|
|
87
|
+
**Run and capture:**
|
|
88
|
+
```bash
|
|
89
|
+
npm test 2>&1 | grep 'DEBUG git init'
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
**Analyze stack traces:**
|
|
93
|
+
- Look for test file names
|
|
94
|
+
- Find the line number triggering the call
|
|
95
|
+
- Identify the pattern (same test? same parameter?)
|
|
96
|
+
|
|
97
|
+
## Finding Which Test Causes Pollution
|
|
98
|
+
|
|
99
|
+
If something appears during tests but you don't know which test:
|
|
100
|
+
|
|
101
|
+
Use the bisection script `find-polluter.sh` in this directory:
|
|
102
|
+
|
|
103
|
+
```bash
|
|
104
|
+
./find-polluter.sh '.git' 'src/**/*.test.ts'
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
Runs tests one-by-one, stops at first polluter. See script for usage.
|
|
108
|
+
|
|
109
|
+
## Real Example: Empty projectDir
|
|
110
|
+
|
|
111
|
+
**Symptom:** `.git` created in `packages/core/` (source code)
|
|
112
|
+
|
|
113
|
+
**Trace chain:**
|
|
114
|
+
1. `git init` runs in `process.cwd()` ← empty cwd parameter
|
|
115
|
+
2. WorktreeManager called with empty projectDir
|
|
116
|
+
3. Session.create() passed empty string
|
|
117
|
+
4. Test accessed `context.tempDir` before beforeEach
|
|
118
|
+
5. setupCoreTest() returns `{ tempDir: '' }` initially
|
|
119
|
+
|
|
120
|
+
**Root cause:** Top-level variable initialization accessing empty value
|
|
121
|
+
|
|
122
|
+
**Fix:** Made tempDir a getter that throws if accessed before beforeEach
|
|
123
|
+
|
|
124
|
+
**Also added defense-in-depth:**
|
|
125
|
+
- Layer 1: Project.create() validates directory
|
|
126
|
+
- Layer 2: WorkspaceManager validates not empty
|
|
127
|
+
- Layer 3: NODE_ENV guard refuses git init outside tmpdir
|
|
128
|
+
- Layer 4: Stack trace logging before git init
|
|
129
|
+
|
|
130
|
+
## Key Principle
|
|
131
|
+
|
|
132
|
+
```dot
|
|
133
|
+
digraph principle {
|
|
134
|
+
"Found immediate cause" [shape=ellipse];
|
|
135
|
+
"Can trace one level up?" [shape=diamond];
|
|
136
|
+
"Trace backwards" [shape=box];
|
|
137
|
+
"Is this the source?" [shape=diamond];
|
|
138
|
+
"Fix at source" [shape=box];
|
|
139
|
+
"Add validation at each layer" [shape=box];
|
|
140
|
+
"Bug impossible" [shape=doublecircle];
|
|
141
|
+
"NEVER fix just the symptom" [shape=octagon, style=filled, fillcolor=red, fontcolor=white];
|
|
142
|
+
|
|
143
|
+
"Found immediate cause" -> "Can trace one level up?";
|
|
144
|
+
"Can trace one level up?" -> "Trace backwards" [label="yes"];
|
|
145
|
+
"Can trace one level up?" -> "NEVER fix just the symptom" [label="no"];
|
|
146
|
+
"Trace backwards" -> "Is this the source?";
|
|
147
|
+
"Is this the source?" -> "Trace backwards" [label="no - keeps going"];
|
|
148
|
+
"Is this the source?" -> "Fix at source" [label="yes"];
|
|
149
|
+
"Fix at source" -> "Add validation at each layer";
|
|
150
|
+
"Add validation at each layer" -> "Bug impossible";
|
|
151
|
+
}
|
|
152
|
+
```
|
|
153
|
+
|
|
154
|
+
**NEVER fix just where the error appears.** Trace back to find the original trigger.
|
|
155
|
+
|
|
156
|
+
## Stack Trace Tips
|
|
157
|
+
|
|
158
|
+
**In tests:** Use `console.error()` not logger - logger may be suppressed
|
|
159
|
+
**Before operation:** Log before the dangerous operation, not after it fails
|
|
160
|
+
**Include context:** Directory, cwd, environment variables, timestamps
|
|
161
|
+
**Capture stack:** `new Error().stack` shows complete call chain
|
|
162
|
+
|
|
163
|
+
## Real-World Impact
|
|
164
|
+
|
|
165
|
+
From debugging session (2025-10-03):
|
|
166
|
+
- Found root cause through 5-level trace
|
|
167
|
+
- Fixed at source (getter validation)
|
|
168
|
+
- Added 4 layers of defense
|
|
169
|
+
- 1847 tests passed, zero pollution
|
|
@@ -0,0 +1,266 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: test-driven-development
|
|
3
|
+
description: Use when implementing any feature or bugfix, before writing implementation code
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
> **Related skills:** Before claiming done, use `/skill:executing-tasks` to verify tests actually pass.
|
|
7
|
+
|
|
8
|
+
# Test-Driven Development (TDD)
|
|
9
|
+
|
|
10
|
+
## Overview
|
|
11
|
+
|
|
12
|
+
Write the test first. Watch it fail. Write minimal code to pass.
|
|
13
|
+
|
|
14
|
+
**Core principle:** If you didn't watch the test fail, you don't know if it tests the right thing.
|
|
15
|
+
|
|
16
|
+
**Violating the letter of the rules is violating the spirit of the rules.**
|
|
17
|
+
|
|
18
|
+
## Prerequisites
|
|
19
|
+
- Active branch (not main) or user-confirmed intent to work on main
|
|
20
|
+
- Approved plan or clear task scope
|
|
21
|
+
|
|
22
|
+
## When to Use — Three Scenarios
|
|
23
|
+
|
|
24
|
+
Not every change requires the same TDD approach. Determine which scenario applies:
|
|
25
|
+
|
|
26
|
+
### Scenario 1: New Feature / New File
|
|
27
|
+
|
|
28
|
+
Full TDD cycle. No shortcuts.
|
|
29
|
+
|
|
30
|
+
1. Write a failing test
|
|
31
|
+
2. Watch it fail
|
|
32
|
+
3. Write minimal code to pass
|
|
33
|
+
4. Watch it pass
|
|
34
|
+
5. Refactor
|
|
35
|
+
6. Repeat
|
|
36
|
+
|
|
37
|
+
**This is the default.** If in doubt, use this scenario.
|
|
38
|
+
|
|
39
|
+
### Scenario 2: Modifying Code with Existing Tests
|
|
40
|
+
|
|
41
|
+
When changing code that already has test coverage:
|
|
42
|
+
|
|
43
|
+
1. Run existing tests — confirm green
|
|
44
|
+
2. Make your change
|
|
45
|
+
3. Run tests again — confirm still green
|
|
46
|
+
4. If your change isn't covered by existing tests, add a test for it
|
|
47
|
+
5. If existing tests already cover the changed behavior, you're done
|
|
48
|
+
|
|
49
|
+
**Key:** You must verify existing tests pass *before* and *after* your change. If you can't confirm test coverage, fall back to Scenario 1.
|
|
50
|
+
|
|
51
|
+
### Scenario 3: Trivial Change
|
|
52
|
+
|
|
53
|
+
For typo fixes, config tweaks, string changes, renames:
|
|
54
|
+
|
|
55
|
+
- Use judgment
|
|
56
|
+
- If relevant tests exist, run them after your change
|
|
57
|
+
- Don't write a new test for a string literal change
|
|
58
|
+
|
|
59
|
+
**Be honest:** If the change touches logic, it's not trivial. Use Scenario 1 or 2.
|
|
60
|
+
|
|
61
|
+
### If You See a ⚠️ TDD Warning
|
|
62
|
+
|
|
63
|
+
The workflow monitor detected a potential TDD violation. Pause and assess:
|
|
64
|
+
|
|
65
|
+
1. **Identify your scenario** — which of 1, 2, or 3 applies to this change?
|
|
66
|
+
2. **Scenario 1 (new file):** If no test exists yet, stop, delete any written source code, write a failing test first, then re-implement.
|
|
67
|
+
3. **Scenario 2 (existing tests):** Run the existing tests now. Confirm they're green. Then proceed with your change. Run them again after.
|
|
68
|
+
4. **Scenario 3 (trivial):** If the change truly is trivial, run relevant tests after and continue.
|
|
69
|
+
|
|
70
|
+
The warning is a signal to think, not a hard stop. But if you can't clearly identify which scenario applies, default to Scenario 1.
|
|
71
|
+
|
|
72
|
+
## Interpreting Runtime Warnings
|
|
73
|
+
|
|
74
|
+
The workflow monitor tracks your TDD phase and may inject warnings like:
|
|
75
|
+
|
|
76
|
+
```
|
|
77
|
+
⚠️ TDD: Writing source code (src/foo.ts) without a failing test.
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
**When you see this, pause and assess:**
|
|
81
|
+
- Which scenario applies to this change?
|
|
82
|
+
- If Scenario 2: run existing tests to confirm coverage, then proceed
|
|
83
|
+
- If Scenario 1: write a failing test first
|
|
84
|
+
- If Scenario 3: proceed, run tests after
|
|
85
|
+
|
|
86
|
+
The warning is a signal to think, not a hard stop.
|
|
87
|
+
|
|
88
|
+
## The Iron Law (Scenario 1)
|
|
89
|
+
|
|
90
|
+
```
|
|
91
|
+
NO PRODUCTION CODE WITHOUT A FAILING TEST FIRST
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
Write code before the test? Delete it. Start over.
|
|
95
|
+
- Don't keep it as "reference"
|
|
96
|
+
- Don't "adapt" it while writing tests
|
|
97
|
+
- Delete means delete. Implement fresh from tests.
|
|
98
|
+
|
|
99
|
+
## Red-Green-Refactor
|
|
100
|
+
|
|
101
|
+
### RED — Write Failing Test
|
|
102
|
+
|
|
103
|
+
Write one minimal test showing what should happen.
|
|
104
|
+
|
|
105
|
+
**Requirements:**
|
|
106
|
+
- One behavior per test
|
|
107
|
+
- Clear name describing behavior (if the name contains "and", split it)
|
|
108
|
+
- Real code (no mocks unless unavoidable)
|
|
109
|
+
- Shows desired API — demonstrates how code should be called
|
|
110
|
+
|
|
111
|
+
**Good:**
|
|
112
|
+
```typescript
|
|
113
|
+
test('retries failed operations 3 times', async () => {
|
|
114
|
+
let attempts = 0;
|
|
115
|
+
const operation = () => {
|
|
116
|
+
attempts++;
|
|
117
|
+
if (attempts < 3) throw new Error('fail');
|
|
118
|
+
return 'success';
|
|
119
|
+
};
|
|
120
|
+
const result = await retryOperation(operation);
|
|
121
|
+
expect(result).toBe('success');
|
|
122
|
+
expect(attempts).toBe(3);
|
|
123
|
+
});
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
**Bad:**
|
|
127
|
+
```typescript
|
|
128
|
+
test('retry works', async () => {
|
|
129
|
+
const mock = jest.fn().mockRejectedValueOnce(new Error()).mockResolvedValueOnce('ok');
|
|
130
|
+
await retryOperation(mock);
|
|
131
|
+
expect(mock).toHaveBeenCalledTimes(2);
|
|
132
|
+
});
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
### Verify RED — Watch It Fail
|
|
136
|
+
|
|
137
|
+
**MANDATORY. Never skip.**
|
|
138
|
+
|
|
139
|
+
Run the test. Confirm:
|
|
140
|
+
- Test **fails** (not errors from syntax/import issues)
|
|
141
|
+
- Failure message matches expectation
|
|
142
|
+
- Fails because the feature is missing (not because of typos)
|
|
143
|
+
|
|
144
|
+
**Test passes immediately?** You're testing existing behavior. Fix the test.
|
|
145
|
+
**Test errors instead of failing?** Fix the error, re-run until it fails correctly.
|
|
146
|
+
|
|
147
|
+
### GREEN — Minimal Code
|
|
148
|
+
|
|
149
|
+
Write the simplest code to pass the test. Nothing more.
|
|
150
|
+
|
|
151
|
+
Don't add features, refactor other code, or "improve" beyond what the test requires. If you're writing code that no test exercises, stop.
|
|
152
|
+
|
|
153
|
+
**Good:** Just enough to pass the test.
|
|
154
|
+
**Bad:** Adding options, config, generalization that no test asks for (YAGNI).
|
|
155
|
+
|
|
156
|
+
### Verify GREEN — Watch It Pass
|
|
157
|
+
|
|
158
|
+
**MANDATORY.**
|
|
159
|
+
|
|
160
|
+
Run the test. Confirm:
|
|
161
|
+
- New test passes
|
|
162
|
+
- All other tests still pass
|
|
163
|
+
- Output is pristine (no errors, no warnings)
|
|
164
|
+
|
|
165
|
+
**Test fails?** Fix code, not test.
|
|
166
|
+
**Other tests fail?** Fix now — don't move on with broken tests.
|
|
167
|
+
|
|
168
|
+
### REFACTOR — Clean Up
|
|
169
|
+
|
|
170
|
+
Only after green:
|
|
171
|
+
- Remove duplication
|
|
172
|
+
- Improve names
|
|
173
|
+
- Extract helpers
|
|
174
|
+
|
|
175
|
+
Keep tests green throughout. Don't add new behavior during refactor.
|
|
176
|
+
|
|
177
|
+
### Repeat
|
|
178
|
+
|
|
179
|
+
Next failing test for next behavior.
|
|
180
|
+
|
|
181
|
+
## Common Rationalizations
|
|
182
|
+
|
|
183
|
+
| Excuse | Reality |
|
|
184
|
+
|--------|---------|
|
|
185
|
+
| "Too simple to test" | Simple code breaks. Test takes 30 seconds. |
|
|
186
|
+
| "I'll test after" | Tests passing immediately prove nothing. |
|
|
187
|
+
| "Tests after achieve same goals" | Tests-after = "what does this do?" Tests-first = "what should this do?" |
|
|
188
|
+
| "Already manually tested" | Ad-hoc ≠ systematic. No record, can't re-run. |
|
|
189
|
+
| "Deleting X hours is wasteful" | Sunk cost fallacy. Keeping unverified code is technical debt. |
|
|
190
|
+
| "Keep as reference, write tests first" | You'll adapt it. That's testing after. Delete means delete. |
|
|
191
|
+
| "Need to explore first" | Fine. Throw away exploration, start with TDD. |
|
|
192
|
+
| "Test hard = design unclear" | Listen to test. Hard to test = hard to use. |
|
|
193
|
+
| "TDD will slow me down" | TDD faster than debugging. Pragmatic = test-first. |
|
|
194
|
+
| "Existing code has no tests" | You're improving it. Add tests for the code you're changing. |
|
|
195
|
+
| "This is different because..." | It's not. Follow the process. |
|
|
196
|
+
|
|
197
|
+
## Red Flags — STOP and Start Over
|
|
198
|
+
|
|
199
|
+
If you catch yourself doing any of these, stop immediately:
|
|
200
|
+
|
|
201
|
+
- Writing production code before the test
|
|
202
|
+
- Writing tests after implementation
|
|
203
|
+
- Test passes immediately (didn't catch the bug)
|
|
204
|
+
- Can't explain why test failed
|
|
205
|
+
- Rationalizing "just this once"
|
|
206
|
+
- "I already manually tested it"
|
|
207
|
+
- "Keep as reference" or "adapt existing code"
|
|
208
|
+
- "Already spent X hours, deleting is wasteful"
|
|
209
|
+
- "TDD is dogmatic, I'm being pragmatic"
|
|
210
|
+
|
|
211
|
+
**All of these mean: Delete code. Start over with TDD.**
|
|
212
|
+
|
|
213
|
+
## Verification Checklist
|
|
214
|
+
|
|
215
|
+
Before marking work complete:
|
|
216
|
+
|
|
217
|
+
- [ ] Every new function/method has a test
|
|
218
|
+
- [ ] Watched each test fail before implementing
|
|
219
|
+
- [ ] Each test failed for expected reason (feature missing, not typo)
|
|
220
|
+
- [ ] Wrote minimal code to pass each test
|
|
221
|
+
- [ ] All tests pass
|
|
222
|
+
- [ ] Output pristine (no errors, warnings)
|
|
223
|
+
- [ ] Tests use real code (mocks only if unavoidable)
|
|
224
|
+
- [ ] Edge cases and errors covered
|
|
225
|
+
|
|
226
|
+
Can't check all boxes? You skipped TDD. Start over.
|
|
227
|
+
|
|
228
|
+
## When Stuck
|
|
229
|
+
|
|
230
|
+
| Problem | Solution |
|
|
231
|
+
|---------|----------|
|
|
232
|
+
| Don't know how to test | Write wished-for API. Write assertion first. Ask your human partner. |
|
|
233
|
+
| Test too complicated | Design too complicated. Simplify interface. |
|
|
234
|
+
| Must mock everything | Code too coupled. Use dependency injection. |
|
|
235
|
+
| Test setup huge | Extract helpers. Still complex? Simplify design. |
|
|
236
|
+
|
|
237
|
+
## Debugging Integration
|
|
238
|
+
|
|
239
|
+
Bug found? Write failing test reproducing it. Follow TDD cycle. Test proves fix and prevents regression. Never fix bugs without a test.
|
|
240
|
+
|
|
241
|
+
## Testing Anti-Patterns
|
|
242
|
+
|
|
243
|
+
When adding mocks or test utilities, read `testing-anti-patterns.md` in this skill directory to avoid common pitfalls:
|
|
244
|
+
- Testing mock behavior instead of real behavior
|
|
245
|
+
- Adding test-only methods to production classes
|
|
246
|
+
- Mocking without understanding dependencies
|
|
247
|
+
|
|
248
|
+
## Reference
|
|
249
|
+
|
|
250
|
+
Use `workflow_reference` for additional detail:
|
|
251
|
+
- `tdd-rationalizations` — Extended rationalization discussion
|
|
252
|
+
- `tdd-examples` — More good/bad code examples, bug fix walkthrough
|
|
253
|
+
- `tdd-when-stuck` — Extended solutions for common blockers
|
|
254
|
+
- `tdd-anti-patterns` — Mock pitfalls, test-only methods, incomplete mocks
|
|
255
|
+
|
|
256
|
+
## Final Rule
|
|
257
|
+
|
|
258
|
+
```
|
|
259
|
+
Production code → test exists and failed first (Scenario 1)
|
|
260
|
+
Modifying tested code → existing tests verified before and after (Scenario 2)
|
|
261
|
+
Trivial change → relevant tests run after (Scenario 3)
|
|
262
|
+
```
|
|
263
|
+
|
|
264
|
+
No exceptions without your human partner's permission.
|
|
265
|
+
|
|
266
|
+
When the TDD implementation cycle is complete (all tests green, code committed), the orchestrating agent updates `plan_tracker` for the current task as part of the executing-tasks lifecycle.
|