devflow-kit 1.0.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. package/CHANGELOG.md +30 -0
  2. package/README.md +13 -6
  3. package/dist/cli.js +5 -1
  4. package/dist/commands/ambient.d.ts +18 -0
  5. package/dist/commands/ambient.js +136 -0
  6. package/dist/commands/init.d.ts +2 -0
  7. package/dist/commands/init.js +97 -10
  8. package/dist/commands/memory.d.ts +22 -0
  9. package/dist/commands/memory.js +175 -0
  10. package/dist/commands/uninstall.js +72 -5
  11. package/dist/plugins.js +8 -1
  12. package/dist/utils/post-install.d.ts +12 -0
  13. package/dist/utils/post-install.js +82 -1
  14. package/dist/utils/safe-delete-install.d.ts +7 -0
  15. package/dist/utils/safe-delete-install.js +40 -5
  16. package/package.json +1 -1
  17. package/plugins/devflow-ambient/.claude-plugin/plugin.json +7 -0
  18. package/plugins/devflow-ambient/README.md +49 -0
  19. package/plugins/devflow-ambient/commands/ambient.md +110 -0
  20. package/plugins/devflow-ambient/skills/ambient-router/SKILL.md +89 -0
  21. package/plugins/devflow-ambient/skills/ambient-router/references/skill-catalog.md +64 -0
  22. package/plugins/devflow-audit-claude/.claude-plugin/plugin.json +1 -1
  23. package/plugins/devflow-code-review/.claude-plugin/plugin.json +1 -1
  24. package/plugins/devflow-core-skills/.claude-plugin/plugin.json +2 -1
  25. package/plugins/devflow-core-skills/skills/docs-framework/SKILL.md +10 -6
  26. package/plugins/devflow-core-skills/skills/test-driven-development/SKILL.md +139 -0
  27. package/plugins/devflow-core-skills/skills/test-driven-development/references/rationalization-prevention.md +111 -0
  28. package/plugins/devflow-debug/.claude-plugin/plugin.json +1 -1
  29. package/plugins/devflow-implement/.claude-plugin/plugin.json +1 -1
  30. package/plugins/devflow-resolve/.claude-plugin/plugin.json +1 -1
  31. package/plugins/devflow-self-review/.claude-plugin/plugin.json +1 -1
  32. package/plugins/devflow-specify/.claude-plugin/plugin.json +1 -1
  33. package/scripts/hooks/ambient-prompt.sh +48 -0
  34. package/scripts/hooks/background-memory-update.sh +49 -8
  35. package/scripts/hooks/ensure-memory-gitignore.sh +17 -0
  36. package/scripts/hooks/pre-compact-memory.sh +12 -6
  37. package/scripts/hooks/session-start-memory.sh +50 -8
  38. package/scripts/hooks/stop-update-memory.sh +10 -6
  39. package/shared/skills/ambient-router/SKILL.md +89 -0
  40. package/shared/skills/ambient-router/references/skill-catalog.md +64 -0
  41. package/shared/skills/docs-framework/SKILL.md +10 -6
  42. package/shared/skills/test-driven-development/SKILL.md +139 -0
  43. package/shared/skills/test-driven-development/references/rationalization-prevention.md +111 -0
  44. package/src/templates/managed-settings.json +14 -0
@@ -1,7 +1,7 @@
1
1
  #!/bin/bash
2
2
 
3
3
  # Working Memory: SessionStart Hook
4
- # Reads .docs/WORKING-MEMORY.md and injects it as additionalContext for the new session.
4
+ # Reads .memory/WORKING-MEMORY.md and injects it as additionalContext for the new session.
5
5
  # Also captures fresh git state so Claude knows what's changed since the memory was written.
6
6
  # Adds staleness warning if memory is >1 hour old.
7
7
 
@@ -17,12 +17,7 @@ if [ -z "$CWD" ]; then
17
17
  exit 0
18
18
  fi
19
19
 
20
- # Only activate in DevFlow-initialized projects
21
- if [ ! -d "$CWD/.docs" ]; then
22
- exit 0
23
- fi
24
-
25
- MEMORY_FILE="$CWD/.docs/WORKING-MEMORY.md"
20
+ MEMORY_FILE="$CWD/.memory/WORKING-MEMORY.md"
26
21
 
27
22
  # No memory file = nothing to restore (fresh project or first session)
28
23
  if [ ! -f "$MEMORY_FILE" ]; then
@@ -31,6 +26,13 @@ fi
31
26
 
32
27
  MEMORY_CONTENT=$(cat "$MEMORY_FILE")
33
28
 
29
+ # Read accumulated patterns if they exist
30
+ PATTERNS_FILE="$CWD/.memory/PROJECT-PATTERNS.md"
31
+ PATTERNS_CONTENT=""
32
+ if [ -f "$PATTERNS_FILE" ]; then
33
+ PATTERNS_CONTENT=$(cat "$PATTERNS_FILE")
34
+ fi
35
+
34
36
  # Compute staleness warning
35
37
  if stat --version &>/dev/null 2>&1; then
36
38
  FILE_MTIME=$(stat -c %Y "$MEMORY_FILE")
@@ -40,6 +42,30 @@ fi
40
42
  NOW=$(date +%s)
41
43
  AGE=$(( NOW - FILE_MTIME ))
42
44
 
45
+ # Check for pre-compact memory snapshot (compaction recovery)
46
+ BACKUP_FILE="$CWD/.memory/backup.json"
47
+ COMPACT_NOTE=""
48
+ if [ -f "$BACKUP_FILE" ]; then
49
+ BACKUP_MEMORY=$(jq -r '.memory_snapshot // ""' "$BACKUP_FILE" 2>/dev/null)
50
+ if [ -n "$BACKUP_MEMORY" ]; then
51
+ BACKUP_TS=$(jq -r '.timestamp // ""' "$BACKUP_FILE" 2>/dev/null)
52
+ BACKUP_EPOCH=0
53
+ if [ -n "$BACKUP_TS" ]; then
54
+ BACKUP_EPOCH=$(date -j -f "%Y-%m-%dT%H:%M:%SZ" "$BACKUP_TS" +%s 2>/dev/null \
55
+ || date -d "$BACKUP_TS" +%s 2>/dev/null \
56
+ || echo "0")
57
+ fi
58
+ if [ "$BACKUP_EPOCH" -gt "$FILE_MTIME" ]; then
59
+ COMPACT_NOTE="
60
+ --- PRE-COMPACT SNAPSHOT ($BACKUP_TS) ---
61
+ Context was compacted. This snapshot may contain decisions or progress not yet in working memory.
62
+
63
+ $BACKUP_MEMORY
64
+ "
65
+ fi
66
+ fi
67
+ fi
68
+
43
69
  STALE_WARNING=""
44
70
  if [ "$AGE" -gt 3600 ]; then
45
71
  HOURS=$(( AGE / 3600 ))
@@ -62,7 +88,18 @@ fi
62
88
  # Build context string
63
89
  CONTEXT="${STALE_WARNING}--- WORKING MEMORY (from previous session) ---
64
90
 
65
- ${MEMORY_CONTENT}
91
+ ${MEMORY_CONTENT}"
92
+
93
+ # Insert accumulated patterns between working memory and git state
94
+ if [ -n "$PATTERNS_CONTENT" ]; then
95
+ CONTEXT="${CONTEXT}
96
+
97
+ --- PROJECT PATTERNS (accumulated) ---
98
+
99
+ ${PATTERNS_CONTENT}"
100
+ fi
101
+
102
+ CONTEXT="${CONTEXT}
66
103
 
67
104
  --- CURRENT GIT STATE ---
68
105
  Branch: ${GIT_BRANCH}
@@ -75,6 +112,11 @@ Uncommitted changes:
75
112
  ${GIT_STATUS}"
76
113
  fi
77
114
 
115
+ if [ -n "$COMPACT_NOTE" ]; then
116
+ CONTEXT="${CONTEXT}
117
+ ${COMPACT_NOTE}"
118
+ fi
119
+
78
120
  # Output as additionalContext JSON envelope (Claude sees it as system context, not user-visible)
79
121
  jq -n --arg ctx "$CONTEXT" '{
80
122
  "hookSpecificOutput": {
@@ -1,7 +1,7 @@
1
1
  #!/bin/bash
2
2
 
3
3
  # Working Memory: Stop Hook
4
- # Spawns a background process to update .docs/WORKING-MEMORY.md asynchronously.
4
+ # Spawns a background process to update .memory/WORKING-MEMORY.md asynchronously.
5
5
  # The session ends immediately — no visible edit in the TUI.
6
6
  # On failure: does nothing (stale memory is better than fake data).
7
7
 
@@ -16,21 +16,25 @@ if ! command -v jq &>/dev/null; then exit 0; fi
16
16
 
17
17
  INPUT=$(cat)
18
18
 
19
- # Only activate in projects with .docs/ directory (DevFlow-initialized projects)
19
+ # Resolve project directory; bail out if it is missing
20
20
  CWD=$(echo "$INPUT" | jq -r '.cwd // ""' 2>/dev/null)
21
- if [ -z "$CWD" ] || [ ! -d "$CWD/.docs" ]; then
21
+ if [ -z "$CWD" ]; then
22
22
  exit 0
23
23
  fi
24
24
 
25
+ # Auto-create .memory/ and ensure .gitignore entries (idempotent after first run)
26
+ SCRIPT_DIR_EARLY="$(cd "$(dirname "$0")" && pwd)"
27
+ source "$SCRIPT_DIR_EARLY/ensure-memory-gitignore.sh" "$CWD" || exit 0
28
+
25
29
  # Logging (shared log file with background updater; [stop-hook] prefix distinguishes)
26
- MEMORY_FILE="$CWD/.docs/WORKING-MEMORY.md"
27
- LOG_FILE="$CWD/.docs/.working-memory-update.log"
30
+ MEMORY_FILE="$CWD/.memory/WORKING-MEMORY.md"
31
+ LOG_FILE="$CWD/.memory/.working-memory-update.log"
28
32
  log() { echo "[$(date -u '+%Y-%m-%dT%H:%M:%SZ')] [stop-hook] $1" >> "$LOG_FILE"; }
29
33
 
30
34
  # Throttle: skip if stop hook was triggered within the last 2 minutes
31
35
  # Uses a marker file touched BEFORE spawning the updater — prevents race condition
32
36
  # where multiple hooks see stale WORKING-MEMORY.md mtime and all bypass throttle.
33
- TRIGGER_MARKER="$CWD/.docs/.working-memory-last-trigger"
37
+ TRIGGER_MARKER="$CWD/.memory/.working-memory-last-trigger"
34
38
  if [ -f "$TRIGGER_MARKER" ]; then
35
39
  if stat --version &>/dev/null 2>&1; then
36
40
  MARKER_MTIME=$(stat -c %Y "$TRIGGER_MARKER")
@@ -0,0 +1,89 @@
1
+ ---
2
+ name: ambient-router
3
+ description: >-
4
+ Classify user intent and response depth for ambient mode. Auto-loads relevant
5
+ skills without explicit command invocation. Used by /ambient command and
6
+ always-on UserPromptSubmit hook.
7
+ user-invocable: false
8
+ allowed-tools: Read, Grep, Glob
9
+ ---
10
+
11
+ # Ambient Router
12
+
13
+ Classify user intent and auto-load relevant skills. Zero overhead for simple requests, skill injection for substantive work, workflow nudges for complex tasks.
14
+
15
+ ## Iron Law
16
+
17
+ > **PROPORTIONAL RESPONSE**
18
+ >
19
+ > Match effort to intent. Never apply heavyweight processes to lightweight requests.
20
+ > A chat question gets zero overhead. A 3-file feature gets 2-3 skills. A system
21
+ > refactor gets a nudge toward `/implement`. Misclassification in either direction
22
+ > is a failure.
23
+
24
+ ---
25
+
26
+ ## Step 1: Classify Intent
27
+
28
+ Determine what the user is trying to do from their prompt.
29
+
30
+ | Intent | Signal Words / Patterns | Examples |
31
+ |--------|------------------------|---------|
32
+ | **BUILD** | "add", "create", "implement", "build", "write", "make" | "add a login form", "create an API endpoint" |
33
+ | **DEBUG** | "fix", "bug", "broken", "failing", "error", "why does" | "fix the auth error", "why is this test failing" |
34
+ | **REVIEW** | "check", "look at", "review", "is this ok", "any issues" | "check this function", "any issues with this?" |
35
+ | **PLAN** | "how should", "design", "architecture", "approach", "strategy" | "how should I structure auth?", "what's the approach for caching?" |
36
+ | **EXPLORE** | "what is", "where is", "find", "show me", "explain", "how does" | "where is the config?", "explain this function" |
37
+ | **CHAT** | greetings, meta-questions, confirmations, short responses | "thanks", "yes", "what can you do?" |
38
+
39
+ **Ambiguous prompts:** Default to the lowest-overhead classification. "Update the README" → BUILD/STANDARD. Git operations like "commit this" → QUICK.
40
+
41
+ ## Step 2: Classify Depth
42
+
43
+ Determine how much enforcement the prompt warrants.
44
+
45
+ | Depth | Criteria | Action |
46
+ |-------|----------|--------|
47
+ | **QUICK** | CHAT intent. EXPLORE with no analytical depth ("where is X?"). Git/devops operations (commit, push, merge, branch, pr, deploy, reinstall). Single-word continuations. | Respond normally. Zero overhead. Do not state classification. |
48
+ | **STANDARD** | BUILD/DEBUG/REVIEW/PLAN intent (any word count). EXPLORE with analytical depth ("analyze our X", "discuss how Y works"). | Read and apply 2-3 relevant skills from the selection matrix below. State classification briefly. |
49
+ | **ESCALATE** | Multi-file architectural change, system-wide scope, > 5 files. Detailed implementation plan (100+ words with plan structure). | Respond at best effort + recommend: "This looks like it would benefit from `/implement` for full lifecycle management." |
50
+
51
+ ## Step 3: Select Skills (STANDARD depth only)
52
+
53
+ Based on classified intent, read the following skills to inform your response.
54
+
55
+ | Intent | Primary Skills | Secondary (if file type matches) |
56
+ |--------|---------------|----------------------------------|
57
+ | **BUILD** | test-driven-development, implementation-patterns | typescript (.ts), react (.tsx/.jsx), frontend-design (CSS/UI), input-validation (forms/API), security-patterns (auth/crypto) |
58
+ | **DEBUG** | test-patterns, core-patterns | git-safety (if git operations involved) |
59
+ | **REVIEW** | self-review, core-patterns | test-patterns |
60
+ | **PLAN** | implementation-patterns | core-patterns |
61
+
62
+ **Excluded from ambient** (review-command-only): review-methodology, complexity-patterns, consistency-patterns, database-patterns, dependencies-patterns, documentation-patterns, regression-patterns, architecture-patterns, accessibility, performance-patterns.
63
+
64
+ See `references/skill-catalog.md` for the full skill-to-intent mapping with file pattern triggers.
65
+
66
+ ## Step 4: Apply
67
+
68
+ - **QUICK:** Respond directly. No preamble, no classification statement.
69
+ - **STANDARD:** State classification briefly: `Ambient: BUILD/STANDARD. Loading: test-driven-development, implementation-patterns.` Then read the selected skills and apply their patterns to your response. For BUILD intent, enforce RED-GREEN-REFACTOR from test-driven-development.
70
+ - **ESCALATE:** Respond with your best effort, then append: `> This task spans multiple files/systems. Consider \`/implement\` for full lifecycle (exploration → planning → implementation → review).`
71
+
72
+ ---
73
+
74
+ ## Transparency Rules
75
+
76
+ 1. **QUICK → silent.** No classification output.
77
+ 2. **STANDARD → brief statement.** One line: intent, depth, skills loaded.
78
+ 3. **ESCALATE → recommendation.** Best-effort response + workflow nudge.
79
+ 4. **Never lie about classification.** If uncertain, say so.
80
+ 5. **Never over-classify.** When in doubt, go one tier lower.
81
+
82
+ ## Edge Cases
83
+
84
+ | Case | Handling |
85
+ |------|----------|
86
+ | Mixed intent ("fix this bug and add a test") | Use the higher-overhead intent (BUILD > DEBUG) |
87
+ | Continuation of previous conversation | Inherit previous classification unless prompt clearly shifts |
88
+ | User explicitly requests no enforcement | Respect immediately — classify as QUICK |
89
+ | Prompt references specific DevFlow command | Skip ambient — the command has its own orchestration |
@@ -0,0 +1,64 @@
1
+ # Ambient Router — Skill Catalog
2
+
3
+ Full mapping of DevFlow skills to ambient intents and file-type triggers. The ambient-router SKILL.md references this for detailed selection logic.
4
+
5
+ ## Skills Available for Ambient Loading
6
+
7
+ These skills may be loaded during STANDARD-depth ambient routing.
8
+
9
+ ### BUILD Intent
10
+
11
+ | Skill | When to Load | File Patterns |
12
+ |-------|-------------|---------------|
13
+ | test-driven-development | Always for BUILD | `*.ts`, `*.tsx`, `*.js`, `*.jsx`, `*.py` |
14
+ | implementation-patterns | Always for BUILD | Any code file |
15
+ | typescript | TypeScript files in scope | `*.ts`, `*.tsx` |
16
+ | react | React components in scope | `*.tsx`, `*.jsx` |
17
+ | frontend-design | UI/styling work | `*.css`, `*.scss`, `*.tsx` with styling keywords |
18
+ | input-validation | Forms, APIs, user input | Files with form/input/validation keywords |
19
+ | security-patterns | Auth, crypto, secrets | Files with auth/token/crypto/password keywords |
20
+
21
+ ### DEBUG Intent
22
+
23
+ | Skill | When to Load | File Patterns |
24
+ |-------|-------------|---------------|
25
+ | test-patterns | Always for DEBUG | Any test-related context |
26
+ | core-patterns | Always for DEBUG | Any code file |
27
+ | git-safety | Git operations involved | User mentions git, rebase, merge, etc. |
28
+
29
+ ### REVIEW Intent
30
+
31
+ | Skill | When to Load | File Patterns |
32
+ |-------|-------------|---------------|
33
+ | self-review | Always for REVIEW | Any code file |
34
+ | core-patterns | Always for REVIEW | Any code file |
35
+ | test-patterns | Test files in scope | `*.test.*`, `*.spec.*` |
36
+
37
+ ### PLAN Intent
38
+
39
+ | Skill | When to Load | File Patterns |
40
+ |-------|-------------|---------------|
41
+ | implementation-patterns | Always for PLAN | Any planning context |
42
+ | core-patterns | Architectural planning | System design discussions |
43
+
44
+ ## Skills Excluded from Ambient
45
+
46
+ These skills are loaded only by explicit DevFlow commands (primarily `/code-review`):
47
+
48
+ - review-methodology — Full review process (6-step, 3-category classification)
49
+ - complexity-patterns — Cyclomatic complexity, deep nesting analysis
50
+ - consistency-patterns — Naming convention, pattern deviation detection
51
+ - database-patterns — Index analysis, query optimization, migration safety
52
+ - dependencies-patterns — CVE detection, license audit, outdated packages
53
+ - documentation-patterns — Doc drift, stale comments, missing API docs
54
+ - regression-patterns — Lost functionality, broken exports, behavioral changes
55
+ - architecture-patterns — SOLID analysis, coupling detection, layering issues
56
+ - accessibility — WCAG compliance, ARIA roles, keyboard navigation
57
+ - performance-patterns — N+1 queries, memory leaks, caching opportunities
58
+
59
+ ## Selection Limits
60
+
61
+ - **Maximum 3 skills** per ambient response (primary skills first; any remaining slots go to secondary skills)
62
+ - **Primary skills** are always loaded for the classified intent
63
+ - **Secondary skills** are loaded only when file patterns match conversation context
64
+ - If more than 3 skills seem relevant, this is an ESCALATE signal
@@ -32,10 +32,14 @@ All generated documentation lives under `.docs/` in the project root:
32
32
  │ ├── {timestamp}.md
33
33
  │ ├── compact/{timestamp}.md
34
34
  │ └── INDEX.md
35
- ├── swarm/ # Swarm operation state
36
- ├── state.json
37
- └── plans/
38
- └── WORKING-MEMORY.md # Auto-maintained by Stop hook (overwritten)
35
+ └── swarm/ # Swarm operation state
36
+ ├── state.json
37
+ └── plans/
38
+
39
+ .memory/
40
+ ├── WORKING-MEMORY.md # Auto-maintained by Stop hook (overwritten)
41
+ ├── PROJECT-PATTERNS.md # Accumulated patterns (merged across sessions)
42
+ └── backup.json # Pre-compact snapshot (git state + working-memory snapshot)
39
43
  ```
40
44
 
41
45
  ---
@@ -92,7 +96,7 @@ source .devflow/scripts/docs-helpers.sh 2>/dev/null || {
92
96
  | Agent | Output Location | Behavior |
93
97
  |-------|-----------------|----------|
94
98
  | Reviewer | `.docs/reviews/{branch-slug}/{type}-report.{timestamp}.md` | Creates new |
95
- | Working Memory | `.docs/WORKING-MEMORY.md` | Overwrites (auto-maintained by Stop hook) |
99
+ | Working Memory | `.memory/WORKING-MEMORY.md` | Overwrites (auto-maintained by Stop hook) |
96
100
 
97
101
  ### Agents That Don't Persist
98
102
 
@@ -120,7 +124,7 @@ When creating or modifying persisting agents:
120
124
 
121
125
  This framework is used by:
122
126
  - **Review agents**: Creates review reports
123
- - **Working Memory hooks**: Auto-maintains `.docs/WORKING-MEMORY.md`
127
+ - **Working Memory hooks**: Auto-maintains `.memory/WORKING-MEMORY.md`
124
128
 
125
129
  All persisting agents should load this skill to ensure consistent documentation.
126
130
 
@@ -0,0 +1,139 @@
1
+ ---
2
+ name: test-driven-development
3
+ description: >-
4
+ Enforce RED-GREEN-REFACTOR cycle during implementation. Write failing tests before
5
+ production code. Distinct from test-patterns (which reviews test quality) — this
6
+ skill enforces the TDD workflow during code generation.
7
+ user-invocable: false
8
+ allowed-tools: Read, Grep, Glob
9
+ activation:
10
+ file-patterns:
11
+ - "**/*.ts"
12
+ - "**/*.tsx"
13
+ - "**/*.js"
14
+ - "**/*.jsx"
15
+ - "**/*.py"
16
+ exclude:
17
+ - "node_modules/**"
18
+ - "dist/**"
19
+ - "**/*.test.*"
20
+ - "**/*.spec.*"
21
+ ---
22
+
23
+ # Test-Driven Development
24
+
25
+ Enforce the RED-GREEN-REFACTOR cycle for all implementation work. Tests define the design. Code satisfies the tests. Refactoring improves the design without changing behavior.
26
+
27
+ ## Iron Law
28
+
29
+ > **TESTS FIRST, ALWAYS**
30
+ >
31
+ > Write the failing test before the production code. No exceptions. If you catch
32
+ > yourself writing production code without a failing test, stop immediately, delete
33
+ > the production code, write the test, watch it fail, then write the minimum code
34
+ > to make it pass. The test IS the specification.
35
+
36
+ ---
37
+
38
+ ## The Cycle
39
+
40
+ ### Step 1: RED — Write a Failing Test
41
+
42
+ Write a test that describes the behavior you want. Run it. Watch it fail. The failure message IS your specification.
43
+
44
+ ```
45
+ Describe what the code SHOULD do, not how it does it.
46
+ One behavior per test. One assertion per test (ideally).
47
+ Name tests as sentences: "returns error when email is invalid"
48
+ ```
49
+
50
+ **Checkpoint:** The test MUST fail before proceeding. A test that passes immediately proves nothing.
51
+
52
+ ### Step 2: GREEN — Write Minimum Code to Pass
53
+
54
+ Write the simplest production code that makes the failing test pass. No more, no less.
55
+
56
+ ```
57
+ Hardcode first if that's simplest. Generalize when the next test forces it.
58
+ Don't write code "you'll need later." Write code the test demands NOW.
59
+ Don't optimize. Don't refactor. Don't clean up. Just pass the test.
60
+ ```
61
+
62
+ **Checkpoint:** All tests pass. If any test fails, fix it before moving on.
63
+
64
+ ### Step 3: REFACTOR — Improve Without Changing Behavior
65
+
66
+ Now clean up. Extract helpers, rename variables, simplify logic. Tests stay green throughout.
67
+
68
+ ```
69
+ Run tests after every refactoring step.
70
+ If a test breaks during refactor, undo immediately — you changed behavior.
71
+ Apply DRY, extract patterns, improve readability.
72
+ ```
73
+
74
+ **Checkpoint:** All tests still pass. Code is clean. Repeat from Step 1 for next behavior.
75
+
76
+ ---
77
+
78
+ ## Rationalization Prevention
79
+
80
+ These are the excuses developers use to skip TDD. Recognize and reject them.
81
+
82
+ | Excuse | Why It Feels Right | Why It's Wrong | Correct Action |
83
+ |--------|-------------------|---------------|----------------|
84
+ | "I'll write tests after" | Need to see the shape first | Tests ARE the shape — they define the interface before implementation exists | Write the test first |
85
+ | "Too simple to test" | It's just a getter/setter | Getters break, defaults change, edge cases hide in "simple" code | Write it — takes 30 seconds |
86
+ | "I'll refactor later" | Just get it working now | "Later" never comes; technical debt compounds silently | Refactor now in Step 3 |
87
+ | "Test is too hard to write" | Setup is complex, mocking is painful | Hard-to-test code = bad design; the test is telling you the interface is wrong | Simplify the interface first |
88
+ | "Need to see the whole picture" | Can't test what I haven't designed yet | TDD IS design; each test reveals the next piece of the interface | Let the test guide the design |
89
+ | "Tests slow me down" | Faster to just write the code | Faster until the first regression; TDD is faster for anything > 50 lines | Trust the cycle |
90
+
91
+ See `references/rationalization-prevention.md` for extended examples with code.
92
+
93
+ ---
94
+
95
+ ## Process Enforcement
96
+
97
+ When implementing any feature under ambient BUILD/STANDARD:
98
+
99
+ 1. **Identify the first behavior** — What is the simplest thing this feature must do?
100
+ 2. **Write the test** — Describe that behavior as a failing test
101
+ 3. **Run the test** — Confirm it fails (RED)
102
+ 4. **Write minimum code** — Just enough to pass (GREEN)
103
+ 5. **Refactor** — Clean up while tests stay green (REFACTOR)
104
+ 6. **Repeat** — Next behavior, next test, next cycle
105
+
106
+ ### File Organization
107
+
108
+ - Test file lives next to production file: `user.ts` → `user.test.ts`
109
+ - Follow project's existing test conventions (Jest, Vitest, pytest, etc.)
110
+ - Import the module under test, not internal helpers
111
+
112
+ ### What to Test
113
+
114
+ | Test | Don't Test |
115
+ |------|-----------|
116
+ | Public API behavior | Private implementation details |
117
+ | Error conditions and edge cases | Framework internals |
118
+ | Integration points (boundaries) | Third-party library correctness |
119
+ | State transitions | Getter/setter plumbing (unless non-trivial) |
120
+
121
+ ---
122
+
123
+ ## When TDD Does Not Apply
124
+
125
+ - **QUICK depth** — Ambient classified as QUICK (chat, exploration, trivial edits)
126
+ - **Non-code tasks** — Documentation, configuration, CI changes
127
+ - **Exploratory prototyping** — User explicitly says "just spike this" or "prototype"
128
+ - **Existing test suite changes** — Modifying tests themselves (test-patterns skill applies instead)
129
+
130
+ When skipping TDD, never rationalize. State clearly: "Skipping TDD because: [specific reason from list above]."
131
+
132
+ ---
133
+
134
+ ## Integration with Ambient Mode
135
+
136
+ - **BUILD/STANDARD** → TDD enforced. Every new function/method gets test-first treatment.
137
+ - **BUILD/QUICK** → TDD skipped (trivial single-file edit).
138
+ - **BUILD/ESCALATE** → TDD mentioned in nudge toward `/implement`.
139
+ - **DEBUG/STANDARD** → TDD applies to the fix: write a test that reproduces the bug first, then fix.
@@ -0,0 +1,111 @@
1
+ # TDD Rationalization Prevention — Extended Examples
2
+
3
+ Detailed code examples showing how each rationalization leads to worse outcomes.
4
+
5
+ ## "I'll write tests after"
6
+
7
+ ### What happens:
8
+
9
+ ```typescript
10
+ // Developer writes production code first
11
+ function calculateDiscount(price: number, tier: string): number {
12
+ if (tier === 'gold') return price * 0.8;
13
+ if (tier === 'silver') return price * 0.9;
14
+ return price;
15
+ }
16
+
17
+ // Then "writes tests after" — but only for the happy path they remember
18
+ test('gold tier gets 20% off', () => {
19
+ expect(calculateDiscount(100, 'gold')).toBe(80);
20
+ });
21
+ // Missing: negative prices, unknown tiers, zero prices, NaN handling
22
+ ```
23
+
24
+ ### What TDD would have caught:
25
+
26
+ ```typescript
27
+ // Test first — forces you to think about the contract
28
+ test('returns error for negative price', () => {
29
+ expect(calculateDiscount(-100, 'gold')).toEqual({ ok: false, error: 'NEGATIVE_PRICE' });
30
+ });
31
+ // Now the interface includes error handling from the start
32
+ ```
33
+
34
+ ## "Too simple to test"
35
+
36
+ ### What happens:
37
+
38
+ ```typescript
39
+ // "It's just a config getter, no test needed"
40
+ function getMaxRetries(): number {
41
+ return parseInt(process.env.MAX_RETRIES || '3');
42
+ }
43
+ // 6 months later: someone sets MAX_RETRIES="three" and prod crashes with NaN retries
44
+ ```
45
+
46
+ ### What TDD would have caught:
47
+
48
+ ```typescript
49
+ test('returns default when env var is not a number', () => {
50
+ process.env.MAX_RETRIES = 'three';
51
+ expect(getMaxRetries()).toBe(3); // Forces validation logic
52
+ });
53
+ ```
54
+
55
+ ## "Test is too hard to write"
56
+
57
+ ### What happens:
58
+
59
+ ```typescript
60
+ // "I can't test this easily because it needs database + email + filesystem"
61
+ async function processOrder(orderId: string) {
62
+ const db = new Database();
63
+ const order = await db.find(orderId);
64
+ await sendEmail(order.customerEmail, 'Your order is processing');
65
+ await fs.writeFile(`/invoices/${orderId}.pdf`, generateInvoice(order));
66
+ await db.update(orderId, { status: 'processing' });
67
+ }
68
+ // Result: untestable monolith, test would need real DB + email + filesystem
69
+ ```
70
+
71
+ ### What TDD forces:
72
+
73
+ ```typescript
74
+ // Hard-to-test = bad design. TDD forces dependency injection:
75
+ async function processOrder(
76
+ orderId: string,
77
+ deps: { db: OrderRepository; emailer: Emailer; invoices: InvoiceStore }
78
+ ): Promise<Result<void, OrderError>> {
79
+ // Now trivially testable with mocks
80
+ }
81
+ ```
82
+
83
+ ## "I'll refactor later"
84
+
85
+ ### What happens:
86
+
87
+ ```typescript
88
+ // Sprint 1: "just get it working"
89
+ function handleRequest(req: any) {
90
+ if (req.type === 'create') { /* 50 lines */ }
91
+ else if (req.type === 'update') { /* 50 lines */ }
92
+ else if (req.type === 'delete') { /* 30 lines */ }
93
+ // Sprint 2-10: more conditions added, function grows to 500 lines
94
+ // "Refactor later" never comes because nobody wants to touch it
95
+ }
96
+ ```
97
+
98
+ ### What TDD enforces:
99
+
100
+ Step 3 (REFACTOR) happens every cycle. The function never grows beyond what's clean because you clean it every 5-10 minutes.
101
+
102
+ ## "Tests slow me down"
103
+
104
+ ### The math:
105
+
106
+ | Approach | Time to write | Time to first bug | Time to fix bug | Total (1 month) |
107
+ |----------|:---:|:---:|:---:|:---:|
108
+ | No TDD | 2h | 4h | 3h (no repro test) | 9h+ |
109
+ | TDD | 3h | Caught in test | 15min (test pinpoints) | 3h 15min |
110
+
111
+ TDD is slower while writing the first version (3h vs 2h above). It's faster for everything after that.
@@ -5,6 +5,15 @@
5
5
  "Bash(rm -rf ~*)",
6
6
  "Bash(rm -rf .*)",
7
7
  "Bash(* rm -rf /*)",
8
+ "Bash(rm -r /*)",
9
+ "Bash(rm -r ~*)",
10
+ "Bash(rm -r .*)",
11
+ "Bash(rm -fr /*)",
12
+ "Bash(rm -fr ~*)",
13
+ "Bash(rm -fr .*)",
14
+ "Bash(rm -f /*)",
15
+ "Bash(rm -f ~*)",
16
+ "Bash(rm -f .*)",
8
17
  "Bash(dd if=*)",
9
18
  "Bash(dd*of=/dev/*)",
10
19
  "Bash(mkfs*)",
@@ -85,12 +94,17 @@
85
94
  "Bash(crontab*)",
86
95
  "Bash(rm /var/log*)",
87
96
  "Bash(rm -rf /var/log*)",
97
+ "Bash(rm -r /var/log*)",
98
+ "Bash(rm -f /var/log*)",
99
+ "Bash(rm -fr /var/log*)",
88
100
  "Bash(> /var/log*)",
89
101
  "Bash(truncate /var/log*)",
90
102
  "Bash(history -c*)",
91
103
  "Bash(history -w*)",
92
104
  "Bash(rm ~/.bash_history*)",
105
+ "Bash(rm -f ~/.bash_history*)",
93
106
  "Bash(rm ~/.zsh_history*)",
107
+ "Bash(rm -f ~/.zsh_history*)",
94
108
  "Bash(unset HISTFILE*)",
95
109
  "Bash(curl 169.254.169.254*)",
96
110
  "Bash(wget 169.254.169.254*)",