safeword 0.2.3 → 0.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (235) hide show
  1. package/.claude/commands/arch-review.md +32 -0
  2. package/.claude/commands/lint.md +6 -0
  3. package/.claude/commands/quality-review.md +13 -0
  4. package/.claude/commands/setup-linting.md +6 -0
  5. package/.claude/hooks/auto-lint.sh +6 -0
  6. package/.claude/hooks/auto-quality-review.sh +170 -0
  7. package/.claude/hooks/check-linting-sync.sh +17 -0
  8. package/.claude/hooks/inject-timestamp.sh +6 -0
  9. package/.claude/hooks/question-protocol.sh +12 -0
  10. package/.claude/hooks/run-linters.sh +8 -0
  11. package/.claude/hooks/run-quality-review.sh +76 -0
  12. package/.claude/hooks/version-check.sh +10 -0
  13. package/.claude/mcp/README.md +96 -0
  14. package/.claude/mcp/arcade.sample.json +9 -0
  15. package/.claude/mcp/context7.sample.json +7 -0
  16. package/.claude/mcp/playwright.sample.json +7 -0
  17. package/.claude/settings.json +62 -0
  18. package/.claude/skills/quality-reviewer/SKILL.md +190 -0
  19. package/.claude/skills/safeword-quality-reviewer/SKILL.md +13 -0
  20. package/.env.arcade.example +4 -0
  21. package/.env.example +11 -0
  22. package/.gitmodules +4 -0
  23. package/.safeword/SAFEWORD.md +33 -0
  24. package/.safeword/eslint/eslint-base.mjs +101 -0
  25. package/.safeword/guides/architecture-guide.md +404 -0
  26. package/.safeword/guides/code-philosophy.md +174 -0
  27. package/.safeword/guides/context-files-guide.md +405 -0
  28. package/.safeword/guides/data-architecture-guide.md +183 -0
  29. package/.safeword/guides/design-doc-guide.md +165 -0
  30. package/.safeword/guides/learning-extraction.md +515 -0
  31. package/.safeword/guides/llm-instruction-design.md +239 -0
  32. package/.safeword/guides/llm-prompting.md +95 -0
  33. package/.safeword/guides/tdd-best-practices.md +570 -0
  34. package/.safeword/guides/test-definitions-guide.md +243 -0
  35. package/.safeword/guides/testing-methodology.md +573 -0
  36. package/.safeword/guides/user-story-guide.md +237 -0
  37. package/.safeword/guides/zombie-process-cleanup.md +214 -0
  38. package/{templates → .safeword}/hooks/agents-md-check.sh +0 -0
  39. package/{templates → .safeword}/hooks/post-tool.sh +0 -0
  40. package/{templates → .safeword}/hooks/pre-commit.sh +0 -0
  41. package/.safeword/planning/002-user-story-quality-evaluation.md +1840 -0
  42. package/.safeword/planning/003-langsmith-eval-setup-prompt.md +363 -0
  43. package/.safeword/planning/004-llm-eval-test-cases.md +3226 -0
  44. package/.safeword/planning/005-architecture-enforcement-system.md +169 -0
  45. package/.safeword/planning/006-reactive-fix-prevention-research.md +135 -0
  46. package/.safeword/planning/011-cli-ux-vision.md +330 -0
  47. package/.safeword/planning/012-project-structure-cleanup.md +154 -0
  48. package/.safeword/planning/README.md +39 -0
  49. package/.safeword/planning/automation-plan-v2.md +1225 -0
  50. package/.safeword/planning/automation-plan-v3.md +1291 -0
  51. package/.safeword/planning/automation-plan.md +3058 -0
  52. package/.safeword/planning/design/005-cli-implementation.md +343 -0
  53. package/.safeword/planning/design/013-cli-self-contained-templates.md +596 -0
  54. package/.safeword/planning/design/013a-eslint-plugin-suite.md +256 -0
  55. package/.safeword/planning/design/013b-implementation-snippets.md +385 -0
  56. package/.safeword/planning/design/013c-config-isolation-strategy.md +242 -0
  57. package/.safeword/planning/design/code-philosophy-improvements.md +60 -0
  58. package/.safeword/planning/mcp-analysis.md +545 -0
  59. package/.safeword/planning/phase2-subagents-vs-skills-analysis.md +451 -0
  60. package/.safeword/planning/settings-improvements.md +970 -0
  61. package/.safeword/planning/test-definitions/005-cli-implementation.md +1301 -0
  62. package/.safeword/planning/test-definitions/cli-self-contained-templates.md +205 -0
  63. package/.safeword/planning/user-stories/001-guides-review-user-stories.md +1381 -0
  64. package/.safeword/planning/user-stories/003-reactive-fix-prevention.md +132 -0
  65. package/.safeword/planning/user-stories/004-technical-constraints.md +86 -0
  66. package/.safeword/planning/user-stories/005-cli-implementation.md +311 -0
  67. package/.safeword/planning/user-stories/cli-self-contained-templates.md +172 -0
  68. package/.safeword/planning/versioned-distribution.md +740 -0
  69. package/.safeword/prompts/arch-review.md +43 -0
  70. package/.safeword/prompts/quality-review.md +11 -0
  71. package/.safeword/scripts/arch-review.sh +235 -0
  72. package/.safeword/scripts/check-linting-sync.sh +58 -0
  73. package/.safeword/scripts/setup-linting.sh +559 -0
  74. package/.safeword/templates/architecture-template.md +136 -0
  75. package/.safeword/templates/ci/architecture-check.yml +79 -0
  76. package/.safeword/templates/design-doc-template.md +127 -0
  77. package/.safeword/templates/test-definitions-feature.md +100 -0
  78. package/.safeword/templates/ticket-template.md +74 -0
  79. package/.safeword/templates/user-stories-template.md +82 -0
  80. package/.safeword/tickets/001-guides-review-user-stories.md +83 -0
  81. package/.safeword/tickets/002-architecture-enforcement.md +211 -0
  82. package/.safeword/tickets/003-reactive-fix-prevention.md +57 -0
  83. package/.safeword/tickets/004-technical-constraints-in-user-stories.md +39 -0
  84. package/.safeword/tickets/005-cli-implementation.md +248 -0
  85. package/.safeword/tickets/006-flesh-out-skills.md +43 -0
  86. package/.safeword/tickets/007-flesh-out-questioning.md +44 -0
  87. package/.safeword/tickets/008-upgrade-questioning.md +58 -0
  88. package/.safeword/tickets/009-naming-conventions.md +41 -0
  89. package/.safeword/tickets/010-safeword-md-cleanup.md +34 -0
  90. package/.safeword/tickets/011-cursor-setup.md +86 -0
  91. package/.safeword/tickets/README.md +73 -0
  92. package/.safeword/version +1 -0
  93. package/AGENTS.md +59 -0
  94. package/CLAUDE.md +12 -0
  95. package/README.md +347 -0
  96. package/docs/001-cli-implementation-plan.md +856 -0
  97. package/docs/elite-dx-implementation-plan.md +1034 -0
  98. package/framework/README.md +131 -0
  99. package/framework/mcp/README.md +96 -0
  100. package/framework/mcp/arcade.sample.json +8 -0
  101. package/framework/mcp/context7.sample.json +6 -0
  102. package/framework/mcp/playwright.sample.json +6 -0
  103. package/framework/scripts/arch-review.sh +235 -0
  104. package/framework/scripts/check-linting-sync.sh +58 -0
  105. package/framework/scripts/load-env.sh +49 -0
  106. package/framework/scripts/setup-claude.sh +223 -0
  107. package/framework/scripts/setup-linting.sh +559 -0
  108. package/framework/scripts/setup-quality.sh +477 -0
  109. package/framework/scripts/setup-safeword.sh +550 -0
  110. package/framework/templates/ci/architecture-check.yml +78 -0
  111. package/learnings/ai-sdk-v5-breaking-changes.md +178 -0
  112. package/learnings/e2e-test-zombie-processes.md +231 -0
  113. package/learnings/milkdown-crepe-editor-property.md +96 -0
  114. package/learnings/prosemirror-fragment-traversal.md +119 -0
  115. package/package.json +19 -43
  116. package/packages/cli/AGENTS.md +1 -0
  117. package/packages/cli/ARCHITECTURE.md +279 -0
  118. package/packages/cli/package.json +51 -0
  119. package/packages/cli/src/cli.ts +63 -0
  120. package/packages/cli/src/commands/check.ts +166 -0
  121. package/packages/cli/src/commands/diff.ts +209 -0
  122. package/packages/cli/src/commands/reset.ts +190 -0
  123. package/packages/cli/src/commands/setup.ts +325 -0
  124. package/packages/cli/src/commands/upgrade.ts +163 -0
  125. package/packages/cli/src/index.ts +3 -0
  126. package/packages/cli/src/templates/config.ts +58 -0
  127. package/packages/cli/src/templates/content.ts +18 -0
  128. package/packages/cli/src/templates/index.ts +12 -0
  129. package/packages/cli/src/utils/agents-md.ts +66 -0
  130. package/packages/cli/src/utils/fs.ts +179 -0
  131. package/packages/cli/src/utils/git.ts +124 -0
  132. package/packages/cli/src/utils/hooks.ts +29 -0
  133. package/packages/cli/src/utils/output.ts +60 -0
  134. package/packages/cli/src/utils/project-detector.test.ts +185 -0
  135. package/packages/cli/src/utils/project-detector.ts +44 -0
  136. package/packages/cli/src/utils/version.ts +28 -0
  137. package/packages/cli/src/version.ts +6 -0
  138. package/packages/cli/templates/SAFEWORD.md +776 -0
  139. package/packages/cli/templates/doc-templates/architecture-template.md +136 -0
  140. package/packages/cli/templates/doc-templates/design-doc-template.md +134 -0
  141. package/packages/cli/templates/doc-templates/test-definitions-feature.md +131 -0
  142. package/packages/cli/templates/doc-templates/ticket-template.md +82 -0
  143. package/packages/cli/templates/doc-templates/user-stories-template.md +92 -0
  144. package/packages/cli/templates/guides/architecture-guide.md +423 -0
  145. package/packages/cli/templates/guides/code-philosophy.md +195 -0
  146. package/packages/cli/templates/guides/context-files-guide.md +457 -0
  147. package/packages/cli/templates/guides/data-architecture-guide.md +200 -0
  148. package/packages/cli/templates/guides/design-doc-guide.md +171 -0
  149. package/packages/cli/templates/guides/learning-extraction.md +552 -0
  150. package/packages/cli/templates/guides/llm-instruction-design.md +248 -0
  151. package/packages/cli/templates/guides/llm-prompting.md +102 -0
  152. package/packages/cli/templates/guides/tdd-best-practices.md +615 -0
  153. package/packages/cli/templates/guides/test-definitions-guide.md +334 -0
  154. package/packages/cli/templates/guides/testing-methodology.md +618 -0
  155. package/packages/cli/templates/guides/user-story-guide.md +256 -0
  156. package/packages/cli/templates/guides/zombie-process-cleanup.md +219 -0
  157. package/packages/cli/templates/hooks/agents-md-check.sh +27 -0
  158. package/packages/cli/templates/hooks/post-tool.sh +4 -0
  159. package/packages/cli/templates/hooks/pre-commit.sh +10 -0
  160. package/packages/cli/templates/prompts/arch-review.md +43 -0
  161. package/packages/cli/templates/prompts/quality-review.md +10 -0
  162. package/packages/cli/templates/skills/safeword-quality-reviewer/SKILL.md +207 -0
  163. package/packages/cli/tests/commands/check.test.ts +129 -0
  164. package/packages/cli/tests/commands/cli.test.ts +89 -0
  165. package/packages/cli/tests/commands/diff.test.ts +115 -0
  166. package/packages/cli/tests/commands/reset.test.ts +310 -0
  167. package/packages/cli/tests/commands/self-healing.test.ts +170 -0
  168. package/packages/cli/tests/commands/setup-blocking.test.ts +71 -0
  169. package/packages/cli/tests/commands/setup-core.test.ts +135 -0
  170. package/packages/cli/tests/commands/setup-git.test.ts +139 -0
  171. package/packages/cli/tests/commands/setup-hooks.test.ts +334 -0
  172. package/packages/cli/tests/commands/setup-linting.test.ts +189 -0
  173. package/packages/cli/tests/commands/setup-noninteractive.test.ts +80 -0
  174. package/packages/cli/tests/commands/setup-templates.test.ts +181 -0
  175. package/packages/cli/tests/commands/upgrade.test.ts +215 -0
  176. package/packages/cli/tests/helpers.ts +243 -0
  177. package/packages/cli/tests/npm-package.test.ts +83 -0
  178. package/packages/cli/tests/technical-constraints.test.ts +96 -0
  179. package/packages/cli/tsconfig.json +25 -0
  180. package/packages/cli/tsup.config.ts +11 -0
  181. package/packages/cli/vitest.config.ts +23 -0
  182. package/promptfoo.yaml +3270 -0
  183. package/dist/check-3NGQ4NR5.js +0 -129
  184. package/dist/check-3NGQ4NR5.js.map +0 -1
  185. package/dist/chunk-2XWIUEQK.js +0 -190
  186. package/dist/chunk-2XWIUEQK.js.map +0 -1
  187. package/dist/chunk-GZRQL3SX.js +0 -146
  188. package/dist/chunk-GZRQL3SX.js.map +0 -1
  189. package/dist/chunk-ORQHKDT2.js +0 -10
  190. package/dist/chunk-ORQHKDT2.js.map +0 -1
  191. package/dist/chunk-W66Z3C5H.js +0 -21
  192. package/dist/chunk-W66Z3C5H.js.map +0 -1
  193. package/dist/cli.d.ts +0 -1
  194. package/dist/cli.js +0 -34
  195. package/dist/cli.js.map +0 -1
  196. package/dist/diff-Y6QTAW4O.js +0 -166
  197. package/dist/diff-Y6QTAW4O.js.map +0 -1
  198. package/dist/index.d.ts +0 -11
  199. package/dist/index.js +0 -7
  200. package/dist/index.js.map +0 -1
  201. package/dist/reset-3ACTIYYE.js +0 -143
  202. package/dist/reset-3ACTIYYE.js.map +0 -1
  203. package/dist/setup-RR4M334C.js +0 -266
  204. package/dist/setup-RR4M334C.js.map +0 -1
  205. package/dist/upgrade-6AR3DHUV.js +0 -134
  206. package/dist/upgrade-6AR3DHUV.js.map +0 -1
  207. /package/{templates → framework}/SAFEWORD.md +0 -0
  208. /package/{templates → framework}/guides/architecture-guide.md +0 -0
  209. /package/{templates → framework}/guides/code-philosophy.md +0 -0
  210. /package/{templates → framework}/guides/context-files-guide.md +0 -0
  211. /package/{templates → framework}/guides/data-architecture-guide.md +0 -0
  212. /package/{templates → framework}/guides/design-doc-guide.md +0 -0
  213. /package/{templates → framework}/guides/learning-extraction.md +0 -0
  214. /package/{templates → framework}/guides/llm-instruction-design.md +0 -0
  215. /package/{templates → framework}/guides/llm-prompting.md +0 -0
  216. /package/{templates → framework}/guides/tdd-best-practices.md +0 -0
  217. /package/{templates → framework}/guides/test-definitions-guide.md +0 -0
  218. /package/{templates → framework}/guides/testing-methodology.md +0 -0
  219. /package/{templates → framework}/guides/user-story-guide.md +0 -0
  220. /package/{templates → framework}/guides/zombie-process-cleanup.md +0 -0
  221. /package/{templates → framework}/prompts/arch-review.md +0 -0
  222. /package/{templates → framework}/prompts/quality-review.md +0 -0
  223. /package/{templates/skills/safeword-quality-reviewer → framework/skills/quality-reviewer}/SKILL.md +0 -0
  224. /package/{templates/doc-templates → framework/templates}/architecture-template.md +0 -0
  225. /package/{templates/doc-templates → framework/templates}/design-doc-template.md +0 -0
  226. /package/{templates/doc-templates → framework/templates}/test-definitions-feature.md +0 -0
  227. /package/{templates/doc-templates → framework/templates}/ticket-template.md +0 -0
  228. /package/{templates/doc-templates → framework/templates}/user-stories-template.md +0 -0
  229. /package/{templates → packages/cli/templates}/commands/arch-review.md +0 -0
  230. /package/{templates → packages/cli/templates}/commands/lint.md +0 -0
  231. /package/{templates → packages/cli/templates}/commands/quality-review.md +0 -0
  232. /package/{templates → packages/cli/templates}/hooks/inject-timestamp.sh +0 -0
  233. /package/{templates → packages/cli/templates}/lib/common.sh +0 -0
  234. /package/{templates → packages/cli/templates}/lib/jq-fallback.sh +0 -0
  235. /package/{templates → packages/cli/templates}/markdownlint.jsonc +0 -0
@@ -0,0 +1,615 @@
1
+ # TDD Best Practices
2
+
3
+ Patterns and examples for user stories and test definitions following TDD best practices.
4
+
5
+ **LLM Instruction Design:** These templates create documentation that LLMs read and follow. For a comprehensive framework on writing clear, actionable LLM-consumable documentation, see `@.safeword/guides/llm-instruction-design.md`.
6
+
7
+ ---
8
+
9
+ ## Fillable Template Files (When to Use Each)
10
+
11
+ ### Quick Reference
12
+
13
+ | Need | Template | Location |
14
+ | ----------------------------- | ----------------------------- | -------------------------------------- |
15
+ | Feature/issue user stories | `user-stories-template.md` | `.safeword/planning/user-stories/` |
16
+ | Feature test suites | `test-definitions-feature.md` | `.safeword/planning/test-definitions/` |
17
+ | Feature implementation design | `design-doc-template.md` | `.safeword/planning/design/` |
18
+ | Project-wide architecture | No template | `ARCHITECTURE.md` at root |
19
+
20
+ **Decision rule:** If unclear, ask: "Does this affect the whole project or just one feature?" Project-wide → architecture doc. Single feature → design doc.
21
+
22
+ ### Template Details
23
+
24
+ **User Stories** (`@.safeword/templates/user-stories-template.md`) - **For features/issues**
25
+
26
+ - Multiple related stories in one file
27
+ - Status tracking (✅/❌ per story and AC)
28
+ - Test file references and implementation notes
29
+ - Completion % and phase tracking
30
+ - Use for GitHub issues with multiple user stories
31
+ - Guidance: `@.safeword/guides/user-story-guide.md`
32
+
33
+ **Test Definitions** (`@.safeword/templates/test-definitions-feature.md`) - **For feature test suites**
34
+
35
+ - Organized by test suites and individual tests
36
+ - Status tracking (✅ Passing / ⏭️ Skipped / ❌ Not Implemented / 🔴 Failing)
37
+ - Detailed steps and expected outcomes
38
+ - Coverage summary with percentages
39
+ - Test execution commands
40
+ - Guidance: `@.safeword/guides/test-definitions-guide.md`
41
+
42
+ **Design Doc** (`@.safeword/templates/design-doc-template.md`) - **For feature/system implementation**
43
+
44
+ - Implementation-focused (architecture, components, data model, user flow, component interaction)
45
+ - Key technical decisions with rationale (includes "why")
46
+ - Full [N] and [N+1] examples (matches user stories/test definitions pattern)
47
+ - ~121 lines, optimized for LLM filling and consumption
48
+ - No duplication (references user stories, test definitions)
49
+ - Guidance: `@.safeword/guides/design-doc-guide.md`
50
+
51
+ **Architecture Document** (no template) - **For project/package-wide architecture decisions**
52
+
53
+ - One `ARCHITECTURE.md` per project or package (in monorepos)
54
+ - Document principles, data model, component design, decision rationale
55
+ - Living document (updated as architecture evolves)
56
+ - Include version, status, table of contents
57
+ - All architectural decisions in one place (not separate ADRs)
58
+ - Guidance: `@.safeword/guides/architecture-guide.md`
59
+
60
+ **Example prompts:**
61
+
62
+ - "Create user stories for issue #N" → Uses user stories template
63
+ - "Create test definitions for issue #N" → Uses test definitions template
64
+ - "Create a design doc for [feature]" → Uses design doc template (2-3 pages)
65
+ - "Update the project architecture doc" → Adds to existing ARCHITECTURE.md
66
+
67
+ **TDD Workflow:** See `@.safeword/guides/testing-methodology.md` for comprehensive RED → GREEN → REFACTOR workflow with latest best practices
68
+
69
+ ---
70
+
71
+ ## User Story Templates
72
+
73
+ ### When to Use Each Format
74
+
75
+ | Format | Best For | Example Trigger |
76
+ | ------------------------------ | ------------------------------------------- | ---------------------------- |
77
+ | Standard (As a/I want/So that) | User-facing features, UI flows | "User can do X" |
78
+ | Given-When-Then | API behavior, state transitions, edge cases | "When X happens, then Y" |
79
+ | Job Story | Problem-solving, user motivation unclear | "User needs to accomplish X" |
80
+
81
+ **Decision rule:** Default to Standard. Use Given-When-Then for APIs or complex state. Use Job Story when focusing on the problem, not the solution.
82
+
83
+ ### Standard Format (Recommended)
84
+
85
+ ```
86
+ As a [role/persona]
87
+ I want [capability/feature]
88
+ So that [business value/benefit]
89
+
90
+ Acceptance Criteria:
91
+ - [Specific, testable condition 1]
92
+ - [Specific, testable condition 2]
93
+ - [Specific, testable condition 3]
94
+
95
+ Out of Scope:
96
+ - [What this story explicitly does NOT include]
97
+ ```
98
+
99
+ ### Given-When-Then Format (Behavior-Focused)
100
+
101
+ ```
102
+ Given [initial context/state]
103
+ When [action/event occurs]
104
+ Then [expected outcome]
105
+
106
+ And [additional context/outcome]
107
+ But [exception/edge case]
108
+ ```
109
+
110
+ **Filled example:**
111
+
112
+ ```
113
+ Given I am an authenticated API user
114
+ When I POST to /api/campaigns with valid JSON
115
+ Then I receive a 201 Created response with campaign ID
116
+ And the campaign appears in my GET /api/campaigns list
117
+ But invalid JSON returns 400 with descriptive error messages
118
+ ```
119
+
120
+ ### Job Story Format (Outcome-Focused)
121
+
122
+ ```
123
+ When [situation/context]
124
+ I want to [motivation/job-to-be-done]
125
+ So I can [expected outcome]
126
+ ```
127
+
128
+ **Filled example:**
129
+
130
+ ```
131
+ When I'm debugging a failing test
132
+ I want to see the exact LLM prompt and response
133
+ So I can identify whether the issue is prompt engineering or code logic
134
+ ```
135
+
136
+ ---
137
+
138
+ ## User Story Best Practices
139
+
140
+ ### ✅ GOOD Examples
141
+
142
+ **Web App Feature:**
143
+
144
+ ```
145
+ As a user with multiple campaigns
146
+ I want to switch between campaigns without reloading the page
147
+ So that I can quickly compare game states
148
+
149
+ Acceptance Criteria:
150
+ - Campaign list shows all saved campaigns with last-played timestamp
151
+ - Clicking a campaign loads its state within 200ms
152
+ - Current campaign is visually highlighted
153
+ - Switching preserves unsaved input in the current campaign
154
+
155
+ Out of Scope:
156
+ - Campaign merging/deletion (separate story)
157
+ - Multi-campaign view (future epic)
158
+ ```
159
+
160
+ **API Feature:**
161
+
162
+ ```
163
+ Given I am an authenticated API user
164
+ When I POST to /api/campaigns with valid JSON
165
+ Then I receive a 201 Created response with campaign ID
166
+ And the campaign appears in my GET /api/campaigns list
167
+ But invalid JSON returns 400 with descriptive error messages
168
+ ```
169
+
170
+ **CLI Feature:**
171
+
172
+ ```
173
+ When I'm debugging a failing test
174
+ I want to see the exact LLM prompt and response
175
+ So I can identify whether the issue is prompt engineering or code logic
176
+
177
+ Acceptance Criteria:
178
+ - `--verbose` flag prints full prompt to stderr
179
+ - Response JSON is pretty-printed with syntax highlighting
180
+ - Token count and cost are displayed
181
+ - Works with all agent types (rules, narrative, character)
182
+ ```
183
+
184
+ **With Technical Constraints:**
185
+
186
+ ```
187
+ As a user with multiple campaigns
188
+ I want to switch between campaigns without reloading the page
189
+ So that I can quickly compare game states
190
+
191
+ Acceptance Criteria:
192
+ - Campaign list shows all saved campaigns with last-played timestamp
193
+ - Clicking a campaign loads its state within 200ms
194
+ - Current campaign is visually highlighted
195
+
196
+ Technical Constraints:
197
+ Performance:
198
+ - [ ] Campaign switch completes in < 200ms at P95
199
+ - [ ] Works with up to 50 campaigns without UI lag
200
+
201
+ Compatibility:
202
+ - [ ] Chrome 100+, Safari 16+, Firefox 115+
203
+
204
+ Data:
205
+ - [ ] Campaign data persists across browser sessions
206
+ ```
207
+
208
+ ### ❌ BAD Examples (Anti-Patterns)
209
+
210
+ **Too Vague:**
211
+
212
+ ```
213
+ As a user
214
+ I want the app to work better
215
+ So that I'm happy
216
+ ```
217
+
218
+ - ❌ No specific role
219
+ - ❌ "Work better" is not measurable
220
+ - ❌ No acceptance criteria
221
+
222
+ **Too Technical (Implementation Details):**
223
+
224
+ ```
225
+ As a developer
226
+ I want to refactor the CharacterStore to use Immer
227
+ So that state mutations are prevented
228
+ ```
229
+
230
+ - ❌ This is a technical task, not a user story
231
+ - ❌ Users don't care about Immer
232
+ - ✅ Better as: Spike ticket or refactoring task
233
+
234
+ **Missing "So That" (No Value):**
235
+
236
+ ```
237
+ As a GM
238
+ I want to roll dice
239
+ ```
240
+
241
+ - ❌ No business value stated
242
+ - ❌ Why does the GM need this?
243
+
244
+ **Multiple Features in One Story:**
245
+
246
+ ```
247
+ As a player
248
+ I want to create characters, manage inventory, and track relationships
249
+ So that I can play the game
250
+ ```
251
+
252
+ - ❌ 3+ separate features bundled together
253
+ - ❌ Cannot be completed in one sprint
254
+ - ✅ Split into 3 stories
255
+
256
+ ---
257
+
258
+ ## Test Definition Templates
259
+
260
+ ### Unit Test Template
261
+
262
+ ```typescript
263
+ describe('[Unit/Module Name]', () => {
264
+ describe('[function/method name]', () => {
265
+ it('should [expected behavior] when [condition]', () => {
266
+ // Arrange: Set up test data and dependencies
267
+ const input = {
268
+ /* test data */
269
+ };
270
+ const expected = {
271
+ /* expected output */
272
+ };
273
+
274
+ // Act: Execute the function under test
275
+ const result = functionUnderTest(input);
276
+
277
+ // Assert: Verify the outcome
278
+ expect(result).toEqual(expected);
279
+ });
280
+
281
+ it('should throw [error type] when [invalid condition]', () => {
282
+ const invalidInput = {
283
+ /* bad data */
284
+ };
285
+
286
+ expect(() => functionUnderTest(invalidInput)).toThrow('Expected error message');
287
+ });
288
+
289
+ it('should handle edge case: [specific edge case]', () => {
290
+ // Edge cases: empty arrays, null, undefined, boundary values
291
+ });
292
+ });
293
+ });
294
+ ```
295
+
296
+ ### Integration Test Template
297
+
298
+ ```typescript
299
+ describe('[Feature Name] Integration', () => {
300
+ beforeEach(async () => {
301
+ // Setup: Initialize database, mock external APIs
302
+ await setupTestDatabase();
303
+ });
304
+
305
+ afterEach(async () => {
306
+ // Teardown: Clean up resources
307
+ await cleanupTestDatabase();
308
+ });
309
+
310
+ it('should [complete user flow] successfully', async () => {
311
+ // Arrange: Create test user and prerequisites
312
+ const user = await createTestUser();
313
+
314
+ // Act: Execute the full workflow
315
+ const campaign = await createCampaign(user.id);
316
+ const character = await createCharacter(campaign.id);
317
+ const result = await performAction(character.id, 'Skirmish');
318
+
319
+ // Assert: Verify end-to-end behavior
320
+ expect(result.position).toBe('risky');
321
+ expect(result.effect).toBe('standard');
322
+ expect(campaign.history).toHaveLength(1);
323
+ });
324
+
325
+ it('should rollback transaction when [failure occurs]', async () => {
326
+ // Test error handling and data consistency
327
+ });
328
+
329
+ // Filled example: rollback on failure
330
+ it('should rollback order when payment fails', async () => {
331
+ const user = await createTestUser();
332
+ const order = await createOrder(user.id, { items: ['sword'] });
333
+
334
+ // Simulate payment failure
335
+ mockPaymentGateway.mockRejectedValue(new Error('Card declined'));
336
+
337
+ await expect(processOrder(order.id)).rejects.toThrow('Card declined');
338
+
339
+ // Verify rollback - order cancelled, inventory restored
340
+ const updatedOrder = await getOrder(order.id);
341
+ expect(updatedOrder.status).toBe('cancelled');
342
+ expect(await getInventory('sword')).toBe(1); // Not decremented
343
+ });
344
+ });
345
+ ```
346
+
347
+ ### E2E Test Template (Playwright/Cypress)
348
+
349
+ ```typescript
350
+ test.describe('[User Journey Name]', () => {
351
+ test('should [complete full user flow]', async ({ page }) => {
352
+ // Arrange: Navigate to starting point
353
+ await page.goto('/campaigns');
354
+
355
+ // Act: Simulate user interactions
356
+ await page.click('button:has-text("New Campaign")');
357
+ await page.fill('[name="campaignName"]', 'The Bloodletters');
358
+ await page.click('button:has-text("Create")');
359
+
360
+ // Assert: Verify UI state matches expectations
361
+ await expect(page.locator('h1')).toContainText('The Bloodletters');
362
+ await expect(page.locator('.campaign-list')).toContainText('The Bloodletters');
363
+
364
+ // Act: Continue the flow
365
+ await page.click('button:has-text("Create Character")');
366
+
367
+ // Assert: Verify next state
368
+ await expect(page).toHaveURL(/\/characters\/create/);
369
+ });
370
+ });
371
+ ```
372
+
373
+ ---
374
+
375
+ ## Test Best Practices
376
+
377
+ ### Test Naming Conventions
378
+
379
+ **✅ GOOD - Descriptive and Specific:**
380
+
381
+ ```typescript
382
+ it('should return risky position when outnumbered 3-to-1');
383
+ it('should cache LLM responses for 5 minutes to reduce costs');
384
+ it('should preserve armor state after reducing harm from L2 to L1');
385
+ it('should throw ValidationError when dice pool is negative');
386
+ ```
387
+
388
+ **❌ BAD - Vague or Implementation-Focused:**
389
+
390
+ ```typescript
391
+ it('works correctly'); // What does "correctly" mean?
392
+ it('tests the function'); // Obvious, not descriptive
393
+ it('should call setState'); // Implementation detail
394
+ it('scenario 1'); // No context
395
+ ```
396
+
397
+ **How to rename:**
398
+
399
+ 1. Identify the behavior being tested
400
+ 2. Identify the condition/input
401
+ 3. Use pattern: `'should [behavior] when [condition]'`
402
+
403
+ Example: `'works correctly'` → `'should return 200 when user is authenticated'`
404
+
405
+ ### Arrange-Act-Assert (AAA) Pattern
406
+
407
+ **Always use AAA structure for clarity:**
408
+
409
+ ```typescript
410
+ it('should calculate critical success on 6', () => {
411
+ // Arrange: Setup test data
412
+ const diceResults = [6, 6, 4];
413
+
414
+ // Act: Execute the logic
415
+ const outcome = evaluateDiceRoll(diceResults);
416
+
417
+ // Assert: Verify expectations
418
+ expect(outcome.result).toBe('critical');
419
+ expect(outcome.highestDie).toBe(6);
420
+ });
421
+ ```
422
+
423
+ ### Test Independence
424
+
425
+ **✅ GOOD - Isolated Tests:**
426
+
427
+ ```typescript
428
+ beforeEach(() => {
429
+ // Each test gets fresh state
430
+ gameState = createFreshGameState();
431
+ });
432
+
433
+ it('test A', () => {
434
+ /* uses gameState */
435
+ });
436
+ it('test B', () => {
437
+ /* uses separate gameState */
438
+ });
439
+ ```
440
+
441
+ **❌ BAD - Shared State:**
442
+
443
+ ```typescript
444
+ let sharedState = {}; // Tests modify this
445
+ it('test A', () => {
446
+ sharedState.foo = 'bar';
447
+ });
448
+ it('test B', () => {
449
+ expect(sharedState.foo).toBe('bar');
450
+ }); // Depends on test A!
451
+ ```
452
+
453
+ ### What to Test
454
+
455
+ **✅ Test These:**
456
+
457
+ - Public API behavior (functions, methods, components)
458
+ - User-facing features (can the user do X?)
459
+ - Edge cases (empty, null, boundary values)
460
+ - Error handling (does it fail gracefully?)
461
+ - Integration points (API calls, database queries)
462
+
463
+ **❌ Don't Test These:**
464
+
465
+ - Private implementation details (internal helper functions)
466
+ - Third-party library internals (assume React works)
467
+ - Generated code (unless it's business logic)
468
+ - Trivial getters/setters with no logic
469
+
470
+ **Boundary example:**
471
+
472
+ ```typescript
473
+ // ❌ DON'T test this private helper
474
+ function _formatDateInternal(date) {
475
+ /* internal logic */
476
+ }
477
+
478
+ // ✅ DO test the public function that uses it
479
+ export function getFormattedTimestamp(event) {
480
+ return _formatDateInternal(event.createdAt);
481
+ }
482
+ // Test getFormattedTimestamp, not _formatDateInternal
483
+ ```
484
+
485
+ ### Test Data Builders
486
+
487
+ **Use builders for complex test data:**
488
+
489
+ ```typescript
490
+ // ✅ GOOD - Reusable test data builder
491
+ function buildCharacter(overrides = {}) {
492
+ return {
493
+ id: 'test-char-1',
494
+ name: 'Cutter',
495
+ playbook: 'Cutter',
496
+ stress: 0,
497
+ harm: [],
498
+ armor: true,
499
+ ...overrides, // Easy to customize per test
500
+ };
501
+ }
502
+
503
+ it('should increase stress when resisting', () => {
504
+ const character = buildCharacter({ stress: 3 });
505
+ // Test uses character with stress=3
506
+ });
507
+ ```
508
+
509
+ ---
510
+
511
+ ## LLM Testing Patterns
512
+
513
+ ### Promptfoo LLM-as-Judge Template
514
+
515
+ ```yaml
516
+ # Tests for AI outputs (narrative quality, reasoning)
517
+ prompts:
518
+ - file://prompts/gm-narrative.txt
519
+
520
+ providers:
521
+ - id: anthropic:messages:claude-sonnet-4
522
+ config:
523
+ temperature: 1.0
524
+
525
+ tests:
526
+ - description: 'GM should telegraph position/effect before roll'
527
+ vars:
528
+ action: 'I Skirmish with the gang enforcers'
529
+ character: { /* character JSON */ }
530
+ assert:
531
+ - type: llm-rubric
532
+ value: |
533
+ The GM response must:
534
+ - State position (controlled/risky/desperate) explicitly
535
+ - State effect (limited/standard/great) explicitly
536
+ - Explain WHY these were chosen based on fiction
537
+
538
+ Grade as:
539
+ EXCELLENT: All three present and clear
540
+ ACCEPTABLE: Position and effect stated, reasoning weak
541
+ POOR: Missing position or effect
542
+
543
+ - type: llm-rubric
544
+ value: |
545
+ Does the GM show collaborative tone (asking questions, inviting detail)?
546
+
547
+ EXCELLENT: Asks open-ended questions, invites player creativity
548
+ ACCEPTABLE: Acknowledges player action, minimal collaboration
549
+ POOR: Dictates outcomes without player input
550
+ ```
551
+
552
+ ### Integration Test with Real LLM
553
+
554
+ ```typescript
555
+ describe('Rules Agent Integration', () => {
556
+ it('should infer correct position for desperate situation', async () => {
557
+ // Arrange
558
+ const scenario = {
559
+ action: 'I Skirmish against 5 armed guards while wounded',
560
+ character: buildCharacter({ harm: [{ level: 2, description: 'Broken Arm' }] }),
561
+ };
562
+
563
+ // Act: Real LLM call (costs ~$0.01)
564
+ const response = await rulesAgent.processAction(scenario);
565
+
566
+ // Assert: Structured output (not narrative quality)
567
+ expect(response.position).toBe('desperate');
568
+ expect(response.effect).toBe('limited');
569
+ expect(response.dicePool).toBeLessThan(3); // Harm reduces dice
570
+ expect(response.consequences).toContain('severe harm');
571
+ });
572
+ });
573
+ ```
574
+
575
+ ---
576
+
577
+ ## INVEST Checklist (Apply to Every User Story)
578
+
579
+ Before finalizing a story, verify it passes all six criteria:
580
+
581
+ - [ ] **Independent** - Can be completed without depending on other stories
582
+ - [ ] **Negotiable** - Details emerge through conversation, not a fixed contract
583
+ - [ ] **Valuable** - Delivers clear value to user or business
584
+ - [ ] **Estimable** - Team can estimate effort (not too vague, not too detailed)
585
+ - [ ] **Small** - Completable in one sprint/iteration (typically 1-5 days)
586
+ - [ ] **Testable** - Clear acceptance criteria define when it's done
587
+
588
+ **If a story fails any criterion, it's not ready - refine or split it.**
589
+
590
+ ---
591
+
592
+ ## Quick Reference
593
+
594
+ **User Story Red Flags (INVEST Violations):**
595
+
596
+ - No acceptance criteria → Too vague
597
+ - More than 3 acceptance criteria → Split into multiple stories
598
+ - Technical implementation details → Wrong audience
599
+ - Missing "So that" → No clear value
600
+
601
+ **Test Red Flags:**
602
+
603
+ - Test name doesn't describe behavior → Rename
604
+ - Test depends on another test's state → Isolate
605
+ - Test is >50 lines → Break into smaller tests
606
+ - Test tests implementation details → Test behavior instead
607
+ - Test never fails → Remove (not testing anything)
608
+
609
+ **When to Write E2E vs Integration vs Unit:**
610
+
611
+ - **E2E:** User can complete full workflow (slow, expensive, high confidence)
612
+ - **Integration:** Multiple modules work together (moderate speed, good ROI)
613
+ - **Unit:** Single function/module logic (fast, cheap, low-level confidence)
614
+
615
+ **Ratio guidance:** 70% unit, 20% integration, 10% E2E (adjust based on project)