gentle-pi 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +66 -0
- package/assets/agents/sdd-apply.md +71 -0
- package/assets/agents/sdd-archive.md +14 -0
- package/assets/agents/sdd-design.md +14 -0
- package/assets/agents/sdd-explore.md +14 -0
- package/assets/agents/sdd-init.md +14 -0
- package/assets/agents/sdd-onboard.md +15 -0
- package/assets/agents/sdd-proposal.md +14 -0
- package/assets/agents/sdd-spec.md +14 -0
- package/assets/agents/sdd-tasks.md +61 -0
- package/assets/agents/sdd-verify.md +55 -0
- package/assets/chains/sdd-full.chain.md +75 -0
- package/assets/chains/sdd-plan.chain.md +35 -0
- package/assets/chains/sdd-verify.chain.md +27 -0
- package/assets/orchestrator.md +191 -0
- package/assets/support/strict-tdd-verify.md +269 -0
- package/assets/support/strict-tdd.md +364 -0
- package/extensions/gentle-ai.ts +157 -0
- package/extensions/sdd-init.ts +83 -0
- package/extensions/skill-registry.ts +267 -0
- package/package.json +47 -0
- package/prompts/cl.md +54 -0
- package/prompts/is.md +25 -0
- package/prompts/pr.md +41 -0
- package/prompts/wr.md +31 -0
- package/skills/branch-pr/SKILL.md +202 -0
- package/skills/chained-pr/SKILL.md +50 -0
- package/skills/chained-pr/references/chaining-details.md +99 -0
- package/skills/cognitive-doc-design/SKILL.md +81 -0
- package/skills/comment-writer/SKILL.md +74 -0
- package/skills/gentle-ai/SKILL.md +43 -0
- package/skills/issue-creation/SKILL.md +223 -0
- package/skills/judgment-day/SKILL.md +52 -0
- package/skills/judgment-day/references/prompts-and-formats.md +75 -0
- package/skills/work-unit-commits/SKILL.md +86 -0
|
@@ -0,0 +1,364 @@
|
|
|
1
|
+
# Strict TDD Module — Apply Phase
|
|
2
|
+
|
|
3
|
+
> **This module is loaded ONLY when Strict TDD Mode is enabled AND a test runner is available.**
|
|
4
|
+
> If you are reading this, the orchestrator already verified both conditions. Follow every instruction.
|
|
5
|
+
|
|
6
|
+
## TDD Philosophy
|
|
7
|
+
|
|
8
|
+
TDD is not testing. TDD is **software design driven by tests**. You write a test that describes what the code SHOULD do, then write the minimum code to make it real. The tests design the API, the contracts, the behavior. Code is a side effect of tests.
|
|
9
|
+
|
|
10
|
+
### The Three Laws
|
|
11
|
+
|
|
12
|
+
1. **Do NOT write production code** until you have a failing test
|
|
13
|
+
2. **Do NOT write more of a test** than is sufficient to fail
|
|
14
|
+
3. **Do NOT write more code** than is necessary to pass the test
|
|
15
|
+
|
|
16
|
+
## TDD Implementation Cycle
|
|
17
|
+
|
|
18
|
+
For EVERY task assigned to you, follow this cycle strictly:
|
|
19
|
+
|
|
20
|
+
```
|
|
21
|
+
FOR EACH TASK:
|
|
22
|
+
├── 0. SAFETY NET (only if modifying existing files)
|
|
23
|
+
│ ├── Run existing tests for files being modified
|
|
24
|
+
│ ├── Capture baseline: "{N} tests passing"
|
|
25
|
+
│ ├── If any FAIL → STOP, report as "pre-existing failure"
|
|
26
|
+
│ │ (do NOT fix pre-existing failures — report to orchestrator)
|
|
27
|
+
│ └── This baseline proves you did not break what already worked
|
|
28
|
+
│
|
|
29
|
+
├── 1. UNDERSTAND
|
|
30
|
+
│ ├── Read the task description
|
|
31
|
+
│ ├── Read relevant spec scenarios (these ARE your acceptance criteria)
|
|
32
|
+
│ ├── Read the design decisions (these CONSTRAIN your approach)
|
|
33
|
+
│ ├── Read existing code and test patterns (match the style)
|
|
34
|
+
│ └── Determine test layer (see "Choosing Test Layer" below)
|
|
35
|
+
│
|
|
36
|
+
├── 2. RED — Write a failing test FIRST
|
|
37
|
+
│ ├── Write test(s) that describe the expected behavior from the spec
|
|
38
|
+
│ ├── Prefer pure functions where possible (no side effects = easy to test)
|
|
39
|
+
│ ├── The test MUST reference production code that does NOT exist yet
|
|
40
|
+
│ │ (this guarantees failure — no need to execute to confirm)
|
|
41
|
+
│ ├── If the production code/function already exists:
|
|
42
|
+
│ │ └── Write a test for the NEW behavior that is NOT yet implemented
|
|
43
|
+
│ └── GATE: Do NOT proceed to GREEN until the test is written
|
|
44
|
+
│
|
|
45
|
+
├── 3. GREEN — Write the MINIMUM code to pass
|
|
46
|
+
│ ├── Implement ONLY what the failing test needs
|
|
47
|
+
│ ├── Fake It is VALID here (hardcoded return values are OK)
|
|
48
|
+
│ ├── EXECUTE tests → must PASS
|
|
49
|
+
│ │ ├── ✅ Passed → proceed to TRIANGULATE or REFACTOR
|
|
50
|
+
│ │ └── ❌ Failed → fix the implementation, NOT the test
|
|
51
|
+
│ └── GATE: Do NOT proceed until GREEN is confirmed by execution
|
|
52
|
+
│
|
|
53
|
+
├── 4. TRIANGULATE (MANDATORY for most tasks)
|
|
54
|
+
│ ├── DEFAULT: triangulation is REQUIRED. You need a compelling reason to skip it.
|
|
55
|
+
│ ├── Add a second test case with DIFFERENT inputs/expected outputs
|
|
56
|
+
│ ├── EXECUTE tests → if Fake It breaks (hardcoded no longer works):
|
|
57
|
+
│ │ └── Generalize to real logic (this is the whole point)
|
|
58
|
+
│ ├── Repeat until ALL spec scenarios for this task are covered
|
|
59
|
+
│ ├── Each triangulation pass: write test → run → fix implementation
|
|
60
|
+
│ ├── MINIMUM: at least 2 test cases per behavior (happy path + one edge case)
|
|
61
|
+
│ │ ├── One test with data that produces a NON-EMPTY/NON-TRIVIAL result
|
|
62
|
+
│ │ └── One test with data that exercises a DIFFERENT code path
|
|
63
|
+
│ ├── WATCH OUT for GREEN that passes trivially:
|
|
64
|
+
│ │ ├── If your test passes because the component/element isn't rendered → NOT a real GREEN
|
|
65
|
+
│ │ ├── If your test passes because a loop iterates 0 times → NOT a real GREEN
|
|
66
|
+
│ │ ├── If your test passes because the setup doesn't trigger the code path → NOT a real GREEN
|
|
67
|
+
│ │ └── A real GREEN means: production code RAN and produced the expected output
|
|
68
|
+
│ ├── Skip triangulation ONLY when ALL of these are true:
|
|
69
|
+
│ │ ├── The task is purely structural (config file, constant definition, type export)
|
|
70
|
+
│ │ ├── There is literally ONE possible output (no branching, no logic)
|
|
71
|
+
│ │ └── You explicitly note "Triangulation skipped: {reason}" in the evidence table
|
|
72
|
+
│ └── GATE: All spec scenarios for this task must have tests before REFACTOR
|
|
73
|
+
│
|
|
74
|
+
├── 5. REFACTOR — Improve without changing behavior
|
|
75
|
+
│ ├── Extract constants (eliminate magic numbers)
|
|
76
|
+
│ ├── Extract functions (reduce cyclomatic complexity)
|
|
77
|
+
│ ├── Improve naming, remove duplication
|
|
78
|
+
│ ├── Push toward pure functions where feasible
|
|
79
|
+
│ ├── Apply Boy Scout Rule: leave code cleaner than you found it
|
|
80
|
+
│ ├── EXECUTE tests after EACH refactoring step → must STILL PASS
|
|
81
|
+
│ │ ├── ✅ Still passing → refactoring is safe, continue
|
|
82
|
+
│ │ └── ❌ Failed → REVERT that refactoring step, try smaller
|
|
83
|
+
│ └── GATE: Tests green after EVERY refactoring change
|
|
84
|
+
│
|
|
85
|
+
├── 6. Mark task complete [x]
|
|
86
|
+
└── 7. Note any deviations or issues discovered
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
## Choosing Test Layer
|
|
90
|
+
|
|
91
|
+
Based on the testing capabilities cached in Engram (`sdd/{project}/testing-capabilities`), choose the appropriate test layer for each task:
|
|
92
|
+
|
|
93
|
+
```
|
|
94
|
+
Determine test layer by WHAT the task does:
|
|
95
|
+
├── Pure logic, utility function, calculation, data transformation
|
|
96
|
+
│ └── Unit test (always available if test runner exists)
|
|
97
|
+
│
|
|
98
|
+
├── Component rendering, user interaction, state changes
|
|
99
|
+
│ ├── IF integration tools available → Integration test
|
|
100
|
+
│ └── IF NOT → Unit test with mocks (degrade gracefully)
|
|
101
|
+
│
|
|
102
|
+
├── Multi-component flow, API interaction, context/provider behavior
|
|
103
|
+
│ ├── IF integration tools available → Integration test
|
|
104
|
+
│ └── IF NOT → Unit test with mocks
|
|
105
|
+
│
|
|
106
|
+
├── Critical business flow, full user journey, cross-page navigation
|
|
107
|
+
│ ├── IF E2E tools available → E2E test
|
|
108
|
+
│ ├── IF NOT but integration available → Integration test
|
|
109
|
+
│ └── IF neither → Unit test (degrade gracefully)
|
|
110
|
+
│
|
|
111
|
+
└── Default: Unit test (always the fallback)
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
**Key rule**: Use the HIGHEST available layer that fits the task. But NEVER skip a task because a layer is unavailable — degrade to the next available layer.
|
|
115
|
+
|
|
116
|
+
## Test Execution
|
|
117
|
+
|
|
118
|
+
Detect the test runner from the cached testing capabilities:
|
|
119
|
+
|
|
120
|
+
```
|
|
121
|
+
Read test command from:
|
|
122
|
+
├── Cached capabilities → test_runner.command (fastest — already detected)
|
|
123
|
+
├── openspec/config.yaml → rules.apply.test_command (override)
|
|
124
|
+
└── Fallback: detect from package.json/pyproject.toml/go.mod
|
|
125
|
+
|
|
126
|
+
When executing tests during TDD:
|
|
127
|
+
├── Run ONLY the relevant test file, not the entire suite
|
|
128
|
+
│ ├── JS/TS: {runner} {test-file-path} (e.g., pnpm vitest run src/utils/tax.test.ts)
|
|
129
|
+
│ ├── Python: pytest {test-file-path}
|
|
130
|
+
│ ├── Go: go test ./{package}/... -run {TestName}
|
|
131
|
+
│ └── Adapt to the runner's CLI
|
|
132
|
+
├── This keeps the cycle FAST
|
|
133
|
+
└── Full suite runs happen in sdd-verify, not here
|
|
134
|
+
```
|
|
135
|
+
|
|
136
|
+
## Pure Function Preference
|
|
137
|
+
|
|
138
|
+
When writing production code in GREEN/TRIANGULATE steps, prefer pure functions:
|
|
139
|
+
|
|
140
|
+
```
|
|
141
|
+
✅ PREFER (pure — easy to test):
|
|
142
|
+
function calculateDiscount(price: number, quantity: number): number {
|
|
143
|
+
return quantity >= 5 ? price * quantity * 0.1 : 0
|
|
144
|
+
}
|
|
145
|
+
|
|
146
|
+
❌ AVOID (impure — hard to test):
|
|
147
|
+
function calculateDiscount(item: Item) {
|
|
148
|
+
globalState.lastDiscount = item.price * 0.1 // side effect
|
|
149
|
+
updateDOM() // side effect
|
|
150
|
+
return globalState.lastDiscount
|
|
151
|
+
}
|
|
152
|
+
```
|
|
153
|
+
|
|
154
|
+
**Why**: Pure functions are deterministic (same input → same output), have no side effects, and are trivially testable. TDD naturally pushes you toward pure functions — embrace it.
|
|
155
|
+
|
|
156
|
+
## Approval Testing (for refactoring existing code)
|
|
157
|
+
|
|
158
|
+
When a task involves REFACTORING existing code (not writing new code):
|
|
159
|
+
|
|
160
|
+
```
|
|
161
|
+
BEFORE touching production code:
|
|
162
|
+
├── 1. Identify existing behavior to preserve
|
|
163
|
+
├── 2. Write "approval tests" that capture current behavior:
|
|
164
|
+
│ ├── Call the function with known inputs
|
|
165
|
+
│ ├── Assert the CURRENT outputs (even if ugly or wrong)
|
|
166
|
+
│ └── These tests document what the code does NOW
|
|
167
|
+
├── 3. Run approval tests → must PASS (they describe current reality)
|
|
168
|
+
├── 4. NOW refactor the production code
|
|
169
|
+
├── 5. Run approval tests again → must STILL PASS
|
|
170
|
+
│ ├── ✅ Passing → refactoring preserved behavior
|
|
171
|
+
│ └── ❌ Failing → refactoring broke something, revert
|
|
172
|
+
└── 6. If the spec says behavior should CHANGE:
|
|
173
|
+
├── Update the approval test to reflect NEW expected behavior
|
|
174
|
+
├── Run → test FAILS (RED — new behavior not implemented yet)
|
|
175
|
+
└── Implement new behavior → GREEN
|
|
176
|
+
```
|
|
177
|
+
|
|
178
|
+
## Return Summary Extension
|
|
179
|
+
|
|
180
|
+
When Strict TDD Mode is active, your return summary MUST include this section:
|
|
181
|
+
|
|
182
|
+
```markdown
|
|
183
|
+
### TDD Cycle Evidence
|
|
184
|
+
| Task | Test File | Layer | Safety Net | RED | GREEN | TRIANGULATE | REFACTOR |
|
|
185
|
+
|------|-----------|-------|------------|-----|-------|-------------|----------|
|
|
186
|
+
| 1.1 | `path/test.ext` | Unit | ✅ 5/5 | ✅ Written | ✅ Passed | ✅ 3 cases | ✅ Clean |
|
|
187
|
+
| 1.2 | `path/test.ext` | Integration | N/A (new) | ✅ Written | ✅ Passed | ➖ Single | ✅ Clean |
|
|
188
|
+
| 1.3 | `path/test.ext` | Unit | ✅ 2/2 | ✅ Written | ✅ Passed | ✅ 2 cases | ➖ None needed |
|
|
189
|
+
|
|
190
|
+
### Test Summary
|
|
191
|
+
- **Total tests written**: {N}
|
|
192
|
+
- **Total tests passing**: {N}
|
|
193
|
+
- **Layers used**: Unit ({N}), Integration ({N}), E2E ({N})
|
|
194
|
+
- **Approval tests** (refactoring): {N} or "None — no refactoring tasks"
|
|
195
|
+
- **Pure functions created**: {N}
|
|
196
|
+
```
|
|
197
|
+
|
|
198
|
+
**Column definitions**:
|
|
199
|
+
- **Safety Net**: Pre-existing tests run before modifying files. "N/A (new)" for new files.
|
|
200
|
+
- **RED**: Test written first, referencing code that doesn't exist yet. Always "✅ Written".
|
|
201
|
+
- **GREEN**: Tests executed and passing after minimal implementation. Must show execution result.
|
|
202
|
+
- **TRIANGULATE**: Additional test cases added to force real logic. "➖ Single" if spec has only one scenario.
|
|
203
|
+
- **REFACTOR**: Code improved with tests still passing. "➖ None needed" if code was already clean.
|
|
204
|
+
|
|
205
|
+
## Assertion Quality Rules (MANDATORY)
|
|
206
|
+
|
|
207
|
+
**Every assertion must verify REAL behavior.** A test that passes without exercising production logic is worse than no test — it gives false confidence.
|
|
208
|
+
|
|
209
|
+
### Banned Assertion Patterns (NEVER write these)
|
|
210
|
+
|
|
211
|
+
```
|
|
212
|
+
# TRIVIAL ASSERTIONS — test proves nothing
|
|
213
|
+
expect(true).toBe(true) # ❌ Tautology
|
|
214
|
+
expect(false).toBe(false) # ❌ Tautology
|
|
215
|
+
expect(1).toBe(1) # ❌ Tautology — no production code involved
|
|
216
|
+
assert True # ❌ Always passes
|
|
217
|
+
assert 1 == 1 # ❌ Always passes
|
|
218
|
+
|
|
219
|
+
# EMPTY COLLECTION ASSERTIONS without setup context
|
|
220
|
+
expect(result).toEqual([]) # ❌ ONLY valid if you set up conditions for empty
|
|
221
|
+
expect(result).toHaveLength(0) # ❌ Same — why is it empty? Did production code run?
|
|
222
|
+
assert len(result) == 0 # ❌ Same — prove the emptiness comes from real logic
|
|
223
|
+
assert result == [] # ❌ Same
|
|
224
|
+
|
|
225
|
+
# TYPE-ONLY ASSERTIONS — proves existence, not behavior
|
|
226
|
+
expect(result).toBeDefined() # ❌ Alone is useless — WHAT is the value?
|
|
227
|
+
expect(result).not.toBeNull() # ❌ Alone is useless — assert the actual value
|
|
228
|
+
expect(typeof result).toBe('object') # ❌ Alone is useless — what does the object contain?
|
|
229
|
+
assert result is not None # ❌ Alone — assert what result actually IS
|
|
230
|
+
|
|
231
|
+
# GHOST LOOP — assertion inside a loop that iterates 0 times
|
|
232
|
+
const items = screen.queryAllByTestId("item"); // returns []
|
|
233
|
+
for (const item of items) {
|
|
234
|
+
expect(item).toHaveTextContent("value"); # ❌ NEVER EXECUTES — loop body is dead code
|
|
235
|
+
}
|
|
236
|
+
# FIX: assert the collection is non-empty FIRST, or set up data so it IS non-empty:
|
|
237
|
+
expect(items).toHaveLength(3); # ✅ Proves items exist
|
|
238
|
+
for (const item of items) { ... } # ✅ Now the loop actually runs
|
|
239
|
+
|
|
240
|
+
# INCOMPLETE TDD CYCLE — GREEN without TRIANGULATE
|
|
241
|
+
# If your GREEN test passes because the setup doesn't exercise the code path,
|
|
242
|
+
# you are NOT done. You MUST triangulate with a setup that DOES exercise it.
|
|
243
|
+
# Example: testing "search doesn't update until Enter" but the component
|
|
244
|
+
# that receives the search is never rendered → the test proves nothing.
|
|
245
|
+
# FIX: add a test where the component IS rendered and verify the behavior.
|
|
246
|
+
```
|
|
247
|
+
|
|
248
|
+
### What Makes a REAL Assertion
|
|
249
|
+
|
|
250
|
+
Every test assertion must satisfy ALL of these:
|
|
251
|
+
1. **Calls production code** — the test invokes a function, method, or component from the implementation
|
|
252
|
+
2. **Asserts a specific output** — compares against a concrete expected value derived from the spec
|
|
253
|
+
3. **Would FAIL if the production code were wrong** — if you change the implementation logic, THIS test breaks
|
|
254
|
+
|
|
255
|
+
```
|
|
256
|
+
# ✅ REAL assertions — production code determines the result
|
|
257
|
+
expect(calculateDiscount(100, 10)).toBe(10) # Real input → real output
|
|
258
|
+
expect(screen.getByText('Welcome, John')).toBeInTheDocument() # Rendered from data
|
|
259
|
+
assert result[0].status == "FAIL" # Specific finding from check execution
|
|
260
|
+
assert response.status_code == 403 # Real HTTP response from the endpoint
|
|
261
|
+
expect(result).toHaveLength(3) # AND you set up exactly 3 items
|
|
262
|
+
```
|
|
263
|
+
|
|
264
|
+
### Empty Collection Rule
|
|
265
|
+
|
|
266
|
+
`expect(result).toEqual([])` or `assert len(result) == 0` is ONLY valid when:
|
|
267
|
+
1. You set up a specific precondition that SHOULD produce an empty result (e.g., no matching records)
|
|
268
|
+
2. The production code actually ran and filtered/processed data to arrive at empty
|
|
269
|
+
3. A companion test with different setup produces a NON-EMPTY result (triangulation)
|
|
270
|
+
|
|
271
|
+
If you cannot explain WHY the result is empty based on setup → the assertion is trivial.
|
|
272
|
+
|
|
273
|
+
### Smoke Test Rule
|
|
274
|
+
|
|
275
|
+
A test that only renders a component without asserting any output is NOT a valid test:
|
|
276
|
+
|
|
277
|
+
```
|
|
278
|
+
# ❌ SMOKE TEST ONLY — proves nothing about behavior
|
|
279
|
+
render(<MyComponent data={mockData} />);
|
|
280
|
+
expect(screen.getByTestId("wrapper")).toBeInTheDocument(); # Just proves it rendered
|
|
281
|
+
|
|
282
|
+
# ✅ BEHAVIORAL TEST — proves what the component DOES with the data
|
|
283
|
+
render(<MyComponent data={mockData} />);
|
|
284
|
+
expect(screen.getByText("Expected Title")).toBeInTheDocument(); # Verifies output from data
|
|
285
|
+
expect(screen.getByRole("button")).toHaveTextContent("Submit"); # Verifies real content
|
|
286
|
+
```
|
|
287
|
+
|
|
288
|
+
"Renders without crash" is a smoke test. It is NOT a unit test, NOT an integration test, and it does NOT count toward TDD coverage. If you need a smoke test, it must be accompanied by real behavioral assertions.
|
|
289
|
+
|
|
290
|
+
### Mock Hygiene Rules
|
|
291
|
+
|
|
292
|
+
**If you need more mocks than assertions, you are testing at the WRONG level.**
|
|
293
|
+
|
|
294
|
+
```
|
|
295
|
+
Mock/assertion ratio guide:
|
|
296
|
+
├── ≤ 3 mocks for a test file → ✅ Healthy — focused test
|
|
297
|
+
├── 4–6 mocks → ⚠️ Consider extracting logic to a pure function
|
|
298
|
+
├── 7+ mocks → ❌ STOP — you are testing at the wrong layer
|
|
299
|
+
│ ├── Extract the logic under test to a PURE FUNCTION and test it without mocks
|
|
300
|
+
│ ├── OR move the test to integration/E2E layer where real dependencies exist
|
|
301
|
+
│ └── NEVER write 10+ mocks to verify a one-line transformation
|
|
302
|
+
```
|
|
303
|
+
|
|
304
|
+
**Extract-Before-Mock Rule**: If the behavior you want to test is a data transformation, mapping, filtering, or conditional logic (e.g., `MUTED → FAIL` status conversion), EXTRACT it to a pure function FIRST, then test the pure function directly. No mocks needed.
|
|
305
|
+
|
|
306
|
+
```
|
|
307
|
+
# ❌ BAD: 15 mocks to test a one-line status conversion
|
|
308
|
+
vi.mock("next/navigation", ...);
|
|
309
|
+
vi.mock("next/link", ...);
|
|
310
|
+
vi.mock("@/components/shadcn", ...);
|
|
311
|
+
// ... 12 more mocks ...
|
|
312
|
+
render(<StatusCell row={mutedRow} />);
|
|
313
|
+
expect(screen.getByText("FAIL")).toBeInTheDocument();
|
|
314
|
+
|
|
315
|
+
# ✅ GOOD: extract and test the logic directly
|
|
316
|
+
// In production code:
|
|
317
|
+
export function resolveDisplayStatus(status: string, isMuted: boolean): string {
|
|
318
|
+
return status === "MUTED" ? "FAIL" : status;
|
|
319
|
+
}
|
|
320
|
+
|
|
321
|
+
// In test — ZERO mocks needed:
|
|
322
|
+
expect(resolveDisplayStatus("MUTED", true)).toBe("FAIL");
|
|
323
|
+
expect(resolveDisplayStatus("PASS", false)).toBe("PASS");
|
|
324
|
+
```
|
|
325
|
+
|
|
326
|
+
### Implementation Detail Coupling Rule
|
|
327
|
+
|
|
328
|
+
Tests must assert **behavior visible to the user**, not internal implementation details:
|
|
329
|
+
|
|
330
|
+
```
|
|
331
|
+
# ❌ COUPLED TO IMPLEMENTATION — breaks on any style refactor
|
|
332
|
+
expect(element.className).toContain("text-xs");
|
|
333
|
+
expect(element.className).toContain("-mt-2.5");
|
|
334
|
+
expect(element.className).toContain("border-border-error-primary");
|
|
335
|
+
expect(element.style.color).toBe("red");
|
|
336
|
+
|
|
337
|
+
# ❌ COUPLED TO INTERNALS — breaks when implementation changes
|
|
338
|
+
expect(mockService.mock.calls.length).toBe(3); # Why 3? Brittle.
|
|
339
|
+
expect(component.state.isLoading).toBe(true); # Internal state, not behavior.
|
|
340
|
+
|
|
341
|
+
# ✅ BEHAVIORAL — survives refactors, tests what users see
|
|
342
|
+
expect(screen.getByText("Error: Payment failed")).toBeInTheDocument();
|
|
343
|
+
expect(screen.getByRole("alert")).toHaveTextContent("Risk:");
|
|
344
|
+
expect(screen.getByRole("button")).toBeDisabled();
|
|
345
|
+
```
|
|
346
|
+
|
|
347
|
+
**CSS class assertions are NEVER valid test assertions.** If you need to verify visual styling:
|
|
348
|
+
1. Test the **semantic outcome** (e.g., element has `role="alert"`, text is visible, button is disabled)
|
|
349
|
+
2. OR use a visual regression tool / E2E screenshot comparison
|
|
350
|
+
3. NEVER assert specific Tailwind/CSS class names — they are implementation details
|
|
351
|
+
|
|
352
|
+
## Rules (Strict TDD specific)
|
|
353
|
+
|
|
354
|
+
- NEVER write production code before writing its test — this is the ONE rule that cannot be broken
|
|
355
|
+
- NEVER skip the GREEN execution gate — you MUST run tests and confirm they pass
|
|
356
|
+
- NEVER skip triangulation when the spec defines multiple scenarios — hardcoded Fake It must be forced out
|
|
357
|
+
- NEVER write trivial assertions (see Banned Assertion Patterns above) — they are WORSE than no test
|
|
358
|
+
- ALWAYS verify that every assertion CALLS production code and asserts a SPECIFIC expected value
|
|
359
|
+
- ALWAYS run the Safety Net before modifying existing files — protect what already works
|
|
360
|
+
- ALWAYS report the TDD Cycle Evidence table — the verify phase will check it
|
|
361
|
+
- If a test runner execution fails for infrastructure reasons (not test failures), report as "Blocked" and continue to next task
|
|
362
|
+
- Prefer pure functions — but don't force it where it doesn't fit (e.g., React components with state)
|
|
363
|
+
- For refactoring tasks, ALWAYS write approval tests before touching code
|
|
364
|
+
- Run ONLY the relevant test file during the cycle, not the full suite
|
|
@@ -0,0 +1,157 @@
|
|
|
1
|
+
import { existsSync, mkdirSync, readdirSync, readFileSync, writeFileSync } from "node:fs";
|
|
2
|
+
import { dirname, join } from "node:path";
|
|
3
|
+
import { fileURLToPath } from "node:url";
|
|
4
|
+
import type { ExtensionAPI, ToolCallEventResult } from "@earendil-works/pi-coding-agent";
|
|
5
|
+
|
|
6
|
+
const PACKAGE_ROOT = dirname(dirname(fileURLToPath(import.meta.url)));
|
|
7
|
+
const ASSETS_DIR = join(PACKAGE_ROOT, "assets");
|
|
8
|
+
const ORCHESTRATOR_PROMPT = readFileSync(join(ASSETS_DIR, "orchestrator.md"), "utf8").trim();
|
|
9
|
+
|
|
10
|
+
const GENTLE_AI_PROMPT = `## Gentle AI Harness
|
|
11
|
+
You are operating with the Gentle AI harness package for Pi.
|
|
12
|
+
|
|
13
|
+
Persona:
|
|
14
|
+
- Be direct, technical, and concise.
|
|
15
|
+
- When the user writes Spanish, answer in natural Rioplatense Spanish with voseo.
|
|
16
|
+
- Act as a senior architect and teacher: concepts before code, no shortcuts.
|
|
17
|
+
- Treat AI as a tool directed by the human; never present yourself as a default chatbot.
|
|
18
|
+
|
|
19
|
+
Harness principles:
|
|
20
|
+
- Gentle AI is not prompt engineering. It is runtime discipline around powerful agents.
|
|
21
|
+
- Prefer SDD/OpenSpec artifacts over floating chat context for non-trivial work.
|
|
22
|
+
- Clarify scope, constraints, acceptance criteria, and non-goals before implementation.
|
|
23
|
+
- Use subagents when available for exploration, planning, implementation, and review, while keeping one parent session responsible for orchestration.
|
|
24
|
+
- Keep writes single-threaded unless the user explicitly approves parallel write isolation.
|
|
25
|
+
- If tests exist, use strict TDD evidence: RED, GREEN, TRIANGULATE, REFACTOR.
|
|
26
|
+
- Protect the human reviewer: avoid oversized changes, surface review workload risk, and ask before turning one task into a large multi-area change.
|
|
27
|
+
- Never claim persistent memory is available because of this package. Memory is provided by separate packages or MCP tools when installed and callable.
|
|
28
|
+
|
|
29
|
+
${ORCHESTRATOR_PROMPT}`;
|
|
30
|
+
|
|
31
|
+
const DENIED_BASH_PATTERNS: RegExp[] = [
|
|
32
|
+
/\brm\s+-rf\s+(?:\/|~|\$HOME|\.\.?)(?:\s|$)/,
|
|
33
|
+
/\bgit\s+reset\s+--hard\b/,
|
|
34
|
+
/\bgit\s+clean\b(?=[^\n]*(?:-[^\n]*f|--force))(?=[^\n]*(?:-[^\n]*d|--directories))/,
|
|
35
|
+
/\bgit\s+push\b(?=[^\n]*\s--force(?:-with-lease)?\b)/,
|
|
36
|
+
/\bchmod\s+-R\s+777\b/,
|
|
37
|
+
/\bchown\s+-R\b/,
|
|
38
|
+
];
|
|
39
|
+
|
|
40
|
+
const CONFIRM_BASH_PATTERNS: RegExp[] = [
|
|
41
|
+
/\bgit\s+push\b/,
|
|
42
|
+
/\bgit\s+rebase\b/,
|
|
43
|
+
/\bgit\s+branch\s+-D\b/,
|
|
44
|
+
/\bnpm\s+publish\b/,
|
|
45
|
+
/\bpi\s+remove\b/,
|
|
46
|
+
];
|
|
47
|
+
|
|
48
|
+
function evaluateCommand(command: string): ToolCallEventResult | undefined {
|
|
49
|
+
for (const pattern of DENIED_BASH_PATTERNS) {
|
|
50
|
+
if (pattern.test(command)) {
|
|
51
|
+
return {
|
|
52
|
+
block: true,
|
|
53
|
+
reason: "Gentle AI safety policy blocked a destructive shell command. Ask the user for an explicit safer plan.",
|
|
54
|
+
};
|
|
55
|
+
}
|
|
56
|
+
}
|
|
57
|
+
for (const pattern of CONFIRM_BASH_PATTERNS) {
|
|
58
|
+
if (pattern.test(command)) {
|
|
59
|
+
return {
|
|
60
|
+
block: true,
|
|
61
|
+
reason: "Gentle AI safety policy requires explicit user approval before this command.",
|
|
62
|
+
};
|
|
63
|
+
}
|
|
64
|
+
}
|
|
65
|
+
return undefined;
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
function copyDirectoryFiles(sourceDir: string, targetDir: string, force: boolean): { copied: number; skipped: number } {
|
|
69
|
+
if (!existsSync(sourceDir)) return { copied: 0, skipped: 0 };
|
|
70
|
+
mkdirSync(targetDir, { recursive: true });
|
|
71
|
+
let copied = 0;
|
|
72
|
+
let skipped = 0;
|
|
73
|
+
for (const entry of readdirSync(sourceDir, { withFileTypes: true })) {
|
|
74
|
+
const sourcePath = join(sourceDir, entry.name);
|
|
75
|
+
const targetPath = join(targetDir, entry.name);
|
|
76
|
+
if (entry.isDirectory()) {
|
|
77
|
+
const child = copyDirectoryFiles(sourcePath, targetPath, force);
|
|
78
|
+
copied += child.copied;
|
|
79
|
+
skipped += child.skipped;
|
|
80
|
+
continue;
|
|
81
|
+
}
|
|
82
|
+
if (!entry.isFile()) continue;
|
|
83
|
+
if (!force && existsSync(targetPath)) {
|
|
84
|
+
skipped += 1;
|
|
85
|
+
continue;
|
|
86
|
+
}
|
|
87
|
+
writeFileSync(targetPath, readFileSync(sourcePath));
|
|
88
|
+
copied += 1;
|
|
89
|
+
}
|
|
90
|
+
return { copied, skipped };
|
|
91
|
+
}
|
|
92
|
+
|
|
93
|
+
function installSddAssets(
|
|
94
|
+
cwd: string,
|
|
95
|
+
force: boolean,
|
|
96
|
+
): { agents: number; chains: number; support: number; skipped: number } {
|
|
97
|
+
const agents = copyDirectoryFiles(join(ASSETS_DIR, "agents"), join(cwd, ".pi", "agents"), force);
|
|
98
|
+
const chains = copyDirectoryFiles(join(ASSETS_DIR, "chains"), join(cwd, ".pi", "chains"), force);
|
|
99
|
+
const support = copyDirectoryFiles(join(ASSETS_DIR, "support"), join(cwd, ".pi", "gentle-ai", "support"), force);
|
|
100
|
+
return {
|
|
101
|
+
agents: agents.copied,
|
|
102
|
+
chains: chains.copied,
|
|
103
|
+
support: support.copied,
|
|
104
|
+
skipped: agents.skipped + chains.skipped + support.skipped,
|
|
105
|
+
};
|
|
106
|
+
}
|
|
107
|
+
|
|
108
|
+
export default function gentleAi(pi: ExtensionAPI): void {
|
|
109
|
+
pi.on("session_start", (_event, ctx) => {
|
|
110
|
+
const result = installSddAssets(ctx.cwd, false);
|
|
111
|
+
if (ctx.hasUI && (result.agents > 0 || result.chains > 0 || result.support > 0)) {
|
|
112
|
+
ctx.ui.notify(
|
|
113
|
+
`Gentle AI SDD assets auto-installed: ${result.agents} agent(s), ${result.chains} chain(s), ${result.support} support file(s).`,
|
|
114
|
+
"info",
|
|
115
|
+
);
|
|
116
|
+
}
|
|
117
|
+
});
|
|
118
|
+
|
|
119
|
+
pi.on("before_agent_start", (event) => ({
|
|
120
|
+
systemPrompt: `${event.systemPrompt}\n\n${GENTLE_AI_PROMPT}`,
|
|
121
|
+
}));
|
|
122
|
+
|
|
123
|
+
pi.on("tool_call", (event) => {
|
|
124
|
+
if (event.toolName !== "bash") return undefined;
|
|
125
|
+
return evaluateCommand(event.input.command);
|
|
126
|
+
});
|
|
127
|
+
|
|
128
|
+
pi.registerCommand("gentle-ai:install-sdd", {
|
|
129
|
+
description: "Install Gentle AI SDD subagent and chain assets into this project.",
|
|
130
|
+
handler: async (args, ctx) => {
|
|
131
|
+
const force = args.includes("--force");
|
|
132
|
+
const result = installSddAssets(ctx.cwd, force);
|
|
133
|
+
ctx.ui.notify(
|
|
134
|
+
`Gentle AI SDD assets installed: ${result.agents} agent(s), ${result.chains} chain(s), ${result.support} support file(s), ${result.skipped} skipped.`,
|
|
135
|
+
"info",
|
|
136
|
+
);
|
|
137
|
+
},
|
|
138
|
+
});
|
|
139
|
+
|
|
140
|
+
pi.registerCommand("gentle-ai:status", {
|
|
141
|
+
description: "Show Gentle AI package status for this project.",
|
|
142
|
+
handler: async (_args, ctx) => {
|
|
143
|
+
const agentsInstalled = existsSync(join(ctx.cwd, ".pi", "agents", "sdd-apply.md"));
|
|
144
|
+
const chainsInstalled = existsSync(join(ctx.cwd, ".pi", "chains", "sdd-full.chain.md"));
|
|
145
|
+
const openspecConfigured = existsSync(join(ctx.cwd, "openspec", "config.yaml"));
|
|
146
|
+
ctx.ui.notify(
|
|
147
|
+
[
|
|
148
|
+
"Gentle AI package is active.",
|
|
149
|
+
`SDD agents: ${agentsInstalled ? "installed" : "not installed"}`,
|
|
150
|
+
`SDD chains: ${chainsInstalled ? "installed" : "not installed"}`,
|
|
151
|
+
`OpenSpec config: ${openspecConfigured ? "present" : "missing"}`,
|
|
152
|
+
].join("\n"),
|
|
153
|
+
"info",
|
|
154
|
+
);
|
|
155
|
+
},
|
|
156
|
+
});
|
|
157
|
+
}
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
import { existsSync, mkdirSync, writeFileSync } from "node:fs";
|
|
2
|
+
import { dirname, join } from "node:path";
|
|
3
|
+
import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
|
|
4
|
+
|
|
5
|
+
const CONFIG_REL_PATH = "openspec/config.yaml";
|
|
6
|
+
|
|
7
|
+
function escapeBlockScalar(value: string): string {
|
|
8
|
+
return value
|
|
9
|
+
.split("\n")
|
|
10
|
+
.map((line) => ` ${line}`)
|
|
11
|
+
.join("\n");
|
|
12
|
+
}
|
|
13
|
+
|
|
14
|
+
function renderConfig(strictTdd: boolean, testCommand: string, context: string): string {
|
|
15
|
+
const lines = [
|
|
16
|
+
`strict_tdd: ${strictTdd}`,
|
|
17
|
+
"context: |",
|
|
18
|
+
escapeBlockScalar(context.trimEnd()),
|
|
19
|
+
"rules:",
|
|
20
|
+
" apply:",
|
|
21
|
+
` test_command: ${testCommand}`,
|
|
22
|
+
"testing:",
|
|
23
|
+
" runner:",
|
|
24
|
+
` command: ${testCommand}`,
|
|
25
|
+
"",
|
|
26
|
+
];
|
|
27
|
+
return lines.join("\n");
|
|
28
|
+
}
|
|
29
|
+
|
|
30
|
+
export default function (pi: ExtensionAPI) {
|
|
31
|
+
pi.registerCommand("sdd-init", {
|
|
32
|
+
description: "Bootstrap openspec/config.yaml for SDD workflow (one-time per project).",
|
|
33
|
+
handler: async (_args, ctx) => {
|
|
34
|
+
const configPath = join(ctx.cwd, CONFIG_REL_PATH);
|
|
35
|
+
if (existsSync(configPath)) {
|
|
36
|
+
ctx.ui.notify(
|
|
37
|
+
`${CONFIG_REL_PATH} already exists. Edit it manually or remove it before re-running /sdd-init.`,
|
|
38
|
+
"warning",
|
|
39
|
+
);
|
|
40
|
+
return;
|
|
41
|
+
}
|
|
42
|
+
|
|
43
|
+
const TDD_YES = "Yes — tests must run before each change";
|
|
44
|
+
const TDD_NO = "No — TDD is opt-in per task";
|
|
45
|
+
const TDD_CANCEL = "Cancel";
|
|
46
|
+
const tddChoice = await ctx.ui.select("Enable strict TDD for this project?", [
|
|
47
|
+
TDD_YES,
|
|
48
|
+
TDD_NO,
|
|
49
|
+
TDD_CANCEL,
|
|
50
|
+
]);
|
|
51
|
+
if (!tddChoice || tddChoice === TDD_CANCEL) {
|
|
52
|
+
ctx.ui.notify("sdd-init cancelled.", "info");
|
|
53
|
+
return;
|
|
54
|
+
}
|
|
55
|
+
const strictTdd = tddChoice === TDD_YES;
|
|
56
|
+
|
|
57
|
+
const testCommand = await ctx.ui.input(
|
|
58
|
+
"Test command",
|
|
59
|
+
"e.g. npm test, pnpm vitest, cargo test",
|
|
60
|
+
);
|
|
61
|
+
if (!testCommand) {
|
|
62
|
+
ctx.ui.notify("sdd-init cancelled (no test command).", "info");
|
|
63
|
+
return;
|
|
64
|
+
}
|
|
65
|
+
|
|
66
|
+
const context = await ctx.ui.input(
|
|
67
|
+
"Project context (one paragraph)",
|
|
68
|
+
"Describe the project, stack, and constraints.",
|
|
69
|
+
);
|
|
70
|
+
if (!context) {
|
|
71
|
+
ctx.ui.notify("sdd-init cancelled (no context).", "info");
|
|
72
|
+
return;
|
|
73
|
+
}
|
|
74
|
+
|
|
75
|
+
mkdirSync(dirname(configPath), { recursive: true });
|
|
76
|
+
writeFileSync(configPath, renderConfig(strictTdd, testCommand.trim(), context));
|
|
77
|
+
ctx.ui.notify(
|
|
78
|
+
`Wrote ${CONFIG_REL_PATH}. Run /skill-registry:refresh once skills with '## Compact Rules' are available.`,
|
|
79
|
+
"info",
|
|
80
|
+
);
|
|
81
|
+
},
|
|
82
|
+
});
|
|
83
|
+
}
|