gentle-pi 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. package/README.md +66 -0
  2. package/assets/agents/sdd-apply.md +71 -0
  3. package/assets/agents/sdd-archive.md +14 -0
  4. package/assets/agents/sdd-design.md +14 -0
  5. package/assets/agents/sdd-explore.md +14 -0
  6. package/assets/agents/sdd-init.md +14 -0
  7. package/assets/agents/sdd-onboard.md +15 -0
  8. package/assets/agents/sdd-proposal.md +14 -0
  9. package/assets/agents/sdd-spec.md +14 -0
  10. package/assets/agents/sdd-tasks.md +61 -0
  11. package/assets/agents/sdd-verify.md +55 -0
  12. package/assets/chains/sdd-full.chain.md +75 -0
  13. package/assets/chains/sdd-plan.chain.md +35 -0
  14. package/assets/chains/sdd-verify.chain.md +27 -0
  15. package/assets/orchestrator.md +191 -0
  16. package/assets/support/strict-tdd-verify.md +269 -0
  17. package/assets/support/strict-tdd.md +364 -0
  18. package/extensions/gentle-ai.ts +157 -0
  19. package/extensions/sdd-init.ts +83 -0
  20. package/extensions/skill-registry.ts +267 -0
  21. package/package.json +47 -0
  22. package/prompts/cl.md +54 -0
  23. package/prompts/is.md +25 -0
  24. package/prompts/pr.md +41 -0
  25. package/prompts/wr.md +31 -0
  26. package/skills/branch-pr/SKILL.md +202 -0
  27. package/skills/chained-pr/SKILL.md +50 -0
  28. package/skills/chained-pr/references/chaining-details.md +99 -0
  29. package/skills/cognitive-doc-design/SKILL.md +81 -0
  30. package/skills/comment-writer/SKILL.md +74 -0
  31. package/skills/gentle-ai/SKILL.md +43 -0
  32. package/skills/issue-creation/SKILL.md +223 -0
  33. package/skills/judgment-day/SKILL.md +52 -0
  34. package/skills/judgment-day/references/prompts-and-formats.md +75 -0
  35. package/skills/work-unit-commits/SKILL.md +86 -0
@@ -0,0 +1,364 @@
1
+ # Strict TDD Module — Apply Phase
2
+
3
+ > **This module is loaded ONLY when Strict TDD Mode is enabled AND a test runner is available.**
4
+ > If you are reading this, the orchestrator already verified both conditions. Follow every instruction.
5
+
6
+ ## TDD Philosophy
7
+
8
+ TDD is not testing. TDD is **software design driven by tests**. You write a test that describes what the code SHOULD do, then write the minimum code to make it real. The tests design the API, the contracts, the behavior. Code is a side effect of tests.
9
+
10
+ ### The Three Laws
11
+
12
+ 1. **Do NOT write production code** until you have a failing test
13
+ 2. **Do NOT write more of a test** than is necessary to fail
14
+ 3. **Do NOT write more code** than is necessary to pass the test
15
+
16
+ ## TDD Implementation Cycle
17
+
18
+ For EVERY task assigned to you, follow this cycle strictly:
19
+
20
+ ```
21
+ FOR EACH TASK:
22
+ ├── 0. SAFETY NET (only if modifying existing files)
23
+ │ ├── Run existing tests for files being modified
24
+ │ ├── Capture baseline: "{N} tests passing"
25
+ │ ├── If any FAIL → STOP, report as "pre-existing failure"
26
+ │ │ (do NOT fix pre-existing failures — report to orchestrator)
27
+ │ └── This baseline proves you did not break what already worked
28
+
29
+ ├── 1. UNDERSTAND
30
+ │ ├── Read the task description
31
+ │ ├── Read relevant spec scenarios (these ARE your acceptance criteria)
32
+ │ ├── Read the design decisions (these CONSTRAIN your approach)
33
+ │ ├── Read existing code and test patterns (match the style)
34
+ │ └── Determine test layer (see "Choosing Test Layer" below)
35
+
36
+ ├── 2. RED — Write a failing test FIRST
37
+ │ ├── Write test(s) that describe the expected behavior from the spec
38
+ │ ├── Prefer pure functions where possible (no side effects = easy to test)
39
+ │ ├── The test MUST reference production code that does NOT exist yet
40
+ │ │ (this guarantees failure — no need to execute to confirm)
41
+ │ ├── If the production code/function already exists:
42
+ │ │ └── Write a test for the NEW behavior that is NOT yet implemented
43
+ │ └── GATE: Do NOT proceed to GREEN until the test is written
44
+
45
+ ├── 3. GREEN — Write the MINIMUM code to pass
46
+ │ ├── Implement ONLY what the failing test needs
47
+ │ ├── Fake It is VALID here (hardcoded return values are OK)
48
+ │ ├── EXECUTE tests → must PASS
49
+ │ │ ├── ✅ Passed → proceed to TRIANGULATE or REFACTOR
50
+ │ │ └── ❌ Failed → fix the implementation, NOT the test
51
+ │ └── GATE: Do NOT proceed until GREEN is confirmed by execution
52
+
53
+ ├── 4. TRIANGULATE (MANDATORY for most tasks)
54
+ │ ├── DEFAULT: triangulation is REQUIRED. You need a compelling reason to skip it.
55
+ │ ├── Add a second test case with DIFFERENT inputs/expected outputs
56
+ │ ├── EXECUTE tests → if Fake It breaks (hardcoded no longer works):
57
+ │ │ └── Generalize to real logic (this is the whole point)
58
+ │ ├── Repeat until ALL spec scenarios for this task are covered
59
+ │ ├── Each triangulation pass: write test → run → fix implementation
60
+ │ ├── MINIMUM: at least 2 test cases per behavior (happy path + one edge case)
61
+ │ │ ├── One test with data that produces a NON-EMPTY/NON-TRIVIAL result
62
+ │ │ └── One test with data that exercises a DIFFERENT code path
63
+ │ ├── WATCH OUT for GREEN that passes trivially:
64
+ │ │ ├── If your test passes because the component/element isn't rendered → NOT a real GREEN
65
+ │ │ ├── If your test passes because a loop iterates 0 times → NOT a real GREEN
66
+ │ │ ├── If your test passes because the setup doesn't trigger the code path → NOT a real GREEN
67
+ │ │ └── A real GREEN means: production code RAN and produced the expected output
68
+ │ ├── Skip triangulation ONLY when ALL of these are true:
69
+ │ │ ├── The task is purely structural (config file, constant definition, type export)
70
+ │ │ ├── There is literally ONE possible output (no branching, no logic)
71
+ │ │ └── You explicitly note "Triangulation skipped: {reason}" in the evidence table
72
+ │ └── GATE: All spec scenarios for this task must have tests before REFACTOR
73
+
74
+ ├── 5. REFACTOR — Improve without changing behavior
75
+ │ ├── Extract constants (eliminate magic numbers)
76
+ │ ├── Extract functions (reduce cyclomatic complexity)
77
+ │ ├── Improve naming, remove duplication
78
+ │ ├── Push toward pure functions where feasible
79
+ │ ├── Apply Boy Scout Rule: leave code cleaner than you found it
80
+ │ ├── EXECUTE tests after EACH refactoring step → must STILL PASS
81
+ │ │ ├── ✅ Still passing → refactoring is safe, continue
82
+ │ │ └── ❌ Failed → REVERT that refactoring step, try smaller
83
+ │ └── GATE: Tests green after EVERY refactoring change
84
+
85
+ ├── 6. Mark task complete [x]
86
+ └── 7. Note any deviations or issues discovered
87
+ ```
88
+
89
+ ## Choosing Test Layer
90
+
91
+ Based on the testing capabilities cached in Engram (`sdd/{project}/testing-capabilities`), choose the appropriate test layer for each task:
92
+
93
+ ```
94
+ Determine test layer by WHAT the task does:
95
+ ├── Pure logic, utility function, calculation, data transformation
96
+ │ └── Unit test (always available if test runner exists)
97
+
98
+ ├── Component rendering, user interaction, state changes
99
+ │ ├── IF integration tools available → Integration test
100
+ │ └── IF NOT → Unit test with mocks (degrade gracefully)
101
+
102
+ ├── Multi-component flow, API interaction, context/provider behavior
103
+ │ ├── IF integration tools available → Integration test
104
+ │ └── IF NOT → Unit test with mocks
105
+
106
+ ├── Critical business flow, full user journey, cross-page navigation
107
+ │ ├── IF E2E tools available → E2E test
108
+ │ ├── IF NOT but integration available → Integration test
109
+ │ └── IF neither → Unit test (degrade gracefully)
110
+
111
+ └── Default: Unit test (always the fallback)
112
+ ```
113
+
114
+ **Key rule**: Use the HIGHEST available layer that fits the task. But NEVER skip a task because a layer is unavailable — degrade to the next available layer.
115
+
116
+ ## Test Execution
117
+
118
+ Detect the test runner from the cached testing capabilities:
119
+
120
+ ```
121
+ Read test command from:
122
+ ├── Cached capabilities → test_runner.command (fastest — already detected)
123
+ ├── openspec/config.yaml → rules.apply.test_command (override)
124
+ └── Fallback: detect from package.json/pyproject.toml/go.mod
125
+
126
+ When executing tests during TDD:
127
+ ├── Run ONLY the relevant test file, not the entire suite
128
+ │ ├── JS/TS: {runner} {test-file-path} (e.g., pnpm vitest run src/utils/tax.test.ts)
129
+ │ ├── Python: pytest {test-file-path}
130
+ │ ├── Go: go test ./{package}/... -run {TestName}
131
+ │ └── Adapt to the runner's CLI
132
+ ├── This keeps the cycle FAST
133
+ └── Full suite runs happen in sdd-verify, not here
134
+ ```
135
+
136
+ ## Pure Function Preference
137
+
138
+ When writing production code in GREEN/TRIANGULATE steps, prefer pure functions:
139
+
140
+ ```
141
+ ✅ PREFER (pure — easy to test):
142
+ function calculateDiscount(price: number, quantity: number): number {
143
+ return quantity >= 5 ? price * quantity * 0.1 : 0
144
+ }
145
+
146
+ ❌ AVOID (impure — hard to test):
147
+ function calculateDiscount(item: Item) {
148
+ globalState.lastDiscount = item.price * 0.1 // side effect
149
+ updateDOM() // side effect
150
+ return globalState.lastDiscount
151
+ }
152
+ ```
153
+
154
+ **Why**: Pure functions are deterministic (same input → same output), have no side effects, and are trivially testable. TDD naturally pushes you toward pure functions — embrace it.
155
+
156
+ ## Approval Testing (for refactoring existing code)
157
+
158
+ When a task involves REFACTORING existing code (not writing new code):
159
+
160
+ ```
161
+ BEFORE touching production code:
162
+ ├── 1. Identify existing behavior to preserve
163
+ ├── 2. Write "approval tests" that capture current behavior:
164
+ │ ├── Call the function with known inputs
165
+ │ ├── Assert the CURRENT outputs (even if ugly or wrong)
166
+ │ └── These tests document what the code does NOW
167
+ ├── 3. Run approval tests → must PASS (they describe current reality)
168
+ ├── 4. NOW refactor the production code
169
+ ├── 5. Run approval tests again → must STILL PASS
170
+ │ ├── ✅ Passing → refactoring preserved behavior
171
+ │ └── ❌ Failing → refactoring broke something, revert
172
+ └── 6. If the spec says behavior should CHANGE:
173
+ ├── Update the approval test to reflect NEW expected behavior
174
+ ├── Run → test FAILS (RED — new behavior not implemented yet)
175
+ └── Implement new behavior → GREEN
176
+ ```
177
+
178
+ ## Return Summary Extension
179
+
180
+ When Strict TDD Mode is active, your return summary MUST include this section:
181
+
182
+ ```markdown
183
+ ### TDD Cycle Evidence
184
+ | Task | Test File | Layer | Safety Net | RED | GREEN | TRIANGULATE | REFACTOR |
185
+ |------|-----------|-------|------------|-----|-------|-------------|----------|
186
+ | 1.1 | `path/test.ext` | Unit | ✅ 5/5 | ✅ Written | ✅ Passed | ✅ 3 cases | ✅ Clean |
187
+ | 1.2 | `path/test.ext` | Integration | N/A (new) | ✅ Written | ✅ Passed | ➖ Single | ✅ Clean |
188
+ | 1.3 | `path/test.ext` | Unit | ✅ 2/2 | ✅ Written | ✅ Passed | ✅ 2 cases | ➖ None needed |
189
+
190
+ ### Test Summary
191
+ - **Total tests written**: {N}
192
+ - **Total tests passing**: {N}
193
+ - **Layers used**: Unit ({N}), Integration ({N}), E2E ({N})
194
+ - **Approval tests** (refactoring): {N} or "None — no refactoring tasks"
195
+ - **Pure functions created**: {N}
196
+ ```
197
+
198
+ **Column definitions**:
199
+ - **Safety Net**: Pre-existing tests run before modifying files. "N/A (new)" for new files.
200
+ - **RED**: Test written first, referencing code that doesn't exist yet. Always "✅ Written".
201
+ - **GREEN**: Tests executed and passing after minimal implementation. Must show execution result.
202
+ - **TRIANGULATE**: Additional test cases added to force real logic. "➖ Single" if spec has only one scenario.
203
+ - **REFACTOR**: Code improved with tests still passing. "➖ None needed" if code was already clean.
204
+
205
+ ## Assertion Quality Rules (MANDATORY)
206
+
207
+ **Every assertion must verify REAL behavior.** A test that passes without exercising production logic is worse than no test — it gives false confidence.
208
+
209
+ ### Banned Assertion Patterns (NEVER write these)
210
+
211
+ ```
212
+ # TRIVIAL ASSERTIONS — test proves nothing
213
+ expect(true).toBe(true) # ❌ Tautology
214
+ expect(false).toBe(false) # ❌ Tautology
215
+ expect(1).toBe(1) # ❌ Tautology — no production code involved
216
+ assert True # ❌ Always passes
217
+ assert 1 == 1 # ❌ Always passes
218
+
219
+ # EMPTY COLLECTION ASSERTIONS without setup context
220
+ expect(result).toEqual([]) # ❌ ONLY valid if you set up conditions for empty
221
+ expect(result).toHaveLength(0) # ❌ Same — why is it empty? Did production code run?
222
+ assert len(result) == 0 # ❌ Same — prove the emptiness comes from real logic
223
+ assert result == [] # ❌ Same
224
+
225
+ # TYPE-ONLY ASSERTIONS — proves existence, not behavior
226
+ expect(result).toBeDefined() # ❌ Alone is useless — WHAT is the value?
227
+ expect(result).not.toBeNull() # ❌ Alone is useless — assert the actual value
228
+ expect(typeof result).toBe('object') # ❌ Alone is useless — what does the object contain?
229
+ assert result is not None # ❌ Alone — assert what result actually IS
230
+
231
+ # GHOST LOOP — assertion inside a loop that iterates 0 times
232
+ const items = screen.queryAllByTestId("item"); // returns []
233
+ for (const item of items) {
234
+ expect(item).toHaveTextContent("value"); # ❌ NEVER EXECUTES — loop body is dead code
235
+ }
236
+ # FIX: assert the collection is non-empty FIRST, or set up data so it IS non-empty:
237
+ expect(items).toHaveLength(3); # ✅ Proves items exist
238
+ for (const item of items) { ... } # ✅ Now the loop actually runs
239
+
240
+ # INCOMPLETE TDD CYCLE — GREEN without TRIANGULATE
241
+ # If your GREEN test passes because the setup doesn't exercise the code path,
242
+ # you are NOT done. You MUST triangulate with a setup that DOES exercise it.
243
+ # Example: testing "search doesn't update until Enter" but the component
244
+ # that receives the search is never rendered → the test proves nothing.
245
+ # FIX: add a test where the component IS rendered and verify the behavior.
246
+ ```
247
+
248
+ ### What Makes a REAL Assertion
249
+
250
+ Every test assertion must satisfy ALL of these:
251
+ 1. **Calls production code** — the test invokes a function, method, or component from the implementation
252
+ 2. **Asserts a specific output** — compares against a concrete expected value derived from the spec
253
+ 3. **Would FAIL if the production code were wrong** — if you change the implementation logic, THIS test breaks
254
+
255
+ ```
256
+ # ✅ REAL assertions — production code determines the result
257
+ expect(calculateDiscount(100, 10)).toBe(10) # Real input → real output
258
+ expect(screen.getByText('Welcome, John')).toBeInTheDocument() # Rendered from data
259
+ assert result[0].status == "FAIL" # Specific finding from check execution
260
+ assert response.status_code == 403 # Real HTTP response from the endpoint
261
+ expect(result).toHaveLength(3) # AND you set up exactly 3 items
262
+ ```
263
+
264
+ ### Empty Collection Rule
265
+
266
+ `expect(result).toEqual([])` or `assert len(result) == 0` is ONLY valid when:
267
+ 1. You set up a specific precondition that SHOULD produce an empty result (e.g., no matching records)
268
+ 2. The production code actually ran and filtered/processed data to arrive at empty
269
+ 3. A companion test with different setup produces a NON-EMPTY result (triangulation)
270
+
271
+ If you cannot explain WHY the result is empty based on setup → the assertion is trivial.
272
+
273
+ ### Smoke Test Rule
274
+
275
+ A test that only renders a component without asserting any output is NOT a valid test:
276
+
277
+ ```
278
+ # ❌ SMOKE TEST ONLY — proves nothing about behavior
279
+ render(<MyComponent data={mockData} />);
280
+ expect(screen.getByTestId("wrapper")).toBeInTheDocument(); # Just proves it rendered
281
+
282
+ # ✅ BEHAVIORAL TEST — proves what the component DOES with the data
283
+ render(<MyComponent data={mockData} />);
284
+ expect(screen.getByText("Expected Title")).toBeInTheDocument(); # Verifies output from data
285
+ expect(screen.getByRole("button")).toHaveTextContent("Submit"); # Verifies real content
286
+ ```
287
+
288
+ "Renders without crash" is a smoke test. It is NOT a unit test, NOT an integration test, and it does NOT count toward TDD coverage. If you need a smoke test, it must be accompanied by real behavioral assertions.
289
+
290
+ ### Mock Hygiene Rules
291
+
292
+ **If you need more mocks than assertions, you are testing at the WRONG level.**
293
+
294
+ ```
295
+ Mock/assertion ratio guide:
296
+ ├── ≤ 3 mocks for a test file → ✅ Healthy — focused test
297
+ ├── 4–6 mocks → ⚠️ Consider extracting logic to a pure function
298
+ ├── 7+ mocks → ❌ STOP — you are testing at the wrong layer
299
+ │ ├── Extract the logic under test to a PURE FUNCTION and test it without mocks
300
+ │ ├── OR move the test to integration/E2E layer where real dependencies exist
301
+ │ └── NEVER write 10+ mocks to verify a one-line transformation
302
+ ```
303
+
304
+ **Extract-Before-Mock Rule**: If the behavior you want to test is a data transformation, mapping, filtering, or conditional logic (e.g., `MUTED → FAIL` status conversion), EXTRACT it to a pure function FIRST, then test the pure function directly. No mocks needed.
305
+
306
+ ```
307
+ # ❌ BAD: 15 mocks to test a one-line status conversion
308
+ vi.mock("next/navigation", ...);
309
+ vi.mock("next/link", ...);
310
+ vi.mock("@/components/shadcn", ...);
311
+ // ... 12 more mocks ...
312
+ render(<StatusCell row={mutedRow} />);
313
+ expect(screen.getByText("FAIL")).toBeInTheDocument();
314
+
315
+ # ✅ GOOD: extract and test the logic directly
316
+ // In production code:
317
+ export function resolveDisplayStatus(status: string, isMuted: boolean): string {
318
+ return status === "MUTED" ? "FAIL" : status;
319
+ }
320
+
321
+ // In test — ZERO mocks needed:
322
+ expect(resolveDisplayStatus("MUTED", true)).toBe("FAIL");
323
+ expect(resolveDisplayStatus("PASS", false)).toBe("PASS");
324
+ ```
325
+
326
+ ### Implementation Detail Coupling Rule
327
+
328
+ Tests must assert **behavior visible to the user**, not internal implementation details:
329
+
330
+ ```
331
+ # ❌ COUPLED TO IMPLEMENTATION — breaks on any style refactor
332
+ expect(element.className).toContain("text-xs");
333
+ expect(element.className).toContain("-mt-2.5");
334
+ expect(element.className).toContain("border-border-error-primary");
335
+ expect(element.style.color).toBe("red");
336
+
337
+ # ❌ COUPLED TO INTERNALS — breaks when implementation changes
338
+ expect(mockService.mock.calls.length).toBe(3); # Why 3? Brittle.
339
+ expect(component.state.isLoading).toBe(true); # Internal state, not behavior.
340
+
341
+ # ✅ BEHAVIORAL — survives refactors, tests what users see
342
+ expect(screen.getByText("Error: Payment failed")).toBeInTheDocument();
343
+ expect(screen.getByRole("alert")).toHaveTextContent("Risk:");
344
+ expect(screen.getByRole("button")).toBeDisabled();
345
+ ```
346
+
347
+ **CSS class assertions are NEVER valid test assertions.** If you need to verify visual styling:
348
+ 1. Test the **semantic outcome** (e.g., element has `role="alert"`, text is visible, button is disabled)
349
+ 2. OR use a visual regression tool / E2E screenshot comparison
350
+ 3. NEVER assert specific Tailwind/CSS class names — they are implementation details
351
+
352
+ ## Rules (Strict TDD specific)
353
+
354
+ - NEVER write production code before writing its test — this is the ONE rule that cannot be broken
355
+ - NEVER skip the GREEN execution gate — you MUST run tests and confirm they pass
356
+ - NEVER skip triangulation when the spec defines multiple scenarios — hardcoded Fake It must be forced out
357
+ - NEVER write trivial assertions (see Banned Assertion Patterns above) — they are WORSE than no test
358
+ - ALWAYS verify that every assertion CALLS production code and asserts a SPECIFIC expected value
359
+ - ALWAYS run the Safety Net before modifying existing files — protect what already works
360
+ - ALWAYS report the TDD Cycle Evidence table — the verify phase will check it
361
+ - If a test runner execution fails for infrastructure reasons (not test failures), report as "Blocked" and continue to next task
362
+ - Prefer pure functions — but don't force it where it doesn't fit (e.g., React components with state)
363
+ - For refactoring tasks, ALWAYS write approval tests before touching code
364
+ - Run ONLY the relevant test file during the cycle, not the full suite
@@ -0,0 +1,157 @@
1
+ import { existsSync, mkdirSync, readdirSync, readFileSync, writeFileSync } from "node:fs";
2
+ import { dirname, join } from "node:path";
3
+ import { fileURLToPath } from "node:url";
4
+ import type { ExtensionAPI, ToolCallEventResult } from "@earendil-works/pi-coding-agent";
5
+
6
// Root of the installed package; this file lives in extensions/, one level below it.
const PACKAGE_ROOT = dirname(dirname(fileURLToPath(import.meta.url)));
// Prompt/agent assets bundled with the package.
const ASSETS_DIR = join(PACKAGE_ROOT, "assets");
// Read once at module load; throws immediately if the packaged asset is missing.
const ORCHESTRATOR_PROMPT = readFileSync(join(ASSETS_DIR, "orchestrator.md"), "utf8").trim();

// Harness prompt appended to the agent system prompt (see the before_agent_start
// handler below). The full orchestrator prompt file is inlined at the end.
const GENTLE_AI_PROMPT = `## Gentle AI Harness
You are operating with the Gentle AI harness package for Pi.

Persona:
- Be direct, technical, and concise.
- When the user writes Spanish, answer in natural Rioplatense Spanish with voseo.
- Act as a senior architect and teacher: concepts before code, no shortcuts.
- Treat AI as a tool directed by the human; never present yourself as a default chatbot.

Harness principles:
- Gentle AI is not prompt engineering. It is runtime discipline around powerful agents.
- Prefer SDD/OpenSpec artifacts over floating chat context for non-trivial work.
- Clarify scope, constraints, acceptance criteria, and non-goals before implementation.
- Use subagents when available for exploration, planning, implementation, and review, while keeping one parent session responsible for orchestration.
- Keep writes single-threaded unless the user explicitly approves parallel write isolation.
- If tests exist, use strict TDD evidence: RED, GREEN, TRIANGULATE, REFACTOR.
- Protect the human reviewer: avoid oversized changes, surface review workload risk, and ask before turning one task into a large multi-area change.
- Never claim persistent memory is available because of this package. Memory is provided by separate packages or MCP tools when installed and callable.

${ORCHESTRATOR_PROMPT}`;
30
+
31
+ const DENIED_BASH_PATTERNS: RegExp[] = [
32
+ /\brm\s+-rf\s+(?:\/|~|\$HOME|\.\.?)(?:\s|$)/,
33
+ /\bgit\s+reset\s+--hard\b/,
34
+ /\bgit\s+clean\b(?=[^\n]*(?:-[^\n]*f|--force))(?=[^\n]*(?:-[^\n]*d|--directories))/,
35
+ /\bgit\s+push\b(?=[^\n]*\s--force(?:-with-lease)?\b)/,
36
+ /\bchmod\s+-R\s+777\b/,
37
+ /\bchown\s+-R\b/,
38
+ ];
39
+
40
+ const CONFIRM_BASH_PATTERNS: RegExp[] = [
41
+ /\bgit\s+push\b/,
42
+ /\bgit\s+rebase\b/,
43
+ /\bgit\s+branch\s+-D\b/,
44
+ /\bnpm\s+publish\b/,
45
+ /\bpi\s+remove\b/,
46
+ ];
47
+
48
+ function evaluateCommand(command: string): ToolCallEventResult | undefined {
49
+ for (const pattern of DENIED_BASH_PATTERNS) {
50
+ if (pattern.test(command)) {
51
+ return {
52
+ block: true,
53
+ reason: "Gentle AI safety policy blocked a destructive shell command. Ask the user for an explicit safer plan.",
54
+ };
55
+ }
56
+ }
57
+ for (const pattern of CONFIRM_BASH_PATTERNS) {
58
+ if (pattern.test(command)) {
59
+ return {
60
+ block: true,
61
+ reason: "Gentle AI safety policy requires explicit user approval before this command.",
62
+ };
63
+ }
64
+ }
65
+ return undefined;
66
+ }
67
+
68
+ function copyDirectoryFiles(sourceDir: string, targetDir: string, force: boolean): { copied: number; skipped: number } {
69
+ if (!existsSync(sourceDir)) return { copied: 0, skipped: 0 };
70
+ mkdirSync(targetDir, { recursive: true });
71
+ let copied = 0;
72
+ let skipped = 0;
73
+ for (const entry of readdirSync(sourceDir, { withFileTypes: true })) {
74
+ const sourcePath = join(sourceDir, entry.name);
75
+ const targetPath = join(targetDir, entry.name);
76
+ if (entry.isDirectory()) {
77
+ const child = copyDirectoryFiles(sourcePath, targetPath, force);
78
+ copied += child.copied;
79
+ skipped += child.skipped;
80
+ continue;
81
+ }
82
+ if (!entry.isFile()) continue;
83
+ if (!force && existsSync(targetPath)) {
84
+ skipped += 1;
85
+ continue;
86
+ }
87
+ writeFileSync(targetPath, readFileSync(sourcePath));
88
+ copied += 1;
89
+ }
90
+ return { copied, skipped };
91
+ }
92
+
93
+ function installSddAssets(
94
+ cwd: string,
95
+ force: boolean,
96
+ ): { agents: number; chains: number; support: number; skipped: number } {
97
+ const agents = copyDirectoryFiles(join(ASSETS_DIR, "agents"), join(cwd, ".pi", "agents"), force);
98
+ const chains = copyDirectoryFiles(join(ASSETS_DIR, "chains"), join(cwd, ".pi", "chains"), force);
99
+ const support = copyDirectoryFiles(join(ASSETS_DIR, "support"), join(cwd, ".pi", "gentle-ai", "support"), force);
100
+ return {
101
+ agents: agents.copied,
102
+ chains: chains.copied,
103
+ support: support.copied,
104
+ skipped: agents.skipped + chains.skipped + support.skipped,
105
+ };
106
+ }
107
+
108
+ export default function gentleAi(pi: ExtensionAPI): void {
109
+ pi.on("session_start", (_event, ctx) => {
110
+ const result = installSddAssets(ctx.cwd, false);
111
+ if (ctx.hasUI && (result.agents > 0 || result.chains > 0 || result.support > 0)) {
112
+ ctx.ui.notify(
113
+ `Gentle AI SDD assets auto-installed: ${result.agents} agent(s), ${result.chains} chain(s), ${result.support} support file(s).`,
114
+ "info",
115
+ );
116
+ }
117
+ });
118
+
119
+ pi.on("before_agent_start", (event) => ({
120
+ systemPrompt: `${event.systemPrompt}\n\n${GENTLE_AI_PROMPT}`,
121
+ }));
122
+
123
+ pi.on("tool_call", (event) => {
124
+ if (event.toolName !== "bash") return undefined;
125
+ return evaluateCommand(event.input.command);
126
+ });
127
+
128
+ pi.registerCommand("gentle-ai:install-sdd", {
129
+ description: "Install Gentle AI SDD subagent and chain assets into this project.",
130
+ handler: async (args, ctx) => {
131
+ const force = args.includes("--force");
132
+ const result = installSddAssets(ctx.cwd, force);
133
+ ctx.ui.notify(
134
+ `Gentle AI SDD assets installed: ${result.agents} agent(s), ${result.chains} chain(s), ${result.support} support file(s), ${result.skipped} skipped.`,
135
+ "info",
136
+ );
137
+ },
138
+ });
139
+
140
+ pi.registerCommand("gentle-ai:status", {
141
+ description: "Show Gentle AI package status for this project.",
142
+ handler: async (_args, ctx) => {
143
+ const agentsInstalled = existsSync(join(ctx.cwd, ".pi", "agents", "sdd-apply.md"));
144
+ const chainsInstalled = existsSync(join(ctx.cwd, ".pi", "chains", "sdd-full.chain.md"));
145
+ const openspecConfigured = existsSync(join(ctx.cwd, "openspec", "config.yaml"));
146
+ ctx.ui.notify(
147
+ [
148
+ "Gentle AI package is active.",
149
+ `SDD agents: ${agentsInstalled ? "installed" : "not installed"}`,
150
+ `SDD chains: ${chainsInstalled ? "installed" : "not installed"}`,
151
+ `OpenSpec config: ${openspecConfigured ? "present" : "missing"}`,
152
+ ].join("\n"),
153
+ "info",
154
+ );
155
+ },
156
+ });
157
+ }
@@ -0,0 +1,83 @@
1
+ import { existsSync, mkdirSync, writeFileSync } from "node:fs";
2
+ import { dirname, join } from "node:path";
3
+ import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
4
+
5
+ const CONFIG_REL_PATH = "openspec/config.yaml";
6
+
7
+ function escapeBlockScalar(value: string): string {
8
+ return value
9
+ .split("\n")
10
+ .map((line) => ` ${line}`)
11
+ .join("\n");
12
+ }
13
+
14
+ function renderConfig(strictTdd: boolean, testCommand: string, context: string): string {
15
+ const lines = [
16
+ `strict_tdd: ${strictTdd}`,
17
+ "context: |",
18
+ escapeBlockScalar(context.trimEnd()),
19
+ "rules:",
20
+ " apply:",
21
+ ` test_command: ${testCommand}`,
22
+ "testing:",
23
+ " runner:",
24
+ ` command: ${testCommand}`,
25
+ "",
26
+ ];
27
+ return lines.join("\n");
28
+ }
29
+
30
+ export default function (pi: ExtensionAPI) {
31
+ pi.registerCommand("sdd-init", {
32
+ description: "Bootstrap openspec/config.yaml for SDD workflow (one-time per project).",
33
+ handler: async (_args, ctx) => {
34
+ const configPath = join(ctx.cwd, CONFIG_REL_PATH);
35
+ if (existsSync(configPath)) {
36
+ ctx.ui.notify(
37
+ `${CONFIG_REL_PATH} already exists. Edit it manually or remove it before re-running /sdd-init.`,
38
+ "warning",
39
+ );
40
+ return;
41
+ }
42
+
43
+ const TDD_YES = "Yes — tests must run before each change";
44
+ const TDD_NO = "No — TDD is opt-in per task";
45
+ const TDD_CANCEL = "Cancel";
46
+ const tddChoice = await ctx.ui.select("Enable strict TDD for this project?", [
47
+ TDD_YES,
48
+ TDD_NO,
49
+ TDD_CANCEL,
50
+ ]);
51
+ if (!tddChoice || tddChoice === TDD_CANCEL) {
52
+ ctx.ui.notify("sdd-init cancelled.", "info");
53
+ return;
54
+ }
55
+ const strictTdd = tddChoice === TDD_YES;
56
+
57
+ const testCommand = await ctx.ui.input(
58
+ "Test command",
59
+ "e.g. npm test, pnpm vitest, cargo test",
60
+ );
61
+ if (!testCommand) {
62
+ ctx.ui.notify("sdd-init cancelled (no test command).", "info");
63
+ return;
64
+ }
65
+
66
+ const context = await ctx.ui.input(
67
+ "Project context (one paragraph)",
68
+ "Describe the project, stack, and constraints.",
69
+ );
70
+ if (!context) {
71
+ ctx.ui.notify("sdd-init cancelled (no context).", "info");
72
+ return;
73
+ }
74
+
75
+ mkdirSync(dirname(configPath), { recursive: true });
76
+ writeFileSync(configPath, renderConfig(strictTdd, testCommand.trim(), context));
77
+ ctx.ui.notify(
78
+ `Wrote ${CONFIG_REL_PATH}. Run /skill-registry:refresh once skills with '## Compact Rules' are available.`,
79
+ "info",
80
+ );
81
+ },
82
+ });
83
+ }