@tianhai/pi-workflow-kit 0.15.0 → 0.16.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -13,107 +13,107 @@ type Phase = "brainstorm" | "plan" | null;
13
13
 
14
14
  // Destructive commands blocked in brainstorm/plan phases
15
15
  const DESTRUCTIVE_PATTERNS = [
16
- /\brm\b/i,
17
- /\brmdir\b/i,
18
- /\bmv\b/i,
19
- /\bcp\b/i,
20
- /\bmkdir\b/i,
21
- /\btouch\b/i,
22
- /\bchmod\b/i,
23
- /\bchown\b/i,
24
- /\bchgrp\b/i,
25
- /\bln\b/i,
26
- /\btee\b/i,
27
- /\btruncate\b/i,
28
- /\bdd\b/i,
29
- /\bshred\b/i,
30
- /(^|[^<])>(?!>)/,
31
- />>/,
32
- /\bnpm\s+(install|uninstall|update|ci|link|publish)/i,
33
- /\byarn\s+(add|remove|install|publish)/i,
34
- /\bpnpm\s+(add|remove|install|publish)/i,
35
- /\bpip\s+(install|uninstall)/i,
36
- /\bapt(-get)?\s+(install|remove|purge|update|upgrade)/i,
37
- /\bbrew\s+(install|uninstall|upgrade)/i,
38
- /\bgit\s+(add|commit|push|pull|merge|rebase|reset|checkout|branch\s+-[dD]|stash(?!\s+list)|cherry-pick|revert|tag(?!\s+(-l|--list))|init|clone)/i,
39
- /\bsudo\b/i,
40
- /\bsu\b/i,
41
- /\bkill\b/i,
42
- /\bpkill\b/i,
43
- /\bkillall\b/i,
44
- /\breboot\b/i,
45
- /\bshutdown\b/i,
46
- /\bsystemctl\s+(start|stop|restart|enable|disable)/i,
47
- /\bservice\s+\S+\s+(start|stop|restart)/i,
48
- /^\s*(vim?|nano|emacs|code|subl)\b/i,
16
+ /\brm\b/i,
17
+ /\brmdir\b/i,
18
+ /\bmv\b/i,
19
+ /\bcp\b/i,
20
+ /\bmkdir\b/i,
21
+ /\btouch\b/i,
22
+ /\bchmod\b/i,
23
+ /\bchown\b/i,
24
+ /\bchgrp\b/i,
25
+ /\bln\b/i,
26
+ /\btee\b/i,
27
+ /\btruncate\b/i,
28
+ /\bdd\b/i,
29
+ /\bshred\b/i,
30
+ /(^|[^<])>(?!>)/,
31
+ />>/,
32
+ /\bnpm\s+(install|uninstall|update|ci|link|publish)/i,
33
+ /\byarn\s+(add|remove|install|publish)/i,
34
+ /\bpnpm\s+(add|remove|install|publish)/i,
35
+ /\bpip\s+(install|uninstall)/i,
36
+ /\bapt(-get)?\s+(install|remove|purge|update|upgrade)/i,
37
+ /\bbrew\s+(install|uninstall|upgrade)/i,
38
+ /\bgit\s+(add|commit|push|pull|merge|rebase|reset|checkout|branch\s+-[dD]|stash(?!\s+list)|cherry-pick|revert|tag(?!\s+(-l|--list))|init|clone)/i,
39
+ /\bsudo\b/i,
40
+ /\bsu\b/i,
41
+ /\bkill\b/i,
42
+ /\bpkill\b/i,
43
+ /\bkillall\b/i,
44
+ /\breboot\b/i,
45
+ /\bshutdown\b/i,
46
+ /\bsystemctl\s+(start|stop|restart|enable|disable)/i,
47
+ /\bservice\s+\S+\s+(start|stop|restart)/i,
48
+ /^\s*(vim?|nano|emacs|code|subl)\b/i,
49
49
  ];
50
50
 
51
51
  const SAFE_PATTERNS = [
52
- /^\s*cat\b/,
53
- /^\s*head\b/,
54
- /^\s*tail\b/,
55
- /^\s*less\b/,
56
- /^\s*more\b/,
57
- /^\s*grep\b/,
58
- /^\s*find\b/,
59
- /^\s*ls\b/,
60
- /^\s*pwd\b/,
61
- /^\s*echo\b/,
62
- /^\s*printf\b/,
63
- /^\s*wc\b/,
64
- /^\s*sort\b/,
65
- /^\s*uniq\b/,
66
- /^\s*diff\b/,
67
- /^\s*file\b/,
68
- /^\s*stat\b/,
69
- /^\s*du\b/,
70
- /^\s*df\b/,
71
- /^\s*tree\b/,
72
- /^\s*which\b/,
73
- /^\s*whereis\b/,
74
- /^\s*type\b/,
75
- /^\s*env\b/,
76
- /^\s*printenv\b/,
77
- /^\s*uname\b/,
78
- /^\s*whoami\b/,
79
- /^\s*id\b/,
80
- /^\s*date\b/,
81
- /^\s*cal\b/,
82
- /^\s*uptime\b/,
83
- /^\s*ps\b/,
84
- /^\s*top\b/,
85
- /^\s*htop\b/,
86
- /^\s*free\b/,
87
- /^\s*git\s+(status|log|diff|show|branch|remote|config\s+--get)/i,
88
- /^\s*git\s+ls-/i,
89
- /^\s*npm\s+(list|ls|view|info|search|outdated|audit)/i,
90
- /^\s*yarn\s+(list|info|why|audit)/i,
91
- /^\s*node\s+--version/i,
92
- /^\s*python\s+--version/i,
93
- /^\s*curl\s/i,
94
- /^\s*wget\s+-O\s*-/i,
95
- /^\s*jq\b/,
96
- /^\s*sed\s+-n/i,
97
- /^\s*awk\b/,
98
- /^\s*rg\b/,
99
- /^\s*fd\b/,
100
- /^\s*bat\b/,
101
- /^\s*eza\b/,
102
- /^\s*cd\b/,
103
- /^\s*gh\s+pr\s+(view|list|diff|checks|status)\b/i,
104
- /^\s*gh\s+issue\s+(view|list)\b/i,
105
- /^\s*gh\s+repo\s+(view|fork|list)\b/i,
106
- /^\s*gh\s+release\s+(view|list|download)\b/i,
107
- /^\s*gh\s+run\s+(view|list)\b/i,
108
- /^\s*git\s+blame\b/,
109
- /^\s*git\s+shortlog\b/,
110
- /^\s*git\s+stash\s+list\b/i,
111
- /^\s*git\s+tag\s+(-l|--list)\b/i,
112
- /^\s*git\s+describe\b/,
113
- /^\s*go\s+doc\b/,
114
- /^\s*go\s+list\b/,
115
- /^\s*go\s+version\b/,
116
- /^\s*go\s+env\b/,
52
+ /^\s*cat\b/,
53
+ /^\s*head\b/,
54
+ /^\s*tail\b/,
55
+ /^\s*less\b/,
56
+ /^\s*more\b/,
57
+ /^\s*grep\b/,
58
+ /^\s*find\b/,
59
+ /^\s*ls\b/,
60
+ /^\s*pwd\b/,
61
+ /^\s*echo\b/,
62
+ /^\s*printf\b/,
63
+ /^\s*wc\b/,
64
+ /^\s*sort\b/,
65
+ /^\s*uniq\b/,
66
+ /^\s*diff\b/,
67
+ /^\s*file\b/,
68
+ /^\s*stat\b/,
69
+ /^\s*du\b/,
70
+ /^\s*df\b/,
71
+ /^\s*tree\b/,
72
+ /^\s*which\b/,
73
+ /^\s*whereis\b/,
74
+ /^\s*type\b/,
75
+ /^\s*env\b/,
76
+ /^\s*printenv\b/,
77
+ /^\s*uname\b/,
78
+ /^\s*whoami\b/,
79
+ /^\s*id\b/,
80
+ /^\s*date\b/,
81
+ /^\s*cal\b/,
82
+ /^\s*uptime\b/,
83
+ /^\s*ps\b/,
84
+ /^\s*top\b/,
85
+ /^\s*htop\b/,
86
+ /^\s*free\b/,
87
+ /^\s*git\s+(status|log|diff|show|branch|remote|config\s+--get)/i,
88
+ /^\s*git\s+ls-/i,
89
+ /^\s*npm\s+(list|ls|view|info|search|outdated|audit)/i,
90
+ /^\s*yarn\s+(list|info|why|audit)/i,
91
+ /^\s*node\s+--version/i,
92
+ /^\s*python\s+--version/i,
93
+ /^\s*curl\s/i,
94
+ /^\s*wget\s+-O\s*-/i,
95
+ /^\s*jq\b/,
96
+ /^\s*sed\s+-n/i,
97
+ /^\s*awk\b/,
98
+ /^\s*rg\b/,
99
+ /^\s*fd\b/,
100
+ /^\s*bat\b/,
101
+ /^\s*eza\b/,
102
+ /^\s*cd\b/,
103
+ /^\s*gh\s+pr\s+(view|list|diff|checks|status)\b/i,
104
+ /^\s*gh\s+issue\s+(view|list)\b/i,
105
+ /^\s*gh\s+repo\s+(view|fork|list)\b/i,
106
+ /^\s*gh\s+release\s+(view|list|download)\b/i,
107
+ /^\s*gh\s+run\s+(view|list)\b/i,
108
+ /^\s*git\s+blame\b/,
109
+ /^\s*git\s+shortlog\b/,
110
+ /^\s*git\s+stash\s+list\b/i,
111
+ /^\s*git\s+tag\s+(-l|--list)\b/i,
112
+ /^\s*git\s+describe\b/,
113
+ /^\s*go\s+doc\b/,
114
+ /^\s*go\s+list\b/,
115
+ /^\s*go\s+version\b/,
116
+ /^\s*go\s+env\b/,
117
117
  ];
118
118
 
119
119
  /** Split a compound command into individual sub-commands.
@@ -121,114 +121,103 @@ const SAFE_PATTERNS = [
121
121
  * Does NOT split on | (pipe) to allow piping (e.g. `git log | head`).
122
122
  */
123
123
  function splitCompoundCommand(command: string): string[] {
124
- // Match sub-commands separated by &&, ||, ; (with optional whitespace)
125
- // We don't split on | to allow piping (e.g. `git log | head`)
126
- return command
127
- .split(/&&|\|\||;/)
128
- .map((s) => s.trim())
129
- .filter((s) => s.length > 0);
124
+ // Match sub-commands separated by &&, ||, ; (with optional whitespace)
125
+ // We don't split on | to allow piping (e.g. `git log | head`)
126
+ return command
127
+ .split(/&&|\|\||;/)
128
+ .map((s) => s.trim())
129
+ .filter((s) => s.length > 0);
130
130
  }
131
131
 
132
132
  /** Strip stderr redirects that are purely cosmetic (no side effects). */
133
133
  function stripHarmlessRedirects(cmd: string): string {
134
- return cmd.replace(/\s*2\s*>\s*(\/dev\/null|&1)\b/g, "");
134
+ return cmd.replace(/\s*2\s*>\s*(\/dev\/null|&1)\b/g, "");
135
135
  }
136
136
 
137
137
  export function isSafeCommand(command: string): boolean {
138
- const parts = splitCompoundCommand(command);
139
- return parts.every(
140
- (part) => {
141
- const cleaned = stripHarmlessRedirects(part);
142
- const isDestructive = DESTRUCTIVE_PATTERNS.some((p) => p.test(cleaned));
143
- const isSafe = SAFE_PATTERNS.some((p) => p.test(cleaned));
144
- return !isDestructive && isSafe;
145
- },
146
- );
138
+ const parts = splitCompoundCommand(command);
139
+ return parts.every((part) => {
140
+ const cleaned = stripHarmlessRedirects(part);
141
+ const isDestructive = DESTRUCTIVE_PATTERNS.some((p) => p.test(cleaned));
142
+ const isSafe = SAFE_PATTERNS.some((p) => p.test(cleaned));
143
+ return !isDestructive && isSafe;
144
+ });
147
145
  }
148
146
 
149
147
  const SKILL_TO_PHASE: Record<string, Phase> = {
150
- brainstorming: "brainstorm",
151
- "writing-plans": "plan",
148
+ brainstorming: "brainstorm",
149
+ "writing-plans": "plan",
152
150
  };
153
151
 
154
152
  /** Determine if a write/edit to filePath should be blocked during the given phase.
155
153
  * Only writes under docs/plans/ are allowed during brainstorm and plan phases.
156
154
  */
157
- export function shouldBlockFilePath(
158
- filePath: string,
159
- cwd: string,
160
- ): boolean {
161
- const absolute = resolve(cwd, filePath);
162
- const plansDir = resolve(cwd, "docs/plans");
163
- return !absolute.startsWith(plansDir + "/");
155
+ export function shouldBlockFilePath(filePath: string, cwd: string): boolean {
156
+ const absolute = resolve(cwd, filePath);
157
+ const plansDir = resolve(cwd, "docs/plans");
158
+ return !absolute.startsWith(`${plansDir}/`);
164
159
  }
165
160
 
166
161
  export function getCurrentPhase(): Phase {
167
- return phase;
162
+ return phase;
168
163
  }
169
164
 
170
165
  let phase: Phase = null;
171
166
 
172
167
  export default function (pi: ExtensionAPI) {
173
- pi.on("session_start", () => {
174
- phase = null;
175
- });
176
-
177
- pi.on("input", (event) => {
178
- const text = event.text ?? "";
179
- const match = text.match(/^\/skill:([\w-]+)/);
180
- if (match) {
181
- const skill = match[1];
182
- if (skill in SKILL_TO_PHASE) {
183
- phase = SKILL_TO_PHASE[skill];
184
- return;
185
- }
186
- }
187
- if (
188
- text.startsWith("/skill:executing-tasks") ||
189
- text.startsWith("/skill:finalizing")
190
- ) {
191
- phase = null;
192
- }
193
- });
194
-
195
- pi.on("tool_call", (event, ctx) => {
196
- if (!phase) return;
197
-
198
- if (event.toolName === "bash") {
199
- const command = (event.input as { command?: string }).command ?? "";
200
- if (!isSafeCommand(command)) {
201
- if (ctx.hasUI) {
202
- ctx.ui.notify(
203
- `Blocked bash command during ${phase} phase: ${command}`,
204
- "warning",
205
- );
206
- }
207
- return {
208
- block: true,
209
- reason: `⚠️ ${phase.toUpperCase()} PHASE: Bash command blocked (not allowlisted). Only read-only commands are permitted during brainstorming and planning.\nCommand: ${command}`,
210
- };
211
- }
212
- return;
213
- }
214
-
215
- if (event.toolName !== "write" && event.toolName !== "edit") return;
216
-
217
- const filePath = (event.input as { path?: string }).path ?? "";
218
- if (!filePath) return;
219
-
220
- if (!shouldBlockFilePath(filePath, ctx.cwd)) return;
221
-
222
- if (ctx.hasUI) {
223
- ctx.ui.notify(
224
- `Blocked ${event.toolName} to ${filePath} during ${phase} phase. Only docs/plans/ is writable.`,
225
- "warning",
226
- );
227
- }
228
-
229
- return {
230
- block: true,
231
- reason: `⚠️ ${phase.toUpperCase()} PHASE: Cannot ${event.toolName} to ${filePath}. Only docs/plans/ is writable during brainstorming and planning.`,
232
- };
233
- });
168
+ pi.on("session_start", () => {
169
+ phase = null;
170
+ });
171
+
172
+ pi.on("input", (event) => {
173
+ const text = event.text ?? "";
174
+ const match = text.match(/^\/skill:([\w-]+)/);
175
+ if (match) {
176
+ const skill = match[1];
177
+ if (skill in SKILL_TO_PHASE) {
178
+ phase = SKILL_TO_PHASE[skill];
179
+ return;
180
+ }
181
+ }
182
+ if (text.startsWith("/skill:executing-tasks") || text.startsWith("/skill:finalizing")) {
183
+ phase = null;
184
+ }
185
+ });
186
+
187
+ pi.on("tool_call", (event, ctx) => {
188
+ if (!phase) return;
189
+
190
+ if (event.toolName === "bash") {
191
+ const command = (event.input as { command?: string }).command ?? "";
192
+ if (!isSafeCommand(command)) {
193
+ if (ctx.hasUI) {
194
+ ctx.ui.notify(`Blocked bash command during ${phase} phase: ${command}`, "warning");
195
+ }
196
+ return {
197
+ block: true,
198
+ reason: `⚠️ ${phase.toUpperCase()} PHASE: Bash command blocked (not allowlisted). Only read-only commands are permitted during brainstorming and planning.\nCommand: ${command}`,
199
+ };
200
+ }
201
+ return;
202
+ }
203
+
204
+ if (event.toolName !== "write" && event.toolName !== "edit") return;
205
+
206
+ const filePath = (event.input as { path?: string }).path ?? "";
207
+ if (!filePath) return;
208
+
209
+ if (!shouldBlockFilePath(filePath, ctx.cwd)) return;
210
+
211
+ if (ctx.hasUI) {
212
+ ctx.ui.notify(
213
+ `Blocked ${event.toolName} to ${filePath} during ${phase} phase. Only docs/plans/ is writable.`,
214
+ "warning",
215
+ );
216
+ }
217
+
218
+ return {
219
+ block: true,
220
+ reason: `⚠️ ${phase.toUpperCase()} PHASE: Cannot ${event.toolName} to ${filePath}. Only docs/plans/ is writable during brainstorming and planning.`,
221
+ };
222
+ });
234
223
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@tianhai/pi-workflow-kit",
3
- "version": "0.15.0",
3
+ "version": "0.16.0",
4
4
  "description": "Enforce structured brainstorm→plan→execute→finalize workflow with TDD discipline in AI coding agents",
5
5
  "keywords": [
6
6
  "pi-package",
@@ -29,6 +29,10 @@ Read-only exploration. You may **not** edit or create any files except under `do
29
29
  ```
30
30
 
31
31
  ADRs live under `docs/plans/adr/` and are archived during finalizing alongside the design doc.
32
+
33
+ For non-trivial designs, note any areas that may need production-risk review (database schema changes, authentication or authorization, external API integrations, concurrency or batch processing, file uploads or large data flows, Redis/caching/message queues). You don't need to audit them here — just flag them for the design-review stage.
34
+
35
+ For trivial changes (config, naming, simple field additions), note "Simple change — no design review needed" in the design doc.
32
36
  5. **Write the design doc** — save it to `docs/plans/YYYY-MM-DD-<topic>-design.md`. Organize features as end-to-end slices (each slice delivers one observable behavior through all relevant layers) so the planning phase can decompose them directly into tasks. Branch creation, committing, and workspace setup are handled by `/skill:executing-tasks`.
33
37
 
34
38
  ## Principles
@@ -40,4 +44,5 @@ Read-only exploration. You may **not** edit or create any files except under `do
40
44
 
41
45
  ## After the design
42
46
 
43
- Ask: "Ready to plan? Run `/skill:writing-plans`"
47
+ - **Non-trivial design**: Ask: "Design looks good. Run `/skill:design-review` to check for production risks before planning."
48
+ - **Trivial change**: Ask: "Simple change — skip design review. Ready to plan? Run `/skill:writing-plans`"
@@ -0,0 +1,113 @@
1
+ ---
2
+ name: design-review
3
+ description: "Audit a design doc for production risks — security, scalability, fault tolerance, and operational hazards. Use after brainstorming for non-trivial designs, or when you want to stress-test a design for production readiness."
4
+ ---
5
+
6
+ # Design Review
7
+
8
+ This is a read-only review: the only file you **may** edit is the design doc, to append review findings. You may **not** edit source code or configuration.
9
+
10
+ ## Process
11
+
12
+ 1. **Find the design doc** — look for `docs/plans/*-design.md`. If none exists, say "No design doc found. Run `/skill:brainstorming` first." and stop.
13
+
14
+ 2. **Check triviality** — if the design doc notes "Simple change — no design review needed", append a brief section:
15
+
16
+ ```markdown
17
+ ## Architectural Review
18
+
19
+ **Status**: Skipped — trivial change. No high-risk operations detected.
20
+ ```
21
+
22
+ Then say: "Ready to plan? Run `/skill:writing-plans`" and stop.
23
+
24
+ 3. **Read the design doc in full** — understand the architecture, data flow, components, and error handling proposed.
25
+
26
+ 4. **🏛️ Architectural Pillars Review** — evaluate the design against the 6 Pillars of Production-Grade Design:
27
+
28
+ 1. **Robustness & Fault Tolerance**: How expected failures are handled, subsystem isolation, graceful degradation.
29
+ 2. **Atomicity & Consistency**: Database transactions, state rollback on error, endpoint idempotency.
30
+ 3. **Security & Access Control**: Input validation/sanitization, authorization checks at the boundary.
31
+ 4. **Scalability & Performance**: Connection pooling, preventing resource leaks, avoiding N+1 queries.
32
+ 5. **Backwards Compatibility**: Schema migration safety, zero-downtime deployment, API versioning.
33
+ 6. **Testability**: Injection seams for external dependencies (APIs, system clocks, randomizers) to keep tests 100% deterministic.
34
+
35
+ For each pillar, write a 1-2 sentence assessment. Flag any concerns.
36
+
37
+ 5. **⚠️ High-Risk Hazard Audit** — evaluate the design against the 8 High-Risk Production Hazards. For each hazard, write either `[SAFE]` (with a 1-sentence justification) or `[TRIGGERED]` (detailing the mitigation):
38
+
39
+ 1. **Unbounded Redis Deletions / Operations**: Multi-key deletions or scans (e.g. `KEYS` or raw `SCAN` loops) that block Redis's single-threaded command processing.
40
+ 2. **In-Memory OOM Loops**: Fetching complete database datasets into server memory (e.g., raw `select *`) to filter, sort, or map them in the runtime heap.
41
+ 3. **Unbounded Concurrency Spikes**: Running concurrent network requests (e.g. unthrottled `Promise.all`) without strict batch limits.
42
+ 4. **Missing High-Frequency Indexes**: Running queries on unindexed columns, forcing expensive table-scans under load.
43
+ 5. **Nested/Long-Running Transactions**: Holding database connections and locks open while awaiting slow external HTTP, disk, or cryptographic tasks.
44
+ 6. **Unrestricted Uploads & Temp Flooding**: Writing uploaded data directly to local temporary paths without validation limits or explicit `finally` cleanup blocks.
45
+ 7. **Raw Query String Interpolation**: Merging raw variables into SQL queries or shell command inputs (susceptible to injection).
46
+ 8. **Silent Swallowing Loops**: Background workers or cron tasks silently catching and suppressing exceptions without logging, back-offs, or alerts.
47
+
48
+ 6. **🔍 Socratic Risk Discovery** — put on your **SRE Hat** and audit the proposed logic against 3 heuristics to identify novel or domain-specific risks:
49
+
50
+ - **The "Scale to 100x" Heuristic**: If this operation is run 100x/sec or on 100k items, what breaks? (Memory, CPU, Disk I/O, sockets, database connection limits).
51
+ - **The "Hostile World" Heuristic**: If a malicious actor has complete control over these inputs (headers, payloads, IDs), how can they exploit, crash, or extract data?
52
+ - **The "Silent Error" Heuristic**: If this downstream dependency or query hangs or fails silently, how does our server react? Is there a timeout, a back-off, or logging?
53
+
54
+ For each heuristic, note any risks discovered. If a risk overlaps with a triggered hazard, cross-reference it.
55
+
56
+ 7. **Present findings** — show the full review to the user. For each triggered hazard or Socratic risk, propose a concrete mitigation. Wait for user feedback and incorporate changes.
57
+
58
+ 8. **Append to design doc** — add a `## Architectural Review` section to the design doc. Two cases:
59
+
60
+ **All clear** (no hazards triggered, no Socratic risks):
61
+ ```markdown
62
+ ## Architectural Review
63
+
64
+ **Status**: ✅ No high-risk hazards detected.
65
+
66
+ **Pillars reviewed**: All 6 — no concerns.
67
+ **Hazards audited**: All 8 [SAFE].
68
+ **Socratic risks**: None identified.
69
+ ```
70
+
71
+ **Hazards or risks found**:
72
+ ```markdown
73
+ ## Architectural Review
74
+
75
+ **Status**: ⚠️ High-risk operations detected — see mitigations below.
76
+
77
+ ### Pillar Assessments
78
+ - **Robustness**: [assessment]
79
+ - **Atomicity**: [assessment]
80
+ - **Security**: [assessment]
81
+ - **Scalability**: [assessment]
82
+ - **Backwards Compatibility**: [assessment]
83
+ - **Testability**: [assessment]
84
+
85
+ ### Hazard Audit
86
+ - 1. Unbounded Redis: [SAFE / TRIGGERED — mitigation]
87
+ - 2. In-Memory OOM: [SAFE / TRIGGERED — mitigation]
88
+ - 3. Unbounded Concurrency: [SAFE / TRIGGERED — mitigation]
89
+ - 4. Missing Indexes: [SAFE / TRIGGERED — mitigation]
90
+ - 5. Long-Running Transactions: [SAFE / TRIGGERED — mitigation]
91
+ - 6. Unrestricted Uploads: [SAFE / TRIGGERED — mitigation]
92
+ - 7. Query Interpolation: [SAFE / TRIGGERED — mitigation]
93
+ - 8. Silent Swallowing: [SAFE / TRIGGERED — mitigation]
94
+
95
+ ### ⚠️ High-Risk Operations & Mitigations
96
+ [Detailed mitigation for each TRIGGERED hazard and Socratic risk]
97
+
98
+ ### Socratic Risks
99
+ - **Scale to 100x**: [finding or "none identified"]
100
+ - **Hostile World**: [finding or "none identified"]
101
+ - **Silent Error**: [finding or "none identified"]
102
+ ```
103
+
104
+ ## Principles
105
+
106
+ - Be specific — every `[TRIGGERED]` hazard must include a concrete mitigation, not just "be careful"
107
+ - Be honest — if the design is risky and the risk can't be mitigated easily, say so
108
+ - Be proportional — a simple CRUD endpoint doesn't need the same depth as a batch processing pipeline
109
+ - Don't redesign — flag risks and propose mitigations, but the design owner decides
110
+
111
+ ## After the review
112
+
113
+ Ask: "Ready to plan? Run `/skill:writing-plans`"
@@ -156,16 +156,23 @@ For each task:
156
156
 
157
157
  1. **Mark in-progress** — update the progress file: `🔄 in-progress`
158
158
  2. **Read the plan** — read the plan's overview section (everything before `## Task 1:`). Skim all `## Task N:` headings for dependency awareness. Then read the current task's body in full. **Read `docs/lessons.md` if it exists** — follow all rules listed there while working on this task.
159
- 3. **Execute the plan steps** — follow each numbered step in the task body, in order. Stop at any `⏸ CHECKPOINT` gate (see [Checkpoint gates](#checkpoint-gates--when-the-plan-says-stop)).
160
- 4. **Verify against task description** — re-read the task from the plan. Does the implementation satisfy every requirement listed? If not, fix before proceeding.
161
- 5. **Refactor** after all tests pass, look for:
159
+ 3. **Execute the plan steps** — follow each numbered step in the task body, in order. As you work, shift your cognitive focus through three frames:
160
+
161
+ **QA Test frame** (when writing/running tests): Focus entirely on translating the task's `Given/When/Then` Acceptance Criteria into precise failing tests. Before running tests, verify the test environment is sandboxed — no real database connections, API calls, or live services. External dependencies must be mocked or stubbed. Ensure the test environment is isolated (e.g., `NODE_ENV=test`, `GO_ENV=test`, or equivalent for your stack).
162
+
163
+ **Pragmatic Developer frame** (when implementing): Focus on the simplest possible code to make the tests green. Do not over-engineer or add code for future requirements. Keep complexity to a bare minimum.
164
+
165
+ **Senior Refactoring frame** (when refactoring): Evaluate the craftsmanship of the code. Check for:
162
166
  - **Shallow modules** — is the interface nearly as complex as the implementation? Can complexity be hidden behind a simpler interface?
163
167
  - **Deletion test** — if you deleted this module, would complexity vanish (pass-through) or reappear across callers (earning its keep)?
164
168
  - **Duplication** — extract repeated patterns
165
169
  - **Seam discipline** — don't introduce abstraction unless something actually varies across it. One adapter = hypothetical seam. Two adapters = real seam
166
170
 
167
171
  Run tests after each refactor step. Never refactor while tests are failing.
168
- 6. **Learn from mistakes** — if you caught yourself making a mistake during this task that you've made before or that would apply to future tasks, append a rule to `docs/lessons.md`. Only add rules that would change future behavior. If the file doesn't exist, create it with the standard format (see below).
172
+
173
+ Stop at any `⏸ CHECKPOINT` gate (see [Checkpoint gates](#checkpoint-gates--when-the-plan-says-stop)).
174
+ 4. **Verify against task description** — re-read the task from the plan. Does the implementation satisfy every requirement listed? If not, fix before proceeding.
175
+ 5. **Learn from mistakes** — if you caught yourself making a mistake during this task that you've made before or that would apply to future tasks, append a rule to `docs/lessons.md`. Only add rules that would change future behavior. If the file doesn't exist, create it with the standard format (see below).
169
176
 
170
177
  Before writing, apply the **generalization test**: would this rule apply equally to a completely different feature or domain in this repo? If not, rewrite it — strip out specific service names, entity types, and domain concepts, and express the underlying pattern instead. If you can't express a generic form, don't write the rule.
171
178
 
@@ -174,9 +181,9 @@ For each task:
174
181
 
175
182
  ✅ **Generic** (applies across the whole repo):
176
183
  > "Always validate required ID fields at the service boundary — missing IDs should return 400, not 500"
177
- 7. **Commit** — after all steps are done (no checkpoint gates remain in the task), `git add` the relevant files and commit with a clear message.
178
- 8. **Update progress** — mark `✅ done` + record the commit hash.
179
- 9. **Suggest session break if needed** — after completing ~3-5 tasks since the last break, suggest:
184
+ 6. **Commit** — after all steps are done (no checkpoint gates remain in the task), `git add` the relevant files and commit with a clear message.
185
+ 7. **Update progress** — mark `✅ done` + record the commit hash.
186
+ 8. **Suggest session break if needed** — after completing ~3-5 tasks since the last break, suggest:
180
187
  ```
181
188
  ✅ Tasks N-M done (commits: abc, def)
182
189
  Progress: X/Y tasks done
@@ -186,7 +193,7 @@ For each task:
186
193
  (or just say "continue" to keep going here)
187
194
  ```
188
195
  Also suggest at checkpoint review pauses when multiple tasks have been completed since the last break. Respect the user's choice if they say "continue".
189
- 10. **Loop** — go back to step 1 for the next `⬜ pending` task, or see [After all tasks](#after-all-tasks) if none remain.
196
+ 9. **Loop** — go back to step 1 for the next `⬜ pending` task, or see [After all tasks](#after-all-tasks) if none remain.
190
197
 
191
198
  ### `docs/lessons.md` format
192
199
 
@@ -205,6 +212,8 @@ Retire rules that no longer apply during finalizing.
205
212
  - <new rule here>
206
213
  ```
207
214
 
215
+ When adding a new rule during execution, always append it under `## Rules`. The categorization into specific headers (e.g., `## Tool Usage`, `## Testing Patterns`) is done during finalizing — never during execution.
216
+
208
217
  ### Checkpoint gates — when the plan says STOP
209
218
 
210
219
  The plan marks certain steps with `⏸ **CHECKPOINT: test**` or `⏸ **CHECKPOINT: done**`. These are hard stop points. When you reach one:
@@ -35,10 +35,12 @@ Wait for the user to confirm before proceeding.
35
35
 
36
36
  Each `mv` gracefully handles the case where no matching files exist (e.g., if the user skipped straight from brainstorm to finalize without executing tasks).
37
37
 
38
- 2. **Review lessons learned** — if `docs/lessons.md` exists, review it:
39
- - Add any lessons from this session that were missed during execution
38
+ 2. **Review & Polish Lessons (Agile Scrum Master Hat)** — if `docs/lessons.md` exists, put on your **Agile Scrum Master Hat** to curate and optimize it for future sprints:
39
+ - **Add missed lessons** — capture any lessons from this session that weren't written during execution
40
40
  - **Generalize domain-specific rules** — if a rule names a specific service, entity, or feature, either rewrite it as a generic pattern or remove it if no generic form exists
41
- - Retire rules that no longer apply (remove the bullet)
41
+ - **De-duplicate** — combine overlapping or redundant rules into single, sharper entries
42
+ - **Categorize** — group the rules under clear, structured markdown headers (e.g., `## Tool Usage`, `## Testing Patterns`, `## Architecture Rules`) to make the document highly scannable for future sessions. Keep the `## Rules` section as the append target for new entries during execution — categorization moves rules out of `## Rules` into the appropriate category headers.
43
+ - **Retire stale rules** — remove bullets that no longer apply
42
44
  - If no changes are needed, leave it as-is
43
45
 
44
46
  If `docs/lessons.md` doesn't exist but lessons were learned this session, create it with the standard format: