npm - @tianhai/pi-workflow-kit - Versions diffs - 0.15.0 → 0.16.0 - Mend

@tianhai/pi-workflow-kit 0.15.0 → 0.16.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14) hide show

package/README.md +16 -8
package/docs/plans/completed/2026-05-22-agentic-agile-enhancements-design.md +77 -0
package/docs/plans/completed/2026-05-22-agentic-agile-enhancements-implementation.md +473 -0
package/docs/plans/completed/2026-05-25-design-review-split-implementation.md +622 -0
package/docs/plans/completed/2026-05-25-design-review-split-progress.md +16 -0
package/docs/plans/completed/2026-05-25-pr5-improvements-implementation.md +273 -0
package/docs/plans/completed/2026-05-25-pr5-improvements-progress.md +17 -0
package/extensions/workflow-guard.ts +174 -185
package/package.json +1 -1
package/skills/brainstorming/SKILL.md +6 -1
package/skills/design-review/SKILL.md +113 -0
package/skills/executing-tasks/SKILL.md +17 -8
package/skills/finalizing/SKILL.md +5 -3
package/skills/writing-plans/SKILL.md +70 -1

package/extensions/workflow-guard.ts CHANGED Viewed

@@ -13,107 +13,107 @@ type Phase = "brainstorm" | "plan" | null;
 // Destructive commands blocked in brainstorm/plan phases
 const DESTRUCTIVE_PATTERNS = [
-	/\brm\b/i,
-	/\brmdir\b/i,
-	/\bmv\b/i,
-	/\bcp\b/i,
-	/\bmkdir\b/i,
-	/\btouch\b/i,
-	/\bchmod\b/i,
-	/\bchown\b/i,
-	/\bchgrp\b/i,
-	/\bln\b/i,
-	/\btee\b/i,
-	/\btruncate\b/i,
-	/\bdd\b/i,
-	/\bshred\b/i,
-	/(^|[^<])>(?!>)/,
-	/>>/,
-	/\bnpm\s+(install|uninstall|update|ci|link|publish)/i,
-	/\byarn\s+(add|remove|install|publish)/i,
-	/\bpnpm\s+(add|remove|install|publish)/i,
-	/\bpip\s+(install|uninstall)/i,
-	/\bapt(-get)?\s+(install|remove|purge|update|upgrade)/i,
-	/\bbrew\s+(install|uninstall|upgrade)/i,
-	/\bgit\s+(add|commit|push|pull|merge|rebase|reset|checkout|branch\s+-[dD]|stash(?!\s+list)|cherry-pick|revert|tag(?!\s+(-l|--list))|init|clone)/i,
-	/\bsudo\b/i,
-	/\bsu\b/i,
-	/\bkill\b/i,
-	/\bpkill\b/i,
-	/\bkillall\b/i,
-	/\breboot\b/i,
-	/\bshutdown\b/i,
-	/\bsystemctl\s+(start|stop|restart|enable|disable)/i,
-	/\bservice\s+\S+\s+(start|stop|restart)/i,
-	/^\s*(vim?|nano|emacs|code|subl)\b/i,
+  /\brm\b/i,
+  /\brmdir\b/i,
+  /\bmv\b/i,
+  /\bcp\b/i,
+  /\bmkdir\b/i,
+  /\btouch\b/i,
+  /\bchmod\b/i,
+  /\bchown\b/i,
+  /\bchgrp\b/i,
+  /\bln\b/i,
+  /\btee\b/i,
+  /\btruncate\b/i,
+  /\bdd\b/i,
+  /\bshred\b/i,
+  /(^|[^<])>(?!>)/,
+  />>/,
+  /\bnpm\s+(install|uninstall|update|ci|link|publish)/i,
+  /\byarn\s+(add|remove|install|publish)/i,
+  /\bpnpm\s+(add|remove|install|publish)/i,
+  /\bpip\s+(install|uninstall)/i,
+  /\bapt(-get)?\s+(install|remove|purge|update|upgrade)/i,
+  /\bbrew\s+(install|uninstall|upgrade)/i,
+  /\bgit\s+(add|commit|push|pull|merge|rebase|reset|checkout|branch\s+-[dD]|stash(?!\s+list)|cherry-pick|revert|tag(?!\s+(-l|--list))|init|clone)/i,
+  /\bsudo\b/i,
+  /\bsu\b/i,
+  /\bkill\b/i,
+  /\bpkill\b/i,
+  /\bkillall\b/i,
+  /\breboot\b/i,
+  /\bshutdown\b/i,
+  /\bsystemctl\s+(start|stop|restart|enable|disable)/i,
+  /\bservice\s+\S+\s+(start|stop|restart)/i,
+  /^\s*(vim?|nano|emacs|code|subl)\b/i,
 ];
 const SAFE_PATTERNS = [
-	/^\s*cat\b/,
-	/^\s*head\b/,
-	/^\s*tail\b/,
-	/^\s*less\b/,
-	/^\s*more\b/,
-	/^\s*grep\b/,
-	/^\s*find\b/,
-	/^\s*ls\b/,
-	/^\s*pwd\b/,
-	/^\s*echo\b/,
-	/^\s*printf\b/,
-	/^\s*wc\b/,
-	/^\s*sort\b/,
-	/^\s*uniq\b/,
-	/^\s*diff\b/,
-	/^\s*file\b/,
-	/^\s*stat\b/,
-	/^\s*du\b/,
-	/^\s*df\b/,
-	/^\s*tree\b/,
-	/^\s*which\b/,
-	/^\s*whereis\b/,
-	/^\s*type\b/,
-	/^\s*env\b/,
-	/^\s*printenv\b/,
-	/^\s*uname\b/,
-	/^\s*whoami\b/,
-	/^\s*id\b/,
-	/^\s*date\b/,
-	/^\s*cal\b/,
-	/^\s*uptime\b/,
-	/^\s*ps\b/,
-	/^\s*top\b/,
-	/^\s*htop\b/,
-	/^\s*free\b/,
-	/^\s*git\s+(status|log|diff|show|branch|remote|config\s+--get)/i,
-	/^\s*git\s+ls-/i,
-	/^\s*npm\s+(list|ls|view|info|search|outdated|audit)/i,
-	/^\s*yarn\s+(list|info|why|audit)/i,
-	/^\s*node\s+--version/i,
-	/^\s*python\s+--version/i,
-	/^\s*curl\s/i,
-	/^\s*wget\s+-O\s*-/i,
-	/^\s*jq\b/,
-	/^\s*sed\s+-n/i,
-	/^\s*awk\b/,
-	/^\s*rg\b/,
-	/^\s*fd\b/,
-	/^\s*bat\b/,
-	/^\s*eza\b/,
-	/^\s*cd\b/,
-	/^\s*gh\s+pr\s+(view|list|diff|checks|status)\b/i,
-	/^\s*gh\s+issue\s+(view|list)\b/i,
-	/^\s*gh\s+repo\s+(view|fork|list)\b/i,
-	/^\s*gh\s+release\s+(view|list|download)\b/i,
-	/^\s*gh\s+run\s+(view|list)\b/i,
-	/^\s*git\s+blame\b/,
-	/^\s*git\s+shortlog\b/,
-	/^\s*git\s+stash\s+list\b/i,
-	/^\s*git\s+tag\s+(-l|--list)\b/i,
-	/^\s*git\s+describe\b/,
-	/^\s*go\s+doc\b/,
-	/^\s*go\s+list\b/,
-	/^\s*go\s+version\b/,
-	/^\s*go\s+env\b/,
+  /^\s*cat\b/,
+  /^\s*head\b/,
+  /^\s*tail\b/,
+  /^\s*less\b/,
+  /^\s*more\b/,
+  /^\s*grep\b/,
+  /^\s*find\b/,
+  /^\s*ls\b/,
+  /^\s*pwd\b/,
+  /^\s*echo\b/,
+  /^\s*printf\b/,
+  /^\s*wc\b/,
+  /^\s*sort\b/,
+  /^\s*uniq\b/,
+  /^\s*diff\b/,
+  /^\s*file\b/,
+  /^\s*stat\b/,
+  /^\s*du\b/,
+  /^\s*df\b/,
+  /^\s*tree\b/,
+  /^\s*which\b/,
+  /^\s*whereis\b/,
+  /^\s*type\b/,
+  /^\s*env\b/,
+  /^\s*printenv\b/,
+  /^\s*uname\b/,
+  /^\s*whoami\b/,
+  /^\s*id\b/,
+  /^\s*date\b/,
+  /^\s*cal\b/,
+  /^\s*uptime\b/,
+  /^\s*ps\b/,
+  /^\s*top\b/,
+  /^\s*htop\b/,
+  /^\s*free\b/,
+  /^\s*git\s+(status|log|diff|show|branch|remote|config\s+--get)/i,
+  /^\s*git\s+ls-/i,
+  /^\s*npm\s+(list|ls|view|info|search|outdated|audit)/i,
+  /^\s*yarn\s+(list|info|why|audit)/i,
+  /^\s*node\s+--version/i,
+  /^\s*python\s+--version/i,
+  /^\s*curl\s/i,
+  /^\s*wget\s+-O\s*-/i,
+  /^\s*jq\b/,
+  /^\s*sed\s+-n/i,
+  /^\s*awk\b/,
+  /^\s*rg\b/,
+  /^\s*fd\b/,
+  /^\s*bat\b/,
+  /^\s*eza\b/,
+  /^\s*cd\b/,
+  /^\s*gh\s+pr\s+(view|list|diff|checks|status)\b/i,
+  /^\s*gh\s+issue\s+(view|list)\b/i,
+  /^\s*gh\s+repo\s+(view|fork|list)\b/i,
+  /^\s*gh\s+release\s+(view|list|download)\b/i,
+  /^\s*gh\s+run\s+(view|list)\b/i,
+  /^\s*git\s+blame\b/,
+  /^\s*git\s+shortlog\b/,
+  /^\s*git\s+stash\s+list\b/i,
+  /^\s*git\s+tag\s+(-l|--list)\b/i,
+  /^\s*git\s+describe\b/,
+  /^\s*go\s+doc\b/,
+  /^\s*go\s+list\b/,
+  /^\s*go\s+version\b/,
+  /^\s*go\s+env\b/,
 ];
 /** Split a compound command into individual sub-commands.
@@ -121,114 +121,103 @@ const SAFE_PATTERNS = [
  * Does NOT split on | (pipe) to allow piping (e.g. `git log | head`).
  */
 function splitCompoundCommand(command: string): string[] {
-	// Match sub-commands separated by &&, ||, ; (with optional whitespace)
-	// We don't split on | to allow piping (e.g. `git log | head`)
-	return command
-		.split(/&&|\|\||;/)
-		.map((s) => s.trim())
-		.filter((s) => s.length > 0);
+  // Match sub-commands separated by &&, ||, ; (with optional whitespace)
+  // We don't split on | to allow piping (e.g. `git log | head`)
+  return command
+    .split(/&&|\|\||;/)
+    .map((s) => s.trim())
+    .filter((s) => s.length > 0);
 }
 /** Strip stderr redirects that are purely cosmetic (no side effects). */
 function stripHarmlessRedirects(cmd: string): string {
-	return cmd.replace(/\s*2\s*>\s*(\/dev\/null|&1)\b/g, "");
+  return cmd.replace(/\s*2\s*>\s*(\/dev\/null|&1)\b/g, "");
 }
 export function isSafeCommand(command: string): boolean {
-	const parts = splitCompoundCommand(command);
-	return parts.every(
-		(part) => {
-			const cleaned = stripHarmlessRedirects(part);
-			const isDestructive = DESTRUCTIVE_PATTERNS.some((p) => p.test(cleaned));
-			const isSafe = SAFE_PATTERNS.some((p) => p.test(cleaned));
-			return !isDestructive && isSafe;
-		},
-	);
+  const parts = splitCompoundCommand(command);
+  return parts.every((part) => {
+    const cleaned = stripHarmlessRedirects(part);
+    const isDestructive = DESTRUCTIVE_PATTERNS.some((p) => p.test(cleaned));
+    const isSafe = SAFE_PATTERNS.some((p) => p.test(cleaned));
+    return !isDestructive && isSafe;
+  });
 }
 const SKILL_TO_PHASE: Record<string, Phase> = {
-	brainstorming: "brainstorm",
-	"writing-plans": "plan",
+  brainstorming: "brainstorm",
+  "writing-plans": "plan",
 };
 /** Determine if a write/edit to filePath should be blocked during the given phase.
  *  Only writes under docs/plans/ are allowed during brainstorm and plan phases.
  */
-export function shouldBlockFilePath(
-	filePath: string,
-	cwd: string,
-): boolean {
-	const absolute = resolve(cwd, filePath);
-	const plansDir = resolve(cwd, "docs/plans");
-	return !absolute.startsWith(plansDir + "/");
+export function shouldBlockFilePath(filePath: string, cwd: string): boolean {
+  const absolute = resolve(cwd, filePath);
+  const plansDir = resolve(cwd, "docs/plans");
+  return !absolute.startsWith(`${plansDir}/`);
 }
 export function getCurrentPhase(): Phase {
-	return phase;
+  return phase;
 }
 let phase: Phase = null;
 export default function (pi: ExtensionAPI) {
-	pi.on("session_start", () => {
-		phase = null;
-	});
-	pi.on("input", (event) => {
-		const text = event.text ?? "";
-		const match = text.match(/^\/skill:([\w-]+)/);
-		if (match) {
-			const skill = match[1];
-			if (skill in SKILL_TO_PHASE) {
-				phase = SKILL_TO_PHASE[skill];
-				return;
-			}
-		}
-		if (
-			text.startsWith("/skill:executing-tasks") ||
-			text.startsWith("/skill:finalizing")
-		) {
-			phase = null;
-		}
-	});
-	pi.on("tool_call", (event, ctx) => {
-		if (!phase) return;
-		if (event.toolName === "bash") {
-			const command = (event.input as { command?: string }).command ?? "";
-			if (!isSafeCommand(command)) {
-				if (ctx.hasUI) {
-					ctx.ui.notify(
-						`Blocked bash command during ${phase} phase: ${command}`,
-						"warning",
-					);
-				}
-				return {
-					block: true,
-					reason: `⚠️ ${phase.toUpperCase()} PHASE: Bash command blocked (not allowlisted). Only read-only commands are permitted during brainstorming and planning.\nCommand: ${command}`,
-				};
-			}
-			return;
-		}
-		if (event.toolName !== "write" && event.toolName !== "edit") return;
-		const filePath = (event.input as { path?: string }).path ?? "";
-		if (!filePath) return;
-		if (!shouldBlockFilePath(filePath, ctx.cwd)) return;
-		if (ctx.hasUI) {
-			ctx.ui.notify(
-				`Blocked ${event.toolName} to ${filePath} during ${phase} phase. Only docs/plans/ is writable.`,
-				"warning",
-			);
-		}
-		return {
-			block: true,
-			reason: `⚠️ ${phase.toUpperCase()} PHASE: Cannot ${event.toolName} to ${filePath}. Only docs/plans/ is writable during brainstorming and planning.`,
-		};
-	});
+  pi.on("session_start", () => {
+    phase = null;
+  });
+  pi.on("input", (event) => {
+    const text = event.text ?? "";
+    const match = text.match(/^\/skill:([\w-]+)/);
+    if (match) {
+      const skill = match[1];
+      if (skill in SKILL_TO_PHASE) {
+        phase = SKILL_TO_PHASE[skill];
+        return;
+      }
+    }
+    if (text.startsWith("/skill:executing-tasks") || text.startsWith("/skill:finalizing")) {
+      phase = null;
+    }
+  });
+  pi.on("tool_call", (event, ctx) => {
+    if (!phase) return;
+    if (event.toolName === "bash") {
+      const command = (event.input as { command?: string }).command ?? "";
+      if (!isSafeCommand(command)) {
+        if (ctx.hasUI) {
+          ctx.ui.notify(`Blocked bash command during ${phase} phase: ${command}`, "warning");
+        }
+        return {
+          block: true,
+          reason: `⚠️ ${phase.toUpperCase()} PHASE: Bash command blocked (not allowlisted). Only read-only commands are permitted during brainstorming and planning.\nCommand: ${command}`,
+        };
+      }
+      return;
+    }
+    if (event.toolName !== "write" && event.toolName !== "edit") return;
+    const filePath = (event.input as { path?: string }).path ?? "";
+    if (!filePath) return;
+    if (!shouldBlockFilePath(filePath, ctx.cwd)) return;
+    if (ctx.hasUI) {
+      ctx.ui.notify(
+        `Blocked ${event.toolName} to ${filePath} during ${phase} phase. Only docs/plans/ is writable.`,
+        "warning",
+      );
+    }
+    return {
+      block: true,
+      reason: `⚠️ ${phase.toUpperCase()} PHASE: Cannot ${event.toolName} to ${filePath}. Only docs/plans/ is writable during brainstorming and planning.`,
+    };
+  });
 }

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@tianhai/pi-workflow-kit",
-  "version": "0.15.0",
+  "version": "0.16.0",
   "description": "Enforce structured brainstorm→plan→execute→finalize workflow with TDD discipline in AI coding agents",
   "keywords": [
     "pi-package",

package/skills/brainstorming/SKILL.md CHANGED Viewed

@@ -29,6 +29,10 @@ Read-only exploration. You may **not** edit or create any files except under `do
    ```
    ADRs live under `docs/plans/adr/` and are archived during finalizing alongside the design doc.
+   For non-trivial designs, note any areas that may need production-risk review (database schema changes, authentication or authorization, external API integrations, concurrency or batch processing, file uploads or large data flows, Redis/caching/message queues). You don't need to audit them here — just flag them for the design-review stage.
+   For trivial changes (config, naming, simple field additions), note "Simple change — no design review needed" in the design doc.
 5. **Write the design doc** — save it to `docs/plans/YYYY-MM-DD-<topic>-design.md`. Organize features as end-to-end slices (each slice delivers one observable behavior through all relevant layers) so the planning phase can decompose them directly into tasks. Branch creation, committing, and workspace setup are handled by `/skill:executing-tasks`.
 ## Principles
@@ -40,4 +44,5 @@ Read-only exploration. You may **not** edit or create any files except under `do
 ## After the design
-Ask: "Ready to plan? Run `/skill:writing-plans`"
+- **Non-trivial design**: Say: "Design looks good. Run `/skill:design-review` to check for production risks before planning."
+- **Trivial change**: Ask: "Simple change — skip design review. Ready to plan? Run `/skill:writing-plans`"

package/skills/design-review/SKILL.md ADDED Viewed

@@ -0,0 +1,113 @@
+---
+name: design-review
+description: "Audit a design doc for production risks — security, scalability, fault tolerance, and operational hazards. Use after brainstorming for non-trivial designs, or when you want to stress-test a design for production readiness."
+---
+# Design Review
+Read-only with respect to the codebase. You **may** edit the design doc to append review findings. You may **not** edit source code or configuration.
+## Process
+1. **Find the design doc** — look for `docs/plans/*-design.md`. If none exists, say "No design doc found. Run `/skill:brainstorming` first." and stop.
+2. **Check triviality** — if the design doc notes "Simple change — no design review needed", append a brief section:
+   ```markdown
+   ## Architectural Review
+   **Status**: Skipped — trivial change. No high-risk operations detected.
+   ```
+   Then say: "Ready to plan? Run `/skill:writing-plans`" and stop.
+3. **Read the design doc in full** — understand the architecture, data flow, components, and error handling proposed.
+4. **🏛️ Architectural Pillars Review** — evaluate the design against the 6 Pillars of Production-Grade Design:
+   1. **Robustness & Fault Tolerance**: How expected failures are handled, subsystem isolation, graceful degradation.
+   2. **Atomicity & Consistency**: Database transactions, state rollback on error, endpoint idempotency.
+   3. **Security & Access Control**: Input validation/sanitization, authorization checks at the boundary.
+   4. **Scalability & Performance**: Connection pooling, preventing resource leaks, avoiding N+1 queries.
+   5. **Backwards Compatibility**: Schema migration safety, zero-downtime deployment, API versioning.
+   6. **Testability**: Injection seams for external dependencies (APIs, system clocks, randomizers) to keep tests 100% deterministic.
+   For each pillar, write a 1-2 sentence assessment. Flag any concerns.
+5. **⚠️ High-Risk Hazard Audit** — evaluate the design against the 8 High-Risk Production Hazards. For each hazard, write either `[SAFE]` (with a 1-sentence justification) or `[TRIGGERED]` (detailing the mitigation):
+   1. **Unbounded Redis Deletions / Operations**: Multi-key deletion or scans (e.g. `KEYS` or raw `SCAN` loops) that block Redis's single-threaded command execution.
+   2. **In-Memory OOM Loops**: Fetching complete database datasets into server memory (e.g., raw `select *`) to filter, sort, or map in runtime heap.
+   3. **Unbounded Concurrency Spikes**: Running concurrent network requests (e.g. unthrottled `Promise.all`) without strict batch limits.
+   4. **Missing High-Frequency Indexes**: Running queries on unindexed columns, forcing expensive table-scans under load.
+   5. **Nested/Long-Running Transactions**: Holding database connections and locks open while awaiting slow external HTTP, disk, or cryptographic tasks.
+   6. **Unrestricted Uploads & Temp Flooding**: Writing uploaded data directly to local temporary paths without validation limits or explicit `finally` cleanup blocks.
+   7. **Raw Query String Interpolation**: Merging raw variables into SQL queries or shell command inputs (susceptible to injection).
+   8. **Silent Swallowing Loops**: Background workers or cron tasks silently catching and suppressing exceptions without logging, back-offs, or alerts.
+6. **🔍 Socratic Risk Discovery** — put on your **SRE Hat** and audit the proposed logic against 3 heuristics to identify novel or domain-specific risks:
+   - **The "Scale to 100x" Heuristic**: If this operation is run 100x/sec or on 100k items, what breaks? (Memory, CPU, Disk I/O, sockets, database connection limits).
+   - **The "Hostile World" Heuristic**: If a malicious actor has complete control over these inputs (headers, payloads, IDs), how can they exploit, crash, or extract data?
+   - **The "Silent Error" Heuristic**: If this downstream dependency or query hangs or fails silently, how does our server react? Is there a timeout, a back-off, or logging?
+   For each heuristic, note any risks discovered. If a risk overlaps with a triggered hazard, cross-reference it.
+7. **Present findings** — show the full review to the user. For each triggered hazard or Socratic risk, propose a concrete mitigation. Wait for user feedback and incorporate changes.
+8. **Append to design doc** — add a `## Architectural Review` section to the design doc. Two cases:
+   **All clear** (no hazards triggered, no Socratic risks):
+   ```markdown
+   ## Architectural Review
+   **Status**: ✅ No high-risk hazards detected.
+   **Pillars reviewed**: All 6 — no concerns.
+   **Hazards audited**: All 8 [SAFE].
+   **Socratic risks**: None identified.
+   ```
+   **Hazards or risks found**:
+   ```markdown
+   ## Architectural Review
+   **Status**: ⚠️ High-risk operations detected — see mitigations below.
+   ### Pillar Assessments
+   - **Robustness**: [assessment]
+   - **Atomicity**: [assessment]
+   - **Security**: [assessment]
+   - **Scalability**: [assessment]
+   - **Backwards Compatibility**: [assessment]
+   - **Testability**: [assessment]
+   ### Hazard Audit
+   - 1. Unbounded Redis: [SAFE / TRIGGERED — mitigation]
+   - 2. In-Memory OOM: [SAFE / TRIGGERED — mitigation]
+   - 3. Unbounded Concurrency: [SAFE / TRIGGERED — mitigation]
+   - 4. Missing Indexes: [SAFE / TRIGGERED — mitigation]
+   - 5. Long-Running Transactions: [SAFE / TRIGGERED — mitigation]
+   - 6. Unrestricted Uploads: [SAFE / TRIGGERED — mitigation]
+   - 7. Query Interpolation: [SAFE / TRIGGERED — mitigation]
+   - 8. Silent Swallowing: [SAFE / TRIGGERED — mitigation]
+   ### ⚠️ High-Risk Operations & Mitigations
+   [Detailed mitigation for each TRIGGERED hazard and Socratic risk]
+   ### Socratic Risks
+   - **Scale to 100x**: [finding or "none identified"]
+   - **Hostile World**: [finding or "none identified"]
+   - **Silent Error**: [finding or "none identified"]
+   ```
+## Principles
+- Be specific — every `[TRIGGERED]` hazard must include a concrete mitigation, not just "be careful"
+- Be honest — if the design is risky and the risk can't be mitigated easily, say so
+- Be proportional — a simple CRUD endpoint doesn't need the same depth as a batch processing pipeline
+- Don't redesign — flag risks and propose mitigations, but the design owner decides
+## After the review
+Ask: "Ready to plan? Run `/skill:writing-plans`"

package/skills/executing-tasks/SKILL.md CHANGED Viewed

@@ -156,16 +156,23 @@ For each task:
 1. **Mark in-progress** — update the progress file: `🔄 in-progress`
 2. **Read the plan** — read the plan's overview section (everything before `## Task 1:`). Skim all `## Task N:` headings for dependency awareness. Then read the current task's body in full. **Read `docs/lessons.md` if it exists** — follow all rules listed there while working on this task.
-3. **Execute the plan steps** — follow each numbered step in the task body, in order. Stop at any `⏸ CHECKPOINT` gate (see [Checkpoint gates](#checkpoint-gates--when-the-plan-says-stop)).
-4. **Verify against task description** — re-read the task from the plan. Does the implementation satisfy every requirement listed? If not, fix before proceeding.
-5. **Refactor** — after all tests pass, look for:
+3. **Execute the plan steps** — follow each numbered step in the task body, in order. As you work, shift your cognitive focus through three frames:
+   **QA Test frame** (when writing/running tests): Focus entirely on translating the task's `Given/When/Then` Acceptance Criteria into precise failing tests. Before running tests, verify the test environment is sandboxed — no real database connections, API calls, or live services. External dependencies must be mocked or stubbed. Ensure the test environment is isolated (e.g., `NODE_ENV=test`, `GO_ENV=test`, or equivalent for your stack).
+   **Pragmatic Developer frame** (when implementing): Focus on the simplest possible code to make the tests green. Do not over-engineer or add code for future requirements. Keep complexity to a bare minimum.
+   **Senior Refactoring frame** (when refactoring): Evaluate the craftsmanship of the code. Check for:
    - **Shallow modules** — is the interface nearly as complex as the implementation? Can complexity be hidden behind a simpler interface?
    - **Deletion test** — if you deleted this module, would complexity vanish (pass-through) or reappear across callers (earning its keep)?
    - **Duplication** — extract repeated patterns
    - **Seam discipline** — don't introduce abstraction unless something actually varies across it. One adapter = hypothetical seam. Two adapters = real seam
    Run tests after each refactor step. Never refactor while tests are failing.
-6. **Learn from mistakes** — if you caught yourself making a mistake during this task that you've made before or that would apply to future tasks, append a rule to `docs/lessons.md`. Only add rules that would change future behavior. If the file doesn't exist, create it with the standard format (see below).
+   Stop at any `⏸ CHECKPOINT` gate (see [Checkpoint gates](#checkpoint-gates--when-the-plan-says-stop)).
+4. **Verify against task description** — re-read the task from the plan. Does the implementation satisfy every requirement listed? If not, fix before proceeding.
+5. **Learn from mistakes** — if you caught yourself making a mistake during this task that you've made before or that would apply to future tasks, append a rule to `docs/lessons.md`. Only add rules that would change future behavior. If the file doesn't exist, create it with the standard format (see below).
    Before writing, apply the **generalization test**: would this rule apply equally to a completely different feature or domain in this repo? If not, rewrite it — strip out specific service names, entity types, and domain concepts, and express the underlying pattern instead. If you can't express a generic form, don't write the rule.
@@ -174,9 +181,9 @@ For each task:
    ✅ **Generic** (applies across the whole repo):
    > "Always validate required ID fields at the service boundary — missing IDs should return 400, not 500"
-7. **Commit** — after all steps are done (no checkpoint gates remain in the task), `git add` the relevant files and commit with a clear message.
-8. **Update progress** — mark `✅ done` + record the commit hash.
-9. **Suggest session break if needed** — after completing ~3-5 tasks since the last break, suggest:
+6. **Commit** — after all steps are done (no checkpoint gates remain in the task), `git add` the relevant files and commit with a clear message.
+7. **Update progress** — mark `✅ done` + record the commit hash.
+8. **Suggest session break if needed** — after completing ~3-5 tasks since the last break, suggest:
    ```
    ✅ Tasks N-M done (commits: abc, def)
    Progress: X/Y tasks done
@@ -186,7 +193,7 @@ For each task:
       (or just say "continue" to keep going here)
    ```
    Also suggest at checkpoint review pauses when multiple tasks have been completed since the last break. Respect the user's choice if they say "continue".
-10. **Loop** — go back to step 1 for the next `⬜ pending` task, or see [After all tasks](#after-all-tasks) if none remain.
+9. **Loop** — go back to step 1 for the next `⬜ pending` task, or see [After all tasks](#after-all-tasks) if none remain.
 ### `docs/lessons.md` format
@@ -205,6 +212,8 @@ Retire rules that no longer apply during finalizing.
 - <new rule here>
 ```
+When adding a new rule during execution, always append it under `## Rules`. The categorization into specific headers (e.g., `## Tool Usage`, `## Testing Patterns`) is done during finalizing — never during execution.
 ### Checkpoint gates — when the plan says STOP
 The plan marks certain steps with `⏸ **CHECKPOINT: test**` or `⏸ **CHECKPOINT: done**`. These are hard stop points. When you reach one:

package/skills/finalizing/SKILL.md CHANGED Viewed

@@ -35,10 +35,12 @@ Wait for the user to confirm before proceeding.
    Each `mv` gracefully handles the case where no matching files exist (e.g., if the user skipped straight from brainstorm to finalize without executing tasks).
-2. **Review lessons learned** — if `docs/lessons.md` exists, review it:
-   - Add any lessons from this session that were missed during execution
+2. **Review & Polish Lessons (Agile Scrum Master Hat)** — if `docs/lessons.md` exists, put on your **Agile Scrum Master Hat** to curate and optimize it for future sprints:
+   - **Add missed lessons** — capture any lessons from this session that weren't written during execution
    - **Generalize domain-specific rules** — if a rule names a specific service, entity, or feature, either rewrite it as a generic pattern or remove it if no generic form exists
-   - Retire rules that no longer apply (remove the bullet)
+   - **De-duplicate** — combine overlapping or redundant rules into single, sharper entries
+   - **Categorize** — group the rules under clear, structured markdown headers (e.g., `## Tool Usage`, `## Testing Patterns`, `## Architecture Rules`) to make the document highly scannable for future sessions. Keep the `## Rules` section as the append target for new entries during execution — categorization moves rules out of `## Rules` into the appropriate category headers.
+   - **Retire stale rules** — remove bullets that no longer apply
    - If no changes are needed, leave it as-is
    If `docs/lessons.md` doesn't exist but lessons were learned this session, create it with the standard format: