@qball-inc/the-bulwark 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (175) hide show
  1. package/.claude-plugin/plugin.json +43 -0
  2. package/agents/bulwark-fix-validator.md +633 -0
  3. package/agents/bulwark-implementer.md +391 -0
  4. package/agents/bulwark-issue-analyzer.md +308 -0
  5. package/agents/bulwark-standards-reviewer.md +221 -0
  6. package/agents/plan-creation-architect.md +323 -0
  7. package/agents/plan-creation-eng-lead.md +352 -0
  8. package/agents/plan-creation-po.md +300 -0
  9. package/agents/plan-creation-qa-critic.md +334 -0
  10. package/agents/product-ideation-competitive-analyzer.md +298 -0
  11. package/agents/product-ideation-idea-validator.md +268 -0
  12. package/agents/product-ideation-market-researcher.md +292 -0
  13. package/agents/product-ideation-pattern-documenter.md +308 -0
  14. package/agents/product-ideation-segment-analyzer.md +303 -0
  15. package/agents/product-ideation-strategist.md +259 -0
  16. package/agents/statusline-setup.md +97 -0
  17. package/hooks/hooks.json +59 -0
  18. package/package.json +45 -0
  19. package/scripts/hooks/cleanup-stale.sh +13 -0
  20. package/scripts/hooks/enforce-quality.sh +166 -0
  21. package/scripts/hooks/implementer-quality.sh +256 -0
  22. package/scripts/hooks/inject-protocol.sh +52 -0
  23. package/scripts/hooks/suggest-pipeline.sh +175 -0
  24. package/scripts/hooks/track-pipeline-start.sh +37 -0
  25. package/scripts/hooks/track-pipeline-stop.sh +52 -0
  26. package/scripts/init-rules.sh +35 -0
  27. package/scripts/init.sh +151 -0
  28. package/skills/anthropic-validator/SKILL.md +607 -0
  29. package/skills/anthropic-validator/references/agents-checklist.md +131 -0
  30. package/skills/anthropic-validator/references/commands-checklist.md +102 -0
  31. package/skills/anthropic-validator/references/hooks-checklist.md +151 -0
  32. package/skills/anthropic-validator/references/mcp-checklist.md +136 -0
  33. package/skills/anthropic-validator/references/plugins-checklist.md +148 -0
  34. package/skills/anthropic-validator/references/skills-checklist.md +85 -0
  35. package/skills/assertion-patterns/SKILL.md +296 -0
  36. package/skills/bug-magnet-data/SKILL.md +284 -0
  37. package/skills/bug-magnet-data/context/cli-args.md +91 -0
  38. package/skills/bug-magnet-data/context/db-query.md +104 -0
  39. package/skills/bug-magnet-data/context/file-contents.md +103 -0
  40. package/skills/bug-magnet-data/context/http-body.md +91 -0
  41. package/skills/bug-magnet-data/context/process-spawn.md +123 -0
  42. package/skills/bug-magnet-data/data/booleans/boundaries.yaml +143 -0
  43. package/skills/bug-magnet-data/data/collections/arrays.yaml +114 -0
  44. package/skills/bug-magnet-data/data/collections/objects.yaml +123 -0
  45. package/skills/bug-magnet-data/data/concurrency/race-conditions.yaml +118 -0
  46. package/skills/bug-magnet-data/data/concurrency/state-machines.yaml +115 -0
  47. package/skills/bug-magnet-data/data/dates/boundaries.yaml +137 -0
  48. package/skills/bug-magnet-data/data/dates/invalid.yaml +132 -0
  49. package/skills/bug-magnet-data/data/dates/timezone.yaml +118 -0
  50. package/skills/bug-magnet-data/data/encoding/charset.yaml +79 -0
  51. package/skills/bug-magnet-data/data/encoding/normalization.yaml +105 -0
  52. package/skills/bug-magnet-data/data/formats/email.yaml +154 -0
  53. package/skills/bug-magnet-data/data/formats/json.yaml +187 -0
  54. package/skills/bug-magnet-data/data/formats/url.yaml +165 -0
  55. package/skills/bug-magnet-data/data/language-specific/javascript.yaml +182 -0
  56. package/skills/bug-magnet-data/data/language-specific/python.yaml +174 -0
  57. package/skills/bug-magnet-data/data/language-specific/rust.yaml +148 -0
  58. package/skills/bug-magnet-data/data/numbers/boundaries.yaml +161 -0
  59. package/skills/bug-magnet-data/data/numbers/precision.yaml +89 -0
  60. package/skills/bug-magnet-data/data/numbers/special.yaml +69 -0
  61. package/skills/bug-magnet-data/data/strings/boundaries.yaml +109 -0
  62. package/skills/bug-magnet-data/data/strings/injection.yaml +208 -0
  63. package/skills/bug-magnet-data/data/strings/special-chars.yaml +190 -0
  64. package/skills/bug-magnet-data/data/strings/unicode.yaml +139 -0
  65. package/skills/bug-magnet-data/references/external-lists.md +115 -0
  66. package/skills/bulwark-brainstorm/SKILL.md +563 -0
  67. package/skills/bulwark-brainstorm/references/at-teammate-prompts.md +60 -0
  68. package/skills/bulwark-brainstorm/references/role-critical-analyst.md +78 -0
  69. package/skills/bulwark-brainstorm/references/role-development-lead.md +66 -0
  70. package/skills/bulwark-brainstorm/references/role-product-delivery-lead.md +79 -0
  71. package/skills/bulwark-brainstorm/references/role-product-manager.md +62 -0
  72. package/skills/bulwark-brainstorm/references/role-project-sme.md +59 -0
  73. package/skills/bulwark-brainstorm/references/role-technical-architect.md +66 -0
  74. package/skills/bulwark-research/SKILL.md +298 -0
  75. package/skills/bulwark-research/references/viewpoint-contrarian.md +63 -0
  76. package/skills/bulwark-research/references/viewpoint-direct-investigation.md +62 -0
  77. package/skills/bulwark-research/references/viewpoint-first-principles.md +65 -0
  78. package/skills/bulwark-research/references/viewpoint-practitioner.md +62 -0
  79. package/skills/bulwark-research/references/viewpoint-prior-art.md +66 -0
  80. package/skills/bulwark-scaffold/SKILL.md +330 -0
  81. package/skills/bulwark-statusline/SKILL.md +161 -0
  82. package/skills/bulwark-statusline/scripts/statusline.sh +144 -0
  83. package/skills/bulwark-verify/SKILL.md +519 -0
  84. package/skills/code-review/SKILL.md +428 -0
  85. package/skills/code-review/examples/anti-patterns/linting.ts +181 -0
  86. package/skills/code-review/examples/anti-patterns/security.ts +91 -0
  87. package/skills/code-review/examples/anti-patterns/standards.ts +195 -0
  88. package/skills/code-review/examples/anti-patterns/type-safety.ts +108 -0
  89. package/skills/code-review/examples/recommended/linting.ts +195 -0
  90. package/skills/code-review/examples/recommended/security.ts +154 -0
  91. package/skills/code-review/examples/recommended/standards.ts +231 -0
  92. package/skills/code-review/examples/recommended/type-safety.ts +181 -0
  93. package/skills/code-review/frameworks/angular.md +218 -0
  94. package/skills/code-review/frameworks/django.md +235 -0
  95. package/skills/code-review/frameworks/express.md +207 -0
  96. package/skills/code-review/frameworks/flask.md +298 -0
  97. package/skills/code-review/frameworks/generic.md +146 -0
  98. package/skills/code-review/frameworks/react.md +152 -0
  99. package/skills/code-review/frameworks/vue.md +244 -0
  100. package/skills/code-review/references/linting-patterns.md +221 -0
  101. package/skills/code-review/references/security-patterns.md +125 -0
  102. package/skills/code-review/references/standards-patterns.md +246 -0
  103. package/skills/code-review/references/type-safety-patterns.md +130 -0
  104. package/skills/component-patterns/SKILL.md +131 -0
  105. package/skills/component-patterns/references/pattern-cli-command.md +118 -0
  106. package/skills/component-patterns/references/pattern-database.md +166 -0
  107. package/skills/component-patterns/references/pattern-external-api.md +139 -0
  108. package/skills/component-patterns/references/pattern-file-parser.md +168 -0
  109. package/skills/component-patterns/references/pattern-http-server.md +162 -0
  110. package/skills/component-patterns/references/pattern-process-spawner.md +133 -0
  111. package/skills/continuous-feedback/SKILL.md +327 -0
  112. package/skills/continuous-feedback/references/collect-instructions.md +81 -0
  113. package/skills/continuous-feedback/references/specialize-code-review.md +82 -0
  114. package/skills/continuous-feedback/references/specialize-general.md +98 -0
  115. package/skills/continuous-feedback/references/specialize-test-audit.md +81 -0
  116. package/skills/create-skill/SKILL.md +359 -0
  117. package/skills/create-skill/references/agent-conventions.md +194 -0
  118. package/skills/create-skill/references/agent-template.md +195 -0
  119. package/skills/create-skill/references/content-guidance.md +291 -0
  120. package/skills/create-skill/references/decision-framework.md +124 -0
  121. package/skills/create-skill/references/template-pipeline.md +217 -0
  122. package/skills/create-skill/references/template-reference-heavy.md +111 -0
  123. package/skills/create-skill/references/template-research.md +210 -0
  124. package/skills/create-skill/references/template-script-driven.md +172 -0
  125. package/skills/create-skill/references/template-simple.md +80 -0
  126. package/skills/create-subagent/SKILL.md +353 -0
  127. package/skills/create-subagent/references/agent-conventions.md +268 -0
  128. package/skills/create-subagent/references/content-guidance.md +232 -0
  129. package/skills/create-subagent/references/decision-framework.md +134 -0
  130. package/skills/create-subagent/references/template-single-agent.md +192 -0
  131. package/skills/fix-bug/SKILL.md +241 -0
  132. package/skills/governance-protocol/SKILL.md +116 -0
  133. package/skills/init/SKILL.md +341 -0
  134. package/skills/issue-debugging/SKILL.md +385 -0
  135. package/skills/issue-debugging/references/anti-patterns.md +245 -0
  136. package/skills/issue-debugging/references/debug-report-schema.md +227 -0
  137. package/skills/mock-detection/SKILL.md +511 -0
  138. package/skills/mock-detection/references/false-positive-prevention.md +402 -0
  139. package/skills/mock-detection/references/stub-patterns.md +236 -0
  140. package/skills/pipeline-templates/SKILL.md +215 -0
  141. package/skills/pipeline-templates/references/code-change-workflow.md +277 -0
  142. package/skills/pipeline-templates/references/code-review.md +336 -0
  143. package/skills/pipeline-templates/references/fix-validation.md +421 -0
  144. package/skills/pipeline-templates/references/new-feature.md +335 -0
  145. package/skills/pipeline-templates/references/research-brainstorm.md +161 -0
  146. package/skills/pipeline-templates/references/research-planning.md +257 -0
  147. package/skills/pipeline-templates/references/test-audit.md +389 -0
  148. package/skills/pipeline-templates/references/test-execution-fix.md +238 -0
  149. package/skills/plan-creation/SKILL.md +497 -0
  150. package/skills/product-ideation/SKILL.md +372 -0
  151. package/skills/product-ideation/references/analysis-frameworks.md +161 -0
  152. package/skills/session-handoff/SKILL.md +139 -0
  153. package/skills/session-handoff/references/examples.md +223 -0
  154. package/skills/setup-lsp/SKILL.md +312 -0
  155. package/skills/setup-lsp/references/server-registry.md +85 -0
  156. package/skills/setup-lsp/references/troubleshooting.md +135 -0
  157. package/skills/subagent-output-templating/SKILL.md +415 -0
  158. package/skills/subagent-output-templating/references/examples.md +440 -0
  159. package/skills/subagent-prompting/SKILL.md +364 -0
  160. package/skills/subagent-prompting/references/examples.md +342 -0
  161. package/skills/test-audit/SKILL.md +531 -0
  162. package/skills/test-audit/references/known-limitations.md +41 -0
  163. package/skills/test-audit/references/priority-classification.md +30 -0
  164. package/skills/test-audit/references/prompts/deep-mode-detection.md +83 -0
  165. package/skills/test-audit/references/prompts/synthesis.md +57 -0
  166. package/skills/test-audit/references/rewrite-instructions.md +46 -0
  167. package/skills/test-audit/references/schemas/audit-output.yaml +100 -0
  168. package/skills/test-audit/references/schemas/diagnostic-output.yaml +49 -0
  169. package/skills/test-audit/scripts/data-flow-analyzer.ts +509 -0
  170. package/skills/test-audit/scripts/integration-mock-detector.ts +462 -0
  171. package/skills/test-audit/scripts/package.json +20 -0
  172. package/skills/test-audit/scripts/skip-detector.ts +211 -0
  173. package/skills/test-audit/scripts/verification-counter.ts +295 -0
  174. package/skills/test-classification/SKILL.md +310 -0
  175. package/skills/test-fixture-creation/SKILL.md +295 -0
@@ -0,0 +1,257 @@
1
+ # Research & Planning Pipeline
2
+
3
+ ## Purpose
4
+
5
+ Research and plan before implementation with iterative refinement.
6
+
7
+ ## When to Use
8
+
9
+ - Complex features requiring deep research
10
+ - Architecture decisions
11
+ - Technology evaluations
12
+ - Pre-implementation planning
13
+ - Tasks where planning quality matters
14
+
15
+ ## Pipeline Definition
16
+
17
+ ```fsharp
18
+ // Research & Planning Pipeline
19
+ // Trigger: Complex task requiring planning
20
+ // Constraint: Minimum 3 iterations for quality
21
+ // Output: Reviewed and refined implementation plan
22
+
23
+ Researcher (gather information) // Haiku - lookups
24
+ |> PlanDrafter (create initial plan) // Main thread (Orchestrator)
25
+ |> PlanReviewer (critical review) // Sonnet - analysis
26
+ |> PlanRefiner (apply feedback) // Main thread (Orchestrator)
27
+ |> LOOP(min=3, max=5) // Repeat PlanReviewer -> PlanRefiner for 3-5 iterations
28
+ |> FinalValidator (ensure completeness) // Sonnet - validation
29
+ ```
30
+
31
+ ## Stage Details
32
+
33
+ ### Stage 1: Researcher
34
+
35
+ **Model**: Haiku (lookup task)
36
+
37
+ **GOAL**: Gather all information needed for planning.
38
+
39
+ **CONSTRAINTS**:
40
+ - Do NOT modify any files
41
+ - Focus on relevant information only
42
+ - Document sources and confidence
43
+
44
+ **CONTEXT**:
45
+ - Task/feature requirements
46
+ - Questions to answer
47
+ - Areas to research
48
+
49
+ **OUTPUT**: Research findings
50
+ ```yaml
51
+ research:
52
+ questions_answered:
53
+ - question: "What authentication methods are supported?"
54
+ answer: "OAuth 2.0, SAML, API keys"
55
+ confidence: high
56
+ source: "docs/auth.md"
57
+ - question: "How is session state managed?"
58
+ answer: "Redis-backed sessions"
59
+ confidence: high
60
+ source: "src/session/redis.ts"
61
+ patterns_found:
62
+ - pattern: "All API endpoints use middleware chain"
63
+ location: "src/middleware/"
64
+ - pattern: "Error handling via custom Error classes"
65
+ location: "src/errors/"
66
+ unknowns:
67
+ - "Rate limiting configuration unclear"
68
+ - "Caching strategy not documented"
69
+ ```
70
+
71
+ ### Stage 2: PlanDrafter
72
+
73
+ **Model**: Main thread (Orchestrator synthesizes)
74
+
75
+ **GOAL**: Create initial implementation plan from research.
76
+
77
+ **CONSTRAINTS**:
78
+ - Use research findings
79
+ - Follow project patterns
80
+ - Be specific and actionable
81
+
82
+ **CONTEXT**:
83
+ - Research findings from Stage 1
84
+ - Project structure
85
+ - Requirements/PRD
86
+
87
+ **OUTPUT**: Draft plan
88
+ ```yaml
89
+ plan:
90
+ version: 1
91
+ objective: "Implement user authentication with OAuth 2.0"
92
+ approach:
93
+ - step: 1
94
+ description: "Add OAuth provider configuration"
95
+ files: ["src/config/oauth.ts"]
96
+ effort: low
97
+ - step: 2
98
+ description: "Implement OAuth callback handler"
99
+ files: ["src/auth/oauth-callback.ts"]
100
+ effort: medium
101
+ - step: 3
102
+ description: "Add session creation on successful auth"
103
+ files: ["src/session/create.ts"]
104
+ effort: medium
105
+ risks:
106
+ - "Token refresh handling complexity"
107
+ open_questions:
108
+ - "Which OAuth providers to support initially?"
109
+ ```
110
+
111
+ ### Stage 3: PlanReviewer
112
+
113
+ **Model**: Sonnet (critical analysis)
114
+
115
+ **GOAL**: Critically review plan against requirements and context.
116
+
117
+ **CONSTRAINTS**:
118
+ - Do NOT modify any files
119
+ - Check against requirements
120
+ - Identify gaps and risks
121
+ - Be constructively critical
122
+
123
+ **CONTEXT**:
124
+ - Draft plan from Stage 2
125
+ - Original requirements
126
+ - Research findings
127
+ - Conversation context
128
+
129
+ **OUTPUT**: Review feedback
130
+ ```yaml
131
+ review:
132
+ iteration: 1
133
+ overall: "Good start, needs refinement"
134
+ gaps:
135
+ - "Missing: Error handling for OAuth failures"
136
+ - "Missing: Logout/token revocation flow"
137
+ risks_identified:
138
+ - risk: "Token storage security"
139
+ recommendation: "Use encrypted storage, not plain Redis"
140
+ suggestions:
141
+ - "Add step for CSRF protection on callback"
142
+ - "Consider adding refresh token rotation"
143
+ questions:
144
+ - "How will we handle users with multiple OAuth providers?"
145
+ ```
146
+
147
+ ### Stage 4: PlanRefiner
148
+
149
+ **Model**: Main thread (Orchestrator applies feedback)
150
+
151
+ **GOAL**: Incorporate review feedback into plan.
152
+
153
+ **CONSTRAINTS**:
154
+ - Address all gaps identified
155
+ - Respond to all questions
156
+ - Update risk mitigations
157
+
158
+ **CONTEXT**:
159
+ - Review feedback from Stage 3
160
+ - Previous plan version
161
+ - Research findings
162
+
163
+ **OUTPUT**: Refined plan (version N+1)
164
+
165
+ ### Loop: Minimum 3 Iterations
166
+
167
+ **Why minimum 3?**
168
+ 1. **Iteration 1**: Initial plan usually has gaps
169
+ 2. **Iteration 2**: Addresses obvious issues, reveals deeper ones
170
+ 3. **Iteration 3**: Refinement and polish
171
+
172
+ **Loop continues until**:
173
+ - Reviewer approves with no major gaps (only after the minimum of 3 iterations has been completed)
174
+ - OR max iterations (5) reached
175
+
176
+ ### Stage 5: FinalValidator
177
+
178
+ **Model**: Sonnet (validation)
179
+
180
+ **GOAL**: Final check that plan is complete and actionable.
181
+
182
+ **CONSTRAINTS**:
183
+ - Plan must be implementable
184
+ - All requirements covered
185
+ - Risks mitigated or accepted
186
+
187
+ **OUTPUT**: Validation result
188
+ ```yaml
189
+ validation:
190
+ approved: true
191
+ final_plan_version: 3
192
+ completeness_check:
193
+ requirements_covered: true
194
+ risks_documented: true
195
+ steps_actionable: true
196
+ ready_for_implementation: true
197
+ ```
198
+
199
+ ## Example Invocation
200
+
201
+ ```markdown
202
+ ## Pipeline: Research & Planning
203
+
204
+ ### Stage 1: Researcher
205
+ Task: subagent_type=general-purpose, model=haiku
206
+ Prompt: [4-part prompt with research questions]
207
+
208
+ ### Stage 2: PlanDrafter
209
+ Orchestrator synthesizes research into draft plan
210
+
211
+ ### Stage 3: PlanReviewer (Iteration 1)
212
+ Task: subagent_type=general-purpose, model=sonnet
213
+ Prompt: [4-part prompt, reviews plan v1]
214
+
215
+ ### Stage 4: PlanRefiner (Iteration 1)
216
+ Orchestrator applies feedback, creates plan v2
217
+
218
+ ### Stage 3: PlanReviewer (Iteration 2)
219
+ Task: subagent_type=general-purpose, model=sonnet
220
+ Prompt: [4-part prompt, reviews plan v2]
221
+
222
+ [... continue for minimum 3 iterations ...]
223
+
224
+ ### Stage 5: FinalValidator
225
+ Task: subagent_type=general-purpose, model=sonnet
226
+ Prompt: [4-part prompt, validates final plan]
227
+ ```
228
+
229
+ ## Success Criteria
230
+
231
+ - Research gathered and documented
232
+ - Plan iterated minimum 3 times
233
+ - All gaps addressed
234
+ - Risks identified and mitigated
235
+ - Final validation passed
236
+
237
+ ## Iteration Tracking
238
+
239
+ Track iterations explicitly:
240
+
241
+ ```yaml
242
+ iterations:
243
+ - version: 1
244
+ reviewer_feedback: "Missing error handling"
245
+ gaps_remaining: 3
246
+ - version: 2
247
+ reviewer_feedback: "Better, but security unclear"
248
+ gaps_remaining: 1
249
+ - version: 3
250
+ reviewer_feedback: "Approved"
251
+ gaps_remaining: 0
252
+ ```
253
+
254
+ ## Related Pipelines
255
+
256
+ - **New Feature**: For implementation after planning
257
+ - **Code Review**: For reviewing completed implementation
@@ -0,0 +1,389 @@
1
+ # Test Audit Pipeline
2
+
3
+ ## Purpose
4
+
5
+ Audit test suite quality, identify mock-heavy tests, and prioritize rewrites.
6
+
7
+ ## When to Use
8
+
9
+ - Test suite quality assessment
10
+ - Identifying tests that mock the system under test
11
+ - Prioritizing test improvements
12
+ - Ensuring T1-T4 compliance
13
+
14
+ ## Entry Points
15
+
16
+ | Invocation | Trigger |
17
+ |------------|---------|
18
+ | `/test-audit [path]` | User slash command |
19
+ | Conversation: "audit tests in..." | Natural language |
20
+ | PostToolUse hook on `*.test.*` | Automatic after test file changes |
21
+
22
+ ## Pipeline Definition
23
+
24
+ ```fsharp
25
+ // Test Audit Pipeline
26
+ // Pattern: Main Context Orchestration
27
+ // Orchestrator loads skill prompt templates and spawns general-purpose sub-agents
28
+
29
+ // Entry: /test-audit [path] OR hook-triggered
30
+ test-classification (Haiku sub-agent, surface classification + line counting)
31
+ |> mock-detection (Sonnet sub-agent, deep analysis + violation scope tracking)
32
+ |> test-audit synthesis (Sonnet sub-agent, two-gate REWRITE_REQUIRED directive)
33
+ |> (if REWRITE_REQUIRED == true
34
+ then Orchestrator (Opus) rewrites tests
35
+ else Done)
36
+ |> LOOP(max=2) // Limit audit-rewrite cycles to prevent infinite loops
37
+ ```
38
+
39
+ ## Bias Avoidance
40
+
41
+ This pipeline separates audit work from implementation to prevent self-review bias:
42
+
43
+ | Role | Executor | Rationale |
44
+ |------|----------|-----------|
45
+ | Classification | Haiku sub-agent | Surface pattern matching, triage |
46
+ | Detection | Sonnet sub-agent | Deep analysis, mock appropriateness evaluation |
47
+ | Synthesis | Sonnet sub-agent | Analysis, two-gate REWRITE_REQUIRED logic |
48
+ | Rewrite | Opus (orchestrator) | Implementation strength |
49
+
50
+ The orchestrator (Opus) does NOT perform audit work - only orchestration and implementation.
51
+
52
+ ## Two-Gate REWRITE_REQUIRED Logic
53
+
54
+ | Gate | Condition | Trigger |
55
+ |------|-----------|---------|
56
+ | **Gate 1: Impact** | Any P0 violation (false confidence) | REWRITE_REQUIRED regardless of % |
57
+ | **Gate 2: Threshold** | P1 violations AND test effectiveness < 95% | REWRITE_REQUIRED |
58
+ | **Advisory** | P2 only OR P1 with effectiveness >= 95% | Recommendations only |
59
+
60
+ **Test Effectiveness** = (verification_lines - affected_lines) / verification_lines, expressed as a percentage (e.g., (95 - 80) / 95 ≈ 16%)
61
+
62
+ ---
63
+
64
+ ## Stage Details
65
+
66
+ ### Stage 1: TestClassifier
67
+
68
+ **Skill**: `test-classification` (prompt template)
69
+
70
+ **Model**: Haiku (pattern matching task)
71
+
72
+ **Invocation**:
73
+ ```
74
+ Task(subagent_type="general-purpose", model="haiku", prompt=<skill template>)
75
+ ```
76
+
77
+ **GOAL**: Categorize all tests by type and quality.
78
+
79
+ **CONSTRAINTS**:
80
+ - Do NOT modify any files
81
+ - Classify every test file
82
+ - Use consistent categories
83
+ - Complete within 30 tool calls
84
+
85
+ **CONTEXT**:
86
+ - Target directory: {from $ARGUMENTS or conversation}
87
+ - Test file patterns: `*.test.*`, `*.spec.*`, `test_*`
88
+ - Project test framework (Jest, Vitest, etc.)
89
+
90
+ **OUTPUT**: `logs/test-classification-{YYYYMMDD-HHMMSS}.yaml`
91
+ ```yaml
92
+ metadata:
93
+ agent: test-classification
94
+ timestamp: {ISO-8601}
95
+ target: {directory}
96
+ model: haiku
97
+
98
+ classification:
99
+ total_files: 25
100
+ categories:
101
+ unit:
102
+ count: 80
103
+ files: [...]
104
+ integration:
105
+ count: 45
106
+ files: [...]
107
+ e2e:
108
+ count: 17
109
+ files: [...]
110
+ quality_indicators:
111
+ real_integration:
112
+ - file: tests/proxy.test.ts
113
+ indicators: [spawns-process, checks-port]
114
+ mock_heavy:
115
+ - file: tests/auth.test.ts
116
+ indicators: [mocks-fetch, mocks-fs]
117
+
118
+ summary: |
119
+ Classified 25 test files (142 tests total): 15 unit, 8 integration, 2 e2e files.
120
+ Found 3 real integration tests and 5 mock-heavy tests.
121
+ ```
122
+
123
+ ---
124
+
125
+ ### Stage 2: MockDetector
126
+
127
+ **Skill**: `mock-detection` (prompt template)
128
+
129
+ **Model**: Sonnet (deep analysis, mock appropriateness evaluation)
130
+
131
+ **Invocation**:
132
+ ```
133
+ Task(subagent_type="general-purpose", model="sonnet", prompt=<skill template + classification>)
134
+ ```
135
+
136
+ **GOAL**: Identify T1-T4 violations with violation scope tracking for test effectiveness calculation.
137
+
138
+ **CONSTRAINTS**:
139
+ - Do NOT modify any files
140
+ - Only analyze files with `needs_deep_analysis: true` from classification
141
+ - Track violation scope (affected line ranges, not just violation lines)
142
+ - Use call graph analysis to detect broken integration chains
143
+ - Complete within 50 tool calls
144
+
145
+ **CONTEXT**:
146
+ - Test classification from Stage 1 (files to analyze, verification_lines)
147
+ - Mock appropriateness rubric (unit vs integration vs e2e)
148
+ - T1-T4 rules from Rules.md
149
+
150
+ **OUTPUT**: `logs/mock-detection-{YYYYMMDD-HHMMSS}.yaml`
151
+ ```yaml
152
+ metadata:
153
+ skill: mock-detection
154
+ timestamp: {ISO-8601}
155
+ classification_source: logs/test-classification-{YYYYMMDD-HHMMSS}.yaml
156
+ model: sonnet
157
+ files_analyzed: 5
158
+
159
+ violations:
160
+ - file: tests/proxy.test.ts
161
+ line: 15
162
+ violation_scope: [15, 95] # Lines 15-95 depend on the mock
163
+ affected_lines: 80
164
+ rule: T1
165
+ severity: critical
166
+ priority: P0
167
+ pattern: "jest.spyOn(child_process, 'spawn')"
168
+ reason: "Test mocks spawn, all downstream assertions are ineffective"
169
+ suggested_fix: "Use real spawn. Verify with port check."
170
+
171
+ - file: tests/api.integration.ts
172
+ line: 8
173
+ violation_scope: [8, 45]
174
+ affected_lines: 37
175
+ rule: T3
176
+ severity: critical
177
+ priority: P1
178
+ pattern: "jest.mock('node-fetch')"
179
+ reason: "Integration test should use real HTTP"
180
+ suggested_fix: "Use MSW or test server."
181
+
182
+ file_summaries:
183
+ - file: tests/proxy.test.ts
184
+ verification_lines: 95
185
+ affected_lines: 80
186
+ test_effectiveness: 16%
187
+
188
+ totals:
189
+ critical: 2
190
+ high: 1
191
+ total_affected_lines: 118
192
+
193
+ summary: |
194
+ Analyzed 5 files. Found 3 violations affecting 118 lines.
195
+ P0: proxy.test.ts (16% effective). P1: api.integration.ts (33% effective).
196
+ ```
197
+
198
+ ---
199
+
200
+ ### Stage 3: AuditSynthesizer
201
+
202
+ **Skill**: `test-audit` (synthesis prompt template)
203
+
204
+ **Model**: Sonnet (analysis and synthesis)
205
+
206
+ **Invocation**:
207
+ ```
208
+ Task(subagent_type="general-purpose", model="sonnet", prompt=<synthesis template + classification + violations>)
209
+ ```
210
+
211
+ **GOAL**: Compile findings into prioritized rewrite list.
212
+
213
+ **CONSTRAINTS**:
214
+ - Do NOT modify any files
215
+ - Prioritize by impact (P0 > P1 > P2 > P3)
216
+ - Provide actionable rewrite guidance
217
+ - Include REWRITE_REQUIRED directive
218
+
219
+ **CONTEXT**:
220
+ - Classification from Stage 1
221
+ - Violations from Stage 2
222
+ - T1-T4 rules reference
223
+
224
+ **OUTPUT**: `logs/test-audit-{YYYYMMDD-HHMMSS}.yaml`
225
+ ```yaml
226
+ metadata:
227
+ agent: test-audit
228
+ timestamp: {ISO-8601}
229
+ sources:
230
+ classification: logs/test-classification-{YYYYMMDD-HHMMSS}.yaml
231
+ violations: logs/mock-detection-{YYYYMMDD-HHMMSS}.yaml
232
+ model: sonnet
233
+
234
+ audit:
235
+ overview:
236
+ total_tests: 142
237
+ compliant: 98
238
+ violations: 44
239
+ critical: 12
240
+ priority_rewrites:
241
+ - file: tests/proxy.test.ts
242
+ priority: P0 # Critical
243
+ violations: [T1]
244
+ reason: "Core functionality mocked - test provides false confidence"
245
+ effort: medium
246
+ approach: |
247
+ Replace mock with real proxy spawn.
248
+ Use port check to verify proxy started.
249
+ Add timeout for startup wait.
250
+ - file: tests/api.integration.ts
251
+ priority: P1 # High
252
+ violations: [T3]
253
+ reason: "Integration test uses mocks - defeats purpose"
254
+ effort: low
255
+ approach: |
256
+ Remove jest.mock('node-fetch').
257
+ Use test server or real endpoint.
258
+ recommendations:
259
+ - "Establish test harness for proxy testing"
260
+ - "Create shared fixtures for integration tests"
261
+ - "Add pre-commit hook to prevent new T1 violations"
262
+
263
+ # CRITICAL: Orchestrator directive
264
+ directive:
265
+ REWRITE_REQUIRED: true
266
+ priority_files: [tests/proxy.test.ts, tests/api.integration.ts]
267
+ rationale: "2 P0/P1 violations require immediate attention"
268
+
269
+ summary: |
270
+ Audit complete: 44 violations found, 12 critical.
271
+ REWRITE_REQUIRED: true - 2 files need immediate attention.
272
+ Priority: proxy.test.ts (P0, mocks spawn), api.integration.ts (P1, mocks fetch).
273
+ ```
274
+
275
+ ---
276
+
277
+ ### Stage 4: Test Rewrite (Orchestrator Work)
278
+
279
+ **Executor**: Orchestrator (Opus) - NOT a sub-agent
280
+
281
+ **Conditional**: Only executes if `audit.directive.REWRITE_REQUIRED == true`
282
+
283
+ **Behavior**:
284
+ 1. Read `priority_files` from audit report
285
+ 2. For each file, follow `approach` guidance from audit
286
+ 3. Implement rewrites directly (not delegated to sub-agent)
287
+ 4. Follow T1-T4 rules strictly
288
+ 5. Present summary of changes
289
+
290
+ **Loop Handling**:
291
+ - After rewrite, PostToolUse hook may fire again
292
+ - Pipeline limited to `LOOP(max=2)` to prevent infinite cycles
293
+ - Second pass serves as verification that rewrites resolved violations
294
+
295
+ **OUTPUT**: Rewritten test files + summary
296
+ ```yaml
297
+ rewrites:
298
+ completed:
299
+ - file: tests/proxy.test.ts
300
+ changes: "Replaced mock with real proxy spawn"
301
+ verification: "Run: npm test -- proxy.test.ts"
302
+ remaining:
303
+ - file: tests/api.integration.ts
304
+ reason: "Needs test server setup first"
305
+ ```
306
+
307
+ ---
308
+
309
+ ## Example Orchestrator Flow
310
+
311
+ ```markdown
312
+ ## Orchestrator Execution (Main Context)
313
+
314
+ ### Step 1: Resolve Target
315
+ - Parse $ARGUMENTS from /test-audit invocation
316
+ - OR extract from conversation context
317
+ - OR receive from hook additionalContext
318
+
319
+ ### Step 2: Classification Stage
320
+ 1. Load `test-classification` skill
321
+ 2. Construct 4-part prompt using skill template
322
+ 3. Task(subagent_type="general-purpose", model="haiku", prompt=...)
323
+ 4. Read output: logs/test-classification-{YYYYMMDD-HHMMSS}.yaml
324
+
325
+ ### Step 3: Detection Stage (Sonnet)
326
+ 1. Load `mock-detection` skill
327
+ 2. Construct 4-part prompt + include classification as CONTEXT
328
+ 3. Task(subagent_type="general-purpose", model="sonnet", prompt=...)
329
+ 4. Read output: logs/mock-detection-{YYYYMMDD-HHMMSS}.yaml
330
+
331
+ ### Step 4: Synthesis Stage
332
+ 1. Construct synthesis prompt from test-audit skill
333
+ 2. Include classification + violations as CONTEXT
334
+ 3. Task(subagent_type="general-purpose", model="sonnet", prompt=...)
335
+ 4. Read output: logs/test-audit-{YYYYMMDD-HHMMSS}.yaml
336
+
337
+ ### Step 5: Present Summary
338
+ Display audit summary to user before proceeding.
339
+
340
+ ### Step 6: Rewrite (if needed)
341
+ IF audit.directive.REWRITE_REQUIRED == true:
342
+ For each file in priority_files:
343
+ Implement rewrite using approach from audit
344
+ Follow T1-T4 rules strictly
345
+ ELSE:
346
+ Report: "No rewrites required"
347
+ ```
348
+
349
+ ---
350
+
351
+ ## Success Criteria
352
+
353
+ - All tests classified
354
+ - T1-T4 violations identified with line numbers
355
+ - Priority rewrite list generated with approach guidance
356
+ - REWRITE_REQUIRED directive produced
357
+ - Critical tests rewritten (if any)
358
+ - Loop limited to max 2 cycles
359
+
360
+ ---
361
+
362
+ ## T1-T4 Rules Reference
363
+
364
+ | Rule | Description | Violation Example |
365
+ |------|-------------|-------------------|
366
+ | T1 | Never mock system under test | `jest.spyOn(child_process, 'spawn')` when testing spawn |
367
+ | T2 | Verify observable output | `expect(fn).toHaveBeenCalled()` only |
368
+ | T3 | Integration uses real systems | `jest.mock('fs')` in integration test |
369
+ | T4 | Run tests before marking work complete | Not running the tests after writing them |
370
+
371
+ ---
372
+
373
+ ## Related Skills
374
+
375
+ | Skill | Purpose |
376
+ |-------|---------|
377
+ | `test-classification` | Prompt template for Stage 1 (Haiku) - surface classification + line counting |
378
+ | `mock-detection` | Prompt template for Stage 2 (Sonnet) - deep analysis + violation scope |
379
+ | `test-audit` | Entry point + orchestration + two-gate synthesis |
380
+ | `subagent-prompting` | 4-part prompt structure |
381
+ | `subagent-output-templating` | Output format for logs/ |
382
+
383
+ ---
384
+
385
+ ## Related Pipelines
386
+
387
+ - **Fix Validation**: For fixing issues found in tests
388
+ - **Test Execution & Fix**: For running and fixing tests
389
+ - **Code Review**: For reviewing test code quality