codingbuddy-rules 4.5.0 → 5.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (129)
  1. package/.ai-rules/adapters/aider.md +374 -0
  2. package/.ai-rules/adapters/antigravity.md +6 -6
  3. package/.ai-rules/adapters/claude-code.md +68 -4
  4. package/.ai-rules/adapters/codex.md +5 -5
  5. package/.ai-rules/adapters/cursor.md +2 -2
  6. package/.ai-rules/adapters/kiro.md +8 -8
  7. package/.ai-rules/adapters/opencode.md +7 -7
  8. package/.ai-rules/adapters/q.md +2 -2
  9. package/.ai-rules/adapters/windsurf.md +395 -0
  10. package/.ai-rules/agents/README.md +66 -16
  11. package/.ai-rules/agents/accessibility-specialist.json +8 -1
  12. package/.ai-rules/agents/act-mode.json +8 -1
  13. package/.ai-rules/agents/agent-architect.json +14 -7
  14. package/.ai-rules/agents/ai-ml-engineer.json +7 -0
  15. package/.ai-rules/agents/architecture-specialist.json +7 -0
  16. package/.ai-rules/agents/auto-mode.json +10 -2
  17. package/.ai-rules/agents/backend-developer.json +7 -0
  18. package/.ai-rules/agents/code-quality-specialist.json +7 -0
  19. package/.ai-rules/agents/code-reviewer.json +86 -64
  20. package/.ai-rules/agents/data-engineer.json +14 -7
  21. package/.ai-rules/agents/data-scientist.json +16 -9
  22. package/.ai-rules/agents/devops-engineer.json +7 -0
  23. package/.ai-rules/agents/documentation-specialist.json +7 -0
  24. package/.ai-rules/agents/eval-mode.json +30 -19
  25. package/.ai-rules/agents/event-architecture-specialist.json +7 -0
  26. package/.ai-rules/agents/frontend-developer.json +7 -0
  27. package/.ai-rules/agents/i18n-specialist.json +8 -1
  28. package/.ai-rules/agents/integration-specialist.json +7 -0
  29. package/.ai-rules/agents/migration-specialist.json +7 -0
  30. package/.ai-rules/agents/mobile-developer.json +8 -10
  31. package/.ai-rules/agents/observability-specialist.json +7 -0
  32. package/.ai-rules/agents/parallel-orchestrator.json +352 -0
  33. package/.ai-rules/agents/performance-specialist.json +7 -0
  34. package/.ai-rules/agents/plan-mode.json +9 -1
  35. package/.ai-rules/agents/plan-reviewer.json +211 -0
  36. package/.ai-rules/agents/platform-engineer.json +7 -0
  37. package/.ai-rules/agents/security-engineer.json +15 -8
  38. package/.ai-rules/agents/security-specialist.json +8 -1
  39. package/.ai-rules/agents/seo-specialist.json +7 -0
  40. package/.ai-rules/agents/software-engineer.json +7 -0
  41. package/.ai-rules/agents/solution-architect.json +17 -10
  42. package/.ai-rules/agents/systems-developer.json +15 -8
  43. package/.ai-rules/agents/technical-planner.json +17 -10
  44. package/.ai-rules/agents/test-engineer.json +13 -6
  45. package/.ai-rules/agents/test-strategy-specialist.json +7 -0
  46. package/.ai-rules/agents/tooling-engineer.json +10 -3
  47. package/.ai-rules/agents/ui-ux-designer.json +7 -0
  48. package/.ai-rules/keyword-modes.json +4 -4
  49. package/.ai-rules/rules/clarification-guide.md +14 -14
  50. package/.ai-rules/rules/core.md +73 -0
  51. package/.ai-rules/rules/parallel-execution.md +217 -0
  52. package/.ai-rules/schemas/agent.schema.json +38 -0
  53. package/.ai-rules/skills/README.md +29 -1
  54. package/.ai-rules/skills/agent-design/SKILL.md +5 -0
  55. package/.ai-rules/skills/agent-design/examples/agent-template.json +55 -0
  56. package/.ai-rules/skills/agent-design/references/expertise-guidelines.md +112 -0
  57. package/.ai-rules/skills/agent-discussion/SKILL.md +199 -0
  58. package/.ai-rules/skills/agent-discussion-panel/SKILL.md +448 -0
  59. package/.ai-rules/skills/api-design/SKILL.md +5 -0
  60. package/.ai-rules/skills/api-design/examples/error-response.json +159 -0
  61. package/.ai-rules/skills/api-design/examples/openapi-template.yaml +393 -0
  62. package/.ai-rules/skills/build-fix/SKILL.md +234 -0
  63. package/.ai-rules/skills/code-explanation/SKILL.md +4 -0
  64. package/.ai-rules/skills/context-management/SKILL.md +1 -0
  65. package/.ai-rules/skills/cost-budget/SKILL.md +348 -0
  66. package/.ai-rules/skills/cross-repo-issues/SKILL.md +257 -0
  67. package/.ai-rules/skills/database-migration/SKILL.md +1 -0
  68. package/.ai-rules/skills/deepsearch/SKILL.md +214 -0
  69. package/.ai-rules/skills/deployment-checklist/SKILL.md +1 -0
  70. package/.ai-rules/skills/error-analysis/SKILL.md +1 -0
  71. package/.ai-rules/skills/finishing-a-development-branch/SKILL.md +281 -0
  72. package/.ai-rules/skills/frontend-design/SKILL.md +5 -0
  73. package/.ai-rules/skills/frontend-design/examples/component-template.tsx +203 -0
  74. package/.ai-rules/skills/frontend-design/references/css-patterns.md +243 -0
  75. package/.ai-rules/skills/git-master/SKILL.md +358 -0
  76. package/.ai-rules/skills/incident-response/SKILL.md +1 -0
  77. package/.ai-rules/skills/legacy-modernization/SKILL.md +1 -0
  78. package/.ai-rules/skills/mcp-builder/SKILL.md +7 -0
  79. package/.ai-rules/skills/mcp-builder/examples/resource-example.ts +233 -0
  80. package/.ai-rules/skills/mcp-builder/examples/tool-example.ts +198 -0
  81. package/.ai-rules/skills/mcp-builder/references/protocol-spec.md +215 -0
  82. package/.ai-rules/skills/onboard/SKILL.md +150 -0
  83. package/.ai-rules/skills/performance-optimization/SKILL.md +3 -0
  84. package/.ai-rules/skills/plan-and-review/SKILL.md +115 -0
  85. package/.ai-rules/skills/plan-to-issues/SKILL.md +318 -0
  86. package/.ai-rules/skills/pr-all-in-one/SKILL.md +15 -13
  87. package/.ai-rules/skills/pr-all-in-one/configuration-guide.md +7 -7
  88. package/.ai-rules/skills/pr-all-in-one/pr-templates.md +10 -10
  89. package/.ai-rules/skills/pr-review/SKILL.md +4 -0
  90. package/.ai-rules/skills/receiving-code-review/SKILL.md +347 -0
  91. package/.ai-rules/skills/refactoring/SKILL.md +1 -0
  92. package/.ai-rules/skills/requesting-code-review/SKILL.md +348 -0
  93. package/.ai-rules/skills/retrospective/SKILL.md +192 -0
  94. package/.ai-rules/skills/rule-authoring/SKILL.md +5 -0
  95. package/.ai-rules/skills/rule-authoring/examples/rule-template.md +142 -0
  96. package/.ai-rules/skills/rule-authoring/examples/trigger-patterns.md +126 -0
  97. package/.ai-rules/skills/security-audit/SKILL.md +4 -0
  98. package/.ai-rules/skills/ship/SKILL.md +242 -0
  99. package/.ai-rules/skills/skill-creator/SKILL.md +461 -0
  100. package/.ai-rules/skills/skill-creator/agents/analyzer.md +206 -0
  101. package/.ai-rules/skills/skill-creator/agents/comparator.md +167 -0
  102. package/.ai-rules/skills/skill-creator/agents/grader.md +152 -0
  103. package/.ai-rules/skills/skill-creator/assets/eval_review.html +568 -0
  104. package/.ai-rules/skills/skill-creator/assets/skill-template.md +43 -0
  105. package/.ai-rules/skills/skill-creator/eval-viewer/generate_review.py +496 -0
  106. package/.ai-rules/skills/skill-creator/references/frontmatter-guide.md +632 -0
  107. package/.ai-rules/skills/skill-creator/references/multi-tool-compat.md +480 -0
  108. package/.ai-rules/skills/skill-creator/references/schemas.md +784 -0
  109. package/.ai-rules/skills/skill-creator/scripts/aggregate_benchmark.py +302 -0
  110. package/.ai-rules/skills/skill-creator/scripts/init_skill.sh +196 -0
  111. package/.ai-rules/skills/skill-creator/scripts/run_loop.py +327 -0
  112. package/.ai-rules/skills/systematic-debugging/SKILL.md +1 -0
  113. package/.ai-rules/skills/tech-debt/SKILL.md +1 -0
  114. package/.ai-rules/skills/test-coverage-gate/SKILL.md +303 -0
  115. package/.ai-rules/skills/tmux-master/SKILL.md +491 -0
  116. package/.ai-rules/skills/using-git-worktrees/SKILL.md +368 -0
  117. package/.ai-rules/skills/verification-before-completion/SKILL.md +234 -0
  118. package/.ai-rules/skills/widget-slot-architecture/SKILL.md +6 -0
  119. package/.ai-rules/skills/widget-slot-architecture/examples/parallel-route-setup.tsx +206 -0
  120. package/.ai-rules/skills/widget-slot-architecture/examples/widget-component.tsx +250 -0
  121. package/.ai-rules/skills/writing-plans/SKILL.md +78 -0
  122. package/bin/cli.js +170 -0
  123. package/lib/init/detect-stack.js +162 -0
  124. package/lib/init/generate-config.js +31 -0
  125. package/lib/init/index.js +86 -0
  126. package/lib/init/prompt.js +60 -0
  127. package/lib/init/scaffold.js +67 -0
  128. package/lib/init/suggest-agent.js +57 -0
  129. package/package.json +10 -2
@@ -0,0 +1,327 @@
1
+ #!/usr/bin/env python3
2
+ """Run a description optimization loop for skill trigger evaluation.
3
+
4
+ Loads trigger_eval.json, splits cases into 60/40 train/test sets,
5
+ and iterates to optimize the skill description for better trigger accuracy.
6
+
7
+ LLM calls are replaced with CLI guidance — the script logs scores and
8
+ prompts the user to manually refine the description between iterations.
9
+
10
+ Usage:
11
+ python run_loop.py <trigger-eval-json> --skill-name <name> [--iterations N] [--seed S]
12
+
13
+ Example:
14
+ python run_loop.py workspace/trigger_eval.json --skill-name tdd --iterations 5
15
+
16
+ Requirements:
17
+ Python 3.8+ (standard library only)
18
+ """
19
+
20
+ from __future__ import annotations
21
+
22
+ import argparse
23
+ import json
24
+ import random
25
+ import sys
26
+ from datetime import datetime, timezone
27
+ from pathlib import Path
28
+ from typing import Any, Dict, List, Optional, Tuple
29
+
30
+
31
def parse_args(argv: Optional[List[str]] = None) -> argparse.Namespace:
    """Parse command-line arguments for the optimization loop.

    Args:
        argv: Argument list to parse; defaults to ``sys.argv[1:]`` when None.

    Returns:
        Namespace with ``trigger_eval_json``, ``skill_name``, ``iterations``,
        ``seed``, and ``output_dir`` attributes.
    """
    ap = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=(
            "Description optimization loop for skill trigger evaluation. "
            "Splits trigger_eval.json into train/test sets and guides "
            "iterative description refinement."
        ),
        epilog=(
            "Example:\n"
            " python run_loop.py workspace/trigger_eval.json "
            "--skill-name tdd --iterations 5\n\n"
            "The script will guide you through each iteration, prompting\n"
            "you to run `recommend_skills` manually and enter results."
        ),
    )
    # Positional: path to the evaluation cases file.
    ap.add_argument(
        "trigger_eval_json",
        type=str,
        help="Path to trigger_eval.json file",
    )
    ap.add_argument(
        "--skill-name",
        required=True,
        help="Target skill name in kebab-case",
    )
    ap.add_argument(
        "--iterations",
        type=int,
        default=5,
        help="Number of optimization iterations (default: 5)",
    )
    ap.add_argument(
        "--seed",
        type=int,
        default=42,
        help="Random seed for train/test split (default: 42)",
    )
    ap.add_argument(
        "--output-dir",
        type=str,
        default=None,
        help="Directory to write iteration logs (default: same as trigger_eval.json)",
    )
    return ap.parse_args(argv)
76
+
77
+
78
+ def _load_trigger_eval(path: Path) -> List[Dict[str, Any]]:
79
+ """Load and validate trigger_eval.json."""
80
+ with open(path, encoding="utf-8") as f:
81
+ data = json.load(f)
82
+
83
+ if not isinstance(data, list) or len(data) == 0:
84
+ raise ValueError(
85
+ "trigger_eval.json must be a non-empty array of test cases"
86
+ )
87
+
88
+ for i, case in enumerate(data):
89
+ if "query" not in case or "should_trigger" not in case:
90
+ raise ValueError(
91
+ f"Case {i} missing required fields: 'query' and 'should_trigger'"
92
+ )
93
+
94
+ return data
95
+
96
+
97
+ def _split_train_test(
98
+ cases: List[Dict[str, Any]], seed: int
99
+ ) -> Tuple[List[Dict[str, Any]], List[Dict[str, Any]]]:
100
+ """Split cases into 60% train / 40% test with deterministic shuffle."""
101
+ rng = random.Random(seed)
102
+ shuffled = list(cases)
103
+ rng.shuffle(shuffled)
104
+ split_idx = max(1, int(len(shuffled) * 0.6))
105
+ return shuffled[:split_idx], shuffled[split_idx:]
106
+
107
+
108
+ def _compute_metrics(
109
+ results: List[Dict[str, Any]],
110
+ ) -> Dict[str, float]:
111
+ """Compute precision, recall, F1 from trigger results.
112
+
113
+ Each result dict has:
114
+ - should_trigger: bool (ground truth)
115
+ - triggered: bool (actual result from user input)
116
+ """
117
+ tp = sum(1 for r in results if r["should_trigger"] and r["triggered"])
118
+ fp = sum(1 for r in results if not r["should_trigger"] and r["triggered"])
119
+ fn = sum(1 for r in results if r["should_trigger"] and not r["triggered"])
120
+ tn = sum(
121
+ 1 for r in results if not r["should_trigger"] and not r["triggered"]
122
+ )
123
+
124
+ precision = tp / (tp + fp) if (tp + fp) > 0 else 0.0
125
+ recall = tp / (tp + fn) if (tp + fn) > 0 else 0.0
126
+ f1 = (
127
+ 2 * precision * recall / (precision + recall)
128
+ if (precision + recall) > 0
129
+ else 0.0
130
+ )
131
+ accuracy = (tp + tn) / len(results) if results else 0.0
132
+
133
+ return {
134
+ "precision": round(precision, 4),
135
+ "recall": round(recall, 4),
136
+ "f1": round(f1, 4),
137
+ "accuracy": round(accuracy, 4),
138
+ "tp": tp,
139
+ "fp": fp,
140
+ "fn": fn,
141
+ "tn": tn,
142
+ }
143
+
144
+
145
+ def _prompt_user_results(
146
+ cases: List[Dict[str, Any]], skill_name: str, label: str
147
+ ) -> List[Dict[str, Any]]:
148
+ """Prompt user to manually run recommend_skills and report results.
149
+
150
+ Since LLM calls cannot be made from this script, we guide the user
151
+ to evaluate each query and enter whether the skill was triggered.
152
+ """
153
+ print(f"\n{'=' * 60}")
154
+ print(f" Evaluating {label} set ({len(cases)} cases)")
155
+ print(f" Target skill: {skill_name}")
156
+ print(f"{'=' * 60}")
157
+ print()
158
+ print("For each query below, run:")
159
+ print(f" recommend_skills(prompt=<query>)")
160
+ print(f"and check if '{skill_name}' appears in the results.")
161
+ print()
162
+
163
+ results: List[Dict[str, Any]] = []
164
+ for i, case in enumerate(cases):
165
+ query = case["query"]
166
+ expected = case["should_trigger"]
167
+ print(f" [{i + 1}/{len(cases)}] Query: {query}")
168
+ print(f" Expected: {'TRIGGER' if expected else 'NO TRIGGER'}")
169
+
170
+ while True:
171
+ response = input(
172
+ " Result? (y=triggered / n=not triggered / s=skip): "
173
+ ).strip().lower()
174
+ if response in ("y", "n", "s"):
175
+ break
176
+ print(" Invalid input. Enter y, n, or s.")
177
+
178
+ if response == "s":
179
+ print(" -> Skipped")
180
+ continue
181
+
182
+ triggered = response == "y"
183
+ match = triggered == expected
184
+ results.append(
185
+ {
186
+ "query": query,
187
+ "should_trigger": expected,
188
+ "triggered": triggered,
189
+ "match": match,
190
+ }
191
+ )
192
+ print(f" -> {'MATCH' if match else 'MISMATCH'}")
193
+
194
+ return results
195
+
196
+
197
+ def _print_metrics(metrics: Dict[str, float], label: str) -> None:
198
+ """Pretty-print evaluation metrics."""
199
+ print(f"\n--- {label} Metrics ---")
200
+ print(f" Precision: {metrics['precision']:.2%}")
201
+ print(f" Recall: {metrics['recall']:.2%}")
202
+ print(f" F1 Score: {metrics['f1']:.2%}")
203
+ print(f" Accuracy: {metrics['accuracy']:.2%}")
204
+ print(
205
+ f" (TP={metrics['tp']} FP={metrics['fp']} "
206
+ f"FN={metrics['fn']} TN={metrics['tn']})"
207
+ )
208
+
209
+
210
def main(argv: Optional[List[str]] = None) -> int:
    """Run the interactive description optimization loop.

    Loads the trigger evaluation cases, splits them into train/test sets,
    then for each iteration: scores the current description on the train
    set, guides the user to refine the description, and scores the result
    on the test set. Writes a JSON optimization log and prints a summary.

    Args:
        argv: Argument list forwarded to parse_args; defaults to sys.argv[1:].

    Returns:
        Process exit code: 0 on success, 1 on input/validation errors.
    """
    args = parse_args(argv)
    trigger_path = Path(args.trigger_eval_json).resolve()

    if not trigger_path.is_file():
        print(f"[ERROR] File not found: {trigger_path}", file=sys.stderr)
        return 1

    output_dir = Path(args.output_dir) if args.output_dir else trigger_path.parent
    output_dir = output_dir.resolve()
    # Bug fix: a user-supplied --output-dir may not exist yet; create it so
    # the log write at the end cannot fail with FileNotFoundError.
    output_dir.mkdir(parents=True, exist_ok=True)

    try:
        cases = _load_trigger_eval(trigger_path)
    except (json.JSONDecodeError, ValueError) as exc:
        print(f"[ERROR] Failed to load trigger_eval.json: {exc}", file=sys.stderr)
        return 1

    train_set, test_set = _split_train_test(cases, args.seed)
    print(f"Loaded {len(cases)} cases: {len(train_set)} train / {len(test_set)} test")
    print(f"Skill: {args.skill_name}")
    print(f"Iterations: {args.iterations}")
    print(f"Seed: {args.seed}")

    iteration_log: List[Dict[str, Any]] = []

    for iteration in range(1, args.iterations + 1):
        print(f"\n{'#' * 60}")
        print(f" ITERATION {iteration}/{args.iterations}")
        print(f"{'#' * 60}")

        # Step 1: score the current description on the TRAIN set.
        print("\n[Step 1] Evaluate current description on TRAIN set")
        train_results = _prompt_user_results(
            train_set, args.skill_name, f"Iteration {iteration} TRAIN"
        )
        train_metrics = _compute_metrics(train_results)
        _print_metrics(train_metrics, f"Iteration {iteration} TRAIN")

        # Step 2: guide the user to refine the description by hand.
        # Bug fix: this banner had a needless f-prefix with no placeholders.
        print("\n[Step 2] Refine the skill description")
        print(" Based on the train results above, update the skill's")
        print(" 'description' field in SKILL.md frontmatter to improve")
        print(" trigger accuracy.")
        print()
        print(" Mismatched cases to focus on:")
        mismatches = [r for r in train_results if not r.get("match", True)]
        if mismatches:
            for m in mismatches:
                direction = "should trigger but didn't" if m["should_trigger"] else "triggered but shouldn't"
                print(f" - \"{m['query']}\" ({direction})")
        else:
            print(" (none — perfect score on train set)")
        print()
        input(" Press Enter when description has been updated...")

        # Step 3: score the updated description on the held-out TEST set.
        print("\n[Step 3] Evaluate updated description on TEST set")
        test_results = _prompt_user_results(
            test_set, args.skill_name, f"Iteration {iteration} TEST"
        )
        test_metrics = _compute_metrics(test_results)
        _print_metrics(test_metrics, f"Iteration {iteration} TEST")

        # Record this iteration for the JSON log and final summary.
        entry = {
            "iteration": iteration,
            "timestamp": datetime.now(timezone.utc).isoformat(),
            "train": {
                "metrics": train_metrics,
                "total_cases": len(train_results),
            },
            "test": {
                "metrics": test_metrics,
                "total_cases": len(test_results),
            },
        }
        iteration_log.append(entry)

        # Convergence: stop early once the test set is scored perfectly.
        if test_metrics["f1"] >= 1.0:
            print("\n[INFO] Perfect F1 on test set. Stopping early.")
            break

    # Persist the full optimization history next to the eval file (or in
    # the user-chosen output directory).
    log_path = output_dir / f"optimization_log_{args.skill_name}.json"
    with open(log_path, "w", encoding="utf-8") as f:
        json.dump(
            {
                "skill_name": args.skill_name,
                "seed": args.seed,
                "train_size": len(train_set),
                "test_size": len(test_set),
                "iterations": iteration_log,
            },
            f,
            indent=2,
            ensure_ascii=False,
        )
    print(f"\n[OK] Optimization log written: {log_path}")

    # Final summary table: one row per completed iteration.
    print(f"\n{'=' * 60}")
    print(" OPTIMIZATION SUMMARY")
    print(f"{'=' * 60}")
    print(f"\n {'Iter':>4} {'Train F1':>10} {'Test F1':>10} {'Test Acc':>10}")
    print(f" {'----':>4} {'--------':>10} {'-------':>10} {'--------':>10}")
    for entry in iteration_log:
        it = entry["iteration"]
        tf1 = entry["train"]["metrics"]["f1"]
        sf1 = entry["test"]["metrics"]["f1"]
        sacc = entry["test"]["metrics"]["accuracy"]
        print(f" {it:>4} {tf1:>10.2%} {sf1:>10.2%} {sacc:>10.2%}")

    return 0
324
+
325
+
326
+ if __name__ == "__main__":
327
+ sys.exit(main())
@@ -1,6 +1,7 @@
1
1
  ---
2
2
  name: systematic-debugging
3
3
  description: Use when encountering any bug, test failure, or unexpected behavior, before proposing fixes
4
+ allowed-tools: Read, Grep, Glob, Bash
4
5
  ---
5
6
 
6
7
  # Systematic Debugging
@@ -1,6 +1,7 @@
1
1
  ---
2
2
  name: tech-debt
3
3
  description: Use when identifying, prioritizing, and planning resolution of technical debt. Provides structured assessment, ROI-based prioritization, and incremental paydown strategies.
4
+ allowed-tools: Read, Grep, Glob
4
5
  ---
5
6
 
6
7
  # Tech Debt
@@ -0,0 +1,303 @@
1
+ ---
2
+ name: test-coverage-gate
3
+ description: >-
4
+ Use before shipping, creating PRs, or merging to enforce minimum test coverage thresholds.
5
+ Covers line, branch, and function coverage. Supports vitest, jest, c8, and istanbul.
6
+ Blocks shipping when coverage falls below configurable thresholds (default 80%).
7
+ disable-model-invocation: true
8
+ argument-hint: "[--threshold=N] [--type=line|branch|function|all]"
9
+ ---
10
+
11
+ # Test Coverage Gate
12
+
13
+ ## Overview
14
+
15
+ Untested code is unverified code. Shipping without coverage data is shipping blind.
16
+
17
+ This skill enforces minimum test coverage thresholds before code leaves your machine. It detects your coverage tool, runs the analysis, compares results against thresholds, and blocks shipping if coverage is insufficient.
18
+
19
+ **Core principle:** No code ships without meeting coverage thresholds. Period.
20
+
21
+ **Iron Law:**
22
+ ```
23
+ NO SHIP WITHOUT COVERAGE GATE PASSING
24
+ If coverage is below threshold, the PR does not get created.
25
+ ```
26
+
27
+ ## When to Use
28
+
29
+ **Always, before:**
30
+ - Running `/ship` to create a PR
31
+ - Merging feature branches
32
+ - Tagging releases
33
+ - Deploying to staging or production
34
+
35
+ **Especially when:**
36
+ - Adding new features (new code must be tested)
37
+ - Refactoring (coverage must not drop)
38
+ - Fixing bugs (regression test required)
39
+ - Touching critical paths (auth, payments, data processing)
40
+
41
+ **Skip only when:**
42
+ - Documentation-only changes (no source code modified)
43
+ - Configuration file changes (non-code)
44
+ - Generated code explicitly excluded from coverage
45
+
46
+ ## Configuration
47
+
48
+ ### Default Thresholds
49
+
50
+ ```
51
+ Line Coverage: 80%
52
+ Branch Coverage: 80%
53
+ Function Coverage: 80%
54
+ ```
55
+
56
+ ### Custom Thresholds
57
+
58
+ Override defaults per project via `codingbuddy.config.json`, `package.json`, or tool-native config:
59
+
60
+ ```jsonc
61
+ // codingbuddy.config.json
62
+ {
63
+ "coverage": {
64
+ "thresholds": {
65
+ "lines": 80,
66
+ "branches": 80,
67
+ "functions": 80
68
+ },
69
+ "tool": "auto", // "vitest" | "jest" | "c8" | "istanbul" | "auto"
70
+ "excludePatterns": [
71
+ "**/*.config.*",
72
+ "**/generated/**",
73
+ "**/migrations/**"
74
+ ]
75
+ }
76
+ }
77
+ ```
78
+
79
+ ```jsonc
80
+ // package.json (alternative)
81
+ {
82
+ "coverageGate": {
83
+ "lines": 80,
84
+ "branches": 80,
85
+ "functions": 80
86
+ }
87
+ }
88
+ ```
89
+
90
+ ### Per-Tool Native Config
91
+
92
+ If thresholds are already defined in your test tool's config, the gate respects them:
93
+
94
+ | Tool | Config Location |
95
+ |------|----------------|
96
+ | Vitest | `vitest.config.ts` → `test.coverage.thresholds` |
97
+ | Jest | `jest.config.ts` → `coverageThreshold.global` |
98
+ | c8 | `.c8rc.json` or `package.json` → `c8` |
99
+ | Istanbul/nyc | `.nycrc` or `package.json` → `nyc` |
100
+
101
+ **Priority order:** CLI flags > `codingbuddy.config.json` > tool-native config > defaults (80%)
102
+
103
+ ## Supported Tools
104
+
105
+ ### Auto-Detection
106
+
107
+ The gate detects your coverage tool automatically:
108
+
109
+ ```
110
+ 1. Check package.json devDependencies:
111
+ - @vitest/coverage-v8 or @vitest/coverage-istanbul → vitest
112
+ - jest + jest coverage config → jest
113
+ - c8 → c8
114
+ - nyc or istanbul → istanbul
115
+ 2. Check for tool-specific config files:
116
+ - vitest.config.ts → vitest
117
+ - jest.config.ts → jest
118
+ - .c8rc.json → c8
119
+ - .nycrc → istanbul
120
+ 3. Fallback: prompt user to specify tool
121
+ ```
122
+
123
+ ### Tool Commands
124
+
125
+ | Tool | Coverage Command |
126
+ |------|-----------------|
127
+ | **Vitest** | `npx vitest run --coverage --reporter=json` |
128
+ | **Jest** | `npx jest --coverage --coverageReporters=json-summary` |
129
+ | **c8** | `npx c8 --reporter=json-summary npm test` |
130
+ | **Istanbul/nyc** | `npx nyc --reporter=json-summary npm test` |
131
+
132
+ ## Workflow
133
+
134
+ ### Phase 1: Detect Coverage Tool
135
+
136
+ ```
137
+ - [ ] Scan package.json devDependencies for coverage packages
138
+ - [ ] Check for tool-specific config files
139
+ - [ ] Resolve coverage tool (or prompt if ambiguous)
140
+ - [ ] Verify coverage tool is installed
141
+ ```
142
+
143
+ If the tool is not installed:
144
+ ```bash
145
+ # Suggest installation
146
+ echo "Coverage tool not found. Install with:"
147
+ echo " yarn add -D @vitest/coverage-v8 # for vitest"
148
+ echo " yarn add -D jest # for jest"
149
+ echo " yarn add -D c8 # for c8"
150
+ echo " yarn add -D nyc # for istanbul/nyc"
151
+ ```
152
+
153
+ ### Phase 2: Run Coverage Analysis
154
+
155
+ ```
156
+ - [ ] Execute coverage command for detected tool
157
+ - [ ] Capture JSON coverage output
158
+ - [ ] Parse coverage summary (lines, branches, functions)
159
+ ```
160
+
161
+ **Expected output format (json-summary):**
162
+ ```json
163
+ {
164
+ "total": {
165
+ "lines": { "total": 500, "covered": 420, "pct": 84.0 },
166
+ "branches": { "total": 120, "covered": 96, "pct": 80.0 },
167
+ "functions": { "total": 80, "covered": 68, "pct": 85.0 }
168
+ }
169
+ }
170
+ ```
171
+
172
+ ### Phase 3: Compare Against Thresholds
173
+
174
+ ```
175
+ - [ ] Load thresholds (CLI > config > tool-native > defaults)
176
+ - [ ] Compare each metric against its threshold
177
+ - [ ] Classify result: PASS or FAIL per metric
178
+ ```
179
+
180
+ **Comparison logic:**
181
+ ```
182
+ For each metric in [lines, branches, functions]:
183
+ actual = coverage_report.total[metric].pct
184
+ threshold = resolved_thresholds[metric]
185
+ result = actual >= threshold ? PASS : FAIL
186
+ ```
187
+
188
+ ### Phase 4: Gate Decision
189
+
190
+ #### PASS — All metrics meet thresholds
191
+
192
+ ```
193
+ Coverage Gate: PASS
194
+
195
+ Lines: 84.0% >= 80% ✅
196
+ Branches: 80.0% >= 80% ✅
197
+ Functions: 85.0% >= 80% ✅
198
+
199
+ Proceed with shipping.
200
+ ```
201
+
202
+ #### FAIL — One or more metrics below threshold
203
+
204
+ ```
205
+ Coverage Gate: FAIL ❌
206
+
207
+ Lines: 84.0% >= 80% ✅
208
+ Branches: 72.0% >= 80% ❌ (-8.0%)
209
+ Functions: 85.0% >= 80% ✅
210
+
211
+ BLOCKED: Coverage below threshold.
212
+
213
+ Uncovered areas requiring attention:
214
+ - src/auth/oauth.ts: branches 45% (missing: error paths lines 23-31, 55-60)
215
+ - src/utils/parser.ts: branches 60% (missing: edge cases lines 88-95)
216
+
217
+ Action required:
218
+ 1. Add tests for uncovered branches listed above
219
+ 2. Re-run coverage gate
220
+ 3. Ship only after all metrics pass
221
+ ```
222
+
223
+ **On failure:**
224
+ - Block `/ship` from creating the PR
225
+ - List specific files and lines with low coverage
226
+ - Suggest which tests to write
227
+ - Never allow override without explicit `--skip-coverage` flag
228
+
229
+ ## Integration with /ship
230
+
231
+ ### Pre-Ship Hook
232
+
233
+ The coverage gate runs **before** any PR creation step in `/ship`:
234
+
235
+ ```
236
+ /ship workflow:
237
+ 1. Lint check
238
+ 2. Type check (tsc --noEmit)
239
+ 3. Run tests
240
+ 4. ▶ COVERAGE GATE ◀ ← This skill
241
+ 5. Build verification
242
+ 6. Create branch + commit + push
243
+ 7. Create PR
244
+ ```
245
+
246
+ If the coverage gate fails at step 4, steps 5-7 are **not executed**.
247
+
248
+ ### Skip Flag
249
+
250
+ For exceptional cases (hotfixes, documentation):
251
+ ```
252
+ /ship --skip-coverage
253
+ ```
254
+
255
+ When `--skip-coverage` is used:
256
+ - Log a warning: `⚠️ Coverage gate skipped. Reason must be documented in PR.`
257
+ - Add `[skip-coverage]` label to the PR
258
+ - Require reason in PR description
259
+
260
+ ## Failure Recovery
261
+
262
+ ### Common Issues
263
+
264
+ | Issue | Solution |
265
+ |-------|----------|
266
+ | Coverage tool not installed | Install via `yarn add -D <tool>` |
267
+ | No coverage config found | Add `coverage` section to test tool config |
268
+ | Coverage report not generated | Verify test command produces JSON output |
269
+ | Threshold too strict for legacy code | Set per-directory thresholds, raise gradually |
270
+ | New files have 0% coverage | Write tests before shipping (TDD) |
271
+
272
+ ### Gradual Adoption
273
+
274
+ For projects starting below 80%:
275
+
276
+ ```jsonc
277
+ // Start with current baseline, ratchet up over time
278
+ {
279
+ "coverage": {
280
+ "thresholds": {
281
+ "lines": 60, // Current: 58% → target 80%
282
+ "branches": 50, // Current: 48% → target 80%
283
+ "functions": 65 // Current: 63% → target 80%
284
+ },
285
+ "ratchet": true // Never allow coverage to decrease
286
+ }
287
+ }
288
+ ```
289
+
290
+ **Ratchet mode:** When enabled, the threshold automatically increases to match the highest coverage ever achieved. Coverage can only go up, never down.
291
+
292
+ ## Red Flags
293
+
294
+ These thoughts mean STOP — you're rationalizing:
295
+
296
+ | Thought | Reality |
297
+ |---------|---------|
298
+ | "Coverage doesn't matter for this change" | Every change matters. Untested code breaks. |
299
+ | "I'll add tests later" | Later never comes. Test now. |
300
+ | "80% is too high for this project" | Lower the threshold explicitly, don't skip the gate. |
301
+ | "This is just a hotfix" | Hotfixes need tests too — use `--skip-coverage` and document why. |
302
+ | "The tests are slow" | Slow tests > no tests. Optimize later. |
303
+ | "It's only a config change" | Config changes can break things. Verify coverage didn't drop. |