autonomous-coding-toolkit 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (324) hide show
  1. package/.claude-plugin/marketplace.json +22 -0
  2. package/.claude-plugin/plugin.json +13 -0
  3. package/LICENSE +21 -0
  4. package/Makefile +21 -0
  5. package/README.md +140 -0
  6. package/SECURITY.md +28 -0
  7. package/agents/bash-expert.md +113 -0
  8. package/agents/dependency-auditor.md +138 -0
  9. package/agents/integration-tester.md +120 -0
  10. package/agents/lesson-scanner.md +149 -0
  11. package/agents/python-expert.md +179 -0
  12. package/agents/service-monitor.md +141 -0
  13. package/agents/shell-expert.md +147 -0
  14. package/benchmarks/runner.sh +147 -0
  15. package/benchmarks/tasks/01-rest-endpoint/rubric.sh +29 -0
  16. package/benchmarks/tasks/01-rest-endpoint/task.md +17 -0
  17. package/benchmarks/tasks/02-refactor-module/task.md +8 -0
  18. package/benchmarks/tasks/03-fix-integration-bug/task.md +8 -0
  19. package/benchmarks/tasks/04-add-test-coverage/task.md +8 -0
  20. package/benchmarks/tasks/05-multi-file-feature/task.md +8 -0
  21. package/bin/act.js +238 -0
  22. package/commands/autocode.md +6 -0
  23. package/commands/cancel-ralph.md +18 -0
  24. package/commands/code-factory.md +53 -0
  25. package/commands/create-prd.md +55 -0
  26. package/commands/ralph-loop.md +18 -0
  27. package/commands/run-plan.md +117 -0
  28. package/commands/submit-lesson.md +122 -0
  29. package/docs/ARCHITECTURE.md +630 -0
  30. package/docs/CONTRIBUTING.md +125 -0
  31. package/docs/lessons/0001-bare-exception-swallowing.md +34 -0
  32. package/docs/lessons/0002-async-def-without-await.md +28 -0
  33. package/docs/lessons/0003-create-task-without-callback.md +28 -0
  34. package/docs/lessons/0004-hardcoded-test-counts.md +28 -0
  35. package/docs/lessons/0005-sqlite-without-closing.md +33 -0
  36. package/docs/lessons/0006-venv-pip-path.md +27 -0
  37. package/docs/lessons/0007-runner-state-self-rejection.md +35 -0
  38. package/docs/lessons/0008-quality-gate-blind-spot.md +33 -0
  39. package/docs/lessons/0009-parser-overcount-empty-batches.md +36 -0
  40. package/docs/lessons/0010-local-outside-function-bash.md +33 -0
  41. package/docs/lessons/0011-batch-tests-for-unimplemented-code.md +36 -0
  42. package/docs/lessons/0012-api-markdown-unescaped-chars.md +33 -0
  43. package/docs/lessons/0013-export-prefix-env-parsing.md +33 -0
  44. package/docs/lessons/0014-decorator-registry-import-side-effect.md +43 -0
  45. package/docs/lessons/0015-frontend-backend-schema-drift.md +43 -0
  46. package/docs/lessons/0016-event-driven-cold-start-seeding.md +44 -0
  47. package/docs/lessons/0017-copy-paste-logic-diverges.md +43 -0
  48. package/docs/lessons/0018-layer-passes-pipeline-broken.md +45 -0
  49. package/docs/lessons/0019-systemd-envfile-ignores-export.md +41 -0
  50. package/docs/lessons/0020-persist-state-incrementally.md +44 -0
  51. package/docs/lessons/0021-dual-axis-testing.md +48 -0
  52. package/docs/lessons/0022-jsx-factory-shadowing.md +43 -0
  53. package/docs/lessons/0023-static-analysis-spiral.md +51 -0
  54. package/docs/lessons/0024-shared-pipeline-implementation.md +55 -0
  55. package/docs/lessons/0025-defense-in-depth-all-entry-points.md +65 -0
  56. package/docs/lessons/0026-linter-no-rules-false-enforcement.md +54 -0
  57. package/docs/lessons/0027-jsx-silent-prop-drop.md +64 -0
  58. package/docs/lessons/0028-no-infrastructure-in-client-code.md +49 -0
  59. package/docs/lessons/0029-never-write-secrets-to-files.md +61 -0
  60. package/docs/lessons/0030-cache-merge-not-replace.md +62 -0
  61. package/docs/lessons/0031-verify-units-at-boundaries.md +66 -0
  62. package/docs/lessons/0032-module-lifecycle-subscribe-unsubscribe.md +89 -0
  63. package/docs/lessons/0033-async-iteration-mutable-snapshot.md +72 -0
  64. package/docs/lessons/0034-caller-missing-await-silent-discard.md +65 -0
  65. package/docs/lessons/0035-duplicate-registration-silent-overwrite.md +85 -0
  66. package/docs/lessons/0036-websocket-dirty-disconnect.md +33 -0
  67. package/docs/lessons/0037-parallel-agents-worktree-corruption.md +31 -0
  68. package/docs/lessons/0038-subscribe-no-stored-ref.md +36 -0
  69. package/docs/lessons/0039-fallback-or-default-hides-bugs.md +34 -0
  70. package/docs/lessons/0040-event-firehose-filter-first.md +36 -0
  71. package/docs/lessons/0041-ambiguous-base-dir-path-nesting.md +32 -0
  72. package/docs/lessons/0042-spec-compliance-insufficient.md +36 -0
  73. package/docs/lessons/0043-exact-count-extensible-collections.md +32 -0
  74. package/docs/lessons/0044-relative-file-deps-worktree.md +39 -0
  75. package/docs/lessons/0045-iterative-design-improvement.md +33 -0
  76. package/docs/lessons/0046-plan-assertion-math-bugs.md +38 -0
  77. package/docs/lessons/0047-pytest-single-threaded-default.md +37 -0
  78. package/docs/lessons/0048-integration-wiring-batch.md +40 -0
  79. package/docs/lessons/0049-ab-verification.md +41 -0
  80. package/docs/lessons/0050-editing-sourced-files-during-execution.md +33 -0
  81. package/docs/lessons/0051-infrastructure-fixes-cant-self-heal.md +30 -0
  82. package/docs/lessons/0052-uncommitted-changes-poison-quality-gates.md +31 -0
  83. package/docs/lessons/0053-jq-compact-flag-inconsistency.md +31 -0
  84. package/docs/lessons/0054-parser-matches-inside-code-blocks.md +30 -0
  85. package/docs/lessons/0055-agents-compensate-for-garbled-prompts.md +31 -0
  86. package/docs/lessons/0056-grep-count-exit-code-on-zero.md +42 -0
  87. package/docs/lessons/0057-new-artifacts-break-git-clean-gates.md +42 -0
  88. package/docs/lessons/0058-dead-config-keys-never-consumed.md +49 -0
  89. package/docs/lessons/0059-contract-test-shared-structures.md +53 -0
  90. package/docs/lessons/0060-set-e-silent-death-in-runners.md +53 -0
  91. package/docs/lessons/0061-context-injection-dirty-state.md +50 -0
  92. package/docs/lessons/0062-sibling-bug-neighborhood-scan.md +29 -0
  93. package/docs/lessons/0063-one-flag-two-lifetimes.md +31 -0
  94. package/docs/lessons/0064-test-passes-wrong-reason.md +31 -0
  95. package/docs/lessons/0065-pipefail-grep-count-double-output.md +39 -0
  96. package/docs/lessons/0066-local-keyword-outside-function.md +37 -0
  97. package/docs/lessons/0067-stdin-hang-non-interactive-shell.md +36 -0
  98. package/docs/lessons/0068-agent-builds-wrong-thing-correctly.md +31 -0
  99. package/docs/lessons/0069-plan-quality-dominates-execution.md +30 -0
  100. package/docs/lessons/0070-spec-echo-back-prevents-drift.md +31 -0
  101. package/docs/lessons/0071-positive-instructions-outperform-negative.md +30 -0
  102. package/docs/lessons/0072-lost-in-the-middle-context-placement.md +30 -0
  103. package/docs/lessons/0073-unscoped-lessons-cause-false-positives.md +30 -0
  104. package/docs/lessons/0074-stale-context-injection-wrong-batch.md +32 -0
  105. package/docs/lessons/0075-research-artifacts-must-persist.md +32 -0
  106. package/docs/lessons/0076-wrong-decomposition-contaminates-downstream.md +30 -0
  107. package/docs/lessons/0077-cherry-pick-merges-need-manual-resolution.md +30 -0
  108. package/docs/lessons/0078-static-review-without-live-test.md +30 -0
  109. package/docs/lessons/0079-integration-wiring-batch-required.md +32 -0
  110. package/docs/lessons/FRAMEWORK.md +161 -0
  111. package/docs/lessons/SUMMARY.md +201 -0
  112. package/docs/lessons/TEMPLATE.md +85 -0
  113. package/docs/plans/2026-02-21-code-factory-v2-design.md +204 -0
  114. package/docs/plans/2026-02-21-code-factory-v2-implementation-plan.md +2189 -0
  115. package/docs/plans/2026-02-21-code-factory-v2-phase4-design.md +537 -0
  116. package/docs/plans/2026-02-21-code-factory-v2-phase4-implementation-plan.md +2012 -0
  117. package/docs/plans/2026-02-21-hardening-pass-design.md +108 -0
  118. package/docs/plans/2026-02-21-hardening-pass-plan.md +1378 -0
  119. package/docs/plans/2026-02-21-mab-research-report.md +406 -0
  120. package/docs/plans/2026-02-21-marketplace-restructure-design.md +240 -0
  121. package/docs/plans/2026-02-21-marketplace-restructure-plan.md +832 -0
  122. package/docs/plans/2026-02-21-phase4-completion-plan.md +697 -0
  123. package/docs/plans/2026-02-21-validator-suite-design.md +148 -0
  124. package/docs/plans/2026-02-21-validator-suite-plan.md +540 -0
  125. package/docs/plans/2026-02-22-mab-research-round2.md +556 -0
  126. package/docs/plans/2026-02-22-mab-run-design.md +462 -0
  127. package/docs/plans/2026-02-22-mab-run-plan.md +2046 -0
  128. package/docs/plans/2026-02-22-operations-design-methodology-research.md +681 -0
  129. package/docs/plans/2026-02-22-research-agent-failure-taxonomy.md +532 -0
  130. package/docs/plans/2026-02-22-research-code-guideline-policies.md +886 -0
  131. package/docs/plans/2026-02-22-research-codebase-audit-refactoring.md +908 -0
  132. package/docs/plans/2026-02-22-research-coding-standards-documentation.md +541 -0
  133. package/docs/plans/2026-02-22-research-competitive-landscape.md +687 -0
  134. package/docs/plans/2026-02-22-research-comprehensive-testing.md +1076 -0
  135. package/docs/plans/2026-02-22-research-context-utilization.md +459 -0
  136. package/docs/plans/2026-02-22-research-cost-quality-tradeoff.md +548 -0
  137. package/docs/plans/2026-02-22-research-lesson-transferability.md +508 -0
  138. package/docs/plans/2026-02-22-research-multi-agent-coordination.md +312 -0
  139. package/docs/plans/2026-02-22-research-phase-integration.md +602 -0
  140. package/docs/plans/2026-02-22-research-plan-quality.md +428 -0
  141. package/docs/plans/2026-02-22-research-prompt-engineering.md +558 -0
  142. package/docs/plans/2026-02-22-research-unconventional-perspectives.md +528 -0
  143. package/docs/plans/2026-02-22-research-user-adoption.md +638 -0
  144. package/docs/plans/2026-02-22-research-verification-effectiveness.md +433 -0
  145. package/docs/plans/2026-02-23-agent-suite-design.md +299 -0
  146. package/docs/plans/2026-02-23-agent-suite-plan.md +578 -0
  147. package/docs/plans/2026-02-23-phase3-cost-infrastructure-design.md +148 -0
  148. package/docs/plans/2026-02-23-phase3-cost-infrastructure-plan.md +1062 -0
  149. package/docs/plans/2026-02-23-research-bash-expert-agent.md +543 -0
  150. package/docs/plans/2026-02-23-research-dependency-auditor-agent.md +564 -0
  151. package/docs/plans/2026-02-23-research-improving-existing-agents.md +503 -0
  152. package/docs/plans/2026-02-23-research-integration-tester-agent.md +454 -0
  153. package/docs/plans/2026-02-23-research-python-expert-agent.md +429 -0
  154. package/docs/plans/2026-02-23-research-service-monitor-agent.md +425 -0
  155. package/docs/plans/2026-02-23-research-shell-expert-agent.md +533 -0
  156. package/docs/plans/2026-02-23-roadmap-to-completion.md +530 -0
  157. package/docs/plans/2026-02-24-headless-module-split-design.md +98 -0
  158. package/docs/plans/2026-02-24-headless-module-split.md +443 -0
  159. package/docs/plans/2026-02-24-lesson-scope-metadata-design.md +228 -0
  160. package/docs/plans/2026-02-24-lesson-scope-metadata-plan.md +968 -0
  161. package/docs/plans/2026-02-24-npm-packaging-design.md +841 -0
  162. package/docs/plans/2026-02-24-npm-packaging-plan.md +1965 -0
  163. package/docs/plans/audit-findings.md +186 -0
  164. package/docs/telegram-notification-format.md +98 -0
  165. package/examples/example-plan.md +51 -0
  166. package/examples/example-prd.json +72 -0
  167. package/examples/example-roadmap.md +33 -0
  168. package/examples/quickstart-plan.md +63 -0
  169. package/hooks/hooks.json +26 -0
  170. package/hooks/setup-symlinks.sh +48 -0
  171. package/hooks/stop-hook.sh +135 -0
  172. package/package.json +47 -0
  173. package/policies/bash.md +71 -0
  174. package/policies/python.md +71 -0
  175. package/policies/testing.md +61 -0
  176. package/policies/universal.md +60 -0
  177. package/scripts/analyze-report.sh +97 -0
  178. package/scripts/architecture-map.sh +145 -0
  179. package/scripts/auto-compound.sh +273 -0
  180. package/scripts/batch-audit.sh +42 -0
  181. package/scripts/batch-test.sh +101 -0
  182. package/scripts/entropy-audit.sh +221 -0
  183. package/scripts/failure-digest.sh +51 -0
  184. package/scripts/generate-ast-rules.sh +96 -0
  185. package/scripts/init.sh +112 -0
  186. package/scripts/lesson-check.sh +428 -0
  187. package/scripts/lib/common.sh +61 -0
  188. package/scripts/lib/cost-tracking.sh +153 -0
  189. package/scripts/lib/ollama.sh +60 -0
  190. package/scripts/lib/progress-writer.sh +128 -0
  191. package/scripts/lib/run-plan-context.sh +215 -0
  192. package/scripts/lib/run-plan-echo-back.sh +231 -0
  193. package/scripts/lib/run-plan-headless.sh +396 -0
  194. package/scripts/lib/run-plan-notify.sh +57 -0
  195. package/scripts/lib/run-plan-parser.sh +81 -0
  196. package/scripts/lib/run-plan-prompt.sh +215 -0
  197. package/scripts/lib/run-plan-quality-gate.sh +132 -0
  198. package/scripts/lib/run-plan-routing.sh +315 -0
  199. package/scripts/lib/run-plan-sampling.sh +170 -0
  200. package/scripts/lib/run-plan-scoring.sh +146 -0
  201. package/scripts/lib/run-plan-state.sh +142 -0
  202. package/scripts/lib/run-plan-team.sh +199 -0
  203. package/scripts/lib/telegram.sh +54 -0
  204. package/scripts/lib/thompson-sampling.sh +176 -0
  205. package/scripts/license-check.sh +74 -0
  206. package/scripts/mab-run.sh +575 -0
  207. package/scripts/module-size-check.sh +146 -0
  208. package/scripts/patterns/async-no-await.yml +5 -0
  209. package/scripts/patterns/bare-except.yml +6 -0
  210. package/scripts/patterns/empty-catch.yml +6 -0
  211. package/scripts/patterns/hardcoded-localhost.yml +9 -0
  212. package/scripts/patterns/retry-loop-no-backoff.yml +12 -0
  213. package/scripts/pipeline-status.sh +197 -0
  214. package/scripts/policy-check.sh +226 -0
  215. package/scripts/prior-art-search.sh +133 -0
  216. package/scripts/promote-mab-lessons.sh +126 -0
  217. package/scripts/prompts/agent-a-superpowers.md +29 -0
  218. package/scripts/prompts/agent-b-ralph.md +29 -0
  219. package/scripts/prompts/judge-agent.md +61 -0
  220. package/scripts/prompts/planner-agent.md +44 -0
  221. package/scripts/pull-community-lessons.sh +90 -0
  222. package/scripts/quality-gate.sh +266 -0
  223. package/scripts/research-gate.sh +90 -0
  224. package/scripts/run-plan.sh +329 -0
  225. package/scripts/scope-infer.sh +159 -0
  226. package/scripts/setup-ralph-loop.sh +155 -0
  227. package/scripts/telemetry.sh +230 -0
  228. package/scripts/tests/run-all-tests.sh +52 -0
  229. package/scripts/tests/test-act-cli.sh +46 -0
  230. package/scripts/tests/test-agents-md.sh +87 -0
  231. package/scripts/tests/test-analyze-report.sh +114 -0
  232. package/scripts/tests/test-architecture-map.sh +89 -0
  233. package/scripts/tests/test-auto-compound.sh +169 -0
  234. package/scripts/tests/test-batch-test.sh +65 -0
  235. package/scripts/tests/test-benchmark-runner.sh +25 -0
  236. package/scripts/tests/test-common.sh +168 -0
  237. package/scripts/tests/test-cost-tracking.sh +158 -0
  238. package/scripts/tests/test-echo-back.sh +180 -0
  239. package/scripts/tests/test-entropy-audit.sh +146 -0
  240. package/scripts/tests/test-failure-digest.sh +66 -0
  241. package/scripts/tests/test-generate-ast-rules.sh +145 -0
  242. package/scripts/tests/test-helpers.sh +82 -0
  243. package/scripts/tests/test-init.sh +47 -0
  244. package/scripts/tests/test-lesson-check.sh +278 -0
  245. package/scripts/tests/test-lesson-local.sh +55 -0
  246. package/scripts/tests/test-license-check.sh +109 -0
  247. package/scripts/tests/test-mab-run.sh +182 -0
  248. package/scripts/tests/test-ollama-lib.sh +49 -0
  249. package/scripts/tests/test-ollama.sh +60 -0
  250. package/scripts/tests/test-pipeline-status.sh +198 -0
  251. package/scripts/tests/test-policy-check.sh +124 -0
  252. package/scripts/tests/test-prior-art-search.sh +96 -0
  253. package/scripts/tests/test-progress-writer.sh +140 -0
  254. package/scripts/tests/test-promote-mab-lessons.sh +110 -0
  255. package/scripts/tests/test-pull-community-lessons.sh +149 -0
  256. package/scripts/tests/test-quality-gate.sh +241 -0
  257. package/scripts/tests/test-research-gate.sh +132 -0
  258. package/scripts/tests/test-run-plan-cli.sh +86 -0
  259. package/scripts/tests/test-run-plan-context.sh +305 -0
  260. package/scripts/tests/test-run-plan-e2e.sh +153 -0
  261. package/scripts/tests/test-run-plan-headless.sh +424 -0
  262. package/scripts/tests/test-run-plan-notify.sh +124 -0
  263. package/scripts/tests/test-run-plan-parser.sh +217 -0
  264. package/scripts/tests/test-run-plan-prompt.sh +254 -0
  265. package/scripts/tests/test-run-plan-quality-gate.sh +222 -0
  266. package/scripts/tests/test-run-plan-routing.sh +178 -0
  267. package/scripts/tests/test-run-plan-scoring.sh +148 -0
  268. package/scripts/tests/test-run-plan-state.sh +261 -0
  269. package/scripts/tests/test-run-plan-team.sh +157 -0
  270. package/scripts/tests/test-scope-infer.sh +150 -0
  271. package/scripts/tests/test-setup-ralph-loop.sh +63 -0
  272. package/scripts/tests/test-telegram-env.sh +38 -0
  273. package/scripts/tests/test-telegram.sh +121 -0
  274. package/scripts/tests/test-telemetry.sh +46 -0
  275. package/scripts/tests/test-thompson-sampling.sh +139 -0
  276. package/scripts/tests/test-validate-all.sh +60 -0
  277. package/scripts/tests/test-validate-commands.sh +89 -0
  278. package/scripts/tests/test-validate-hooks.sh +98 -0
  279. package/scripts/tests/test-validate-lessons.sh +150 -0
  280. package/scripts/tests/test-validate-plan-quality.sh +235 -0
  281. package/scripts/tests/test-validate-plans.sh +187 -0
  282. package/scripts/tests/test-validate-plugin.sh +106 -0
  283. package/scripts/tests/test-validate-prd.sh +184 -0
  284. package/scripts/tests/test-validate-skills.sh +134 -0
  285. package/scripts/validate-all.sh +57 -0
  286. package/scripts/validate-commands.sh +67 -0
  287. package/scripts/validate-hooks.sh +89 -0
  288. package/scripts/validate-lessons.sh +98 -0
  289. package/scripts/validate-plan-quality.sh +369 -0
  290. package/scripts/validate-plans.sh +120 -0
  291. package/scripts/validate-plugin.sh +86 -0
  292. package/scripts/validate-policies.sh +42 -0
  293. package/scripts/validate-prd.sh +118 -0
  294. package/scripts/validate-skills.sh +96 -0
  295. package/skills/autocode/SKILL.md +285 -0
  296. package/skills/autocode/ab-verification.md +51 -0
  297. package/skills/autocode/code-quality-standards.md +37 -0
  298. package/skills/autocode/competitive-mode.md +364 -0
  299. package/skills/brainstorming/SKILL.md +97 -0
  300. package/skills/capture-lesson/SKILL.md +187 -0
  301. package/skills/check-lessons/SKILL.md +116 -0
  302. package/skills/dispatching-parallel-agents/SKILL.md +110 -0
  303. package/skills/executing-plans/SKILL.md +85 -0
  304. package/skills/finishing-a-development-branch/SKILL.md +201 -0
  305. package/skills/receiving-code-review/SKILL.md +72 -0
  306. package/skills/requesting-code-review/SKILL.md +59 -0
  307. package/skills/requesting-code-review/code-reviewer.md +82 -0
  308. package/skills/research/SKILL.md +145 -0
  309. package/skills/roadmap/SKILL.md +115 -0
  310. package/skills/subagent-driven-development/SKILL.md +98 -0
  311. package/skills/subagent-driven-development/code-quality-reviewer-prompt.md +18 -0
  312. package/skills/subagent-driven-development/implementer-prompt.md +73 -0
  313. package/skills/subagent-driven-development/spec-reviewer-prompt.md +57 -0
  314. package/skills/systematic-debugging/SKILL.md +134 -0
  315. package/skills/systematic-debugging/condition-based-waiting.md +64 -0
  316. package/skills/systematic-debugging/defense-in-depth.md +32 -0
  317. package/skills/systematic-debugging/root-cause-tracing.md +55 -0
  318. package/skills/test-driven-development/SKILL.md +167 -0
  319. package/skills/using-git-worktrees/SKILL.md +219 -0
  320. package/skills/using-superpowers/SKILL.md +54 -0
  321. package/skills/verification-before-completion/SKILL.md +140 -0
  322. package/skills/verify/SKILL.md +82 -0
  323. package/skills/writing-plans/SKILL.md +128 -0
  324. package/skills/writing-skills/SKILL.md +93 -0
@@ -0,0 +1,364 @@
1
+ # Competitive Mode — Dual-Track Execution
2
+
3
+ Reference doc for competitive batch execution. Used by `run-plan` when `--mode competitive` is specified or batches are tagged `⚠ CRITICAL`.
4
+
5
+ ## Pre-Flight Exploration
6
+
7
+ Before spawning competitors, dispatch TWO agents in parallel:
8
+
9
+ ### a) Codebase Explorer (subagent_type=Explore) — internal context:
10
+ - Search for files/functions/components mentioned in the batch spec
11
+ - Find existing patterns, imports, constants, and conventions relevant to the batch
12
+ - Check config key names, API endpoint signatures, and shared module exports
13
+ - Note file sizes of files to be modified (flag any already near 300 lines)
14
+ - Identify reusable utilities, helpers, and shared components already in the codebase
15
+
16
+ ### b) External Research (subagent_type=general-purpose) — prior art & best practices:
17
+ - Search GitHub (via `gh search repos` and WebSearch) for similar implementations
18
+ in the same ecosystem (e.g., "preact dashboard conflict detection", "home assistant
19
+ automation suggestion UI", "python pipeline pattern matching")
20
+ - Search for established libraries/patterns that solve the batch's problem domain
21
+ (don't reinvent the wheel — if a well-tested utility exists, reference it)
22
+ - Check Context7 docs for relevant framework APIs (via resolve-library-id + query-docs)
23
+ - Look for common pitfalls and anti-patterns specific to the batch's technology
24
+ - Return: relevant code examples, library recommendations, common patterns to follow,
25
+ and anti-patterns to avoid
26
+
27
+ ### Combine into CONTEXT BRIEF:
28
+ Existing codebase patterns, available imports, correct key names, file sizes, external prior art, recommended libraries/patterns, and gotchas. This brief is injected into BOTH competitor prompts as "PRE-FLIGHT CONTEXT" to prevent schema mismatches, import duplication, reinventing existing solutions, and convention violations.
29
+
30
+ ---
31
+
32
+ ## Competitive Execution Flow
33
+
34
+ 1. Create two git worktrees: `git worktree add .worktrees/competitor-a HEAD` and `git worktree add .worktrees/competitor-b HEAD`
35
+ 2. Spawn Teammate-A in worktree-a with TDD strategy (see Competitor A Prompt below)
36
+ - Include the PRE-FLIGHT CONTEXT from pre-flight exploration
37
+ 3. Spawn Teammate-B in worktree-b with iterative strategy (see Competitor B Prompt below)
38
+ - Include the PRE-FLIGHT CONTEXT from pre-flight exploration
39
+ 4. Both run in parallel (separate worktrees = no conflicts)
40
+ 5. When both finish, spawn a judge teammate (see Judge Prompt below)
41
+ 6. Save judge verdict to `<worktree>/.judge-history/batch-N-verdict.md`
42
+ 7. Apply verdict:
43
+ - If a single winner: cherry-pick the winner's commits into the main worktree
44
+ - If HYBRID: Apply per-task or per-file attribution from judge's HYBRID ANALYSIS
45
+ 8. ALWAYS apply the loser's valuable contributions from the judge's MANDATORY MERGE section:
46
+ - Extract the specific changes listed by the judge (with code snippets)
47
+ - Apply them to the main worktree (Edit tool or manual cherry-pick)
48
+ - Commit as: "fix: integration improvements from competitive review"
49
+ - This step is NOT optional — skip it only if the judge explicitly states the winner is strictly superior
50
+ 9. Merge instruction verification: After applying merge snippets, run tests.
51
+ If any snippet fails to apply or breaks tests, log the failure in the verdict file
52
+ and fix manually. Track merge success rate for judge quality feedback.
53
+ 10. Adversarial review (optional, recommended for critical batches):
54
+ Spawn a second judge agent with: "Review this verdict for accuracy, missed issues,
55
+ and merge quality: [paste verdict]. Check: Are scores justified? Did the judge miss
56
+ valuable contributions from the loser? Are merge snippets syntactically correct?"
57
+ If adversarial judge finds issues, revise the merge before committing.
58
+ 11. Feed the judge's LESSONS FOR NEXT ROUND into the competitor prompts for the next batch.
59
+ 12. Clean up competitor worktrees: `git worktree remove .worktrees/competitor-a` etc.
60
+
61
+ Non-competitive batches fall back to team mode (single implementer + reviewer).
62
+
63
+ ---
64
+
65
+ ## Leader Rules (keep context lean)
66
+
67
+ - Never read implementation files directly — teammates do that
68
+ - Track only: batch number, test counts, pass/fail status
69
+ - Each teammate gets full context via prompt (never "read the plan file")
70
+ - If context reaches 40% usage, serialize state and suggest /clear + resume
71
+ - State file: `<worktree>/.run-plan-state.json`
72
+
73
+ ---
74
+
75
+ ## Competitor A Prompt Template (TDD Strategy)
76
+
77
+ ```
78
+ You are Competitor A implementing Batch N: [title]
79
+
80
+ STRATEGY: Methodical TDD — write comprehensive tests first, verify they fail, then implement.
81
+
82
+ [full task text — provided by leader, do not read the plan file]
83
+
84
+ Context:
85
+ - Worktree: [path]
86
+ - Python: [path]
87
+ - Previous batch test count: [N]
88
+
89
+ PRE-FLIGHT CONTEXT (from exploration agents — use this to avoid reinventing the wheel):
90
+ [Leader injects the combined context brief here:]
91
+ - CODEBASE: existing patterns, available imports, correct key names, file sizes, API signatures
92
+ - EXTERNAL: prior art from GitHub/web, recommended libraries, common patterns, anti-patterns
93
+ - GOTCHAS: known issues, pitfalls, and lessons specific to this batch's technology
94
+ IMPORTANT: If prior art or existing utilities are listed, USE them. Do not rewrite what already exists.
95
+
96
+ CODE QUALITY STANDARDS: (see code-quality-standards.md)
97
+
98
+ CRITICAL RULES (learned from competitive rounds):
99
+ - BEFORE writing any test: read the existing source files referenced in the task to understand real interfaces, existing types, and API endpoints. Do NOT assume interfaces — verify them.
100
+ - ALWAYS import and use existing types/models from the codebase. Never redefine a type that already exists.
101
+ - For integration tests: use real components, not mocks. Only mock at true external boundaries (LLM, network I/O). Mocking away the components you're supposed to test defeats the purpose.
102
+ - For CLI/API tasks: verify you're calling the correct endpoints by reading the route registration code, not guessing from endpoint names.
103
+ - Check capability registries and module registration — if you add a new module, register it.
104
+ - HA automation dicts use BOTH singular and plural keys — REST API returns singular (`trigger`, `action`), new format uses plural (`triggers`, `actions`). Always check both: `get("triggers") or get("trigger", [])`.
105
+ - Config keys MUST be consumed — if you register a config key, add a corresponding `get_config_value()` call. Dead config keys that do nothing are worse than missing ones.
106
+ - Trace integration boundaries: when two modules produce/consume the same data structure, verify key names match across the boundary. Read both sides before writing either.
107
+
108
+ LEARNED FROM COMPETITOR B (adopt these strengths):
109
+ - Hunt for integration seams: when wiring modules together, check that called methods actually exist on the target class, that return types match what callers expect (e.g., list[dict] vs list[str]), and that registries include your new modules.
110
+ - Build production-ready API responses: include counts, remaining items, live fallbacks when cache is empty. Richer responses win over minimal ones.
111
+ - Non-fatal error wrapping: cache updates and optional pipeline stages (LLM, notifications) should catch exceptions and log rather than crash the pipeline.
112
+ - Fix bugs you discover along the way — seam fixes from you are valuable even if your main implementation isn't chosen.
113
+
114
+ Process per task:
115
+ 1. Read 2-3 existing files in the same package to absorb style patterns
116
+ 2. Read existing source files to understand interfaces
117
+ 3. Write comprehensive failing tests against REAL interfaces (cover edge cases, error paths, not just happy path)
118
+ 4. Verify tests fail
119
+ 5. Implement minimal code to pass — matching codebase style exactly
120
+ 6. Verify all tests pass
121
+ 7. Self-review for style consistency: does your code look like it belongs in this codebase?
122
+ 8. Commit with descriptive message (one commit per task, not one big batch commit)
123
+
124
+ After all tasks:
125
+ - Run full test suite
126
+ - Self-review: completeness, spec compliance, YAGNI, style consistency
127
+ - Report: files changed, tests added, test count, any issues, any integration seams fixed
128
+ ```
129
+
130
+ ---
131
+
132
+ ## Competitor B Prompt Template (Iterative Strategy)
133
+
134
+ ```
135
+ You are Competitor B implementing Batch N: [title]
136
+
137
+ STRATEGY: Iterative rapid-build — implement working code first with clean architecture, then add tests to lock down behavior.
138
+
139
+ [full task text — provided by leader, do not read the plan file]
140
+
141
+ Context:
142
+ - Worktree: [path]
143
+ - Python: [path]
144
+ - Previous batch test count: [N]
145
+
146
+ PRE-FLIGHT CONTEXT (from exploration agents — use this to avoid reinventing the wheel):
147
+ [Leader injects the combined context brief here:]
148
+ - CODEBASE: existing patterns, available imports, correct key names, file sizes, API signatures
149
+ - EXTERNAL: prior art from GitHub/web, recommended libraries, common patterns, anti-patterns
150
+ - GOTCHAS: known issues, pitfalls, and lessons specific to this batch's technology
151
+ IMPORTANT: If prior art or existing utilities are listed, USE them. Do not rewrite what already exists.
152
+
153
+ CODE QUALITY STANDARDS: (see code-quality-standards.md)
154
+
155
+ CRITICAL RULES (learned from competitive rounds):
156
+ - BEFORE building anything: read the existing source files to find existing types, models, and patterns. ALWAYS import and use existing types — never create a parallel type that duplicates an existing one.
157
+ - When the spec references a data model (e.g., ShadowResult, AutomationCandidate): find where it's already defined and use that exact type. Check models.py and similar files.
158
+ - For scoring formulas and weighted calculations: implement EXACTLY as specified. Do not reinterpret weights or add bonuses not in the spec.
159
+ - For integration tests: test against real pipeline components, not mocks. If you're testing an "end-to-end pipeline", the test must exercise the real pipeline, not a mock version.
160
+ - Check capability registries — register new modules in capabilities.py.
161
+ - Fix real integration seams you discover (missing methods, type mismatches) — these are valuable contributions even if you don't win.
162
+ - HA automation dicts use BOTH singular and plural keys — REST API returns singular (`trigger`, `action`), new format uses plural (`triggers`, `actions`). Always check both: `get("triggers") or get("trigger", [])`.
163
+ - Config keys MUST be consumed — if you register a config key, add a corresponding `get_config_value()` call. Dead config keys that do nothing are worse than missing ones.
164
+ - Trace integration boundaries: when two modules produce/consume the same data structure, verify key names match across the boundary. Read both sides before writing either.
165
+
166
+ LEARNED FROM COMPETITOR A (adopt these strengths):
167
+ - Comprehensive test coverage: aim for thorough edge case testing, not just happy path. Test error paths, empty inputs, boundary conditions, and failure fallbacks. More tests with meaningful assertions = higher judge scores.
168
+ - Deterministic IDs: use content-based hashing (SHA-256 of key fields) for generated IDs rather than relying on input fields that may be empty or non-unique.
169
+ - Granular commits: one commit per task with a descriptive message, not one big batch commit. This makes cherry-picking cleaner.
170
+ - Test real components: when writing "integration" tests, use the real hub, real template engine, real validator — only mock true external boundaries. Tests that mock everything are unit tests in disguise and will be scored lower.
171
+ - Spec-faithful scoring: the spec's weights mean exactly what they say. pattern × 0.5 means confidence feeds the 0.5 bucket for pattern-source detections, not a flat 0.5 multiplier on everything.
172
+
173
+ Process per task:
174
+ 1. Read 2-3 existing files in the same package to absorb style patterns
175
+ 2. Read existing source files to understand interfaces and types
176
+ 3. Build clean implementation using existing types — matching codebase style exactly
177
+ 4. Write tests against real components (cover edge cases and error paths)
178
+ 5. Verify all tests pass
179
+ 6. Self-review for style consistency: does your code look like it belongs?
180
+ 7. Commit with descriptive message (one commit per task)
181
+
182
+ After all tasks:
183
+ - Run full test suite
184
+ - Self-review: spec compliance, integration correctness, test coverage depth, style consistency
185
+ - Report: files changed, tests added, test count, any integration seams fixed
186
+ ```
187
+
188
+ ---
189
+
190
+ ## Mode A Implementer Prompt Template
191
+
192
+ ```
193
+ You are implementing Batch N: [title]
194
+
195
+ [full task text — provided by leader, do not read the plan file]
196
+
197
+ Context:
198
+ - Worktree: [path]
199
+ - Python: [path]
200
+ - Previous batch test count: [N]
201
+
202
+ CODE COHESION RULES (your code must look like ONE author wrote the whole codebase):
203
+ - BEFORE writing anything: read 2-3 existing files in the same package to absorb the project's style.
204
+ - Match naming conventions, docstring format, import ordering, error handling patterns, and logging style exactly.
205
+ - DRY: check if utilities already exist before writing new ones. YAGNI: no extra features beyond the spec.
206
+
207
+ Process per task:
208
+ 1. Read 2-3 existing files in the same package to absorb style
209
+ 2. Read existing source to understand interfaces
210
+ 3. Write failing test
211
+ 4. Verify it fails
212
+ 5. Implement — matching codebase style exactly
213
+ 6. Verify test passes
214
+ 7. Commit
215
+
216
+ After all tasks:
217
+ - Self-review: completeness, quality, YAGNI, style consistency
218
+ - Report: files changed, tests added, test count, any issues
219
+ ```
220
+
221
+ ---
222
+
223
+ ## Judge Prompt Template
224
+
225
+ ```
226
+ Evaluate two competing implementations of Batch N: [title].
227
+
228
+ Competitor A (TDD): [worktree-a path]
229
+ Competitor B (Iterative): [worktree-b path]
230
+
231
+ PRIOR JUDGE HISTORY (learn from past rounds — if available):
232
+ [Leader inserts summaries from <worktree>/.judge-history/batch-*.md here.
233
+ Include: verdict, scores, what was missed, merge success/failure notes.
234
+ If no history exists yet, omit this section.]
235
+
236
+ SCORE ANCHORS (use these to calibrate — do NOT inflate scores):
237
+ - 10/10: Perfect — every spec requirement met, zero issues, exemplary
238
+ - 8/10: Strong — all requirements met, minor style/coverage gaps
239
+ - 6/10: Acceptable — most requirements met, some gaps or wrong approaches
240
+ - 4/10: Weak — significant gaps, spec violations, or broken functionality
241
+ - 2/10: Failing — fundamental misunderstanding or broken implementation
242
+
243
+ Process:
244
+ 1. FIRST — Run full test suite in BOTH worktrees. Record pass/fail counts.
245
+ Compare total test count against baseline [N]. If tests decreased or pre-existing tests fail, flag immediately.
246
+ Gate: If either competitor has test failures, note this upfront — it caps their Spec Compliance score at a maximum of 6/10.
247
+
248
+ 2. STRUCTURED CHECKS (before reading code):
249
+ a. Anti-mock check: In any test file with "integration" or "e2e" in the name, count MagicMock/patch/Mock() usage. Integration tests with >3 mocks are unit tests in disguise — penalize under Test Coverage.
250
+ b. Type duplication check: Check if either competitor defines new dataclasses/types. If a type already exists elsewhere in the codebase (e.g., in models.py), redefining it = automatic -2 on Spec Compliance.
251
+ c. Endpoint verification (for API/CLI tasks): Verify every endpoint/route reference in the implementation against actual route registration code. Wrong endpoints = -2 on Spec Compliance.
252
+ d. Dead config check: If either competitor registers config keys (in config_defaults or similar), verify each key has a corresponding `get_config_value()` call. Dead config keys = -1 on Spec Compliance.
253
+ e. Integration boundary check: For modules that produce/consume shared data structures, verify key names match across the boundary. Watch for singular/plural HA automation key mismatches.
254
+
255
+ 3. Read implementation AND test files in both competitors.
256
+
257
+ 4. Score on: Spec compliance (0.35), Code quality (0.25), Test coverage (0.25), Cohesion (0.15)
258
+ - Test coverage scoring must consider test DEPTH, not just count. Report: "A: N tests, avg M assertions/test; B: N tests, avg M assertions/test". Tests with only 1 assertion are smoke tests. Tests with 3+ meaningful assertions are thorough.
259
+ - Cohesion scoring: Does the new code look like it was written by the same author as the existing codebase? Check:
260
+ * FILE SIZE: Any file over 300 lines? Deduct points. Check with `wc -l` on all changed files.
261
+ * MODULARITY: Does each file have one clear responsibility? Are functions under 30 lines?
262
+ * Naming conventions match (snake_case, _private prefix, UPPER_CONSTANTS)
263
+ * Import style matches (absolute, grouped: stdlib → third-party → local)
264
+ * Docstring format matches existing modules
265
+ * Error handling follows codebase patterns (logged before fallback, specific exceptions)
266
+ * File structure follows codebase patterns (constants → public API → private helpers)
267
+ * No reinvented utilities that already exist in shared modules
268
+ * Type hints on all function signatures
269
+ * Guard clauses over nested conditionals, no deep nesting (>3 levels)
270
+ * No magic numbers — named constants used
271
+ * Frontend: component patterns, hook usage, CSS class naming match sibling components
272
+
273
+ 5. Integration Seam Checklist — check BOTH competitors:
274
+ - [ ] New modules registered in capabilities.py?
275
+ - [ ] New methods called by existing code actually exist on the target class?
276
+ - [ ] Return types match what callers expect (e.g., list[dict] vs list[str])?
277
+ - [ ] Import paths correct (no circular imports)?
278
+ - [ ] Config keys registered in CONFIG_DEFAULTS?
279
+ - [ ] UI components using correct prop names from their target components?
280
+
281
+ 6. MANDATORY — Best-of-Both Synthesis:
282
+ The goal is NOT just to pick a winner. The goal is to produce the BEST POSSIBLE result by combining strengths from both competitors. For EVERY batch, you MUST:
283
+ a. Identify specific improvements from the loser that the winner lacks
284
+ b. List exact files and changes with CODE SNIPPETS to apply from the loser
285
+ c. Categories to check: integration seam fixes, missing registrations, richer API responses, better error handling, additional test edge cases, type fixes, missing methods
286
+ d. If the loser found and fixed real bugs (missing methods, type mismatches, wrong return types), these MUST be included regardless of who wins
287
+
288
+ 7. HYBRID EVALUATION — Can the best result be a combination?
289
+ Before declaring a single winner, evaluate whether a HYBRID of both implementations would be superior:
290
+ a. Per-task split: Could Task X use A's implementation and Task Y use B's?
291
+ b. Per-component split: Could specific components/functions from each be combined?
292
+ c. If a hybrid IS better: specify exactly which files/functions to take from each competitor
293
+ d. If one competitor is clearly better across all dimensions: say so and explain why hybrid adds no value
294
+
295
+ VERDICT OPTIONS:
296
+ - "Competitor A wins" — use A's commits, merge from B
297
+ - "Competitor B wins" — use B's commits, merge from A
298
+ - "HYBRID" — take specific pieces from each (specify per-file or per-task attribution)
299
+
300
+ 8. Deliver verdict with ALL of these sections:
301
+
302
+ TEST RESULTS:
303
+ - Competitor A: [N] passed, [N] failed, [N] skipped (baseline: [N])
304
+ - Competitor B: [N] passed, [N] failed, [N] skipped (baseline: [N])
305
+
306
+ STRUCTURED CHECKS:
307
+ - Anti-mock: A=[N mocks in integration tests], B=[N mocks in integration tests]
308
+ - Type duplication: [any issues found]
309
+ - Endpoint verification: [any issues found]
310
+ - Integration seams: [checklist results]
311
+
312
+ VERDICT: Competitor [A|B] wins | HYBRID
313
+
314
+ SCORES:
315
+ - Spec compliance (0.35): A=[score]/10, B=[score]/10
316
+ - Code quality (0.25): A=[score]/10, B=[score]/10
317
+ - Test coverage depth (0.25): A=[score]/10 (N tests, avg M asserts), B=[score]/10 (N tests, avg M asserts)
318
+ - Cohesion (0.15): A=[score]/10, B=[score]/10
319
+ - Weighted total: A=[score], B=[score]
320
+
321
+ COHESION & MODULARITY:
322
+ - File sizes: A=[list files >300 lines], B=[list files >300 lines]
323
+ - Functions >30 lines: A=[count], B=[count]
324
+ - Nesting depth >3: A=[count], B=[count]
325
+ - Type hints coverage: A=[pass/issues], B=[pass/issues]
326
+ - Naming consistency: A=[pass/issues], B=[pass/issues]
327
+ - Import style match: A=[pass/issues], B=[pass/issues]
328
+ - Error handling pattern: A=[pass/issues], B=[pass/issues]
329
+ - DRY (no reinvented utilities): A=[pass/issues], B=[pass/issues]
330
+ - Magic numbers: A=[count], B=[count]
331
+
332
+ REASONING: [2-3 sentences]
333
+
334
+ HYBRID ANALYSIS:
335
+ [If HYBRID verdict: specify exactly which files/functions/tasks come from each competitor.
336
+ If not hybrid: explain why one competitor is clearly better across all dimensions.]
337
+
338
+ CHERRY-PICK: [commits from winner, or per-task attribution for hybrid]
339
+
340
+ MANDATORY MERGE FROM LOSER:
341
+ [List EVERY valuable contribution from the loser. For each, specify:
342
+ - File path and what to extract
343
+ - Exact code snippet to apply
344
+ - Where to insert it (after which function/line)
345
+ - Why it's valuable (seam fix, better error handling, missing registration, etc.)
346
+ If truly nothing: explain why the winner's implementation is strictly superior in every dimension.
347
+ If HYBRID: this section covers pieces NOT already included in the hybrid attribution.]
348
+
349
+ LESSONS FOR NEXT ROUND:
350
+ - Competitor A should: [specific improvement]
351
+ - Competitor B should: [specific improvement]
352
+ ```
353
+
354
+ ---
355
+
356
+ ## Reviewer Prompt Template
357
+
358
+ ```
359
+ Review Batch N implementation against specification.
360
+ Spec: [full batch text]
361
+ Changes: git diff [base_sha]..HEAD
362
+ Check: spec compliance, code quality, lesson scan
363
+ Report: approved or issues with file:line references
364
+ ```
@@ -0,0 +1,97 @@
1
+ ---
2
+ name: brainstorming
3
+ description: "You MUST use this before any creative work — creating features, building components, adding functionality, or modifying behavior. Explores user intent, requirements, and design before implementation."
4
+ version: 1.0.0
5
+ ---
6
+
7
+ # Brainstorming Ideas Into Designs
8
+
9
+ ## Overview
10
+
11
+ Help turn ideas into fully formed designs and specs through natural collaborative dialogue.
12
+
13
+ Start by understanding the current project context, then ask questions one at a time to refine the idea. Once you understand what you're building, present the design and get user approval.
14
+
15
+ <HARD-GATE>
16
+ Do NOT invoke any implementation skill, write any code, scaffold any project, or take any implementation action until you have presented a design and the user has approved it. This applies to EVERY project regardless of perceived simplicity.
17
+ </HARD-GATE>
18
+
19
+ ## Anti-Pattern: "This Is Too Simple To Need A Design"
20
+
21
+ Every project goes through this process. A todo list, a single-function utility, a config change — all of them. "Simple" projects are where unexamined assumptions cause the most wasted work. The design can be short (a few sentences for truly simple projects), but you MUST present it and get approval.
22
+
23
+ ## Checklist
24
+
25
+ You MUST create a task for each of these items and complete them in order:
26
+
27
+ 1. **Explore project context** — check files, docs, recent commits
28
+ 2. **Ask clarifying questions** — one at a time, understand purpose/constraints/success criteria
29
+ 3. **Propose 2-3 approaches** — with trade-offs and your recommendation
30
+ 4. **Present design** — in sections scaled to their complexity, get user approval after each section
31
+ 5. **Write design doc** — save to `docs/plans/YYYY-MM-DD-<topic>-design.md` and commit
32
+ 6. **Transition to implementation** — invoke writing-plans skill to create implementation plan
33
+
34
+ ## Process Flow
35
+
36
+ ```dot
37
+ digraph brainstorming {
38
+ "Explore project context" [shape=box];
39
+ "Ask clarifying questions" [shape=box];
40
+ "Propose 2-3 approaches" [shape=box];
41
+ "Present design sections" [shape=box];
42
+ "User approves design?" [shape=diamond];
43
+ "Write design doc" [shape=box];
44
+ "Invoke writing-plans skill" [shape=doublecircle];
45
+
46
+ "Explore project context" -> "Ask clarifying questions";
47
+ "Ask clarifying questions" -> "Propose 2-3 approaches";
48
+ "Propose 2-3 approaches" -> "Present design sections";
49
+ "Present design sections" -> "User approves design?";
50
+ "User approves design?" -> "Present design sections" [label="no, revise"];
51
+ "User approves design?" -> "Write design doc" [label="yes"];
52
+ "Write design doc" -> "Invoke writing-plans skill";
53
+ }
54
+ ```
55
+
56
+ **The terminal state is invoking writing-plans.** Do NOT invoke frontend-design, mcp-builder, or any other implementation skill. The ONLY skill you invoke after brainstorming is writing-plans.
57
+
58
+ ## The Process
59
+
60
+ **Understanding the idea:**
61
+ - Check out the current project state first (files, docs, recent commits)
62
+ - Ask questions one at a time to refine the idea
63
+ - Prefer multiple choice questions when possible, but open-ended is fine too
64
+ - Only one question per message - if a topic needs more exploration, break it into multiple questions
65
+ - Focus on understanding: purpose, constraints, success criteria
66
+
67
+ **Exploring approaches:**
68
+ - Propose 2-3 different approaches with trade-offs
69
+ - Present options conversationally with your recommendation and reasoning
70
+ - Lead with your recommended option and explain why
71
+
72
+ **Presenting the design:**
73
+ - Once you believe you understand what you're building, present the design
74
+ - Scale each section to its complexity: a few sentences if straightforward, up to 200-300 words if nuanced
75
+ - Ask after each section whether it looks right so far
76
+ - Cover: architecture, components, data flow, error handling, testing
77
+ - Be ready to go back and clarify if something doesn't make sense
78
+
79
+ ## After the Design
80
+
81
+ **Documentation:**
82
+ - Write the validated design to `docs/plans/YYYY-MM-DD-<topic>-design.md`
83
+ - Use elements-of-style:writing-clearly-and-concisely skill if available
84
+ - Commit the design document to git
85
+
86
+ **Implementation:**
87
+ - Invoke the writing-plans skill to create a detailed implementation plan
88
+ - Do NOT invoke any other skill. writing-plans is the next step.
89
+
90
+ ## Key Principles
91
+
92
+ - **One question at a time** - Don't overwhelm with multiple questions
93
+ - **Multiple choice preferred** - Easier to answer than open-ended when possible
94
+ - **YAGNI ruthlessly** - Remove unnecessary features from all designs
95
+ - **Explore alternatives** - Always propose 2-3 approaches before settling
96
+ - **Incremental validation** - Present design, get approval before moving on
97
+ - **Be flexible** - Go back and clarify when something doesn't make sense
@@ -0,0 +1,187 @@
1
+ ---
2
+ name: capture-lesson
3
+ description: Use when capturing a lesson learned from a bug, audit finding, or session insight — enforces template, validation, and commit workflow
4
+ ---
5
+
6
+ # Capture Lesson
7
+
8
+ ## Overview
9
+
10
+ Structured process for writing new lessons that enforces the FRAMEWORK.md template, OIL tier rules, category validation, and all three validation scripts before committing. Prevents manual shortcutting that skips recurrence analysis and sustain checks.
11
+
12
+ ## When to Use
13
+
14
+ - After discovering a bug, audit finding, or session insight worth capturing
15
+ - When `/capture-lesson` is invoked
16
+ - After a debugging session reveals a repeatable anti-pattern
17
+ - When a code review or counter session surfaces a new failure mode
18
+
19
+ ## Process (follow this order exactly)
20
+
21
+ ### Step 1: Gather Context
22
+
23
+ Ask the user:
24
+ 1. **What happened?** — factual description with error messages, data contradictions, numbers
25
+ 2. **Which files were involved?** — specific paths
26
+ 3. **Which cluster does this resemble?** — A (Silent Failures), B (Integration Boundary), C (Cold-Start), D (Specification Drift), E (Context & Retrieval), F (Planning & Control Flow), or standalone
27
+
28
+ ### Step 2: Draft the Lesson File
29
+
30
+ Create `~/Documents/docs/lessons/YYYY-MM-DD-short-description.md` using the exact FRAMEWORK.md template:
31
+
32
+ ```markdown
33
+ # Lesson: [Short Title]
34
+
35
+ **Date:** YYYY-MM-DD
36
+ **System:** [project name]
37
+ **Tier:** observation | insight | lesson
38
+ **Category:** [from enum below]
39
+ **Keywords:** [comma-separated for grep retrieval]
40
+ **Files:** `path/to/file1`, `path/to/file2`
41
+
42
+ ## Observation (What Happened)
43
+ [Factual description. Include numbers, error messages, data contradictions.]
44
+
45
+ ## Analysis (Root Cause — 5 Whys)
46
+ **Why #1:** [surface cause]
47
+ **Why #2:** [why that happened]
48
+ **Why #3:** [root cause — deepest controllable cause]
49
+
50
+ ## Corrective Actions
51
+ | # | Action | Status | Owner | Evidence |
52
+ |---|--------|--------|-------|----------|
53
+ | 1 | [specific action] | proposed | [who] | — |
54
+
55
+ ## Ripple Effects
56
+ [What other systems/pipelines does this touch?]
57
+
58
+ ## Sustain Plan
59
+ - [ ] 7-day check: [what to verify]
60
+ - [ ] 30-day check: [confirm no recurrence]
61
+ - [ ] Contingency: [if corrective action doesn't hold]
62
+
63
+ ## Key Takeaway
64
+ [One sentence. The thing you'd tell someone in 10 seconds.]
65
+ ```
66
+
67
+ ### Step 2.5: Infer Scope Tags
68
+
69
+ Determine the lesson's scope by analyzing its content:
70
+
71
+ 1. **Check domain signals:** Does the lesson reference specific systems?
72
+ - Home Assistant, HA entities, MQTT, Frigate → `domain:ha-aria`
73
+ - Telegram bot, polling, getUpdates → `domain:telegram`
74
+ - Notion API, sync, replica → `domain:notion`
75
+ - Ollama, model loading, queue → `domain:ollama`
76
+
77
+ 2. **Check framework signals:** Does it reference specific tooling?
78
+ - systemd, journalctl, timers → `framework:systemd`
79
+ - pytest, fixtures, conftest → `framework:pytest`
80
+ - Preact, JSX, `h()` → `framework:preact`
81
+
82
+ 3. **Check language signals:** What language(s) does it apply to?
83
+ - Python-only patterns → `language:python`
84
+ - Bash/shell patterns → `language:bash`
85
+ - JavaScript/TypeScript → `language:javascript`
86
+
87
+ 4. **Default to `universal`** if the lesson describes a general principle (error handling, testing, architecture) not specific to any domain/language.
88
+
89
+ 5. **Propose to user:** Present inferred scope tags and ask for confirmation before writing. Example: "Inferred scope: `[domain:ha-aria, language:python]` — does this look right?"
90
+
91
+ Add the `scope:` field to the lesson's metadata. Note: the Step 2 template above uses bold markdown fields and has no `languages:` field — if the lesson file uses YAML frontmatter (per TEMPLATE.md), place `scope:` after `languages:`:
92
+ ```yaml
93
+ scope: [domain:ha-aria, language:python]
94
+ ```
95
+
96
+ Reference: `~/Documents/docs/lessons/TEMPLATE.md` § Scope (Project-Level Filtering) for the full tag vocabulary.
97
+
98
+ ### Step 3: Validate Tier (HARD GATE)
99
+
100
+ Enforce OIL taxonomy rules:
101
+
102
+ | Tier | Requires | Status |
103
+ |------|----------|--------|
104
+ | `observation` | Raw facts only | `observed` |
105
+ | `insight` | Root cause identified via 5 Whys | `analyzed` |
106
+ | `lesson` | Corrective action proposed with owner + timeline | `proposed` |
107
+ | `lesson_learned` | Implementation proof + 30-day sustain evidence | `validated` |
108
+
109
+ **HARD GATE: Never assign `lesson_learned` to a new lesson.** A new lesson starts at `observation`, `insight`, or `lesson` depending on how far the analysis goes. Promotion to `lesson_learned` requires sustained evidence over time.
110
+
111
+ ### Step 4: Validate Category
112
+
113
+ Category must be exactly one of:
114
+
115
+ | Category | Scope |
116
+ |----------|-------|
117
+ | `data-model` | Schema, inheritance, data flow |
118
+ | `registration` | Module loading, decorators, imports |
119
+ | `cold-start` | First-run, missing baselines |
120
+ | `integration` | Cross-service, shared state, API contracts |
121
+ | `deployment` | Service config, systemd, env vars |
122
+ | `monitoring` | Alerts, noise suppression, staleness |
123
+ | `ui` | Frontend, data display |
124
+ | `testing` | Coverage gaps, mock masking |
125
+ | `performance` | Resources, memory, scheduling |
126
+ | `security` | Auth, secrets, permissions |
127
+
128
+ If the lesson doesn't fit any category cleanly, pick the closest match and note the tension in Ripple Effects.
129
+
130
+ ### Step 5: Update SUMMARY.md
131
+
132
+ Edit `~/Documents/docs/lessons/SUMMARY.md`:
133
+
134
+ 1. **Add row** to the Quick Reference table with the next sequential number
135
+ 2. **Update cluster membership** — add the lesson number to the relevant cluster's parenthetical list in the cluster section header
136
+ 3. **Update the count** in the header line (e.g., "72 lessons" becomes "73 lessons")
137
+ 4. **Update tier counts** in the Status & Maturity table
138
+
139
+ ### Step 6: Run Validation Scripts
140
+
141
+ Run each script and address output before proceeding:
142
+
143
+ ```bash
144
+ # Recurrence analysis — if alert triggers, answer the 4 questions before continuing
145
+ bash ~/Documents/scripts/lesson-class-check.sh ~/Documents/docs/lessons/YYYY-MM-DD-short-description.md
146
+
147
+ # Promotion candidates — informational, report to user
148
+ bash ~/Documents/scripts/lesson-promote-check.sh
149
+
150
+ # Overdue sustain items — informational, report to user
151
+ bash ~/Documents/scripts/lessons-sustain-check.sh
152
+ ```
153
+
154
+ **If `lesson-class-check.sh` triggers a recurrence alert**, answer these 4 questions before proceeding:
155
+ 1. Why didn't the existing cluster mitigations catch this?
156
+ 2. Is this a new sub-pattern or a gap in existing mitigations?
157
+ 3. Should a new mitigation be added to the cluster?
158
+ 4. Should an existing mitigation be strengthened?
159
+
160
+ ### Step 7: Commit
161
+
162
+ Stage and commit with the standard format:
163
+
164
+ ```bash
165
+ git add ~/Documents/docs/lessons/YYYY-MM-DD-short-description.md ~/Documents/docs/lessons/SUMMARY.md
166
+ git commit -m "docs: add lesson #N — short description"
167
+ ```
168
+
169
+ ## Key References
170
+
171
+ | File | Purpose |
172
+ |------|---------|
173
+ | `~/Documents/docs/lessons/FRAMEWORK.md` | Template and OIL taxonomy |
174
+ | `~/Documents/docs/lessons/SUMMARY.md` | Lesson index (Quick Reference table + clusters) |
175
+ | `~/Documents/scripts/lesson-class-check.sh` | Cluster recurrence analysis |
176
+ | `~/Documents/scripts/lesson-promote-check.sh` | Hookify promotion candidates |
177
+ | `~/Documents/scripts/lessons-sustain-check.sh` | Overdue sustain items |
178
+
179
+ ## Common Mistakes
180
+
181
+ | Mistake | Fix |
182
+ |---------|-----|
183
+ | Assigning `lesson_learned` to a new lesson | Start at `observation`, `insight`, or `lesson` — promotion requires 30-day evidence |
184
+ | Skipping 5 Whys analysis | If tier is `insight` or higher, 5 Whys analysis is required — drill at least 2–3 levels deep, stopping at the deepest controllable cause |
185
+ | Using a category not in the enum | Pick the closest match from the 10 valid categories |
186
+ | Forgetting to update SUMMARY.md counts | Always update: row count in header, tier counts in Status table, cluster membership lists |
187
+ | Skipping `lesson-class-check.sh` | This is the most important validation — it detects cluster recurrence patterns |