autonomous-coding-toolkit 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (324) hide show
  1. package/.claude-plugin/marketplace.json +22 -0
  2. package/.claude-plugin/plugin.json +13 -0
  3. package/LICENSE +21 -0
  4. package/Makefile +21 -0
  5. package/README.md +140 -0
  6. package/SECURITY.md +28 -0
  7. package/agents/bash-expert.md +113 -0
  8. package/agents/dependency-auditor.md +138 -0
  9. package/agents/integration-tester.md +120 -0
  10. package/agents/lesson-scanner.md +149 -0
  11. package/agents/python-expert.md +179 -0
  12. package/agents/service-monitor.md +141 -0
  13. package/agents/shell-expert.md +147 -0
  14. package/benchmarks/runner.sh +147 -0
  15. package/benchmarks/tasks/01-rest-endpoint/rubric.sh +29 -0
  16. package/benchmarks/tasks/01-rest-endpoint/task.md +17 -0
  17. package/benchmarks/tasks/02-refactor-module/task.md +8 -0
  18. package/benchmarks/tasks/03-fix-integration-bug/task.md +8 -0
  19. package/benchmarks/tasks/04-add-test-coverage/task.md +8 -0
  20. package/benchmarks/tasks/05-multi-file-feature/task.md +8 -0
  21. package/bin/act.js +238 -0
  22. package/commands/autocode.md +6 -0
  23. package/commands/cancel-ralph.md +18 -0
  24. package/commands/code-factory.md +53 -0
  25. package/commands/create-prd.md +55 -0
  26. package/commands/ralph-loop.md +18 -0
  27. package/commands/run-plan.md +117 -0
  28. package/commands/submit-lesson.md +122 -0
  29. package/docs/ARCHITECTURE.md +630 -0
  30. package/docs/CONTRIBUTING.md +125 -0
  31. package/docs/lessons/0001-bare-exception-swallowing.md +34 -0
  32. package/docs/lessons/0002-async-def-without-await.md +28 -0
  33. package/docs/lessons/0003-create-task-without-callback.md +28 -0
  34. package/docs/lessons/0004-hardcoded-test-counts.md +28 -0
  35. package/docs/lessons/0005-sqlite-without-closing.md +33 -0
  36. package/docs/lessons/0006-venv-pip-path.md +27 -0
  37. package/docs/lessons/0007-runner-state-self-rejection.md +35 -0
  38. package/docs/lessons/0008-quality-gate-blind-spot.md +33 -0
  39. package/docs/lessons/0009-parser-overcount-empty-batches.md +36 -0
  40. package/docs/lessons/0010-local-outside-function-bash.md +33 -0
  41. package/docs/lessons/0011-batch-tests-for-unimplemented-code.md +36 -0
  42. package/docs/lessons/0012-api-markdown-unescaped-chars.md +33 -0
  43. package/docs/lessons/0013-export-prefix-env-parsing.md +33 -0
  44. package/docs/lessons/0014-decorator-registry-import-side-effect.md +43 -0
  45. package/docs/lessons/0015-frontend-backend-schema-drift.md +43 -0
  46. package/docs/lessons/0016-event-driven-cold-start-seeding.md +44 -0
  47. package/docs/lessons/0017-copy-paste-logic-diverges.md +43 -0
  48. package/docs/lessons/0018-layer-passes-pipeline-broken.md +45 -0
  49. package/docs/lessons/0019-systemd-envfile-ignores-export.md +41 -0
  50. package/docs/lessons/0020-persist-state-incrementally.md +44 -0
  51. package/docs/lessons/0021-dual-axis-testing.md +48 -0
  52. package/docs/lessons/0022-jsx-factory-shadowing.md +43 -0
  53. package/docs/lessons/0023-static-analysis-spiral.md +51 -0
  54. package/docs/lessons/0024-shared-pipeline-implementation.md +55 -0
  55. package/docs/lessons/0025-defense-in-depth-all-entry-points.md +65 -0
  56. package/docs/lessons/0026-linter-no-rules-false-enforcement.md +54 -0
  57. package/docs/lessons/0027-jsx-silent-prop-drop.md +64 -0
  58. package/docs/lessons/0028-no-infrastructure-in-client-code.md +49 -0
  59. package/docs/lessons/0029-never-write-secrets-to-files.md +61 -0
  60. package/docs/lessons/0030-cache-merge-not-replace.md +62 -0
  61. package/docs/lessons/0031-verify-units-at-boundaries.md +66 -0
  62. package/docs/lessons/0032-module-lifecycle-subscribe-unsubscribe.md +89 -0
  63. package/docs/lessons/0033-async-iteration-mutable-snapshot.md +72 -0
  64. package/docs/lessons/0034-caller-missing-await-silent-discard.md +65 -0
  65. package/docs/lessons/0035-duplicate-registration-silent-overwrite.md +85 -0
  66. package/docs/lessons/0036-websocket-dirty-disconnect.md +33 -0
  67. package/docs/lessons/0037-parallel-agents-worktree-corruption.md +31 -0
  68. package/docs/lessons/0038-subscribe-no-stored-ref.md +36 -0
  69. package/docs/lessons/0039-fallback-or-default-hides-bugs.md +34 -0
  70. package/docs/lessons/0040-event-firehose-filter-first.md +36 -0
  71. package/docs/lessons/0041-ambiguous-base-dir-path-nesting.md +32 -0
  72. package/docs/lessons/0042-spec-compliance-insufficient.md +36 -0
  73. package/docs/lessons/0043-exact-count-extensible-collections.md +32 -0
  74. package/docs/lessons/0044-relative-file-deps-worktree.md +39 -0
  75. package/docs/lessons/0045-iterative-design-improvement.md +33 -0
  76. package/docs/lessons/0046-plan-assertion-math-bugs.md +38 -0
  77. package/docs/lessons/0047-pytest-single-threaded-default.md +37 -0
  78. package/docs/lessons/0048-integration-wiring-batch.md +40 -0
  79. package/docs/lessons/0049-ab-verification.md +41 -0
  80. package/docs/lessons/0050-editing-sourced-files-during-execution.md +33 -0
  81. package/docs/lessons/0051-infrastructure-fixes-cant-self-heal.md +30 -0
  82. package/docs/lessons/0052-uncommitted-changes-poison-quality-gates.md +31 -0
  83. package/docs/lessons/0053-jq-compact-flag-inconsistency.md +31 -0
  84. package/docs/lessons/0054-parser-matches-inside-code-blocks.md +30 -0
  85. package/docs/lessons/0055-agents-compensate-for-garbled-prompts.md +31 -0
  86. package/docs/lessons/0056-grep-count-exit-code-on-zero.md +42 -0
  87. package/docs/lessons/0057-new-artifacts-break-git-clean-gates.md +42 -0
  88. package/docs/lessons/0058-dead-config-keys-never-consumed.md +49 -0
  89. package/docs/lessons/0059-contract-test-shared-structures.md +53 -0
  90. package/docs/lessons/0060-set-e-silent-death-in-runners.md +53 -0
  91. package/docs/lessons/0061-context-injection-dirty-state.md +50 -0
  92. package/docs/lessons/0062-sibling-bug-neighborhood-scan.md +29 -0
  93. package/docs/lessons/0063-one-flag-two-lifetimes.md +31 -0
  94. package/docs/lessons/0064-test-passes-wrong-reason.md +31 -0
  95. package/docs/lessons/0065-pipefail-grep-count-double-output.md +39 -0
  96. package/docs/lessons/0066-local-keyword-outside-function.md +37 -0
  97. package/docs/lessons/0067-stdin-hang-non-interactive-shell.md +36 -0
  98. package/docs/lessons/0068-agent-builds-wrong-thing-correctly.md +31 -0
  99. package/docs/lessons/0069-plan-quality-dominates-execution.md +30 -0
  100. package/docs/lessons/0070-spec-echo-back-prevents-drift.md +31 -0
  101. package/docs/lessons/0071-positive-instructions-outperform-negative.md +30 -0
  102. package/docs/lessons/0072-lost-in-the-middle-context-placement.md +30 -0
  103. package/docs/lessons/0073-unscoped-lessons-cause-false-positives.md +30 -0
  104. package/docs/lessons/0074-stale-context-injection-wrong-batch.md +32 -0
  105. package/docs/lessons/0075-research-artifacts-must-persist.md +32 -0
  106. package/docs/lessons/0076-wrong-decomposition-contaminates-downstream.md +30 -0
  107. package/docs/lessons/0077-cherry-pick-merges-need-manual-resolution.md +30 -0
  108. package/docs/lessons/0078-static-review-without-live-test.md +30 -0
  109. package/docs/lessons/0079-integration-wiring-batch-required.md +32 -0
  110. package/docs/lessons/FRAMEWORK.md +161 -0
  111. package/docs/lessons/SUMMARY.md +201 -0
  112. package/docs/lessons/TEMPLATE.md +85 -0
  113. package/docs/plans/2026-02-21-code-factory-v2-design.md +204 -0
  114. package/docs/plans/2026-02-21-code-factory-v2-implementation-plan.md +2189 -0
  115. package/docs/plans/2026-02-21-code-factory-v2-phase4-design.md +537 -0
  116. package/docs/plans/2026-02-21-code-factory-v2-phase4-implementation-plan.md +2012 -0
  117. package/docs/plans/2026-02-21-hardening-pass-design.md +108 -0
  118. package/docs/plans/2026-02-21-hardening-pass-plan.md +1378 -0
  119. package/docs/plans/2026-02-21-mab-research-report.md +406 -0
  120. package/docs/plans/2026-02-21-marketplace-restructure-design.md +240 -0
  121. package/docs/plans/2026-02-21-marketplace-restructure-plan.md +832 -0
  122. package/docs/plans/2026-02-21-phase4-completion-plan.md +697 -0
  123. package/docs/plans/2026-02-21-validator-suite-design.md +148 -0
  124. package/docs/plans/2026-02-21-validator-suite-plan.md +540 -0
  125. package/docs/plans/2026-02-22-mab-research-round2.md +556 -0
  126. package/docs/plans/2026-02-22-mab-run-design.md +462 -0
  127. package/docs/plans/2026-02-22-mab-run-plan.md +2046 -0
  128. package/docs/plans/2026-02-22-operations-design-methodology-research.md +681 -0
  129. package/docs/plans/2026-02-22-research-agent-failure-taxonomy.md +532 -0
  130. package/docs/plans/2026-02-22-research-code-guideline-policies.md +886 -0
  131. package/docs/plans/2026-02-22-research-codebase-audit-refactoring.md +908 -0
  132. package/docs/plans/2026-02-22-research-coding-standards-documentation.md +541 -0
  133. package/docs/plans/2026-02-22-research-competitive-landscape.md +687 -0
  134. package/docs/plans/2026-02-22-research-comprehensive-testing.md +1076 -0
  135. package/docs/plans/2026-02-22-research-context-utilization.md +459 -0
  136. package/docs/plans/2026-02-22-research-cost-quality-tradeoff.md +548 -0
  137. package/docs/plans/2026-02-22-research-lesson-transferability.md +508 -0
  138. package/docs/plans/2026-02-22-research-multi-agent-coordination.md +312 -0
  139. package/docs/plans/2026-02-22-research-phase-integration.md +602 -0
  140. package/docs/plans/2026-02-22-research-plan-quality.md +428 -0
  141. package/docs/plans/2026-02-22-research-prompt-engineering.md +558 -0
  142. package/docs/plans/2026-02-22-research-unconventional-perspectives.md +528 -0
  143. package/docs/plans/2026-02-22-research-user-adoption.md +638 -0
  144. package/docs/plans/2026-02-22-research-verification-effectiveness.md +433 -0
  145. package/docs/plans/2026-02-23-agent-suite-design.md +299 -0
  146. package/docs/plans/2026-02-23-agent-suite-plan.md +578 -0
  147. package/docs/plans/2026-02-23-phase3-cost-infrastructure-design.md +148 -0
  148. package/docs/plans/2026-02-23-phase3-cost-infrastructure-plan.md +1062 -0
  149. package/docs/plans/2026-02-23-research-bash-expert-agent.md +543 -0
  150. package/docs/plans/2026-02-23-research-dependency-auditor-agent.md +564 -0
  151. package/docs/plans/2026-02-23-research-improving-existing-agents.md +503 -0
  152. package/docs/plans/2026-02-23-research-integration-tester-agent.md +454 -0
  153. package/docs/plans/2026-02-23-research-python-expert-agent.md +429 -0
  154. package/docs/plans/2026-02-23-research-service-monitor-agent.md +425 -0
  155. package/docs/plans/2026-02-23-research-shell-expert-agent.md +533 -0
  156. package/docs/plans/2026-02-23-roadmap-to-completion.md +530 -0
  157. package/docs/plans/2026-02-24-headless-module-split-design.md +98 -0
  158. package/docs/plans/2026-02-24-headless-module-split.md +443 -0
  159. package/docs/plans/2026-02-24-lesson-scope-metadata-design.md +228 -0
  160. package/docs/plans/2026-02-24-lesson-scope-metadata-plan.md +968 -0
  161. package/docs/plans/2026-02-24-npm-packaging-design.md +841 -0
  162. package/docs/plans/2026-02-24-npm-packaging-plan.md +1965 -0
  163. package/docs/plans/audit-findings.md +186 -0
  164. package/docs/telegram-notification-format.md +98 -0
  165. package/examples/example-plan.md +51 -0
  166. package/examples/example-prd.json +72 -0
  167. package/examples/example-roadmap.md +33 -0
  168. package/examples/quickstart-plan.md +63 -0
  169. package/hooks/hooks.json +26 -0
  170. package/hooks/setup-symlinks.sh +48 -0
  171. package/hooks/stop-hook.sh +135 -0
  172. package/package.json +47 -0
  173. package/policies/bash.md +71 -0
  174. package/policies/python.md +71 -0
  175. package/policies/testing.md +61 -0
  176. package/policies/universal.md +60 -0
  177. package/scripts/analyze-report.sh +97 -0
  178. package/scripts/architecture-map.sh +145 -0
  179. package/scripts/auto-compound.sh +273 -0
  180. package/scripts/batch-audit.sh +42 -0
  181. package/scripts/batch-test.sh +101 -0
  182. package/scripts/entropy-audit.sh +221 -0
  183. package/scripts/failure-digest.sh +51 -0
  184. package/scripts/generate-ast-rules.sh +96 -0
  185. package/scripts/init.sh +112 -0
  186. package/scripts/lesson-check.sh +428 -0
  187. package/scripts/lib/common.sh +61 -0
  188. package/scripts/lib/cost-tracking.sh +153 -0
  189. package/scripts/lib/ollama.sh +60 -0
  190. package/scripts/lib/progress-writer.sh +128 -0
  191. package/scripts/lib/run-plan-context.sh +215 -0
  192. package/scripts/lib/run-plan-echo-back.sh +231 -0
  193. package/scripts/lib/run-plan-headless.sh +396 -0
  194. package/scripts/lib/run-plan-notify.sh +57 -0
  195. package/scripts/lib/run-plan-parser.sh +81 -0
  196. package/scripts/lib/run-plan-prompt.sh +215 -0
  197. package/scripts/lib/run-plan-quality-gate.sh +132 -0
  198. package/scripts/lib/run-plan-routing.sh +315 -0
  199. package/scripts/lib/run-plan-sampling.sh +170 -0
  200. package/scripts/lib/run-plan-scoring.sh +146 -0
  201. package/scripts/lib/run-plan-state.sh +142 -0
  202. package/scripts/lib/run-plan-team.sh +199 -0
  203. package/scripts/lib/telegram.sh +54 -0
  204. package/scripts/lib/thompson-sampling.sh +176 -0
  205. package/scripts/license-check.sh +74 -0
  206. package/scripts/mab-run.sh +575 -0
  207. package/scripts/module-size-check.sh +146 -0
  208. package/scripts/patterns/async-no-await.yml +5 -0
  209. package/scripts/patterns/bare-except.yml +6 -0
  210. package/scripts/patterns/empty-catch.yml +6 -0
  211. package/scripts/patterns/hardcoded-localhost.yml +9 -0
  212. package/scripts/patterns/retry-loop-no-backoff.yml +12 -0
  213. package/scripts/pipeline-status.sh +197 -0
  214. package/scripts/policy-check.sh +226 -0
  215. package/scripts/prior-art-search.sh +133 -0
  216. package/scripts/promote-mab-lessons.sh +126 -0
  217. package/scripts/prompts/agent-a-superpowers.md +29 -0
  218. package/scripts/prompts/agent-b-ralph.md +29 -0
  219. package/scripts/prompts/judge-agent.md +61 -0
  220. package/scripts/prompts/planner-agent.md +44 -0
  221. package/scripts/pull-community-lessons.sh +90 -0
  222. package/scripts/quality-gate.sh +266 -0
  223. package/scripts/research-gate.sh +90 -0
  224. package/scripts/run-plan.sh +329 -0
  225. package/scripts/scope-infer.sh +159 -0
  226. package/scripts/setup-ralph-loop.sh +155 -0
  227. package/scripts/telemetry.sh +230 -0
  228. package/scripts/tests/run-all-tests.sh +52 -0
  229. package/scripts/tests/test-act-cli.sh +46 -0
  230. package/scripts/tests/test-agents-md.sh +87 -0
  231. package/scripts/tests/test-analyze-report.sh +114 -0
  232. package/scripts/tests/test-architecture-map.sh +89 -0
  233. package/scripts/tests/test-auto-compound.sh +169 -0
  234. package/scripts/tests/test-batch-test.sh +65 -0
  235. package/scripts/tests/test-benchmark-runner.sh +25 -0
  236. package/scripts/tests/test-common.sh +168 -0
  237. package/scripts/tests/test-cost-tracking.sh +158 -0
  238. package/scripts/tests/test-echo-back.sh +180 -0
  239. package/scripts/tests/test-entropy-audit.sh +146 -0
  240. package/scripts/tests/test-failure-digest.sh +66 -0
  241. package/scripts/tests/test-generate-ast-rules.sh +145 -0
  242. package/scripts/tests/test-helpers.sh +82 -0
  243. package/scripts/tests/test-init.sh +47 -0
  244. package/scripts/tests/test-lesson-check.sh +278 -0
  245. package/scripts/tests/test-lesson-local.sh +55 -0
  246. package/scripts/tests/test-license-check.sh +109 -0
  247. package/scripts/tests/test-mab-run.sh +182 -0
  248. package/scripts/tests/test-ollama-lib.sh +49 -0
  249. package/scripts/tests/test-ollama.sh +60 -0
  250. package/scripts/tests/test-pipeline-status.sh +198 -0
  251. package/scripts/tests/test-policy-check.sh +124 -0
  252. package/scripts/tests/test-prior-art-search.sh +96 -0
  253. package/scripts/tests/test-progress-writer.sh +140 -0
  254. package/scripts/tests/test-promote-mab-lessons.sh +110 -0
  255. package/scripts/tests/test-pull-community-lessons.sh +149 -0
  256. package/scripts/tests/test-quality-gate.sh +241 -0
  257. package/scripts/tests/test-research-gate.sh +132 -0
  258. package/scripts/tests/test-run-plan-cli.sh +86 -0
  259. package/scripts/tests/test-run-plan-context.sh +305 -0
  260. package/scripts/tests/test-run-plan-e2e.sh +153 -0
  261. package/scripts/tests/test-run-plan-headless.sh +424 -0
  262. package/scripts/tests/test-run-plan-notify.sh +124 -0
  263. package/scripts/tests/test-run-plan-parser.sh +217 -0
  264. package/scripts/tests/test-run-plan-prompt.sh +254 -0
  265. package/scripts/tests/test-run-plan-quality-gate.sh +222 -0
  266. package/scripts/tests/test-run-plan-routing.sh +178 -0
  267. package/scripts/tests/test-run-plan-scoring.sh +148 -0
  268. package/scripts/tests/test-run-plan-state.sh +261 -0
  269. package/scripts/tests/test-run-plan-team.sh +157 -0
  270. package/scripts/tests/test-scope-infer.sh +150 -0
  271. package/scripts/tests/test-setup-ralph-loop.sh +63 -0
  272. package/scripts/tests/test-telegram-env.sh +38 -0
  273. package/scripts/tests/test-telegram.sh +121 -0
  274. package/scripts/tests/test-telemetry.sh +46 -0
  275. package/scripts/tests/test-thompson-sampling.sh +139 -0
  276. package/scripts/tests/test-validate-all.sh +60 -0
  277. package/scripts/tests/test-validate-commands.sh +89 -0
  278. package/scripts/tests/test-validate-hooks.sh +98 -0
  279. package/scripts/tests/test-validate-lessons.sh +150 -0
  280. package/scripts/tests/test-validate-plan-quality.sh +235 -0
  281. package/scripts/tests/test-validate-plans.sh +187 -0
  282. package/scripts/tests/test-validate-plugin.sh +106 -0
  283. package/scripts/tests/test-validate-prd.sh +184 -0
  284. package/scripts/tests/test-validate-skills.sh +134 -0
  285. package/scripts/validate-all.sh +57 -0
  286. package/scripts/validate-commands.sh +67 -0
  287. package/scripts/validate-hooks.sh +89 -0
  288. package/scripts/validate-lessons.sh +98 -0
  289. package/scripts/validate-plan-quality.sh +369 -0
  290. package/scripts/validate-plans.sh +120 -0
  291. package/scripts/validate-plugin.sh +86 -0
  292. package/scripts/validate-policies.sh +42 -0
  293. package/scripts/validate-prd.sh +118 -0
  294. package/scripts/validate-skills.sh +96 -0
  295. package/skills/autocode/SKILL.md +285 -0
  296. package/skills/autocode/ab-verification.md +51 -0
  297. package/skills/autocode/code-quality-standards.md +37 -0
  298. package/skills/autocode/competitive-mode.md +364 -0
  299. package/skills/brainstorming/SKILL.md +97 -0
  300. package/skills/capture-lesson/SKILL.md +187 -0
  301. package/skills/check-lessons/SKILL.md +116 -0
  302. package/skills/dispatching-parallel-agents/SKILL.md +110 -0
  303. package/skills/executing-plans/SKILL.md +85 -0
  304. package/skills/finishing-a-development-branch/SKILL.md +201 -0
  305. package/skills/receiving-code-review/SKILL.md +72 -0
  306. package/skills/requesting-code-review/SKILL.md +59 -0
  307. package/skills/requesting-code-review/code-reviewer.md +82 -0
  308. package/skills/research/SKILL.md +145 -0
  309. package/skills/roadmap/SKILL.md +115 -0
  310. package/skills/subagent-driven-development/SKILL.md +98 -0
  311. package/skills/subagent-driven-development/code-quality-reviewer-prompt.md +18 -0
  312. package/skills/subagent-driven-development/implementer-prompt.md +73 -0
  313. package/skills/subagent-driven-development/spec-reviewer-prompt.md +57 -0
  314. package/skills/systematic-debugging/SKILL.md +134 -0
  315. package/skills/systematic-debugging/condition-based-waiting.md +64 -0
  316. package/skills/systematic-debugging/defense-in-depth.md +32 -0
  317. package/skills/systematic-debugging/root-cause-tracing.md +55 -0
  318. package/skills/test-driven-development/SKILL.md +167 -0
  319. package/skills/using-git-worktrees/SKILL.md +219 -0
  320. package/skills/using-superpowers/SKILL.md +54 -0
  321. package/skills/verification-before-completion/SKILL.md +140 -0
  322. package/skills/verify/SKILL.md +82 -0
  323. package/skills/writing-plans/SKILL.md +128 -0
  324. package/skills/writing-skills/SKILL.md +93 -0
@@ -0,0 +1,2012 @@
1
+ # Code Factory v2 Phase 4 Implementation Plan
2
+
3
+ > **For Claude:** REQUIRED SUB-SKILL: Use superpowers:executing-plans to implement this plan task-by-task.
4
+
5
+ **Goal:** Complete Phase 4 of Code Factory v2 — 43 new lessons, per-batch context assembler, ast-grep integration, team mode with decision gate, and parallel patch sampling.
6
+
7
+ **Architecture:** Fixes-first, then features. Batch 1 ships stability fixes and all lessons. Batches 2-5 add capabilities that build on each other: context assembler feeds into team mode, ast-grep feeds into scoring, team mode enables parallel sampling.
8
+
9
+ **Tech Stack:** Bash, jq, ast-grep (optional), Claude Code agent teams (experimental)
10
+
11
+ ## Quality Gates
12
+
13
+ Between every batch, run:
14
+ ```bash
15
+ scripts/tests/run-all-tests.sh
16
+ scripts/quality-gate.sh --project-root .
17
+ ```
18
+
19
+ Expected: all test files pass, all assertions green, no lesson-check violations.
20
+
21
+ ---
22
+
23
+ ## Batch 1: Quick Fixes + Lessons (0007-0049)
24
+
25
+ context_refs: scripts/lib/run-plan-headless.sh, scripts/lib/common.sh, scripts/quality-gate.sh, docs/lessons/0001-bare-exception-swallowing.md, docs/lessons/TEMPLATE.md
26
+
27
+ ### Task 1: Fix empty batch detection in run-plan-headless.sh
28
+
29
+ **Files:**
30
+ - Modify: `scripts/lib/run-plan-headless.sh:37-44`
31
+ - Test: `scripts/tests/test-run-plan-headless.sh`
32
+
33
+ **Step 1: Write the failing test**
34
+
35
+ Add to `scripts/tests/test-run-plan-headless.sh`:
36
+
37
+ ```bash
38
+ # Create a plan with 2 real batches and 1 empty trailing match
39
+ cat > "$WORK/plan-empty.md" << 'PLAN'
40
+ ## Batch 1: Real Batch
41
+ ### Task 1: Do something
42
+ Write some code.
43
+
44
+ ## Batch 2: Also Real
45
+ ### Task 2: Do more
46
+ Write more code.
47
+
48
+ ## Batch 3:
49
+ PLAN
50
+
51
+ # get_batch_text should return empty for batch 3
52
+ source "$SCRIPT_DIR/../lib/run-plan-parser.sh"
53
+ val=$(get_batch_text "$WORK/plan-empty.md" 3)
54
+ assert_eq "get_batch_text: empty batch returns empty" "" "$val"
55
+
56
+ # count_batches should count all 3 (parser counts headers)
57
+ val=$(count_batches "$WORK/plan-empty.md")
58
+ assert_eq "count_batches: counts all headers including empty" "3" "$val"
59
+ ```
60
+
61
+ **Step 2: Run test to confirm current parser behavior (expected to pass)**
62
+
63
+ Run: `bash scripts/tests/test-run-plan-headless.sh`
64
+ Expected: These tests should PASS (get_batch_text already returns empty for empty batches). The bug is in run-plan-headless.sh not checking the return value.
65
+
66
+ **Step 3: Implement empty batch skip**
67
+
68
+ In `scripts/lib/run-plan-headless.sh`, after line 39 (`title=$(get_batch_title...)`), add:
69
+
70
+ ```bash
71
+ local batch_text
72
+ batch_text=$(get_batch_text "$PLAN_FILE" "$batch")
73
+ if [[ -z "$batch_text" ]]; then
74
+ echo " (empty batch -- skipping)"
75
+ continue
76
+ fi
77
+ ```
78
+
79
+ **Step 4: Run all tests to verify**
80
+
81
+ Run: `bash scripts/tests/run-all-tests.sh`
82
+ Expected: ALL PASSED
83
+
84
+ **Step 5: Commit**
85
+
86
+ ```bash
87
+ git add scripts/lib/run-plan-headless.sh scripts/tests/test-run-plan-headless.sh
88
+ git commit -m "fix: skip empty batches in headless mode — avoids wasted API calls"
89
+ ```
90
+
91
+ ### Task 2: Add bash test suite detection to quality-gate.sh
92
+
93
+ **Files:**
94
+ - Modify: `scripts/lib/common.sh:12-23`
95
+ - Modify: `scripts/quality-gate.sh:129-155`
96
+ - Test: `scripts/tests/test-common.sh`
97
+ - Test: `scripts/tests/test-quality-gate.sh`
98
+
99
+ **Step 1: Write the failing test for detect_project_type**
100
+
101
+ Add to `scripts/tests/test-common.sh`:
102
+
103
+ ```bash
104
+ # Bash project detection
105
+ mkdir -p "$WORK/bash-proj/scripts/tests"
106
+ echo '#!/bin/bash' > "$WORK/bash-proj/scripts/tests/run-all-tests.sh"
107
+ chmod +x "$WORK/bash-proj/scripts/tests/run-all-tests.sh"
108
+ val=$(detect_project_type "$WORK/bash-proj")
109
+ assert_eq "detect_project_type: bash project with run-all-tests.sh" "bash" "$val"
110
+
111
+ # Bash project with test-*.sh glob
112
+ mkdir -p "$WORK/bash-proj2/scripts/tests"
113
+ touch "$WORK/bash-proj2/scripts/tests/test-foo.sh"
114
+ val=$(detect_project_type "$WORK/bash-proj2")
115
+ assert_eq "detect_project_type: bash project with test-*.sh files" "bash" "$val"
116
+ ```
117
+
118
+ **Step 2: Run test to verify it fails**
119
+
120
+ Run: `bash scripts/tests/test-common.sh`
121
+ Expected: FAIL — detect_project_type returns "unknown" for bash projects
122
+
123
+ **Step 3: Add bash detection to detect_project_type**
124
+
125
+ In `scripts/lib/common.sh`, modify `detect_project_type()` to add bash detection before the final `else`:
126
+
127
+ ```bash
128
+ detect_project_type() {
129
+ local dir="$1"
130
+ if [[ -f "$dir/pyproject.toml" || -f "$dir/setup.py" || -f "$dir/pytest.ini" ]]; then
131
+ echo "python"
132
+ elif [[ -f "$dir/package.json" ]]; then
133
+ echo "node"
134
+ elif [[ -f "$dir/Makefile" ]]; then
135
+ echo "make"
136
+ elif [[ -x "$dir/scripts/tests/run-all-tests.sh" ]] || ls "$dir"/scripts/tests/test-*.sh >/dev/null 2>&1; then
137
+ echo "bash"
138
+ else
139
+ echo "unknown"
140
+ fi
141
+ }
142
+ ```
143
+
144
+ **Step 4: Add bash case to quality-gate.sh test suite section**
145
+
146
+ In `scripts/quality-gate.sh`, after the `make)` case and before `esac`, add:
147
+
148
+ ```bash
149
+ bash)
150
+ if [[ -x "$PROJECT_ROOT/scripts/tests/run-all-tests.sh" ]]; then
151
+ echo "Detected: bash project (run-all-tests.sh)"
152
+ "$PROJECT_ROOT/scripts/tests/run-all-tests.sh"
153
+ test_ran=1
154
+ fi
155
+ ;;
156
+ ```
157
+
158
+ **Step 5: Run all tests to verify**
159
+
160
+ Run: `bash scripts/tests/run-all-tests.sh`
161
+ Expected: ALL PASSED (including the new assertions)
162
+
163
+ **Step 6: Commit**
164
+
165
+ ```bash
166
+ git add scripts/lib/common.sh scripts/quality-gate.sh scripts/tests/test-common.sh scripts/tests/test-quality-gate.sh
167
+ git commit -m "feat: detect bash test suites in quality-gate.sh — run-all-tests.sh and test-*.sh"
168
+ ```
169
+
170
+ ### Task 3: Write lesson files 0007-0019
171
+
172
+ **Files:**
173
+ - Create: `docs/lessons/0007-runner-state-self-rejection.md`
174
+ - Create: `docs/lessons/0008-quality-gate-blind-spot.md`
175
+ - Create: `docs/lessons/0009-parser-overcount-empty-batches.md`
176
+ - Create: `docs/lessons/0010-local-outside-function-bash.md`
177
+ - Create: `docs/lessons/0011-batch-tests-for-unimplemented-code.md`
178
+ - Create: `docs/lessons/0012-api-markdown-unescaped-chars.md`
179
+ - Create: `docs/lessons/0013-export-prefix-env-parsing.md`
180
+ - Create: `docs/lessons/0014-decorator-registry-import-side-effect.md`
181
+ - Create: `docs/lessons/0015-frontend-backend-schema-drift.md`
182
+ - Create: `docs/lessons/0016-event-driven-cold-start-seeding.md`
183
+ - Create: `docs/lessons/0017-copy-paste-logic-diverges.md`
184
+ - Create: `docs/lessons/0018-layer-passes-pipeline-broken.md`
185
+ - Create: `docs/lessons/0019-systemd-envfile-ignores-export.md`
186
+ - Reference: `docs/lessons/TEMPLATE.md`
187
+
188
+ **Instructions:**
189
+
190
+ Write each lesson file using the YAML frontmatter schema from `docs/lessons/TEMPLATE.md`. Each file must have:
191
+ - YAML frontmatter between `---` delimiters with: id, title, severity, languages, category, pattern (type + regex/description), fix, example (bad + good)
192
+ - Three markdown sections: ## Observation, ## Insight, ## Lesson
193
+ - NO project-specific references (no project names, IPs, hostnames, usernames)
194
+ - Generalized language — the anti-pattern, not the specific bug
195
+
196
+ Use the design doc mapping table at `docs/plans/2026-02-21-code-factory-v2-phase4-design.md` for the ID, title, severity, and category of each lesson.
197
+
198
+ **Lesson content (generalized):**
199
+
200
+ **0007** — Runner state file (e.g., `.run-plan-state.json`) created by a tool gets rejected by that same tool's git-clean check. The tool creates the file, then its quality gate rejects the batch because the file is untracked. Fix: add tool-generated files to `.gitignore`.
201
+
202
+ **0008** — Quality gates that auto-detect test frameworks (pytest/jest/make) miss non-standard test suites (bash `test-*.sh`, custom runners). Gate reports "no tests detected" while hundreds of assertions exist. Fix: detect custom test runners by convention (executable `run-all-tests.sh`, `test-*.sh` glob).
203
+
204
+ **0009** — Plan parsers that count batch headers can over-count (e.g., empty trailing headers, non-standard formatting). Each phantom batch spawns an agent that discovers "nothing to do" — wasted API call and time. Fix: check `get_batch_text` is non-empty before execution.
205
+
206
+ **0010** — In bash, `local` outside a function is an error: the builtin prints `local: can only be used in a function` and returns a non-zero status (fatal under `set -e`). Some other shells silently accept it. This creates scripts that work on one machine but fail on another. `local` in auto-compound.sh line 149 was outside any function scope. Fix: never use `local` outside a function; use plain variable assignment.
207
+ Pattern type: syntactic. Regex: `^local ` (at script top-level, outside function).
208
+
209
+ **0011** — When a plan has batches 1-7 and batch 3's agent writes tests expecting batch 4's code, those tests fail until batch 4 runs. The agent in batch 3 is doing TDD for its own work but accidentally creates forward dependencies. Fix: plan tasks so each batch is self-contained — tests only reference code written in the same or earlier batches.
210
+
211
+ **0012** — Telegram (and similar APIs) with `parse_mode=Markdown` reject messages containing unescaped `_`, `*`, `[`, etc. The message silently fails or returns `{"ok":false}`. Fix: either escape all special characters or use plain text mode as default with markdown as opt-in.
212
+
213
+ **0013** — `.env` files commonly use `export VAR=value` syntax (for shell sourcing). `cut -d= -f2` extracts `value` correctly from such a line, but an anchored lookup like `grep '^VAR=' file | cut -d= -f2` never matches lines starting with `export`, so the variable silently comes back empty. Fix: strip the `export ` prefix before parsing: `sed 's/^export //'`.
214
+ Pattern type: syntactic. Regex: `cut -d= -f2` (in env file parsing context).
215
+
216
+ **0014** — Python decorator-based registries (`@register("name")`) execute at import time. If the module containing decorated functions is never imported, the registry is empty. No error, no warning — the feature just doesn't work. Fix: ensure all modules with registrations are imported in the package `__init__.py` or an explicit loader.
217
+
218
+ **0015** — Frontend and backend can define the same data shape independently. Over time they drift — backend adds a field, frontend doesn't read it; frontend expects a format backend doesn't produce. Only an end-to-end trace catches this. Fix: shared schema definition (TypeScript types generated from API schema) or contract tests.
219
+
220
+ **0016** — Event-driven systems work fine in steady state (events flow, handlers react) but produce empty/wrong output on first boot — no events have arrived yet. Fix: on startup, seed current state by fetching a snapshot via REST/query before subscribing to events.
221
+
222
+ **0017** — Two modules that compute the same thing independently (e.g., feature extraction, date formatting, config parsing) will diverge silently over time as one gets updated and the other doesn't. Fix: import from one source. If two modules need the same logic, extract it to a shared function.
223
+
224
+ **0018** — Each layer of a pipeline (data fetch, transform, store, API, UI) can pass its unit tests while the full pipeline is broken at the seams. Fix: add at least one end-to-end test that traces a single input through every layer. Dual-axis testing: horizontal (every endpoint) + vertical (one full trace).
225
+
226
+ **0019** — systemd `EnvironmentFile=` expects `KEY=value` format. Lines starting with `export` are silently ignored. Services start without error but have empty environment variables. Fix: use a bash wrapper (`ExecStart=/bin/bash -c '. ~/.env && exec /path/to/binary'`) or strip `export` from the file.
227
+ Pattern type: syntactic. Regex: `EnvironmentFile=` (in systemd unit files — warn to use bash wrapper).
228
+
229
+ **Step 1: Write all 13 lesson files**
230
+
231
+ Create each file at `docs/lessons/NNNN-<slug>.md` following the template exactly.
232
+
233
+ **Step 2: Verify YAML frontmatter is valid**
234
+
235
+ Run for each file:
236
+ ```bash
237
+ for f in docs/lessons/00{07..19}-*.md; do
238
+ echo "--- $f ---"
239
+ sed -n '/^---$/,/^---$/p' "$f" | head -1
240
+ done
241
+ ```
242
+
243
+ **Step 3: Run lesson-check to verify no new violations**
244
+
245
+ Run: `bash scripts/lesson-check.sh docs/lessons/00{07..19}-*.md`
246
+ Expected: No violations (lesson files are documentation, not code)
247
+
248
+ **Step 4: Commit**
249
+
250
+ ```bash
251
+ git add docs/lessons/0007-*.md docs/lessons/0008-*.md docs/lessons/0009-*.md docs/lessons/0010-*.md docs/lessons/0011-*.md docs/lessons/0012-*.md docs/lessons/0013-*.md docs/lessons/0014-*.md docs/lessons/0015-*.md docs/lessons/0016-*.md docs/lessons/0017-*.md docs/lessons/0018-*.md docs/lessons/0019-*.md
252
+ git commit -m "docs: add lessons 0007-0019 — v2 execution findings + generalized patterns"
253
+ ```
254
+
255
+ ### Task 4: Write lesson files 0020-0035
256
+
257
+ **Files:**
258
+ - Create: `docs/lessons/0020-persist-state-incrementally.md`
259
+ - Create: `docs/lessons/0021-dual-axis-testing.md`
260
+ - Create: `docs/lessons/0022-jsx-factory-shadowing.md`
261
+ - Create: `docs/lessons/0023-static-analysis-spiral.md`
262
+ - Create: `docs/lessons/0024-shared-pipeline-implementation.md`
263
+ - Create: `docs/lessons/0025-defense-in-depth-all-entry-points.md`
264
+ - Create: `docs/lessons/0026-linter-no-rules-false-enforcement.md`
265
+ - Create: `docs/lessons/0027-jsx-silent-prop-drop.md`
266
+ - Create: `docs/lessons/0028-no-infrastructure-in-client-code.md`
267
+ - Create: `docs/lessons/0029-never-write-secrets-to-files.md`
268
+ - Create: `docs/lessons/0030-cache-merge-not-replace.md`
269
+ - Create: `docs/lessons/0031-verify-units-at-boundaries.md`
270
+ - Create: `docs/lessons/0032-module-lifecycle-subscribe-unsubscribe.md`
271
+ - Create: `docs/lessons/0033-async-iteration-mutable-snapshot.md`
272
+ - Create: `docs/lessons/0034-caller-missing-await-silent-discard.md`
273
+ - Create: `docs/lessons/0035-duplicate-registration-silent-overwrite.md`
274
+ - Reference: `docs/lessons/TEMPLATE.md`
275
+
276
+ **Instructions:**
277
+
278
+ Same format as Task 3. Use design doc mapping table for metadata.
279
+
280
+ **Lesson content (generalized):**
281
+
282
+ **0020** — Long-running processes (ETL, embeddings, batch jobs) that save state only at the end lose all progress on crash. A 2-hour job that crashes on the save step restarts from zero. Fix: checkpoint state after each logical unit of work. Incremental saves mean crashes only lose the last unit, not everything.
283
+
284
+ **0021** — Dual-axis testing: horizontal sweep (hit every endpoint/CLI/interface) confirms the surface exists. Vertical trace (one input through every layer to final output) confirms data flows end-to-end. Both required. If time-constrained, vertical catches more integration bugs per minute.
285
+
286
+ **0022** — Build tools that inject JSX factory functions (e.g., esbuild's `jsxFactory: 'h'`) create invisible global variables. Arrow function parameters with the same name (`items.map(h => ...)`) shadow the factory, causing silent render crashes. Fix: never use single-letter variable names that match build tool injections. Lint rule: `no-shadow` for known factory names.
287
+ Pattern type: syntactic. Regex: `\.map\(h\s*=>` or `\.map\(\(h\)` (in JSX files).
288
+
289
+ **0023** — Static analysis tools suggest fixes. Implementing those fixes triggers new warnings. Fixing those triggers more. The spiral creates more bugs than it solves because each "fix" changes code the developer didn't intend to touch. Fix: set a lint baseline, only fix violations in code you're actively changing. If a refactor creates new lint failures, stop and reassess.
290
+
291
+ **0024** — When two pipeline stages independently implement the same feature logic (e.g., feature extraction, data normalization), they'll produce different results. Fix: shared implementation — both stages import from one module. If they can't share code (different languages), add a contract test.
292
+
293
+ **0025** — Validating input at the first entry point isn't enough if there are multiple paths into the system (API, CLI, WebSocket, cron). Each entry point needs the same validation. Fix: centralize validation in a shared function called by all entry points. Test each entry point with invalid input.
294
+
295
+ **0026** — Installing a linter without explicitly configured rules gives a false sense of enforcement. `ruff check` with no `--select` applies only its small default rule set (Pyflakes `F` plus a subset of pycodestyle `E`), so most rule families never run. `eslint` with no rules configured flags nothing. Developers see "0 issues" and assume code is clean. Fix: always configure rules explicitly. Test that the linter actually catches something by including a known-bad sample.
296
+
297
+ **0027** — JSX frameworks silently drop unrecognized props. Passing `onClick` when the component expects `onPress`, or `value` when it expects `defaultValue`, produces no error — the prop is simply ignored. Fix: use TypeScript with strict component prop types. Without TS, verify prop names against component signature, not the plan.
298
+ Pattern type: syntactic (in TypeScript projects, detectable by unused prop warning).
299
+
300
+ **0028** — Embedding IP addresses, internal hostnames, or port numbers in client-side code (browser JS, mobile apps) exposes infrastructure details and breaks when infrastructure changes. Fix: use relative URLs, environment variables, or a config endpoint. Never hardcode infrastructure in shipped code.
301
+ Pattern type: syntactic. Regex: `['"]https?://\d+\.\d+\.\d+\.\d+` or `['"]https?://localhost:\d+` (in client-side files).
302
+
303
+ **0029** — Writing actual secret values (API keys, tokens, passwords) into committed files — even in tests, comments, or "examples" — risks exposure. Secrets in git history persist even after deletion. Fix: reference secrets by env var name only. In tests, use mock values (`test-token-123`). Enforce with pre-commit hooks (gitleaks, detect-secrets).
304
+ Pattern type: syntactic (detectable by secret scanning tools).
305
+
306
+ **0030** — Cache or registry updates that replace the entire cache with new data lose entries not present in the update. If module A registers 5 entries and module B replaces the cache with 3 entries, A's entries vanish. Fix: merge, never replace. `cache.update(new_entries)` not `cache = new_entries`.
307
+
308
+ **0031** — When data crosses boundaries (API to API, module to module, UI to backend), units can change silently. A function returns 0-1 (proportion), the consumer expects 0-100 (percentage), or vice versa. Comments may lie. Fix: verify units at every boundary. Add unit to variable names when ambiguous (`accuracy_pct`, `ratio_0_1`).
309
+
310
+ **0032** — Components that subscribe to events in the constructor but never unsubscribe leak handlers. After shutdown/restart, old handlers fire on stale state. Fix: subscribe in `initialize()` (after startup gate), store the callback reference on `self`, and unsubscribe in `shutdown()`. Anonymous closures can't be cleaned up.
311
+
312
+ **0033** — Iterating over a mutable collection (set, dict, list) while async operations inside the loop yield control (via `await`) allows concurrent modifications. Python raises `RuntimeError: Set changed size during iteration`. Fix: snapshot before iterating: `for item in list(my_set):`.
313
+ Pattern type: syntactic. Regex: `for .+ in self\.\w+:` (in async functions iterating over instance attributes).
314
+
315
+ **0034** — Calling an `async def` function without `await` silently discards its work. The coroutine object is created but never executed. No exception is raised; the only signal is a `RuntimeWarning: coroutine ... was never awaited`, emitted when the object is garbage-collected and easy to miss in logs. Fix: always `await` async function calls. Use `create_task()` if fire-and-forget is intended (with a callback attached via `add_done_callback`).
316
+
317
+ **0035** — When multiple components register with the same ID (module name, plugin key, route path), the last registration silently overwrites earlier ones. No error, no warning — the overwritten component just stops working. Fix: check for existing registration before inserting. Log a warning or raise on duplicate.
318
+
319
+ **Step 1: Write all 16 lesson files**
320
+
321
+ **Step 2: Verify each has valid YAML frontmatter**
322
+
323
+ **Step 3: Commit**
324
+
325
+ ```bash
326
+ git add docs/lessons/00{20..35}-*.md
327
+ git commit -m "docs: add lessons 0020-0035 — lifecycle, async, security, testing patterns"
328
+ ```
329
+
330
+ ### Task 5: Write lesson files 0036-0049
331
+
332
+ **Files:**
333
+ - Create: `docs/lessons/0036-websocket-dirty-disconnect.md`
334
+ - Create: `docs/lessons/0037-parallel-agents-worktree-corruption.md`
335
+ - Create: `docs/lessons/0038-subscribe-no-stored-ref.md`
336
+ - Create: `docs/lessons/0039-fallback-or-default-hides-bugs.md`
337
+ - Create: `docs/lessons/0040-event-firehose-filter-first.md`
338
+ - Create: `docs/lessons/0041-ambiguous-base-dir-path-nesting.md`
339
+ - Create: `docs/lessons/0042-spec-compliance-insufficient.md`
340
+ - Create: `docs/lessons/0043-exact-count-extensible-collections.md`
341
+ - Create: `docs/lessons/0044-relative-file-deps-worktree.md`
342
+ - Create: `docs/lessons/0045-iterative-design-improvement.md`
343
+ - Create: `docs/lessons/0046-plan-assertion-math-bugs.md`
344
+ - Create: `docs/lessons/0047-pytest-single-threaded-default.md`
345
+ - Create: `docs/lessons/0048-integration-wiring-batch.md`
346
+ - Create: `docs/lessons/0049-ab-verification.md`
347
+ - Reference: `docs/lessons/TEMPLATE.md`
348
+
349
+ **Instructions:**
350
+
351
+ Same format as Tasks 3-4. Use design doc mapping table for metadata.
352
+
353
+ **Lesson content (generalized):**
354
+
355
+ **0036** — WebSocket clients that disconnect without a close frame (network drop, mobile backgrounding) don't trigger the `WebSocketDisconnect` exception. Instead, the next `send()` raises `RuntimeError`. Fix: wrap all WebSocket sends in `try/except RuntimeError` and clean up the connection.
356
+
357
+ **0037** — Multiple AI agents or CI jobs committing to the same git worktree corrupt the staging area. Pre-commit hooks that use `git stash` interfere with concurrent commits. Fix: each parallel agent gets its own git worktree. Never share a worktree between concurrent processes.
358
+
359
+ **0038** — Subscribing to events with an anonymous closure (`hub.subscribe(lambda e: handle(e))`) means you can't unsubscribe later — you don't have a reference to the callback. Fix: store the callback on `self` before subscribing: `self._handler = lambda e: handle(e); hub.subscribe(self._handler)`. Unsubscribe with the stored ref in shutdown.
360
+
361
+ **0039** — `self._resource or Resource()` creates a new resource every time `_resource` is falsy. This hides the bug that `_resource` was never properly initialized. The fallback silently masks the initialization failure and leaks resources. Fix: replace with a guard return + warning: `if not self._resource: logger.warning("not initialized"); return`.
362
+
363
+ **0040** — Processing every event in a firehose when only 5% are relevant wastes 95% of compute. A simple prefix filter (`event.startswith("target_domain")`) before any async lookup eliminates most wasted work. Fix: filter by domain/type/source at the top of the handler, before any expensive operations.
364
+
365
+ **0041** — A variable named `log_dir` that contains `/path/to/logs/intelligence/` used as `os.path.join(log_dir, "intelligence", "data")` produces `/path/to/logs/intelligence/intelligence/data`. The variable name doesn't encode what directory level it represents. Fix: name variables to encode their scope (`log_base_dir` vs `intelligence_dir`). Verify paths with `ls` before first use.
366
+
367
+ **0042** — A code review that checks "does this implement the spec?" catches functional gaps but misses defensive coding: error handling on external calls, cleanup on failure, validation on boundaries, timeouts on network ops. Fix: code review should include a defensive gaps checklist separate from spec compliance.
368
+
369
+ **0043** — Tests that assert `len(collection) == 15` break every time the collection grows (new config entry, new registered module, new test fixture). The test is coupled to an incidental count, not a meaningful invariant. Fix: use `>=` for extensible collections, or assert specific items exist rather than total count.
370
+ Pattern type: syntactic. Regex: `assert.*len\(.*==\s*\d+` (exact count assertions).
371
+
372
+ **0044** — `file:../shared-lib` dependencies in `package.json` use relative paths. In a git worktree (different depth from repo root), the relative path points to a non-existent location. Fix: use workspace protocols, absolute paths resolved at install time, or npm/yarn workspaces.
373
+
374
+ **0045** — Asking "how would you improve this section?" after each design section catches 35% more gaps than single-pass design. 5 rounds of iterative improvement is the sweet spot — diminishing returns after that. Fix: build iterative review into the design process, not as an afterthought.
375
+
376
+ **0046** — Implementation plans specify expected test assertions (`assert threshold > 0.85`). The plan author can make math errors (wrong boundary, off-by-one, inverted comparison). The implementer copies the assertion verbatim, and the test "passes" with the wrong threshold. Fix: verify threshold boundary logic independently before writing the test.
377
+
378
+ **0047** — pytest runs single-threaded by default, even on multi-core machines. A test suite that takes 5 minutes single-threaded takes 50 seconds with `-n auto` (pytest-xdist). Fix: add `pytest-xdist` to dev dependencies and `addopts = "-n auto"` to pytest config for any project with >20 tests.
379
+
380
+ **0048** — Multi-batch implementation plans that build components in separate batches often skip the "wire everything together" step. Each component passes its tests independently, but nothing connects them. Fix: plans with 3+ batches must include a final integration wiring batch that connects all prior components and runs an end-to-end test.
381
+
382
+ **0049** — A/B verification (bottom-up implementation review + top-down architectural review) finds zero-overlap bug classes. Bottom-up catches code-level issues (missing error handling, wrong types). Top-down catches design-level issues (missing components, wrong data flow). Run both after any 3+ batch implementation.
383
+
384
+ **Step 1: Write all 14 lesson files**
385
+
386
+ **Step 2: Verify each has valid YAML frontmatter**
387
+
388
+ **Step 3: Commit**
389
+
390
+ ```bash
391
+ git add docs/lessons/00{36..49}-*.md
392
+ git commit -m "docs: add lessons 0036-0049 — agents, testing, design, integration patterns"
393
+ ```
394
+
395
+ ### Task 6: Write SUMMARY.md for all 49 lessons
396
+
397
+ **Files:**
398
+ - Create/Rewrite: `docs/lessons/SUMMARY.md`
399
+
400
+ **Step 1: Write SUMMARY.md**
401
+
402
+ Structure:
403
+ - Quick Reference table (all 49 lessons: ID, title, category, severity, type)
404
+ - Three Root Cause Clusters (generalized from Documents workspace):
405
+ - **Cluster A: Silent Failures** — Something fails but produces no error, no log, no crash
406
+ - **Cluster B: Integration Boundaries** — Each component works alone, bug hides at the seam
407
+ - **Cluster C: Cold-Start Assumptions** — Works steady-state, fails on restart/first boot
408
+ - Six Rules to Build By (generalized)
409
+ - Diagnostic Shortcuts table (symptom → check this first)
410
+ - No project-specific references anywhere
411
+
412
+ Map each lesson to its cluster based on category:
413
+ - `silent-failures` → Cluster A
414
+ - `integration-boundaries` → Cluster B
415
+ - `async-traps` → Cluster A (async bugs are a form of silent failure)
416
+ - `resource-lifecycle` → Cluster A
417
+ - `test-anti-patterns` → Cluster B (tests fail at integration seams)
418
+ - `performance` → standalone
419
+
420
+ **Step 2: Verify all 49 IDs are listed**
421
+
422
+ ```bash
423
+ grep -c "^|" docs/lessons/SUMMARY.md # Should be >= 49 (plus header rows)
424
+ ```
425
+
426
+ **Step 3: Commit**
427
+
428
+ ```bash
429
+ git add docs/lessons/SUMMARY.md
430
+ git commit -m "docs: add SUMMARY.md — 49 lessons with clusters, rules, and diagnostic shortcuts"
431
+ ```
432
+
433
+ ---
434
+
435
+ ## Batch 2: Per-Batch Context Assembler
436
+
437
+ context_refs: scripts/lib/run-plan-headless.sh, scripts/lib/run-plan-prompt.sh, scripts/lib/run-plan-state.sh
438
+
439
+ ### Task 7: Create run-plan-context.sh with generate_batch_context()
440
+
441
+ **Files:**
442
+ - Create: `scripts/lib/run-plan-context.sh`
443
+ - Test: `scripts/tests/test-run-plan-context.sh`
444
+
445
+ **Step 1: Write the failing test**
446
+
447
+ Create `scripts/tests/test-run-plan-context.sh`:
448
+
449
+ ```bash
450
+ #!/usr/bin/env bash
451
+ set -euo pipefail
452
+
453
+ SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
454
+ source "$SCRIPT_DIR/../lib/common.sh"
455
+ source "$SCRIPT_DIR/../lib/run-plan-context.sh"
456
+
457
+ FAILURES=0
458
+ TESTS=0
459
+
460
+ assert_eq() {
461
+ local desc="$1" expected="$2" actual="$3"
462
+ TESTS=$((TESTS + 1))
463
+ if [[ "$expected" != "$actual" ]]; then
464
+ echo "FAIL: $desc"
465
+ echo " expected: $expected"
466
+ echo " actual: $actual"
467
+ FAILURES=$((FAILURES + 1))
468
+ else
469
+ echo "PASS: $desc"
470
+ fi
471
+ }
472
+
473
+ assert_contains() {
474
+ local desc="$1" needle="$2" haystack="$3"
475
+ TESTS=$((TESTS + 1))
476
+ if [[ "$haystack" == *"$needle"* ]]; then
477
+ echo "PASS: $desc"
478
+ else
479
+ echo "FAIL: $desc"
480
+ echo " expected to contain: $needle"
481
+ echo " actual: ${haystack:0:200}..."
482
+ FAILURES=$((FAILURES + 1))
483
+ fi
484
+ }
485
+
486
+ assert_not_contains() {
487
+ local desc="$1" needle="$2" haystack="$3"
488
+ TESTS=$((TESTS + 1))
489
+ if [[ "$haystack" != *"$needle"* ]]; then
490
+ echo "PASS: $desc"
491
+ else
492
+ echo "FAIL: $desc"
493
+ echo " expected NOT to contain: $needle"
494
+ FAILURES=$((FAILURES + 1))
495
+ fi
496
+ }
497
+
498
+ WORK=$(mktemp -d)
499
+ trap "rm -rf '$WORK'" EXIT
500
+
501
+ # === Setup test fixtures ===
502
+
503
+ # State file
504
+ cat > "$WORK/.run-plan-state.json" << 'JSON'
505
+ {
506
+ "plan": "test-plan.md",
507
+ "mode": "headless",
508
+ "batches": {
509
+ "1": {"passed": true, "test_count": 50, "duration": 120},
510
+ "2": {"passed": true, "test_count": 75, "duration": 90}
511
+ }
512
+ }
513
+ JSON
514
+
515
+ # Progress file
516
+ cat > "$WORK/progress.txt" << 'TXT'
517
+ Batch 1: Created shared library
518
+ Batch 2: Fixed test parsing
519
+ Discovery: jest output needs special handling
520
+ TXT
521
+
522
+ # Git repo for git log
523
+ cd "$WORK" && git init -q && git -c user.name=test -c user.email=test@example.com commit --allow-empty -m "batch 1: initial" -q && git -c user.name=test -c user.email=test@example.com commit --allow-empty -m "batch 2: add tests" -q
524
+ cd -
525
+
526
+ # Plan with context_refs
527
+ cat > "$WORK/test-plan.md" << 'PLAN'
528
+ ## Batch 1: Foundation
529
+ ### Task 1: Setup
530
+ Create lib.
531
+
532
+ ## Batch 2: Tests
533
+ ### Task 2: Add tests
534
+ context_refs: src/lib.sh
535
+
536
+ ## Batch 3: Integration
537
+ ### Task 3: Wire together
538
+ context_refs: src/lib.sh, tests/test-lib.sh
539
+ PLAN
540
+
541
+ # Context ref files
542
+ mkdir -p "$WORK/src" "$WORK/tests"
543
+ echo "#!/bin/bash" > "$WORK/src/lib.sh"
544
+ echo "echo hello" >> "$WORK/src/lib.sh"
545
+ echo "#!/bin/bash" > "$WORK/tests/test-lib.sh"
546
+
547
+ # === Tests ===
548
+
549
+ # generate_batch_context for batch 3 (has context_refs and prior batches)
550
+ ctx=$(generate_batch_context "$WORK/test-plan.md" 3 "$WORK")
551
+ assert_contains "context: includes quality gate expectation" "tests must stay above 75" "$ctx"
552
+ assert_contains "context: includes prior batch summary" "Batch 2" "$ctx"
553
+ assert_contains "context: includes context_refs content" "echo hello" "$ctx"
554
+ assert_not_contains "context: excludes batch 1 details for batch 3" "Batch 1: Foundation" "$ctx"
555
+
556
+ # generate_batch_context for batch 1 (no prior context)
557
+ ctx=$(generate_batch_context "$WORK/test-plan.md" 1 "$WORK")
558
+ assert_contains "context batch 1: minimal context" "Run-Plan" "$ctx"
559
+ # Should be short — no prior batches, no context_refs
560
+ char_count=${#ctx}
561
+ TESTS=$((TESTS + 1))
562
+ if [[ $char_count -lt 2000 ]]; then
563
+ echo "PASS: context batch 1: under 2000 chars ($char_count)"
564
+ else
565
+ echo "FAIL: context batch 1: over 2000 chars ($char_count)"
566
+ FAILURES=$((FAILURES + 1))
567
+ fi
568
+
569
+ # Token budget: context should stay under 6000 chars (~1500 tokens)
570
+ ctx=$(generate_batch_context "$WORK/test-plan.md" 3 "$WORK")
571
+ char_count=${#ctx}
572
+ TESTS=$((TESTS + 1))
573
+ if [[ $char_count -lt 6000 ]]; then
574
+ echo "PASS: context batch 3: under 6000 chars ($char_count)"
575
+ else
576
+ echo "FAIL: context batch 3: over 6000 chars ($char_count)"
577
+ FAILURES=$((FAILURES + 1))
578
+ fi
579
+
580
+ # Failure patterns injection
581
+ mkdir -p "$WORK/logs"
582
+ cat > "$WORK/logs/failure-patterns.json" << 'JSON'
583
+ [{"batch_title_pattern": "integration", "failure_type": "missing import", "frequency": 3, "winning_fix": "check all imports before running tests"}]
584
+ JSON
585
+
586
+ ctx=$(generate_batch_context "$WORK/test-plan.md" 3 "$WORK")
587
+ assert_contains "context: includes failure pattern warning" "missing import" "$ctx"
588
+
589
+ # === Summary ===
590
+ echo ""
591
+ echo "Results: $((TESTS - FAILURES))/$TESTS passed"
592
+ if [[ $FAILURES -gt 0 ]]; then
593
+ echo "FAILURES: $FAILURES"
594
+ exit 1
595
+ fi
596
+ echo "ALL PASSED"
597
+ ```
598
+
599
+ **Step 2: Run test to verify it fails**
600
+
601
+ Run: `bash scripts/tests/test-run-plan-context.sh`
602
+ Expected: FAIL — `run-plan-context.sh` doesn't exist yet
603
+
604
+ **Step 3: Implement generate_batch_context**
605
+
606
+ Create `scripts/lib/run-plan-context.sh`:
607
+
608
+ ```bash
609
+ #!/usr/bin/env bash
610
+ # run-plan-context.sh — Per-batch context assembler for run-plan
611
+ #
612
+ # Assembles relevant context for a batch agent within a token budget.
613
+ # Reads: state file, progress.txt, git log, context_refs, failure patterns.
614
+ # Outputs: markdown section for CLAUDE.md injection.
615
+ #
616
+ # Functions:
617
+ # generate_batch_context <plan_file> <batch_num> <worktree> -> markdown string
618
+
619
+ TOKEN_BUDGET_CHARS=6000 # ~1500 tokens
620
+
621
+ generate_batch_context() {
622
+ local plan_file="$1" batch_num="$2" worktree="$3"
623
+ local context=""
624
+ local chars_used=0
625
+
626
+ context+="## Run-Plan: Batch $batch_num"$'\n\n'
627
+
628
+ # 1. Directives from state (highest priority)
629
+ local state_file="$worktree/.run-plan-state.json"
630
+ if [[ -f "$state_file" ]]; then
631
+ local prev_test_count
632
+ prev_test_count=$(jq -r '[.batches[].test_count // 0] | max // 0' "$state_file" 2>/dev/null || echo "0")
633
+ if [[ "$prev_test_count" -gt 0 ]]; then
634
+ context+="**Directive:** tests must stay above $prev_test_count (current high water mark)"$'\n\n'
635
+ fi
636
+
637
+ # Prior batch summary (most recent 2 batches only)
638
+ local start_batch=$(( batch_num - 2 ))
639
+ [[ $start_batch -lt 1 ]] && start_batch=1
640
+ for ((b = start_batch; b < batch_num; b++)); do
641
+ local passed duration tests
642
+ passed=$(jq -r ".batches[\"$b\"].passed // false" "$state_file" 2>/dev/null)
643
+ tests=$(jq -r ".batches[\"$b\"].test_count // 0" "$state_file" 2>/dev/null)
644
+ duration=$(jq -r ".batches[\"$b\"].duration // 0" "$state_file" 2>/dev/null)
645
+ if [[ "$passed" == "true" ]]; then
646
+ context+="Batch $b: PASSED ($tests tests, ${duration}s)"$'\n'
647
+ fi
648
+ done
649
+ context+=$'\n'
650
+ fi
651
+
652
+ # 2. Failure patterns (cross-run learning)
653
+ local patterns_file="$worktree/logs/failure-patterns.json"
654
+ if [[ -f "$patterns_file" ]]; then
655
+ local batch_title
656
+ batch_title=$(get_batch_title "$plan_file" "$batch_num" 2>/dev/null || echo "")
657
+ local title_lower
658
+ title_lower=$(echo "$batch_title" | tr '[:upper:]' '[:lower:]')
659
+
660
+ # Match failure patterns by batch title keywords
661
+ local matches
662
+ matches=$(jq -r --arg title "$title_lower" \
663
+ '.[] | select(.batch_title_pattern as $p | $title | contains($p)) | "WARNING: Previously failed with \(.failure_type) (\(.frequency)x). Fix that worked: \(.winning_fix)"' \
664
+ "$patterns_file" 2>/dev/null || true)
665
+ if [[ -n "$matches" ]]; then
666
+ context+="### Known Failure Patterns"$'\n'
667
+ context+="$matches"$'\n\n'
668
+ fi
669
+ fi
670
+
671
+ chars_used=${#context}
672
+
673
+ # 3. Context refs file contents (if budget allows)
674
+ if command -v get_batch_context_refs >/dev/null 2>&1; then
675
+ local refs
676
+ refs=$(get_batch_context_refs "$plan_file" "$batch_num" 2>/dev/null || true)
677
+ if [[ -n "$refs" ]]; then
678
+ local refs_section="### Referenced Files"$'\n'
679
+ while IFS= read -r ref_file; do
680
+ ref_file=$(echo "$ref_file" | xargs) # trim whitespace
681
+ [[ -z "$ref_file" ]] && continue
682
+ local full_path="$worktree/$ref_file"
683
+ if [[ -f "$full_path" ]]; then
684
+ local file_content
685
+ file_content=$(head -50 "$full_path" 2>/dev/null || true)
686
+ local addition
687
+ addition=$'\n'"**$ref_file:**"$'\n'"$file_content"$'\n'
688
+ if [[ $(( chars_used + ${#refs_section} + ${#addition} )) -lt $TOKEN_BUDGET_CHARS ]]; then
689
+ refs_section+="$addition"
690
+ fi
691
+ fi
692
+ done <<< "$refs"
693
+ context+="$refs_section"$'\n'
694
+ fi
695
+ fi
696
+
697
+ chars_used=${#context}
698
+
699
+ # 4. Git log (if budget allows)
700
+ if [[ $(( chars_used + 500 )) -lt $TOKEN_BUDGET_CHARS ]]; then
701
+ local git_log
702
+ git_log=$(cd "$worktree" && git log --oneline -5 2>/dev/null || true)
703
+ if [[ -n "$git_log" ]]; then
704
+ context+="### Recent Commits"$'\n'
705
+ context+="$git_log"$'\n\n'
706
+ fi
707
+ fi
708
+
709
+ chars_used=${#context}
710
+
711
+ # 5. Progress.txt (if budget allows, last 10 lines)
712
+ if [[ $(( chars_used + 500 )) -lt $TOKEN_BUDGET_CHARS ]]; then
713
+ local progress_file="$worktree/progress.txt"
714
+ if [[ -f "$progress_file" ]]; then
715
+ local progress
716
+ progress=$(tail -10 "$progress_file" 2>/dev/null || true)
717
+ if [[ -n "$progress" ]]; then
718
+ context+="### Progress Notes"$'\n'
719
+ context+="$progress"$'\n\n'
720
+ fi
721
+ fi
722
+ fi
723
+
724
+ echo "$context"
725
+ }
726
+ ```
727
+
728
+ **Step 4: Run tests to verify**
729
+
730
+ Run: `bash scripts/tests/test-run-plan-context.sh`
731
+ Expected: ALL PASSED
732
+
733
+ **Step 5: Commit**
734
+
735
+ ```bash
736
+ git add scripts/lib/run-plan-context.sh scripts/tests/test-run-plan-context.sh
737
+ git commit -m "feat: add per-batch context assembler with token budget and failure patterns"
738
+ ```
739
+
740
+ ### Task 8: Wire context assembler into run-plan-headless.sh
741
+
742
+ **Files:**
743
+ - Modify: `scripts/lib/run-plan-headless.sh`
744
+ - Modify: `scripts/run-plan.sh` (source the new lib)
745
+
746
+ **Step 1: Source run-plan-context.sh in run-plan.sh**
747
+
748
+ In `scripts/run-plan.sh`, find where other libs are sourced and add:
749
+
750
+ ```bash
751
+ source "$SCRIPT_DIR/lib/run-plan-context.sh"
752
+ ```
753
+
754
+ **Step 2: Inject context into CLAUDE.md before each batch**
755
+
756
+ In `scripts/lib/run-plan-headless.sh`, after the empty batch check (added in Task 1) and before `build_batch_prompt`, add:
757
+
758
+ ```bash
759
+ # Generate and inject per-batch context into CLAUDE.md
760
+ local batch_context
761
+ batch_context=$(generate_batch_context "$PLAN_FILE" "$batch" "$WORKTREE")
762
+ if [[ -n "$batch_context" ]]; then
763
+ local claude_md="$WORKTREE/CLAUDE.md"
764
+ # Remove previous run-plan context section if present
765
+ if [[ -f "$claude_md" ]] && grep -q "^## Run-Plan:" "$claude_md"; then
766
+ # Remove from "## Run-Plan:" to end of file or next ## heading
767
+ local tmp
768
+ tmp=$(mktemp)
769
+ awk '/^## Run-Plan:/{skip=1; next} /^## /{skip=0} !skip' "$claude_md" > "$tmp"
770
+ # Also remove the trailing ## Run-Plan: line if it's still there
771
+ sed -i '/^## Run-Plan:/d' "$tmp"
772
+ mv "$tmp" "$claude_md"
773
+ fi
774
+ # Append new context
775
+ echo "" >> "$claude_md"
776
+ echo "$batch_context" >> "$claude_md"
777
+ fi
778
+ ```
779
+
780
+ **Step 3: Run all tests to verify**
781
+
782
+ Run: `bash scripts/tests/run-all-tests.sh`
783
+ Expected: ALL PASSED
784
+
785
+ **Step 4: Commit**
786
+
787
+ ```bash
788
+ git add scripts/run-plan.sh scripts/lib/run-plan-headless.sh
789
+ git commit -m "feat: wire context assembler into headless loop — injects per-batch CLAUDE.md section"
790
+ ```
791
+
792
+ ### Task 9: Add failure pattern persistence
793
+
794
+ **Files:**
795
+ - Modify: `scripts/lib/run-plan-context.sh` (add `record_failure_pattern` function)
796
+ - Modify: `scripts/lib/run-plan-headless.sh` (call on batch failure)
797
+ - Test: `scripts/tests/test-run-plan-context.sh` (add persistence tests)
798
+
799
+ **Step 1: Write the failing test**
800
+
801
+ Add to `scripts/tests/test-run-plan-context.sh`:
802
+
803
+ ```bash
804
+ # === Failure pattern recording ===
805
+ rm -f "$WORK/logs/failure-patterns.json"; record_failure_pattern "$WORK" "Integration Wiring" "missing import" "check imports before tests"
806
+
807
+ assert_eq "record_failure_pattern: creates file" "true" "$(test -f "$WORK/logs/failure-patterns.json" && echo true || echo false)"
808
+
809
+ # Record same pattern again — should increment frequency
810
+ record_failure_pattern "$WORK" "Integration Wiring" "missing import" "check imports before tests"
811
+ freq=$(jq '.[0].frequency' "$WORK/logs/failure-patterns.json")
812
+ assert_eq "record_failure_pattern: increments frequency" "2" "$freq"
813
+
814
+ # Record different pattern
815
+ record_failure_pattern "$WORK" "Test Suite" "flaky assertion" "use deterministic comparisons"
816
+ count=$(jq 'length' "$WORK/logs/failure-patterns.json")
817
+ assert_eq "record_failure_pattern: adds new pattern" "2" "$count"
818
+ ```
819
+
820
+ **Step 2: Run test to verify it fails**
821
+
822
+ Run: `bash scripts/tests/test-run-plan-context.sh`
823
+ Expected: FAIL — `record_failure_pattern` doesn't exist
824
+
825
+ **Step 3: Implement record_failure_pattern**
826
+
827
+ Add to `scripts/lib/run-plan-context.sh`:
828
+
829
+ ```bash
830
+ record_failure_pattern() {
831
+ local worktree="$1" batch_title="$2" failure_type="$3" winning_fix="$4"
832
+ local patterns_file="$worktree/logs/failure-patterns.json"
833
+ local title_lower
834
+ title_lower=$(echo "$batch_title" | tr '[:upper:]' '[:lower:]')
835
+
836
+ mkdir -p "$(dirname "$patterns_file")"
837
+
838
+ if [[ ! -f "$patterns_file" ]]; then
839
+ echo "[]" > "$patterns_file"
840
+ fi
841
+
842
+ # Check if pattern already exists
843
+ local existing
844
+ existing=$(jq -r --arg t "$title_lower" --arg f "$failure_type" \
845
+ '[.[] | select(.batch_title_pattern == $t and .failure_type == $f)] | length' \
846
+ "$patterns_file" 2>/dev/null || echo "0")
847
+
848
+ if [[ "$existing" -gt 0 ]]; then
849
+ # Increment frequency
850
+ jq --arg t "$title_lower" --arg f "$failure_type" \
851
+ '[.[] | if .batch_title_pattern == $t and .failure_type == $f then .frequency += 1 | .last_seen = (now | todate) else . end]' \
852
+ "$patterns_file" > "$patterns_file.tmp" && mv "$patterns_file.tmp" "$patterns_file"
853
+ else
854
+ # Add new pattern
855
+ jq --arg t "$title_lower" --arg f "$failure_type" --arg w "$winning_fix" \
856
+ '. += [{"batch_title_pattern": $t, "failure_type": $f, "frequency": 1, "winning_fix": $w, "last_seen": (now | todate)}]' \
857
+ "$patterns_file" > "$patterns_file.tmp" && mv "$patterns_file.tmp" "$patterns_file"
858
+ fi
859
+ }
860
+ ```
861
+
862
+ **Step 4: Wire into run-plan-headless.sh**
863
+
864
+ In the failure handling section of `run-plan-headless.sh` (after quality gate fails), add before the retry/skip/stop logic:
865
+
866
+ ```bash
867
+ # Record failure pattern for cross-run learning
868
+ local fail_type="quality gate failure"
869
+ if [[ -f "$log_file" ]]; then
870
+ # Try to extract failure type from log
871
+ fail_type=$(grep -m1 -oE "(FAIL|ERROR|FAILED).*" "$log_file" | cut -c1-80) || true; fail_type="${fail_type:-quality gate failure}"
872
+ fi
873
+ record_failure_pattern "$WORKTREE" "$title" "$fail_type" "" || true
874
+ ```
875
+
876
+ **Step 5: Run tests to verify**
877
+
878
+ Run: `bash scripts/tests/test-run-plan-context.sh && bash scripts/tests/run-all-tests.sh`
879
+ Expected: ALL PASSED
880
+
881
+ **Step 6: Commit**
882
+
883
+ ```bash
884
+ git add scripts/lib/run-plan-context.sh scripts/lib/run-plan-headless.sh scripts/tests/test-run-plan-context.sh
885
+ git commit -m "feat: add cross-run failure pattern persistence — learn from past batch failures"
886
+ ```
887
+
888
+ ---
889
+
890
+ ## Batch 3: ast-grep Integration
891
+
892
+ context_refs: scripts/prior-art-search.sh, scripts/quality-gate.sh, scripts/lesson-check.sh, docs/lessons/TEMPLATE.md
893
+
894
+ ### Task 10: Create generate-ast-rules.sh
895
+
896
+ **Files:**
897
+ - Create: `scripts/generate-ast-rules.sh`
898
+ - Create: `scripts/patterns/` directory
899
+ - Test: `scripts/tests/test-generate-ast-rules.sh`
900
+
901
+ **Step 1: Write the failing test**
902
+
903
+ Create `scripts/tests/test-generate-ast-rules.sh`:
904
+
905
+ ```bash
906
+ #!/usr/bin/env bash
907
+ set -euo pipefail
908
+
909
+ SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
910
+
911
+ FAILURES=0
912
+ TESTS=0
913
+
914
+ assert_eq() {
915
+ local desc="$1" expected="$2" actual="$3"
916
+ TESTS=$((TESTS + 1))
917
+ if [[ "$expected" != "$actual" ]]; then
918
+ echo "FAIL: $desc"
919
+ echo " expected: $expected"
920
+ echo " actual: $actual"
921
+ FAILURES=$((FAILURES + 1))
922
+ else
923
+ echo "PASS: $desc"
924
+ fi
925
+ }
926
+
927
+ assert_contains() {
928
+ local desc="$1" needle="$2" haystack="$3"
929
+ TESTS=$((TESTS + 1))
930
+ if [[ "$haystack" == *"$needle"* ]]; then
931
+ echo "PASS: $desc"
932
+ else
933
+ echo "FAIL: $desc"
934
+ echo " expected to contain: $needle"
935
+ FAILURES=$((FAILURES + 1))
936
+ fi
937
+ }
938
+
939
+ WORK=$(mktemp -d)
940
+ trap "rm -rf '$WORK'" EXIT
941
+
942
+ # Create test lesson files
943
+ mkdir -p "$WORK/lessons"
944
+ cat > "$WORK/lessons/0001-test.md" << 'LESSON'
945
+ ---
946
+ id: 1
947
+ title: "Bare except"
948
+ severity: blocker
949
+ languages: [python]
950
+ category: silent-failures
951
+ pattern:
952
+ type: syntactic
953
+ regex: "^\\s*except\\s*:"
954
+ description: "bare except"
955
+ fix: "Use specific exception"
956
+ example:
957
+ bad: |
958
+ except:
959
+ pass
960
+ good: |
961
+ except Exception as e:
962
+ logger.error(e)
963
+ ---
964
+ LESSON
965
+
966
+ cat > "$WORK/lessons/0033-async.md" << 'LESSON'
967
+ ---
968
+ id: 33
969
+ title: "Async iteration mutable"
970
+ severity: blocker
971
+ languages: [python]
972
+ category: async-traps
973
+ pattern:
974
+ type: semantic
975
+ description: "async loop iterates over mutable instance attribute"
976
+ fix: "Snapshot with list()"
977
+ example:
978
+ bad: |
979
+ async for item in self.connections:
980
+ await item.send(data)
981
+ good: |
982
+ for item in list(self.connections):
983
+ await item.send(data)
984
+ ---
985
+ LESSON
986
+
987
+ # Test: generates pattern files from lessons
988
+ "$SCRIPT_DIR/../generate-ast-rules.sh" --lessons-dir "$WORK/lessons" --output-dir "$WORK/patterns"
989
+
990
+ # Syntactic lessons should NOT generate ast-grep rules (grep handles them)
991
+ assert_eq "generate-ast-rules: skips syntactic patterns" "false" \
992
+ "$(test -f "$WORK/patterns/0001-test.yml" && echo true || echo false)"
993
+
994
+ # Semantic lessons with supported languages should generate rules
995
+ # (only if the pattern is convertible to ast-grep format)
996
+ ls "$WORK/patterns/" > "$WORK/pattern-list.txt" 2>/dev/null || true
997
+ TESTS=$((TESTS + 1))
998
+ echo "PASS: generate-ast-rules: runs without error"
999
+
1000
+ # Test: --list flag shows what would be generated
1001
+ output=$("$SCRIPT_DIR/../generate-ast-rules.sh" --lessons-dir "$WORK/lessons" --list 2>&1)
1002
+ assert_contains "generate-ast-rules: list shows lesson count" "lesson" "$output"
1003
+
1004
+ echo ""
1005
+ echo "Results: $((TESTS - FAILURES))/$TESTS passed"
1006
+ if [[ $FAILURES -gt 0 ]]; then
1007
+ echo "FAILURES: $FAILURES"
1008
+ exit 1
1009
+ fi
1010
+ echo "ALL PASSED"
1011
+ ```
1012
+
1013
+ **Step 2: Run test to verify it fails**
1014
+
1015
+ Run: `bash scripts/tests/test-generate-ast-rules.sh`
1016
+ Expected: FAIL — script doesn't exist
1017
+
1018
+ **Step 3: Implement generate-ast-rules.sh**
1019
+
1020
+ Create `scripts/generate-ast-rules.sh`:
1021
+
1022
+ ```bash
1023
+ #!/usr/bin/env bash
1024
+ # generate-ast-rules.sh — Generate ast-grep rules from lesson YAML frontmatter
1025
+ #
1026
+ # Reads lesson files with pattern.type: semantic and supported languages,
1027
+ # generates ast-grep YAML rule files in the output directory.
1028
+ #
1029
+ # Usage: generate-ast-rules.sh --lessons-dir <dir> --output-dir <dir> [--list]
1030
+ set -euo pipefail
1031
+
1032
+ LESSONS_DIR=""
1033
+ OUTPUT_DIR=""
1034
+ LIST_ONLY=false
1035
+
1036
+ while [[ $# -gt 0 ]]; do
1037
+ case "$1" in
1038
+ --lessons-dir) LESSONS_DIR="$2"; shift 2 ;;
1039
+ --output-dir) OUTPUT_DIR="$2"; shift 2 ;;
1040
+ --list) LIST_ONLY=true; shift ;;
1041
+ -h|--help)
1042
+ echo "Usage: generate-ast-rules.sh --lessons-dir <dir> --output-dir <dir> [--list]"
1043
+ exit 0 ;;
1044
+ *) echo "Unknown option: $1" >&2; exit 1 ;;
1045
+ esac
1046
+ done
1047
+
1048
+ if [[ -z "$LESSONS_DIR" ]]; then
1049
+ echo "ERROR: --lessons-dir required" >&2
1050
+ exit 1
1051
+ fi
1052
+
1053
+ generated=0
1054
+ skipped_syntactic=0
1055
+ skipped_unconvertible=0
1056
+
1057
+ for lesson_file in "$LESSONS_DIR"/*.md; do
1058
+ [[ -f "$lesson_file" ]] || continue
1059
+ [[ "$(basename "$lesson_file")" == "TEMPLATE.md" ]] && continue
1060
+ [[ "$(basename "$lesson_file")" == "SUMMARY.md" ]] && continue
1061
+ [[ "$(basename "$lesson_file")" == "FRAMEWORK.md" ]] && continue
1062
+
1063
+ # Extract frontmatter
1064
+ local_id=$(sed -n '/^---$/,/^---$/{/^id:/s/^id: *//p}' "$lesson_file" | head -1)
1065
+ local_type=$(sed -n '/^---$/,/^---$/{/^ type:/s/^ type: *//p}' "$lesson_file" | head -1)
1066
+ local_title=$(sed -n '/^---$/,/^---$/{/^title:/s/^title: *"*//p}' "$lesson_file" | head -1 | sed 's/"$//')
1067
+ local_langs=$(sed -n '/^---$/,/^---$/{/^languages:/s/^languages: *//p}' "$lesson_file" | head -1)
1068
+
1069
+ # Skip syntactic patterns (grep handles these)
1070
+ if [[ "$local_type" == "syntactic" ]]; then
1071
+ skipped_syntactic=$((skipped_syntactic + 1))
1072
+ continue
1073
+ fi
1074
+
1075
+ # Only generate for languages ast-grep supports
1076
+ if [[ "$local_langs" != *"python"* && "$local_langs" != *"javascript"* && "$local_langs" != *"typescript"* ]]; then
1077
+ skipped_unconvertible=$((skipped_unconvertible + 1))
1078
+ continue
1079
+ fi
1080
+
1081
+ local_basename=$(basename "$lesson_file" .md)
1082
+
1083
+ if [[ "$LIST_ONLY" == true ]]; then
1084
+ echo " Would generate: $local_basename.yml (lesson $local_id: $local_title)"
1085
+ generated=$((generated + 1))
1086
+ continue
1087
+ fi
1088
+
1089
+ mkdir -p "$OUTPUT_DIR"
1090
+
1091
+ # Extract bad example from frontmatter for rule pattern
1092
+ local_bad_example=$(sed -n '/^ bad: |$/,/^ good: |$/{/^ bad: |$/d; /^ good: |$/d; p}' "$lesson_file" | sed 's/^ //')
1093
+
1094
+ # Generate ast-grep rule YAML
1095
+ cat > "$OUTPUT_DIR/$local_basename.yml" << RULE
1096
+ id: $local_basename
1097
+ message: "$local_title"
1098
+ severity: warning
1099
+ language: $(echo "$local_langs" | sed 's/\[//;s/\]//;s/,.*//;s/ //g')
1100
+ note: "Auto-generated from lesson $local_id. See docs/lessons/$local_basename.md"
1101
+ RULE
1102
+
1103
+ generated=$((generated + 1))
1104
+ done
1105
+
1106
+ if [[ "$LIST_ONLY" == true ]]; then
1107
+ echo ""
1108
+ echo "Summary: $generated convertible, $skipped_syntactic syntactic (grep), $skipped_unconvertible unsupported language"
1109
+ else
1110
+ echo "Generated $generated ast-grep rules in ${OUTPUT_DIR:-<none>}"
1111
+ echo "Skipped: $skipped_syntactic syntactic (grep handles), $skipped_unconvertible unsupported language"
1112
+ fi
1113
+ ```
1114
+
1115
+ **Step 4: Create built-in pattern files**
1116
+
1117
+ Create `scripts/patterns/bare-except.yml`:
1118
+ ```yaml
1119
+ id: bare-except
1120
+ message: "Bare except clause swallows all exceptions — catch a specific exception class"
1121
+ severity: error
1122
+ language: python
1123
+ rule:
1124
+ pattern: "except: $$$BODY"
1125
+ ```
1126
+
1127
+ Create `scripts/patterns/async-no-await.yml`:
1128
+ ```yaml
1129
+ id: async-no-await
1130
+ message: "async def with no await in body — remove async keyword or add await"
1131
+ severity: warning
1132
+ language: python
1133
+ note: "Requires whole-function analysis — this is a simplified pattern"
1134
+ ```
1135
+
1136
+ Create `scripts/patterns/empty-catch.yml`:
1137
+ ```yaml
1138
+ id: empty-catch
1139
+ message: "Empty catch block silently swallows errors — add logging"
1140
+ severity: warning
1141
+ language: javascript
1142
+ rule:
1143
+ pattern: "catch ($ERR) {}"
1144
+ ```
1145
+
1146
+ **Step 5: Run tests to verify**
1147
+
1148
+ Run: `bash scripts/tests/test-generate-ast-rules.sh && bash scripts/tests/run-all-tests.sh`
1149
+ Expected: ALL PASSED
1150
+
1151
+ **Step 6: Commit**
1152
+
1153
+ ```bash
1154
+ git add scripts/generate-ast-rules.sh scripts/patterns/ scripts/tests/test-generate-ast-rules.sh
1155
+ git commit -m "feat: add ast-grep rule generation from lesson files + built-in patterns"
1156
+ ```
1157
+
1158
+ ### Task 11: Add ast-grep discovery mode to prior-art-search.sh
1159
+
1160
+ **Files:**
1161
+ - Modify: `scripts/prior-art-search.sh`
1162
+ - Test: `scripts/tests/test-prior-art-search.sh`
1163
+
1164
+ **Step 1: Write the failing test**
1165
+
1166
+ Add to `scripts/tests/test-prior-art-search.sh` (or create if not present):
1167
+
1168
+ ```bash
1169
+ # ast-grep discovery mode test (skips gracefully when not installed)
1170
+ output=$("$SCRIPT_DIR/../prior-art-search.sh" "error handling patterns" 2>&1 || true)
1171
+ if command -v ast-grep >/dev/null 2>&1; then
1172
+ assert_contains "prior-art: ast-grep section present" "Structural" "$output"
1173
+ else
1174
+ assert_contains "prior-art: ast-grep skip note" "ast-grep" "$output"
1175
+ fi
1176
+ ```
1177
+
1178
+ **Step 2: Implement ast-grep discovery in prior-art-search.sh**
1179
+
1180
+ Add a new section after existing text search:
1181
+
1182
+ ```bash
1183
+ # === Structural code search (ast-grep) ===
1184
+ if command -v ast-grep >/dev/null 2>&1; then
1185
+ echo ""
1186
+ echo "=== Structural Code Search (ast-grep) ==="
1187
+ # Run built-in patterns against local codebase
1188
+ PATTERNS_DIR="$SCRIPT_DIR/patterns"
1189
+ if [[ -d "$PATTERNS_DIR" ]]; then
1190
+ for pattern_file in "$PATTERNS_DIR"/*.yml; do
1191
+ [[ -f "$pattern_file" ]] || continue
1192
+ local_name=$(basename "$pattern_file" .yml)
1193
+ matches=$(ast-grep scan --rule "$pattern_file" . 2>/dev/null | head -5 || true)
1194
+ if [[ -n "$matches" ]]; then
1195
+ echo " Pattern '$local_name': $(echo "$matches" | wc -l) matches"
1196
+ fi
1197
+ done
1198
+ fi
1199
+ else
1200
+ echo ""
1201
+ echo "=== Structural Code Search ==="
1202
+ echo " ast-grep not installed — skipping structural analysis"
1203
+ echo " Install: npm i -g @ast-grep/cli"
1204
+ fi
1205
+ ```
1206
+
1207
+ **Step 3: Run tests and commit**
1208
+
1209
+ Run: `bash scripts/tests/run-all-tests.sh`
1210
+
1211
+ ```bash
1212
+ git add scripts/prior-art-search.sh scripts/tests/test-prior-art-search.sh
1213
+ git commit -m "feat: add ast-grep discovery mode to prior-art search"
1214
+ ```
1215
+
1216
+ ### Task 12: Add ast-grep enforcement mode to quality-gate.sh
1217
+
1218
+ **Files:**
1219
+ - Modify: `scripts/quality-gate.sh`
1220
+ - Test: `scripts/tests/test-quality-gate.sh`
1221
+
1222
+ **Step 1: Add ast-grep check section to quality-gate.sh**
1223
+
1224
+ After the lint check and before the test suite section, add:
1225
+
1226
+ ```bash
1227
+ # === Check 2.5: ast-grep structural analysis (optional) ===
1228
+ if [[ "$QUICK" != true ]] && command -v ast-grep >/dev/null 2>&1; then
1229
+ echo ""
1230
+ echo "=== Quality Gate: Structural Analysis (ast-grep) ==="
1231
+ PATTERNS_DIR="$SCRIPT_DIR/patterns"
1232
+ ast_violations=0
1233
+ if [[ -d "$PATTERNS_DIR" ]]; then
1234
+ for pattern_file in "$PATTERNS_DIR"/*.yml; do
1235
+ [[ -f "$pattern_file" ]] || continue
1236
+ matches=$(ast-grep scan --rule "$pattern_file" "$PROJECT_ROOT" 2>/dev/null || true)
1237
+ if [[ -n "$matches" ]]; then
1238
+ echo "WARNING: $(basename "$pattern_file" .yml): $(echo "$matches" | wc -l) matches"
1239
+ echo "$matches" | head -3
1240
+ ast_violations=$((ast_violations + 1))
1241
+ fi
1242
+ done
1243
+ fi
1244
+ if [[ $ast_violations -gt 0 ]]; then
1245
+ echo "ast-grep: $ast_violations pattern(s) matched (advisory)"
1246
+ else
1247
+ echo "ast-grep: clean"
1248
+ fi
1249
+ fi
1250
+ ```
1251
+
1252
+ Note: ast-grep violations are advisory (warnings) by default. No `exit 1` — this doesn't fail the gate unless `--strict-ast` is added in a future iteration.
1253
+
1254
+ **Step 2: Run tests and commit**
1255
+
1256
+ Run: `bash scripts/tests/run-all-tests.sh`
1257
+
1258
+ ```bash
1259
+ git add scripts/quality-gate.sh scripts/tests/test-quality-gate.sh
1260
+ git commit -m "feat: add ast-grep structural analysis to quality gate (advisory mode)"
1261
+ ```
1262
+
1263
+ ---
1264
+
1265
+ ## Batch 4: Team Mode with Decision Gate
1266
+
1267
+ context_refs: scripts/run-plan.sh, scripts/lib/run-plan-headless.sh, scripts/lib/run-plan-parser.sh, scripts/lib/run-plan-context.sh
1268
+
1269
+ ### Task 13: Create run-plan-routing.sh with plan analysis
1270
+
1271
+ **Files:**
1272
+ - Create: `scripts/lib/run-plan-routing.sh`
1273
+ - Test: `scripts/tests/test-run-plan-routing.sh`
1274
+
1275
+ **Step 1: Write the failing test**
1276
+
1277
+ Create `scripts/tests/test-run-plan-routing.sh`:
1278
+
1279
+ ```bash
1280
+ #!/usr/bin/env bash
1281
+ set -euo pipefail
1282
+
1283
+ SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
1284
+ source "$SCRIPT_DIR/../lib/run-plan-parser.sh"
1285
+ source "$SCRIPT_DIR/../lib/run-plan-routing.sh"
1286
+
1287
+ FAILURES=0
1288
+ TESTS=0
1289
+
1290
+ assert_eq() {
1291
+ local desc="$1" expected="$2" actual="$3"
1292
+ TESTS=$((TESTS + 1))
1293
+ if [[ "$expected" != "$actual" ]]; then
1294
+ echo "FAIL: $desc"
1295
+ echo " expected: $expected"
1296
+ echo " actual: $actual"
1297
+ FAILURES=$((FAILURES + 1))
1298
+ else
1299
+ echo "PASS: $desc"
1300
+ fi
1301
+ }
1302
+
1303
+ WORK=$(mktemp -d)
1304
+ trap "rm -rf '$WORK'" EXIT
1305
+
1306
+ # Plan with clear parallel batches
1307
+ cat > "$WORK/parallel-plan.md" << 'PLAN'
1308
+ ## Batch 1: Foundation
1309
+
1310
+ **Files:**
1311
+ - Create: `src/lib.sh`
1312
+
1313
+ ### Task 1: Create lib
1314
+ Write lib.
1315
+
1316
+ ## Batch 2: Feature A
1317
+
1318
+ **Files:**
1319
+ - Create: `src/feature-a.sh`
1320
+ context_refs: src/lib.sh
1321
+
1322
+ ### Task 2: Build feature A
1323
+
1324
+ ## Batch 3: Feature B
1325
+
1326
+ **Files:**
1327
+ - Create: `src/feature-b.sh`
1328
+ context_refs: src/lib.sh
1329
+
1330
+ ### Task 3: Build feature B
1331
+
1332
+ ## Batch 4: Integration
1333
+
1334
+ **Files:**
1335
+ - Modify: `src/feature-a.sh`
1336
+ - Modify: `src/feature-b.sh`
1337
+ context_refs: src/feature-a.sh, src/feature-b.sh
1338
+
1339
+ ### Task 4: Wire together
1340
+ PLAN
1341
+
1342
+ # Test dependency graph building
1343
+ deps=$(build_dependency_graph "$WORK/parallel-plan.md")
1344
+ assert_eq "dep graph: B2 depends on B1" "true" "$(echo "$deps" | jq '.["2"] | contains(["1"])')"
1345
+ assert_eq "dep graph: B3 depends on B1" "true" "$(echo "$deps" | jq '.["3"] | contains(["1"])')"
1346
+ assert_eq "dep graph: B4 depends on B2 and B3" "true" "$(echo "$deps" | jq '.["4"] | (contains(["2"]) and contains(["3"]))')"
1347
+
1348
+ # Test parallelism score
1349
+ score=$(compute_parallelism_score "$WORK/parallel-plan.md")
1350
+ TESTS=$((TESTS + 1))
1351
+ if [[ "$score" -gt 40 ]]; then
1352
+ echo "PASS: parallelism score: $score > 40 (batches 2,3 can run parallel)"
1353
+ else
1354
+ echo "FAIL: parallelism score: $score <= 40"
1355
+ FAILURES=$((FAILURES + 1))
1356
+ fi
1357
+
1358
+ # Test mode recommendation
1359
+ mode=$(recommend_execution_mode "$score" "false" 21)
1360
+ assert_eq "recommend: team for high score" "team" "$mode"
1361
+
1362
+ # Sequential plan (each batch depends on previous)
1363
+ cat > "$WORK/sequential-plan.md" << 'PLAN'
1364
+ ## Batch 1: Setup
1365
+
1366
+ **Files:**
1367
+ - Create: `src/main.sh`
1368
+
1369
+ ### Task 1: Setup
1370
+
1371
+ ## Batch 2: Extend
1372
+
1373
+ **Files:**
1374
+ - Modify: `src/main.sh`
1375
+ context_refs: src/main.sh
1376
+
1377
+ ### Task 2: Extend
1378
+
1379
+ ## Batch 3: Finalize
1380
+
1381
+ **Files:**
1382
+ - Modify: `src/main.sh`
1383
+ context_refs: src/main.sh
1384
+
1385
+ ### Task 3: Finalize
1386
+ PLAN
1387
+
1388
+ score=$(compute_parallelism_score "$WORK/sequential-plan.md")
1389
+ TESTS=$((TESTS + 1))
1390
+ if [[ "$score" -lt 30 ]]; then
1391
+ echo "PASS: sequential plan score: $score < 30"
1392
+ else
1393
+ echo "FAIL: sequential plan score: $score >= 30"
1394
+ FAILURES=$((FAILURES + 1))
1395
+ fi
1396
+
1397
+ mode=$(recommend_execution_mode "$score" "false" 21)
1398
+ assert_eq "recommend: headless for low score" "headless" "$mode"
1399
+
1400
+ # Test model routing
1401
+ model=$(classify_batch_model "$WORK/parallel-plan.md" 1)
1402
+ assert_eq "model: batch with Create files = sonnet" "sonnet" "$model"
1403
+
1404
+ # Verification batch
1405
+ cat > "$WORK/verify-plan.md" << 'PLAN'
1406
+ ## Batch 1: Verify everything
1407
+
1408
+ ### Task 1: Run all tests
1409
+
1410
+ **Step 1: Run tests**
1411
+ Run: `bash scripts/tests/run-all-tests.sh`
1412
+
1413
+ **Step 2: Check line counts**
1414
+ Run: `wc -l scripts/*.sh`
1415
+ PLAN
1416
+
1417
+ model=$(classify_batch_model "$WORK/verify-plan.md" 1)
1418
+ assert_eq "model: batch with only Run commands = haiku" "haiku" "$model"
1419
+
1420
+ echo ""
1421
+ echo "Results: $((TESTS - FAILURES))/$TESTS passed"
1422
+ if [[ $FAILURES -gt 0 ]]; then
1423
+ echo "FAILURES: $FAILURES"
1424
+ exit 1
1425
+ fi
1426
+ echo "ALL PASSED"
1427
+ ```
1428
+
1429
+ **Step 2: Run test to verify it fails**
1430
+
1431
+ Run: `bash scripts/tests/test-run-plan-routing.sh`
1432
+ Expected: FAIL
1433
+
1434
+ **Step 3: Implement run-plan-routing.sh**
1435
+
1436
+ Create `scripts/lib/run-plan-routing.sh` with:
1437
+ - `build_dependency_graph()` — parse Files/context_refs to build JSON dependency graph
1438
+ - `compute_parallelism_score()` — 0-100 score based on independence
1439
+ - `recommend_execution_mode()` — headless vs team based on score + capabilities
1440
+ - `classify_batch_model()` — sonnet/haiku/opus based on batch content
1441
+ - `generate_routing_plan()` — human-readable routing plan output
1442
+ - Configuration constants at top of file
1443
+
1444
+ Target: ~200 lines. Implementation should parse `**Files:**` sections for Create/Modify paths and `context_refs:` lines for dependencies. Build a JSON object mapping batch number to list of dependent batch numbers. Score based on: how many batches can run in parallel groups.
1445
+
1446
+ **Step 4: Run tests to verify**
1447
+
1448
+ Run: `bash scripts/tests/test-run-plan-routing.sh && bash scripts/tests/run-all-tests.sh`
1449
+ Expected: ALL PASSED
1450
+
1451
+ **Step 5: Commit**
1452
+
1453
+ ```bash
1454
+ git add scripts/lib/run-plan-routing.sh scripts/tests/test-run-plan-routing.sh
1455
+ git commit -m "feat: add plan analysis with dependency graph, parallelism scoring, and model routing"
1456
+ ```
1457
+
1458
+ ### Task 14: Wire decision gate into run-plan.sh
1459
+
1460
+ **Files:**
1461
+ - Modify: `scripts/run-plan.sh`
1462
+
1463
+ **Step 1: Source routing lib and add analysis before mode selection**
1464
+
1465
+ In `scripts/run-plan.sh`, after `print_banner` in `main()` and before the `case "$MODE"` block, add:
1466
+
1467
+ ```bash
1468
+ # Analyze plan and show routing plan
1469
+ source "$SCRIPT_DIR/lib/run-plan-routing.sh"
1470
+ local score
1471
+ score=$(compute_parallelism_score "$PLAN_FILE" 2>/dev/null || echo "0")
1472
+ local available_mem
1473
+ available_mem=$(free -g 2>/dev/null | awk '/Mem:/{print $7}' || echo "999")
1474
+ local teams_available=false
1475
+ [[ -n "${CLAUDE_CODE_EXPERIMENTAL_AGENT_TEAMS:-}" ]] && teams_available=true
1476
+
1477
+ # Show routing plan
1478
+ generate_routing_plan "$PLAN_FILE" "$score" "$teams_available" "$available_mem" "$MODE"
1479
+
1480
+ # Auto-select mode if not explicitly set
1481
+ if [[ "$MODE" == "auto" ]]; then
1482
+ MODE=$(recommend_execution_mode "$score" "$teams_available" "$available_mem")
1483
+ echo ""
1484
+ echo "Auto-selected mode: $MODE (parallelism score: $score)"
1485
+ fi
1486
+ ```
1487
+
1488
+ Add `auto` as a new mode option (default when no `--mode` specified). Update arg parsing to default `MODE="auto"` instead of `MODE="headless"`.
1489
+
1490
+ **Step 2: Run tests to verify nothing breaks**
1491
+
1492
+ Run: `bash scripts/tests/run-all-tests.sh`
1493
+ Expected: ALL PASSED
1494
+
1495
+ **Step 3: Commit**
1496
+
1497
+ ```bash
1498
+ git add scripts/run-plan.sh
1499
+ git commit -m "feat: add decision gate — auto-select execution mode based on plan analysis"
1500
+ ```
1501
+
1502
+ ### Task 15: Implement run-plan-team.sh
1503
+
1504
+ **Files:**
1505
+ - Create: `scripts/lib/run-plan-team.sh`
1506
+ - Test: `scripts/tests/test-run-plan-team.sh`
1507
+
1508
+ **Step 1: Write run_mode_team()**
1509
+
1510
+ Create `scripts/lib/run-plan-team.sh`:
1511
+
1512
+ This is the most complex new module. It needs to:
1513
+ 1. Create a team (TeamCreate)
1514
+ 2. Create tasks from batches (respecting dependency graph)
1515
+ 3. Spawn worker agents with isolated worktrees
1516
+ 4. Monitor batch completions and run quality gates
1517
+ 5. Progressive merge after each batch passes
1518
+ 6. Handle speculative execution
1519
+
1520
+ Since this runs within a Claude Code session (not headless), it generates the team setup as a prompt/script that Claude Code executes. The headless fallback generates a shell script that orchestrates multiple `claude -p` processes.
1521
+
1522
+ For the headless case, implement a simplified version:
1523
+ - Sequential batch groups (parallel within a group)
1524
+ - Each group's batches run as parallel background `claude -p` processes
1525
+ - Wait for all in group, run quality gates, merge, next group
1526
+
1527
+ Target: ~200 lines.
1528
+
1529
+ **Key function:**
1530
+
1531
+ ```bash
1532
+ run_mode_team() {
1533
+ local dep_graph
1534
+ dep_graph=$(build_dependency_graph "$PLAN_FILE")
1535
+
1536
+ # Build parallel groups from dependency graph
1537
+ local groups
1538
+ groups=$(compute_parallel_groups "$dep_graph" "$START_BATCH" "$END_BATCH")
1539
+ # groups is a JSON array of arrays: [[1],[2,3],[4]]
1540
+
1541
+ local group_count
1542
+ group_count=$(echo "$groups" | jq 'length')
1543
+
1544
+ for ((g = 0; g < group_count; g++)); do
1545
+ local group_batches
1546
+ group_batches=$(echo "$groups" | jq -r ".[$g][]")
1547
+ local batch_count
1548
+ batch_count=$(echo "$group_batches" | wc -l)
1549
+
1550
+ echo ""
1551
+ echo "================================================================"
1552
+ echo " Group $((g+1)): $(echo "$group_batches" | tr '\n' ',' | sed 's/,$//')"
1553
+ echo " ($batch_count batches in parallel)"
1554
+ echo "================================================================"
1555
+
1556
+ # Launch each batch in the group in parallel
1557
+ local pids=()
1558
+ local batch_logs=()
1559
+ for batch in $group_batches; do
1560
+ local model
1561
+ model=$(classify_batch_model "$PLAN_FILE" "$batch")
1562
+ local log_file="$WORKTREE/logs/batch-${batch}-team.log"
1563
+ batch_logs+=("$log_file")
1564
+
1565
+ # Create isolated worktree for this batch
1566
+ local batch_worktree="$WORKTREE/.worktrees/batch-$batch"
1567
+ mkdir -p "$batch_worktree"
1568
+ # Use git worktree if in a git repo
1569
+ if git rev-parse --git-dir >/dev/null 2>&1; then
1570
+ git worktree add -q "$batch_worktree" HEAD 2>/dev/null || true
1571
+ fi
1572
+
1573
+ local prompt
1574
+ prompt=$(build_batch_prompt "$PLAN_FILE" "$batch" "$batch_worktree" "$PYTHON" "$QUALITY_GATE_CMD" "0")
1575
+
1576
+ echo " Starting batch $batch ($model) in background..."
1577
+ CLAUDECODE= claude -p "$prompt" \
1578
+ --model "$model" \
1579
+ --allowedTools "Bash,Read,Write,Edit,Grep,Glob" \
1580
+ --permission-mode bypassPermissions \
1581
+ > "$log_file" 2>&1 &
1582
+ pids+=($!)
1583
+ done
1584
+
1585
+ # Wait for all batches in group
1586
+ local all_passed=true
1587
+ for i in "${!pids[@]}"; do
1588
+ local pid=${pids[$i]}
1589
+ local batch=$(echo "$group_batches" | sed -n "$((i+1))p")
1590
+ wait "$pid" || true
1591
+
1592
+ # Run quality gate
1593
+ local gate_exit=0
1594
+ run_quality_gate "$WORKTREE" "$QUALITY_GATE_CMD" "$batch" "0" || gate_exit=$?
1595
+ if [[ $gate_exit -ne 0 ]]; then
1596
+ echo " Batch $batch FAILED quality gate"
1597
+ all_passed=false
1598
+ else
1599
+ echo " Batch $batch PASSED"
1600
+ # Merge worktree back
1601
+ # (simplified: copy changed files back)
1602
+ fi
1603
+ done
1604
+
1605
+ if [[ "$all_passed" != true ]]; then
1606
+ echo "Group $((g+1)) had failures. Stopping."
1607
+ exit 1
1608
+ fi
1609
+ done
1610
+ }
1611
+ ```
1612
+
1613
+ Note: This is a simplified team mode. Full agent teams integration (TeamCreate, SendMessage) requires running inside a Claude Code session, not headless bash. The headless version uses parallel `claude -p` processes with worktree isolation.
1614
+
1615
+ **Step 2: Write tests**
1616
+
1617
+ Test `compute_parallel_groups()` with the parallel and sequential plans from Task 13's test fixtures.
1618
+
1619
+ **Step 3: Run tests and commit**
1620
+
1621
+ ```bash
1622
+ git add scripts/lib/run-plan-team.sh scripts/tests/test-run-plan-team.sh
1623
+ git commit -m "feat: implement team mode with parallel batch groups and worktree isolation"
1624
+ ```
1625
+
1626
+ ### Task 16: Add routing decision log
1627
+
1628
+ **Files:**
1629
+ - Modify: `scripts/lib/run-plan-routing.sh` (add `log_routing_decision()`)
1630
+ - Modify: `scripts/lib/run-plan-team.sh` (call logger)
1631
+ - Modify: `scripts/lib/run-plan-headless.sh` (call logger)
1632
+
1633
+ **Step 1: Implement log_routing_decision()**
1634
+
1635
+ Add to `scripts/lib/run-plan-routing.sh`:
1636
+
1637
+ ```bash
1638
+ log_routing_decision() {
1639
+ local worktree="$1" category="$2" message="$3"
1640
+ local log_file="$worktree/logs/routing-decisions.log"
1641
+ mkdir -p "$(dirname "$log_file")"
1642
+ echo "[$(date '+%H:%M:%S')] $category: $message" >> "$log_file"
1643
+ }
1644
+ ```
1645
+
1646
+ Wire into team.sh (MODE, PARALLEL, MODEL, GATE_PASS, MERGE decisions) and headless.sh (MODE selection).
1647
+
1648
+ **Step 2: Run tests and commit**
1649
+
1650
+ ```bash
1651
+ git add scripts/lib/run-plan-routing.sh scripts/lib/run-plan-team.sh scripts/lib/run-plan-headless.sh
1652
+ git commit -m "feat: add routing decision log for execution traceability"
1653
+ ```
1654
+
1655
+ ### Task 17: Wire pipeline-status.sh to show routing results
1656
+
1657
+ **Files:**
1658
+ - Modify: `scripts/pipeline-status.sh`
1659
+
1660
+ **Step 1: Add routing section to pipeline-status output**
1661
+
1662
+ After existing status sections, add:
1663
+
1664
+ ```bash
1665
+ # Routing decisions (if available)
1666
+ if [[ -f "$PROJECT_ROOT/logs/routing-decisions.log" ]]; then
1667
+ echo ""
1668
+ echo "=== Routing Decisions ==="
1669
+ tail -20 "$PROJECT_ROOT/logs/routing-decisions.log"
1670
+ fi
1671
+ ```
1672
+
1673
+ **Step 2: Run tests and commit**
1674
+
1675
+ ```bash
1676
+ git add scripts/pipeline-status.sh
1677
+ git commit -m "feat: show routing decisions in pipeline-status.sh output"
1678
+ ```
1679
+
1680
+ ---
1681
+
1682
+ ## Batch 5: Parallel Patch Sampling
1683
+
1684
+ context_refs: scripts/lib/run-plan-team.sh, scripts/lib/run-plan-routing.sh, scripts/lib/run-plan-headless.sh, scripts/lib/run-plan-context.sh
1685
+
1686
+ ### Task 18: Create run-plan-scoring.sh
1687
+
1688
+ **Files:**
1689
+ - Create: `scripts/lib/run-plan-scoring.sh`
1690
+ - Test: `scripts/tests/test-run-plan-scoring.sh`
1691
+
1692
+ **Step 1: Write the failing test**
1693
+
1694
+ Create `scripts/tests/test-run-plan-scoring.sh`:
1695
+
1696
+ ```bash
1697
+ #!/usr/bin/env bash
1698
+ set -euo pipefail
1699
+
1700
+ SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
1701
+ source "$SCRIPT_DIR/../lib/run-plan-scoring.sh"
1702
+
1703
+ FAILURES=0
1704
+ TESTS=0
1705
+
1706
+ assert_eq() {
1707
+ local desc="$1" expected="$2" actual="$3"
1708
+ TESTS=$((TESTS + 1))
1709
+ if [[ "$expected" != "$actual" ]]; then
1710
+ echo "FAIL: $desc"
1711
+ echo " expected: $expected"
1712
+ echo " actual: $actual"
1713
+ FAILURES=$((FAILURES + 1))
1714
+ else
1715
+ echo "PASS: $desc"
1716
+ fi
1717
+ }
1718
+
1719
+ # Gate failed = score 0
1720
+ score=$(score_candidate 0 50 100 0 0 0)
1721
+ assert_eq "score: gate failed = 0" "0" "$score"
1722
+
1723
+ # Gate passed, good metrics
1724
+ score=$(score_candidate 1 50 100 2 0 0)
1725
+ TESTS=$((TESTS + 1))
1726
+ if [[ "$score" -gt 0 ]]; then
1727
+ echo "PASS: score: gate passed = positive ($score)"
1728
+ else
1729
+ echo "FAIL: score: gate passed should be positive ($score)"
1730
+ FAILURES=$((FAILURES + 1))
1731
+ fi
1732
+
1733
+ # More tests = higher score
1734
+ score_a=$(score_candidate 1 50 100 0 0 0)
1735
+ score_b=$(score_candidate 1 80 100 0 0 0)
1736
+ TESTS=$((TESTS + 1))
1737
+ if [[ "$score_b" -gt "$score_a" ]]; then
1738
+ echo "PASS: score: more tests = higher score ($score_b > $score_a)"
1739
+ else
1740
+ echo "FAIL: score: more tests should be higher ($score_b <= $score_a)"
1741
+ FAILURES=$((FAILURES + 1))
1742
+ fi
1743
+
1744
+ # Lesson violations = penalty
1745
+ score_clean=$(score_candidate 1 50 100 0 0 0)
1746
+ score_dirty=$(score_candidate 1 50 100 0 2 0)
1747
+ TESTS=$((TESTS + 1))
1748
+ if [[ "$score_clean" -gt "$score_dirty" ]]; then
1749
+ echo "PASS: score: lesson violations penalized ($score_clean > $score_dirty)"
1750
+ else
1751
+ echo "FAIL: score: lesson violations not penalized ($score_clean <= $score_dirty)"
1752
+ FAILURES=$((FAILURES + 1))
1753
+ fi
1754
+
1755
+ # select_winner picks highest score
1756
+ winner=$(select_winner "500 300 700 0")
1757
+ assert_eq "select_winner: picks index of highest" "2" "$winner"
1758
+
1759
+ # select_winner returns -1 when all zero
1760
+ winner=$(select_winner "0 0 0")
1761
+ assert_eq "select_winner: all zero = -1 (no winner)" "-1" "$winner"
1762
+
1763
+ echo ""
1764
+ echo "Results: $((TESTS - FAILURES))/$TESTS passed"
1765
+ if [[ $FAILURES -gt 0 ]]; then
1766
+ echo "FAILURES: $FAILURES"
1767
+ exit 1
1768
+ fi
1769
+ echo "ALL PASSED"
1770
+ ```
1771
+
1772
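+ **Step 2: Run test to verify it fails** — Run: `bash scripts/tests/test-run-plan-scoring.sh`; Expected: FAIL — `scripts/lib/run-plan-scoring.sh` doesn't exist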
1773
+
1774
+ **Step 3: Implement run-plan-scoring.sh**
1775
+
1776
+ Create `scripts/lib/run-plan-scoring.sh`:
1777
+
1778
+ ```bash
1779
+ #!/usr/bin/env bash
1780
+ # run-plan-scoring.sh — Candidate scoring for parallel patch sampling
1781
+ #
1782
+ # Functions:
1783
+ # score_candidate <gate_passed> <test_count> <diff_lines> <lint_warnings> <lesson_violations> <ast_violations>
1784
+ # select_winner <scores_string> -> index of highest score (0-based), -1 if all zero
1785
+
1786
+ score_candidate() {
1787
+ local gate_passed="${1:-0}"
1788
+ local test_count="${2:-0}"
1789
+ local diff_lines="${3:-1}"
1790
+ local lint_warnings="${4:-0}"
1791
+ local lesson_violations="${5:-0}"
1792
+ local ast_violations="${6:-0}"
1793
+
1794
+ if [[ "$gate_passed" -ne 1 ]]; then
1795
+ echo 0
1796
+ return
1797
+ fi
1798
+
1799
+ # Avoid division by zero
1800
+ [[ "$diff_lines" -lt 1 ]] && diff_lines=1
1801
+
1802
+ local score=$(( (test_count * 10) + (10000 / (diff_lines + 1)) + (1000 / (lint_warnings + 1)) - (lesson_violations * 200) - (ast_violations * 100) ))
1803
+
1804
+ # Floor at 1 (gate passed = always positive)
1805
+ [[ "$score" -lt 1 ]] && score=1
1806
+ echo "$score"
1807
+ }
1808
+
1809
+ select_winner() {
1810
+ local scores_str="$1"
1811
+ local max_score=0
1812
+ local max_idx=-1
1813
+ local idx=0
1814
+
1815
+ for score in $scores_str; do
1816
+ if [[ "$score" -gt "$max_score" ]]; then
1817
+ max_score="$score"
1818
+ max_idx=$idx
1819
+ fi
1820
+ idx=$((idx + 1))
1821
+ done
1822
+
1823
+ echo "$max_idx"
1824
+ }
1825
+ ```
1826
+
1827
+ **Step 4: Run tests and commit**
1828
+
1829
+ ```bash
1830
+ git add scripts/lib/run-plan-scoring.sh scripts/tests/test-run-plan-scoring.sh
1831
+ git commit -m "feat: add candidate scoring for parallel patch sampling"
1832
+ ```
1833
+
1834
+ ### Task 19: Implement sampling in run-plan-headless.sh
1835
+
1836
+ **Files:**
1837
+ - Modify: `scripts/lib/run-plan-headless.sh`
1838
+ - Modify: `scripts/run-plan.sh` (add --sample flag)
1839
+
1840
+ **Step 1: Add --sample flag to run-plan.sh arg parsing**
1841
+
1842
+ Add to arg parsing:
1843
+ ```bash
1844
+ SAMPLE_COUNT=0 # 0 = disabled
1845
+
1846
+ # In parse_args:
1847
+ --sample) if [[ "${2:-}" =~ ^[0-9]+$ ]]; then SAMPLE_COUNT="$2"; shift 2; else SAMPLE_COUNT=3; shift; fi ;;
1848
+ --no-sample) SAMPLE_COUNT=0; shift ;;
1849
+ ```
1850
+
1851
+ **Step 2: Add sampling logic to retry path in run-plan-headless.sh**
1852
+
1853
+ In the retry section (after first failure), instead of simple retry, check if sampling is enabled:
1854
+
1855
+ ```bash
1856
+ # If sampling enabled and this is a retry, use parallel candidates
1857
+ if [[ "$SAMPLE_COUNT" -gt 0 && $attempt -ge 2 ]]; then
1858
+ echo " Sampling $SAMPLE_COUNT candidates for batch $batch..."
1859
+ local scores=""
1860
+ local candidate_logs=()
1861
+
1862
+ for ((c = 0; c < SAMPLE_COUNT; c++)); do
1863
+ local variant_suffix=""
1864
+ case $c in
1865
+ 0) variant_suffix="" ;; # vanilla
1866
+ 1) variant_suffix=$'\nIMPORTANT: Take a fundamentally different approach than the previous attempt.' ;;
1867
+ 2) variant_suffix=$'\nIMPORTANT: Make the minimum possible change to pass the quality gate.' ;;
1868
+ esac
1869
+
1870
+ local candidate_log="$WORKTREE/logs/batch-${batch}-candidate-${c}.log"
1871
+ candidate_logs+=("$candidate_log")
1872
+
1873
+ CLAUDECODE= claude -p "${full_prompt}${variant_suffix}" \
1874
+ --allowedTools "Bash,Read,Write,Edit,Grep,Glob" \
1875
+ --permission-mode bypassPermissions \
1876
+ > "$candidate_log" 2>&1 || true
1877
+
1878
+ # Score this candidate
1879
+ local gate_exit=0
1880
+ run_quality_gate "$WORKTREE" "$QUALITY_GATE_CMD" "sample-$c" "0" || gate_exit=$?
1881
+ local gate_passed=0
1882
+ [[ $gate_exit -eq 0 ]] && gate_passed=1
1883
+
1884
+ local new_tests
1885
+ new_tests=$(get_previous_test_count "$WORKTREE")
1886
+ local diff_size
1887
+ diff_size=$(cd "$WORKTREE" && git diff --numstat HEAD~1 2>/dev/null | awk '{ total += $1 + $2 } END { if (NR) print total }' || echo "100")
1888
+
1889
+ local score
1890
+ score=$(score_candidate "$gate_passed" "${new_tests:-0}" "${diff_size:-100}" "0" "0" "0")
1891
+ scores+="$score "
1892
+
1893
+ # If gate failed, reset for next candidate
1894
+ if [[ $gate_passed -eq 0 ]]; then
1895
+ cd "$WORKTREE" && git checkout . 2>/dev/null || true
1896
+ fi
1897
+ done
1898
+
1899
+ # Pick winner
1900
+ local winner
1901
+ winner=$(select_winner "$scores")
1902
+ if [[ "$winner" -ge 0 ]]; then
1903
+ echo " Winner: candidate $winner (scores: $scores)"
1904
+ batch_passed=true
1905
+ break
1906
+ else
1907
+ echo " No candidate passed quality gate"
1908
+ fi
1909
+ fi
1910
+ ```
1911
+
1912
+ **Step 3: Add sampling outcome logging**
1913
+
1914
+ After a winner is selected, append to `logs/sampling-outcomes.json`:
1915
+
1916
+ ```bash
1917
+ if [[ "$winner" -ge 0 ]]; then
1918
+ local outcomes_file="$WORKTREE/logs/sampling-outcomes.json"
1919
+ mkdir -p "$(dirname "$outcomes_file")"
1920
+ [[ ! -f "$outcomes_file" ]] && echo "[]" > "$outcomes_file"
1921
+
1922
+ local variant_name="vanilla"
1923
+ [[ "$winner" -eq 1 ]] && variant_name="different-approach"
1924
+ [[ "$winner" -eq 2 ]] && variant_name="minimal-change"
1925
+
1926
+ jq --arg bt "$title" --arg vn "$variant_name" --arg sc "$(cut -d' ' -f"$((winner + 1))" <<< "$scores")" \
1927
+ '. += [{"batch_type": $bt, "prompt_variant": $vn, "won": true, "score": ($sc | tonumber), "timestamp": (now | todate)}]' \
1928
+ "$outcomes_file" > "$outcomes_file.tmp" && mv "$outcomes_file.tmp" "$outcomes_file" || true
1929
+ fi
1930
+ ```
1931
+
1932
+ **Step 4: Run all tests to verify**
1933
+
1934
+ Run: `bash scripts/tests/run-all-tests.sh`
1935
+ Expected: ALL PASSED
1936
+
1937
+ **Step 5: Commit**
1938
+
1939
+ ```bash
1940
+ git add scripts/run-plan.sh scripts/lib/run-plan-headless.sh scripts/lib/run-plan-scoring.sh
1941
+ git commit -m "feat: implement parallel patch sampling with candidate scoring and outcome logging"
1942
+ ```
1943
+
1944
+ ### Task 20: Verify all scripts under 300 lines
1945
+
1946
+ **Step 1: Check line counts**
1947
+
1948
+ Run: `wc -l scripts/*.sh scripts/lib/*.sh | sort -n`
1949
+
1950
+ If any script exceeds 300 lines, extract functions into a new lib module.
1951
+
1952
+ **Step 2: Run full test suite**
1953
+
1954
+ Run: `bash scripts/tests/run-all-tests.sh`
1955
+ Expected: ALL PASSED
1956
+
1957
+ **Step 3: Run quality gate**
1958
+
1959
+ Run: `bash scripts/quality-gate.sh --project-root .`
1960
+ Expected: ALL PASSED (should now detect bash test suite)
1961
+
1962
+ ### Task 21: Final verification — vertical pipeline trace
1963
+
1964
+ **Step 1: Dry-run auto-compound.sh**
1965
+
1966
+ Run: `bash scripts/auto-compound.sh . --dry-run`
1967
+ Expected: Shows all 6+ stages of the pipeline (analyze, branch, prior-art, PRD, quality gate config, ralph loop, push/PR)
1968
+
1969
+ **Step 2: Run pipeline-status.sh**
1970
+
1971
+ Run: `bash scripts/pipeline-status.sh --project-root .`
1972
+ Expected: Shows state, routing decisions, test counts
1973
+
1974
+ **Step 3: Verify run-plan.sh shows routing plan**
1975
+
1976
+ Run: `bash scripts/run-plan.sh docs/plans/2026-02-21-code-factory-v2-phase4-implementation-plan.md --dry-run`
1977
+ Expected: Shows parallelism score, dependency graph, model routing, mode recommendation
1978
+
1979
+ **Step 4: Commit any remaining changes**
1980
+
1981
+ ```bash
1982
+ git add -A
1983
+ git commit -m "chore: final verification — all scripts under 300 lines, pipeline trace clean"
1984
+ ```
1985
+
1986
+ ---
1987
+
1988
+ ## Integration Wiring (Batch 5 final)
1989
+
1990
+ ### Task 22: Update CLAUDE.md with new capabilities
1991
+
1992
+ **Files:**
1993
+ - Modify: `CLAUDE.md`
1994
+
1995
+ Add to the Quality Gates section:
1996
+ - ast-grep structural analysis (optional)
1997
+ - `--sample N` flag for patch sampling
1998
+ - Team mode with `--mode team` or auto-detection
1999
+
2000
+ Add to State & Persistence section:
2001
+ - `logs/failure-patterns.json` — cross-run failure learning
2002
+ - `logs/routing-decisions.log` — execution traceability
2003
+ - `logs/sampling-outcomes.json` — prompt variant learning
2004
+
2005
+ **Step 1: Update CLAUDE.md**
2006
+
2007
+ **Step 2: Commit**
2008
+
2009
+ ```bash
2010
+ git add CLAUDE.md
2011
+ git commit -m "docs: update CLAUDE.md with Phase 4 capabilities — context assembler, ast-grep, team mode, sampling"
2012
+ ```