autonomous-coding-toolkit 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (324) hide show
  1. package/.claude-plugin/marketplace.json +22 -0
  2. package/.claude-plugin/plugin.json +13 -0
  3. package/LICENSE +21 -0
  4. package/Makefile +21 -0
  5. package/README.md +140 -0
  6. package/SECURITY.md +28 -0
  7. package/agents/bash-expert.md +113 -0
  8. package/agents/dependency-auditor.md +138 -0
  9. package/agents/integration-tester.md +120 -0
  10. package/agents/lesson-scanner.md +149 -0
  11. package/agents/python-expert.md +179 -0
  12. package/agents/service-monitor.md +141 -0
  13. package/agents/shell-expert.md +147 -0
  14. package/benchmarks/runner.sh +147 -0
  15. package/benchmarks/tasks/01-rest-endpoint/rubric.sh +29 -0
  16. package/benchmarks/tasks/01-rest-endpoint/task.md +17 -0
  17. package/benchmarks/tasks/02-refactor-module/task.md +8 -0
  18. package/benchmarks/tasks/03-fix-integration-bug/task.md +8 -0
  19. package/benchmarks/tasks/04-add-test-coverage/task.md +8 -0
  20. package/benchmarks/tasks/05-multi-file-feature/task.md +8 -0
  21. package/bin/act.js +238 -0
  22. package/commands/autocode.md +6 -0
  23. package/commands/cancel-ralph.md +18 -0
  24. package/commands/code-factory.md +53 -0
  25. package/commands/create-prd.md +55 -0
  26. package/commands/ralph-loop.md +18 -0
  27. package/commands/run-plan.md +117 -0
  28. package/commands/submit-lesson.md +122 -0
  29. package/docs/ARCHITECTURE.md +630 -0
  30. package/docs/CONTRIBUTING.md +125 -0
  31. package/docs/lessons/0001-bare-exception-swallowing.md +34 -0
  32. package/docs/lessons/0002-async-def-without-await.md +28 -0
  33. package/docs/lessons/0003-create-task-without-callback.md +28 -0
  34. package/docs/lessons/0004-hardcoded-test-counts.md +28 -0
  35. package/docs/lessons/0005-sqlite-without-closing.md +33 -0
  36. package/docs/lessons/0006-venv-pip-path.md +27 -0
  37. package/docs/lessons/0007-runner-state-self-rejection.md +35 -0
  38. package/docs/lessons/0008-quality-gate-blind-spot.md +33 -0
  39. package/docs/lessons/0009-parser-overcount-empty-batches.md +36 -0
  40. package/docs/lessons/0010-local-outside-function-bash.md +33 -0
  41. package/docs/lessons/0011-batch-tests-for-unimplemented-code.md +36 -0
  42. package/docs/lessons/0012-api-markdown-unescaped-chars.md +33 -0
  43. package/docs/lessons/0013-export-prefix-env-parsing.md +33 -0
  44. package/docs/lessons/0014-decorator-registry-import-side-effect.md +43 -0
  45. package/docs/lessons/0015-frontend-backend-schema-drift.md +43 -0
  46. package/docs/lessons/0016-event-driven-cold-start-seeding.md +44 -0
  47. package/docs/lessons/0017-copy-paste-logic-diverges.md +43 -0
  48. package/docs/lessons/0018-layer-passes-pipeline-broken.md +45 -0
  49. package/docs/lessons/0019-systemd-envfile-ignores-export.md +41 -0
  50. package/docs/lessons/0020-persist-state-incrementally.md +44 -0
  51. package/docs/lessons/0021-dual-axis-testing.md +48 -0
  52. package/docs/lessons/0022-jsx-factory-shadowing.md +43 -0
  53. package/docs/lessons/0023-static-analysis-spiral.md +51 -0
  54. package/docs/lessons/0024-shared-pipeline-implementation.md +55 -0
  55. package/docs/lessons/0025-defense-in-depth-all-entry-points.md +65 -0
  56. package/docs/lessons/0026-linter-no-rules-false-enforcement.md +54 -0
  57. package/docs/lessons/0027-jsx-silent-prop-drop.md +64 -0
  58. package/docs/lessons/0028-no-infrastructure-in-client-code.md +49 -0
  59. package/docs/lessons/0029-never-write-secrets-to-files.md +61 -0
  60. package/docs/lessons/0030-cache-merge-not-replace.md +62 -0
  61. package/docs/lessons/0031-verify-units-at-boundaries.md +66 -0
  62. package/docs/lessons/0032-module-lifecycle-subscribe-unsubscribe.md +89 -0
  63. package/docs/lessons/0033-async-iteration-mutable-snapshot.md +72 -0
  64. package/docs/lessons/0034-caller-missing-await-silent-discard.md +65 -0
  65. package/docs/lessons/0035-duplicate-registration-silent-overwrite.md +85 -0
  66. package/docs/lessons/0036-websocket-dirty-disconnect.md +33 -0
  67. package/docs/lessons/0037-parallel-agents-worktree-corruption.md +31 -0
  68. package/docs/lessons/0038-subscribe-no-stored-ref.md +36 -0
  69. package/docs/lessons/0039-fallback-or-default-hides-bugs.md +34 -0
  70. package/docs/lessons/0040-event-firehose-filter-first.md +36 -0
  71. package/docs/lessons/0041-ambiguous-base-dir-path-nesting.md +32 -0
  72. package/docs/lessons/0042-spec-compliance-insufficient.md +36 -0
  73. package/docs/lessons/0043-exact-count-extensible-collections.md +32 -0
  74. package/docs/lessons/0044-relative-file-deps-worktree.md +39 -0
  75. package/docs/lessons/0045-iterative-design-improvement.md +33 -0
  76. package/docs/lessons/0046-plan-assertion-math-bugs.md +38 -0
  77. package/docs/lessons/0047-pytest-single-threaded-default.md +37 -0
  78. package/docs/lessons/0048-integration-wiring-batch.md +40 -0
  79. package/docs/lessons/0049-ab-verification.md +41 -0
  80. package/docs/lessons/0050-editing-sourced-files-during-execution.md +33 -0
  81. package/docs/lessons/0051-infrastructure-fixes-cant-self-heal.md +30 -0
  82. package/docs/lessons/0052-uncommitted-changes-poison-quality-gates.md +31 -0
  83. package/docs/lessons/0053-jq-compact-flag-inconsistency.md +31 -0
  84. package/docs/lessons/0054-parser-matches-inside-code-blocks.md +30 -0
  85. package/docs/lessons/0055-agents-compensate-for-garbled-prompts.md +31 -0
  86. package/docs/lessons/0056-grep-count-exit-code-on-zero.md +42 -0
  87. package/docs/lessons/0057-new-artifacts-break-git-clean-gates.md +42 -0
  88. package/docs/lessons/0058-dead-config-keys-never-consumed.md +49 -0
  89. package/docs/lessons/0059-contract-test-shared-structures.md +53 -0
  90. package/docs/lessons/0060-set-e-silent-death-in-runners.md +53 -0
  91. package/docs/lessons/0061-context-injection-dirty-state.md +50 -0
  92. package/docs/lessons/0062-sibling-bug-neighborhood-scan.md +29 -0
  93. package/docs/lessons/0063-one-flag-two-lifetimes.md +31 -0
  94. package/docs/lessons/0064-test-passes-wrong-reason.md +31 -0
  95. package/docs/lessons/0065-pipefail-grep-count-double-output.md +39 -0
  96. package/docs/lessons/0066-local-keyword-outside-function.md +37 -0
  97. package/docs/lessons/0067-stdin-hang-non-interactive-shell.md +36 -0
  98. package/docs/lessons/0068-agent-builds-wrong-thing-correctly.md +31 -0
  99. package/docs/lessons/0069-plan-quality-dominates-execution.md +30 -0
  100. package/docs/lessons/0070-spec-echo-back-prevents-drift.md +31 -0
  101. package/docs/lessons/0071-positive-instructions-outperform-negative.md +30 -0
  102. package/docs/lessons/0072-lost-in-the-middle-context-placement.md +30 -0
  103. package/docs/lessons/0073-unscoped-lessons-cause-false-positives.md +30 -0
  104. package/docs/lessons/0074-stale-context-injection-wrong-batch.md +32 -0
  105. package/docs/lessons/0075-research-artifacts-must-persist.md +32 -0
  106. package/docs/lessons/0076-wrong-decomposition-contaminates-downstream.md +30 -0
  107. package/docs/lessons/0077-cherry-pick-merges-need-manual-resolution.md +30 -0
  108. package/docs/lessons/0078-static-review-without-live-test.md +30 -0
  109. package/docs/lessons/0079-integration-wiring-batch-required.md +32 -0
  110. package/docs/lessons/FRAMEWORK.md +161 -0
  111. package/docs/lessons/SUMMARY.md +201 -0
  112. package/docs/lessons/TEMPLATE.md +85 -0
  113. package/docs/plans/2026-02-21-code-factory-v2-design.md +204 -0
  114. package/docs/plans/2026-02-21-code-factory-v2-implementation-plan.md +2189 -0
  115. package/docs/plans/2026-02-21-code-factory-v2-phase4-design.md +537 -0
  116. package/docs/plans/2026-02-21-code-factory-v2-phase4-implementation-plan.md +2012 -0
  117. package/docs/plans/2026-02-21-hardening-pass-design.md +108 -0
  118. package/docs/plans/2026-02-21-hardening-pass-plan.md +1378 -0
  119. package/docs/plans/2026-02-21-mab-research-report.md +406 -0
  120. package/docs/plans/2026-02-21-marketplace-restructure-design.md +240 -0
  121. package/docs/plans/2026-02-21-marketplace-restructure-plan.md +832 -0
  122. package/docs/plans/2026-02-21-phase4-completion-plan.md +697 -0
  123. package/docs/plans/2026-02-21-validator-suite-design.md +148 -0
  124. package/docs/plans/2026-02-21-validator-suite-plan.md +540 -0
  125. package/docs/plans/2026-02-22-mab-research-round2.md +556 -0
  126. package/docs/plans/2026-02-22-mab-run-design.md +462 -0
  127. package/docs/plans/2026-02-22-mab-run-plan.md +2046 -0
  128. package/docs/plans/2026-02-22-operations-design-methodology-research.md +681 -0
  129. package/docs/plans/2026-02-22-research-agent-failure-taxonomy.md +532 -0
  130. package/docs/plans/2026-02-22-research-code-guideline-policies.md +886 -0
  131. package/docs/plans/2026-02-22-research-codebase-audit-refactoring.md +908 -0
  132. package/docs/plans/2026-02-22-research-coding-standards-documentation.md +541 -0
  133. package/docs/plans/2026-02-22-research-competitive-landscape.md +687 -0
  134. package/docs/plans/2026-02-22-research-comprehensive-testing.md +1076 -0
  135. package/docs/plans/2026-02-22-research-context-utilization.md +459 -0
  136. package/docs/plans/2026-02-22-research-cost-quality-tradeoff.md +548 -0
  137. package/docs/plans/2026-02-22-research-lesson-transferability.md +508 -0
  138. package/docs/plans/2026-02-22-research-multi-agent-coordination.md +312 -0
  139. package/docs/plans/2026-02-22-research-phase-integration.md +602 -0
  140. package/docs/plans/2026-02-22-research-plan-quality.md +428 -0
  141. package/docs/plans/2026-02-22-research-prompt-engineering.md +558 -0
  142. package/docs/plans/2026-02-22-research-unconventional-perspectives.md +528 -0
  143. package/docs/plans/2026-02-22-research-user-adoption.md +638 -0
  144. package/docs/plans/2026-02-22-research-verification-effectiveness.md +433 -0
  145. package/docs/plans/2026-02-23-agent-suite-design.md +299 -0
  146. package/docs/plans/2026-02-23-agent-suite-plan.md +578 -0
  147. package/docs/plans/2026-02-23-phase3-cost-infrastructure-design.md +148 -0
  148. package/docs/plans/2026-02-23-phase3-cost-infrastructure-plan.md +1062 -0
  149. package/docs/plans/2026-02-23-research-bash-expert-agent.md +543 -0
  150. package/docs/plans/2026-02-23-research-dependency-auditor-agent.md +564 -0
  151. package/docs/plans/2026-02-23-research-improving-existing-agents.md +503 -0
  152. package/docs/plans/2026-02-23-research-integration-tester-agent.md +454 -0
  153. package/docs/plans/2026-02-23-research-python-expert-agent.md +429 -0
  154. package/docs/plans/2026-02-23-research-service-monitor-agent.md +425 -0
  155. package/docs/plans/2026-02-23-research-shell-expert-agent.md +533 -0
  156. package/docs/plans/2026-02-23-roadmap-to-completion.md +530 -0
  157. package/docs/plans/2026-02-24-headless-module-split-design.md +98 -0
  158. package/docs/plans/2026-02-24-headless-module-split.md +443 -0
  159. package/docs/plans/2026-02-24-lesson-scope-metadata-design.md +228 -0
  160. package/docs/plans/2026-02-24-lesson-scope-metadata-plan.md +968 -0
  161. package/docs/plans/2026-02-24-npm-packaging-design.md +841 -0
  162. package/docs/plans/2026-02-24-npm-packaging-plan.md +1965 -0
  163. package/docs/plans/audit-findings.md +186 -0
  164. package/docs/telegram-notification-format.md +98 -0
  165. package/examples/example-plan.md +51 -0
  166. package/examples/example-prd.json +72 -0
  167. package/examples/example-roadmap.md +33 -0
  168. package/examples/quickstart-plan.md +63 -0
  169. package/hooks/hooks.json +26 -0
  170. package/hooks/setup-symlinks.sh +48 -0
  171. package/hooks/stop-hook.sh +135 -0
  172. package/package.json +47 -0
  173. package/policies/bash.md +71 -0
  174. package/policies/python.md +71 -0
  175. package/policies/testing.md +61 -0
  176. package/policies/universal.md +60 -0
  177. package/scripts/analyze-report.sh +97 -0
  178. package/scripts/architecture-map.sh +145 -0
  179. package/scripts/auto-compound.sh +273 -0
  180. package/scripts/batch-audit.sh +42 -0
  181. package/scripts/batch-test.sh +101 -0
  182. package/scripts/entropy-audit.sh +221 -0
  183. package/scripts/failure-digest.sh +51 -0
  184. package/scripts/generate-ast-rules.sh +96 -0
  185. package/scripts/init.sh +112 -0
  186. package/scripts/lesson-check.sh +428 -0
  187. package/scripts/lib/common.sh +61 -0
  188. package/scripts/lib/cost-tracking.sh +153 -0
  189. package/scripts/lib/ollama.sh +60 -0
  190. package/scripts/lib/progress-writer.sh +128 -0
  191. package/scripts/lib/run-plan-context.sh +215 -0
  192. package/scripts/lib/run-plan-echo-back.sh +231 -0
  193. package/scripts/lib/run-plan-headless.sh +396 -0
  194. package/scripts/lib/run-plan-notify.sh +57 -0
  195. package/scripts/lib/run-plan-parser.sh +81 -0
  196. package/scripts/lib/run-plan-prompt.sh +215 -0
  197. package/scripts/lib/run-plan-quality-gate.sh +132 -0
  198. package/scripts/lib/run-plan-routing.sh +315 -0
  199. package/scripts/lib/run-plan-sampling.sh +170 -0
  200. package/scripts/lib/run-plan-scoring.sh +146 -0
  201. package/scripts/lib/run-plan-state.sh +142 -0
  202. package/scripts/lib/run-plan-team.sh +199 -0
  203. package/scripts/lib/telegram.sh +54 -0
  204. package/scripts/lib/thompson-sampling.sh +176 -0
  205. package/scripts/license-check.sh +74 -0
  206. package/scripts/mab-run.sh +575 -0
  207. package/scripts/module-size-check.sh +146 -0
  208. package/scripts/patterns/async-no-await.yml +5 -0
  209. package/scripts/patterns/bare-except.yml +6 -0
  210. package/scripts/patterns/empty-catch.yml +6 -0
  211. package/scripts/patterns/hardcoded-localhost.yml +9 -0
  212. package/scripts/patterns/retry-loop-no-backoff.yml +12 -0
  213. package/scripts/pipeline-status.sh +197 -0
  214. package/scripts/policy-check.sh +226 -0
  215. package/scripts/prior-art-search.sh +133 -0
  216. package/scripts/promote-mab-lessons.sh +126 -0
  217. package/scripts/prompts/agent-a-superpowers.md +29 -0
  218. package/scripts/prompts/agent-b-ralph.md +29 -0
  219. package/scripts/prompts/judge-agent.md +61 -0
  220. package/scripts/prompts/planner-agent.md +44 -0
  221. package/scripts/pull-community-lessons.sh +90 -0
  222. package/scripts/quality-gate.sh +266 -0
  223. package/scripts/research-gate.sh +90 -0
  224. package/scripts/run-plan.sh +329 -0
  225. package/scripts/scope-infer.sh +159 -0
  226. package/scripts/setup-ralph-loop.sh +155 -0
  227. package/scripts/telemetry.sh +230 -0
  228. package/scripts/tests/run-all-tests.sh +52 -0
  229. package/scripts/tests/test-act-cli.sh +46 -0
  230. package/scripts/tests/test-agents-md.sh +87 -0
  231. package/scripts/tests/test-analyze-report.sh +114 -0
  232. package/scripts/tests/test-architecture-map.sh +89 -0
  233. package/scripts/tests/test-auto-compound.sh +169 -0
  234. package/scripts/tests/test-batch-test.sh +65 -0
  235. package/scripts/tests/test-benchmark-runner.sh +25 -0
  236. package/scripts/tests/test-common.sh +168 -0
  237. package/scripts/tests/test-cost-tracking.sh +158 -0
  238. package/scripts/tests/test-echo-back.sh +180 -0
  239. package/scripts/tests/test-entropy-audit.sh +146 -0
  240. package/scripts/tests/test-failure-digest.sh +66 -0
  241. package/scripts/tests/test-generate-ast-rules.sh +145 -0
  242. package/scripts/tests/test-helpers.sh +82 -0
  243. package/scripts/tests/test-init.sh +47 -0
  244. package/scripts/tests/test-lesson-check.sh +278 -0
  245. package/scripts/tests/test-lesson-local.sh +55 -0
  246. package/scripts/tests/test-license-check.sh +109 -0
  247. package/scripts/tests/test-mab-run.sh +182 -0
  248. package/scripts/tests/test-ollama-lib.sh +49 -0
  249. package/scripts/tests/test-ollama.sh +60 -0
  250. package/scripts/tests/test-pipeline-status.sh +198 -0
  251. package/scripts/tests/test-policy-check.sh +124 -0
  252. package/scripts/tests/test-prior-art-search.sh +96 -0
  253. package/scripts/tests/test-progress-writer.sh +140 -0
  254. package/scripts/tests/test-promote-mab-lessons.sh +110 -0
  255. package/scripts/tests/test-pull-community-lessons.sh +149 -0
  256. package/scripts/tests/test-quality-gate.sh +241 -0
  257. package/scripts/tests/test-research-gate.sh +132 -0
  258. package/scripts/tests/test-run-plan-cli.sh +86 -0
  259. package/scripts/tests/test-run-plan-context.sh +305 -0
  260. package/scripts/tests/test-run-plan-e2e.sh +153 -0
  261. package/scripts/tests/test-run-plan-headless.sh +424 -0
  262. package/scripts/tests/test-run-plan-notify.sh +124 -0
  263. package/scripts/tests/test-run-plan-parser.sh +217 -0
  264. package/scripts/tests/test-run-plan-prompt.sh +254 -0
  265. package/scripts/tests/test-run-plan-quality-gate.sh +222 -0
  266. package/scripts/tests/test-run-plan-routing.sh +178 -0
  267. package/scripts/tests/test-run-plan-scoring.sh +148 -0
  268. package/scripts/tests/test-run-plan-state.sh +261 -0
  269. package/scripts/tests/test-run-plan-team.sh +157 -0
  270. package/scripts/tests/test-scope-infer.sh +150 -0
  271. package/scripts/tests/test-setup-ralph-loop.sh +63 -0
  272. package/scripts/tests/test-telegram-env.sh +38 -0
  273. package/scripts/tests/test-telegram.sh +121 -0
  274. package/scripts/tests/test-telemetry.sh +46 -0
  275. package/scripts/tests/test-thompson-sampling.sh +139 -0
  276. package/scripts/tests/test-validate-all.sh +60 -0
  277. package/scripts/tests/test-validate-commands.sh +89 -0
  278. package/scripts/tests/test-validate-hooks.sh +98 -0
  279. package/scripts/tests/test-validate-lessons.sh +150 -0
  280. package/scripts/tests/test-validate-plan-quality.sh +235 -0
  281. package/scripts/tests/test-validate-plans.sh +187 -0
  282. package/scripts/tests/test-validate-plugin.sh +106 -0
  283. package/scripts/tests/test-validate-prd.sh +184 -0
  284. package/scripts/tests/test-validate-skills.sh +134 -0
  285. package/scripts/validate-all.sh +57 -0
  286. package/scripts/validate-commands.sh +67 -0
  287. package/scripts/validate-hooks.sh +89 -0
  288. package/scripts/validate-lessons.sh +98 -0
  289. package/scripts/validate-plan-quality.sh +369 -0
  290. package/scripts/validate-plans.sh +120 -0
  291. package/scripts/validate-plugin.sh +86 -0
  292. package/scripts/validate-policies.sh +42 -0
  293. package/scripts/validate-prd.sh +118 -0
  294. package/scripts/validate-skills.sh +96 -0
  295. package/skills/autocode/SKILL.md +285 -0
  296. package/skills/autocode/ab-verification.md +51 -0
  297. package/skills/autocode/code-quality-standards.md +37 -0
  298. package/skills/autocode/competitive-mode.md +364 -0
  299. package/skills/brainstorming/SKILL.md +97 -0
  300. package/skills/capture-lesson/SKILL.md +187 -0
  301. package/skills/check-lessons/SKILL.md +116 -0
  302. package/skills/dispatching-parallel-agents/SKILL.md +110 -0
  303. package/skills/executing-plans/SKILL.md +85 -0
  304. package/skills/finishing-a-development-branch/SKILL.md +201 -0
  305. package/skills/receiving-code-review/SKILL.md +72 -0
  306. package/skills/requesting-code-review/SKILL.md +59 -0
  307. package/skills/requesting-code-review/code-reviewer.md +82 -0
  308. package/skills/research/SKILL.md +145 -0
  309. package/skills/roadmap/SKILL.md +115 -0
  310. package/skills/subagent-driven-development/SKILL.md +98 -0
  311. package/skills/subagent-driven-development/code-quality-reviewer-prompt.md +18 -0
  312. package/skills/subagent-driven-development/implementer-prompt.md +73 -0
  313. package/skills/subagent-driven-development/spec-reviewer-prompt.md +57 -0
  314. package/skills/systematic-debugging/SKILL.md +134 -0
  315. package/skills/systematic-debugging/condition-based-waiting.md +64 -0
  316. package/skills/systematic-debugging/defense-in-depth.md +32 -0
  317. package/skills/systematic-debugging/root-cause-tracing.md +55 -0
  318. package/skills/test-driven-development/SKILL.md +167 -0
  319. package/skills/using-git-worktrees/SKILL.md +219 -0
  320. package/skills/using-superpowers/SKILL.md +54 -0
  321. package/skills/verification-before-completion/SKILL.md +140 -0
  322. package/skills/verify/SKILL.md +82 -0
  323. package/skills/writing-plans/SKILL.md +128 -0
  324. package/skills/writing-skills/SKILL.md +93 -0
@@ -0,0 +1,32 @@
1
+ ---
2
+ id: 0041
3
+ title: "Ambiguous base dir variable causes path double-nesting"
4
+ severity: should-fix
5
+ languages: [python, shell, all]
6
+ scope: [universal]
7
+ category: integration-boundaries
8
+ pattern:
9
+ type: semantic
10
+ description: "Variable named log_dir already contains subdirectory, but os.path.join adds it again"
11
+ fix: "Name variables to encode their scope (log_base_dir vs intelligence_dir); verify paths before first use"
12
+ example:
13
+ bad: |
14
+ log_dir = "/var/logs/app/intelligence"
15
+ # Developer thinks log_dir is base, adds another level
16
+ intelligence_output = os.path.join(log_dir, "intelligence", "output.json")
17
+ # Result: /var/logs/app/intelligence/intelligence/output.json
18
+ good: |
19
+ log_base_dir = "/var/logs/app"
20
+ intelligence_dir = os.path.join(log_base_dir, "intelligence")
21
+ intelligence_output = os.path.join(intelligence_dir, "output.json")
22
+ # Result: /var/logs/app/intelligence/output.json
23
+ ---
24
+
25
+ ## Observation
26
+ Path variables are created with unclear semantics. A variable named `log_dir` might contain `/var/logs/app` or `/var/logs/app/intelligence`. Later code blindly adds subdirectories without checking the base, resulting in nested duplicates like `intelligence/intelligence/output.json`.
27
+
28
+ ## Insight
29
+ Variable naming doesn't encode the directory's depth or scope. Different developers interpret the same variable name differently, leading to double-nesting or missing levels.
30
+
31
+ ## Lesson
32
+ Name path variables to encode their scope: use `_base_dir` for top-level, `_dir` for specific subdirectories. Verify all paths at initialization time before they're used. Print and assert the structure early: `assert log_base_dir.endswith('/logs/app')` and `assert intelligence_dir.endswith('/intelligence')`. Test with actual filesystem operations to catch these bugs immediately.
@@ -0,0 +1,36 @@
1
+ ---
2
+ id: 0042
3
+ title: "Spec compliance without quality review misses defensive gaps"
4
+ severity: should-fix
5
+ languages: [all]
6
+ scope: [universal]
7
+ category: integration-boundaries
8
+ pattern:
9
+ type: semantic
10
+ description: "Code review checks only spec compliance but misses error handling, cleanup, validation, and timeouts"
11
+ fix: "Include a defensive gaps checklist in code review, separate from spec compliance"
12
+ example:
13
+ bad: |
14
+ # Spec: "Call API and return result"
15
+ def fetch_data(url):
16
+ response = requests.get(url) # No timeout, no error handling
17
+ return response.json() # Crashes if invalid JSON
18
+ good: |
19
+ # Spec + defensive: Call API with timeout, handle errors, validate
20
+ def fetch_data(url):
21
+ try:
22
+ response = requests.get(url, timeout=30)
23
+ return response.json()
24
+ except (requests.Timeout, requests.JSONDecodeError) as e:
25
+ logger.error(f"Fetch failed: {e}")
26
+ return None
27
+ ---
28
+
29
+ ## Observation
30
+ Code review focuses on whether the implementation matches the specification (does it call the API? does it return the result?). It skips defensive programming: timeouts, error handling, input validation, cleanup paths, and null checks. The code is spec-compliant but fragile.
31
+
32
+ ## Insight
33
+ Spec compliance is a floor, not a ceiling. Defensive programming is orthogonal to spec compliance. Reviewers who are trained to check spec often skip defensive gaps because they're not part of the spec.
34
+
35
+ ## Lesson
36
+ Create a separate defensive gaps checklist for code review: Does the code have timeouts? Error handling? Input validation? Cleanup paths? Null checks? Is there logging for failure cases? Run this checklist independently from spec compliance. Make it part of the merge gate, not optional. Test with fault injection and chaos testing to verify defensive behavior.
@@ -0,0 +1,32 @@
1
+ ---
2
+ id: 0043
3
+ title: "Exact count assertions on extensible collections break on addition"
4
+ severity: should-fix
5
+ languages: [python, javascript, all]
6
+ scope: [universal]
7
+ category: test-anti-patterns
8
+ pattern:
9
+ type: syntactic
10
+ regex: "assert.*len\\(.*==\\s*\\d+"
11
+ description: "Test asserts exact collection length that breaks when collection grows"
12
+ fix: "Use >= for extensible collections, or assert specific items exist rather than total count"
13
+ example:
14
+ bad: |
15
+ def test_users():
16
+ users = get_users()
17
+ assert len(users) == 3 # Breaks when a 4th user is added
18
+ good: |
19
+ def test_users():
20
+ users = get_users()
21
+ assert len(users) >= 3 # Allows growth
22
+ assert "alice" in [u.name for u in users]
23
+ ---
24
+
25
+ ## Observation
26
+ Tests assert that a collection has an exact count (`assert len(items) == 5`). When the feature grows and items are added to the collection, the test fails even though the new behavior is correct. Tests become brittle and must be updated constantly.
27
+
28
+ ## Insight
29
+ Exact counts are too restrictive for evolving features. The test really cares about specific items being present, not the total count. Relying on exact-count assertions makes tests fragile to future additions.
30
+
31
+ ## Lesson
32
+ Use `>=` for collection length assertions in tests of extensible collections. Instead of asserting total count, assert that specific items exist: `assert "item" in collection` or `assert any(x.id == 5 for x in items)`. This makes tests resilient to future growth. Only use exact counts for fixed-size collections (e.g., tuple return values).
@@ -0,0 +1,39 @@
1
+ ---
2
+ id: 0044
3
+ title: "Relative `file:` deps break in git worktrees"
4
+ severity: should-fix
5
+ languages: [javascript, typescript]
6
+ scope: [universal]
7
+ category: integration-boundaries
8
+ pattern:
9
+ type: semantic
10
+ description: "package.json file: dependencies use relative paths that break in git worktrees at different depths"
11
+ fix: "Use workspace protocols, absolute paths resolved at install time, or npm/yarn workspaces"
12
+ example:
13
+ bad: |
14
+ // package.json in monorepo/services/api
15
+ {
16
+ "dependencies": {
17
+ "shared": "file:../shared" // Breaks in worktree
18
+ }
19
+ }
20
+ good: |
21
+ {
22
+ "workspaces": [
23
+ "packages/*",
24
+ "services/*"
25
+ ],
26
+ "dependencies": {
27
+ "shared": "workspace:*"
28
+ }
29
+ }
30
+ ---
31
+
32
+ ## Observation
33
+ npm/yarn `file:` dependencies use relative paths. When code is checked out into a git worktree at a different depth than the main repo, the relative path resolves to the wrong location (or doesn't exist). This breaks CI in specific git workflows.
34
+
35
+ ## Insight
36
+ Git worktrees can be created at arbitrary depths relative to the main repo. Relative path dependencies were designed for a single repository layout and fail when the layout changes.
37
+
38
+ ## Lesson
39
+ Use workspace protocols (`workspace:*`) in monorepos instead of `file:` dependencies. If `file:` is necessary, resolve relative paths to absolute paths at install time. For standalone packages, use npm/yarn workspaces or lerna to manage dependencies. Test with `git worktree add` at different depths to verify dependencies resolve correctly.
@@ -0,0 +1,33 @@
1
+ ---
2
+ id: 0045
3
+ title: "Iterative 'how would you improve' catches 35% more design gaps"
4
+ severity: should-fix
5
+ languages: [all]
6
+ scope: [universal]
7
+ category: integration-boundaries
8
+ pattern:
9
+ type: semantic
10
+ description: "Single-pass design review misses gaps that iterative improvement rounds would catch"
11
+ fix: "Ask 'how would you improve this section?' after each design section; 5 rounds is the sweet spot"
12
+ example:
13
+ bad: |
14
+ # Single design pass
15
+ Review once. Approve. Start building.
16
+ # Later: discover missing error handling, untested edge case
17
+ good: |
18
+ # Iterative design
19
+ Round 1: "What could break here?" -> Add timeout handling
20
+ Round 2: "How would this scale to 10K items?" -> Add pagination
21
+ Round 3: "What if database is down?" -> Add circuit breaker
22
+ Round 4: "How to monitor this?" -> Add metrics
23
+ Round 5: "Any security risks?" -> Add auth validation
24
+ ---
25
+
26
+ ## Observation
27
+ Design review done in a single pass typically covers the happy path. Iterative rounds of "how would you improve this section?" reveal gaps: edge cases, scale limits, failure modes, monitoring, and security issues that a single review missed.
28
+
29
+ ## Insight
30
+ Single-pass review relies on reviewers catching everything. Iterative rounds make gaps explicit by forcing the designer to consider improvements from different angles. Each round builds on the previous one and surfaces new concerns.
31
+
32
+ ## Lesson
33
+ After each major design section, ask "How would you improve this section?" Require at least 3 rounds; 5 is optimal. Each round should surface a new category: performance, fault tolerance, monitoring, security, or operational concerns. Document improvements and rationale. This catches design gaps before implementation and reduces rework later.
@@ -0,0 +1,38 @@
1
+ ---
2
+ id: 0046
3
+ title: "Plan-specified test assertions can have math bugs"
4
+ severity: should-fix
5
+ languages: [all]
6
+ scope: [universal]
7
+ category: test-anti-patterns
8
+ pattern:
9
+ type: semantic
10
+ description: "Implementation plan specifies test thresholds with math errors that implementer copies verbatim"
11
+ fix: "Verify threshold boundary logic independently before writing the test"
12
+ example:
13
+ bad: |
14
+ # Plan says: "Assert that 90% of requests succeed"
15
+ # Implementer writes (copying from plan):
16
+ assert success_count / total_count >= 0.9
17
+ # But 0.9 is already 90%, so this is correct.
18
+ # But what if the plan meant: "Assert that the error rate is below 10%"?
19
+ # assert error_count / total_count <= 0.1 # Different logic
20
+
21
+ # Implementer didn't verify the math matched intent
22
+ good: |
23
+ # Plan specifies: "Assert 90% success rate (>= 0.9)"
24
+ # Before implementing, verify:
25
+ # 90% = 0.9 (correct multiplier)
26
+ # 10% = 0.1 (correct error rate)
27
+ # Test with known values: 9/10 = 0.9 ✓
28
+ assert success_count / total_count >= 0.9
29
+ ---
30
+
31
+ ## Observation
32
+ Implementation plans specify test thresholds and assertions. Implementers copy these verbatim without verifying the math. If the plan has a boundary condition error (off-by-one, wrong direction, incorrect multiplier), the implementer creates a test that passes despite incorrect logic.
33
+
34
+ ## Insight
35
+ Plan authors may write thresholds informally or with implicit assumptions. Implementers assume the math is correct and don't double-check. Boundary logic errors slip through undetected.
36
+
37
+ ## Lesson
38
+ Before implementing any threshold-based assertion, verify the math independently. Test with concrete values to confirm the boundary is correct. For example, if the plan says "90% success rate," verify: success_count=9, total=10, then assert 9/10 >= 0.9 should pass. success_count=8, total=10, then assert 8/10 >= 0.9 should fail. Write and run these boundary tests before implementing the main test.
@@ -0,0 +1,37 @@
1
+ ---
2
+ id: 0047
3
+ title: "pytest runs single-threaded by default -- add xdist"
4
+ severity: should-fix
5
+ languages: [python]
6
+ scope: [framework:pytest]
7
+ category: performance
8
+ pattern:
9
+ type: semantic
10
+ description: "pytest test suite runs single-threaded when parallel execution would be significantly faster"
11
+ fix: "Add pytest-xdist to dev deps and addopts = '-n auto' to pytest config"
12
+ example:
13
+ bad: |
14
+ # pytest.ini or pyproject.toml
15
+ [tool.pytest.ini_options]
16
+ testpaths = ["tests"]
17
+ # Result: runs tests one at a time (slow)
18
+
19
+ good: |
20
+ # pyproject.toml
21
+ [tool.pytest.ini_options]
22
+ testpaths = ["tests"]
23
+ addopts = "-n auto --dist load"
24
+
25
+ # requirements-dev.txt or pyproject.toml
26
+ pytest-xdist>=3.5.0
27
+ # Result: runs tests in parallel (fast)
28
+ ---
29
+
30
+ ## Observation
31
+ pytest, by default, runs tests sequentially in a single worker process. For test suites with 50+ tests, this is significantly slower than parallel execution. Developers run test suites serially and accept the slow feedback loop, unaware that xdist can parallelize.
32
+
33
+ ## Insight
34
+ pytest-xdist provides automatic parallelization across multiple CPU cores. Running tests in parallel often provides 3-6x speedup on modern hardware, but requires explicit configuration. This is a low-effort, high-impact performance improvement.
35
+
36
+ ## Lesson
37
+ Add `pytest-xdist>=3.5.0` to dev dependencies. Add `addopts = "-n auto --dist load"` to pytest configuration. This parallelizes tests automatically, using all available CPU cores. Use `-n 0` to disable parallelization temporarily for debugging. Test with your specific test suite to measure speedup. For very large test suites, use `-n 6` instead of `-n auto` to prevent memory exhaustion.
@@ -0,0 +1,40 @@
1
+ ---
2
+ id: 0048
3
+ title: "Multi-batch plans need explicit integration wiring batch"
4
+ severity: should-fix
5
+ languages: [all]
6
+ scope: [universal]
7
+ category: integration-boundaries
8
+ pattern:
9
+ type: semantic
10
+ description: "Multi-batch plan builds components separately but skips the step of wiring them together"
11
+ fix: "Plans with 3+ batches must include a final integration wiring batch"
12
+ example:
13
+ bad: |
14
+ # Plan with 3 batches:
15
+ Batch 1: Build API endpoint
16
+ Batch 2: Build database schema
17
+ Batch 3: Build client code
18
+ # Missing: wire components together
19
+
20
+ # Result: Each piece works in isolation, but together they fail
21
+ good: |
22
+ # Plan with 4 batches:
23
+ Batch 1: Build API endpoint
24
+ Batch 2: Build database schema
25
+ Batch 3: Build client code
26
+ Batch 4: Integration wiring
27
+ - Connect API to database
28
+ - Connect client to API
29
+ - Verify end-to-end flow
30
+ - Run integration tests
31
+ ---
32
+
33
+ ## Observation
34
+ Multi-batch plans build components (API, database, client) independently. Each batch passes its own tests. But components aren't wired together during implementation. Integration happens only at the end, revealing coupling issues, interface mismatches, and missing adapters too late.
35
+
36
+ ## Insight
37
+ Batch-driven development optimizes for parallel work but can miss integration points. Components are unit-tested in isolation but may fail when combined. Without an explicit wiring batch, integration is assumed to "just work."
38
+
39
+ ## Lesson
40
+ Plans with 3+ batches must include a final integration wiring batch. This batch connects components built in earlier batches, verifies data flows through the full pipeline, and runs end-to-end integration tests. Include this batch in the plan before implementation starts. Test the full system (not just individual components) after wiring is complete.
@@ -0,0 +1,41 @@
1
+ ---
2
+ id: 0049
3
+ title: "A/B verification finds zero-overlap bug classes"
4
+ severity: should-fix
5
+ languages: [all]
6
+ scope: [universal]
7
+ category: integration-boundaries
8
+ pattern:
9
+ type: semantic
10
+ description: "Using only bottom-up or only top-down review misses entire classes of bugs"
11
+ fix: "Run both bottom-up (code-level) and top-down (architecture-level) review after 3+ batch implementations"
12
+ example:
13
+ bad: |
14
+ # Bottom-up only: review each component's code
15
+ # Result: logic errors caught, but coupling issues missed
16
+ # Reviewer doesn't see: API expects array, client sends object
17
+
18
+ # Top-down only: review architecture diagrams
19
+ # Result: structure looks good, but off-by-one in retry logic missed
20
+ # Reviewer doesn't see: code-level bugs
21
+ good: |
22
+ # Bottom-up: Review code implementation
23
+ - Are loops correct? Error handling present? State managed correctly?
24
+
25
+ # Top-down: Review architecture
26
+ - Do components couple correctly? Is data flow end-to-end?
27
+
28
+ # Both perspectives together catch more bugs than either alone
29
+ ---
30
+
31
+ ## Observation
32
+ Code reviews conducted only from the bottom-up (code-level logic) miss architectural coupling issues. Reviews conducted only from the top-down (architecture diagrams) miss implementation bugs. Different bugs are visible from different angles.
33
+
34
+ ## Insight
35
+ Bugs fall into different categories based on visibility:
36
+ - **Bottom-up visible:** off-by-one errors, null checks, state management, loop logic
37
+ - **Top-down visible:** coupling between components, interface mismatches, data flow breaks, missing error propagation
38
+ - **Requires both:** race conditions, distributed state consistency, integration deadlocks
39
+
40
+ ## Lesson
41
+ Run both bottom-up and top-down review after implementing 3+ batches. Bottom-up: inspect code for logic errors, edge cases, resource cleanup. Top-down: trace data flow end-to-end, verify component interfaces match, check for coupling leaks. Document findings from each perspective. Bugs caught only in top-down review indicate architectural issues; bugs caught only in bottom-up indicate implementation issues. Fix both before declaring done.
@@ -0,0 +1,33 @@
1
+ ---
2
+ id: 0050
3
+ title: "Editing files sourced by a running process breaks function signatures"
4
+ severity: blocker
5
+ languages: [shell]
6
+ scope: [project:autonomous-coding-toolkit]
7
+ category: integration-boundaries
8
+ pattern:
9
+ type: semantic
10
+ description: "Modifying function signatures in files that are actively sourced by a running bash process (e.g., editing run-plan-notify.sh while run-plan.sh is executing)"
11
+ fix: "Never edit library files while they're being sourced by a running process. Wait for the run to complete, or commit changes that only new runs will pick up."
12
+ example:
13
+ bad: |
14
+ # While run-plan.sh is running (sources run-plan-notify.sh at startup):
15
+ # Edit run-plan-notify.sh to change format_success_message from 6 to 9 params
16
+ # -> Next batch call crashes with wrong argument count
17
+ good: |
18
+ # Wait for run-plan.sh to finish, then edit
19
+ # Or: make changes backward-compatible (add params with defaults)
20
+ format_success_message() {
21
+ local plan="$1" batch="$2" total="${3:-?}" title="${4:-}"
22
+ # ... rest uses defaults for missing params
23
+ }
24
+ ---
25
+
26
+ ## Observation
27
+ During Phase 4 execution, `run-plan-notify.sh` was edited to add `total_batches` and `batch_title` parameters to `format_success_message` (6 → 9 params). The running `run-plan.sh` process had already sourced the original file at startup. When the next batch called `notify_success` with the old 6-parameter signature, the quality gate detected uncommitted changes and failed.
28
+
29
+ ## Insight
30
+ Bash sources files once at startup — there's no hot-reload. But the *file on disk* is what `git diff` sees. So editing a sourced file creates a two-way failure: (1) the running process uses stale function signatures, and (2) the quality gate sees uncommitted changes. The fix had to be committed to unblock the gate, but that commit changed signatures the running process was still calling with old argument counts.
31
+
32
+ ## Lesson
33
+ Treat sourced library files as immutable during execution. If you must change them: (a) make changes backward-compatible with default parameter values, (b) commit immediately so the quality gate stays clean, and (c) accept that the current run uses the old behavior. Never change function arity in a file that a running process has already sourced.
@@ -0,0 +1,30 @@
1
+ ---
2
+ id: 0051
3
+ title: "Infrastructure fixes in a plan cannot benefit the run executing that plan"
4
+ severity: should-fix
5
+ languages: [shell]
6
+ scope: [project:autonomous-coding-toolkit]
7
+ category: integration-boundaries
8
+ pattern:
9
+ type: semantic
10
+ description: "A plan includes tasks that fix the execution infrastructure (e.g., empty batch detection, parser improvements) but the current run-plan.sh process loaded the old code at startup"
11
+ fix: "Place infrastructure fixes in a separate pre-flight plan, or accept that the current run uses old behavior and the fix only helps future runs."
12
+ example:
13
+ bad: |
14
+ # Plan Batch 1: Fix empty batch detection in run-plan-headless.sh
15
+ # -> Fix is committed, but the running bash process already loaded old code
16
+ # -> Batches 6-19 still spawn claude for empty batches (43s each)
17
+ good: |
18
+ # Option A: Separate pre-flight plan for infra fixes, then main plan
19
+ # Option B: Accept the cost — document that infra fixes are forward-looking
20
+ # Option C: Use --start-batch to re-run from where infra fix takes effect
21
+ ---
22
+
23
+ ## Observation
24
+ The Phase 4 plan included Task 1: "Fix empty batch detection in run-plan-headless.sh." The fix was committed during Batch 1. However, the `run-plan.sh` bash process had already loaded `run-plan-headless.sh` at startup. Batches 6-19 (parser artifacts) still spawned a `claude -p` process for each empty batch (~30-50s each), wasting ~7 minutes and API calls.
25
+
26
+ ## Insight
27
+ Bash reads `source` files once. The running process keeps the in-memory version of all sourced functions. Committing a fix to disk doesn't update the running process — only a new invocation reads the new code. This is fundamentally different from interpreted languages with hot-reload (Python's importlib, Node's require cache invalidation).
28
+
29
+ ## Lesson
30
+ Infrastructure fixes (parser, quality gate, notification format) cannot benefit the execution that implements them. Either: (1) run infra fixes as a separate pre-flight step before the main plan, (2) accept the waste and document it as known, or (3) after the infra batch, stop and re-run with `--resume` so a fresh process loads the fixed code.
@@ -0,0 +1,31 @@
1
+ ---
2
+ id: 0052
3
+ title: "Uncommitted changes from parallel work fail the quality gate git-clean check"
4
+ severity: blocker
5
+ languages: [shell]
6
+ scope: [universal]
7
+ category: integration-boundaries
8
+ pattern:
9
+ type: semantic
10
+ description: "Manual edits to files in a worktree where run-plan.sh is executing — the git-clean check in quality-gate.sh detects uncommitted changes and fails the batch"
11
+ fix: "Never make uncommitted changes in a worktree with an active run-plan. Use a separate worktree or commit before the next quality gate runs."
12
+ example:
13
+ bad: |
14
+ # run-plan.sh is executing batches in ~/project/
15
+ # Meanwhile, manually edit scripts/lib/run-plan-notify.sh
16
+ # -> Quality gate runs check_git_clean() -> finds dirty working tree -> FAIL
17
+ good: |
18
+ # Option A: Edit in a separate worktree
19
+ git worktree add ../project-notify-fix -b fix/notifications
20
+ # Option B: Commit immediately before next quality gate
21
+ git add scripts/lib/run-plan-notify.sh && git commit -m "fix: ..."
22
+ ---
23
+
24
+ ## Observation
25
+ During Phase 4 execution, Telegram notification format was improved by editing `run-plan-notify.sh` and its test file directly in the worktree where `run-plan.sh` was running. When Batch 9 completed and the quality gate ran `check_git_clean()`, it found 3 uncommitted files and failed the batch. The batch agent then spent a full retry attempt (5+ minutes) trying to fix a problem that wasn't caused by its own work.
26
+
27
+ ## Insight
28
+ The quality gate's git-clean check exists to ensure every batch's work is committed before the next batch starts. It can't distinguish between "the batch agent forgot to commit" and "a human made parallel edits." Both look the same: dirty working tree. The retry agent wastes time investigating a failure it can't fix, since the dirty files aren't part of its batch.
29
+
30
+ ## Lesson
31
+ A worktree with an active run-plan is a no-edit zone. All parallel work must happen in a separate worktree or be committed immediately. If you must edit files in the active worktree, commit them before the next quality gate runs. The cost of a wasted retry (5+ minutes, API calls) far exceeds the cost of a quick commit.
@@ -0,0 +1,31 @@
1
+ ---
2
+ id: 0053
3
+ title: "Missing jq -c flag causes string comparison failures in tests"
4
+ severity: should-fix
5
+ languages: [shell]
6
+ scope: [project:autonomous-coding-toolkit]
7
+ category: test-anti-patterns
8
+ pattern:
9
+ type: syntactic
10
+ regex: "assert_eq.*\\$\\(.*jq [^-]"
11
+ description: "Using jq without -c flag in a string comparison assertion — pretty-printed output won't match compact expected values"
12
+ fix: "Always use jq -c (compact) when the output will be compared as a string. Or compare with jq equality instead of string equality."
13
+ example:
14
+ bad: |
15
+ result=$(echo "$json" | jq '.[0] | sort')
16
+ assert_eq "group is [1]" '[1]' "$result"
17
+ # FAIL: expected [1], got [\n 1\n]
18
+ good: |
19
+ result=$(echo "$json" | jq -c '.[0] | sort')
20
+ assert_eq "group is [1]" '[1]' "$result"
21
+ # PASS: both are [1]
22
+ ---
23
+
24
+ ## Observation
25
+ In `test-run-plan-team.sh`, three assertions failed because one `jq` call used `jq '.[2] | sort'` (pretty-printed) while the test expected compact JSON `[4]`. The other two calls on adjacent lines correctly used `jq -c`. The inconsistency was introduced when the test was generated — two of three similar lines got the `-c` flag, one didn't.
26
+
27
+ ## Insight
28
+ jq defaults to pretty-printing (multi-line, indented). When output is stored in a variable and compared with `assert_eq`, the multi-line string `[\n 4\n]` never matches the compact string `[4]`. This is invisible until the test runs because the pattern looks correct at a glance. The failure message shows the actual as multi-line, making the `-c` omission obvious only in hindsight.
29
+
30
+ ## Lesson
31
+ In shell test scripts, always use `jq -c` when the result will be compared as a string. Better yet, use `jq -e` for boolean checks or compare with `jq --argjson expected '[4]' '. == $expected'` to avoid format sensitivity entirely.
@@ -0,0 +1,30 @@
1
+ ---
2
+ id: 0054
3
+ title: "Markdown parser matches headers inside code blocks and test fixtures"
4
+ severity: should-fix
5
+ languages: [shell]
6
+ scope: [project:autonomous-coding-toolkit]
7
+ category: silent-failures
8
+ pattern:
9
+ type: semantic
10
+ description: "A markdown parser using simple regex (grep/awk) matches ## headers that appear inside fenced code blocks, heredocs, or test fixture content — inflating batch/task counts"
11
+ fix: "Track fenced code block state (``` toggles) and skip matches inside code blocks. Or use a proper markdown AST parser."
12
+ example:
13
+ bad: |
14
+ # count_batches uses: grep -c '^## Batch'
15
+ # Plan has a test fixture with '## Batch 2: Also Real' inside a heredoc
16
+ # -> Parser counts 19 batches for a 5-batch plan
17
+ good: |
18
+ count_batches() {
19
+ awk '/^```/{fence=!fence} !fence && /^## Batch/{n++} END{print n}' "$1"
20
+ }
21
+ ---
22
+
23
+ ## Observation
24
+ The Phase 4 plan had 5 real batches, but `count_batches` found 19. The extra 14 came from `## Batch` and `### Task` headers inside test fixtures, code examples, and plan documentation sections. Each phantom batch spawned a `claude -p` process (~30-50s each), wasting ~7 minutes and API credits.
25
+
26
+ ## Insight
27
+ Simple `grep '^## Batch'` treats all lines equally — it cannot distinguish a real plan header from one inside a fenced code block (` ``` `), a heredoc, or an inline example. This is a fundamental limitation of line-by-line regex parsing of markdown. The problem compounds: the plan's own test (Task 1) includes sample plan content with headers, creating a recursive parsing trap.
28
+
29
+ ## Lesson
30
+ Any markdown parser that affects execution (batch counting, task extraction) must be code-block-aware. Minimum viable fix: track ` ``` ` fence state with a toggle variable and skip matches inside fences. Better: use a dedicated markdown heading extraction that respects the CommonMark spec. The empty-batch-skip mitigates the cost but doesn't prevent the API calls for the initial `claude -p` attempt on each phantom batch.
@@ -0,0 +1,31 @@
1
+ ---
2
+ id: 0055
3
+ title: "LLM agents compensate for garbled batch prompts using cross-batch context"
4
+ severity: nice-to-have
5
+ languages: [all]
6
+ scope: [universal]
7
+ category: integration-boundaries
8
+ pattern:
9
+ type: semantic
10
+ description: "An agent receives a malformed or empty batch prompt but successfully infers the correct work from progress.txt, recent git commits, and the full plan file"
11
+ fix: "Design for resilience: include progress notes, recent commits, and the full plan in every batch prompt so agents can self-correct when the parsed batch content is wrong."
12
+ example:
13
+ bad: |
14
+ # Batch prompt: "Batch 9: (empty)" with no tasks
15
+ # Agent has no context -> does nothing or hallucinates
16
+ good: |
17
+ # Batch prompt: "Batch 9: (empty)" BUT includes:
18
+ # - progress.txt with completed tasks listed
19
+ # - Recent git log showing what's been done
20
+ # - Full plan file reference
21
+ # -> Agent reads plan, deduces remaining work, implements correctly
22
+ ---
23
+
24
+ ## Observation
25
+ During Phase 4, batches 2 and 9 received garbled prompts — Batch 2 got fake content from a test fixture ("Task 2: Do more / Write more code"), and Batch 9 got an empty batch title. Despite this, both agents successfully implemented the correct plan tasks. Batch 2 implemented Tasks 7-9 (context assembler), and Batch 9 implemented Tasks 10, 11, 12, 15, and 17 (ast-grep + team mode).
26
+
27
+ ## Insight
28
+ The cross-batch context system (progress.txt, recent commits in the prompt, and the plan file reference) provides enough information for agents to self-correct. The agent reads what's been done, compares it to the full plan, and picks up the next logical tasks. This resilience is an emergent property of including redundant context — no single source needs to be correct as long as the ensemble is informative.
29
+
30
+ ## Lesson
31
+ Always include multiple context signals in batch prompts: (1) progress notes listing completed work, (2) recent git commits showing actual changes, (3) the full plan file path for reference. This creates graceful degradation — even when the parser sends wrong batch content, agents can figure out what work remains. The cost is slightly larger prompts; the benefit is resilience to parser bugs.
@@ -0,0 +1,42 @@
1
+ ---
2
+ id: 0056
3
+ title: "grep -c exits 1 on zero matches, breaking || fallback arithmetic"
4
+ severity: should-fix
5
+ languages: [shell]
6
+ scope: [language:bash]
7
+ category: silent-failures
8
+ pattern:
9
+ type: syntactic
10
+ regex: "grep\\s+-c.*\\|\\|\\s*echo\\s+[\"']?0[\"']?"
11
+ description: "grep -c with || echo 0 fallback — produces multiline output on zero matches"
12
+ fix: "Use || true with ${var:-0} default instead of || echo 0"
13
+ example:
14
+ bad: |
15
+ count=$(echo "$text" | grep -c "pattern" || echo "0")
16
+ result=$((count + 1)) # breaks: count="0\n0" from both outputs
17
+ good: |
18
+ count=$(echo "$text" | grep -c "pattern" || true)
19
+ count=${count:-0}
20
+ result=$((count + 1))
21
+ ---
22
+
23
+ ## Observation
24
+
25
+ `grep -c` returns both the count AND exit code 1 when count is 0.
26
+ With `|| echo "0"`, the fallback fires AND grep's "0" output is kept,
27
+ producing `"0\n0"`. Bash arithmetic `$((0\n0 + 1))` fails with
28
+ "syntax error in expression".
29
+
30
+ ## Insight
31
+
32
+ `grep -c` violates the common assumption that exit code 1 means "error."
33
+ In grep, exit 1 means "no matches found" — a valid result, not a failure.
34
+ The `|| echo "0"` pattern double-counts because the subshell captures
35
+ grep's stdout ("0") AND the fallback echo ("0") on separate lines.
36
+
37
+ ## Lesson
38
+
39
+ Never use `grep -c ... || echo "0"` for count fallback. Use
40
+ `grep -c ... || true` to suppress the exit code, then `${var:-0}` as
41
+ the numeric default. This pattern is safe because `|| true` doesn't
42
+ add to stdout — it only prevents `set -e` from aborting the script.
@@ -0,0 +1,42 @@
1
+ ---
2
+ id: 0057
3
+ title: "New generated artifacts break git-clean quality gates"
4
+ severity: should-fix
5
+ languages: [all]
6
+ scope: [universal]
7
+ category: integration-boundaries
8
+ pattern:
9
+ type: semantic
10
+ description: "Adding a new generated file to a pipeline without updating gitignore and E2E tests"
11
+ fix: "When adding generated artifacts, update .gitignore AND all E2E test gitignore fixtures"
12
+ example:
13
+ bad: |
14
+ # Added generate_agents_md() to startup
15
+ # AGENTS.md created in worktree
16
+ # E2E test fails: "uncommitted changes in worktree"
17
+ good: |
18
+ # Added generate_agents_md() to startup
19
+ # Updated E2E test .gitignore to include AGENTS.md
20
+ # E2E test passes: git-clean check ignores AGENTS.md
21
+ ---
22
+
23
+ ## Observation
24
+
25
+ Adding `generate_agents_md()` to the headless runner startup created
26
+ AGENTS.md in the worktree. The function's own unit test passed. But the
27
+ E2E test failed because its git worktree now had an untracked file,
28
+ and the quality gate's `check_git_clean` rejected it.
29
+
30
+ ## Insight
31
+
32
+ This is Cluster B (Integration Boundaries). When a pipeline generates
33
+ new files, the git-clean check sees them as uncommitted work. Every
34
+ generated artifact needs a corresponding gitignore entry — both in the
35
+ real project AND in test fixtures that simulate the worktree.
36
+
37
+ ## Lesson
38
+
39
+ Whenever you add a new generated file to a pipeline: (1) add it to the
40
+ project's `.gitignore`, (2) add it to every E2E test fixture's
41
+ `.gitignore`, (3) run the E2E test before committing. The unit test for
42
+ the generator won't catch this because it doesn't run the quality gate.