autonomous-coding-toolkit 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (324) hide show
  1. package/.claude-plugin/marketplace.json +22 -0
  2. package/.claude-plugin/plugin.json +13 -0
  3. package/LICENSE +21 -0
  4. package/Makefile +21 -0
  5. package/README.md +140 -0
  6. package/SECURITY.md +28 -0
  7. package/agents/bash-expert.md +113 -0
  8. package/agents/dependency-auditor.md +138 -0
  9. package/agents/integration-tester.md +120 -0
  10. package/agents/lesson-scanner.md +149 -0
  11. package/agents/python-expert.md +179 -0
  12. package/agents/service-monitor.md +141 -0
  13. package/agents/shell-expert.md +147 -0
  14. package/benchmarks/runner.sh +147 -0
  15. package/benchmarks/tasks/01-rest-endpoint/rubric.sh +29 -0
  16. package/benchmarks/tasks/01-rest-endpoint/task.md +17 -0
  17. package/benchmarks/tasks/02-refactor-module/task.md +8 -0
  18. package/benchmarks/tasks/03-fix-integration-bug/task.md +8 -0
  19. package/benchmarks/tasks/04-add-test-coverage/task.md +8 -0
  20. package/benchmarks/tasks/05-multi-file-feature/task.md +8 -0
  21. package/bin/act.js +238 -0
  22. package/commands/autocode.md +6 -0
  23. package/commands/cancel-ralph.md +18 -0
  24. package/commands/code-factory.md +53 -0
  25. package/commands/create-prd.md +55 -0
  26. package/commands/ralph-loop.md +18 -0
  27. package/commands/run-plan.md +117 -0
  28. package/commands/submit-lesson.md +122 -0
  29. package/docs/ARCHITECTURE.md +630 -0
  30. package/docs/CONTRIBUTING.md +125 -0
  31. package/docs/lessons/0001-bare-exception-swallowing.md +34 -0
  32. package/docs/lessons/0002-async-def-without-await.md +28 -0
  33. package/docs/lessons/0003-create-task-without-callback.md +28 -0
  34. package/docs/lessons/0004-hardcoded-test-counts.md +28 -0
  35. package/docs/lessons/0005-sqlite-without-closing.md +33 -0
  36. package/docs/lessons/0006-venv-pip-path.md +27 -0
  37. package/docs/lessons/0007-runner-state-self-rejection.md +35 -0
  38. package/docs/lessons/0008-quality-gate-blind-spot.md +33 -0
  39. package/docs/lessons/0009-parser-overcount-empty-batches.md +36 -0
  40. package/docs/lessons/0010-local-outside-function-bash.md +33 -0
  41. package/docs/lessons/0011-batch-tests-for-unimplemented-code.md +36 -0
  42. package/docs/lessons/0012-api-markdown-unescaped-chars.md +33 -0
  43. package/docs/lessons/0013-export-prefix-env-parsing.md +33 -0
  44. package/docs/lessons/0014-decorator-registry-import-side-effect.md +43 -0
  45. package/docs/lessons/0015-frontend-backend-schema-drift.md +43 -0
  46. package/docs/lessons/0016-event-driven-cold-start-seeding.md +44 -0
  47. package/docs/lessons/0017-copy-paste-logic-diverges.md +43 -0
  48. package/docs/lessons/0018-layer-passes-pipeline-broken.md +45 -0
  49. package/docs/lessons/0019-systemd-envfile-ignores-export.md +41 -0
  50. package/docs/lessons/0020-persist-state-incrementally.md +44 -0
  51. package/docs/lessons/0021-dual-axis-testing.md +48 -0
  52. package/docs/lessons/0022-jsx-factory-shadowing.md +43 -0
  53. package/docs/lessons/0023-static-analysis-spiral.md +51 -0
  54. package/docs/lessons/0024-shared-pipeline-implementation.md +55 -0
  55. package/docs/lessons/0025-defense-in-depth-all-entry-points.md +65 -0
  56. package/docs/lessons/0026-linter-no-rules-false-enforcement.md +54 -0
  57. package/docs/lessons/0027-jsx-silent-prop-drop.md +64 -0
  58. package/docs/lessons/0028-no-infrastructure-in-client-code.md +49 -0
  59. package/docs/lessons/0029-never-write-secrets-to-files.md +61 -0
  60. package/docs/lessons/0030-cache-merge-not-replace.md +62 -0
  61. package/docs/lessons/0031-verify-units-at-boundaries.md +66 -0
  62. package/docs/lessons/0032-module-lifecycle-subscribe-unsubscribe.md +89 -0
  63. package/docs/lessons/0033-async-iteration-mutable-snapshot.md +72 -0
  64. package/docs/lessons/0034-caller-missing-await-silent-discard.md +65 -0
  65. package/docs/lessons/0035-duplicate-registration-silent-overwrite.md +85 -0
  66. package/docs/lessons/0036-websocket-dirty-disconnect.md +33 -0
  67. package/docs/lessons/0037-parallel-agents-worktree-corruption.md +31 -0
  68. package/docs/lessons/0038-subscribe-no-stored-ref.md +36 -0
  69. package/docs/lessons/0039-fallback-or-default-hides-bugs.md +34 -0
  70. package/docs/lessons/0040-event-firehose-filter-first.md +36 -0
  71. package/docs/lessons/0041-ambiguous-base-dir-path-nesting.md +32 -0
  72. package/docs/lessons/0042-spec-compliance-insufficient.md +36 -0
  73. package/docs/lessons/0043-exact-count-extensible-collections.md +32 -0
  74. package/docs/lessons/0044-relative-file-deps-worktree.md +39 -0
  75. package/docs/lessons/0045-iterative-design-improvement.md +33 -0
  76. package/docs/lessons/0046-plan-assertion-math-bugs.md +38 -0
  77. package/docs/lessons/0047-pytest-single-threaded-default.md +37 -0
  78. package/docs/lessons/0048-integration-wiring-batch.md +40 -0
  79. package/docs/lessons/0049-ab-verification.md +41 -0
  80. package/docs/lessons/0050-editing-sourced-files-during-execution.md +33 -0
  81. package/docs/lessons/0051-infrastructure-fixes-cant-self-heal.md +30 -0
  82. package/docs/lessons/0052-uncommitted-changes-poison-quality-gates.md +31 -0
  83. package/docs/lessons/0053-jq-compact-flag-inconsistency.md +31 -0
  84. package/docs/lessons/0054-parser-matches-inside-code-blocks.md +30 -0
  85. package/docs/lessons/0055-agents-compensate-for-garbled-prompts.md +31 -0
  86. package/docs/lessons/0056-grep-count-exit-code-on-zero.md +42 -0
  87. package/docs/lessons/0057-new-artifacts-break-git-clean-gates.md +42 -0
  88. package/docs/lessons/0058-dead-config-keys-never-consumed.md +49 -0
  89. package/docs/lessons/0059-contract-test-shared-structures.md +53 -0
  90. package/docs/lessons/0060-set-e-silent-death-in-runners.md +53 -0
  91. package/docs/lessons/0061-context-injection-dirty-state.md +50 -0
  92. package/docs/lessons/0062-sibling-bug-neighborhood-scan.md +29 -0
  93. package/docs/lessons/0063-one-flag-two-lifetimes.md +31 -0
  94. package/docs/lessons/0064-test-passes-wrong-reason.md +31 -0
  95. package/docs/lessons/0065-pipefail-grep-count-double-output.md +39 -0
  96. package/docs/lessons/0066-local-keyword-outside-function.md +37 -0
  97. package/docs/lessons/0067-stdin-hang-non-interactive-shell.md +36 -0
  98. package/docs/lessons/0068-agent-builds-wrong-thing-correctly.md +31 -0
  99. package/docs/lessons/0069-plan-quality-dominates-execution.md +30 -0
  100. package/docs/lessons/0070-spec-echo-back-prevents-drift.md +31 -0
  101. package/docs/lessons/0071-positive-instructions-outperform-negative.md +30 -0
  102. package/docs/lessons/0072-lost-in-the-middle-context-placement.md +30 -0
  103. package/docs/lessons/0073-unscoped-lessons-cause-false-positives.md +30 -0
  104. package/docs/lessons/0074-stale-context-injection-wrong-batch.md +32 -0
  105. package/docs/lessons/0075-research-artifacts-must-persist.md +32 -0
  106. package/docs/lessons/0076-wrong-decomposition-contaminates-downstream.md +30 -0
  107. package/docs/lessons/0077-cherry-pick-merges-need-manual-resolution.md +30 -0
  108. package/docs/lessons/0078-static-review-without-live-test.md +30 -0
  109. package/docs/lessons/0079-integration-wiring-batch-required.md +32 -0
  110. package/docs/lessons/FRAMEWORK.md +161 -0
  111. package/docs/lessons/SUMMARY.md +201 -0
  112. package/docs/lessons/TEMPLATE.md +85 -0
  113. package/docs/plans/2026-02-21-code-factory-v2-design.md +204 -0
  114. package/docs/plans/2026-02-21-code-factory-v2-implementation-plan.md +2189 -0
  115. package/docs/plans/2026-02-21-code-factory-v2-phase4-design.md +537 -0
  116. package/docs/plans/2026-02-21-code-factory-v2-phase4-implementation-plan.md +2012 -0
  117. package/docs/plans/2026-02-21-hardening-pass-design.md +108 -0
  118. package/docs/plans/2026-02-21-hardening-pass-plan.md +1378 -0
  119. package/docs/plans/2026-02-21-mab-research-report.md +406 -0
  120. package/docs/plans/2026-02-21-marketplace-restructure-design.md +240 -0
  121. package/docs/plans/2026-02-21-marketplace-restructure-plan.md +832 -0
  122. package/docs/plans/2026-02-21-phase4-completion-plan.md +697 -0
  123. package/docs/plans/2026-02-21-validator-suite-design.md +148 -0
  124. package/docs/plans/2026-02-21-validator-suite-plan.md +540 -0
  125. package/docs/plans/2026-02-22-mab-research-round2.md +556 -0
  126. package/docs/plans/2026-02-22-mab-run-design.md +462 -0
  127. package/docs/plans/2026-02-22-mab-run-plan.md +2046 -0
  128. package/docs/plans/2026-02-22-operations-design-methodology-research.md +681 -0
  129. package/docs/plans/2026-02-22-research-agent-failure-taxonomy.md +532 -0
  130. package/docs/plans/2026-02-22-research-code-guideline-policies.md +886 -0
  131. package/docs/plans/2026-02-22-research-codebase-audit-refactoring.md +908 -0
  132. package/docs/plans/2026-02-22-research-coding-standards-documentation.md +541 -0
  133. package/docs/plans/2026-02-22-research-competitive-landscape.md +687 -0
  134. package/docs/plans/2026-02-22-research-comprehensive-testing.md +1076 -0
  135. package/docs/plans/2026-02-22-research-context-utilization.md +459 -0
  136. package/docs/plans/2026-02-22-research-cost-quality-tradeoff.md +548 -0
  137. package/docs/plans/2026-02-22-research-lesson-transferability.md +508 -0
  138. package/docs/plans/2026-02-22-research-multi-agent-coordination.md +312 -0
  139. package/docs/plans/2026-02-22-research-phase-integration.md +602 -0
  140. package/docs/plans/2026-02-22-research-plan-quality.md +428 -0
  141. package/docs/plans/2026-02-22-research-prompt-engineering.md +558 -0
  142. package/docs/plans/2026-02-22-research-unconventional-perspectives.md +528 -0
  143. package/docs/plans/2026-02-22-research-user-adoption.md +638 -0
  144. package/docs/plans/2026-02-22-research-verification-effectiveness.md +433 -0
  145. package/docs/plans/2026-02-23-agent-suite-design.md +299 -0
  146. package/docs/plans/2026-02-23-agent-suite-plan.md +578 -0
  147. package/docs/plans/2026-02-23-phase3-cost-infrastructure-design.md +148 -0
  148. package/docs/plans/2026-02-23-phase3-cost-infrastructure-plan.md +1062 -0
  149. package/docs/plans/2026-02-23-research-bash-expert-agent.md +543 -0
  150. package/docs/plans/2026-02-23-research-dependency-auditor-agent.md +564 -0
  151. package/docs/plans/2026-02-23-research-improving-existing-agents.md +503 -0
  152. package/docs/plans/2026-02-23-research-integration-tester-agent.md +454 -0
  153. package/docs/plans/2026-02-23-research-python-expert-agent.md +429 -0
  154. package/docs/plans/2026-02-23-research-service-monitor-agent.md +425 -0
  155. package/docs/plans/2026-02-23-research-shell-expert-agent.md +533 -0
  156. package/docs/plans/2026-02-23-roadmap-to-completion.md +530 -0
  157. package/docs/plans/2026-02-24-headless-module-split-design.md +98 -0
  158. package/docs/plans/2026-02-24-headless-module-split.md +443 -0
  159. package/docs/plans/2026-02-24-lesson-scope-metadata-design.md +228 -0
  160. package/docs/plans/2026-02-24-lesson-scope-metadata-plan.md +968 -0
  161. package/docs/plans/2026-02-24-npm-packaging-design.md +841 -0
  162. package/docs/plans/2026-02-24-npm-packaging-plan.md +1965 -0
  163. package/docs/plans/audit-findings.md +186 -0
  164. package/docs/telegram-notification-format.md +98 -0
  165. package/examples/example-plan.md +51 -0
  166. package/examples/example-prd.json +72 -0
  167. package/examples/example-roadmap.md +33 -0
  168. package/examples/quickstart-plan.md +63 -0
  169. package/hooks/hooks.json +26 -0
  170. package/hooks/setup-symlinks.sh +48 -0
  171. package/hooks/stop-hook.sh +135 -0
  172. package/package.json +47 -0
  173. package/policies/bash.md +71 -0
  174. package/policies/python.md +71 -0
  175. package/policies/testing.md +61 -0
  176. package/policies/universal.md +60 -0
  177. package/scripts/analyze-report.sh +97 -0
  178. package/scripts/architecture-map.sh +145 -0
  179. package/scripts/auto-compound.sh +273 -0
  180. package/scripts/batch-audit.sh +42 -0
  181. package/scripts/batch-test.sh +101 -0
  182. package/scripts/entropy-audit.sh +221 -0
  183. package/scripts/failure-digest.sh +51 -0
  184. package/scripts/generate-ast-rules.sh +96 -0
  185. package/scripts/init.sh +112 -0
  186. package/scripts/lesson-check.sh +428 -0
  187. package/scripts/lib/common.sh +61 -0
  188. package/scripts/lib/cost-tracking.sh +153 -0
  189. package/scripts/lib/ollama.sh +60 -0
  190. package/scripts/lib/progress-writer.sh +128 -0
  191. package/scripts/lib/run-plan-context.sh +215 -0
  192. package/scripts/lib/run-plan-echo-back.sh +231 -0
  193. package/scripts/lib/run-plan-headless.sh +396 -0
  194. package/scripts/lib/run-plan-notify.sh +57 -0
  195. package/scripts/lib/run-plan-parser.sh +81 -0
  196. package/scripts/lib/run-plan-prompt.sh +215 -0
  197. package/scripts/lib/run-plan-quality-gate.sh +132 -0
  198. package/scripts/lib/run-plan-routing.sh +315 -0
  199. package/scripts/lib/run-plan-sampling.sh +170 -0
  200. package/scripts/lib/run-plan-scoring.sh +146 -0
  201. package/scripts/lib/run-plan-state.sh +142 -0
  202. package/scripts/lib/run-plan-team.sh +199 -0
  203. package/scripts/lib/telegram.sh +54 -0
  204. package/scripts/lib/thompson-sampling.sh +176 -0
  205. package/scripts/license-check.sh +74 -0
  206. package/scripts/mab-run.sh +575 -0
  207. package/scripts/module-size-check.sh +146 -0
  208. package/scripts/patterns/async-no-await.yml +5 -0
  209. package/scripts/patterns/bare-except.yml +6 -0
  210. package/scripts/patterns/empty-catch.yml +6 -0
  211. package/scripts/patterns/hardcoded-localhost.yml +9 -0
  212. package/scripts/patterns/retry-loop-no-backoff.yml +12 -0
  213. package/scripts/pipeline-status.sh +197 -0
  214. package/scripts/policy-check.sh +226 -0
  215. package/scripts/prior-art-search.sh +133 -0
  216. package/scripts/promote-mab-lessons.sh +126 -0
  217. package/scripts/prompts/agent-a-superpowers.md +29 -0
  218. package/scripts/prompts/agent-b-ralph.md +29 -0
  219. package/scripts/prompts/judge-agent.md +61 -0
  220. package/scripts/prompts/planner-agent.md +44 -0
  221. package/scripts/pull-community-lessons.sh +90 -0
  222. package/scripts/quality-gate.sh +266 -0
  223. package/scripts/research-gate.sh +90 -0
  224. package/scripts/run-plan.sh +329 -0
  225. package/scripts/scope-infer.sh +159 -0
  226. package/scripts/setup-ralph-loop.sh +155 -0
  227. package/scripts/telemetry.sh +230 -0
  228. package/scripts/tests/run-all-tests.sh +52 -0
  229. package/scripts/tests/test-act-cli.sh +46 -0
  230. package/scripts/tests/test-agents-md.sh +87 -0
  231. package/scripts/tests/test-analyze-report.sh +114 -0
  232. package/scripts/tests/test-architecture-map.sh +89 -0
  233. package/scripts/tests/test-auto-compound.sh +169 -0
  234. package/scripts/tests/test-batch-test.sh +65 -0
  235. package/scripts/tests/test-benchmark-runner.sh +25 -0
  236. package/scripts/tests/test-common.sh +168 -0
  237. package/scripts/tests/test-cost-tracking.sh +158 -0
  238. package/scripts/tests/test-echo-back.sh +180 -0
  239. package/scripts/tests/test-entropy-audit.sh +146 -0
  240. package/scripts/tests/test-failure-digest.sh +66 -0
  241. package/scripts/tests/test-generate-ast-rules.sh +145 -0
  242. package/scripts/tests/test-helpers.sh +82 -0
  243. package/scripts/tests/test-init.sh +47 -0
  244. package/scripts/tests/test-lesson-check.sh +278 -0
  245. package/scripts/tests/test-lesson-local.sh +55 -0
  246. package/scripts/tests/test-license-check.sh +109 -0
  247. package/scripts/tests/test-mab-run.sh +182 -0
  248. package/scripts/tests/test-ollama-lib.sh +49 -0
  249. package/scripts/tests/test-ollama.sh +60 -0
  250. package/scripts/tests/test-pipeline-status.sh +198 -0
  251. package/scripts/tests/test-policy-check.sh +124 -0
  252. package/scripts/tests/test-prior-art-search.sh +96 -0
  253. package/scripts/tests/test-progress-writer.sh +140 -0
  254. package/scripts/tests/test-promote-mab-lessons.sh +110 -0
  255. package/scripts/tests/test-pull-community-lessons.sh +149 -0
  256. package/scripts/tests/test-quality-gate.sh +241 -0
  257. package/scripts/tests/test-research-gate.sh +132 -0
  258. package/scripts/tests/test-run-plan-cli.sh +86 -0
  259. package/scripts/tests/test-run-plan-context.sh +305 -0
  260. package/scripts/tests/test-run-plan-e2e.sh +153 -0
  261. package/scripts/tests/test-run-plan-headless.sh +424 -0
  262. package/scripts/tests/test-run-plan-notify.sh +124 -0
  263. package/scripts/tests/test-run-plan-parser.sh +217 -0
  264. package/scripts/tests/test-run-plan-prompt.sh +254 -0
  265. package/scripts/tests/test-run-plan-quality-gate.sh +222 -0
  266. package/scripts/tests/test-run-plan-routing.sh +178 -0
  267. package/scripts/tests/test-run-plan-scoring.sh +148 -0
  268. package/scripts/tests/test-run-plan-state.sh +261 -0
  269. package/scripts/tests/test-run-plan-team.sh +157 -0
  270. package/scripts/tests/test-scope-infer.sh +150 -0
  271. package/scripts/tests/test-setup-ralph-loop.sh +63 -0
  272. package/scripts/tests/test-telegram-env.sh +38 -0
  273. package/scripts/tests/test-telegram.sh +121 -0
  274. package/scripts/tests/test-telemetry.sh +46 -0
  275. package/scripts/tests/test-thompson-sampling.sh +139 -0
  276. package/scripts/tests/test-validate-all.sh +60 -0
  277. package/scripts/tests/test-validate-commands.sh +89 -0
  278. package/scripts/tests/test-validate-hooks.sh +98 -0
  279. package/scripts/tests/test-validate-lessons.sh +150 -0
  280. package/scripts/tests/test-validate-plan-quality.sh +235 -0
  281. package/scripts/tests/test-validate-plans.sh +187 -0
  282. package/scripts/tests/test-validate-plugin.sh +106 -0
  283. package/scripts/tests/test-validate-prd.sh +184 -0
  284. package/scripts/tests/test-validate-skills.sh +134 -0
  285. package/scripts/validate-all.sh +57 -0
  286. package/scripts/validate-commands.sh +67 -0
  287. package/scripts/validate-hooks.sh +89 -0
  288. package/scripts/validate-lessons.sh +98 -0
  289. package/scripts/validate-plan-quality.sh +369 -0
  290. package/scripts/validate-plans.sh +120 -0
  291. package/scripts/validate-plugin.sh +86 -0
  292. package/scripts/validate-policies.sh +42 -0
  293. package/scripts/validate-prd.sh +118 -0
  294. package/scripts/validate-skills.sh +96 -0
  295. package/skills/autocode/SKILL.md +285 -0
  296. package/skills/autocode/ab-verification.md +51 -0
  297. package/skills/autocode/code-quality-standards.md +37 -0
  298. package/skills/autocode/competitive-mode.md +364 -0
  299. package/skills/brainstorming/SKILL.md +97 -0
  300. package/skills/capture-lesson/SKILL.md +187 -0
  301. package/skills/check-lessons/SKILL.md +116 -0
  302. package/skills/dispatching-parallel-agents/SKILL.md +110 -0
  303. package/skills/executing-plans/SKILL.md +85 -0
  304. package/skills/finishing-a-development-branch/SKILL.md +201 -0
  305. package/skills/receiving-code-review/SKILL.md +72 -0
  306. package/skills/requesting-code-review/SKILL.md +59 -0
  307. package/skills/requesting-code-review/code-reviewer.md +82 -0
  308. package/skills/research/SKILL.md +145 -0
  309. package/skills/roadmap/SKILL.md +115 -0
  310. package/skills/subagent-driven-development/SKILL.md +98 -0
  311. package/skills/subagent-driven-development/code-quality-reviewer-prompt.md +18 -0
  312. package/skills/subagent-driven-development/implementer-prompt.md +73 -0
  313. package/skills/subagent-driven-development/spec-reviewer-prompt.md +57 -0
  314. package/skills/systematic-debugging/SKILL.md +134 -0
  315. package/skills/systematic-debugging/condition-based-waiting.md +64 -0
  316. package/skills/systematic-debugging/defense-in-depth.md +32 -0
  317. package/skills/systematic-debugging/root-cause-tracing.md +55 -0
  318. package/skills/test-driven-development/SKILL.md +167 -0
  319. package/skills/using-git-worktrees/SKILL.md +219 -0
  320. package/skills/using-superpowers/SKILL.md +54 -0
  321. package/skills/verification-before-completion/SKILL.md +140 -0
  322. package/skills/verify/SKILL.md +82 -0
  323. package/skills/writing-plans/SKILL.md +128 -0
  324. package/skills/writing-skills/SKILL.md +93 -0
@@ -0,0 +1,305 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+
4
+ SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
5
+ source "$SCRIPT_DIR/../lib/common.sh"
6
+ source "$SCRIPT_DIR/../lib/run-plan-parser.sh"
7
+ source "$SCRIPT_DIR/../lib/progress-writer.sh"
8
+ source "$SCRIPT_DIR/../lib/run-plan-context.sh"
9
+
10
+ FAILURES=0
11
+ TESTS=0
12
+
13
+ assert_eq() { # Usage: assert_eq DESC EXPECTED ACTUAL -- records one test that passes only on exact string equality
14
+ local desc="$1" expected="$2" actual="$3"
15
+ TESTS=$((TESTS + 1)) # every call counts toward the script-level total, pass or fail
16
+ if [[ "$expected" != "$actual" ]]; then
17
+ echo "FAIL: $desc"
18
+ echo " expected: $expected"
19
+ echo " actual: $actual"
20
+ FAILURES=$((FAILURES + 1)) # script-level failure counter, checked by the summary at the end of the script
21
+ else
22
+ echo "PASS: $desc"
23
+ fi
24
+ }
25
+
26
+ assert_contains() { # Usage: assert_contains DESC NEEDLE HAYSTACK -- records one test that passes when NEEDLE occurs in HAYSTACK
27
+ local desc="$1" needle="$2" haystack="$3"
28
+ TESTS=$((TESTS + 1)) # every call counts toward the script-level total, pass or fail
29
+ if [[ "$haystack" == *"$needle"* ]]; then # needle is quoted, so it is matched as a literal substring rather than as a glob
30
+ echo "PASS: $desc"
31
+ else
32
+ echo "FAIL: $desc"
33
+ echo " expected to contain: $needle"
34
+ echo " actual: ${haystack:0:200}..." # only the first 200 characters of the haystack are printed
35
+ FAILURES=$((FAILURES + 1))
36
+ fi
37
+ }
38
+
39
+ assert_not_contains() { # Usage: assert_not_contains DESC NEEDLE HAYSTACK -- records one test that passes when NEEDLE is absent from HAYSTACK
40
+ local desc="$1" needle="$2" haystack="$3"
41
+ TESTS=$((TESTS + 1)) # every call counts toward the script-level total, pass or fail
42
+ if [[ "$haystack" != *"$needle"* ]]; then # needle is quoted, so it is matched as a literal substring rather than as a glob
43
+ echo "PASS: $desc"
44
+ else
45
+ echo "FAIL: $desc"
46
+ echo " expected NOT to contain: $needle" # the haystack itself is not printed on failure
47
+ FAILURES=$((FAILURES + 1))
48
+ fi
49
+ }
50
+
51
+ WORK=$(mktemp -d)
52
+ trap 'rm -rf "$WORK"' EXIT # #59: ensure cleanup on any exit path, including early test failure
53
+
54
+ # === Setup test fixtures ===
55
+
56
+ # State file
57
+ cat > "$WORK/.run-plan-state.json" << 'JSON'
58
+ {
59
+ "plan": "test-plan.md",
60
+ "mode": "headless",
61
+ "batches": {
62
+ "1": {"passed": true, "test_count": 50, "duration": 120},
63
+ "2": {"passed": true, "test_count": 75, "duration": 90}
64
+ }
65
+ }
66
+ JSON
67
+
68
+ # Progress file
69
+ cat > "$WORK/progress.txt" << 'TXT'
70
+ Batch 1: Created shared library
71
+ Batch 2: Fixed test parsing
72
+ Discovery: jest output needs special handling
73
+ TXT
74
+
75
+ # Git repo for git log
76
+ cd "$WORK" && git init -q && git config user.email "test@test.com" && git config user.name "Test" && git commit --allow-empty -m "batch 1: initial" -q && git commit --allow-empty -m "batch 2: add tests" -q # repo-local identity so commits succeed on machines with no global git user configured
77
+ cd - > /dev/null
78
+
79
+ # Plan with context_refs
80
+ cat > "$WORK/test-plan.md" << 'PLAN'
81
+ ## Batch 1: Foundation
82
+ ### Task 1: Setup
83
+ Create lib.
84
+
85
+ ## Batch 2: Tests
86
+ ### Task 2: Add tests
87
+ context_refs: src/lib.sh
88
+
89
+ ## Batch 3: Integration
90
+ ### Task 3: Wire together
91
+ context_refs: src/lib.sh, tests/test-lib.sh
92
+ PLAN
93
+
94
+ # Context ref files
95
+ mkdir -p "$WORK/src" "$WORK/tests"
96
+ echo "#!/bin/bash" > "$WORK/src/lib.sh"
97
+ echo "echo hello" >> "$WORK/src/lib.sh"
98
+ echo "#!/bin/bash" > "$WORK/tests/test-lib.sh"
99
+
100
+ # === Tests ===
101
+
102
+ # generate_batch_context for batch 3 (has context_refs and prior batches)
103
+ ctx=$(generate_batch_context "$WORK/test-plan.md" 3 "$WORK")
104
+ assert_contains "context: includes quality gate expectation" "tests must stay above 75" "$ctx"
105
+ assert_contains "context: includes prior batch summary" "Batch 2" "$ctx"
106
+ assert_contains "context: includes context_refs content" "echo hello" "$ctx"
107
+ assert_not_contains "context: excludes batch 1 details for batch 3" "Batch 1: Foundation" "$ctx"
108
+
109
+ # generate_batch_context for batch 1 (no prior context)
110
+ ctx=$(generate_batch_context "$WORK/test-plan.md" 1 "$WORK")
111
+ assert_contains "context batch 1: minimal context" "Run-Plan" "$ctx"
112
+ # Should be short — no prior batches, no context_refs
113
+ char_count=${#ctx}
114
+ TESTS=$((TESTS + 1))
115
+ if [[ $char_count -lt 2000 ]]; then
116
+ echo "PASS: context batch 1: under 2000 chars ($char_count)"
117
+ else
118
+ echo "FAIL: context batch 1: over 2000 chars ($char_count)"
119
+ FAILURES=$((FAILURES + 1))
120
+ fi
121
+
122
+ # Token budget: context should stay under 6000 chars (~1500 tokens)
123
+ ctx=$(generate_batch_context "$WORK/test-plan.md" 3 "$WORK")
124
+ char_count=${#ctx}
125
+ TESTS=$((TESTS + 1))
126
+ if [[ $char_count -lt 6000 ]]; then
127
+ echo "PASS: context batch 3: under 6000 chars ($char_count)"
128
+ else
129
+ echo "FAIL: context batch 3: over 6000 chars ($char_count)"
130
+ FAILURES=$((FAILURES + 1))
131
+ fi
132
+
133
+ # Failure patterns injection
134
+ mkdir -p "$WORK/logs"
135
+ cat > "$WORK/logs/failure-patterns.json" << 'JSON'
136
+ [{"batch_title_pattern": "integration", "failure_type": "missing import", "frequency": 3, "winning_fix": "check all imports before running tests"}]
137
+ JSON
138
+
139
+ ctx=$(generate_batch_context "$WORK/test-plan.md" 3 "$WORK")
140
+ assert_contains "context: includes failure pattern warning" "missing import" "$ctx"
141
+
142
+ # === MAB lessons injection ===
143
+
144
+ # Create MAB lessons file
145
+ cat > "$WORK/logs/mab-lessons.json" << 'JSON'
146
+ [{"pattern": "check imports before tests", "context": "integration", "winner": "superpowers", "occurrences": 3, "promoted": false}]
147
+ JSON
148
+
149
+ ctx_mab=$(generate_batch_context "$WORK/test-plan.md" 3 "$WORK")
150
+ assert_contains "context: MAB lessons injected when file present" "check imports before tests" "$ctx_mab"
151
+ assert_contains "context: MAB lessons section header" "MAB Lessons" "$ctx_mab"
152
+
153
+ # Remove MAB lessons and verify no section
154
+ rm -f "$WORK/logs/mab-lessons.json"
155
+ ctx_no_mab=$(generate_batch_context "$WORK/test-plan.md" 3 "$WORK")
156
+ assert_not_contains "context: no MAB section when file absent" "MAB Lessons" "$ctx_no_mab"
157
+
158
+ # === No tail fallback: structured read returns empty, no wrong-batch data injected (#54) ===
159
+
160
+ # Progress.txt with only batch 1 content (no batch 2)
161
+ WORK_NOTAIL=$(mktemp -d)
162
+ trap 'rm -rf "$WORK" "$WORK_NOTAIL"' EXIT # a new EXIT trap replaces the previous one, so $WORK must stay in the cleanup list
163
+ cat > "$WORK_NOTAIL/test-plan.md" << 'PLAN_NOTAIL'
164
+ ## Batch 1: Alpha
165
+ ### Task 1: Do alpha
166
+ Do something.
167
+
168
+ ## Batch 2: Beta
169
+ ### Task 2: Do beta
170
+ Do more.
171
+ PLAN_NOTAIL
172
+ cat > "$WORK_NOTAIL/.run-plan-state.json" << 'JSON_NOTAIL'
173
+ {"plan": "test-plan.md", "mode": "headless", "batches": {}}
174
+ JSON_NOTAIL
175
+
176
+ # Write unrelated content to progress.txt (no structured headers)
177
+ echo "some unrelated content from a different run" > "$WORK_NOTAIL/progress.txt"
178
+ echo "batch 99 leftovers here" >> "$WORK_NOTAIL/progress.txt"
179
+
180
+ # generate_batch_context for batch 2: progress.txt exists but has no structured batch 1 data
181
+ # Should NOT inject the tail content as "Progress Notes"
182
+ cd "$WORK_NOTAIL" && git init -q && git config user.email "test@test.com" && git config user.name "Test" && git commit --allow-empty -m "init" -q # repo-local identity so the commit succeeds on machines with no global git user configured
183
+ cd - > /dev/null
184
+ ctx_notail=$(generate_batch_context "$WORK_NOTAIL/test-plan.md" 2 "$WORK_NOTAIL")
185
+ assert_not_contains "no-tail-fallback: unrelated progress.txt content not injected" "batch 99 leftovers" "$ctx_notail"
186
+ assert_not_contains "no-tail-fallback: tail content not injected as Progress Notes" "unrelated content from a different run" "$ctx_notail"
187
+
188
+ # === git -C fix: git log works without cd (#61) ===
189
+
190
+ # Verify the generate_batch_context produces git log output without needing cwd change
191
+ ctx_gitlog=$(generate_batch_context "$WORK/test-plan.md" 3 "$WORK")
192
+ assert_contains "git-C: recent commits appear in context" "Recent Commits" "$ctx_gitlog"
193
+
194
+ # === Failure pattern recording ===
195
+
196
+ # Clean up pre-existing patterns file for isolated testing
197
+ rm -f "$WORK/logs/failure-patterns.json"
198
+
199
+ record_failure_pattern "$WORK" "Integration Wiring" "missing import" "check imports before tests"
200
+
201
+ assert_eq "record_failure_pattern: creates file" "true" "$(test -f "$WORK/logs/failure-patterns.json" && echo true || echo false)"
202
+
203
+ # Record same pattern again — should increment frequency
204
+ record_failure_pattern "$WORK" "Integration Wiring" "missing import" "check imports before tests"
205
+ freq=$(jq '.[0].frequency' "$WORK/logs/failure-patterns.json")
206
+ assert_eq "record_failure_pattern: increments frequency" "2" "$freq"
207
+
208
+ # Record different pattern
209
+ record_failure_pattern "$WORK" "Test Suite" "flaky assertion" "use deterministic comparisons"
210
+ count=$(jq 'length' "$WORK/logs/failure-patterns.json")
211
+ assert_eq "record_failure_pattern: adds new pattern" "2" "$count"
212
+
213
+ # === Bug #60 BEHAVIORAL: whitespace in context_refs is trimmed ===
214
+ # context_refs: " src/lib.sh , tests/test-lib.sh " should resolve both files
215
+ # despite leading/trailing spaces around each path.
216
+
217
+ WORK_WS=$(mktemp -d)
218
+ trap 'rm -rf "$WORK" "$WORK_NOTAIL" "$WORK_WS"' EXIT # a new EXIT trap replaces the previous one, so every earlier temp dir must stay in the cleanup list
219
+
220
+ # Plan with whitespace-padded context_refs
221
+ cat > "$WORK_WS/test-plan.md" << 'PLAN_WS'
222
+ ## Batch 1: Setup
223
+ ### Task 1: Init
224
+ Do init work.
225
+
226
+ ## Batch 2: Test whitespace
227
+ ### Task 2: Check refs
228
+ context_refs: src/padded.sh , tests/padded-test.sh
229
+ PLAN_WS
230
+
231
+ # Create the referenced files
232
+ mkdir -p "$WORK_WS/src" "$WORK_WS/tests"
233
+ echo "PADDED_CONTENT=true" > "$WORK_WS/src/padded.sh"
234
+ echo "PADDED_TEST=true" > "$WORK_WS/tests/padded-test.sh"
235
+
236
+ # State and git setup
237
+ cat > "$WORK_WS/.run-plan-state.json" << 'JSON_WS'
238
+ {"plan": "test-plan.md", "mode": "headless", "batches": {}}
239
+ JSON_WS
240
+ cd "$WORK_WS" && git init -q && git config user.email "test@test.com" && git config user.name "Test" && git commit --allow-empty -m "init" -q # repo-local identity so the commit succeeds on machines with no global git user configured
241
+ cd - > /dev/null
242
+
243
+ ctx_ws=$(generate_batch_context "$WORK_WS/test-plan.md" 2 "$WORK_WS")
244
+
245
+ assert_contains "whitespace-trimmed: padded.sh content included" "PADDED_CONTENT=true" "$ctx_ws"
246
+ assert_contains "whitespace-trimmed: padded-test.sh content included" "PADDED_TEST=true" "$ctx_ws"
247
+
248
+ # === Bug #50 BEHAVIORAL: non-readable progress.txt propagates error ===
249
+ # When progress.txt exists but has restricted permissions, the tail call should
250
+ # not silently swallow the error — stderr should show the permission denial.
251
+
252
+ WORK_PERM=$(mktemp -d)
253
+ trap 'rm -rf "$WORK" "$WORK_NOTAIL" "$WORK_WS" "$WORK_PERM"' EXIT # a new EXIT trap replaces the previous one, so every temp dir created by this script is listed here
254
+
255
+ mkdir -p "$WORK_PERM/src"
256
+ cat > "$WORK_PERM/test-plan.md" << 'PLAN_PERM'
257
+ ## Batch 1: Init
258
+ ### Task 1: Do something
259
+ Do something.
260
+
261
+ ## Batch 2: Next
262
+ ### Task 2: Do more
263
+ Do more.
264
+ PLAN_PERM
265
+
266
+ cat > "$WORK_PERM/.run-plan-state.json" << 'JSON_PERM'
267
+ {"plan": "test-plan.md", "mode": "headless", "batches": {"1": {"passed": true, "test_count": 10, "duration": 30}}}
268
+ JSON_PERM
269
+
270
+ # Create an unreadable progress.txt (note: this only works when not root)
271
+ echo "some progress" > "$WORK_PERM/progress.txt"
272
+ chmod 000 "$WORK_PERM/progress.txt"
273
+
274
+ cd "$WORK_PERM" && git init -q && git config user.email "test@test.com" && git config user.name "Test" && git commit --allow-empty -m "init" -q # repo-local identity so the commit succeeds on machines with no global git user configured
275
+ cd - > /dev/null
276
+
277
+ # build_variable_suffix should produce stderr when progress.txt is unreadable
278
+ # (the fix removed || true, so tail's permission error is visible)
279
+ prompt_stderr=""
280
+ prompt_stderr=$(build_variable_suffix "$WORK_PERM/test-plan.md" 2 "$WORK_PERM" 10 2>&1 >/dev/null) || true
281
+
282
+ TESTS=$((TESTS + 1))
283
+ # Only check if we're not root (root can read anything)
284
+ if [[ "$(id -u)" -eq 0 ]]; then
285
+ echo "PASS: (skipped — running as root, permission test not applicable)"
286
+ else
287
+ if [[ -n "$prompt_stderr" ]]; then
288
+ echo "PASS: unreadable progress.txt: error propagated to stderr"
289
+ else
290
+ echo "FAIL: unreadable progress.txt: error should propagate (not suppressed by || true)"
291
+ FAILURES=$((FAILURES + 1))
292
+ fi
293
+ fi
294
+
295
+ # Cleanup (restore permission before rm can work)
296
+ chmod 644 "$WORK_PERM/progress.txt" 2>/dev/null || true
297
+
298
+ # === Summary ===
299
+ echo ""
300
+ echo "Results: $((TESTS - FAILURES))/$TESTS passed"
301
+ if [[ $FAILURES -gt 0 ]]; then
302
+ echo "FAILURES: $FAILURES"
303
+ exit 1
304
+ fi
305
+ echo "ALL PASSED"
@@ -0,0 +1,153 @@
1
#!/usr/bin/env bash
# Test run-plan.sh end-to-end — exercises the full Mode C headless loop
# with a fake claude binary and fake quality gate (no real API calls).
set -euo pipefail

# run-plan.sh is resolved relative to the directory holding this test file.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
RUN_PLAN="${SCRIPT_DIR}/../run-plan.sh"

# Running totals updated by assert().
TESTS=0
FAILURES=0

# --- Setup temp workspace ---
# WORK is the git worktree under test; FIXTURES holds the fake binaries.
WORK="$(mktemp -d)"
FIXTURES="$(mktemp -d)"

# Remove both temp directories on any exit path.
_cleanup_workspace() {
  rm -rf "$WORK" "$FIXTURES"
}
trap _cleanup_workspace EXIT
16
+
17
# 1. Git init the worktree, with a repo-local identity for the commits below
git -C "$WORK" init -q
git -C "$WORK" config user.name "Test"
git -C "$WORK" config user.email "test@test.com"

# Gitignore run-plan artifacts so check_git_clean passes
printf '%s\n' \
  '.run-plan-state.json' \
  '.run-plan-prefix.txt' \
  'AGENTS.md' \
  'logs/' \
  'progress.txt' \
  > "$WORK/.gitignore"

# Baseline commit containing only the .gitignore.
git -C "$WORK" add -A
git -C "$WORK" commit -q -m "init"
33
+
34
# 2. Create a small plan file (2 batches, 2 tasks each) and commit it
PLAN_FILE="$WORK/plan.md"
cat <<'PLAN_MD' > "$PLAN_FILE"
# Test Plan

## Batch 1: Setup foundation

### Task 1: Create config module
Create the config module with defaults.

### Task 2: Add config tests
Write tests for the config module.

## Batch 2: Build feature

### Task 3: Implement feature
Build the main feature on top of config.

### Task 4: Add feature tests
Write tests for the feature.
PLAN_MD

git -C "$WORK" add plan.md
git -C "$WORK" commit -q -m "add plan"
57
+
58
# 3. Create fake claude binary (outside worktree to keep git clean)
FAKE_BIN="$FIXTURES/bin"
mkdir -p "$FAKE_BIN"

cat <<'FAKECLAUDE' > "$FAKE_BIN/claude"
#!/usr/bin/env bash
# Fake claude — simulates batch work without API calls
echo "Working on batch..."
echo "42 passed, 0 failed in 1.2s"
exit 0
FAKECLAUDE

# 4. Create fake quality gate script (outside worktree)
cat <<'FAKEGATE' > "$FIXTURES/fake-quality-gate.sh"
#!/usr/bin/env bash
# Fake quality gate — always passes
echo "42 passed in 1.0s"
exit 0
FAKEGATE

# Both stubs are written before either is used, so mark them executable together.
chmod +x "$FAKE_BIN/claude" "$FIXTURES/fake-quality-gate.sh"
79
+
80
# 5. Run run-plan.sh with fake claude first on PATH
PATH="$FAKE_BIN:$PATH"
export PATH

# Capture combined stdout+stderr in OUTPUT and the exit status in EXIT_CODE.
# The `||` keeps a non-zero status from tripping `set -e`.
EXIT_CODE=0
OUTPUT=$(
  cd "$WORK" && "$RUN_PLAN" "$WORK/plan.md" \
    --worktree "$WORK" \
    --quality-gate "$FIXTURES/fake-quality-gate.sh" \
    --on-failure stop \
    2>&1
) || EXIT_CODE=$?
88
+
89
+ # --- Assertions ---
90
+
91
# Record one test outcome.
# Globals:   TESTS (incremented on every call), FAILURES (incremented on failure)
# Arguments: $1 - description printed after the PASS/FAIL tag
#            $2 - the literal string "true" for a pass; anything else is a failure
# Outputs:   writes "PASS: <desc>" or "FAIL: <desc>" to stdout
assert() {
  local label="$1"
  local outcome="$2"
  TESTS=$((TESTS + 1))
  if [[ "$outcome" != "true" ]]; then
    echo "FAIL: $label"
    FAILURES=$((FAILURES + 1))
    return 0
  fi
  echo "PASS: $label"
}
101
+
102
# Artifacts that run-plan.sh is expected to leave in the worktree.
STATE_FILE="$WORK/.run-plan-state.json"
PREFIX_FILE="$WORK/.run-plan-prefix.txt"

# Run the given command and print "true" if it succeeds, "false" otherwise.
# Produces the string form that assert() expects as its second argument.
_bool() {
  if "$@"; then
    echo true
  else
    echo false
  fi
}

# 1. Exit code is 0
assert "exit code is 0" "$(_bool test "$EXIT_CODE" -eq 0)"

# 2. State file was created
assert "state file exists" "$(_bool test -f "$STATE_FILE")"

# 3. Both batches in completed_batches
if [[ -f "$STATE_FILE" ]]; then
  # A jq error (e.g. completed_batches missing or not an array) must count as a
  # failed assertion; without the fallback, `set -e` would abort the script here
  # and the summary/debug output below would never be printed.
  HAS_BATCH_1=$(jq '.completed_batches | contains([1])' "$STATE_FILE") || HAS_BATCH_1=false
  HAS_BATCH_2=$(jq '.completed_batches | contains([2])' "$STATE_FILE") || HAS_BATCH_2=false
  assert "batch 1 in completed_batches" "$HAS_BATCH_1"
  assert "batch 2 in completed_batches" "$HAS_BATCH_2"
else
  assert "batch 1 in completed_batches (no state file)" "false"
  assert "batch 2 in completed_batches (no state file)" "false"
fi

# 4. Log files exist
assert "batch 1 log exists" "$(_bool test -f "$WORK/logs/batch-1-attempt-1.log")"
assert "batch 2 log exists" "$(_bool test -f "$WORK/logs/batch-2-attempt-1.log")"

# 5. Prefix file was created (#51)
# The stable prefix is built once before the batch loop and cached to disk.
# If this file is missing the per-batch prompt assembly would silently use an empty prefix.
assert "prefix file exists" "$(_bool test -f "$PREFIX_FILE")"

# 6. Prefix file is non-empty and contains stable content
if [[ -f "$PREFIX_FILE" ]]; then
  # grep reads the file directly: with `set -o pipefail`, `echo ... | grep -q`
  # can report a spurious failure when grep exits on the first match and the
  # writer is killed by SIGPIPE. The worktree path is matched as a fixed string
  # (-F) because it is a literal path, not a regular expression.
  assert "prefix file contains TDD rule" "$(_bool grep -q "TDD" "$PREFIX_FILE")"
  assert "prefix file contains worktree path" "$(_bool grep -qF -- "$WORK" "$PREFIX_FILE")"
else
  assert "prefix file contains TDD rule (no file)" "false"
  assert "prefix file contains worktree path (no file)" "false"
fi
137
+
138
# --- Summary ---
# On any failure, dump the captured run-plan.sh output and the state file (when
# present) to help diagnosis, then exit with status 1.
passed=$((TESTS - FAILURES))
echo ""
echo "Results: ${passed}/$TESTS passed"
if ((FAILURES > 0)); then
  echo "FAILURES: $FAILURES"
  echo ""
  echo "--- Debug output ---"
  echo "$OUTPUT"
  if [[ -f "$WORK/.run-plan-state.json" ]]; then
    echo ""
    echo "--- State file ---"
    cat "$WORK/.run-plan-state.json"
  fi
  exit 1
fi
echo "ALL PASSED"