autonomous-coding-toolkit 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (324) hide show
  1. package/.claude-plugin/marketplace.json +22 -0
  2. package/.claude-plugin/plugin.json +13 -0
  3. package/LICENSE +21 -0
  4. package/Makefile +21 -0
  5. package/README.md +140 -0
  6. package/SECURITY.md +28 -0
  7. package/agents/bash-expert.md +113 -0
  8. package/agents/dependency-auditor.md +138 -0
  9. package/agents/integration-tester.md +120 -0
  10. package/agents/lesson-scanner.md +149 -0
  11. package/agents/python-expert.md +179 -0
  12. package/agents/service-monitor.md +141 -0
  13. package/agents/shell-expert.md +147 -0
  14. package/benchmarks/runner.sh +147 -0
  15. package/benchmarks/tasks/01-rest-endpoint/rubric.sh +29 -0
  16. package/benchmarks/tasks/01-rest-endpoint/task.md +17 -0
  17. package/benchmarks/tasks/02-refactor-module/task.md +8 -0
  18. package/benchmarks/tasks/03-fix-integration-bug/task.md +8 -0
  19. package/benchmarks/tasks/04-add-test-coverage/task.md +8 -0
  20. package/benchmarks/tasks/05-multi-file-feature/task.md +8 -0
  21. package/bin/act.js +238 -0
  22. package/commands/autocode.md +6 -0
  23. package/commands/cancel-ralph.md +18 -0
  24. package/commands/code-factory.md +53 -0
  25. package/commands/create-prd.md +55 -0
  26. package/commands/ralph-loop.md +18 -0
  27. package/commands/run-plan.md +117 -0
  28. package/commands/submit-lesson.md +122 -0
  29. package/docs/ARCHITECTURE.md +630 -0
  30. package/docs/CONTRIBUTING.md +125 -0
  31. package/docs/lessons/0001-bare-exception-swallowing.md +34 -0
  32. package/docs/lessons/0002-async-def-without-await.md +28 -0
  33. package/docs/lessons/0003-create-task-without-callback.md +28 -0
  34. package/docs/lessons/0004-hardcoded-test-counts.md +28 -0
  35. package/docs/lessons/0005-sqlite-without-closing.md +33 -0
  36. package/docs/lessons/0006-venv-pip-path.md +27 -0
  37. package/docs/lessons/0007-runner-state-self-rejection.md +35 -0
  38. package/docs/lessons/0008-quality-gate-blind-spot.md +33 -0
  39. package/docs/lessons/0009-parser-overcount-empty-batches.md +36 -0
  40. package/docs/lessons/0010-local-outside-function-bash.md +33 -0
  41. package/docs/lessons/0011-batch-tests-for-unimplemented-code.md +36 -0
  42. package/docs/lessons/0012-api-markdown-unescaped-chars.md +33 -0
  43. package/docs/lessons/0013-export-prefix-env-parsing.md +33 -0
  44. package/docs/lessons/0014-decorator-registry-import-side-effect.md +43 -0
  45. package/docs/lessons/0015-frontend-backend-schema-drift.md +43 -0
  46. package/docs/lessons/0016-event-driven-cold-start-seeding.md +44 -0
  47. package/docs/lessons/0017-copy-paste-logic-diverges.md +43 -0
  48. package/docs/lessons/0018-layer-passes-pipeline-broken.md +45 -0
  49. package/docs/lessons/0019-systemd-envfile-ignores-export.md +41 -0
  50. package/docs/lessons/0020-persist-state-incrementally.md +44 -0
  51. package/docs/lessons/0021-dual-axis-testing.md +48 -0
  52. package/docs/lessons/0022-jsx-factory-shadowing.md +43 -0
  53. package/docs/lessons/0023-static-analysis-spiral.md +51 -0
  54. package/docs/lessons/0024-shared-pipeline-implementation.md +55 -0
  55. package/docs/lessons/0025-defense-in-depth-all-entry-points.md +65 -0
  56. package/docs/lessons/0026-linter-no-rules-false-enforcement.md +54 -0
  57. package/docs/lessons/0027-jsx-silent-prop-drop.md +64 -0
  58. package/docs/lessons/0028-no-infrastructure-in-client-code.md +49 -0
  59. package/docs/lessons/0029-never-write-secrets-to-files.md +61 -0
  60. package/docs/lessons/0030-cache-merge-not-replace.md +62 -0
  61. package/docs/lessons/0031-verify-units-at-boundaries.md +66 -0
  62. package/docs/lessons/0032-module-lifecycle-subscribe-unsubscribe.md +89 -0
  63. package/docs/lessons/0033-async-iteration-mutable-snapshot.md +72 -0
  64. package/docs/lessons/0034-caller-missing-await-silent-discard.md +65 -0
  65. package/docs/lessons/0035-duplicate-registration-silent-overwrite.md +85 -0
  66. package/docs/lessons/0036-websocket-dirty-disconnect.md +33 -0
  67. package/docs/lessons/0037-parallel-agents-worktree-corruption.md +31 -0
  68. package/docs/lessons/0038-subscribe-no-stored-ref.md +36 -0
  69. package/docs/lessons/0039-fallback-or-default-hides-bugs.md +34 -0
  70. package/docs/lessons/0040-event-firehose-filter-first.md +36 -0
  71. package/docs/lessons/0041-ambiguous-base-dir-path-nesting.md +32 -0
  72. package/docs/lessons/0042-spec-compliance-insufficient.md +36 -0
  73. package/docs/lessons/0043-exact-count-extensible-collections.md +32 -0
  74. package/docs/lessons/0044-relative-file-deps-worktree.md +39 -0
  75. package/docs/lessons/0045-iterative-design-improvement.md +33 -0
  76. package/docs/lessons/0046-plan-assertion-math-bugs.md +38 -0
  77. package/docs/lessons/0047-pytest-single-threaded-default.md +37 -0
  78. package/docs/lessons/0048-integration-wiring-batch.md +40 -0
  79. package/docs/lessons/0049-ab-verification.md +41 -0
  80. package/docs/lessons/0050-editing-sourced-files-during-execution.md +33 -0
  81. package/docs/lessons/0051-infrastructure-fixes-cant-self-heal.md +30 -0
  82. package/docs/lessons/0052-uncommitted-changes-poison-quality-gates.md +31 -0
  83. package/docs/lessons/0053-jq-compact-flag-inconsistency.md +31 -0
  84. package/docs/lessons/0054-parser-matches-inside-code-blocks.md +30 -0
  85. package/docs/lessons/0055-agents-compensate-for-garbled-prompts.md +31 -0
  86. package/docs/lessons/0056-grep-count-exit-code-on-zero.md +42 -0
  87. package/docs/lessons/0057-new-artifacts-break-git-clean-gates.md +42 -0
  88. package/docs/lessons/0058-dead-config-keys-never-consumed.md +49 -0
  89. package/docs/lessons/0059-contract-test-shared-structures.md +53 -0
  90. package/docs/lessons/0060-set-e-silent-death-in-runners.md +53 -0
  91. package/docs/lessons/0061-context-injection-dirty-state.md +50 -0
  92. package/docs/lessons/0062-sibling-bug-neighborhood-scan.md +29 -0
  93. package/docs/lessons/0063-one-flag-two-lifetimes.md +31 -0
  94. package/docs/lessons/0064-test-passes-wrong-reason.md +31 -0
  95. package/docs/lessons/0065-pipefail-grep-count-double-output.md +39 -0
  96. package/docs/lessons/0066-local-keyword-outside-function.md +37 -0
  97. package/docs/lessons/0067-stdin-hang-non-interactive-shell.md +36 -0
  98. package/docs/lessons/0068-agent-builds-wrong-thing-correctly.md +31 -0
  99. package/docs/lessons/0069-plan-quality-dominates-execution.md +30 -0
  100. package/docs/lessons/0070-spec-echo-back-prevents-drift.md +31 -0
  101. package/docs/lessons/0071-positive-instructions-outperform-negative.md +30 -0
  102. package/docs/lessons/0072-lost-in-the-middle-context-placement.md +30 -0
  103. package/docs/lessons/0073-unscoped-lessons-cause-false-positives.md +30 -0
  104. package/docs/lessons/0074-stale-context-injection-wrong-batch.md +32 -0
  105. package/docs/lessons/0075-research-artifacts-must-persist.md +32 -0
  106. package/docs/lessons/0076-wrong-decomposition-contaminates-downstream.md +30 -0
  107. package/docs/lessons/0077-cherry-pick-merges-need-manual-resolution.md +30 -0
  108. package/docs/lessons/0078-static-review-without-live-test.md +30 -0
  109. package/docs/lessons/0079-integration-wiring-batch-required.md +32 -0
  110. package/docs/lessons/FRAMEWORK.md +161 -0
  111. package/docs/lessons/SUMMARY.md +201 -0
  112. package/docs/lessons/TEMPLATE.md +85 -0
  113. package/docs/plans/2026-02-21-code-factory-v2-design.md +204 -0
  114. package/docs/plans/2026-02-21-code-factory-v2-implementation-plan.md +2189 -0
  115. package/docs/plans/2026-02-21-code-factory-v2-phase4-design.md +537 -0
  116. package/docs/plans/2026-02-21-code-factory-v2-phase4-implementation-plan.md +2012 -0
  117. package/docs/plans/2026-02-21-hardening-pass-design.md +108 -0
  118. package/docs/plans/2026-02-21-hardening-pass-plan.md +1378 -0
  119. package/docs/plans/2026-02-21-mab-research-report.md +406 -0
  120. package/docs/plans/2026-02-21-marketplace-restructure-design.md +240 -0
  121. package/docs/plans/2026-02-21-marketplace-restructure-plan.md +832 -0
  122. package/docs/plans/2026-02-21-phase4-completion-plan.md +697 -0
  123. package/docs/plans/2026-02-21-validator-suite-design.md +148 -0
  124. package/docs/plans/2026-02-21-validator-suite-plan.md +540 -0
  125. package/docs/plans/2026-02-22-mab-research-round2.md +556 -0
  126. package/docs/plans/2026-02-22-mab-run-design.md +462 -0
  127. package/docs/plans/2026-02-22-mab-run-plan.md +2046 -0
  128. package/docs/plans/2026-02-22-operations-design-methodology-research.md +681 -0
  129. package/docs/plans/2026-02-22-research-agent-failure-taxonomy.md +532 -0
  130. package/docs/plans/2026-02-22-research-code-guideline-policies.md +886 -0
  131. package/docs/plans/2026-02-22-research-codebase-audit-refactoring.md +908 -0
  132. package/docs/plans/2026-02-22-research-coding-standards-documentation.md +541 -0
  133. package/docs/plans/2026-02-22-research-competitive-landscape.md +687 -0
  134. package/docs/plans/2026-02-22-research-comprehensive-testing.md +1076 -0
  135. package/docs/plans/2026-02-22-research-context-utilization.md +459 -0
  136. package/docs/plans/2026-02-22-research-cost-quality-tradeoff.md +548 -0
  137. package/docs/plans/2026-02-22-research-lesson-transferability.md +508 -0
  138. package/docs/plans/2026-02-22-research-multi-agent-coordination.md +312 -0
  139. package/docs/plans/2026-02-22-research-phase-integration.md +602 -0
  140. package/docs/plans/2026-02-22-research-plan-quality.md +428 -0
  141. package/docs/plans/2026-02-22-research-prompt-engineering.md +558 -0
  142. package/docs/plans/2026-02-22-research-unconventional-perspectives.md +528 -0
  143. package/docs/plans/2026-02-22-research-user-adoption.md +638 -0
  144. package/docs/plans/2026-02-22-research-verification-effectiveness.md +433 -0
  145. package/docs/plans/2026-02-23-agent-suite-design.md +299 -0
  146. package/docs/plans/2026-02-23-agent-suite-plan.md +578 -0
  147. package/docs/plans/2026-02-23-phase3-cost-infrastructure-design.md +148 -0
  148. package/docs/plans/2026-02-23-phase3-cost-infrastructure-plan.md +1062 -0
  149. package/docs/plans/2026-02-23-research-bash-expert-agent.md +543 -0
  150. package/docs/plans/2026-02-23-research-dependency-auditor-agent.md +564 -0
  151. package/docs/plans/2026-02-23-research-improving-existing-agents.md +503 -0
  152. package/docs/plans/2026-02-23-research-integration-tester-agent.md +454 -0
  153. package/docs/plans/2026-02-23-research-python-expert-agent.md +429 -0
  154. package/docs/plans/2026-02-23-research-service-monitor-agent.md +425 -0
  155. package/docs/plans/2026-02-23-research-shell-expert-agent.md +533 -0
  156. package/docs/plans/2026-02-23-roadmap-to-completion.md +530 -0
  157. package/docs/plans/2026-02-24-headless-module-split-design.md +98 -0
  158. package/docs/plans/2026-02-24-headless-module-split.md +443 -0
  159. package/docs/plans/2026-02-24-lesson-scope-metadata-design.md +228 -0
  160. package/docs/plans/2026-02-24-lesson-scope-metadata-plan.md +968 -0
  161. package/docs/plans/2026-02-24-npm-packaging-design.md +841 -0
  162. package/docs/plans/2026-02-24-npm-packaging-plan.md +1965 -0
  163. package/docs/plans/audit-findings.md +186 -0
  164. package/docs/telegram-notification-format.md +98 -0
  165. package/examples/example-plan.md +51 -0
  166. package/examples/example-prd.json +72 -0
  167. package/examples/example-roadmap.md +33 -0
  168. package/examples/quickstart-plan.md +63 -0
  169. package/hooks/hooks.json +26 -0
  170. package/hooks/setup-symlinks.sh +48 -0
  171. package/hooks/stop-hook.sh +135 -0
  172. package/package.json +47 -0
  173. package/policies/bash.md +71 -0
  174. package/policies/python.md +71 -0
  175. package/policies/testing.md +61 -0
  176. package/policies/universal.md +60 -0
  177. package/scripts/analyze-report.sh +97 -0
  178. package/scripts/architecture-map.sh +145 -0
  179. package/scripts/auto-compound.sh +273 -0
  180. package/scripts/batch-audit.sh +42 -0
  181. package/scripts/batch-test.sh +101 -0
  182. package/scripts/entropy-audit.sh +221 -0
  183. package/scripts/failure-digest.sh +51 -0
  184. package/scripts/generate-ast-rules.sh +96 -0
  185. package/scripts/init.sh +112 -0
  186. package/scripts/lesson-check.sh +428 -0
  187. package/scripts/lib/common.sh +61 -0
  188. package/scripts/lib/cost-tracking.sh +153 -0
  189. package/scripts/lib/ollama.sh +60 -0
  190. package/scripts/lib/progress-writer.sh +128 -0
  191. package/scripts/lib/run-plan-context.sh +215 -0
  192. package/scripts/lib/run-plan-echo-back.sh +231 -0
  193. package/scripts/lib/run-plan-headless.sh +396 -0
  194. package/scripts/lib/run-plan-notify.sh +57 -0
  195. package/scripts/lib/run-plan-parser.sh +81 -0
  196. package/scripts/lib/run-plan-prompt.sh +215 -0
  197. package/scripts/lib/run-plan-quality-gate.sh +132 -0
  198. package/scripts/lib/run-plan-routing.sh +315 -0
  199. package/scripts/lib/run-plan-sampling.sh +170 -0
  200. package/scripts/lib/run-plan-scoring.sh +146 -0
  201. package/scripts/lib/run-plan-state.sh +142 -0
  202. package/scripts/lib/run-plan-team.sh +199 -0
  203. package/scripts/lib/telegram.sh +54 -0
  204. package/scripts/lib/thompson-sampling.sh +176 -0
  205. package/scripts/license-check.sh +74 -0
  206. package/scripts/mab-run.sh +575 -0
  207. package/scripts/module-size-check.sh +146 -0
  208. package/scripts/patterns/async-no-await.yml +5 -0
  209. package/scripts/patterns/bare-except.yml +6 -0
  210. package/scripts/patterns/empty-catch.yml +6 -0
  211. package/scripts/patterns/hardcoded-localhost.yml +9 -0
  212. package/scripts/patterns/retry-loop-no-backoff.yml +12 -0
  213. package/scripts/pipeline-status.sh +197 -0
  214. package/scripts/policy-check.sh +226 -0
  215. package/scripts/prior-art-search.sh +133 -0
  216. package/scripts/promote-mab-lessons.sh +126 -0
  217. package/scripts/prompts/agent-a-superpowers.md +29 -0
  218. package/scripts/prompts/agent-b-ralph.md +29 -0
  219. package/scripts/prompts/judge-agent.md +61 -0
  220. package/scripts/prompts/planner-agent.md +44 -0
  221. package/scripts/pull-community-lessons.sh +90 -0
  222. package/scripts/quality-gate.sh +266 -0
  223. package/scripts/research-gate.sh +90 -0
  224. package/scripts/run-plan.sh +329 -0
  225. package/scripts/scope-infer.sh +159 -0
  226. package/scripts/setup-ralph-loop.sh +155 -0
  227. package/scripts/telemetry.sh +230 -0
  228. package/scripts/tests/run-all-tests.sh +52 -0
  229. package/scripts/tests/test-act-cli.sh +46 -0
  230. package/scripts/tests/test-agents-md.sh +87 -0
  231. package/scripts/tests/test-analyze-report.sh +114 -0
  232. package/scripts/tests/test-architecture-map.sh +89 -0
  233. package/scripts/tests/test-auto-compound.sh +169 -0
  234. package/scripts/tests/test-batch-test.sh +65 -0
  235. package/scripts/tests/test-benchmark-runner.sh +25 -0
  236. package/scripts/tests/test-common.sh +168 -0
  237. package/scripts/tests/test-cost-tracking.sh +158 -0
  238. package/scripts/tests/test-echo-back.sh +180 -0
  239. package/scripts/tests/test-entropy-audit.sh +146 -0
  240. package/scripts/tests/test-failure-digest.sh +66 -0
  241. package/scripts/tests/test-generate-ast-rules.sh +145 -0
  242. package/scripts/tests/test-helpers.sh +82 -0
  243. package/scripts/tests/test-init.sh +47 -0
  244. package/scripts/tests/test-lesson-check.sh +278 -0
  245. package/scripts/tests/test-lesson-local.sh +55 -0
  246. package/scripts/tests/test-license-check.sh +109 -0
  247. package/scripts/tests/test-mab-run.sh +182 -0
  248. package/scripts/tests/test-ollama-lib.sh +49 -0
  249. package/scripts/tests/test-ollama.sh +60 -0
  250. package/scripts/tests/test-pipeline-status.sh +198 -0
  251. package/scripts/tests/test-policy-check.sh +124 -0
  252. package/scripts/tests/test-prior-art-search.sh +96 -0
  253. package/scripts/tests/test-progress-writer.sh +140 -0
  254. package/scripts/tests/test-promote-mab-lessons.sh +110 -0
  255. package/scripts/tests/test-pull-community-lessons.sh +149 -0
  256. package/scripts/tests/test-quality-gate.sh +241 -0
  257. package/scripts/tests/test-research-gate.sh +132 -0
  258. package/scripts/tests/test-run-plan-cli.sh +86 -0
  259. package/scripts/tests/test-run-plan-context.sh +305 -0
  260. package/scripts/tests/test-run-plan-e2e.sh +153 -0
  261. package/scripts/tests/test-run-plan-headless.sh +424 -0
  262. package/scripts/tests/test-run-plan-notify.sh +124 -0
  263. package/scripts/tests/test-run-plan-parser.sh +217 -0
  264. package/scripts/tests/test-run-plan-prompt.sh +254 -0
  265. package/scripts/tests/test-run-plan-quality-gate.sh +222 -0
  266. package/scripts/tests/test-run-plan-routing.sh +178 -0
  267. package/scripts/tests/test-run-plan-scoring.sh +148 -0
  268. package/scripts/tests/test-run-plan-state.sh +261 -0
  269. package/scripts/tests/test-run-plan-team.sh +157 -0
  270. package/scripts/tests/test-scope-infer.sh +150 -0
  271. package/scripts/tests/test-setup-ralph-loop.sh +63 -0
  272. package/scripts/tests/test-telegram-env.sh +38 -0
  273. package/scripts/tests/test-telegram.sh +121 -0
  274. package/scripts/tests/test-telemetry.sh +46 -0
  275. package/scripts/tests/test-thompson-sampling.sh +139 -0
  276. package/scripts/tests/test-validate-all.sh +60 -0
  277. package/scripts/tests/test-validate-commands.sh +89 -0
  278. package/scripts/tests/test-validate-hooks.sh +98 -0
  279. package/scripts/tests/test-validate-lessons.sh +150 -0
  280. package/scripts/tests/test-validate-plan-quality.sh +235 -0
  281. package/scripts/tests/test-validate-plans.sh +187 -0
  282. package/scripts/tests/test-validate-plugin.sh +106 -0
  283. package/scripts/tests/test-validate-prd.sh +184 -0
  284. package/scripts/tests/test-validate-skills.sh +134 -0
  285. package/scripts/validate-all.sh +57 -0
  286. package/scripts/validate-commands.sh +67 -0
  287. package/scripts/validate-hooks.sh +89 -0
  288. package/scripts/validate-lessons.sh +98 -0
  289. package/scripts/validate-plan-quality.sh +369 -0
  290. package/scripts/validate-plans.sh +120 -0
  291. package/scripts/validate-plugin.sh +86 -0
  292. package/scripts/validate-policies.sh +42 -0
  293. package/scripts/validate-prd.sh +118 -0
  294. package/scripts/validate-skills.sh +96 -0
  295. package/skills/autocode/SKILL.md +285 -0
  296. package/skills/autocode/ab-verification.md +51 -0
  297. package/skills/autocode/code-quality-standards.md +37 -0
  298. package/skills/autocode/competitive-mode.md +364 -0
  299. package/skills/brainstorming/SKILL.md +97 -0
  300. package/skills/capture-lesson/SKILL.md +187 -0
  301. package/skills/check-lessons/SKILL.md +116 -0
  302. package/skills/dispatching-parallel-agents/SKILL.md +110 -0
  303. package/skills/executing-plans/SKILL.md +85 -0
  304. package/skills/finishing-a-development-branch/SKILL.md +201 -0
  305. package/skills/receiving-code-review/SKILL.md +72 -0
  306. package/skills/requesting-code-review/SKILL.md +59 -0
  307. package/skills/requesting-code-review/code-reviewer.md +82 -0
  308. package/skills/research/SKILL.md +145 -0
  309. package/skills/roadmap/SKILL.md +115 -0
  310. package/skills/subagent-driven-development/SKILL.md +98 -0
  311. package/skills/subagent-driven-development/code-quality-reviewer-prompt.md +18 -0
  312. package/skills/subagent-driven-development/implementer-prompt.md +73 -0
  313. package/skills/subagent-driven-development/spec-reviewer-prompt.md +57 -0
  314. package/skills/systematic-debugging/SKILL.md +134 -0
  315. package/skills/systematic-debugging/condition-based-waiting.md +64 -0
  316. package/skills/systematic-debugging/defense-in-depth.md +32 -0
  317. package/skills/systematic-debugging/root-cause-tracing.md +55 -0
  318. package/skills/test-driven-development/SKILL.md +167 -0
  319. package/skills/using-git-worktrees/SKILL.md +219 -0
  320. package/skills/using-superpowers/SKILL.md +54 -0
  321. package/skills/verification-before-completion/SKILL.md +140 -0
  322. package/skills/verify/SKILL.md +82 -0
  323. package/skills/writing-plans/SKILL.md +128 -0
  324. package/skills/writing-skills/SKILL.md +93 -0
@@ -0,0 +1,2046 @@
1
+ # Multi-Armed Bandit System Implementation Plan
2
+
3
+ > **Status:** SUPERSEDED by `docs/plans/2026-02-23-roadmap-to-completion.md` Phase 4.
4
+ > This plan is preserved for reference. The roadmap plan incorporates research findings
5
+ > that changed the architecture (Thompson Sampling replaces LLM planner, human calibration
6
+ > for first 10 decisions, selective MAB, 4 batches instead of 6).
7
+
8
+ > **For Claude:** REQUIRED SUB-SKILL: Use superpowers:executing-plans to implement this plan task-by-task.
9
+
10
+ **Goal:** Implement competing autonomous agents (superpowers vs ralph-wiggum) that execute the same brief in parallel worktrees, judged by an LLM that extracts lessons and updates strategy performance data.
11
+
12
+ **Architecture:** Thin bash orchestrator (`mab-run.sh`) creates worktrees, launches agents via `claude -p`, runs quality gates on both, then launches a judge agent that picks a winner and extracts lessons. ~~A planner agent routes work units to MAB or single strategy based on `strategy-perf.json` historical data.~~ **Updated:** Thompson Sampling function replaces LLM planner agent — cheaper and better calibrated. An architecture map generator scans the project source to produce `ARCHITECTURE-MAP.json` for planner/judge context.
13
+
14
+ **Tech Stack:** Bash (orchestration), `claude -p` (agents), `jq` (JSON manipulation), Git worktrees (isolation)
15
+
16
+ **Design doc:** `docs/plans/2026-02-22-mab-run-design.md` (updated 2026-02-23 with research findings)
17
+
18
+ ---
19
+
20
+ ## Batch 1: Agent Prompts and Architecture Map Generator
21
+
22
+ Core prompt files that define agent behavior, plus the architecture map generator that feeds project structure to the planner and judge.
23
+
24
+ ### Task 1: Create Agent A (superpowers) prompt
25
+
26
+ **Files:**
27
+ - Create: `scripts/prompts/agent-a-superpowers.md`
28
+
29
+ **Step 1: Write the prompt file**
30
+
31
+ ```markdown
32
+ # Agent A — Superpowers Strategy
33
+
34
+ You are executing a work unit using the **superpowers skill chain**.
35
+
36
+ ## Shared Brief
37
+
38
+ {DESIGN_DOC}
39
+
40
+ ## PRD
41
+
42
+ {PRD_CONTENT}
43
+
44
+ ## Architecture Map
45
+
46
+ {ARCHITECTURE_MAP}
47
+
48
+ ## Previous MAB Lessons
49
+
50
+ {MAB_LESSONS}
51
+
52
+ ## Instructions
53
+
54
+ 1. **Write your own implementation plan first.** Analyze the design doc, PRD, and architecture map. Produce a step-by-step plan before writing any code.
55
+ 2. **Follow TDD:** Write failing test → verify it fails → implement minimal code → verify it passes → commit.
56
+ 3. **Run quality gates between logical groups of tasks.** Use the quality gate command: `{QUALITY_GATE_CMD}`
57
+ 4. **Commit after each passing gate** with descriptive messages.
58
+ 5. **Append discoveries to progress.txt** after each logical unit.
59
+
60
+ ## Toolkit Context
61
+
62
+ You have access to all toolkit skills, lessons, and hooks. Follow CLAUDE.md conventions. Use `lesson-check.sh` before committing.
63
+
64
+ ## Completion
65
+
66
+ You are done when all PRD acceptance criteria pass (exit 0). Run each criterion and report results.
67
+ ```
68
+
69
+ **Step 2: Verify file exists**
70
+
71
+ Run: `test -f scripts/prompts/agent-a-superpowers.md && echo "OK" || echo "MISSING"`
72
+ Expected: OK
73
+
74
+ ### Task 2: Create Agent B (ralph) prompt
75
+
76
+ **Files:**
77
+ - Create: `scripts/prompts/agent-b-ralph.md`
78
+
79
+ **Step 1: Write the prompt file**
80
+
81
+ ```markdown
82
+ # Agent B — Ralph Wiggum Strategy
83
+
84
+ You are executing a work unit using the **ralph-loop approach**.
85
+
86
+ ## Shared Brief
87
+
88
+ {DESIGN_DOC}
89
+
90
+ ## PRD
91
+
92
+ {PRD_CONTENT}
93
+
94
+ ## Architecture Map
95
+
96
+ {ARCHITECTURE_MAP}
97
+
98
+ ## Previous MAB Lessons
99
+
100
+ {MAB_LESSONS}
101
+
102
+ ## Instructions
103
+
104
+ 1. **All PRD acceptance criteria in the PRD section must pass (exit 0).**
105
+ 2. **Iterate until done.** Read the criteria, start coding, test, fix, repeat.
106
+ 3. **Use any toolkit skills as needed** — TDD, debugging, etc. are available but not mandated in a specific order.
107
+ 4. **Run quality gate periodically:** `{QUALITY_GATE_CMD}`
108
+ 5. **Commit working increments** with descriptive messages.
109
+ 6. **Append discoveries to progress.txt** as you go.
110
+
111
+ ## Toolkit Context
112
+
113
+ You have access to all toolkit skills, lessons, and hooks. Follow CLAUDE.md conventions. Use `lesson-check.sh` before committing.
114
+
115
+ ## Completion
116
+
117
+ You are done when ALL acceptance criteria pass. Run each criterion and report results.
118
+ ```
119
+
120
+ **Step 2: Verify file exists**
121
+
122
+ Run: `test -f scripts/prompts/agent-b-ralph.md && echo "OK" || echo "MISSING"`
123
+ Expected: OK
124
+
125
+ ### Task 3: Create planner agent prompt
126
+
127
+ **Files:**
128
+ - Create: `scripts/prompts/planner-agent.md`
129
+
130
+ **Step 1: Write the prompt file**
131
+
132
+ ```markdown
133
+ # Planner Agent — MAB Routing Decisions
134
+
135
+ You are a routing planner for the Multi-Armed Bandit system. Your job is to decide which work units should be MAB tested and which should go to a single strategy.
136
+
137
+ ## Inputs
138
+
139
+ ### Design Doc
140
+ {DESIGN_DOC}
141
+
142
+ ### PRD Task Graph
143
+ {PRD_CONTENT}
144
+
145
+ ### Architecture Map
146
+ {ARCHITECTURE_MAP}
147
+
148
+ ### Strategy Performance Data
149
+ {STRATEGY_PERF}
150
+
151
+ ## Decision Rules
152
+
153
+ For each work unit:
154
+
155
+ 1. **Classify type:** new-file, refactoring, integration, test-only
156
+ 2. **Check strategy-perf data** for this type
157
+ 3. **If clear winner** (>70% win rate, 10+ data points): route to winner
158
+ 4. **If uncertain** or insufficient data: MAB run
159
+ 5. **If error-prone type** (historically high retry rate): MAB run
160
+
161
+ ## Work Unit Sizing
162
+
163
+ | Project size | Strategy |
164
+ |-------------|----------|
165
+ | Small (< 5 PRD tasks) | MAB the whole project |
166
+ | Medium (5-15 PRD tasks) | Chunk by PRD dependency groups, route per chunk |
167
+ | Large (16+ PRD tasks) | Phase 1: MAB (explore), Phase 2+: route to winners (exploit) |
168
+
169
+ ## Output Format
170
+
171
+ Respond with ONLY this JSON (no markdown fences, no explanation):
172
+
173
+ {
174
+ "routing": [
175
+ {
176
+ "unit": 1,
177
+ "description": "description of work unit",
178
+ "type": "new-file|refactoring|integration|test-only",
179
+ "decision": "mab_run|single",
180
+ "strategy": "superpowers|ralph|null",
181
+ "reasoning": "brief explanation"
182
+ }
183
+ ]
184
+ }
185
+ ```
186
+
187
+ **Step 2: Verify file exists**
188
+
189
+ Run: `test -f scripts/prompts/planner-agent.md && echo "OK" || echo "MISSING"`
190
+ Expected: OK
191
+
192
+ ### Task 4: Create judge agent prompt
193
+
194
+ **Files:**
195
+ - Create: `scripts/prompts/judge-agent.md`
196
+
197
+ **Step 1: Write the prompt file**
198
+
199
+ ```markdown
200
+ # Judge Agent — MAB Evaluation
201
+
202
+ You are evaluating two competing implementations of the same work unit. Pick the winner and extract lessons.
203
+
204
+ ## Context
205
+
206
+ ### Design Doc
207
+ {DESIGN_DOC}
208
+
209
+ ### PRD
210
+ {PRD_CONTENT}
211
+
212
+ ### Architecture Map
213
+ {ARCHITECTURE_MAP}
214
+
215
+ ### Previous MAB Lessons
216
+ {MAB_LESSONS}
217
+
218
+ ## Agent A Diff (superpowers strategy)
219
+ ```
220
+ {DIFF_A}
221
+ ```
222
+
223
+ ## Agent A Quality Gate Results
224
+ {GATE_A}
225
+
226
+ ## Agent B Diff (ralph strategy)
227
+ ```
228
+ {DIFF_B}
229
+ ```
230
+
231
+ ## Agent B Quality Gate Results
232
+ {GATE_B}
233
+
234
+ ## Automated Scores
235
+ - Agent A: gate_passed={GATE_A_PASSED}, test_count={TESTS_A}, diff_lines={DIFF_SIZE_A}
236
+ - Agent B: gate_passed={GATE_B_PASSED}, test_count={TESTS_B}, diff_lines={DIFF_SIZE_B}
237
+
238
+ ## Evaluation Criteria
239
+
240
+ 1. **WINNER SELECTION** — Which implementation better serves the overall architecture?
241
+ 2. **BIDIRECTIONAL LESSONS** — What did the winner do well that the loser should learn from? What did the loser do well that the winner should learn from?
242
+ 3. **FAILURE MODE CLASSIFICATION** — Categories: over-engineering, under-testing, code-duplication, integration-gap, convention-violation, wrong-abstraction-level
243
+ 4. **TOOLKIT COMPLIANCE** — CLAUDE.md conventions? TDD? Hookify blocks? Verification?
244
+ 5. **STRATEGY RECOMMENDATION** — For this work unit type, which strategy should be preferred? Confidence?
245
+
246
+ ## Output Format
247
+
248
+ Respond with ONLY this JSON (no markdown fences, no explanation):
249
+
250
+ {
251
+ "winner": "agent_a|agent_b",
252
+ "confidence": "low|medium|high",
253
+ "reasoning": "2-3 sentences explaining the decision",
254
+ "failure_mode": "category from list above",
255
+ "toolkit_compliance": {
256
+ "agent_a": {"tdd": true/false, "conventions": true/false, "hookify_blocks": 0},
257
+ "agent_b": {"tdd": true/false, "conventions": true/false, "hookify_blocks": 0}
258
+ },
259
+ "lessons": [
260
+ {
261
+ "pattern": "what was learned",
262
+ "context": "when this applies",
263
+ "recommendation": "what to do differently",
264
+ "source_strategy": "agent_a|agent_b",
265
+ "lesson_type": "syntactic|semantic"
266
+ }
267
+ ],
268
+ "strategy_update": {
269
+ "batch_type": "new-file|refactoring|integration|test-only",
270
+ "winner": "superpowers|ralph",
271
+ "confidence": "low|medium|high"
272
+ }
273
+ }
274
+ ```
275
+
276
+ **Step 2: Verify file exists**
277
+
278
+ Run: `test -f scripts/prompts/judge-agent.md && echo "OK" || echo "MISSING"`
279
+ Expected: OK
280
+
281
+ ### Task 5: Write failing tests for architecture-map.sh
282
+
283
+ **Files:**
284
+ - Create: `scripts/tests/test-architecture-map.sh`
285
+
286
+ **Step 1: Write the test file**
287
+
288
+ ```bash
289
#!/usr/bin/env bash
# Tests for architecture-map.sh: CLI help, map generation on a small synthetic
# project, and the empty-project case.
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
source "$SCRIPT_DIR/test-helpers.sh"

SCRIPT="$SCRIPT_DIR/../architecture-map.sh"

# Scratch directories. Deliberately NOT named TMPDIR: mktemp reads that
# variable, so reassigning it (when it is exported) would place every later
# temp dir inside the fixture project. Both dirs are removed by a single trap.
WORK_DIR=$(mktemp -d)
EMPTY_DIR=$(mktemp -d)
trap 'rm -rf "$WORK_DIR" "$EMPTY_DIR"' EXIT

# --- CLI tests ---
assert_exit "help exits 0" 0 "$SCRIPT" --help

# "|| true": under `set -e` a failing capture would abort the whole test run
# before the assertions below get a chance to report the failure.
output=$("$SCRIPT" --help 2>&1) || true
assert_contains "help mentions output" "ARCHITECTURE-MAP.json" "$output"

# --- Generate on a temp project ---
# Create a minimal project structure
mkdir -p "$WORK_DIR/src" "$WORK_DIR/tests"
cat > "$WORK_DIR/src/main.sh" << 'SH'
#!/usr/bin/env bash
source ./src/utils.sh
SH
cat > "$WORK_DIR/src/utils.sh" << 'SH'
#!/usr/bin/env bash
echo "utility"
SH
cat > "$WORK_DIR/src/app.py" << 'PY'
from src.utils import helper
import os
PY

output=$("$SCRIPT" --project-root "$WORK_DIR" 2>&1) || true
assert_exit "generates successfully" 0 "$SCRIPT" --project-root "$WORK_DIR"
assert_contains "output is JSON" "modules" "$output"

# Verify output file
assert_eq "creates ARCHITECTURE-MAP.json" "true" "$(test -f "$WORK_DIR/docs/ARCHITECTURE-MAP.json" && echo true || echo false)"

# Verify JSON structure (an absent file yields an empty string so the
# assertions fail with a message instead of the script dying on `cat`)
map_content=$(cat "$WORK_DIR/docs/ARCHITECTURE-MAP.json" 2>/dev/null || true)
assert_contains "has generated_at" "generated_at" "$map_content"
assert_contains "has modules array" "modules" "$map_content"

# Verify module detection
assert_contains "detects shell source" "utils.sh" "$map_content"

# --- Empty project ---
assert_exit "empty project exits 0" 0 "$SCRIPT" --project-root "$EMPTY_DIR"

report_results
343
+ ```
344
+
345
+ **Step 2: Run tests to verify they fail**
346
+
347
+ Run: `bash scripts/tests/test-architecture-map.sh 2>&1 | tail -5`
348
+ Expected: FAIL (script doesn't exist yet)
349
+
350
+ ### Task 6: Implement architecture-map.sh
351
+
352
+ **Files:**
353
+ - Create: `scripts/architecture-map.sh`
354
+
355
+ **Step 1: Write the script**
356
+
357
+ ```bash
358
#!/usr/bin/env bash
set -euo pipefail
# architecture-map.sh — Generate ARCHITECTURE-MAP.json from project source
#
# Usage: architecture-map.sh --project-root <dir> [--output <file>]
#
# Scans source files for import/source statements and produces a module
# dependency graph as JSON.

# --- Usage ---
# Print the help text to stdout.
usage() {
    cat <<'USAGE'
architecture-map.sh — Generate module dependency graph

Usage:
  architecture-map.sh --project-root <dir> [--output <file>]

Options:
  --project-root <dir>   Project root directory to scan
  --output <file>        Output file (default: <project-root>/docs/ARCHITECTURE-MAP.json)
  -h, --help             Show this help

Output:
  Produces docs/ARCHITECTURE-MAP.json with module names, files, and dependency edges
  derived from import/source/require statements.
USAGE
}

# --- Argument parsing ---
PROJECT_ROOT=""
OUTPUT_FILE=""

while [[ $# -gt 0 ]]; do
    case "$1" in
        -h|--help) usage; exit 0 ;;
        --project-root|--output)
            # Without this guard a trailing option with no value trips
            # `set -u` on "$2" and dies with a cryptic "unbound variable".
            if [[ $# -lt 2 ]]; then
                echo "ERROR: $1 requires a value" >&2
                exit 1
            fi
            if [[ "$1" == "--project-root" ]]; then
                PROJECT_ROOT="$2"
            else
                OUTPUT_FILE="$2"
            fi
            shift 2
            ;;
        *) echo "ERROR: Unknown option: $1" >&2; exit 1 ;;
    esac
done

if [[ -z "$PROJECT_ROOT" ]]; then
    echo "ERROR: --project-root required" >&2
    exit 1
fi

if [[ ! -d "$PROJECT_ROOT" ]]; then
    echo "ERROR: project root is not a directory: $PROJECT_ROOT" >&2
    exit 1
fi

# The scan and the final output are both built with jq; fail early and
# clearly rather than midway through the scan.
if ! command -v jq >/dev/null 2>&1; then
    echo "ERROR: jq is required but was not found in PATH" >&2
    exit 1
fi

if [[ -z "$OUTPUT_FILE" ]]; then
    OUTPUT_FILE="$PROJECT_ROOT/docs/ARCHITECTURE-MAP.json"
fi

mkdir -p "$(dirname "$OUTPUT_FILE")"
409
+
410
+ # --- Scan functions ---
411
+
412
# Extract shell source dependencies.
#
# Prints one sourced path per line for every `source <path>` / `. <path>`
# found in command position: at the start of a line, after `;`, `&`, `|`,
# `(` or `{`, or after the keywords then/do/else. Requiring command position
# keeps words such as "resource x" and sentence punctuation in comments
# ("done. Next") from being reported as dependencies.
#
# Args:   $1 - path of the shell file to scan
# Output: dependency paths with surrounding quotes removed (may be empty)
# Return: always 0; an unreadable file simply produces no output
scan_shell() {
    local file="$1"
    { grep -oE '(^[[:space:]]*|[;&|({][[:space:]]*|(then|do|else)[[:space:]]+)(source|\.)[[:space:]]+[^[:space:];]+' "$file" 2>/dev/null || true; } \
        | sed -E 's/^.*[[:space:]]//' \
        | tr -d "\"'"
    # The sed step keeps only the last whitespace-delimited token of each
    # match, which is the path (the path pattern itself contains no whitespace).
}
419
+
420
# Extract Python import dependencies.
#
# Emits the module named on every line that starts (at column 0) with
# `from <module>` and then on every line that starts with `import <module>`;
# all `from` modules are printed before all `import` modules.
#
# Args:   $1 - path of the Python file to scan
# Output: one dotted module name per line (may be empty)
# Return: always 0
scan_python() {
    local file="$1"
    local keyword
    for keyword in from import; do
        # -n with the trailing /p prints only lines where the substitution hit
        sed -nE "s/^${keyword} ([a-zA-Z0-9_.]+).*\$/\\1/p" "$file" 2>/dev/null || true
    done
}
428
+
429
# Extract JS/TS import dependencies.
#
# Prints the module specifier of every `from '<spec>'` clause, followed by
# the specifier of every `require('<spec>')` call (single or double quotes).
#
# Args:   $1 - path of the JS/TS file to scan
# Output: one module specifier per line (may be empty)
# Return: always 0
scan_js() {
    local file="$1"
    {
        grep -oE "from ['\"][^'\"]+['\"]" "$file" 2>/dev/null \
            | sed "s/from ['\"]//; s/['\"]\$//" || true
        # sed uses basic regular expressions here, where a plain ")" is a
        # literal. Writing "\)" instead is an unmatched group close: sed
        # aborts and every require() dependency is lost.
        grep -oE "require\(['\"][^'\"]+['\"]\)" "$file" 2>/dev/null \
            | sed "s/require(['\"]//; s/['\"])\$//" || true
    } | grep -v '^$' || true
}
437
+
438
# --- Main scan ---
# Accumulates a JSON array of {name, files, depends_on} objects, one per
# directory ("module") that contains at least one recognised source file.
modules_json="[]"

# Files are enumerated relative to the project root (find runs from inside
# it), so the directory exclusions only ever apply to paths *below* the root.
# Matching against absolute paths would exclude every file whenever the root
# itself lives under e.g. `.claude/` or `vendor/`.
# `sort -z` makes the output order independent of filesystem order.
while IFS= read -r -d '' found; do
    rel_path="${found#./}"
    src_file="$PROJECT_ROOT/$rel_path"

    # Pick the scanner for this file type
    case "$rel_path" in
        *.sh|*.bash)                  scanner=scan_shell ;;
        *.py)                         scanner=scan_python ;;
        *.js|*.ts|*.jsx|*.tsx|*.mjs)  scanner=scan_js ;;
        *) continue ;;
    esac

    # Turn the scanner's line-oriented output into a de-duplicated JSON array
    # with one jq call per file (instead of one per dependency).
    deps=$("$scanner" "$src_file" | jq -R -s 'split("\n") | map(select(length > 0)) | unique')

    # Derive module name from directory; top-level files belong to "root"
    module_name=$(dirname "$rel_path")
    if [[ "$module_name" == "." ]]; then
        module_name="root"
    fi

    # Add to modules (merge if module already exists)
    modules_json=$(jq \
        --arg name "$module_name" \
        --arg file "$rel_path" \
        --argjson deps "$deps" \
        '
        if any(.[]; .name == $name) then
            map(if .name == $name then
                .files += [$file] |
                .depends_on = ((.depends_on + $deps) | unique)
            else . end)
        else
            . + [{"name": $name, "files": [$file], "depends_on": $deps}]
        end
        ' <<<"$modules_json")
done < <(cd "$PROJECT_ROOT" && find . \
    \( -name node_modules -o -name .git -o -name __pycache__ -o -name .venv \
       -o -name vendor -o -name .claude \) -prune \
    -o -type f \
    \( -name '*.sh' -o -name '*.bash' -o -name '*.py' -o -name '*.js' -o -name '*.ts' \
       -o -name '*.jsx' -o -name '*.tsx' -o -name '*.mjs' \) \
    -print0 2>/dev/null | sort -z)

# Produce final JSON
jq -n \
    --arg ts "$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
    --argjson modules "$modules_json" \
    '{"generated_at": $ts, "modules": $modules}' \
    > "$OUTPUT_FILE"

# Also print to stdout
cat "$OUTPUT_FILE"
507
+ ```
508
+
509
+ **Step 2: Make executable**
510
+
511
+ Run: `chmod +x scripts/architecture-map.sh`
512
+
513
+ **Step 3: Run tests to verify they pass**
514
+
515
+ Run: `bash scripts/tests/test-architecture-map.sh`
516
+ Expected: ALL PASSED
517
+
518
+ **Step 4: Commit**
519
+
520
+ ```bash
521
+ git add scripts/prompts/ scripts/architecture-map.sh scripts/tests/test-architecture-map.sh
522
+ git commit -m "feat: add agent prompts and architecture-map.sh for Multi-Armed Bandit system"
523
+ ```
524
+
525
+ ---
526
+
527
+ ## Batch 2: MAB Run Orchestrator (mab-run.sh)
528
+
529
+ The core orchestrator that creates worktrees, launches agents, runs gates, invokes the judge, and merges the winner.
530
+
531
+ ### Task 7: Write failing tests for mab-run.sh
532
+
533
+ **Files:**
534
+ - Create: `scripts/tests/test-mab-run.sh`
535
+
536
+ **Step 1: Write the test file**
537
+
538
+ ```bash
539
#!/usr/bin/env bash
# Tests for mab-run.sh: CLI help, missing-argument handling, dry-run output,
# and --init-data file creation.
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
source "$SCRIPT_DIR/test-helpers.sh"

SCRIPT="$SCRIPT_DIR/../mab-run.sh"

# --- CLI tests ---
assert_exit "help exits 0" 0 "$SCRIPT" --help

# "|| true": under `set -e` a failing capture would abort the test run before
# the assertions can report anything.
output=$("$SCRIPT" --help 2>&1) || true
assert_contains "help mentions worktree" "worktree" "$output"
assert_contains "help mentions judge" "judge" "$output"
assert_contains "help mentions design" "design" "$output"

# --- Missing args ---
assert_exit "no args exits 1" 1 "$SCRIPT"

# --- Dry-run mode ---
# Not named TMPDIR: mktemp reads that variable, so it must not be repurposed.
WORK_DIR=$(mktemp -d)
trap 'rm -rf "$WORK_DIR"' EXIT

# Create minimal project for dry-run
mkdir -p "$WORK_DIR/docs" "$WORK_DIR/tasks" "$WORK_DIR/logs"

# Initialise the repository without changing directory. The tree holds only
# empty directories at this point, so the commit needs --allow-empty; a plain
# `git commit` fails with "nothing to commit". An explicit identity keeps the
# commit working on machines with no global git config.
git -C "$WORK_DIR" init -q
git -C "$WORK_DIR" \
    -c user.name="mab-test" -c user.email="mab-test@example.invalid" \
    commit -q --allow-empty -m "init"

cat > "$WORK_DIR/tasks/prd.json" << 'JSON'
{"tasks": [{"id": 1, "description": "test task", "criterion": "exit 0"}]}
JSON

cat > "$WORK_DIR/design.md" << 'MD'
# Test Design
Simple test project.
MD

output=$("$SCRIPT" \
    --design "$WORK_DIR/design.md" \
    --prd "$WORK_DIR/tasks/prd.json" \
    --project-root "$WORK_DIR" \
    --dry-run 2>&1) || true
assert_contains "dry-run shows worktree creation" "worktree" "$output"
assert_contains "dry-run shows agent launch" "agent" "$output"

# --- Data file initialization ---
output=$("$SCRIPT" \
    --design "$WORK_DIR/design.md" \
    --prd "$WORK_DIR/tasks/prd.json" \
    --project-root "$WORK_DIR" \
    --init-data 2>&1) || true

assert_eq "creates strategy-perf.json" "true" \
    "$(test -f "$WORK_DIR/logs/strategy-perf.json" && echo true || echo false)"
assert_eq "creates mab-lessons.json" "true" \
    "$(test -f "$WORK_DIR/logs/mab-lessons.json" && echo true || echo false)"

# Verify JSON structure (an absent file yields an empty string so the
# assertions fail with a message instead of the script dying on `cat`)
strat=$(cat "$WORK_DIR/logs/strategy-perf.json" 2>/dev/null || true)
assert_contains "has new-file type" "new-file" "$strat"
assert_contains "has refactoring type" "refactoring" "$strat"

report_results
601
+ ```
602
+
603
+ **Step 2: Run tests to verify they fail**
604
+
605
+ Run: `bash scripts/tests/test-mab-run.sh 2>&1 | tail -5`
606
+ Expected: FAIL
607
+
608
+ ### Task 8: Implement mab-run.sh — argument parsing and data init
609
+
610
+ **Files:**
611
+ - Create: `scripts/mab-run.sh`
612
+
613
+ **Step 1: Write the script (part 1 — args, data init, dry-run)**
614
+
615
+ ```bash
616
#!/usr/bin/env bash
set -euo pipefail
# mab-run.sh — MAB execution orchestrator
#
# Creates two worktrees, launches competing agents (superpowers vs ralph),
# runs quality gates on both, invokes an LLM judge, merges the winner,
# and records lessons.
#
# Usage:
#   mab-run.sh --design <doc> --prd <file> --project-root <dir> [options]
#
# Options:
#   --design <file>        Design document (required)
#   --prd <file>           PRD JSON file (required)
#   --project-root <dir>   Project root (required)
#   --quality-gate <cmd>   Quality gate command
#   --work-unit <desc>     Work unit description (for logging)
#   --batch-type <type>    Batch type: new-file|refactoring|integration|test-only
#   --dry-run              Print what would happen without executing
#   --init-data            Initialize data files and exit
#   --notify               Send Telegram notifications
#   -h, --help             Show this help

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

# Pull in the shared run-plan libraries; each one is optional and is simply
# skipped when the file is not present next to this script.
for _mab_lib in run-plan-quality-gate.sh run-plan-scoring.sh run-plan-notify.sh; do
    if [[ -f "$SCRIPT_DIR/lib/$_mab_lib" ]]; then
        source "$SCRIPT_DIR/lib/$_mab_lib"
    fi
done
unset _mab_lib

# Ignore HUP/PIPE for background execution safety
trap '' HUP PIPE

# Print the help text to stdout.
usage() {
    cat <<'USAGE'
mab-run.sh — MAB execution orchestrator

Creates two worktrees with competing agents (superpowers vs ralph-wiggum),
runs quality gates, invokes an LLM judge, merges the winner, and records lessons.

Usage:
  mab-run.sh --design <doc> --prd <file> --project-root <dir> [options]

Options:
  --design <file>        Design document (required)
  --prd <file>           PRD JSON file (required)
  --project-root <dir>   Project root directory (required)
  --quality-gate <cmd>   Quality gate command (default: scripts/quality-gate.sh --project-root .)
  --work-unit <desc>     Work unit description for logging
  --batch-type <type>    new-file|refactoring|integration|test-only (default: auto-detect)
  --dry-run              Print actions without executing
  --init-data            Initialize data files (strategy-perf.json, mab-lessons.json) and exit
  --notify               Send Telegram notifications
  -h, --help             Show this help
USAGE
}

# --- Defaults ---
# Inputs (set by argument parsing)
DESIGN_DOC=""
PRD_FILE=""
PROJECT_ROOT=""
WORK_UNIT=""
BATCH_TYPE=""
QUALITY_GATE_CMD="scripts/quality-gate.sh --project-root ."
# Mode switches
DRY_RUN=false
INIT_DATA=false
NOTIFY=false
689
+
690
# --- Argument parsing ---
# Parse command-line options into the DESIGN_DOC / PRD_FILE / PROJECT_ROOT /
# QUALITY_GATE_CMD / WORK_UNIT / BATCH_TYPE / DRY_RUN / INIT_DATA / NOTIFY
# globals and validate the required ones.
#
# Exits 0 after printing help (-h/--help) or after --init-data has created the
# data files; exits 1 on an unknown option, an option missing its value, or a
# missing required option. --init-data needs only --project-root.
parse_mmab_args() {
    while [[ $# -gt 0 ]]; do
        case "$1" in
            -h|--help) usage; exit 0 ;;
            --design|--prd|--project-root|--quality-gate|--work-unit|--batch-type)
                # Guard value-taking options: without it a trailing option
                # with no value trips `set -u` on "$2" (or, without -u, loops
                # forever because `shift 2` fails and shifts nothing).
                if [[ $# -lt 2 ]]; then
                    echo "ERROR: $1 requires a value" >&2
                    exit 1
                fi
                case "$1" in
                    --design)       DESIGN_DOC="$2" ;;
                    --prd)          PRD_FILE="$2" ;;
                    --project-root) PROJECT_ROOT="$2" ;;
                    --quality-gate) QUALITY_GATE_CMD="$2" ;;
                    --work-unit)    WORK_UNIT="$2" ;;
                    --batch-type)   BATCH_TYPE="$2" ;;
                esac
                shift 2
                ;;
            --dry-run)   DRY_RUN=true; shift ;;
            --init-data) INIT_DATA=true; shift ;;
            --notify)    NOTIFY=true; shift ;;
            *) echo "ERROR: Unknown option: $1" >&2; usage >&2; exit 1 ;;
        esac
    done

    if [[ -z "$PROJECT_ROOT" ]]; then
        echo "ERROR: --project-root required" >&2
        exit 1
    fi

    # --init-data is handled before the --design/--prd checks on purpose:
    # creating the data files does not need either of them.
    if [[ "$INIT_DATA" == true ]]; then
        init_data_files
        exit 0
    fi

    if [[ -z "$DESIGN_DOC" ]]; then
        echo "ERROR: --design required" >&2
        exit 1
    fi
    if [[ -z "$PRD_FILE" ]]; then
        echo "ERROR: --prd required" >&2
        exit 1
    fi
}
727
+
728
# --- Data file initialization ---
# Ensure $PROJECT_ROOT/logs exists and seed the two MAB data files. A file
# that already exists is left untouched; each file that gets created is
# announced with a "Created: <path>" line on stdout.
init_data_files() {
    local logs_dir="$PROJECT_ROOT/logs"
    local perf_path="$logs_dir/strategy-perf.json"
    local lessons_path="$logs_dir/mab-lessons.json"

    mkdir -p "$logs_dir"

    # strategy-perf.json — win rates per strategy x batch type
    [[ -f "$perf_path" ]] || {
        cat > "$perf_path" << 'JSON'
{
  "new-file": {
    "superpowers": {"wins": 0, "losses": 0, "total": 0},
    "ralph": {"wins": 0, "losses": 0, "total": 0}
  },
  "refactoring": {
    "superpowers": {"wins": 0, "losses": 0, "total": 0},
    "ralph": {"wins": 0, "losses": 0, "total": 0}
  },
  "integration": {
    "superpowers": {"wins": 0, "losses": 0, "total": 0},
    "ralph": {"wins": 0, "losses": 0, "total": 0}
  },
  "test-only": {
    "superpowers": {"wins": 0, "losses": 0, "total": 0},
    "ralph": {"wins": 0, "losses": 0, "total": 0}
  }
}
JSON
        echo "Created: $perf_path"
    }

    # mab-lessons.json — accumulated MAB lessons, starts as an empty array
    [[ -f "$lessons_path" ]] || {
        echo "[]" > "$lessons_path"
        echo "Created: $lessons_path"
    }
}
763
+
764
# --- Prompt assembly ---
# Read a prompt template and substitute its placeholders with actual content.
#
# Placeholders: {DESIGN_DOC}, {PRD_CONTENT}, {ARCHITECTURE_MAP}, {AB_LESSONS},
# {QUALITY_GATE_CMD}. The content comes from $DESIGN_DOC, $PRD_FILE,
# $PROJECT_ROOT/docs/ARCHITECTURE-MAP.json, $PROJECT_ROOT/logs/mab-lessons.json
# and $QUALITY_GATE_CMD; a missing input is replaced by a short placeholder
# note (or "[]" for the lessons).
#
# Args:   $1 - path of the template file
# Output: the assembled prompt on stdout
# Return: 1 (with a message on stderr) if the template cannot be read
assemble_prompt() {
    local template_file="$1"
    local design_content prd_content map_content lessons_content

    # Callers capture this function with $(...), where errexit is not in
    # effect, so an unreadable template must be reported explicitly instead
    # of silently producing an empty prompt.
    if [[ ! -r "$template_file" ]]; then
        echo "ERROR: prompt template not found: $template_file" >&2
        return 1
    fi

    design_content=$(cat "$DESIGN_DOC" 2>/dev/null || echo "(no design doc)")
    prd_content=$(cat "$PRD_FILE" 2>/dev/null || echo "(no PRD)")

    if [[ -f "$PROJECT_ROOT/docs/ARCHITECTURE-MAP.json" ]]; then
        map_content=$(cat "$PROJECT_ROOT/docs/ARCHITECTURE-MAP.json")
    else
        map_content="(no architecture map — run architecture-map.sh to generate)"
    fi

    lessons_content="[]"
    if [[ -f "$PROJECT_ROOT/logs/mab-lessons.json" ]]; then
        lessons_content=$(cat "$PROJECT_ROOT/logs/mab-lessons.json")
    fi

    local prompt
    prompt=$(cat "$template_file")

    # Substitute placeholders. The replacement is quoted so its text is
    # inserted literally: with an unquoted replacement, bash 5.2+
    # (patsub_replacement) expands every "&" in the content to the matched
    # placeholder, corrupting docs, JSON and shell commands containing "&".
    prompt=${prompt//\{DESIGN_DOC\}/"$design_content"}
    prompt=${prompt//\{PRD_CONTENT\}/"$prd_content"}
    prompt=${prompt//\{ARCHITECTURE_MAP\}/"$map_content"}
    prompt=${prompt//\{AB_LESSONS\}/"$lessons_content"}
    prompt=${prompt//\{QUALITY_GATE_CMD\}/"$QUALITY_GATE_CMD"}

    # printf rather than echo: the prompt is arbitrary text and must not be
    # interpreted as echo options or escapes.
    printf '%s\n' "$prompt"
}
795
+
796
# --- Worktree management ---
# Create one git worktree per competing agent, each on a fresh branch cut
# from the project's current HEAD. Branch and directory names share an
# epoch-seconds suffix. Sets the globals WORKTREE_A, WORKTREE_B, BRANCH_A
# and BRANCH_B used by the rest of the script.
create_worktrees() {
    local stamp
    stamp=$(date +%s)
    local worktree_root="$PROJECT_ROOT/.claude/worktrees"

    BRANCH_A="mab-a-$stamp"
    BRANCH_B="mab-b-$stamp"
    WORKTREE_A="$worktree_root/mab-a-$stamp"
    WORKTREE_B="$worktree_root/mab-b-$stamp"

    git -C "$PROJECT_ROOT" worktree add "$WORKTREE_A" -b "$BRANCH_A" HEAD
    git -C "$PROJECT_ROOT" worktree add "$WORKTREE_B" -b "$BRANCH_B" HEAD

    printf 'Created worktree %s: %s (branch: %s)\n' \
        "A" "$WORKTREE_A" "$BRANCH_A" \
        "B" "$WORKTREE_B" "$BRANCH_B"
}
811
+
812
# Remove both agent worktrees and delete their branches where that is safe.
#
# Worktrees are removed with --force: run_agent and run_agent_gate write
# untracked log files into each worktree, and `git worktree remove` refuses
# to delete a worktree that contains untracked or modified files.
# Branches are deleted with `-d` (never `-D`), so a branch that was not merged
# (normally the losing agent's) is kept and reported rather than destroyed.
# Failures are reported per step on stderr; the function always returns 0.
cleanup_worktrees() {
    echo "Cleaning up worktrees..."

    local label worktree branch
    for label in A B; do
        if [[ "$label" == "A" ]]; then
            worktree="${WORKTREE_A:-}"
            branch="${BRANCH_A:-}"
        else
            worktree="${WORKTREE_B:-}"
            branch="${BRANCH_B:-}"
        fi

        # Nothing to do if the worktrees were never created
        if [[ -z "$worktree" || -z "$branch" ]]; then
            continue
        fi

        if ! git -C "$PROJECT_ROOT" worktree remove --force "$worktree" 2>/dev/null; then
            echo "WARNING: Failed to cleanup worktree $label" >&2
        fi
        if ! git -C "$PROJECT_ROOT" branch -d "$branch" 2>/dev/null; then
            echo "WARNING: Branch $branch was not deleted (not fully merged?); remove it manually if unwanted" >&2
        fi
    done
    return 0
}
823
+
824
# --- Agent execution ---
# Run one headless `claude -p` agent inside its worktree.
#
# Args:   $1 - worktree directory the agent must work in
#         $2 - fully assembled prompt
#         $3 - label used in messages and in the log file name
# Output: progress lines on stdout; the agent's own stdout and stderr go to
#         <worktree>/logs/mab-agent-<label>.log
# Return: the agent's exit code
run_agent() {
    local worktree="$1" prompt="$2" label="$3"
    local log_file="$worktree/logs/mab-agent-$label.log"
    mkdir -p "$worktree/logs"

    echo "Launching agent $label in $worktree..."
    local exit_code=0
    # The subshell `cd` is what actually confines the agent to its worktree.
    # Without it both agents run in the caller's working directory and edit
    # the same checkout. The redirection is attached to the subshell, so the
    # log path is resolved before the directory change.
    (
        cd "$worktree" && \
        CLAUDECODE='' claude -p "$prompt" \
            --allowedTools "Bash,Read,Write,Edit,Grep,Glob" \
            --permission-mode bypassPermissions
    ) > "$log_file" 2>&1 || exit_code=$?

    echo "Agent $label finished (exit code: $exit_code)"
    return $exit_code
}
840
+
841
# --- Quality gate ---
# Run $QUALITY_GATE_CMD from inside a worktree and record its output.
#
# Args:   $1 - worktree directory to run the gate in
#         $2 - label used in messages and in the log file name
# Output: a PASSED/FAILED summary on stdout; the gate's combined output is
#         written to <worktree>/logs/mab-gate-<label>.log
# Return: the gate command's exit code
run_agent_gate() {
    local worktree="$1" label="$2"
    local gate_exit=0
    local gate_output

    echo "Running quality gate for $label..."
    # eval: QUALITY_GATE_CMD is an operator-supplied command line that may
    # carry its own arguments (e.g. "scripts/quality-gate.sh --project-root .")
    gate_output=$(cd "$worktree" && eval "$QUALITY_GATE_CMD" 2>&1) || gate_exit=$?

    # Do not rely on an earlier step having created logs/
    mkdir -p "$worktree/logs"
    printf '%s\n' "$gate_output" > "$worktree/logs/mab-gate-$label.log"

    if [[ $gate_exit -eq 0 ]]; then
        echo " $label: PASSED"
    else
        echo " $label: FAILED (exit $gate_exit)"
    fi

    return $gate_exit
}
860
+
861
# --- Judge ---
# Count lines that look like test definitions under <dir>/tests.
# Prints a single integer (0 when the directory is missing).
_mab_count_tests() {
    local tests_dir="$1/tests"
    if [[ ! -d "$tests_dir" ]]; then
        echo 0
        return 0
    fi
    { grep -rIE '(def test_|it\(|test\()' "$tests_dir" 2>/dev/null || true; } | wc -l | tr -d '[:space:]'
}

# Total lines added plus removed in worktree $1 relative to commit $2.
# Prints a single integer (0 on any git failure).
_mab_lines_changed() {
    { git -C "$1" diff --numstat "$2" 2>/dev/null || true; } \
        | awk '{ total += $1 + $2 } END { print total + 0 }'
}

# Print the first JSON object contained in text $1 as one compact line, or
# "{}" when none can be parsed. Tries the whole text first, then the span from
# the first "{" to the last "}" with newlines flattened (raw newlines cannot
# occur inside JSON strings, so flattening is safe).
_mab_extract_json() {
    local text="$1" candidate
    if jq -e 'type == "object"' <<<"$text" >/dev/null 2>&1; then
        jq -c '.' <<<"$text"
        return 0
    fi
    candidate=$(tr '\n' ' ' <<<"$text" | grep -o '{.*}' | head -1 || true)
    if [[ -n "$candidate" ]] && jq -e 'type == "object"' <<<"$candidate" >/dev/null 2>&1; then
        jq -c '.' <<<"$candidate"
        return 0
    fi
    echo "{}"
}

# Assemble the judge prompt (diffs, gate output, scores), run the judge agent
# and print its verdict.
#
# Reads:  SCRIPT_DIR, PROJECT_ROOT, WORKTREE_A, WORKTREE_B
# Output: stdout carries ONLY the verdict as a single-line JSON object ("{}"
#         if none could be parsed), because callers capture it with $(...).
#         Progress messages go to stderr; the raw judge output is saved to
#         $PROJECT_ROOT/logs/mab-judge-<epoch>.log
run_judge() {
    local template="$SCRIPT_DIR/prompts/judge-agent.md"
    local judge_prompt
    judge_prompt=$(assemble_prompt "$template")

    # Both worktrees were branched from the project's HEAD, so diffing against
    # it shows everything an agent changed: all of its commits plus uncommitted
    # edits to tracked files (not just its last commit). Files an agent created
    # but never `git add`ed are not part of the diff.
    local base
    base=$(git -C "$PROJECT_ROOT" rev-parse HEAD 2>/dev/null || echo "HEAD")

    # Add diffs
    local diff_a diff_b
    diff_a=$(git -C "$WORKTREE_A" diff "$base" 2>/dev/null || true)
    diff_b=$(git -C "$WORKTREE_B" diff "$base" 2>/dev/null || true)
    [[ -n "$diff_a" ]] || diff_a="(no diff)"
    [[ -n "$diff_b" ]] || diff_b="(no diff)"

    # Add gate results
    local gate_a gate_b
    gate_a=$(cat "$WORKTREE_A/logs/mab-gate-agent_a.log" 2>/dev/null || echo "(no gate output)")
    gate_b=$(cat "$WORKTREE_B/logs/mab-gate-agent_b.log" 2>/dev/null || echo "(no gate output)")

    # Add scores: each is a single integer so it substitutes cleanly
    local tests_a tests_b diff_size_a diff_size_b
    tests_a=$(_mab_count_tests "$WORKTREE_A")
    tests_b=$(_mab_count_tests "$WORKTREE_B")
    diff_size_a=$(_mab_lines_changed "$WORKTREE_A" "$base")
    diff_size_b=$(_mab_lines_changed "$WORKTREE_B" "$base")

    # NOTE(review): pass/fail is inferred from keywords in the gate log, which
    # is only a heuristic (a log saying "0 passed" also matches). The gate's
    # real exit code is known to the caller but is not available here.
    local gate_a_passed=0 gate_b_passed=0
    if grep -qE 'PASSED|passed|OK' "$WORKTREE_A/logs/mab-gate-agent_a.log" 2>/dev/null; then
        gate_a_passed=1
    fi
    if grep -qE 'PASSED|passed|OK' "$WORKTREE_B/logs/mab-gate-agent_b.log" 2>/dev/null; then
        gate_b_passed=1
    fi

    # Substitute remaining placeholders. Replacements are quoted so that "&"
    # in diffs and logs is inserted literally (bash 5.2+ would otherwise
    # expand it to the matched placeholder).
    judge_prompt=${judge_prompt//\{DIFF_A\}/"$diff_a"}
    judge_prompt=${judge_prompt//\{DIFF_B\}/"$diff_b"}
    judge_prompt=${judge_prompt//\{GATE_A\}/"$gate_a"}
    judge_prompt=${judge_prompt//\{GATE_B\}/"$gate_b"}
    judge_prompt=${judge_prompt//\{GATE_A_PASSED\}/"$gate_a_passed"}
    judge_prompt=${judge_prompt//\{GATE_B_PASSED\}/"$gate_b_passed"}
    judge_prompt=${judge_prompt//\{TESTS_A\}/"$tests_a"}
    judge_prompt=${judge_prompt//\{TESTS_B\}/"$tests_b"}
    judge_prompt=${judge_prompt//\{DIFF_SIZE_A\}/"$diff_size_a"}
    judge_prompt=${judge_prompt//\{DIFF_SIZE_B\}/"$diff_size_b"}

    mkdir -p "$PROJECT_ROOT/logs"
    local judge_log
    judge_log="$PROJECT_ROOT/logs/mab-judge-$(date +%s).log"

    # stderr, not stdout: stdout is reserved for the JSON verdict
    echo "Running judge agent..." >&2
    local judge_output
    judge_output=$(CLAUDECODE='' claude -p "$judge_prompt" \
        --allowedTools "Read,Grep,Glob" \
        --permission-mode bypassPermissions 2>/dev/null) || true

    printf '%s\n' "$judge_output" > "$judge_log"

    # Extract JSON from judge output (may be wrapped in text)
    _mab_extract_json "$judge_output"
}
916
+
917
# --- Data updates ---
# Record one win/loss pair in $PROJECT_ROOT/logs/strategy-perf.json.
#
# agent_a maps to the "superpowers" strategy and agent_b to "ralph". Any other
# winner value (e.g. "unknown", "tie") is reported and ignored: crediting it
# to a strategy would skew the win rates.
#
# Args:   $1 - winner: agent_a | agent_b
#         $2 - batch type (an empty value is recorded as "unknown")
# Return: always 0; bookkeeping failures are reported on stderr and must not
#         abort a run
update_strategy_perf() {
    local winner="$1" batch_type="$2"
    local perf_file="$PROJECT_ROOT/logs/strategy-perf.json"

    local winner_strategy loser_strategy
    case "$winner" in
        agent_a) winner_strategy="superpowers"; loser_strategy="ralph" ;;
        agent_b) winner_strategy="ralph"; loser_strategy="superpowers" ;;
        *)
            echo "WARNING: unrecognized winner '$winner'; strategy-perf.json not updated" >&2
            return 0
            ;;
    esac

    if [[ ! -f "$perf_file" ]]; then
        init_data_files
    fi

    # Ensure batch_type exists in perf file, then bump the counters
    local bt="${batch_type:-unknown}"
    if jq --arg bt "$bt" --arg ws "$winner_strategy" --arg ls "$loser_strategy" '
        .[$bt] //= {"superpowers": {"wins": 0, "losses": 0, "total": 0}, "ralph": {"wins": 0, "losses": 0, "total": 0}} |
        .[$bt][$ws].wins += 1 |
        .[$bt][$ws].total += 1 |
        .[$bt][$ls].losses += 1 |
        .[$bt][$ls].total += 1
    ' "$perf_file" > "$perf_file.tmp"; then
        mv "$perf_file.tmp" "$perf_file"
        echo "Updated strategy-perf.json: $winner_strategy wins for $bt"
    else
        # Leave the existing file untouched and do not claim success
        rm -f "$perf_file.tmp"
        echo "WARNING: failed to update $perf_file" >&2
    fi
    return 0
}
945
+
946
# Append the judge's lessons to $PROJECT_ROOT/logs/mab-lessons.json.
#
# Every lesson is stored as an object tagged with timestamp, project, work
# unit, batch type, winner and failure mode. A lesson the judge returned as a
# plain string (or any other non-object) is wrapped as {"pattern": <text>} so
# it is kept instead of making the whole append fail. All lessons are added
# with a single rewrite of the file.
#
# Args:   $1 - judge verdict JSON (uses .lessons, .winner, .failure_mode)
#         $2 - batch type
#         $3 - work unit description
# Output: "Recorded <n> lessons to mab-lessons.json" on stdout
# Return: always 0; failures are reported on stderr
record_mab_lessons() {
    local judge_json="$1" batch_type="$2" work_unit="$3"
    local lessons_file="$PROJECT_ROOT/logs/mab-lessons.json"

    mkdir -p "$(dirname "$lessons_file")"
    if [[ ! -f "$lessons_file" ]]; then
        echo "[]" > "$lessons_file"
    fi

    # Extract the lessons array; anything that is not an array counts as none.
    # The emptiness guards cover empty or unparsable verdicts, for which jq
    # prints nothing.
    local lessons winner failure_mode
    lessons=$(jq -c 'if (.lessons | type) == "array" then .lessons else [] end' <<<"$judge_json" 2>/dev/null || true)
    [[ -n "$lessons" ]] || lessons="[]"
    winner=$(jq -r '.winner // "unknown"' <<<"$judge_json" 2>/dev/null || true)
    [[ -n "$winner" ]] || winner="unknown"
    failure_mode=$(jq -r '.failure_mode // "unknown"' <<<"$judge_json" 2>/dev/null || true)
    [[ -n "$failure_mode" ]] || failure_mode="unknown"

    local count
    count=$(jq 'length' <<<"$lessons" 2>/dev/null || echo 0)

    if [[ "$count" -gt 0 ]]; then
        if jq --argjson new "$lessons" \
            --arg ts "$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
            --arg project "$(basename "$PROJECT_ROOT")" \
            --arg wu "$work_unit" \
            --arg bt "$batch_type" \
            --arg winner "$winner" \
            --arg fm "$failure_mode" \
            '. + ($new | map(
                (if type == "object" then . else {"pattern": tostring} end)
                + {"timestamp": $ts, "project": $project, "work_unit": $wu, "batch_type": $bt, "winner": $winner, "failure_mode": $fm}
            ))' \
            "$lessons_file" > "$lessons_file.tmp"; then
            mv "$lessons_file.tmp" "$lessons_file"
        else
            # Keep the existing file intact and report nothing as recorded
            rm -f "$lessons_file.tmp"
            echo "WARNING: failed to append lessons to $lessons_file" >&2
            count=0
        fi
    fi

    echo "Recorded $count lessons to mab-lessons.json"
    return 0
}
976
+
977
# --- Merge winner ---
# Merge the winning agent's branch into the project checkout.
#
# Args:   $1 - winner; "agent_a" selects BRANCH_A, any other value BRANCH_B
# Return: the exit status of `git merge`
merge_winner() {
    local winner="$1"
    local winner_branch

    case "$winner" in
        agent_a) winner_branch="$BRANCH_A" ;;
        *)       winner_branch="$BRANCH_B" ;;
    esac

    echo "Merging winner branch: $winner_branch"
    git -C "$PROJECT_ROOT" merge "$winner_branch" --no-edit
}
991
+
992
# --- Main orchestration ---
# Run one full MAB round: create two worktrees, launch both agents in
# parallel, run the quality gate on each, ask the judge for a verdict, merge
# the winner, update the data files, write a run log and clean up.
#
# With DRY_RUN=true only the planned steps are printed.
#
# Return: 0 on success (or dry run); 1 when neither agent passed the quality
#         gate or the merge failed. In both failure cases the worktrees are
#         left in place for manual review.
run_mab() {
    if [[ "$DRY_RUN" == true ]]; then
        echo "=== DRY RUN ==="
        echo "Would create worktree A (superpowers agent)"
        echo "Would create worktree B (ralph agent)"
        echo "Would launch agent A with superpowers prompt"
        echo "Would launch agent B with ralph prompt"
        echo "Would run quality gate on both"
        echo "Would invoke judge agent"
        echo "Would merge winner and record lessons"
        return 0
    fi

    echo ""
    echo "╔══════════════════════════════════════════════════════╗"
    echo "║ MAB Run — Competing Agents ║"
    echo "║ Design: $(basename "$DESIGN_DOC")"
    echo "║ PRD: $(basename "$PRD_FILE")"
    echo "║ Type: ${BATCH_TYPE:-auto}"
    echo "╚══════════════════════════════════════════════════════╝"

    # Initialize data files
    init_data_files

    # Generate architecture map (best effort)
    if [[ -f "$SCRIPT_DIR/architecture-map.sh" ]]; then
        echo "Generating architecture map..."
        { "$SCRIPT_DIR/architecture-map.sh" --project-root "$PROJECT_ROOT" > /dev/null 2>&1; } \
            || echo "WARNING: architecture-map.sh failed (non-fatal)" >&2
    fi

    # Create worktrees
    create_worktrees

    # Assemble prompts
    local prompt_a prompt_b
    prompt_a=$(assemble_prompt "$SCRIPT_DIR/prompts/agent-a-superpowers.md")
    prompt_b=$(assemble_prompt "$SCRIPT_DIR/prompts/agent-b-ralph.md")

    # Launch agents in parallel
    echo ""
    echo "--- Launching agents in parallel ---"
    local pid_a pid_b exit_a=0 exit_b=0

    run_agent "$WORKTREE_A" "$prompt_a" "agent_a" &
    pid_a=$!

    run_agent "$WORKTREE_B" "$prompt_b" "agent_b" &
    pid_b=$!

    # Wait for both
    wait "$pid_a" || exit_a=$?
    wait "$pid_b" || exit_b=$?

    echo ""
    echo "Agent A exit: $exit_a"
    echo "Agent B exit: $exit_b"

    # Run quality gates
    echo ""
    echo "--- Quality Gates ---"
    local gate_a=0 gate_b=0
    run_agent_gate "$WORKTREE_A" "agent_a" || gate_a=$?
    run_agent_gate "$WORKTREE_B" "agent_b" || gate_b=$?

    # Invoke judge
    echo ""
    echo "--- Judge Evaluation ---"
    # The captured text may contain progress lines besides the verdict, or
    # nothing parsable at all. Keep only a valid JSON object: the whole text if
    # it parses, otherwise the last line that does, otherwise "{}". Unvalidated
    # text would make every jq lookup fall back to its default and abort the
    # run at `--argjson` below.
    local judge_raw judge_result="{}" judge_line
    judge_raw=$(run_judge) || true
    if jq -e 'type == "object"' <<<"$judge_raw" >/dev/null 2>&1; then
        judge_result=$(jq -c '.' <<<"$judge_raw")
    else
        while IFS= read -r judge_line; do
            if jq -e 'type == "object"' <<<"$judge_line" >/dev/null 2>&1; then
                judge_result="$judge_line"
            fi
        done <<<"$judge_raw"
    fi

    local winner confidence reasoning
    winner=$(jq -r '.winner // "agent_a"' <<<"$judge_result" 2>/dev/null || echo "agent_a")
    confidence=$(jq -r '.confidence // "low"' <<<"$judge_result" 2>/dev/null || echo "low")
    reasoning=$(jq -r '.reasoning // "no reasoning provided"' <<<"$judge_result" 2>/dev/null || echo "no reasoning")

    # Anything other than the two known agents gets the same default as a
    # missing verdict, instead of silently selecting agent B downstream.
    case "$winner" in
        agent_a|agent_b) ;;
        *)
            echo "WARNING: judge returned unrecognized winner '$winner'; defaulting to agent_a" >&2
            winner="agent_a"
            ;;
    esac

    echo ""
    echo "Winner: $winner (confidence: $confidence)"
    echo "Reasoning: $reasoning"

    # If neither passed gate, don't merge
    if [[ $gate_a -ne 0 && $gate_b -ne 0 ]]; then
        echo ""
        echo "WARNING: Neither agent passed quality gate. No merge performed."
        echo "Review worktrees manually:"
        echo " Agent A: $WORKTREE_A"
        echo " Agent B: $WORKTREE_B"

        # Still record lessons
        update_strategy_perf "$winner" "${BATCH_TYPE:-unknown}"
        record_mab_lessons "$judge_result" "${BATCH_TYPE:-unknown}" "${WORK_UNIT:-unnamed}"
        return 1
    fi

    # If only one passed, override judge
    if [[ $gate_a -eq 0 && $gate_b -ne 0 ]]; then
        echo "Overriding judge: only Agent A passed quality gate"
        winner="agent_a"
    elif [[ $gate_a -ne 0 && $gate_b -eq 0 ]]; then
        echo "Overriding judge: only Agent B passed quality gate"
        winner="agent_b"
    fi

    # Merge winner. On failure (e.g. conflicts) stop here and keep the
    # worktrees so the result can be inspected and merged by hand.
    if ! merge_winner "$winner"; then
        echo "ERROR: merging $winner failed. Worktrees kept for manual review:" >&2
        echo " Agent A: $WORKTREE_A" >&2
        echo " Agent B: $WORKTREE_B" >&2
        return 1
    fi

    # Update data files
    update_strategy_perf "$winner" "${BATCH_TYPE:-unknown}"
    record_mab_lessons "$judge_result" "${BATCH_TYPE:-unknown}" "${WORK_UNIT:-unnamed}"

    # Log run
    local run_log
    run_log="$PROJECT_ROOT/logs/mab-run-$(date +%s).json"
    jq -n \
        --arg ts "$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
        --arg design "$(basename "$DESIGN_DOC")" \
        --arg prd "$(basename "$PRD_FILE")" \
        --arg winner "$winner" \
        --arg confidence "$confidence" \
        --arg reasoning "$reasoning" \
        --arg batch_type "${BATCH_TYPE:-unknown}" \
        --arg work_unit "${WORK_UNIT:-unnamed}" \
        --argjson judge "$judge_result" \
        '{
            "timestamp": $ts,
            "design": $design,
            "prd": $prd,
            "winner": $winner,
            "confidence": $confidence,
            "reasoning": $reasoning,
            "batch_type": $batch_type,
            "work_unit": $work_unit,
            "judge_output": $judge
        }' > "$run_log"

    echo "Run logged to: $run_log"

    # Cleanup
    cleanup_worktrees

    echo ""
    echo "MAB run complete. Winner ($winner) merged."
}
1135
+
1136
+ # --- Entry point ---
1137
+ parse_mmab_args "$@"
1138
+ run_mab
1139
+ ```
1140
+
1141
+ **Step 2: Make executable**
1142
+
1143
+ Run: `chmod +x scripts/mab-run.sh`
1144
+
1145
+ **Step 3: Run tests to verify they pass**
1146
+
1147
+ Run: `bash scripts/tests/test-mab-run.sh`
1148
+ Expected: ALL PASSED
1149
+
1150
+ **Step 4: Commit**
1151
+
1152
+ ```bash
1153
+ git add scripts/mab-run.sh scripts/tests/test-mab-run.sh
1154
+ git commit -m "feat: add mab-run.sh orchestrator with parallel agents, judge, and data tracking"
1155
+ ```
1156
+
1157
+ ---
1158
+
1159
+ ## Batch 3: Run-Plan Integration and MAB Context Injection
1160
+
1161
+ Wire `mab-run.sh` into `run-plan.sh` via an `--mab` flag, and inject MAB lessons into batch context.
1162
+
1163
+ ### Task 9: Write failing tests for --mab flag in run-plan CLI
1164
+
1165
+ **Files:**
1166
+ - Modify: `scripts/tests/test-run-plan-cli.sh`
1167
+
1168
+ **Step 1: Add test cases for --mab flag**
1169
+
1170
+ Append to the test file (before `report_results`):
1171
+
1172
+ ```bash
1173
+ # --- MAB mode tests ---
1174
+ output=$("$SCRIPT" --help 2>&1)
1175
+ assert_contains "help mentions ab flag" "--mab" "$output"
1176
+
1177
+ # --mab requires --design and --prd-file
1178
+ output=$("$SCRIPT" docs/plans/example.md --mab 2>&1 || true)
1179
+ assert_contains "ab requires design" "design" "$output"
1180
+ ```
1181
+
1182
+ **Step 2: Run tests to verify the new cases fail**
1183
+
1184
+ Run: `bash scripts/tests/test-run-plan-cli.sh 2>&1 | grep -E "FAIL|PASS" | tail -5`
1185
+ Expected: New tests FAIL
1186
+
1187
+ ### Task 10: Add --mab flag to run-plan.sh
1188
+
1189
+ **Files:**
1190
+ - Modify: `scripts/run-plan.sh`
1191
+
1192
+ **Step 1: Add `--mab`, `--design`, and `--prd-file` flags to argument parsing**
1193
+
1194
+ Add after the `--max-budget` case in `parse_args()`:
1195
+
1196
+ ```bash
1197
+ --mab)
1198
+ MAB_MODE=true; shift
1199
+ ;;
1200
+ --design)
1201
+ MAB_DESIGN="$2"; shift 2
1202
+ ;;
1203
+ --prd-file)
1204
+ MAB_PRD="$2"; shift 2
1205
+ ;;
1206
+ ```
1207
+
1208
+ Add defaults after existing defaults block:
1209
+
1210
+ ```bash
1211
+ MAB_MODE=false
1212
+ MAB_DESIGN=""
1213
+ MAB_PRD=""
1214
+ ```
1215
+
1216
+ Add to `validate_args()` after the on-failure validation:
1217
+
1218
+ ```bash
1219
+ # MAB mode validation
1220
+ if [[ "$MAB_MODE" == true ]]; then
1221
+ if [[ -z "$MAB_DESIGN" ]]; then
1222
+ echo "ERROR: --mab requires --design <file>" >&2
1223
+ exit 1
1224
+ fi
1225
+ if [[ -z "$MAB_PRD" ]]; then
1226
+ echo "ERROR: --mab requires --prd-file <file>" >&2
1227
+ exit 1
1228
+ fi
1229
+ fi
1230
+ ```
1231
+
1232
+ Add the MAB flags (`--mab`, `--design`, `--prd-file`) to the usage text in the Options section:
1233
+
1234
+ ```
1235
+ --mab Enable MAB competing agents mode
1236
+ --design <file> Design doc for MAB mode
1237
+ --prd-file <file> PRD JSON for MAB mode
1238
+ ```
1239
+
1240
+ Add an `ab` case to the mode dispatch:
1241
+
1242
+ ```bash
1243
+ ab)
1244
+ run_mode_mab
1245
+ ;;
1246
+ ```
1247
+
1248
+ And add the mode function:
1249
+
1250
+ ```bash
1251
# Hand the run over to mab-run.sh, forwarding the design doc, PRD, worktree
# (as the project root) and quality gate command, plus --notify when
# notifications are enabled. Exits 1 if mab-run.sh is not found next to this
# script; otherwise returns mab-run.sh's exit status.
run_mode_mab() {
    local runner="$SCRIPT_DIR/mab-run.sh"

    [[ -f "$runner" ]] || {
        echo "ERROR: mab-run.sh not found at $runner" >&2
        exit 1
    }

    local -a forwarded=(
        --design "$MAB_DESIGN"
        --prd "$MAB_PRD"
        --project-root "$WORKTREE"
        --quality-gate "$QUALITY_GATE_CMD"
    )
    [[ "$NOTIFY" != true ]] || forwarded+=(--notify)

    "$runner" "${forwarded[@]}"
}
1271
+ ```
1272
+
1273
+ **Step 2: Run CLI tests**
1274
+
1275
+ Run: `bash scripts/tests/test-run-plan-cli.sh`
1276
+ Expected: ALL PASSED
1277
+
1278
+ ### Task 11: Write failing tests for MAB context injection
1279
+
1280
+ **Files:**
1281
+ - Modify: `scripts/tests/test-run-plan-context.sh`
1282
+
1283
+ **Step 1: Add tests for MAB lesson injection**
1284
+
1285
+ Append test cases (before `report_results`):
1286
+
1287
+ ```bash
1288
+ # --- MAB lessons injection ---
1289
+ TMPDIR2=$(mktemp -d)
1290
+ mkdir -p "$TMPDIR2/logs"
1291
+ cat > "$TMPDIR2/logs/mab-lessons.json" << 'JSON'
1292
+ [
1293
+ {
1294
+ "pattern": "Extract shared validation before per-type validators",
1295
+ "context": "new-file batches with 3+ validators",
1296
+ "recommendation": "Create shared contract first",
1297
+ "batch_type": "new-file",
1298
+ "winner": "agent_a"
1299
+ }
1300
+ ]
1301
+ JSON
1302
+
1303
+ # The context should include MAB lessons when present
1304
+ context=$(generate_batch_context "$PLAN_FILE" 2 "$TMPDIR2" 2>/dev/null || true)
1305
+ assert_contains "includes MAB lessons header" "MAB Lessons" "$context"
1306
+ assert_contains "includes lesson pattern" "shared validation" "$context"
1307
+ rm -rf "$TMPDIR2"
1308
+ ```
1309
+
1310
+ **Step 2: Run to verify fail**
1311
+
1312
+ Run: `bash scripts/tests/test-run-plan-context.sh 2>&1 | tail -5`
1313
+ Expected: New tests FAIL
1314
+
1315
+ ### Task 12: Inject MAB lessons into batch context
1316
+
1317
+ **Files:**
1318
+ - Modify: `scripts/lib/run-plan-context.sh`
1319
+
1320
+ **Step 1: Add MAB lesson injection to `generate_batch_context()`**
1321
+
1322
+ Add after the failure patterns section (around line 75), before the context_refs section:
1323
+
1324
+ ```bash
1325
+ # 3. MAB lessons (if available)
1326
+ local mmab_lessons_file="$worktree/logs/mab-lessons.json"
1327
+ if [[ -f "$mmab_lessons_file" ]]; then
1328
+ local mab_count
1329
+ mab_count=$(jq 'length' "$mmab_lessons_file" 2>/dev/null || echo "0")
1330
+ if [[ "$mab_count" -gt 0 ]]; then
1331
+ local mab_section=""
1332
+ mab_section+=$'\n'"### MMAB Lessons (from previous competing agent runs)"$'\n'
1333
+
1334
+ # Include most recent 5 lessons (most relevant)
1335
+ local mab_entries
1336
+ mab_entries=$(jq -r '.[-5:] | .[] | "- **\(.pattern)** (\(.context // "general")): \(.recommendation // "")"' \
1337
+ "$mmab_lessons_file" 2>/dev/null || true)
1338
+
1339
+ if [[ -n "$mab_entries" ]]; then
1340
+ mab_section+="$mab_entries"$'\n'
1341
+ local mab_len=${#mab_section}
1342
+ if [[ $((chars_used + mab_len)) -lt $TOKEN_BUDGET_CHARS ]]; then
1343
+ context+="$mab_section"
1344
+ chars_used=$((chars_used + mab_len))
1345
+ fi
1346
+ fi
1347
+ fi
1348
+ fi
1349
+ ```
1350
+
1351
+ **Step 2: Run context tests**
1352
+
1353
+ Run: `bash scripts/tests/test-run-plan-context.sh`
1354
+ Expected: ALL PASSED
1355
+
1356
+ **Step 3: Commit**
1357
+
1358
+ ```bash
1359
+ git add scripts/run-plan.sh scripts/lib/run-plan-context.sh scripts/tests/test-run-plan-cli.sh scripts/tests/test-run-plan-context.sh
1360
+ git commit -m "feat: add --mab flag to run-plan.sh and inject MAB lessons into batch context"
1361
+ ```
1362
+
1363
+ ---
1364
+
1365
+ ## Batch 4: Community Sync and Lesson Promotion
1366
+
1367
+ Scripts for pulling community lessons upstream and auto-promoting recurring MAB lessons to `docs/lessons/`.
1368
+
1369
+ ### Task 13: Write failing tests for pull-community-lessons.sh
1370
+
1371
+ **Files:**
1372
+ - Create: `scripts/tests/test-pull-community-lessons.sh`
1373
+
1374
+ **Step 1: Write the test file**
1375
+
1376
+ ```bash
1377
+ #!/usr/bin/env bash
1378
+ set -euo pipefail
1379
+
1380
+ SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
1381
+ source "$SCRIPT_DIR/test-helpers.sh"
1382
+
1383
+ SCRIPT="$SCRIPT_DIR/../pull-community-lessons.sh"
1384
+
1385
+ # --- CLI tests ---
1386
+ assert_exit "help exits 0" 0 "$SCRIPT" --help
1387
+
1388
+ output=$("$SCRIPT" --help 2>&1)
1389
+ assert_contains "help mentions upstream" "upstream" "$output"
1390
+ assert_contains "help mentions lessons" "lessons" "$output"
1391
+
1392
+ # --- Dry-run on temp repo ---
1393
+ TMPDIR=$(mktemp -d)
1394
+ trap 'rm -rf "$TMPDIR"' EXIT
1395
+
1396
+ cd "$TMPDIR"
1397
+ git init -q
1398
+ mkdir -p docs/lessons logs
1399
+ echo "[]" > logs/strategy-perf.json
1400
+ echo "# Lesson 1" > docs/lessons/0001-test.md
1401
+ git add -A && git commit -q -m "init"
1402
+
1403
+ # Should handle missing upstream gracefully
1404
+ output=$("$SCRIPT" --project-root "$TMPDIR" --dry-run 2>&1 || true)
1405
+ assert_contains "dry-run reports status" "dry" "$output"
1406
+
1407
+ report_results
1408
+ ```
1409
+
1410
+ **Step 2: Run to verify fail**
1411
+
1412
+ Run: `bash scripts/tests/test-pull-community-lessons.sh 2>&1 | tail -5`
1413
+ Expected: FAIL
1414
+
1415
+ ### Task 14: Implement pull-community-lessons.sh
1416
+
1417
+ **Files:**
1418
+ - Create: `scripts/pull-community-lessons.sh`
1419
+
1420
+ **Step 1: Write the script**
1421
+
1422
+ ```bash
1423
+ #!/usr/bin/env bash
1424
+ set -euo pipefail
1425
+ # pull-community-lessons.sh — Sync lessons and strategy data from upstream
1426
+ #
1427
+ # Usage: pull-community-lessons.sh --project-root <dir> [--dry-run]
1428
+ #
1429
+ # Fetches latest lessons and strategy performance data from the upstream
1430
+ # autonomous-coding-toolkit repo. New lessons are copied into docs/lessons/,
1431
+ # and community strategy-perf.json is merged with local data.
1432
+
1433
+ usage() {
1434
+ cat <<'USAGE'
1435
+ pull-community-lessons.sh — Pull community lessons from upstream
1436
+
1437
+ Fetches latest lessons and strategy performance data from the upstream
1438
+ autonomous-coding-toolkit repo.
1439
+
1440
+ Usage:
1441
+ pull-community-lessons.sh --project-root <dir> [--dry-run]
1442
+
1443
+ Options:
1444
+ --project-root <dir> Project root directory
1445
+ --upstream <remote> Git remote name (default: upstream)
1446
+ --branch <branch> Upstream branch (default: main)
1447
+ --dry-run Show what would be synced without doing it
1448
+ -h, --help Show this help
1449
+ USAGE
1450
+ }
1451
+
1452
+ PROJECT_ROOT=""
1453
+ UPSTREAM_REMOTE="upstream"
1454
+ UPSTREAM_BRANCH="main"
1455
+ DRY_RUN=false
1456
+
1457
+ while [[ $# -gt 0 ]]; do
1458
+ case "$1" in
1459
+ -h|--help) usage; exit 0 ;;
1460
+ --project-root) PROJECT_ROOT="$2"; shift 2 ;;
1461
+ --upstream) UPSTREAM_REMOTE="$2"; shift 2 ;;
1462
+ --branch) UPSTREAM_BRANCH="$2"; shift 2 ;;
1463
+ --dry-run) DRY_RUN=true; shift ;;
1464
+ *) echo "ERROR: Unknown option: $1" >&2; exit 1 ;;
1465
+ esac
1466
+ done
1467
+
1468
+ if [[ -z "$PROJECT_ROOT" ]]; then
1469
+ echo "ERROR: --project-root required" >&2
1470
+ exit 1
1471
+ fi
1472
+
1473
+ cd "$PROJECT_ROOT"
1474
+
1475
+ # Check if upstream remote exists
1476
+ if ! git remote get-url "$UPSTREAM_REMOTE" >/dev/null 2>&1; then
1477
+ echo "No '$UPSTREAM_REMOTE' remote configured."
1478
+ echo "Add one with: git remote add $UPSTREAM_REMOTE <repo-url>"
1479
+ if [[ "$DRY_RUN" == true ]]; then
1480
+ echo "(dry-run: would fetch from upstream)"
1481
+ exit 0
1482
+ fi
1483
+ exit 1
1484
+ fi
1485
+
1486
+ echo "Fetching from $UPSTREAM_REMOTE/$UPSTREAM_BRANCH..."
1487
+ if [[ "$DRY_RUN" == true ]]; then
1488
+ echo "(dry-run: would fetch $UPSTREAM_REMOTE)"
1489
+ else
1490
+ git fetch "$UPSTREAM_REMOTE" "$UPSTREAM_BRANCH"
1491
+ fi
1492
+
1493
+ # Sync lessons
1494
+ echo ""
1495
+ echo "--- Syncing lessons ---"
1496
+ local_lessons=$({ ls docs/lessons/*.md 2>/dev/null || true; } | wc -l) # "|| true" sits inside the group: under pipefail a failed ls must not make a fallback echo append a second "0"
1497
+ upstream_lessons=$(git ls-tree --name-only "$UPSTREAM_REMOTE/$UPSTREAM_BRANCH" -- docs/lessons/ 2>/dev/null || true)
1498
+
1499
+ new_count=0
1500
+ while IFS= read -r lesson_file; do
1501
+ [[ -z "$lesson_file" ]] && continue
1502
+ local_path="$PROJECT_ROOT/$lesson_file"
1503
+ if [[ ! -f "$local_path" ]]; then
1504
+ if [[ "$DRY_RUN" == true ]]; then
1505
+ echo " Would copy: $lesson_file"
1506
+ else
1507
+ git show "$UPSTREAM_REMOTE/$UPSTREAM_BRANCH:$lesson_file" > "$local_path"
1508
+ echo " Copied: $lesson_file"
1509
+ fi
1510
+ new_count=$((new_count + 1))
1511
+ fi
1512
+ done <<< "$upstream_lessons"
1513
+
1514
+ echo "New lessons: $new_count (local total: $local_lessons)"
1515
+
1516
+ # Sync strategy-perf.json (merge, don't replace)
1517
+ echo ""
1518
+ echo "--- Syncing strategy data ---"
1519
+ local_perf="$PROJECT_ROOT/logs/strategy-perf.json"
1520
+ upstream_perf_content=$(git show "$UPSTREAM_REMOTE/$UPSTREAM_BRANCH:logs/strategy-perf.json" 2>/dev/null || echo "")
1521
+
1522
+ if [[ -n "$upstream_perf_content" && -f "$local_perf" ]]; then
1523
+ if [[ "$DRY_RUN" == true ]]; then
1524
+ echo " Would merge upstream strategy-perf.json with local data"
1525
+ else
1526
+ # Merge: add upstream counts to local counts
1527
+ echo "$upstream_perf_content" | jq -s '
1528
+ .[0] as $local | .[1] as $upstream |
1529
+ $local | to_entries | map(
1530
+ .key as $type |
1531
+ .value | to_entries | map(
1532
+ .key as $strat |
1533
+ .value as $local_val |
1534
+ ($upstream[$type][$strat] // {"wins": 0, "losses": 0, "total": 0}) as $up_val |
1535
+ {
1536
+ key: $strat,
1537
+ value: {
1538
+ "wins": ($local_val.wins + $up_val.wins),
1539
+ "losses": ($local_val.losses + $up_val.losses),
1540
+ "total": ($local_val.total + $up_val.total)
1541
+ }
1542
+ }
1543
+ ) | from_entries |
1544
+ {key: $type, value: .}
1545
+ ) | from_entries
1546
+ ' "$local_perf" - > "$local_perf.tmp" && mv "$local_perf.tmp" "$local_perf"
1547
+ echo " Merged strategy performance data"
1548
+ fi
1549
+ elif [[ -n "$upstream_perf_content" ]]; then
1550
+ if [[ "$DRY_RUN" == true ]]; then
1551
+ echo " Would copy upstream strategy-perf.json (no local file)"
1552
+ else
1553
+ mkdir -p logs
1554
+ echo "$upstream_perf_content" > "$local_perf"
1555
+ echo " Copied strategy-perf.json from upstream"
1556
+ fi
1557
+ fi
1558
+
1559
+ echo ""
1560
+ echo "Community sync complete."
1561
+ ```
1562
+
1563
+ **Step 2: Make executable**
1564
+
1565
+ Run: `chmod +x scripts/pull-community-lessons.sh`
1566
+
1567
+ **Step 3: Run tests**
1568
+
1569
+ Run: `bash scripts/tests/test-pull-community-lessons.sh`
1570
+ Expected: ALL PASSED
1571
+
1572
+ ### Task 15: Write failing tests for lesson promotion
1573
+
1574
+ **Files:**
1575
+ - Create: `scripts/tests/test-promote-mab-lessons.sh`
1576
+
1577
+ **Step 1: Write test file**
1578
+
1579
+ ```bash
1580
+ #!/usr/bin/env bash
1581
+ set -euo pipefail
1582
+
1583
+ SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
1584
+ source "$SCRIPT_DIR/test-helpers.sh"
1585
+
1586
+ SCRIPT="$SCRIPT_DIR/../promote-mab-lessons.sh"
1587
+
1588
+ # --- CLI tests ---
1589
+ assert_exit "help exits 0" 0 "$SCRIPT" --help
1590
+
1591
+ # --- Promotion threshold ---
1592
+ TMPDIR=$(mktemp -d)
1593
+ trap 'rm -rf "$TMPDIR"' EXIT
1594
+
1595
+ mkdir -p "$TMPDIR/docs/lessons" "$TMPDIR/logs"
1596
+
1597
+ # 2 occurrences — should NOT promote (threshold is 3)
1598
+ cat > "$TMPDIR/logs/mab-lessons.json" << 'JSON'
1599
+ [
1600
+ {"pattern": "Extract shared validation", "context": "new-file", "recommendation": "Create shared contract", "lesson_type": "semantic", "occurrences": 1},
1601
+ {"pattern": "Extract shared validation", "context": "new-file", "recommendation": "Create shared contract", "lesson_type": "semantic", "occurrences": 1}
1602
+ ]
1603
+ JSON
1604
+
1605
+ output=$("$SCRIPT" --project-root "$TMPDIR" 2>&1)
1606
+ assert_contains "reports no promotions" "0 lessons promoted" "$output"
1607
+
1608
+ # 3+ occurrences — should promote
1609
+ cat > "$TMPDIR/logs/mab-lessons.json" << 'JSON'
1610
+ [
1611
+ {"pattern": "Extract shared validation", "context": "new-file", "recommendation": "Create shared contract", "lesson_type": "semantic"},
1612
+ {"pattern": "Extract shared validation", "context": "new-file", "recommendation": "Create shared contract", "lesson_type": "semantic"},
1613
+ {"pattern": "Extract shared validation", "context": "new-file", "recommendation": "Create shared contract", "lesson_type": "semantic"}
1614
+ ]
1615
+ JSON
1616
+
1617
+ # Need existing lessons to get next number
1618
+ echo "---" > "$TMPDIR/docs/lessons/0060-existing.md"
1619
+
1620
+ output=$("$SCRIPT" --project-root "$TMPDIR" 2>&1)
1621
+ assert_contains "reports promotion" "1 lessons promoted" "$output"
1622
+
1623
+ # Verify lesson file was created
1624
+ promoted=$(ls "$TMPDIR/docs/lessons/"0061-*.md 2>/dev/null | wc -l)
1625
+ assert_eq "created lesson file" "1" "$promoted"
1626
+
1627
+ # Verify promoted lesson has correct YAML frontmatter
1628
+ content=$(cat "$TMPDIR/docs/lessons/"0061-*.md)
1629
+ assert_contains "has title" "title:" "$content"
1630
+ assert_contains "has category" "category:" "$content"
1631
+ assert_contains "has source" "mab-run" "$content"
1632
+
1633
+ report_results
1634
+ ```
1635
+
1636
+ **Step 2: Run to verify fail**
1637
+
1638
+ Run: `bash scripts/tests/test-promote-mab-lessons.sh 2>&1 | tail -5`
1639
+ Expected: FAIL
1640
+
1641
+ ### Task 16: Implement promote-mab-lessons.sh
1642
+
1643
+ **Files:**
1644
+ - Create: `scripts/promote-mab-lessons.sh`
1645
+
1646
+ **Step 1: Write the script**
1647
+
1648
+ ```bash
1649
+ #!/usr/bin/env bash
1650
+ set -euo pipefail
1651
+ # promote-mab-lessons.sh — Auto-promote recurring MAB lessons to docs/lessons/
1652
+ #
1653
+ # When the same pattern appears 3+ times in mab-lessons.json, create a proper
1654
+ # lesson file in docs/lessons/ so it becomes part of the permanent lesson corpus.
1655
+ #
1656
+ # Usage: promote-mab-lessons.sh --project-root <dir>
1657
+
1658
+ PROMOTION_THRESHOLD=3
1659
+
1660
+ usage() {
1661
+ cat <<'USAGE'
1662
+ promote-mab-lessons.sh — Promote recurring MAB lessons to docs/lessons/
1663
+
1664
+ When the same pattern appears 3+ times in logs/mab-lessons.json, creates a
1665
+ proper lesson file in docs/lessons/ with YAML frontmatter.
1666
+
1667
+ Usage:
1668
+ promote-mab-lessons.sh --project-root <dir>
1669
+
1670
+ Options:
1671
+ --project-root <dir> Project root directory
1672
+ --threshold N Promotion threshold (default: 3)
1673
+ --dry-run Show what would be promoted
1674
+ -h, --help Show this help
1675
+ USAGE
1676
+ }
1677
+
1678
+ PROJECT_ROOT=""
1679
+ DRY_RUN=false
1680
+
1681
+ while [[ $# -gt 0 ]]; do
1682
+ case "$1" in
1683
+ -h|--help) usage; exit 0 ;;
1684
+ --project-root) PROJECT_ROOT="$2"; shift 2 ;;
1685
+ --threshold) PROMOTION_THRESHOLD="$2"; shift 2 ;;
1686
+ --dry-run) DRY_RUN=true; shift ;;
1687
+ *) echo "ERROR: Unknown option: $1" >&2; exit 1 ;;
1688
+ esac
1689
+ done
1690
+
1691
+ if [[ -z "$PROJECT_ROOT" ]]; then
1692
+ echo "ERROR: --project-root required" >&2
1693
+ exit 1
1694
+ fi
1695
+
1696
+ lessons_file="$PROJECT_ROOT/logs/mab-lessons.json"
1697
+ if [[ ! -f "$lessons_file" ]]; then
1698
+ echo "No mab-lessons.json found. 0 lessons promoted."
1699
+ exit 0
1700
+ fi
1701
+
1702
+ lessons_dir="$PROJECT_ROOT/docs/lessons"
1703
+ mkdir -p "$lessons_dir"
1704
+
1705
+ # Find the next lesson number
1706
+ next_num=$(ls "$lessons_dir"/*.md 2>/dev/null | \
1706
+ sed -nE 's|^.*/([0-9]{4})-[^/]*\.md$|\1|p' | sort -n | tail -1 || true) # only the NNNN- filename prefix counts, never digits in the directory path or the slug
1707
+ next_num=$((10#${next_num:-0} + 1)) # 10# forces base 10 so 0060 is not read as octal; empty means no numbered lessons exist yet
1709
+
1710
+ # Group lessons by pattern and count occurrences
1711
+ # Use jq to group, count, and filter by threshold
1712
+ promotable=$(jq --argjson threshold "$PROMOTION_THRESHOLD" '
1713
+ group_by(.pattern) |
1714
+ map(select(length >= $threshold)) |
1715
+ map({
1716
+ pattern: .[0].pattern,
1717
+ context: .[0].context,
1718
+ recommendation: .[0].recommendation,
1719
+ lesson_type: (.[0].lesson_type // "semantic"),
1720
+ count: length
1721
+ })
1722
+ ' "$lessons_file" 2>/dev/null || echo "[]")
1723
+
1724
+ promoted_count=0
1725
+
1726
+ echo "$promotable" | jq -c '.[]' 2>/dev/null | while IFS= read -r entry; do
1727
+ pattern=$(echo "$entry" | jq -r '.pattern')
1728
+ context=$(echo "$entry" | jq -r '.context // "general"')
1729
+ recommendation=$(echo "$entry" | jq -r '.recommendation // ""')
1730
+ lesson_type=$(echo "$entry" | jq -r '.lesson_type // "semantic"')
1731
+ count=$(echo "$entry" | jq -r '.count')
1732
+
1733
+ # Generate slug from pattern
1734
+ slug=$(echo "$pattern" | tr '[:upper:]' '[:lower:]' | tr ' ' '-' | tr -cd 'a-z0-9-' | head -c 40)
1735
+
1736
+ lesson_file="$lessons_dir/$(printf '%04d' "$next_num")-$slug.md"
1737
+
1738
+ # Check if a lesson with similar slug already exists
1739
+ if ls "$lessons_dir"/*"$slug"*.md >/dev/null 2>&1; then
1740
+ echo " Skipping (already exists): $pattern"
1741
+ continue
1742
+ fi
1743
+
1744
+ if [[ "$DRY_RUN" == true ]]; then
1745
+ echo " Would promote: $pattern ($count occurrences)"
1746
+ else
1747
+ cat > "$lesson_file" << LESSON
1748
+ ---
1749
+ title: "$pattern"
1750
+ severity: warning
1751
+ category: mab-learned
1752
+ source: mab-run
1753
+ applies_to: all
1754
+ lesson_type: $lesson_type
1755
+ occurrences: $count
1756
+ ---
1757
+
1758
+ # $pattern
1759
+
1760
+ ## Context
1761
+
1762
+ $context
1763
+
1764
+ ## Recommendation
1765
+
1766
+ $recommendation
1767
+
1768
+ ## Source
1769
+
1770
+ Auto-promoted from MAB run lessons after $count occurrences.
1771
+ LESSON
1772
+ echo " Promoted: $lesson_file"
1773
+ fi
1774
+
1775
+ next_num=$((next_num + 1))
1776
+ promoted_count=$((promoted_count + 1))
1777
+ done
1778
+
1779
+ echo "$promoted_count lessons promoted."
1780
+ ```
1781
+
1782
+ **Step 2: Make executable**
1783
+
1784
+ Run: `chmod +x scripts/promote-mab-lessons.sh`
1785
+
1786
+ **Step 3: Run tests**
1787
+
1788
+ Run: `bash scripts/tests/test-promote-mab-lessons.sh`
1789
+ Expected: ALL PASSED
1790
+
1791
+ **Step 4: Commit**
1792
+
1793
+ ```bash
1794
+ git add scripts/pull-community-lessons.sh scripts/promote-mab-lessons.sh scripts/tests/test-pull-community-lessons.sh scripts/tests/test-promote-mab-lessons.sh
1795
+ git commit -m "feat: add community lesson sync and auto-promotion for MAB lessons"
1796
+ ```
1797
+
1798
+ ---
1799
+
1800
+ ## Batch 5: Documentation and ARCHITECTURE.md Updates
1801
+
1802
+ Update project documentation to cover the Multi-Armed Bandit system.
1803
+
1804
+ ### Task 17: Update ARCHITECTURE.md
1805
+
1806
+ **Files:**
1807
+ - Modify: `docs/ARCHITECTURE.md`
1808
+
1809
+ **Step 1: Add Multi-Armed Bandit System section**
1810
+
1811
+ Append a new section to `docs/ARCHITECTURE.md`:
1812
+
1813
+ ```markdown
1814
+ ## Multi-Armed Bandit System
1815
+
1816
+ Competing autonomous agents execute the same brief using different methodologies (superpowers skill chain vs ralph-wiggum iteration loop). An LLM judge evaluates both and extracts lessons that compound over time.
1817
+
1818
+ ### Architecture
1819
+
1820
+ ```
1821
+ PHASE 1 — HUMAN + SINGLE AGENT (shared)
1822
+ 1. Brainstorm → approved design doc
1823
+ 2. PRD → machine-verifiable acceptance criteria
1824
+ 3. Architecture map generated (architecture-map.sh)
1825
+
1826
+ PHASE 2 — PLANNER AGENT (LLM)
1827
+ Reads: design doc, PRD, architecture map, strategy-perf.json
1828
+ Decides per work unit: MAB or single? Which strategy? Unit size?
1829
+
1830
+ PHASE 3 — MAB EXECUTION (parallel worktrees)
1831
+ Agent A (superpowers): writes own plan, TDD, batch-by-batch
1832
+ Agent B (ralph): iterates until PRD criteria pass
1833
+
1834
+ PHASE 4 — JUDGE AGENT (LLM)
1835
+ Reads: both diffs, design doc, PRD, architecture map, lesson history
1836
+ Outputs: winner, bidirectional lessons, strategy update, failure mode
1837
+
1838
+ PHASE 5 — MERGE + LEARN
1839
+ Merge winner, log lessons, update strategy data, promote patterns
1840
+ ```
1841
+
1842
+ ### Data Files
1843
+
1844
+ | File | Purpose |
1845
+ |------|---------|
1846
+ | `logs/mab-lessons.json` | Accumulated MAB lessons (patterns, recommendations) |
1847
+ | `logs/strategy-perf.json` | Strategy win rates per batch type |
1848
+ | `logs/mab-run-<timestamp>.json` | Per-run log (judge output, winner, reasoning) |
1849
+ | `docs/ARCHITECTURE-MAP.json` | Auto-generated module dependency graph |
1850
+
1851
+ ### Scripts
1852
+
1853
+ | Script | Purpose |
1854
+ |--------|---------|
1855
+ | `scripts/mab-run.sh` | MAB execution orchestrator |
1856
+ | `scripts/architecture-map.sh` | Module dependency graph generator |
1857
+ | `scripts/pull-community-lessons.sh` | Sync lessons from upstream |
1858
+ | `scripts/promote-mab-lessons.sh` | Auto-promote recurring lessons |
1859
+ | `scripts/prompts/planner-agent.md` | Planner routing prompt |
1860
+ | `scripts/prompts/judge-agent.md` | Judge evaluation prompt |
1861
+ | `scripts/prompts/agent-a-superpowers.md` | Superpowers agent instructions |
1862
+ | `scripts/prompts/agent-b-ralph.md` | Ralph agent instructions |
1863
+
1864
+ ### Lesson Lifecycle
1865
+
1866
+ 1. Judge extracts lesson → `logs/mab-lessons.json`
1867
+ 2. Pattern recurs 3+ times → auto-promoted to `docs/lessons/NNNN-*.md`
1868
+ 3. Promoted lesson → enforced by `lesson-check.sh` (syntactic) or `lesson-scanner` (semantic)
1869
+ 4. User runs `/submit-lesson` → PR to upstream for community
1870
+
1871
+ ### Strategy Learning
1872
+
1873
+ The planner agent reads `logs/strategy-perf.json` to route work units:
1874
+ - **>70% win rate, 10+ data points** → route to winning strategy (exploit)
1875
+ - **Uncertain or insufficient data** → MAB run (explore)
1876
+ - **Error-prone type** → MAB run (gather more data)
1877
+
1878
+ New users start with community baseline data via `pull-community-lessons.sh`.
1879
+ ```
1880
+
1881
+ **Step 2: Verify update**
1882
+
1883
+ Run: `grep -c "Multi-Armed Bandit System" docs/ARCHITECTURE.md`
1884
+ Expected: 1
1885
+
1886
+ ### Task 18: Update CLAUDE.md
1887
+
1888
+ **Files:**
1889
+ - Modify: `CLAUDE.md`
1890
+
1891
+ **Step 1: Add Multi-Armed Bandit system to the skill chain table**
1892
+
1893
+ In the "The Skill Chain" table, add a new row after `4d. Execute (loop)`:
1894
+
1895
+ ```
1896
+ | 4e. Execute (MAB) | `scripts/mab-run.sh` | Parallel competing agents with LLM judge |
1897
+ ```
1898
+
1899
+ **Step 2: Add to the Scripts section of Directory Layout**
1900
+
1901
+ Add to the scripts section:
1902
+
1903
+ ```
1904
+ ├── mab-run.sh # MAB competing agents orchestrator
1905
+ ├── architecture-map.sh # Module dependency graph generator
1906
+ ├── pull-community-lessons.sh # Community lesson sync from upstream
1907
+ ├── promote-mab-lessons.sh # Auto-promote recurring MAB lessons
1908
+ ```
1909
+
1910
+ **Step 3: Add to Data Files section under State & Persistence**
1911
+
1912
+ Add entries:
1913
+
1914
+ ```
1915
+ - **`logs/mab-lessons.json`** — accumulated MAB lessons from competing agent runs.
1916
+ - **`logs/strategy-perf.json`** — strategy win rates per batch type (feeds planner decisions).
1917
+ - **`logs/mab-run-<timestamp>.json`** — per-run judge output, winner, and reasoning.
1918
+ - **`docs/ARCHITECTURE-MAP.json`** — auto-generated module dependency graph.
1919
+ ```
1920
+
1921
+ **Step 4: Commit**
1922
+
1923
+ ```bash
1924
+ git add docs/ARCHITECTURE.md CLAUDE.md
1925
+ git commit -m "docs: add Multi-Armed Bandit system to ARCHITECTURE.md and CLAUDE.md"
1926
+ ```
1927
+
1928
+ ---
1929
+
1930
+ ## Batch 6: Integration Wiring and Final Verification
1931
+
1932
+ Wire all components together, run the full test suite, and verify end-to-end.
1933
+
1934
+ ### Task 19: Add mab-run tests to run-all-tests.sh
1935
+
1936
+ **Files:**
1937
+ - Modify: `scripts/tests/run-all-tests.sh`
1938
+
1939
+ **Step 1: Add new test files**
1940
+
1941
+ Add to the test file list in `run-all-tests.sh`:
1942
+
1943
+ ```bash
1944
+ test-architecture-map.sh
1945
+ test-mab-run.sh
1946
+ test-pull-community-lessons.sh
1947
+ test-promote-mab-lessons.sh
1948
+ ```
1949
+
1950
+ **Step 2: Run the full test suite**
1951
+
1952
+ Run: `bash scripts/tests/run-all-tests.sh`
1953
+ Expected: ALL PASSED
1954
+
1955
+ ### Task 20: Confirm architecture-map.sh stays out of quality-gate.sh (verification only)
1956
+
1957
+ **Files:**
1958
+ - Modify: `scripts/quality-gate.sh` (if appropriate — only if the project uses the architecture map)
1959
+
1960
+ **Step 1: Check if architecture map generation should be part of the gate**
1961
+
1962
+ The architecture map is informational, not a gate. Do NOT add it to quality-gate.sh. It should be run explicitly or as part of `mab-run.sh`.
1963
+
1964
+ Verify this is correct by checking the quality gate doesn't reference it:
1965
+
1966
+ Run: `grep -c "architecture-map" scripts/quality-gate.sh || true`
1967
+ Expected: 0 (not referenced)
1968
+
1969
+ ### Task 21: Verify all new scripts are executable
1970
+
1971
+ **Step 1: Check permissions**
1972
+
1973
+ Run: `ls -la scripts/mab-run.sh scripts/architecture-map.sh scripts/pull-community-lessons.sh scripts/promote-mab-lessons.sh | awk '{print $1, $NF}'`
1974
+ Expected: All show `-rwxr-xr-x` or similar executable permissions
1975
+
1976
+ ### Task 22: Run the full test suite
1977
+
1978
+ **Step 1: Run all tests**
1979
+
1980
+ Run: `bash scripts/tests/run-all-tests.sh`
1981
+ Expected: ALL PASSED with no regressions
1982
+
1983
+ ### Task 23: Verify mab-run.sh dry-run works end-to-end
1984
+
1985
+ **Step 1: Create a temp project and test dry-run**
1986
+
1987
+ Run:
1988
+ ```bash
1989
+ TMPDIR=$(mktemp -d)
1990
+ cd "$TMPDIR"
1991
+ git init -q
1992
+ mkdir -p tasks docs
1993
+ echo '{"tasks":[{"id":1,"description":"test","criterion":"exit 0"}]}' > tasks/prd.json
1994
+ echo "# Test Design" > docs/design.md
1995
+ git add -A && git commit -q -m "init"
1996
+ "$OLDPWD/scripts/mab-run.sh" --design docs/design.md --prd tasks/prd.json --project-root "$TMPDIR" --dry-run
1997
+ cd -
1998
+ rm -rf "$TMPDIR"
1999
+ ```
2000
+ Expected: Shows dry-run output mentioning worktree creation and agent launch
2001
+
2002
+ ### Task 24: Verify architecture-map.sh on the toolkit itself
2003
+
2004
+ **Step 1: Generate map for this project**
2005
+
2006
+ Run: `scripts/architecture-map.sh --project-root .`
2007
+ Expected: Produces JSON with `modules` array containing project modules (scripts/lib/*, etc.)
2008
+
2009
+ ### Task 25: Final commit
2010
+
2011
+ ```bash
2012
+ git add scripts/tests/run-all-tests.sh
2013
+ git commit -m "feat: wire Multi-Armed Bandit system into test suite and verify integration"
2014
+ ```
2015
+
2016
+ ### Task 26: Run quality gate
2017
+
2018
+ Run: `scripts/quality-gate.sh --project-root .`
2019
+ Expected: PASSED
2020
+
2021
+ ---
2022
+
2023
+ ## Quality Gates
2024
+
2025
+ Between each batch, run:
2026
+
2027
+ ```bash
2028
+ # Full quality gate
2029
+ scripts/quality-gate.sh --project-root .
2030
+
2031
+ # Or manually:
2032
+ bash scripts/tests/run-all-tests.sh # All tests pass
2033
+ scripts/lesson-check.sh scripts/*.sh # No lesson violations
2034
+ git diff --name-only # All changes committed
2035
+ ```
2036
+
2037
+ ## Summary
2038
+
2039
+ | Batch | Focus | New Files | Tests |
2040
+ |-------|-------|-----------|-------|
2041
+ | 1 | Agent prompts + architecture-map.sh | 6 | test-architecture-map.sh |
2042
+ | 2 | mab-run.sh orchestrator | 2 | test-mab-run.sh |
2043
+ | 3 | run-plan --mab flag + context injection | 0 (modifications) | test-run-plan-cli.sh, test-run-plan-context.sh (modified) |
2044
+ | 4 | Community sync + lesson promotion | 4 | test-pull-community-lessons.sh, test-promote-mab-lessons.sh |
2045
+ | 5 | Documentation updates | 0 (modifications) | — |
2046
+ | 6 | Integration wiring + verification | 0 (modifications) | run-all-tests.sh (modified) |