autonomous-coding-toolkit 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (324) hide show
  1. package/.claude-plugin/marketplace.json +22 -0
  2. package/.claude-plugin/plugin.json +13 -0
  3. package/LICENSE +21 -0
  4. package/Makefile +21 -0
  5. package/README.md +140 -0
  6. package/SECURITY.md +28 -0
  7. package/agents/bash-expert.md +113 -0
  8. package/agents/dependency-auditor.md +138 -0
  9. package/agents/integration-tester.md +120 -0
  10. package/agents/lesson-scanner.md +149 -0
  11. package/agents/python-expert.md +179 -0
  12. package/agents/service-monitor.md +141 -0
  13. package/agents/shell-expert.md +147 -0
  14. package/benchmarks/runner.sh +147 -0
  15. package/benchmarks/tasks/01-rest-endpoint/rubric.sh +29 -0
  16. package/benchmarks/tasks/01-rest-endpoint/task.md +17 -0
  17. package/benchmarks/tasks/02-refactor-module/task.md +8 -0
  18. package/benchmarks/tasks/03-fix-integration-bug/task.md +8 -0
  19. package/benchmarks/tasks/04-add-test-coverage/task.md +8 -0
  20. package/benchmarks/tasks/05-multi-file-feature/task.md +8 -0
  21. package/bin/act.js +238 -0
  22. package/commands/autocode.md +6 -0
  23. package/commands/cancel-ralph.md +18 -0
  24. package/commands/code-factory.md +53 -0
  25. package/commands/create-prd.md +55 -0
  26. package/commands/ralph-loop.md +18 -0
  27. package/commands/run-plan.md +117 -0
  28. package/commands/submit-lesson.md +122 -0
  29. package/docs/ARCHITECTURE.md +630 -0
  30. package/docs/CONTRIBUTING.md +125 -0
  31. package/docs/lessons/0001-bare-exception-swallowing.md +34 -0
  32. package/docs/lessons/0002-async-def-without-await.md +28 -0
  33. package/docs/lessons/0003-create-task-without-callback.md +28 -0
  34. package/docs/lessons/0004-hardcoded-test-counts.md +28 -0
  35. package/docs/lessons/0005-sqlite-without-closing.md +33 -0
  36. package/docs/lessons/0006-venv-pip-path.md +27 -0
  37. package/docs/lessons/0007-runner-state-self-rejection.md +35 -0
  38. package/docs/lessons/0008-quality-gate-blind-spot.md +33 -0
  39. package/docs/lessons/0009-parser-overcount-empty-batches.md +36 -0
  40. package/docs/lessons/0010-local-outside-function-bash.md +33 -0
  41. package/docs/lessons/0011-batch-tests-for-unimplemented-code.md +36 -0
  42. package/docs/lessons/0012-api-markdown-unescaped-chars.md +33 -0
  43. package/docs/lessons/0013-export-prefix-env-parsing.md +33 -0
  44. package/docs/lessons/0014-decorator-registry-import-side-effect.md +43 -0
  45. package/docs/lessons/0015-frontend-backend-schema-drift.md +43 -0
  46. package/docs/lessons/0016-event-driven-cold-start-seeding.md +44 -0
  47. package/docs/lessons/0017-copy-paste-logic-diverges.md +43 -0
  48. package/docs/lessons/0018-layer-passes-pipeline-broken.md +45 -0
  49. package/docs/lessons/0019-systemd-envfile-ignores-export.md +41 -0
  50. package/docs/lessons/0020-persist-state-incrementally.md +44 -0
  51. package/docs/lessons/0021-dual-axis-testing.md +48 -0
  52. package/docs/lessons/0022-jsx-factory-shadowing.md +43 -0
  53. package/docs/lessons/0023-static-analysis-spiral.md +51 -0
  54. package/docs/lessons/0024-shared-pipeline-implementation.md +55 -0
  55. package/docs/lessons/0025-defense-in-depth-all-entry-points.md +65 -0
  56. package/docs/lessons/0026-linter-no-rules-false-enforcement.md +54 -0
  57. package/docs/lessons/0027-jsx-silent-prop-drop.md +64 -0
  58. package/docs/lessons/0028-no-infrastructure-in-client-code.md +49 -0
  59. package/docs/lessons/0029-never-write-secrets-to-files.md +61 -0
  60. package/docs/lessons/0030-cache-merge-not-replace.md +62 -0
  61. package/docs/lessons/0031-verify-units-at-boundaries.md +66 -0
  62. package/docs/lessons/0032-module-lifecycle-subscribe-unsubscribe.md +89 -0
  63. package/docs/lessons/0033-async-iteration-mutable-snapshot.md +72 -0
  64. package/docs/lessons/0034-caller-missing-await-silent-discard.md +65 -0
  65. package/docs/lessons/0035-duplicate-registration-silent-overwrite.md +85 -0
  66. package/docs/lessons/0036-websocket-dirty-disconnect.md +33 -0
  67. package/docs/lessons/0037-parallel-agents-worktree-corruption.md +31 -0
  68. package/docs/lessons/0038-subscribe-no-stored-ref.md +36 -0
  69. package/docs/lessons/0039-fallback-or-default-hides-bugs.md +34 -0
  70. package/docs/lessons/0040-event-firehose-filter-first.md +36 -0
  71. package/docs/lessons/0041-ambiguous-base-dir-path-nesting.md +32 -0
  72. package/docs/lessons/0042-spec-compliance-insufficient.md +36 -0
  73. package/docs/lessons/0043-exact-count-extensible-collections.md +32 -0
  74. package/docs/lessons/0044-relative-file-deps-worktree.md +39 -0
  75. package/docs/lessons/0045-iterative-design-improvement.md +33 -0
  76. package/docs/lessons/0046-plan-assertion-math-bugs.md +38 -0
  77. package/docs/lessons/0047-pytest-single-threaded-default.md +37 -0
  78. package/docs/lessons/0048-integration-wiring-batch.md +40 -0
  79. package/docs/lessons/0049-ab-verification.md +41 -0
  80. package/docs/lessons/0050-editing-sourced-files-during-execution.md +33 -0
  81. package/docs/lessons/0051-infrastructure-fixes-cant-self-heal.md +30 -0
  82. package/docs/lessons/0052-uncommitted-changes-poison-quality-gates.md +31 -0
  83. package/docs/lessons/0053-jq-compact-flag-inconsistency.md +31 -0
  84. package/docs/lessons/0054-parser-matches-inside-code-blocks.md +30 -0
  85. package/docs/lessons/0055-agents-compensate-for-garbled-prompts.md +31 -0
  86. package/docs/lessons/0056-grep-count-exit-code-on-zero.md +42 -0
  87. package/docs/lessons/0057-new-artifacts-break-git-clean-gates.md +42 -0
  88. package/docs/lessons/0058-dead-config-keys-never-consumed.md +49 -0
  89. package/docs/lessons/0059-contract-test-shared-structures.md +53 -0
  90. package/docs/lessons/0060-set-e-silent-death-in-runners.md +53 -0
  91. package/docs/lessons/0061-context-injection-dirty-state.md +50 -0
  92. package/docs/lessons/0062-sibling-bug-neighborhood-scan.md +29 -0
  93. package/docs/lessons/0063-one-flag-two-lifetimes.md +31 -0
  94. package/docs/lessons/0064-test-passes-wrong-reason.md +31 -0
  95. package/docs/lessons/0065-pipefail-grep-count-double-output.md +39 -0
  96. package/docs/lessons/0066-local-keyword-outside-function.md +37 -0
  97. package/docs/lessons/0067-stdin-hang-non-interactive-shell.md +36 -0
  98. package/docs/lessons/0068-agent-builds-wrong-thing-correctly.md +31 -0
  99. package/docs/lessons/0069-plan-quality-dominates-execution.md +30 -0
  100. package/docs/lessons/0070-spec-echo-back-prevents-drift.md +31 -0
  101. package/docs/lessons/0071-positive-instructions-outperform-negative.md +30 -0
  102. package/docs/lessons/0072-lost-in-the-middle-context-placement.md +30 -0
  103. package/docs/lessons/0073-unscoped-lessons-cause-false-positives.md +30 -0
  104. package/docs/lessons/0074-stale-context-injection-wrong-batch.md +32 -0
  105. package/docs/lessons/0075-research-artifacts-must-persist.md +32 -0
  106. package/docs/lessons/0076-wrong-decomposition-contaminates-downstream.md +30 -0
  107. package/docs/lessons/0077-cherry-pick-merges-need-manual-resolution.md +30 -0
  108. package/docs/lessons/0078-static-review-without-live-test.md +30 -0
  109. package/docs/lessons/0079-integration-wiring-batch-required.md +32 -0
  110. package/docs/lessons/FRAMEWORK.md +161 -0
  111. package/docs/lessons/SUMMARY.md +201 -0
  112. package/docs/lessons/TEMPLATE.md +85 -0
  113. package/docs/plans/2026-02-21-code-factory-v2-design.md +204 -0
  114. package/docs/plans/2026-02-21-code-factory-v2-implementation-plan.md +2189 -0
  115. package/docs/plans/2026-02-21-code-factory-v2-phase4-design.md +537 -0
  116. package/docs/plans/2026-02-21-code-factory-v2-phase4-implementation-plan.md +2012 -0
  117. package/docs/plans/2026-02-21-hardening-pass-design.md +108 -0
  118. package/docs/plans/2026-02-21-hardening-pass-plan.md +1378 -0
  119. package/docs/plans/2026-02-21-mab-research-report.md +406 -0
  120. package/docs/plans/2026-02-21-marketplace-restructure-design.md +240 -0
  121. package/docs/plans/2026-02-21-marketplace-restructure-plan.md +832 -0
  122. package/docs/plans/2026-02-21-phase4-completion-plan.md +697 -0
  123. package/docs/plans/2026-02-21-validator-suite-design.md +148 -0
  124. package/docs/plans/2026-02-21-validator-suite-plan.md +540 -0
  125. package/docs/plans/2026-02-22-mab-research-round2.md +556 -0
  126. package/docs/plans/2026-02-22-mab-run-design.md +462 -0
  127. package/docs/plans/2026-02-22-mab-run-plan.md +2046 -0
  128. package/docs/plans/2026-02-22-operations-design-methodology-research.md +681 -0
  129. package/docs/plans/2026-02-22-research-agent-failure-taxonomy.md +532 -0
  130. package/docs/plans/2026-02-22-research-code-guideline-policies.md +886 -0
  131. package/docs/plans/2026-02-22-research-codebase-audit-refactoring.md +908 -0
  132. package/docs/plans/2026-02-22-research-coding-standards-documentation.md +541 -0
  133. package/docs/plans/2026-02-22-research-competitive-landscape.md +687 -0
  134. package/docs/plans/2026-02-22-research-comprehensive-testing.md +1076 -0
  135. package/docs/plans/2026-02-22-research-context-utilization.md +459 -0
  136. package/docs/plans/2026-02-22-research-cost-quality-tradeoff.md +548 -0
  137. package/docs/plans/2026-02-22-research-lesson-transferability.md +508 -0
  138. package/docs/plans/2026-02-22-research-multi-agent-coordination.md +312 -0
  139. package/docs/plans/2026-02-22-research-phase-integration.md +602 -0
  140. package/docs/plans/2026-02-22-research-plan-quality.md +428 -0
  141. package/docs/plans/2026-02-22-research-prompt-engineering.md +558 -0
  142. package/docs/plans/2026-02-22-research-unconventional-perspectives.md +528 -0
  143. package/docs/plans/2026-02-22-research-user-adoption.md +638 -0
  144. package/docs/plans/2026-02-22-research-verification-effectiveness.md +433 -0
  145. package/docs/plans/2026-02-23-agent-suite-design.md +299 -0
  146. package/docs/plans/2026-02-23-agent-suite-plan.md +578 -0
  147. package/docs/plans/2026-02-23-phase3-cost-infrastructure-design.md +148 -0
  148. package/docs/plans/2026-02-23-phase3-cost-infrastructure-plan.md +1062 -0
  149. package/docs/plans/2026-02-23-research-bash-expert-agent.md +543 -0
  150. package/docs/plans/2026-02-23-research-dependency-auditor-agent.md +564 -0
  151. package/docs/plans/2026-02-23-research-improving-existing-agents.md +503 -0
  152. package/docs/plans/2026-02-23-research-integration-tester-agent.md +454 -0
  153. package/docs/plans/2026-02-23-research-python-expert-agent.md +429 -0
  154. package/docs/plans/2026-02-23-research-service-monitor-agent.md +425 -0
  155. package/docs/plans/2026-02-23-research-shell-expert-agent.md +533 -0
  156. package/docs/plans/2026-02-23-roadmap-to-completion.md +530 -0
  157. package/docs/plans/2026-02-24-headless-module-split-design.md +98 -0
  158. package/docs/plans/2026-02-24-headless-module-split.md +443 -0
  159. package/docs/plans/2026-02-24-lesson-scope-metadata-design.md +228 -0
  160. package/docs/plans/2026-02-24-lesson-scope-metadata-plan.md +968 -0
  161. package/docs/plans/2026-02-24-npm-packaging-design.md +841 -0
  162. package/docs/plans/2026-02-24-npm-packaging-plan.md +1965 -0
  163. package/docs/plans/audit-findings.md +186 -0
  164. package/docs/telegram-notification-format.md +98 -0
  165. package/examples/example-plan.md +51 -0
  166. package/examples/example-prd.json +72 -0
  167. package/examples/example-roadmap.md +33 -0
  168. package/examples/quickstart-plan.md +63 -0
  169. package/hooks/hooks.json +26 -0
  170. package/hooks/setup-symlinks.sh +48 -0
  171. package/hooks/stop-hook.sh +135 -0
  172. package/package.json +47 -0
  173. package/policies/bash.md +71 -0
  174. package/policies/python.md +71 -0
  175. package/policies/testing.md +61 -0
  176. package/policies/universal.md +60 -0
  177. package/scripts/analyze-report.sh +97 -0
  178. package/scripts/architecture-map.sh +145 -0
  179. package/scripts/auto-compound.sh +273 -0
  180. package/scripts/batch-audit.sh +42 -0
  181. package/scripts/batch-test.sh +101 -0
  182. package/scripts/entropy-audit.sh +221 -0
  183. package/scripts/failure-digest.sh +51 -0
  184. package/scripts/generate-ast-rules.sh +96 -0
  185. package/scripts/init.sh +112 -0
  186. package/scripts/lesson-check.sh +428 -0
  187. package/scripts/lib/common.sh +61 -0
  188. package/scripts/lib/cost-tracking.sh +153 -0
  189. package/scripts/lib/ollama.sh +60 -0
  190. package/scripts/lib/progress-writer.sh +128 -0
  191. package/scripts/lib/run-plan-context.sh +215 -0
  192. package/scripts/lib/run-plan-echo-back.sh +231 -0
  193. package/scripts/lib/run-plan-headless.sh +396 -0
  194. package/scripts/lib/run-plan-notify.sh +57 -0
  195. package/scripts/lib/run-plan-parser.sh +81 -0
  196. package/scripts/lib/run-plan-prompt.sh +215 -0
  197. package/scripts/lib/run-plan-quality-gate.sh +132 -0
  198. package/scripts/lib/run-plan-routing.sh +315 -0
  199. package/scripts/lib/run-plan-sampling.sh +170 -0
  200. package/scripts/lib/run-plan-scoring.sh +146 -0
  201. package/scripts/lib/run-plan-state.sh +142 -0
  202. package/scripts/lib/run-plan-team.sh +199 -0
  203. package/scripts/lib/telegram.sh +54 -0
  204. package/scripts/lib/thompson-sampling.sh +176 -0
  205. package/scripts/license-check.sh +74 -0
  206. package/scripts/mab-run.sh +575 -0
  207. package/scripts/module-size-check.sh +146 -0
  208. package/scripts/patterns/async-no-await.yml +5 -0
  209. package/scripts/patterns/bare-except.yml +6 -0
  210. package/scripts/patterns/empty-catch.yml +6 -0
  211. package/scripts/patterns/hardcoded-localhost.yml +9 -0
  212. package/scripts/patterns/retry-loop-no-backoff.yml +12 -0
  213. package/scripts/pipeline-status.sh +197 -0
  214. package/scripts/policy-check.sh +226 -0
  215. package/scripts/prior-art-search.sh +133 -0
  216. package/scripts/promote-mab-lessons.sh +126 -0
  217. package/scripts/prompts/agent-a-superpowers.md +29 -0
  218. package/scripts/prompts/agent-b-ralph.md +29 -0
  219. package/scripts/prompts/judge-agent.md +61 -0
  220. package/scripts/prompts/planner-agent.md +44 -0
  221. package/scripts/pull-community-lessons.sh +90 -0
  222. package/scripts/quality-gate.sh +266 -0
  223. package/scripts/research-gate.sh +90 -0
  224. package/scripts/run-plan.sh +329 -0
  225. package/scripts/scope-infer.sh +159 -0
  226. package/scripts/setup-ralph-loop.sh +155 -0
  227. package/scripts/telemetry.sh +230 -0
  228. package/scripts/tests/run-all-tests.sh +52 -0
  229. package/scripts/tests/test-act-cli.sh +46 -0
  230. package/scripts/tests/test-agents-md.sh +87 -0
  231. package/scripts/tests/test-analyze-report.sh +114 -0
  232. package/scripts/tests/test-architecture-map.sh +89 -0
  233. package/scripts/tests/test-auto-compound.sh +169 -0
  234. package/scripts/tests/test-batch-test.sh +65 -0
  235. package/scripts/tests/test-benchmark-runner.sh +25 -0
  236. package/scripts/tests/test-common.sh +168 -0
  237. package/scripts/tests/test-cost-tracking.sh +158 -0
  238. package/scripts/tests/test-echo-back.sh +180 -0
  239. package/scripts/tests/test-entropy-audit.sh +146 -0
  240. package/scripts/tests/test-failure-digest.sh +66 -0
  241. package/scripts/tests/test-generate-ast-rules.sh +145 -0
  242. package/scripts/tests/test-helpers.sh +82 -0
  243. package/scripts/tests/test-init.sh +47 -0
  244. package/scripts/tests/test-lesson-check.sh +278 -0
  245. package/scripts/tests/test-lesson-local.sh +55 -0
  246. package/scripts/tests/test-license-check.sh +109 -0
  247. package/scripts/tests/test-mab-run.sh +182 -0
  248. package/scripts/tests/test-ollama-lib.sh +49 -0
  249. package/scripts/tests/test-ollama.sh +60 -0
  250. package/scripts/tests/test-pipeline-status.sh +198 -0
  251. package/scripts/tests/test-policy-check.sh +124 -0
  252. package/scripts/tests/test-prior-art-search.sh +96 -0
  253. package/scripts/tests/test-progress-writer.sh +140 -0
  254. package/scripts/tests/test-promote-mab-lessons.sh +110 -0
  255. package/scripts/tests/test-pull-community-lessons.sh +149 -0
  256. package/scripts/tests/test-quality-gate.sh +241 -0
  257. package/scripts/tests/test-research-gate.sh +132 -0
  258. package/scripts/tests/test-run-plan-cli.sh +86 -0
  259. package/scripts/tests/test-run-plan-context.sh +305 -0
  260. package/scripts/tests/test-run-plan-e2e.sh +153 -0
  261. package/scripts/tests/test-run-plan-headless.sh +424 -0
  262. package/scripts/tests/test-run-plan-notify.sh +124 -0
  263. package/scripts/tests/test-run-plan-parser.sh +217 -0
  264. package/scripts/tests/test-run-plan-prompt.sh +254 -0
  265. package/scripts/tests/test-run-plan-quality-gate.sh +222 -0
  266. package/scripts/tests/test-run-plan-routing.sh +178 -0
  267. package/scripts/tests/test-run-plan-scoring.sh +148 -0
  268. package/scripts/tests/test-run-plan-state.sh +261 -0
  269. package/scripts/tests/test-run-plan-team.sh +157 -0
  270. package/scripts/tests/test-scope-infer.sh +150 -0
  271. package/scripts/tests/test-setup-ralph-loop.sh +63 -0
  272. package/scripts/tests/test-telegram-env.sh +38 -0
  273. package/scripts/tests/test-telegram.sh +121 -0
  274. package/scripts/tests/test-telemetry.sh +46 -0
  275. package/scripts/tests/test-thompson-sampling.sh +139 -0
  276. package/scripts/tests/test-validate-all.sh +60 -0
  277. package/scripts/tests/test-validate-commands.sh +89 -0
  278. package/scripts/tests/test-validate-hooks.sh +98 -0
  279. package/scripts/tests/test-validate-lessons.sh +150 -0
  280. package/scripts/tests/test-validate-plan-quality.sh +235 -0
  281. package/scripts/tests/test-validate-plans.sh +187 -0
  282. package/scripts/tests/test-validate-plugin.sh +106 -0
  283. package/scripts/tests/test-validate-prd.sh +184 -0
  284. package/scripts/tests/test-validate-skills.sh +134 -0
  285. package/scripts/validate-all.sh +57 -0
  286. package/scripts/validate-commands.sh +67 -0
  287. package/scripts/validate-hooks.sh +89 -0
  288. package/scripts/validate-lessons.sh +98 -0
  289. package/scripts/validate-plan-quality.sh +369 -0
  290. package/scripts/validate-plans.sh +120 -0
  291. package/scripts/validate-plugin.sh +86 -0
  292. package/scripts/validate-policies.sh +42 -0
  293. package/scripts/validate-prd.sh +118 -0
  294. package/scripts/validate-skills.sh +96 -0
  295. package/skills/autocode/SKILL.md +285 -0
  296. package/skills/autocode/ab-verification.md +51 -0
  297. package/skills/autocode/code-quality-standards.md +37 -0
  298. package/skills/autocode/competitive-mode.md +364 -0
  299. package/skills/brainstorming/SKILL.md +97 -0
  300. package/skills/capture-lesson/SKILL.md +187 -0
  301. package/skills/check-lessons/SKILL.md +116 -0
  302. package/skills/dispatching-parallel-agents/SKILL.md +110 -0
  303. package/skills/executing-plans/SKILL.md +85 -0
  304. package/skills/finishing-a-development-branch/SKILL.md +201 -0
  305. package/skills/receiving-code-review/SKILL.md +72 -0
  306. package/skills/requesting-code-review/SKILL.md +59 -0
  307. package/skills/requesting-code-review/code-reviewer.md +82 -0
  308. package/skills/research/SKILL.md +145 -0
  309. package/skills/roadmap/SKILL.md +115 -0
  310. package/skills/subagent-driven-development/SKILL.md +98 -0
  311. package/skills/subagent-driven-development/code-quality-reviewer-prompt.md +18 -0
  312. package/skills/subagent-driven-development/implementer-prompt.md +73 -0
  313. package/skills/subagent-driven-development/spec-reviewer-prompt.md +57 -0
  314. package/skills/systematic-debugging/SKILL.md +134 -0
  315. package/skills/systematic-debugging/condition-based-waiting.md +64 -0
  316. package/skills/systematic-debugging/defense-in-depth.md +32 -0
  317. package/skills/systematic-debugging/root-cause-tracing.md +55 -0
  318. package/skills/test-driven-development/SKILL.md +167 -0
  319. package/skills/using-git-worktrees/SKILL.md +219 -0
  320. package/skills/using-superpowers/SKILL.md +54 -0
  321. package/skills/verification-before-completion/SKILL.md +140 -0
  322. package/skills/verify/SKILL.md +82 -0
  323. package/skills/writing-plans/SKILL.md +128 -0
  324. package/skills/writing-skills/SKILL.md +93 -0
@@ -0,0 +1,231 @@
1
+ #!/usr/bin/env bash
2
+ # run-plan-echo-back.sh — Spec echo-back gate for verifying agent understanding
3
+ #
4
+ # Standalone module: can be sourced by any execution mode (headless, team, ralph).
5
+ # No dependencies on batch loop state — only reads SKIP_ECHO_BACK and STRICT_ECHO_BACK globals.
6
+ #
7
+ # Functions:
8
+ # _echo_back_check <batch_text> <log_file>
9
+ # Lightweight keyword-match gate on agent output. Non-blocking by default.
10
+ # echo_back_check <batch_text> <log_dir> <batch_num> [claude_cmd]
11
+ # Full spec verification: agent restatement → haiku verdict → retry once.
12
+ #
13
+ # Globals (read-only): SKIP_ECHO_BACK, STRICT_ECHO_BACK
14
+ #
15
+ # Echo-back gate behavior (--strict-echo-back / --skip-echo-back):
16
+ # Default: NON-BLOCKING — prints a WARNING if agent echo-back looks wrong, then continues.
17
+ # --skip-echo-back: disables the echo-back check entirely (no prompt, no warning).
18
+ # --strict-echo-back: makes the echo-back check BLOCKING — returns 1 on mismatch, aborting the batch.
19
+
20
# _echo_back_check — lightweight keyword gate on the agent's logged output.
#
# Collects up to ten distinct words of four or more letters from the first
# five lines of the batch text and checks whether at least one of them occurs
# (case-insensitively, as a substring) anywhere in the log file.
#
# Globals (read-only):
#   SKIP_ECHO_BACK   - "true" disables the check entirely.
#   STRICT_ECHO_BACK - "true" turns a mismatch into a failure (return 1).
# Args: <batch_text> <log_file>
# Outputs: on mismatch, a WARNING (plus an ERROR in strict mode) on stderr.
# Returns: 0 when skipped, when the log file does not exist, when no keywords
#          can be extracted, on a match, or on a non-strict mismatch;
#          1 on mismatch with STRICT_ECHO_BACK=true.
_echo_back_check() {
    local batch_text="$1"
    local log_file="$2"

    # --skip-echo-back: disabled entirely
    if [[ "${SKIP_ECHO_BACK:-false}" == "true" ]]; then
        return 0
    fi

    # Without a log file there is no agent output to inspect.
    if [[ ! -f "$log_file" ]]; then
        return 0
    fi

    # Build a '|'-separated keyword list. printf (not echo) so the batch text
    # is never interpreted as echo options. The trailing '|| true' keeps an
    # empty grep result or an early-closing head from failing the assignment
    # when the caller runs with errexit/pipefail.
    local expected_keywords
    expected_keywords=$(printf '%s\n' "$batch_text" \
        | head -5 \
        | grep -oE '\b[A-Za-z]{4,}\b' \
        | sort -u \
        | head -10 \
        | tr '\n' '|' \
        | sed 's/|$//' || true)

    if [[ -z "$expected_keywords" ]]; then
        return 0
    fi

    # The keywords are letters only, so the joined list is already a valid
    # extended-regex alternation: one grep call answers "is any keyword
    # present?" instead of spawning one grep per keyword.
    if grep -qiE -- "$expected_keywords" "$log_file" 2>/dev/null; then
        return 0
    fi

    echo "WARNING: Echo-back check: agent output may not address the batch intent (keywords not found: $expected_keywords)" >&2

    # --strict-echo-back: blocking — return 1 to abort batch
    if [[ "${STRICT_ECHO_BACK:-false}" == "true" ]]; then
        echo "ERROR: --strict-echo-back is set. Aborting batch due to spec misalignment." >&2
        return 1
    fi

    # Default: non-blocking, proceed anyway.
    return 0
}
70
+
71
# _echo_back_first_paragraph — print the first paragraph read from stdin.
# Leading blank lines are skipped; output stops at the first blank line that
# follows some text.
_echo_back_first_paragraph() {
    awk 'NF { seen = 1; print; next } seen { exit }'
}

# _echo_back_verdict_is_yes — succeed when the reviewer's verdict is YES.
# Args: <verdict_text>
# The verdict is taken from the first standalone YES or NO word, so an answer
# such as "NO - ... eyes" or "NO, yesterday's ..." is not mistaken for YES
# merely because the letters "yes" occur somewhere in the explanation.
_echo_back_verdict_is_yes() {
    local first_token
    first_token=$(printf '%s\n' "$1" | grep -oiwE 'yes|no' | head -1 || true)
    case "$first_token" in
        [Yy][Ee][Ss]) return 0 ;;
        *) return 1 ;;
    esac
}

# _echo_back_verify — ask the haiku model whether a restatement matches the spec.
# Args: <claude_cmd> <batch_text> <restatement> <echo_log>
# Outputs: the reviewer's answer on stdout; its stderr is appended to <echo_log>.
# Returns: the exit status of the claude command.
_echo_back_verify() {
    local claude_cmd="$1"
    local batch_text="$2"
    local restatement="$3"
    local echo_log="$4"

    local verify_prompt="Compare these two texts. Does the RESTATEMENT accurately capture the key goals of the ORIGINAL SPEC? Answer YES or NO followed by a brief reason.

ORIGINAL SPEC:
${batch_text}

RESTATEMENT:
${restatement}"

    CLAUDECODE='' "$claude_cmd" -p "$verify_prompt" \
        --model haiku \
        --allowedTools "" \
        --permission-mode bypassPermissions \
        2>>"$echo_log"
}

# echo_back_check — Verify agent understands the batch spec before execution
#
# Flow: ask the agent to restate the spec, have a haiku reviewer compare the
# restatement with the spec, and on a NO verdict retry the whole exchange once
# with the reviewer's reason included in the prompt.
#
# Args: <batch_text> <log_dir> <batch_num> [claude_cmd]
#   claude_cmd defaults to "claude"; passing another command name allows test
#   injection of a mock.
# Outputs: status lines on stdout (PASSED) or stderr (everything else);
#          stderr of every claude call goes to <log_dir>/batch-<batch_num>-echo-back.log.
# Returns: 0 if a restatement matches the spec, or if the check cannot be
#          carried out (agent call fails, no restatement, reviewer call fails);
#          1 if the reviewer still says NO after the retry.
echo_back_check() {
    local batch_text="$1"
    local log_dir="$2"
    local batch_num="$3"
    local claude_cmd="${4:-claude}"

    local echo_prompt restatement verdict
    local echo_log="$log_dir/batch-${batch_num}-echo-back.log"

    # Step 1: Ask the agent to restate the batch spec
    echo_prompt="Before implementing, restate in one paragraph what this batch must accomplish. Do not write any code. Just describe the goal and key deliverables.

The batch specification is:
${batch_text}"

    local claude_exit=0
    restatement=$(CLAUDECODE='' "$claude_cmd" -p "$echo_prompt" \
        --allowedTools "" \
        --permission-mode bypassPermissions \
        2>"$echo_log") || claude_exit=$?

    if [[ $claude_exit -ne 0 ]]; then
        echo " Echo-back: claude failed (exit $claude_exit) — see $echo_log" >&2
        return 0
    fi

    # Keep only the first paragraph. Done before the emptiness test so that a
    # reply consisting solely of blank lines is also treated as "no restatement".
    restatement=$(printf '%s\n' "$restatement" | _echo_back_first_paragraph)

    if [[ -z "$restatement" ]]; then
        echo " Echo-back: no restatement received (skipping check)" >&2
        return 0
    fi

    # Step 2: Lightweight comparison via haiku
    local verify_exit=0
    verdict=$(_echo_back_verify "$claude_cmd" "$batch_text" "$restatement" "$echo_log") || verify_exit=$?

    # A reviewer that could not be reached says nothing about the agent's
    # understanding; skip the gate, as is done when the agent call itself fails.
    if [[ $verify_exit -ne 0 || -z "$verdict" ]]; then
        echo " Echo-back: verifier unavailable (exit $verify_exit) — skipping check, see $echo_log" >&2
        return 0
    fi

    if _echo_back_verdict_is_yes "$verdict"; then
        echo " Echo-back: PASSED (spec understood)"
        return 0
    fi

    # Step 3: Retry once with clarified prompt
    echo " Echo-back: MISMATCH — retrying with clarified prompt" >&2
    local reason
    reason=$(printf '%s\n' "$verdict" | head -2 || true)

    local retry_prompt="Your previous restatement did not match the spec. The reviewer said: ${reason}

Re-read the specification carefully and restate in one paragraph what this batch must accomplish:
${batch_text}"

    local retry_restatement
    retry_restatement=$(CLAUDECODE='' "$claude_cmd" -p "$retry_prompt" \
        --allowedTools "" \
        --permission-mode bypassPermissions \
        2>>"$echo_log") || true

    retry_restatement=$(printf '%s\n' "$retry_restatement" | _echo_back_first_paragraph)

    # A mismatch is already on record from the first round, so any failure in
    # the retry round leaves that mismatch standing.
    local retry_verdict
    retry_verdict=$(_echo_back_verify "$claude_cmd" "$batch_text" "$retry_restatement" "$echo_log") || true

    if _echo_back_verdict_is_yes "$retry_verdict"; then
        echo " Echo-back: PASSED on retry (spec understood)"
        return 0
    fi

    echo " Echo-back: FAILED after retry (spec not understood)" >&2
    return 1
}
170
+
171
# --- Tier 2: LLM semantic verification ---
# run_echo_back_tier2 — ask claude whether the agent's summary matches the spec.
#
# Args: <batch_text> <agent_summary>
# Requires: a `claude` command; when none is found the check is skipped.
# Outputs: "echo-back-tier2: PASSED" or "echo-back-tier2: FAILED — <reply>" on
#          stdout; skip notices on stderr.
# Returns: 1 when a line of the reply starts with FAIL (case-insensitive);
#          0 otherwise, including when the check had to be skipped.
run_echo_back_tier2() {
    local batch_text="$1"
    local agent_summary="$2"

    if ! command -v claude >/dev/null 2>&1; then
        echo "echo-back-tier2: claude CLI not available — skipping" >&2
        return 0
    fi

    # Plain string assignment rather than a here-doc inside $(...): the text
    # contains an unbalanced apostrophe ("AGENT'S"), which older bash releases
    # are known to mis-parse in that position.
    local prompt="You are a specification compliance reviewer. Compare:

SPECIFICATION:
${batch_text}

AGENT'S UNDERSTANDING:
${agent_summary}

Does the agent's understanding match the specification? Flag any:
- Missing requirements
- Added requirements not in spec
- Misinterpreted requirements
- Ambiguous interpretations

Output exactly one line: PASS or FAIL followed by a colon and explanation."

    # CLAUDECODE is cleared for the child, as in the module's other claude calls.
    # NOTE(review): confirm that the claude CLI accepts --max-tokens; if it does
    # not, every call fails and this check is always skipped.
    local result
    local claude_exit=0
    result=$(printf '%s\n' "$prompt" | CLAUDECODE='' claude -p --max-tokens 200 2>/dev/null) || claude_exit=$?

    # A failed call verified nothing: stay non-blocking, but say so instead of
    # reporting PASSED.
    if [[ $claude_exit -ne 0 ]]; then
        echo "echo-back-tier2: claude call failed (exit $claude_exit) — skipping" >&2
        return 0
    fi

    if grep -qi '^FAIL' <<<"$result"; then
        echo "echo-back-tier2: FAILED — $result"
        return 1
    fi

    echo "echo-back-tier2: PASSED"
    return 0
}
214
+
215
# should_run_tier2 — decide whether the tier-2 check applies to a batch.
# Args: [batch_number=0] [batch_type=unknown] [strict=false]
# Returns: 0 (run it) when the batch number is "1", the batch type is
#          "integration", or strict is "true"; 1 otherwise.
should_run_tier2() {
    local batch_no="${1:-0}"
    local kind="${2:-unknown}"
    local strict_mode="${3:-false}"

    # Any single trigger is enough: first batch, integration batch, strict mode.
    if [[ "$batch_no" == "1" || "$kind" == "integration" || "$strict_mode" == "true" ]]; then
        return 0
    fi

    return 1
}
@@ -0,0 +1,396 @@
1
+ #!/usr/bin/env bash
2
+ # run-plan-headless.sh — Headless batch execution loop for run-plan
3
+ #
4
+ # Requires globals: WORKTREE, RESUME, START_BATCH, END_BATCH, NOTIFY,
5
+ # PLAN_FILE, QUALITY_GATE_CMD, PYTHON, MAX_RETRIES, ON_FAILURE, VERIFY, MODE,
6
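+ # SKIP_ECHO_BACK, STRICT_ECHO_BACK, SCRIPT_DIR (optional: MAB, MAX_BUDGET, SAMPLE_COUNT, SAMPLE_ON_RETRY, SAMPLE_ON_CRITICAL, SAMPLE_DEFAULT_COUNT)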
7
+ # Requires libs: run-plan-parser, state, quality-gate, notify, prompt, scoring, echo-back
8
+
9
# Run batches START_BATCH..END_BATCH of the plan through headless `claude -p`,
# gating each batch on the quality gate and retrying per ON_FAILURE.
#
# Globals (read): WORKTREE, RESUME, START_BATCH, END_BATCH, NOTIFY, PLAN_FILE,
#   QUALITY_GATE_CMD, PYTHON, MAX_RETRIES, ON_FAILURE, VERIFY, MODE, SCRIPT_DIR,
#   SKIP_ECHO_BACK, STRICT_ECHO_BACK, MAB, MAX_BUDGET, SAMPLE_ON_RETRY,
#   SAMPLE_ON_CRITICAL, SAMPLE_DEFAULT_COUNT
# Globals (written): SAMPLE_COUNT (reset at the start of every batch)
# Side effects: writes $WORKTREE/logs/*, $WORKTREE/.run-plan-prefix.txt and
#   temporarily edits $WORKTREE/CLAUDE.md.
# Exits the shell with status 1 when the budget is exhausted, a batch cannot be
# passed (unless ON_FAILURE=skip), or final verification fails.
run_mode_headless() {
  # Loop counters are kept local so they do not leak into the caller's scope.
  local b batch

  mkdir -p "$WORKTREE/logs"

  # Initialize state if not resuming
  if [[ "$RESUME" != true ]]; then
    init_state "$WORKTREE" "$PLAN_FILE" "$MODE"

    # Mark earlier batches as completed (if --start-batch > 1)
    if [[ "$START_BATCH" -gt 1 ]]; then
      for ((b = 1; b < START_BATCH; b++)); do
        complete_batch "$WORKTREE" "$b" 0
      done
    fi
  fi

  # Generate AGENTS.md for agent awareness
  generate_agents_md "$PLAN_FILE" "$WORKTREE" "$MODE"

  # Load telegram credentials if notifications enabled
  if [[ "$NOTIFY" == true ]]; then
    _load_telegram_env || echo "WARNING: Telegram notifications unavailable" >&2
  fi

  local plan_name
  plan_name=$(basename "$PLAN_FILE" .md)

  # Build the stable prefix ONCE before the batch loop and cache it to disk.
  # The stable prefix contains plan identity, worktree path, python, branch, and TDD rules —
  # none of which change between batches. prev_test_count is intentionally excluded because
  # it increases after each batch; it lives in the variable suffix (#48).
  #
  # #45: Check that the write succeeded. A silent failure here would leave all subsequent
  # batches with a missing/stale prefix file — fail fast instead.
  local stable_prefix
  stable_prefix=$(build_stable_prefix "$PLAN_FILE" "$WORKTREE" "$PYTHON" "$QUALITY_GATE_CMD")
  echo "$stable_prefix" > "$WORKTREE/.run-plan-prefix.txt" || {
    echo "ERROR: Failed to write prefix file $WORKTREE/.run-plan-prefix.txt" >&2
    exit 1
  }

  # Preserve user's --sample value before batch loop so per-batch reset doesn't clobber it (#16/#28)
  local SAMPLE_DEFAULT=${SAMPLE_COUNT:-0}

  for ((batch = START_BATCH; batch <= END_BATCH; batch++)); do
    # Reset sampling count each batch — prevents leak from prior batch's retry/critical trigger (#16/#28)
    SAMPLE_COUNT=$SAMPLE_DEFAULT

    # Budget enforcement
    if [[ -n "${MAX_BUDGET:-}" ]]; then
      if ! check_budget "$WORKTREE" "$MAX_BUDGET"; then
        echo "STOPPING: Budget limit reached (\$${MAX_BUDGET})"
        exit 1
      fi
    fi

    local title
    title=$(get_batch_title "$PLAN_FILE" "$batch")
    echo ""
    echo "================================================================"
    echo "  Batch $batch: $title"
    echo "================================================================"

    local batch_text
    batch_text=$(get_batch_text "$PLAN_FILE" "$batch")
    if [[ -z "$batch_text" ]]; then
      echo "  (empty batch -- skipping)"
      continue
    fi

    # Declared before MAB routing because the MAB path never reaches the
    # retry loop below (#4A review).
    local batch_passed=false

    # MAB routing (when --mab flag set)
    if [[ "${MAB:-false}" == "true" ]]; then
      local batch_type_for_route
      batch_type_for_route=$(classify_batch_type "$PLAN_FILE" "$batch")
      local perf_file="$WORKTREE/logs/strategy-perf.json"
      if [[ ! -f "$perf_file" ]]; then
        init_strategy_perf "$perf_file"
      fi

      local mab_route
      mab_route=$(thompson_route "$batch_type_for_route" "$perf_file")
      echo "  [MAB] type=$batch_type_for_route → route=$mab_route"

      if [[ "$mab_route" == "mab" ]]; then
        local mab_exit=0
        "$SCRIPT_DIR/mab-run.sh" \
          --plan "$PLAN_FILE" --batch "$batch" \
          --work-unit "$title" --worktree "$WORKTREE" \
          --quality-gate "$QUALITY_GATE_CMD" || mab_exit=$?

        if [[ $mab_exit -eq 0 ]]; then
          local new_tc
          new_tc=$(get_previous_test_count "$WORKTREE")
          complete_batch "$WORKTREE" "$batch" "$new_tc"
          batch_passed=true
        else
          echo "MAB batch $batch failed (exit $mab_exit)"
        fi

        # `continue` goes straight to the next batch, so the pass check that
        # normally follows the retry loop has to be applied here; without it
        # a failed MAB batch would be silently ignored.
        if [[ "$batch_passed" != true && "$ON_FAILURE" != "skip" ]]; then
          echo "Batch $batch never passed. Exiting."
          exit 1
        fi
        continue
      fi
    fi

    # Write batch header to progress.txt at the start of each batch (#53)
    # Non-fatal: progress tracking failure must not kill the run
    if type write_batch_progress &>/dev/null; then
      write_batch_progress "$WORKTREE" "$batch" "$title" || \
        echo "WARNING: Failed to write batch progress header (non-fatal)" >&2
    fi

    # Generate and inject per-batch context into CLAUDE.md
    # Guard all CLAUDE.md manipulation — failures here must not kill the run
    local batch_context="" _claude_md_existed=false _claude_md_backup=""
    batch_context=$(generate_batch_context "$PLAN_FILE" "$batch" "$WORKTREE" 2>/dev/null || true)
    if [[ -n "$batch_context" ]]; then
      {
        local claude_md="$WORKTREE/CLAUDE.md"
        if [[ -f "$claude_md" ]]; then
          _claude_md_existed=true
          _claude_md_backup=$(cat "$claude_md")
        fi
        # Remove previous run-plan context section if present.
        # awk approach avoids the sed range-deletion bug (#4): if
        # "## Run-Plan:" is the LAST section in CLAUDE.md, the sed
        # pattern '/^## Run-Plan:/,/^## [^R]/' has no closing anchor
        # and deletes from Run-Plan to EOF — eating the entire file.
        # awk prints everything before the Run-Plan section, skips
        # lines until the next ## header (or EOF), then resumes.
        if [[ -f "$claude_md" ]] && grep -q "^## Run-Plan:" "$claude_md"; then
          local tmp
          tmp=$(mktemp)
          awk '
            /^## Run-Plan:/ { in_section=1; next }
            in_section && /^## / { in_section=0 }
            !in_section { print }
          ' "$claude_md" > "$tmp"
          mv "$tmp" "$claude_md"
        fi
        # Append new context
        echo "" >> "$claude_md"
        echo "$batch_context" >> "$claude_md"
      } || echo "WARNING: Failed to inject batch context into CLAUDE.md (non-fatal)" >&2
    fi

    # Fetch the current test count INSIDE the loop — it increases after each batch.
    # Combine the cached stable prefix with the per-batch variable suffix so the
    # prompt always reflects the actual current test count (#48).
    local prev_test_count
    prev_test_count=$(get_previous_test_count "$WORKTREE")

    # NOTE(review): the variable suffix is emitted BEFORE the "stable prefix",
    # which contradicts the naming; order kept as-is — confirm it is intended.
    local prompt
    prompt=$(printf '%s\n\n%s\n' \
      "$(build_variable_suffix "$PLAN_FILE" "$batch" "$WORKTREE" "$prev_test_count")" \
      "$stable_prefix")

    # Spec echo-back gate: verify agent understands the batch before executing
    if [[ "${SKIP_ECHO_BACK:-false}" != "true" ]]; then
      if ! echo_back_check "$batch_text" "$WORKTREE/logs" "$batch"; then
        echo "WARNING: Echo-back check failed for batch $batch (proceeding anyway)" >&2
      fi
    fi

    local max_attempts=$((MAX_RETRIES + 1))
    local attempt=0

    while [[ $attempt -lt $max_attempts ]]; do
      attempt=$((attempt + 1))
      local log_file="$WORKTREE/logs/batch-${batch}-attempt-${attempt}.log"
      local batch_start
      batch_start=$(date +%s)

      echo ""
      echo "--- Attempt $attempt of $max_attempts ---"

      # Auto-sample on retry if configured
      if [[ "${SAMPLE_ON_RETRY:-false}" == "true" && "${SAMPLE_COUNT:-0}" -eq 0 && $attempt -ge 2 ]]; then
        SAMPLE_COUNT="${SAMPLE_DEFAULT_COUNT:-3}"
        echo "  Auto-enabling sampling ($SAMPLE_COUNT candidates) for retry"
      fi

      # Auto-sample on critical batches
      if [[ "${SAMPLE_ON_CRITICAL:-false}" == "true" && "${SAMPLE_COUNT:-0}" -eq 0 && $attempt -eq 1 ]]; then
        if is_critical_batch "$PLAN_FILE" "$batch"; then
          SAMPLE_COUNT="${SAMPLE_DEFAULT_COUNT:-3}"
          echo "  Auto-enabling sampling ($SAMPLE_COUNT candidates) for critical batch"
        fi
      fi

      # Memory guard for sampling
      if [[ "${SAMPLE_COUNT:-0}" -gt 0 ]]; then
        check_memory_for_sampling || true
      fi

      # If sampling enabled and this is a retry, use parallel candidates
      if [[ "${SAMPLE_COUNT:-0}" -gt 0 && $attempt -ge 2 ]]; then
        if run_sampling_candidates "$WORKTREE" "$PLAN_FILE" "$batch" "$prompt" "$QUALITY_GATE_CMD"; then
          batch_passed=true
          break
        fi
        continue  # Skip normal retry path below
      fi

      # Build escalation context for retries
      local full_prompt="$prompt"
      if [[ $attempt -eq 2 ]]; then
        local prev_log="$WORKTREE/logs/batch-${batch}-attempt-$((attempt - 1)).log"
        full_prompt="$prompt

IMPORTANT: Previous attempt failed. Review the quality gate output and fix the issues.
The previous attempt log is available at: $prev_log"
      elif [[ $attempt -ge 3 ]]; then
        local prev_log="$WORKTREE/logs/batch-${batch}-attempt-$((attempt - 1)).log"
        local log_digest=""
        if [[ -f "$prev_log" ]]; then
          log_digest=$("$SCRIPT_DIR/../failure-digest.sh" "$prev_log" 2>/dev/null || tail -50 "$prev_log" 2>/dev/null || true)
        fi
        full_prompt="$prompt

IMPORTANT: Previous attempts failed ($((attempt - 1)) so far). This is attempt $attempt.
Failure digest from previous attempt:
\`\`\`
$log_digest
\`\`\`
Focus on fixing the root cause. Check test output carefully."
      fi

      # Run claude headless (unset CLAUDECODE to allow nested invocation)
      # Use --output-format json to capture session_id for cost tracking
      # NOTE: this sacrifices real-time streaming — if streaming is needed,
      # remove --output-format json and use tee instead (#38).
      local claude_exit=0
      local claude_json_output=""
      claude_json_output=$(CLAUDECODE='' claude -p "$full_prompt" \
        --allowedTools "Bash,Read,Write,Edit,Grep,Glob" \
        --permission-mode bypassPermissions \
        --output-format json \
        2>"$log_file.stderr") || claude_exit=$?

      # Extract session_id and result from JSON output
      local batch_session_id=""
      if [[ -n "$claude_json_output" ]]; then
        batch_session_id=$(printf '%s\n' "$claude_json_output" | jq -r '.session_id // empty' 2>/dev/null || true)
        # Write result text to log file (was previously done by tee)
        printf '%s\n' "$claude_json_output" | jq -r '.result // empty' 2>/dev/null > "$log_file" || true
      fi
      # Always keep stderr: when claude crashes with nothing on stdout, stderr
      # is the only diagnostic and must not be deleted unread.
      if [[ -s "$log_file.stderr" ]]; then
        cat "$log_file.stderr" >> "$log_file" 2>/dev/null || true
      fi
      rm -f "$log_file.stderr"

      if [[ $claude_exit -ne 0 ]]; then
        echo "WARNING: claude exited with code $claude_exit"
      fi

      # Diagnostic: if log file is empty or missing, claude likely crashed with no output (#38)
      if [[ ! -s "$log_file" ]]; then
        echo "WARNING: claude produced no output (log file empty or missing). Claude may have crashed." >&2
        echo "  Log path: $log_file" >&2
        echo "  Exit code: $claude_exit" >&2
        echo "[run-plan] claude produced no output for batch $batch attempt $attempt (exit=$claude_exit)" >> "$log_file"
      fi

      # Echo-back gate: check agent output reflects batch intent (#30)
      # NON-BLOCKING by default; use --strict-echo-back to make it blocking.
      local echo_back_blocked=false
      _echo_back_check "$batch_text" "$log_file" || {
        if [[ "${STRICT_ECHO_BACK:-false}" == "true" ]]; then
          echo "Batch $batch FAILED on attempt $attempt: echo-back gate (strict mode)"
          # Recorded so the attempt is routed into the failure handling below
          # even if the quality gate itself would have passed.
          echo_back_blocked=true
        fi
      }

      # Restore CLAUDE.md after context injection (prevent git-clean failure)
      # Try git checkout first (works when CLAUDE.md is tracked).
      # Fallback: if file didn't exist before injection, remove it;
      # if it did exist, restore from backup.
      if [[ -n "$batch_context" ]]; then
        {
          git -C "$WORKTREE" checkout -- CLAUDE.md 2>/dev/null
        } || {
          if [[ "$_claude_md_existed" == false ]]; then
            rm -f "$WORKTREE/CLAUDE.md"
          elif [[ -n "$_claude_md_backup" ]]; then
            printf '%s\n' "$_claude_md_backup" > "$WORKTREE/CLAUDE.md"
          fi
        } || echo "WARNING: Failed to restore CLAUDE.md (non-fatal)" >&2
      fi

      # Compute duration before quality gate (includes claude time, not gate time)
      local batch_end
      batch_end=$(date +%s)
      local duration_secs="$((batch_end - batch_start))"
      local duration="${duration_secs}s"

      # Run quality gate (passes duration for state tracking). A strict
      # echo-back failure rejects the attempt outright, so the gate is not
      # consulted and cannot record the batch as passed.
      local gate_exit=0
      local fail_reason="Quality gate failed"
      if [[ "$echo_back_blocked" == true ]]; then
        gate_exit=1
        fail_reason="Echo-back gate failed (strict mode)"
      else
        run_quality_gate "$WORKTREE" "$QUALITY_GATE_CMD" "$batch" "$duration_secs" || gate_exit=$?
      fi

      if [[ $gate_exit -eq 0 ]]; then
        echo "Batch $batch PASSED (${duration})"
        batch_passed=true

        # Record cost for this batch
        if [[ -n "${batch_session_id:-}" ]]; then
          record_batch_cost "$WORKTREE" "$batch" "$batch_session_id" || \
            echo "WARNING: Failed to record batch cost (non-fatal)" >&2
        fi

        # Append State section to progress.txt after quality gate passes (#53)
        # Records test count, duration, and cost for cross-context memory.
        if type append_progress_section &>/dev/null; then
          {
            local _state_test_count
            _state_test_count=$(get_previous_test_count "$WORKTREE" 2>/dev/null || echo "0")
            local _state_cost=""
            _state_cost=$(jq -r ".costs[\"$batch\"].estimated_cost_usd // empty" "$WORKTREE/.run-plan-state.json" 2>/dev/null || true)
            local _state_content="- Tests: ${_state_test_count} passing"$'\n'"- Duration: ${duration}"
            [[ -n "$_state_cost" ]] && _state_content+=$'\n'"- Cost: \$${_state_cost}"
            append_progress_section "$WORKTREE" "State" "$_state_content"
          } || echo "WARNING: Failed to append progress State section (non-fatal)" >&2
        fi

        if [[ "$NOTIFY" == true ]]; then
          {
            local new_test_count
            new_test_count=$(get_previous_test_count "$WORKTREE")
            # Build summary from git log (commits in this batch)
            local batch_summary=""
            batch_summary=$(cd "$WORKTREE" && git log --oneline -5 2>/dev/null | head -3 | sed 's/^[a-f0-9]* /• /' | tr '\n' '; ' | sed 's/; $//') || true
            local batch_cost=""
            batch_cost=$(jq -r ".costs[\"$batch\"].estimated_cost_usd // empty" "$WORKTREE/.run-plan-state.json" 2>/dev/null || true)
            notify_success "$plan_name" "$batch" "$END_BATCH" "$title" "$new_test_count" "$prev_test_count" "$duration" "$MODE" "$batch_summary" "$batch_cost"
          } || echo "WARNING: Telegram notification failed (non-fatal)" >&2
        fi
        break
      else
        echo "Batch $batch FAILED on attempt $attempt (${duration})"

        if [[ "$NOTIFY" == true ]]; then
          notify_failure "$plan_name" "$batch" "$END_BATCH" "$title" "0" "?" "$fail_reason" "$ON_FAILURE" || echo "WARNING: Telegram notification failed (non-fatal)" >&2
        fi

        # Record failure pattern for cross-run learning
        {
          local fail_type="quality gate failure"
          if [[ -f "$log_file" ]]; then
            fail_type=$(grep -oE "(FAIL|ERROR|FAILED).*" "$log_file" | head -1 | cut -c1-80 || echo "quality gate failure")
            [[ -z "$fail_type" ]] && fail_type="quality gate failure"
          fi
          record_failure_pattern "$WORKTREE" "$title" "$fail_type" ""
        } || echo "WARNING: Failed to record failure pattern (non-fatal)" >&2

        # Handle failure mode
        if [[ "$ON_FAILURE" == "stop" ]]; then
          echo "STOPPING: --on-failure=stop. Fix issues and use --resume to continue."
          exit 1
        elif [[ "$ON_FAILURE" == "skip" ]]; then
          echo "SKIPPING: Batch $batch failed, moving to next batch."
          break
        elif [[ "$ON_FAILURE" == "retry" ]]; then
          if [[ $attempt -ge $max_attempts ]]; then
            echo "EXHAUSTED: All $max_attempts attempts failed for batch $batch."
            echo "STOPPING: No more retries."
            exit 1
          fi
          echo "RETRYING: Attempt $((attempt + 1)) of $max_attempts..."
        fi
      fi
    done

    if [[ "$batch_passed" != true && "$ON_FAILURE" != "skip" ]]; then
      echo "Batch $batch never passed. Exiting."
      exit 1
    fi
  done

  echo ""
  echo "================================================================"
  echo "  All batches complete ($START_BATCH → $END_BATCH)"
  echo "================================================================"

  if [[ "$VERIFY" == true ]]; then
    echo ""
    echo "Running final verification..."
    run_quality_gate "$WORKTREE" "$QUALITY_GATE_CMD" "final" || {
      echo "FINAL VERIFICATION FAILED"
      exit 1
    }
  fi
}
@@ -0,0 +1,57 @@
1
+ #!/usr/bin/env bash
2
+ # run-plan-notify.sh — Telegram notification helpers for run-plan
3
+ #
4
+ # Functions:
5
+ # format_success_message <plan_name> <batch_num> <total_batches> <batch_title> <test_count> <prev_count> <duration> <mode> [summary] [cost]
6
+ # format_failure_message <plan_name> <batch_num> <total_batches> <batch_title> <test_count> <failing_count> <error> <action>
7
+ # notify_success (same args as format_success_message) — format + send
8
+ # notify_failure (same args as format_failure_message) — format + send
9
+
10
+ # Source shared telegram functions
11
+ _NOTIFY_SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
12
+ source "$_NOTIFY_SCRIPT_DIR/telegram.sh"
13
+
14
# Build the Telegram text for a batch that passed.
#
# Arguments:
#   $1 plan name        $2 batch number     $3 total batches   $4 batch title
#   $5 test count       $6 previous count   $7 duration        $8 mode
#   $9 summary (optional)                   $10 cost in USD (optional)
# Outputs: the message on stdout. The cost is appended only when non-empty and
#   not "0"; the summary, when given, goes on its own trailing line.
format_success_message() {
  local plan_name="$1" batch_num="$2" total_batches="$3" batch_title="$4"
  local test_count="$5" prev_count="$6" duration="$7" mode="$8"
  local summary="${9:-}" cost="${10:-}"

  # Only plain integers are fed to arithmetic: raw text would be evaluated as
  # an expression (aborting on input like "12 passed"), and a leading zero
  # would be read as octal. Empty counts are treated as 0; anything else that
  # is not an integer yields an unknown delta shown as "?".
  local int_re='^-?(0|[1-9][0-9]*)$'
  local cur="${test_count:-0}" prev="${prev_count:-0}"
  local delta="?"
  if [[ "$cur" =~ $int_re && "$prev" =~ $int_re ]]; then
    delta=$(( cur - prev ))
  fi

  local msg
  msg=$(printf '%s — Batch %s/%s ✓\n*%s*\nTests: %s (↑%s) | %s | %s' \
    "$plan_name" "$batch_num" "$total_batches" "$batch_title" \
    "$test_count" "$delta" "$duration" "$mode")

  if [[ -n "$cost" && "$cost" != "0" ]]; then
    msg+=" | \$${cost}"
  fi

  if [[ -n "$summary" ]]; then
    msg+=$'\n'"$summary"
  fi

  echo "$msg"
}
35
+
36
# Build the Telegram text for a batch that failed.
#
# Arguments:
#   $1 plan name     $2 batch number    $3 total batches   $4 batch title
#   $5 test count    $6 failing count   $7 error text      $8 action taken
# Outputs: the five-line message on stdout, without a trailing newline.
format_failure_message() {
  local plan=$1 num=$2 total=$3 heading=$4
  local tests=$5 failing=$6 issue=$7 next_step=$8

  printf '%s — Batch %s/%s ✗\n' "$plan" "$num" "$total"
  printf '*%s*\n' "$heading"
  printf 'Tests: %s (%s failing)\n' "$tests" "$failing"
  printf 'Issue: %s\n' "$issue"
  printf 'Action: %s' "$next_step"
}
44
+
45
# Format a success message from the given arguments (passed through unchanged
# to format_success_message) and send it via _send_telegram.
# A failed send only produces a warning on stderr; the pipeline keeps running.
notify_success() {
  local text
  text=$(format_success_message "$@")
  if ! _send_telegram "$text"; then
    echo "WARNING: notify_success: Telegram send failed (non-fatal)" >&2
  fi
}
51
+
52
# Format a failure message from the given arguments (passed through unchanged
# to format_failure_message) and send it via _send_telegram.
# A failed send only produces a warning on stderr; the pipeline keeps running.
notify_failure() {
  local text
  text=$(format_failure_message "$@")
  if ! _send_telegram "$text"; then
    echo "WARNING: notify_failure: Telegram send failed (non-fatal)" >&2
  fi
}