autonomous-coding-toolkit 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (324) hide show
  1. package/.claude-plugin/marketplace.json +22 -0
  2. package/.claude-plugin/plugin.json +13 -0
  3. package/LICENSE +21 -0
  4. package/Makefile +21 -0
  5. package/README.md +140 -0
  6. package/SECURITY.md +28 -0
  7. package/agents/bash-expert.md +113 -0
  8. package/agents/dependency-auditor.md +138 -0
  9. package/agents/integration-tester.md +120 -0
  10. package/agents/lesson-scanner.md +149 -0
  11. package/agents/python-expert.md +179 -0
  12. package/agents/service-monitor.md +141 -0
  13. package/agents/shell-expert.md +147 -0
  14. package/benchmarks/runner.sh +147 -0
  15. package/benchmarks/tasks/01-rest-endpoint/rubric.sh +29 -0
  16. package/benchmarks/tasks/01-rest-endpoint/task.md +17 -0
  17. package/benchmarks/tasks/02-refactor-module/task.md +8 -0
  18. package/benchmarks/tasks/03-fix-integration-bug/task.md +8 -0
  19. package/benchmarks/tasks/04-add-test-coverage/task.md +8 -0
  20. package/benchmarks/tasks/05-multi-file-feature/task.md +8 -0
  21. package/bin/act.js +238 -0
  22. package/commands/autocode.md +6 -0
  23. package/commands/cancel-ralph.md +18 -0
  24. package/commands/code-factory.md +53 -0
  25. package/commands/create-prd.md +55 -0
  26. package/commands/ralph-loop.md +18 -0
  27. package/commands/run-plan.md +117 -0
  28. package/commands/submit-lesson.md +122 -0
  29. package/docs/ARCHITECTURE.md +630 -0
  30. package/docs/CONTRIBUTING.md +125 -0
  31. package/docs/lessons/0001-bare-exception-swallowing.md +34 -0
  32. package/docs/lessons/0002-async-def-without-await.md +28 -0
  33. package/docs/lessons/0003-create-task-without-callback.md +28 -0
  34. package/docs/lessons/0004-hardcoded-test-counts.md +28 -0
  35. package/docs/lessons/0005-sqlite-without-closing.md +33 -0
  36. package/docs/lessons/0006-venv-pip-path.md +27 -0
  37. package/docs/lessons/0007-runner-state-self-rejection.md +35 -0
  38. package/docs/lessons/0008-quality-gate-blind-spot.md +33 -0
  39. package/docs/lessons/0009-parser-overcount-empty-batches.md +36 -0
  40. package/docs/lessons/0010-local-outside-function-bash.md +33 -0
  41. package/docs/lessons/0011-batch-tests-for-unimplemented-code.md +36 -0
  42. package/docs/lessons/0012-api-markdown-unescaped-chars.md +33 -0
  43. package/docs/lessons/0013-export-prefix-env-parsing.md +33 -0
  44. package/docs/lessons/0014-decorator-registry-import-side-effect.md +43 -0
  45. package/docs/lessons/0015-frontend-backend-schema-drift.md +43 -0
  46. package/docs/lessons/0016-event-driven-cold-start-seeding.md +44 -0
  47. package/docs/lessons/0017-copy-paste-logic-diverges.md +43 -0
  48. package/docs/lessons/0018-layer-passes-pipeline-broken.md +45 -0
  49. package/docs/lessons/0019-systemd-envfile-ignores-export.md +41 -0
  50. package/docs/lessons/0020-persist-state-incrementally.md +44 -0
  51. package/docs/lessons/0021-dual-axis-testing.md +48 -0
  52. package/docs/lessons/0022-jsx-factory-shadowing.md +43 -0
  53. package/docs/lessons/0023-static-analysis-spiral.md +51 -0
  54. package/docs/lessons/0024-shared-pipeline-implementation.md +55 -0
  55. package/docs/lessons/0025-defense-in-depth-all-entry-points.md +65 -0
  56. package/docs/lessons/0026-linter-no-rules-false-enforcement.md +54 -0
  57. package/docs/lessons/0027-jsx-silent-prop-drop.md +64 -0
  58. package/docs/lessons/0028-no-infrastructure-in-client-code.md +49 -0
  59. package/docs/lessons/0029-never-write-secrets-to-files.md +61 -0
  60. package/docs/lessons/0030-cache-merge-not-replace.md +62 -0
  61. package/docs/lessons/0031-verify-units-at-boundaries.md +66 -0
  62. package/docs/lessons/0032-module-lifecycle-subscribe-unsubscribe.md +89 -0
  63. package/docs/lessons/0033-async-iteration-mutable-snapshot.md +72 -0
  64. package/docs/lessons/0034-caller-missing-await-silent-discard.md +65 -0
  65. package/docs/lessons/0035-duplicate-registration-silent-overwrite.md +85 -0
  66. package/docs/lessons/0036-websocket-dirty-disconnect.md +33 -0
  67. package/docs/lessons/0037-parallel-agents-worktree-corruption.md +31 -0
  68. package/docs/lessons/0038-subscribe-no-stored-ref.md +36 -0
  69. package/docs/lessons/0039-fallback-or-default-hides-bugs.md +34 -0
  70. package/docs/lessons/0040-event-firehose-filter-first.md +36 -0
  71. package/docs/lessons/0041-ambiguous-base-dir-path-nesting.md +32 -0
  72. package/docs/lessons/0042-spec-compliance-insufficient.md +36 -0
  73. package/docs/lessons/0043-exact-count-extensible-collections.md +32 -0
  74. package/docs/lessons/0044-relative-file-deps-worktree.md +39 -0
  75. package/docs/lessons/0045-iterative-design-improvement.md +33 -0
  76. package/docs/lessons/0046-plan-assertion-math-bugs.md +38 -0
  77. package/docs/lessons/0047-pytest-single-threaded-default.md +37 -0
  78. package/docs/lessons/0048-integration-wiring-batch.md +40 -0
  79. package/docs/lessons/0049-ab-verification.md +41 -0
  80. package/docs/lessons/0050-editing-sourced-files-during-execution.md +33 -0
  81. package/docs/lessons/0051-infrastructure-fixes-cant-self-heal.md +30 -0
  82. package/docs/lessons/0052-uncommitted-changes-poison-quality-gates.md +31 -0
  83. package/docs/lessons/0053-jq-compact-flag-inconsistency.md +31 -0
  84. package/docs/lessons/0054-parser-matches-inside-code-blocks.md +30 -0
  85. package/docs/lessons/0055-agents-compensate-for-garbled-prompts.md +31 -0
  86. package/docs/lessons/0056-grep-count-exit-code-on-zero.md +42 -0
  87. package/docs/lessons/0057-new-artifacts-break-git-clean-gates.md +42 -0
  88. package/docs/lessons/0058-dead-config-keys-never-consumed.md +49 -0
  89. package/docs/lessons/0059-contract-test-shared-structures.md +53 -0
  90. package/docs/lessons/0060-set-e-silent-death-in-runners.md +53 -0
  91. package/docs/lessons/0061-context-injection-dirty-state.md +50 -0
  92. package/docs/lessons/0062-sibling-bug-neighborhood-scan.md +29 -0
  93. package/docs/lessons/0063-one-flag-two-lifetimes.md +31 -0
  94. package/docs/lessons/0064-test-passes-wrong-reason.md +31 -0
  95. package/docs/lessons/0065-pipefail-grep-count-double-output.md +39 -0
  96. package/docs/lessons/0066-local-keyword-outside-function.md +37 -0
  97. package/docs/lessons/0067-stdin-hang-non-interactive-shell.md +36 -0
  98. package/docs/lessons/0068-agent-builds-wrong-thing-correctly.md +31 -0
  99. package/docs/lessons/0069-plan-quality-dominates-execution.md +30 -0
  100. package/docs/lessons/0070-spec-echo-back-prevents-drift.md +31 -0
  101. package/docs/lessons/0071-positive-instructions-outperform-negative.md +30 -0
  102. package/docs/lessons/0072-lost-in-the-middle-context-placement.md +30 -0
  103. package/docs/lessons/0073-unscoped-lessons-cause-false-positives.md +30 -0
  104. package/docs/lessons/0074-stale-context-injection-wrong-batch.md +32 -0
  105. package/docs/lessons/0075-research-artifacts-must-persist.md +32 -0
  106. package/docs/lessons/0076-wrong-decomposition-contaminates-downstream.md +30 -0
  107. package/docs/lessons/0077-cherry-pick-merges-need-manual-resolution.md +30 -0
  108. package/docs/lessons/0078-static-review-without-live-test.md +30 -0
  109. package/docs/lessons/0079-integration-wiring-batch-required.md +32 -0
  110. package/docs/lessons/FRAMEWORK.md +161 -0
  111. package/docs/lessons/SUMMARY.md +201 -0
  112. package/docs/lessons/TEMPLATE.md +85 -0
  113. package/docs/plans/2026-02-21-code-factory-v2-design.md +204 -0
  114. package/docs/plans/2026-02-21-code-factory-v2-implementation-plan.md +2189 -0
  115. package/docs/plans/2026-02-21-code-factory-v2-phase4-design.md +537 -0
  116. package/docs/plans/2026-02-21-code-factory-v2-phase4-implementation-plan.md +2012 -0
  117. package/docs/plans/2026-02-21-hardening-pass-design.md +108 -0
  118. package/docs/plans/2026-02-21-hardening-pass-plan.md +1378 -0
  119. package/docs/plans/2026-02-21-mab-research-report.md +406 -0
  120. package/docs/plans/2026-02-21-marketplace-restructure-design.md +240 -0
  121. package/docs/plans/2026-02-21-marketplace-restructure-plan.md +832 -0
  122. package/docs/plans/2026-02-21-phase4-completion-plan.md +697 -0
  123. package/docs/plans/2026-02-21-validator-suite-design.md +148 -0
  124. package/docs/plans/2026-02-21-validator-suite-plan.md +540 -0
  125. package/docs/plans/2026-02-22-mab-research-round2.md +556 -0
  126. package/docs/plans/2026-02-22-mab-run-design.md +462 -0
  127. package/docs/plans/2026-02-22-mab-run-plan.md +2046 -0
  128. package/docs/plans/2026-02-22-operations-design-methodology-research.md +681 -0
  129. package/docs/plans/2026-02-22-research-agent-failure-taxonomy.md +532 -0
  130. package/docs/plans/2026-02-22-research-code-guideline-policies.md +886 -0
  131. package/docs/plans/2026-02-22-research-codebase-audit-refactoring.md +908 -0
  132. package/docs/plans/2026-02-22-research-coding-standards-documentation.md +541 -0
  133. package/docs/plans/2026-02-22-research-competitive-landscape.md +687 -0
  134. package/docs/plans/2026-02-22-research-comprehensive-testing.md +1076 -0
  135. package/docs/plans/2026-02-22-research-context-utilization.md +459 -0
  136. package/docs/plans/2026-02-22-research-cost-quality-tradeoff.md +548 -0
  137. package/docs/plans/2026-02-22-research-lesson-transferability.md +508 -0
  138. package/docs/plans/2026-02-22-research-multi-agent-coordination.md +312 -0
  139. package/docs/plans/2026-02-22-research-phase-integration.md +602 -0
  140. package/docs/plans/2026-02-22-research-plan-quality.md +428 -0
  141. package/docs/plans/2026-02-22-research-prompt-engineering.md +558 -0
  142. package/docs/plans/2026-02-22-research-unconventional-perspectives.md +528 -0
  143. package/docs/plans/2026-02-22-research-user-adoption.md +638 -0
  144. package/docs/plans/2026-02-22-research-verification-effectiveness.md +433 -0
  145. package/docs/plans/2026-02-23-agent-suite-design.md +299 -0
  146. package/docs/plans/2026-02-23-agent-suite-plan.md +578 -0
  147. package/docs/plans/2026-02-23-phase3-cost-infrastructure-design.md +148 -0
  148. package/docs/plans/2026-02-23-phase3-cost-infrastructure-plan.md +1062 -0
  149. package/docs/plans/2026-02-23-research-bash-expert-agent.md +543 -0
  150. package/docs/plans/2026-02-23-research-dependency-auditor-agent.md +564 -0
  151. package/docs/plans/2026-02-23-research-improving-existing-agents.md +503 -0
  152. package/docs/plans/2026-02-23-research-integration-tester-agent.md +454 -0
  153. package/docs/plans/2026-02-23-research-python-expert-agent.md +429 -0
  154. package/docs/plans/2026-02-23-research-service-monitor-agent.md +425 -0
  155. package/docs/plans/2026-02-23-research-shell-expert-agent.md +533 -0
  156. package/docs/plans/2026-02-23-roadmap-to-completion.md +530 -0
  157. package/docs/plans/2026-02-24-headless-module-split-design.md +98 -0
  158. package/docs/plans/2026-02-24-headless-module-split.md +443 -0
  159. package/docs/plans/2026-02-24-lesson-scope-metadata-design.md +228 -0
  160. package/docs/plans/2026-02-24-lesson-scope-metadata-plan.md +968 -0
  161. package/docs/plans/2026-02-24-npm-packaging-design.md +841 -0
  162. package/docs/plans/2026-02-24-npm-packaging-plan.md +1965 -0
  163. package/docs/plans/audit-findings.md +186 -0
  164. package/docs/telegram-notification-format.md +98 -0
  165. package/examples/example-plan.md +51 -0
  166. package/examples/example-prd.json +72 -0
  167. package/examples/example-roadmap.md +33 -0
  168. package/examples/quickstart-plan.md +63 -0
  169. package/hooks/hooks.json +26 -0
  170. package/hooks/setup-symlinks.sh +48 -0
  171. package/hooks/stop-hook.sh +135 -0
  172. package/package.json +47 -0
  173. package/policies/bash.md +71 -0
  174. package/policies/python.md +71 -0
  175. package/policies/testing.md +61 -0
  176. package/policies/universal.md +60 -0
  177. package/scripts/analyze-report.sh +97 -0
  178. package/scripts/architecture-map.sh +145 -0
  179. package/scripts/auto-compound.sh +273 -0
  180. package/scripts/batch-audit.sh +42 -0
  181. package/scripts/batch-test.sh +101 -0
  182. package/scripts/entropy-audit.sh +221 -0
  183. package/scripts/failure-digest.sh +51 -0
  184. package/scripts/generate-ast-rules.sh +96 -0
  185. package/scripts/init.sh +112 -0
  186. package/scripts/lesson-check.sh +428 -0
  187. package/scripts/lib/common.sh +61 -0
  188. package/scripts/lib/cost-tracking.sh +153 -0
  189. package/scripts/lib/ollama.sh +60 -0
  190. package/scripts/lib/progress-writer.sh +128 -0
  191. package/scripts/lib/run-plan-context.sh +215 -0
  192. package/scripts/lib/run-plan-echo-back.sh +231 -0
  193. package/scripts/lib/run-plan-headless.sh +396 -0
  194. package/scripts/lib/run-plan-notify.sh +57 -0
  195. package/scripts/lib/run-plan-parser.sh +81 -0
  196. package/scripts/lib/run-plan-prompt.sh +215 -0
  197. package/scripts/lib/run-plan-quality-gate.sh +132 -0
  198. package/scripts/lib/run-plan-routing.sh +315 -0
  199. package/scripts/lib/run-plan-sampling.sh +170 -0
  200. package/scripts/lib/run-plan-scoring.sh +146 -0
  201. package/scripts/lib/run-plan-state.sh +142 -0
  202. package/scripts/lib/run-plan-team.sh +199 -0
  203. package/scripts/lib/telegram.sh +54 -0
  204. package/scripts/lib/thompson-sampling.sh +176 -0
  205. package/scripts/license-check.sh +74 -0
  206. package/scripts/mab-run.sh +575 -0
  207. package/scripts/module-size-check.sh +146 -0
  208. package/scripts/patterns/async-no-await.yml +5 -0
  209. package/scripts/patterns/bare-except.yml +6 -0
  210. package/scripts/patterns/empty-catch.yml +6 -0
  211. package/scripts/patterns/hardcoded-localhost.yml +9 -0
  212. package/scripts/patterns/retry-loop-no-backoff.yml +12 -0
  213. package/scripts/pipeline-status.sh +197 -0
  214. package/scripts/policy-check.sh +226 -0
  215. package/scripts/prior-art-search.sh +133 -0
  216. package/scripts/promote-mab-lessons.sh +126 -0
  217. package/scripts/prompts/agent-a-superpowers.md +29 -0
  218. package/scripts/prompts/agent-b-ralph.md +29 -0
  219. package/scripts/prompts/judge-agent.md +61 -0
  220. package/scripts/prompts/planner-agent.md +44 -0
  221. package/scripts/pull-community-lessons.sh +90 -0
  222. package/scripts/quality-gate.sh +266 -0
  223. package/scripts/research-gate.sh +90 -0
  224. package/scripts/run-plan.sh +329 -0
  225. package/scripts/scope-infer.sh +159 -0
  226. package/scripts/setup-ralph-loop.sh +155 -0
  227. package/scripts/telemetry.sh +230 -0
  228. package/scripts/tests/run-all-tests.sh +52 -0
  229. package/scripts/tests/test-act-cli.sh +46 -0
  230. package/scripts/tests/test-agents-md.sh +87 -0
  231. package/scripts/tests/test-analyze-report.sh +114 -0
  232. package/scripts/tests/test-architecture-map.sh +89 -0
  233. package/scripts/tests/test-auto-compound.sh +169 -0
  234. package/scripts/tests/test-batch-test.sh +65 -0
  235. package/scripts/tests/test-benchmark-runner.sh +25 -0
  236. package/scripts/tests/test-common.sh +168 -0
  237. package/scripts/tests/test-cost-tracking.sh +158 -0
  238. package/scripts/tests/test-echo-back.sh +180 -0
  239. package/scripts/tests/test-entropy-audit.sh +146 -0
  240. package/scripts/tests/test-failure-digest.sh +66 -0
  241. package/scripts/tests/test-generate-ast-rules.sh +145 -0
  242. package/scripts/tests/test-helpers.sh +82 -0
  243. package/scripts/tests/test-init.sh +47 -0
  244. package/scripts/tests/test-lesson-check.sh +278 -0
  245. package/scripts/tests/test-lesson-local.sh +55 -0
  246. package/scripts/tests/test-license-check.sh +109 -0
  247. package/scripts/tests/test-mab-run.sh +182 -0
  248. package/scripts/tests/test-ollama-lib.sh +49 -0
  249. package/scripts/tests/test-ollama.sh +60 -0
  250. package/scripts/tests/test-pipeline-status.sh +198 -0
  251. package/scripts/tests/test-policy-check.sh +124 -0
  252. package/scripts/tests/test-prior-art-search.sh +96 -0
  253. package/scripts/tests/test-progress-writer.sh +140 -0
  254. package/scripts/tests/test-promote-mab-lessons.sh +110 -0
  255. package/scripts/tests/test-pull-community-lessons.sh +149 -0
  256. package/scripts/tests/test-quality-gate.sh +241 -0
  257. package/scripts/tests/test-research-gate.sh +132 -0
  258. package/scripts/tests/test-run-plan-cli.sh +86 -0
  259. package/scripts/tests/test-run-plan-context.sh +305 -0
  260. package/scripts/tests/test-run-plan-e2e.sh +153 -0
  261. package/scripts/tests/test-run-plan-headless.sh +424 -0
  262. package/scripts/tests/test-run-plan-notify.sh +124 -0
  263. package/scripts/tests/test-run-plan-parser.sh +217 -0
  264. package/scripts/tests/test-run-plan-prompt.sh +254 -0
  265. package/scripts/tests/test-run-plan-quality-gate.sh +222 -0
  266. package/scripts/tests/test-run-plan-routing.sh +178 -0
  267. package/scripts/tests/test-run-plan-scoring.sh +148 -0
  268. package/scripts/tests/test-run-plan-state.sh +261 -0
  269. package/scripts/tests/test-run-plan-team.sh +157 -0
  270. package/scripts/tests/test-scope-infer.sh +150 -0
  271. package/scripts/tests/test-setup-ralph-loop.sh +63 -0
  272. package/scripts/tests/test-telegram-env.sh +38 -0
  273. package/scripts/tests/test-telegram.sh +121 -0
  274. package/scripts/tests/test-telemetry.sh +46 -0
  275. package/scripts/tests/test-thompson-sampling.sh +139 -0
  276. package/scripts/tests/test-validate-all.sh +60 -0
  277. package/scripts/tests/test-validate-commands.sh +89 -0
  278. package/scripts/tests/test-validate-hooks.sh +98 -0
  279. package/scripts/tests/test-validate-lessons.sh +150 -0
  280. package/scripts/tests/test-validate-plan-quality.sh +235 -0
  281. package/scripts/tests/test-validate-plans.sh +187 -0
  282. package/scripts/tests/test-validate-plugin.sh +106 -0
  283. package/scripts/tests/test-validate-prd.sh +184 -0
  284. package/scripts/tests/test-validate-skills.sh +134 -0
  285. package/scripts/validate-all.sh +57 -0
  286. package/scripts/validate-commands.sh +67 -0
  287. package/scripts/validate-hooks.sh +89 -0
  288. package/scripts/validate-lessons.sh +98 -0
  289. package/scripts/validate-plan-quality.sh +369 -0
  290. package/scripts/validate-plans.sh +120 -0
  291. package/scripts/validate-plugin.sh +86 -0
  292. package/scripts/validate-policies.sh +42 -0
  293. package/scripts/validate-prd.sh +118 -0
  294. package/scripts/validate-skills.sh +96 -0
  295. package/skills/autocode/SKILL.md +285 -0
  296. package/skills/autocode/ab-verification.md +51 -0
  297. package/skills/autocode/code-quality-standards.md +37 -0
  298. package/skills/autocode/competitive-mode.md +364 -0
  299. package/skills/brainstorming/SKILL.md +97 -0
  300. package/skills/capture-lesson/SKILL.md +187 -0
  301. package/skills/check-lessons/SKILL.md +116 -0
  302. package/skills/dispatching-parallel-agents/SKILL.md +110 -0
  303. package/skills/executing-plans/SKILL.md +85 -0
  304. package/skills/finishing-a-development-branch/SKILL.md +201 -0
  305. package/skills/receiving-code-review/SKILL.md +72 -0
  306. package/skills/requesting-code-review/SKILL.md +59 -0
  307. package/skills/requesting-code-review/code-reviewer.md +82 -0
  308. package/skills/research/SKILL.md +145 -0
  309. package/skills/roadmap/SKILL.md +115 -0
  310. package/skills/subagent-driven-development/SKILL.md +98 -0
  311. package/skills/subagent-driven-development/code-quality-reviewer-prompt.md +18 -0
  312. package/skills/subagent-driven-development/implementer-prompt.md +73 -0
  313. package/skills/subagent-driven-development/spec-reviewer-prompt.md +57 -0
  314. package/skills/systematic-debugging/SKILL.md +134 -0
  315. package/skills/systematic-debugging/condition-based-waiting.md +64 -0
  316. package/skills/systematic-debugging/defense-in-depth.md +32 -0
  317. package/skills/systematic-debugging/root-cause-tracing.md +55 -0
  318. package/skills/test-driven-development/SKILL.md +167 -0
  319. package/skills/using-git-worktrees/SKILL.md +219 -0
  320. package/skills/using-superpowers/SKILL.md +54 -0
  321. package/skills/verification-before-completion/SKILL.md +140 -0
  322. package/skills/verify/SKILL.md +82 -0
  323. package/skills/writing-plans/SKILL.md +128 -0
  324. package/skills/writing-skills/SKILL.md +93 -0
@@ -0,0 +1,1062 @@
1
+ # Phase 3: Cost Infrastructure — Implementation Plan
2
+
3
+ > **For Claude:** REQUIRED SUB-SKILL: Use superpowers:executing-plans to implement this plan task-by-task.
4
+
5
+ **Goal:** Add per-batch cost tracking via JSONL session file parsing, prompt prefix/suffix splitting for cache optimization, and structured progress.txt format.
6
+
7
+ **Architecture:** Two new library files (`cost-tracking.sh`, `progress-writer.sh`) and an updated `run-plan-prompt.sh`, integrated into the headless execution loop, pipeline status display, and notification system. All cost data is stored in `.run-plan-state.json` alongside the existing `test_counts` and `durations`.
8
+
9
+ **Tech Stack:** Bash, jq, Claude CLI JSONL session files
10
+
11
+ ---
12
+
13
+ ## Batch 1: Per-Batch Cost Tracking (Tasks 1-7)
14
+
15
+ ### Task 1: Write failing tests for cost extraction
16
+
17
+ **Files:**
18
+ - Create: `scripts/tests/test-cost-tracking.sh`
19
+
20
+ **Step 1: Write the test file**
21
+
22
+ ```bash
23
+ #!/usr/bin/env bash
24
+ # Test cost tracking functions
25
+ set -euo pipefail
26
+
27
+ SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
28
+ source "$SCRIPT_DIR/test-helpers.sh"
29
+ source "$SCRIPT_DIR/../lib/run-plan-state.sh"
30
+ source "$SCRIPT_DIR/../lib/cost-tracking.sh"
31
+
32
+ # --- Setup ---
33
+ WORK=$(mktemp -d)
34
+ trap 'rm -rf "$WORK"' EXIT
35
+
36
+ # Create mock JSONL session directory
37
+ MOCK_SESSION_DIR="$WORK/.claude/projects/test-project"
38
+ mkdir -p "$MOCK_SESSION_DIR"
39
+
40
+ # Mock session JSONL with token usage data
41
+ MOCK_SESSION_ID="test-session-abc-123"
42
+ cat > "$MOCK_SESSION_DIR/${MOCK_SESSION_ID}.jsonl" << 'JSONL'
43
+ {"type":"summary","costUSD":0.0423,"durationMs":12345,"inputTokens":8500,"outputTokens":2100,"cacheReadTokens":3200,"cacheWriteTokens":1000,"model":"claude-sonnet-4-6"}
44
+ JSONL
45
+
46
+ # --- Test: find_session_jsonl locates file ---
47
+ result=$(find_session_jsonl "$MOCK_SESSION_ID" "$WORK/.claude")
48
+ assert_contains "find_session_jsonl: returns path" "$MOCK_SESSION_ID" "$result"
49
+
50
+ # --- Test: find_session_jsonl returns empty for missing session ---
51
+ result=$(find_session_jsonl "nonexistent-session" "$WORK/.claude")
52
+ assert_eq "find_session_jsonl: empty for missing" "" "$result"
53
+
54
+ # --- Test: extract_session_cost returns JSON with token fields ---
55
+ cost_json=$(extract_session_cost "$MOCK_SESSION_ID" "$WORK/.claude")
56
+ assert_contains "extract: has input_tokens" "input_tokens" "$cost_json"
57
+ assert_contains "extract: has output_tokens" "output_tokens" "$cost_json"
58
+ assert_contains "extract: has cache_read_tokens" "cache_read_tokens" "$cost_json"
59
+ assert_contains "extract: has estimated_cost_usd" "estimated_cost_usd" "$cost_json"
60
+
61
+ input_tokens=$(echo "$cost_json" | jq -r '.input_tokens')
62
+ assert_eq "extract: input_tokens value" "8500" "$input_tokens"
63
+
64
+ output_tokens=$(echo "$cost_json" | jq -r '.output_tokens')
65
+ assert_eq "extract: output_tokens value" "2100" "$output_tokens"
66
+
67
+ cache_read=$(echo "$cost_json" | jq -r '.cache_read_tokens')
68
+ assert_eq "extract: cache_read_tokens value" "3200" "$cache_read"
69
+
70
+ cost_usd=$(echo "$cost_json" | jq -r '.estimated_cost_usd')
71
+ assert_eq "extract: cost from JSONL summary" "0.0423" "$cost_usd"
72
+
73
+ # --- Test: extract_session_cost handles missing session ---
74
+ cost_json=$(extract_session_cost "nonexistent" "$WORK/.claude")
75
+ input_tokens=$(echo "$cost_json" | jq -r '.input_tokens')
76
+ assert_eq "extract: missing session returns 0 input_tokens" "0" "$input_tokens"
77
+
78
+ # --- Test: record_batch_cost writes to state ---
79
+ init_state "$WORK" "plan.md" "headless"
80
+ record_batch_cost "$WORK" 1 "$MOCK_SESSION_ID" "$WORK/.claude"
81
+
82
+ costs_batch_1=$(jq -r '.costs["1"].input_tokens' "$WORK/.run-plan-state.json")
83
+ assert_eq "record: batch 1 input_tokens in state" "8500" "$costs_batch_1"
84
+
85
+ cost_usd=$(jq -r '.costs["1"].estimated_cost_usd' "$WORK/.run-plan-state.json")
86
+ assert_eq "record: batch 1 cost_usd in state" "0.0423" "$cost_usd"
87
+
88
+ session_id=$(jq -r '.costs["1"].session_id' "$WORK/.run-plan-state.json")
89
+ assert_eq "record: batch 1 session_id in state" "$MOCK_SESSION_ID" "$session_id"
90
+
91
+ total_cost=$(jq -r '.total_cost_usd' "$WORK/.run-plan-state.json")
92
+ assert_eq "record: total_cost_usd updated" "0.0423" "$total_cost"
93
+
94
+ # --- Test: record_batch_cost accumulates across batches ---
95
+ MOCK_SESSION_ID_2="test-session-def-456"
96
+ cat > "$MOCK_SESSION_DIR/${MOCK_SESSION_ID_2}.jsonl" << 'JSONL'
97
+ {"type":"summary","costUSD":0.031,"durationMs":9000,"inputTokens":7200,"outputTokens":1800,"cacheReadTokens":5000,"cacheWriteTokens":500,"model":"claude-sonnet-4-6"}
98
+ JSONL
99
+
100
+ record_batch_cost "$WORK" 2 "$MOCK_SESSION_ID_2" "$WORK/.claude"
101
+
102
+ total_cost=$(jq -r '.total_cost_usd' "$WORK/.run-plan-state.json")
103
+ # 0.0423 + 0.031 = 0.0733
104
+ assert_eq "record: total_cost accumulates" "0.0733" "$total_cost"
105
+
106
+ # --- Test: check_budget returns 0 when under budget ---
107
+ check_budget "$WORK" "1.00"
108
+ assert_exit "check_budget: under budget returns 0" 0 check_budget "$WORK" "1.00"
109
+
110
+ # --- Test: check_budget returns 1 when over budget ---
111
+ assert_exit "check_budget: over budget returns 1" 1 check_budget "$WORK" "0.05"
112
+
113
+ # --- Test: get_total_cost returns accumulated cost ---
114
+ total=$(get_total_cost "$WORK")
115
+ assert_eq "get_total_cost: returns accumulated" "0.0733" "$total"
116
+
117
+ report_results
118
+ ```
119
+
120
+ **Step 2: Run test to verify it fails**
121
+
122
+ Run: `bash scripts/tests/test-cost-tracking.sh`
123
+ Expected: FAIL (source cost-tracking.sh not found)
124
+
125
+ **Step 3: Commit test file**
126
+
127
+ ```bash
128
+ git add scripts/tests/test-cost-tracking.sh
129
+ git commit -m "test: add failing tests for cost-tracking.sh"
130
+ ```
131
+
132
+ ### Task 2: Implement cost-tracking.sh
133
+
134
+ **Files:**
135
+ - Create: `scripts/lib/cost-tracking.sh`
136
+
137
+ **Step 1: Write the implementation**
138
+
139
+ ```bash
140
+ #!/usr/bin/env bash
141
+ # cost-tracking.sh — Per-batch cost tracking via Claude CLI JSONL session files
142
+ #
143
+ # Claude CLI stores session data in JSONL files at:
144
+ # ~/.claude/projects/<project>/<session-id>.jsonl
145
+ # The last line with type "summary" contains token counts and cost.
146
+ #
147
+ # Functions:
148
+ # find_session_jsonl <session_id> <claude_dir> -> path to JSONL file (empty if not found)
149
+ # extract_session_cost <session_id> <claude_dir> -> JSON: {input_tokens, output_tokens, cache_read_tokens, estimated_cost_usd, model, session_id}
150
+ # record_batch_cost <worktree> <batch_num> <session_id> [claude_dir] -> updates .run-plan-state.json
151
+ # check_budget <worktree> <max_budget_usd> -> returns 0 if total cost is at or under budget, 1 if over
152
+ # get_total_cost <worktree> -> prints total_cost_usd from state
153
+
154
+ find_session_jsonl() {
155
+ local session_id="$1" claude_dir="$2"
156
+ local found=""
157
+ # Search all project directories for the session JSONL
158
+ while IFS= read -r -d '' f; do
159
+ found="$f"
160
+ break
161
+ done < <(find "$claude_dir" -name "${session_id}.jsonl" -print0 2>/dev/null)
162
+ echo "$found"
163
+ }
164
+
165
+ extract_session_cost() {
166
+ local session_id="$1" claude_dir="$2"
167
+ local jsonl_path
168
+ jsonl_path=$(find_session_jsonl "$session_id" "$claude_dir")
169
+
170
+ if [[ -z "$jsonl_path" || ! -f "$jsonl_path" ]]; then
171
+ # Return zero-cost JSON for missing sessions
172
+ echo '{"input_tokens":0,"output_tokens":0,"cache_read_tokens":0,"estimated_cost_usd":0,"model":"unknown","session_id":"'"$session_id"'"}'
173
+ return 0
174
+ fi
175
+
176
+ # Extract the summary line (last line with type "summary")
177
+ # If there is no summary line, fall back to a zero-cost record (per-message usage is not aggregated)
178
+ local summary
179
+ summary=$(grep '"type":"summary"' "$jsonl_path" | tail -1)
180
+
181
+ if [[ -n "$summary" ]]; then
182
+ echo "$summary" | jq -c '{
183
+ input_tokens: (.inputTokens // 0),
184
+ output_tokens: (.outputTokens // 0),
185
+ cache_read_tokens: (.cacheReadTokens // 0),
186
+ estimated_cost_usd: (.costUSD // 0),
187
+ model: (.model // "unknown"),
188
+ session_id: "'"$session_id"'"
189
+ }'
190
+ else
191
+ # No summary line — return zeros
192
+ echo '{"input_tokens":0,"output_tokens":0,"cache_read_tokens":0,"estimated_cost_usd":0,"model":"unknown","session_id":"'"$session_id"'"}'
193
+ fi
194
+ }
195
+
196
+ record_batch_cost() {
197
+ local worktree="$1" batch_num="$2" session_id="$3"
198
+ local claude_dir="${4:-$HOME/.claude}"
199
+ local sf="$worktree/.run-plan-state.json"
200
+
201
+ if [[ ! -f "$sf" ]]; then
202
+ echo "WARNING: No state file at $sf" >&2
203
+ return 1
204
+ fi
205
+
206
+ local cost_json
207
+ cost_json=$(extract_session_cost "$session_id" "$claude_dir")
208
+
209
+ local tmp
210
+ tmp=$(mktemp)
211
+
212
+ # Add cost entry for this batch and update total
213
+ jq --arg batch "$batch_num" --argjson cost "$cost_json" '
214
+ .costs //= {} |
215
+ .costs[$batch] = $cost |
216
+ .total_cost_usd = ([.costs[].estimated_cost_usd] | add)
217
+ ' "$sf" > "$tmp" && mv "$tmp" "$sf"
218
+ }
219
+
220
+ check_budget() {
221
+ local worktree="$1" max_budget="$2"
222
+ local sf="$worktree/.run-plan-state.json"
223
+
224
+ if [[ ! -f "$sf" ]]; then
225
+ return 0 # No state = no cost = under budget
226
+ fi
227
+
228
+ local total
229
+ total=$(jq -r '.total_cost_usd // 0' "$sf")
230
+
231
+ # Compare using bc (bash can't do float comparison); if bc is unavailable the check falls back to 0 and treats the run as under budget
232
+ if (( $(echo "$total > $max_budget" | bc -l 2>/dev/null || echo 0) )); then
233
+ echo "BUDGET EXCEEDED: \$${total} spent of \$${max_budget} limit" >&2
234
+ return 1
235
+ fi
236
+ return 0
237
+ }
238
+
239
+ get_total_cost() {
240
+ local worktree="$1"
241
+ local sf="$worktree/.run-plan-state.json"
242
+ jq -r '.total_cost_usd // 0' "$sf" 2>/dev/null || echo "0"
243
+ }
244
+ ```
245
+
246
+ **Step 2: Run tests**
247
+
248
+ Run: `bash scripts/tests/test-cost-tracking.sh`
249
+ Expected: PASS (all assertions)
250
+
251
+ **Step 3: Commit**
252
+
253
+ ```bash
254
+ git add scripts/lib/cost-tracking.sh
255
+ git commit -m "feat: add cost-tracking.sh — JSONL session file parsing for per-batch cost"
256
+ ```
257
+
258
+ ### Task 3: Update init_state to include costs schema
259
+
260
+ **Files:**
261
+ - Modify: `scripts/lib/run-plan-state.sh:25-38`
262
+
263
+ **Step 1: Write failing test (append to test-run-plan-state.sh)**
264
+
265
+ Add to the end of `scripts/tests/test-run-plan-state.sh` (before the results block):
266
+
267
+ ```bash
268
+ # --- Test: init_state includes costs object ---
269
+ WORK_COST=$(mktemp -d)
270
+ trap 'rm -rf "$WORK" "$WORK2" "$WORK3" "$WORK4" "$WORK5" "$WORK6" "$WORK7" "$WORK_COST"' EXIT
271
+ init_state "$WORK_COST" "plan.md" "headless"
272
+
273
+ val=$(jq -r '.costs | type' "$WORK_COST/.run-plan-state.json")
274
+ assert_eq "init_state: has costs object" "object" "$val"
275
+
276
+ val=$(jq -r '.total_cost_usd' "$WORK_COST/.run-plan-state.json")
277
+ assert_eq "init_state: total_cost_usd starts at 0" "0" "$val"
278
+ ```
279
+
280
+ **Step 2: Run test to verify it fails**
281
+
282
+ Run: `bash scripts/tests/test-run-plan-state.sh`
283
+ Expected: FAIL on "has costs object" (currently no costs field in init_state)
284
+
285
+ **Step 3: Update init_state in run-plan-state.sh**
286
+
287
+ In `scripts/lib/run-plan-state.sh`, modify the `jq -n` call inside `init_state()` (lines 25-38) to add the `costs` and `total_cost_usd` fields:
288
+
289
+ ```bash
290
+ jq -n \
291
+ --arg plan_file "$plan_file" \
292
+ --arg mode "$mode" \
293
+ --arg started_at "$now" \
294
+ '{
295
+ plan_file: $plan_file,
296
+ mode: $mode,
297
+ current_batch: 1,
298
+ completed_batches: [],
299
+ test_counts: {},
300
+ durations: {},
301
+ costs: {},
302
+ total_cost_usd: 0,
303
+ started_at: $started_at,
304
+ last_quality_gate: null
305
+ }' > "$sf"
306
+ ```
307
+
308
+ **Step 4: Run tests**
309
+
310
+ Run: `bash scripts/tests/test-run-plan-state.sh`
311
+ Expected: ALL PASSED
312
+
313
+ **Step 5: Commit**
314
+
315
+ ```bash
316
+ git add scripts/lib/run-plan-state.sh scripts/tests/test-run-plan-state.sh
317
+ git commit -m "feat: add costs and total_cost_usd to state schema"
318
+ ```
319
+
320
+ ### Task 4: Wire cost tracking into headless execution loop
321
+
322
+ **Files:**
323
+ - Modify: `scripts/run-plan.sh:48` (add source)
324
+ - Modify: `scripts/lib/run-plan-headless.sh:380-389` (capture session_id, record cost)
325
+ - Modify: `scripts/lib/run-plan-headless.sh:280-283` (capture session_id for sampling)
326
+
327
+ **Step 1: Source cost-tracking in run-plan.sh**
328
+
329
+ In `scripts/run-plan.sh`, after line 48 (`source "$SCRIPT_DIR/lib/run-plan-scoring.sh"`), add:
330
+
331
+ ```bash
332
+ source "$SCRIPT_DIR/lib/cost-tracking.sh"
333
+ ```
334
+
335
+ **Step 2: Modify main claude -p call to capture session_id**
336
+
337
+ In `scripts/lib/run-plan-headless.sh`, replace the main `claude -p` block (lines 380-385):
338
+
339
+ From:
340
+ ```bash
341
+ # Run claude headless (unset CLAUDECODE to allow nested invocation)
342
+ local claude_exit=0
343
+ CLAUDECODE='' claude -p "$full_prompt" \
344
+ --allowedTools "Bash,Read,Write,Edit,Grep,Glob" \
345
+ --permission-mode bypassPermissions \
346
+ 2>&1 | tee "$log_file" || claude_exit=$?
347
+ ```
348
+
349
+ To:
350
+ ```bash
351
+ # Run claude headless (unset CLAUDECODE to allow nested invocation)
352
+ # Use --output-format json to capture session_id for cost tracking
353
+ local claude_exit=0
354
+ local claude_json_output=""
355
+ claude_json_output=$(CLAUDECODE='' claude -p "$full_prompt" \
356
+ --allowedTools "Bash,Read,Write,Edit,Grep,Glob" \
357
+ --permission-mode bypassPermissions \
358
+ --output-format json \
359
+ 2>"$log_file.stderr") || claude_exit=$?
360
+
361
+ # Extract session_id and result from JSON output
362
+ local batch_session_id=""
363
+ if [[ -n "$claude_json_output" ]]; then
364
+ batch_session_id=$(echo "$claude_json_output" | jq -r '.session_id // empty' 2>/dev/null || true)
365
+ # Write result text to log file (was previously done by tee)
366
+ echo "$claude_json_output" | jq -r '.result // empty' 2>/dev/null > "$log_file" || true
367
+ # Append stderr to log
368
+ cat "$log_file.stderr" >> "$log_file" 2>/dev/null || true
369
+ fi
370
+ rm -f "$log_file.stderr"
371
+ ```
372
+
373
+ **Step 3: Record cost after quality gate passes**
374
+
375
+ In `scripts/lib/run-plan-headless.sh`, inside the `if [[ $gate_exit -eq 0 ]]` block (after line 418), add cost recording:
376
+
377
+ ```bash
378
+ # Record cost for this batch
379
+ if [[ -n "$batch_session_id" ]]; then
380
+ record_batch_cost "$WORKTREE" "$batch" "$batch_session_id" || \
381
+ echo "WARNING: Failed to record batch cost (non-fatal)" >&2
382
+ fi
383
+ ```
384
+
385
+ **Step 4: Wire --max-budget enforcement**
386
+
387
+ In `scripts/lib/run-plan-headless.sh`, at the top of the batch loop body (after the SAMPLE_COUNT reset around line 140), add budget check:
388
+
389
+ ```bash
390
+ # Budget enforcement
391
+ if [[ -n "${MAX_BUDGET:-}" ]]; then
392
+ if ! check_budget "$WORKTREE" "$MAX_BUDGET"; then
393
+ echo "STOPPING: Budget limit reached (\$${MAX_BUDGET})"
394
+ exit 1
395
+ fi
396
+ fi
397
+ ```
398
+
399
+ **Step 5: Run make ci**
400
+
401
+ Run: `make ci`
402
+ Expected: ALL PASSED (no regression)
403
+
404
+ **Step 6: Commit**
405
+
406
+ ```bash
407
+ git add scripts/run-plan.sh scripts/lib/run-plan-headless.sh
408
+ git commit -m "feat: wire cost tracking into headless loop — capture session_id, record per-batch cost"
409
+ ```
410
+
411
+ ### Task 5: Add cost section to pipeline-status.sh
412
+
413
+ **Files:**
414
+ - Modify: `scripts/pipeline-status.sh:38-43`
415
+ - Modify: `scripts/tests/test-pipeline-status.sh`
416
+
417
+ **Step 1: Read existing pipeline-status test**
418
+
419
+ Read: `scripts/tests/test-pipeline-status.sh` to understand test pattern.
420
+
421
+ **Step 2: Add cost display to pipeline-status.sh**
422
+
423
+ In `scripts/pipeline-status.sh`, after the "Last gate" line (line 42), before the `echo ""`, add:
424
+
425
+ ```bash
426
+ # Cost tracking
427
+ total_cost=$(jq -r '.total_cost_usd // 0' "$STATE_FILE")
428
+ if [[ "$total_cost" != "0" ]]; then
429
+ echo " Cost: \$${total_cost}"
430
+ # Per-batch breakdown
431
+ jq -r '.costs // {} | to_entries[] | " Batch \(.key): $\(.value.estimated_cost_usd // 0) (\(.value.input_tokens // 0) in / \(.value.output_tokens // 0) out)"' "$STATE_FILE" 2>/dev/null || true
432
+ fi
433
+ ```
434
+
435
+ **Step 3: Add --show-costs flag**
436
+
437
+ In `scripts/pipeline-status.sh`, add argument parsing at the top (after PROJECT_ROOT assignment):
438
+
439
+ ```bash
440
+ SHOW_COSTS=false
441
+ for arg in "$@"; do
442
+ case "$arg" in
443
+ --show-costs) SHOW_COSTS=true ;;
444
+ esac
445
+ done
446
+ ```
447
+
448
+ And add a detailed cost section at the end (before final separator):
449
+
450
+ ```bash
451
+ # Detailed cost breakdown (only with --show-costs)
452
+ if [[ "$SHOW_COSTS" == true && -f "$STATE_FILE" ]]; then
453
+ echo "--- Cost Details ---"
454
+ jq -r '
455
+ .costs // {} | to_entries | sort_by(.key | tonumber) |
456
+ .[] | " Batch \(.key): $\(.value.estimated_cost_usd) | \(.value.input_tokens) in | \(.value.output_tokens) out | cache: \(.value.cache_read_tokens) read | \(.value.model // "unknown")"
457
+ ' "$STATE_FILE" 2>/dev/null || echo " No cost data"
458
+ total=$(jq -r '.total_cost_usd // 0' "$STATE_FILE")
459
+ echo " Total: \$${total}"
460
+ echo ""
461
+ fi
462
+ ```
463
+
464
+ **Step 4: Run tests**
465
+
466
+ Run: `bash scripts/tests/test-pipeline-status.sh`
467
+ Expected: ALL PASSED
468
+
469
+ **Step 5: Commit**
470
+
471
+ ```bash
472
+ git add scripts/pipeline-status.sh
473
+ git commit -m "feat: add cost display to pipeline-status.sh with --show-costs flag"
474
+ ```
475
+
476
+ ### Task 6: Add cost to Telegram notifications
477
+
478
+ **Files:**
479
+ - Modify: `scripts/lib/run-plan-notify.sh:14-29`
480
+
481
+ **Step 1: Update format_success_message to accept cost parameter**
482
+
483
+ In `scripts/lib/run-plan-notify.sh`, modify `format_success_message` to add an optional 10th parameter for cost:
484
+
485
+ ```bash
486
+ format_success_message() {
487
+ local plan_name="$1" batch_num="$2" total_batches="$3" batch_title="$4"
488
+ local test_count="$5" prev_count="$6" duration="$7" mode="$8"
489
+ local summary="${9:-}" cost="${10:-}"
490
+ local delta=$(( test_count - prev_count ))
491
+
492
+ local msg
493
+ msg=$(printf '%s — Batch %s/%s ✓\n*%s*\nTests: %s (↑%s) | %s | %s' \
494
+ "$plan_name" "$batch_num" "$total_batches" "$batch_title" \
495
+ "$test_count" "$delta" "$duration" "$mode")
496
+
497
+ if [[ -n "$cost" && "$cost" != "0" ]]; then
498
+ msg+=" | \$${cost}"
499
+ fi
500
+
501
+ if [[ -n "$summary" ]]; then
502
+ msg+=$'\n'"$summary"
503
+ fi
504
+
505
+ echo "$msg"
506
+ }
507
+ ```
508
+
509
+ **Step 2: Update the notify_success call in run-plan-headless.sh**
510
+
511
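+ In the success notification block (around line 428 of `run-plan-headless.sh`), pass cost as the 10th arg. If `notify_success` forwards its arguments to `format_success_message` positionally rather than via `"$@"`, update it to forward the 10th argument as well: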
512
+
513
+ ```bash
514
+ local batch_cost=""
515
+ batch_cost=$(jq -r ".costs[\"$batch\"].estimated_cost_usd // empty" "$WORKTREE/.run-plan-state.json" 2>/dev/null || true)
516
+ notify_success "$plan_name" "$batch" "$END_BATCH" "$title" "$new_test_count" "$prev_test_count" "$duration" "$MODE" "$batch_summary" "$batch_cost"
517
+ ```
518
+
519
+ **Step 3: Run notification tests**
520
+
521
+ Run: `bash scripts/tests/test-run-plan-notify.sh`
522
+ Expected: ALL PASSED
523
+
524
+ **Step 4: Commit**
525
+
526
+ ```bash
527
+ git add scripts/lib/run-plan-notify.sh scripts/lib/run-plan-headless.sh
528
+ git commit -m "feat: add cost to Telegram success notifications"
529
+ ```
530
+
531
+ ### Task 7: Run full CI and verify
532
+
533
+ **Step 1: Run make ci**
534
+
535
+ Run: `make ci`
536
+ Expected: ALL PASSED (40+ test files, 0 failures)
537
+
538
+ **Step 2: Commit any fixes if needed**
539
+
540
+ ---
541
+
542
+ ## Batch 2: Prompt Caching Structure (Tasks 8-11)
543
+
544
+ ### Task 8: Write failing tests for prefix/suffix split
545
+
546
+ **Files:**
547
+ - Modify: `scripts/tests/test-run-plan-prompt.sh`
548
+
549
+ **Step 1: Add prefix/suffix tests to test-run-plan-prompt.sh**
550
+
551
+ Append before the results section:
552
+
553
+ ```bash
554
+ # =============================================================================
555
+ # Stable prefix / variable suffix split tests
556
+ # =============================================================================
557
+
558
+ # --- Test: build_stable_prefix produces consistent output ---
559
+ prefix1=$(build_stable_prefix "$FIXTURE" "$WORKTREE" "/usr/bin/python3" "scripts/quality-gate.sh" 0)
560
+ prefix2=$(build_stable_prefix "$FIXTURE" "$WORKTREE" "/usr/bin/python3" "scripts/quality-gate.sh" 0)
561
+ assert_eq "stable prefix: identical across calls" "$prefix1" "$prefix2"
562
+
563
+ # --- Test: build_stable_prefix is different from build_variable_suffix ---
564
+ suffix1=$(build_variable_suffix "$FIXTURE" 1 "$WORKTREE" 0)
565
+ assert_not_contains "prefix does not contain batch tasks" "Task 1: Create Data Model" "$prefix1"
566
+ assert_contains "suffix contains batch tasks" "Task 1: Create Data Model" "$suffix1"
567
+
568
+ # --- Test: build_variable_suffix changes with batch number ---
569
+ suffix2=$(build_variable_suffix "$FIXTURE" 2 "$WORKTREE" 0)
570
+ assert_not_contains "suffix batch 2: no batch 1 tasks" "Create Data Model" "$suffix2"
571
+ assert_contains "suffix batch 2: has batch 2 tasks" "Wire Together" "$suffix2"
572
+
573
+ # --- Test: build_batch_prompt still works (backward compat) ---
574
+ full_prompt=$(build_batch_prompt "$FIXTURE" 1 "$WORKTREE" "/usr/bin/python3" "scripts/quality-gate.sh" 0)
575
+ assert_contains "full prompt: still has XML tags" "<batch_tasks>" "$full_prompt"
576
+ assert_contains "full prompt: still has requirements" "<requirements>" "$full_prompt"
577
+
578
+ # --- Test: prefix contains metadata, suffix contains batch-specific ---
579
+ assert_contains "prefix: has working directory" "$WORKTREE" "$prefix1"
580
+ assert_contains "prefix: has python path" "/usr/bin/python3" "$prefix1"
581
+ assert_contains "suffix: has <batch_tasks>" "<batch_tasks>" "$suffix1"
582
+ assert_contains "prefix: has <requirements>" "<requirements>" "$prefix1"
583
+ ```
584
+
585
+ **Step 2: Run test to verify it fails**
586
+
587
+ Run: `bash scripts/tests/test-run-plan-prompt.sh`
588
+ Expected: FAIL (build_stable_prefix not defined)
589
+
590
+ **Step 3: Commit**
591
+
592
+ ```bash
593
+ git add scripts/tests/test-run-plan-prompt.sh
594
+ git commit -m "test: add failing tests for prompt prefix/suffix split"
595
+ ```
596
+
597
+ ### Task 9: Implement prefix/suffix split in run-plan-prompt.sh
598
+
599
+ **Files:**
600
+ - Modify: `scripts/lib/run-plan-prompt.sh:12-127`
601
+
602
+ **Step 1: Add build_stable_prefix and build_variable_suffix functions**
603
+
604
+ Add these two new functions before the existing `build_batch_prompt` function. Then refactor `build_batch_prompt` to compose them.
605
+
606
+ ```bash
607
+ # Build the stable portion of the prompt (identical across batches as long as prev_test_count is unchanged — enables API cache hits).
608
+ # Args: <plan_file> <worktree> <python> <quality_gate_cmd> <prev_test_count>
609
+ build_stable_prefix() {
610
+ local plan_file="$1"
611
+ local worktree="$2"
612
+ local python="$3"
613
+ local quality_gate_cmd="$4"
614
+ local prev_test_count="$5"
615
+
616
+ local branch
617
+ branch=$(git -C "$worktree" branch --show-current 2>/dev/null || echo "unknown")
618
+
619
+ local prefix=""
620
+ prefix+="You are implementing batches from ${plan_file}."$'\n'
621
+ prefix+=""$'\n'
622
+ prefix+="Working directory: ${worktree}"$'\n'
623
+ prefix+="Python: ${python}"$'\n'
624
+ prefix+="Branch: ${branch}"$'\n'
625
+ prefix+=""$'\n'
626
+ prefix+="<requirements>"$'\n'
627
+ prefix+="- TDD: write test -> verify fail -> implement -> verify pass -> commit each task"$'\n'
628
+ prefix+="- After all tasks: run quality gate (${quality_gate_cmd})"$'\n'
629
+ prefix+="- Update progress.txt with batch summary and commit"$'\n'
630
+ prefix+="- All ${prev_test_count}+ tests must pass"$'\n'
631
+ prefix+="</requirements>"$'\n'
632
+
633
+ printf '%s' "$prefix"
634
+ }
635
+
636
+ # Build the variable portion of the prompt (changes each batch).
637
+ # Args: <plan_file> <batch_num> <worktree> <prev_test_count>
638
+ build_variable_suffix() {
639
+ local plan_file="$1"
640
+ local batch_num="$2"
641
+ local worktree="$3"
642
+ local prev_test_count="$4"
643
+
644
+ local title batch_text
645
+ title=$(get_batch_title "$plan_file" "$batch_num")
646
+ batch_text=$(get_batch_text "$plan_file" "$batch_num")
647
+
648
+ local recent_commits progress_tail prev_gate
649
+
650
+ recent_commits=$(git -C "$worktree" log --oneline -5 2>/dev/null || echo "(no commits)")
651
+
652
+ progress_tail=""
653
+ if [[ -f "$worktree/progress.txt" ]]; then
654
+ progress_tail=$(tail -20 "$worktree/progress.txt" 2>/dev/null || true)
655
+ fi
656
+
657
+ prev_gate=""
658
+ if [[ -f "$worktree/.run-plan-state.json" ]]; then
659
+ prev_gate=$(jq -r '.last_quality_gate // empty' "$worktree/.run-plan-state.json" 2>/dev/null || true)
660
+ fi
661
+
662
+ # Context refs
663
+ local context_refs_content=""
664
+ local refs
665
+ refs=$(get_batch_context_refs "$plan_file" "$batch_num")
666
+ if [[ -n "$refs" ]]; then
667
+ while IFS= read -r ref; do
668
+ [[ -z "$ref" ]] && continue
669
+ if [[ -f "$worktree/$ref" ]]; then
670
+ context_refs_content+="
671
+ --- $ref ---
672
+ $(head -100 "$worktree/$ref")
673
+ "
674
+ fi
675
+ done <<< "$refs"
676
+ fi
677
+
678
+ # Research warnings
679
+ local research_warnings=""
680
+ for rj in "$worktree"/tasks/research-*.json; do
681
+ [[ -f "$rj" ]] || continue
682
+ local warnings
683
+ warnings=$(jq -r '.blocking_issues[]? // empty' "$rj" 2>/dev/null || true)
684
+ if [[ -n "$warnings" ]]; then
685
+ research_warnings+="$warnings"$'\n'
686
+ fi
687
+ done
688
+
689
+ local suffix=""
690
+ suffix+="Now implementing Batch ${batch_num}: ${title}"$'\n'
691
+ suffix+=""$'\n'
692
+ suffix+="<batch_tasks>"$'\n'
693
+ suffix+="${batch_text}"$'\n'
694
+ suffix+="</batch_tasks>"$'\n'
695
+
696
+ suffix+=""$'\n'
697
+ suffix+="<prior_context>"$'\n'
698
+ suffix+="Recent commits:"$'\n'
699
+ suffix+="${recent_commits}"$'\n'
700
+ if [[ -n "$progress_tail" ]]; then
701
+ suffix+=""$'\n'
702
+ suffix+="<prior_progress>"$'\n'
703
+ suffix+="${progress_tail}"$'\n'
704
+ suffix+="</prior_progress>"$'\n'
705
+ fi
706
+ if [[ -n "$prev_gate" && "$prev_gate" != "null" ]]; then
707
+ suffix+=""$'\n'
708
+ suffix+="Previous quality gate: ${prev_gate}"$'\n'
709
+ fi
710
+ suffix+="</prior_context>"$'\n'
711
+
712
+ if [[ -n "$context_refs_content" ]]; then
713
+ suffix+=""$'\n'
714
+ suffix+="<referenced_files>"$'\n'
715
+ suffix+="${context_refs_content}"$'\n'
716
+ suffix+="</referenced_files>"$'\n'
717
+ fi
718
+
719
+ if [[ -n "$research_warnings" ]]; then
720
+ suffix+=""$'\n'
721
+ suffix+="<research_warnings>"$'\n'
722
+ suffix+="${research_warnings}"$'\n'
723
+ suffix+="</research_warnings>"$'\n'
724
+ fi
725
+
726
+ printf '%s' "$suffix"
727
+ }
728
+ ```
729
+
730
+ Then refactor `build_batch_prompt` to compose the two:
731
+
732
+ ```bash
733
+ build_batch_prompt() {
734
+ local plan_file="$1"
735
+ local batch_num="$2"
736
+ local worktree="$3"
737
+ local python="$4"
738
+ local quality_gate_cmd="$5"
739
+ local prev_test_count="$6"
740
+
741
+ local prefix suffix
742
+ prefix=$(build_stable_prefix "$plan_file" "$worktree" "$python" "$quality_gate_cmd" "$prev_test_count")
743
+ suffix=$(build_variable_suffix "$plan_file" "$batch_num" "$worktree" "$prev_test_count")
744
+
745
+ printf '%s\n%s' "$prefix" "$suffix"
746
+ }
747
+ ```
748
+
749
+ **Step 2: Run tests**
750
+
751
+ Run: `bash scripts/tests/test-run-plan-prompt.sh`
752
+ Expected: ALL PASSED (existing tests still pass + new prefix/suffix tests pass)
753
+
754
+ **Step 3: Commit**
755
+
756
+ ```bash
757
+ git add scripts/lib/run-plan-prompt.sh
758
+ git commit -m "feat: split build_batch_prompt into stable prefix and variable suffix"
759
+ ```
760
+
761
+ ### Task 10: Write prefix to disk for reuse
762
+
763
+ **Files:**
764
+ - Modify: `scripts/lib/run-plan-headless.sh`
765
+
766
+ **Step 1: Cache prefix file at start of batch loop**
767
+
768
+ In `run_mode_headless()`, just before the batch `for` loop (around line 138), add:
769
+
770
+ ```bash
771
+ # Build and cache stable prompt prefix (reused across batches for API cache hits)
772
+ local prev_test_count_initial
773
+ prev_test_count_initial=$(get_previous_test_count "$WORKTREE")
774
+ local stable_prefix
775
+ stable_prefix=$(build_stable_prefix "$PLAN_FILE" "$WORKTREE" "$PYTHON" "$QUALITY_GATE_CMD" "$prev_test_count_initial")
776
+ echo "$stable_prefix" > "$WORKTREE/.run-plan-prefix.txt"
777
+ ```
778
+
779
+ **Step 2: Add .run-plan-prefix.txt to .gitignore if not already**
780
+
781
+ Check if `.gitignore` already covers it (likely via `.run-plan-*` pattern). If not, add it.
782
+
783
+ **Step 3: Commit**
784
+
785
+ ```bash
786
+ git add scripts/lib/run-plan-headless.sh
787
+ git commit -m "feat: cache stable prompt prefix to disk for API cache optimization"
788
+ ```
789
+
790
+ ### Task 11: Run full CI
791
+
792
+ **Step 1: Run make ci**
793
+
794
+ Run: `make ci`
795
+ Expected: ALL PASSED
796
+
797
+ **Step 2: Commit any fixes**
798
+
799
+ ---
800
+
801
+ ## Batch 3: Structured progress.txt (Tasks 12-16)
802
+
803
+ ### Task 12: Write failing tests for progress-writer.sh
804
+
805
+ **Files:**
806
+ - Create: `scripts/tests/test-progress-writer.sh`
807
+
808
+ **Step 1: Write the test file**
809
+
810
+ ```bash
811
+ #!/usr/bin/env bash
812
+ # Test structured progress writer
813
+ set -euo pipefail
814
+
815
+ SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
816
+ source "$SCRIPT_DIR/test-helpers.sh"
817
+ source "$SCRIPT_DIR/../lib/progress-writer.sh"
818
+
819
+ # --- Setup ---
820
+ WORK=$(mktemp -d)
821
+ trap 'rm -rf "$WORK"' EXIT
822
+
823
+ # --- Test: write_batch_progress creates file with header ---
824
+ write_batch_progress "$WORK" 1 "Foundation"
825
+ assert_contains "creates progress.txt" "Batch 1" "$(cat "$WORK/progress.txt")"
826
+ assert_contains "has batch title" "Foundation" "$(cat "$WORK/progress.txt")"
827
+
828
+ # --- Test: append_progress_section adds to current batch ---
829
+ append_progress_section "$WORK" "Files Modified" "- src/models.py (created)"
830
+ assert_contains "has Files Modified section" "### Files Modified" "$(cat "$WORK/progress.txt")"
831
+ assert_contains "has file entry" "src/models.py (created)" "$(cat "$WORK/progress.txt")"
832
+
833
+ # --- Test: append_progress_section adds Decisions ---
834
+ append_progress_section "$WORK" "Decisions" "- Used jq for JSON parsing: lightweight, no deps"
835
+ assert_contains "has Decisions section" "### Decisions" "$(cat "$WORK/progress.txt")"
836
+
837
+ # --- Test: append_progress_section adds State ---
838
+ append_progress_section "$WORK" "State" "- Tests: 42 passing\n- Duration: 120s\n- Cost: \$0.42"
839
+ assert_contains "has State section" "### State" "$(cat "$WORK/progress.txt")"
840
+ assert_contains "has test count" "42 passing" "$(cat "$WORK/progress.txt")"
841
+
842
+ # --- Test: write_batch_progress for second batch appends ---
843
+ write_batch_progress "$WORK" 2 "Integration"
844
+ assert_contains "has batch 2 header" "Batch 2" "$(cat "$WORK/progress.txt")"
845
+ assert_contains "batch 1 still present" "Batch 1" "$(cat "$WORK/progress.txt")"
846
+
847
+ # --- Test: read_batch_progress extracts single batch ---
848
+ result=$(read_batch_progress "$WORK" 1)
849
+ assert_contains "read batch 1: has title" "Foundation" "$result"
850
+ assert_contains "read batch 1: has files section" "src/models.py" "$result"
851
+ assert_not_contains "read batch 1: no batch 2 content" "Integration" "$result"
852
+
853
+ # --- Test: read_batch_progress for nonexistent batch returns empty ---
854
+ result=$(read_batch_progress "$WORK" 99)
855
+ assert_eq "read batch 99: empty" "" "$result"
856
+
857
+ # --- Test: read_batch_progress for batch 2 ---
858
+ append_progress_section "$WORK" "Files Modified" "- src/api.py (created)"
859
+ result=$(read_batch_progress "$WORK" 2)
860
+ assert_contains "read batch 2: has title" "Integration" "$result"
861
+ assert_contains "read batch 2: has api.py" "src/api.py" "$result"
862
+
863
+ report_results
864
+ ```
865
+
866
+ **Step 2: Run test to verify it fails**
867
+
868
+ Run: `bash scripts/tests/test-progress-writer.sh`
869
+ Expected: FAIL (source progress-writer.sh not found)
870
+
871
+ **Step 3: Commit**
872
+
873
+ ```bash
874
+ git add scripts/tests/test-progress-writer.sh
875
+ git commit -m "test: add failing tests for progress-writer.sh"
876
+ ```
877
+
878
+ ### Task 13: Implement progress-writer.sh
879
+
880
+ **Files:**
881
+ - Create: `scripts/lib/progress-writer.sh`
882
+
883
+ **Step 1: Write the implementation**
884
+
885
+ ```bash
886
+ #!/usr/bin/env bash
887
+ # progress-writer.sh — Structured progress.txt writer
888
+ #
889
+ # Replaces freeform progress.txt with defined sections per batch.
890
+ #
891
+ # Functions:
892
+ # write_batch_progress <worktree> <batch_num> <title> -> writes batch header
893
+ # append_progress_section <worktree> <section> <content> -> appends to current batch
894
+ # read_batch_progress <worktree> <batch_num> -> extracts single batch's content
895
+
896
+ write_batch_progress() {
897
+ local worktree="$1" batch_num="$2" title="$3"
898
+ local progress_file="$worktree/progress.txt"
899
+ local timestamp
900
+ timestamp=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
901
+
902
+ {
903
+ echo ""
904
+ echo "## Batch ${batch_num}: ${title} (${timestamp})"
905
+ } >> "$progress_file"
906
+ }
907
+
908
+ append_progress_section() {
909
+ local worktree="$1" section="$2" content="$3"
910
+ local progress_file="$worktree/progress.txt"
911
+
912
+ {
913
+ echo "### ${section}"
914
+ echo -e "$content"
915
+ echo ""
916
+ } >> "$progress_file"
917
+ }
918
+
919
+ read_batch_progress() {
920
+ local worktree="$1" batch_num="$2"
921
+ local progress_file="$worktree/progress.txt"
922
+
923
+ if [[ ! -f "$progress_file" ]]; then
924
+ return 0
925
+ fi
926
+
927
+ # Extract content between "## Batch N:" and the next "## Batch" or EOF
928
+ awk -v batch="$batch_num" '
929
+ /^## Batch / {
930
+ if (found) exit
931
+ if ($3 == batch":") found=1
932
+ }
933
+ found { print }
934
+ ' "$progress_file"
935
+ }
936
+ ```
937
+
938
+ **Step 2: Run tests**
939
+
940
+ Run: `bash scripts/tests/test-progress-writer.sh`
941
+ Expected: ALL PASSED
942
+
943
+ **Step 3: Commit**
944
+
945
+ ```bash
946
+ git add scripts/lib/progress-writer.sh
947
+ git commit -m "feat: add progress-writer.sh — structured progress.txt format"
948
+ ```
949
+
950
+ ### Task 14: Source progress-writer in run-plan.sh
951
+
952
+ **Files:**
953
+ - Modify: `scripts/run-plan.sh`
954
+
955
+ **Step 1: Add source line**
956
+
957
+ In `scripts/run-plan.sh`, after the cost-tracking source line, add:
958
+
959
+ ```bash
960
+ source "$SCRIPT_DIR/lib/progress-writer.sh"
961
+ ```
962
+
963
+ **Step 2: Commit**
964
+
965
+ ```bash
966
+ git add scripts/run-plan.sh
967
+ git commit -m "chore: source progress-writer.sh in run-plan.sh"
968
+ ```
969
+
970
+ ### Task 15: Update run-plan-context.sh to use read_batch_progress
971
+
972
+ **Files:**
973
+ - Modify: `scripts/lib/run-plan-context.sh:102-113`
974
+
975
+ **Step 1: Replace tail-based progress reading**
976
+
977
+ In `scripts/lib/run-plan-context.sh`, replace the progress.txt section (lines 102-113):
978
+
979
+ From:
980
+ ```bash
981
+ # 5. Progress.txt (if budget allows, last 10 lines)
982
+ if [[ $(( chars_used + 500 )) -lt $TOKEN_BUDGET_CHARS ]]; then
983
+ local progress_file="$worktree/progress.txt"
984
+ if [[ -f "$progress_file" ]]; then
985
+ local progress
986
+ progress=$(tail -10 "$progress_file" 2>/dev/null || true)
987
+ if [[ -n "$progress" ]]; then
988
+ context+="### Progress Notes"$'\n'
989
+ context+="$progress"$'\n\n'
990
+ fi
991
+ fi
992
+ fi
993
+ ```
994
+
995
+ To:
996
+ ```bash
997
+ # 5. Progress.txt — structured batch progress (last 2 batches)
998
+ if [[ $(( chars_used + 500 )) -lt $TOKEN_BUDGET_CHARS ]]; then
999
+ if [[ -f "$worktree/progress.txt" ]]; then
1000
+ # Try structured read first, fall back to tail
1001
+ local progress=""
1002
+ if type read_batch_progress &>/dev/null; then
1003
+ local start_batch=$(( batch_num - 2 ))
1004
+ [[ $start_batch -lt 1 ]] && start_batch=1
1005
+ for ((b = start_batch; b < batch_num; b++)); do
1006
+ local bp
1007
+ bp=$(read_batch_progress "$worktree" "$b" 2>/dev/null || true)
1008
+ [[ -n "$bp" ]] && progress+="$bp"$'\n'
1009
+ done
1010
+ fi
1011
+ # Fallback to tail if no structured content
1012
+ if [[ -z "$progress" ]]; then
1013
+ progress=$(tail -10 "$worktree/progress.txt" 2>/dev/null || true)
1014
+ fi
1015
+ if [[ -n "$progress" ]]; then
1016
+ context+="### Progress Notes"$'\n'
1017
+ context+="$progress"$'\n\n'
1018
+ fi
1019
+ fi
1020
+ fi
1021
+ ```
1022
+
1023
+ **Step 2: Run context tests**
1024
+
1025
+ Run: `bash scripts/tests/test-run-plan-context.sh`
1026
+ Expected: ALL PASSED
1027
+
1028
+ **Step 3: Commit**
1029
+
1030
+ ```bash
1031
+ git add scripts/lib/run-plan-context.sh
1032
+ git commit -m "feat: use structured read_batch_progress in context injection"
1033
+ ```
1034
+
1035
+ ### Task 16: Run full CI and final verification
1036
+
1037
+ **Step 1: Run make ci**
1038
+
1039
+ Run: `make ci`
1040
+ Expected: ALL PASSED (42+ test files including 2 new ones, 0 failures)
1041
+
1042
+ **Step 2: Verify test count increased**
1043
+
1044
+ Run: `make ci 2>&1 | grep -E "TOTAL|PASSED|FAILED"`
1045
+ Expected: TOTAL ≥ 42, PASSED = TOTAL, FAILED = 0
1046
+
1047
+ **Step 3: Commit any remaining fixes**
1048
+
1049
+ ```bash
1050
+ git add -A
1051
+ git commit -m "fix: address any CI issues from Phase 3 integration"
1052
+ ```
1053
+
1054
+ ---
1055
+
1056
+ ## Summary
1057
+
1058
+ | Batch | Tasks | New Files | Modified Files |
1059
+ |-------|-------|-----------|----------------|
1060
+ | 1: Cost Tracking | 1-7 | `cost-tracking.sh`, `test-cost-tracking.sh` | `run-plan-state.sh`, `run-plan-headless.sh`, `run-plan.sh`, `pipeline-status.sh`, `run-plan-notify.sh` |
1061
+ | 2: Prompt Caching | 8-11 | — | `run-plan-prompt.sh`, `run-plan-headless.sh`, `test-run-plan-prompt.sh` |
1062
+ | 3: Structured Progress | 12-16 | `progress-writer.sh`, `test-progress-writer.sh` | `run-plan.sh`, `run-plan-context.sh` |