autonomous-coding-toolkit 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (324) hide show
  1. package/.claude-plugin/marketplace.json +22 -0
  2. package/.claude-plugin/plugin.json +13 -0
  3. package/LICENSE +21 -0
  4. package/Makefile +21 -0
  5. package/README.md +140 -0
  6. package/SECURITY.md +28 -0
  7. package/agents/bash-expert.md +113 -0
  8. package/agents/dependency-auditor.md +138 -0
  9. package/agents/integration-tester.md +120 -0
  10. package/agents/lesson-scanner.md +149 -0
  11. package/agents/python-expert.md +179 -0
  12. package/agents/service-monitor.md +141 -0
  13. package/agents/shell-expert.md +147 -0
  14. package/benchmarks/runner.sh +147 -0
  15. package/benchmarks/tasks/01-rest-endpoint/rubric.sh +29 -0
  16. package/benchmarks/tasks/01-rest-endpoint/task.md +17 -0
  17. package/benchmarks/tasks/02-refactor-module/task.md +8 -0
  18. package/benchmarks/tasks/03-fix-integration-bug/task.md +8 -0
  19. package/benchmarks/tasks/04-add-test-coverage/task.md +8 -0
  20. package/benchmarks/tasks/05-multi-file-feature/task.md +8 -0
  21. package/bin/act.js +238 -0
  22. package/commands/autocode.md +6 -0
  23. package/commands/cancel-ralph.md +18 -0
  24. package/commands/code-factory.md +53 -0
  25. package/commands/create-prd.md +55 -0
  26. package/commands/ralph-loop.md +18 -0
  27. package/commands/run-plan.md +117 -0
  28. package/commands/submit-lesson.md +122 -0
  29. package/docs/ARCHITECTURE.md +630 -0
  30. package/docs/CONTRIBUTING.md +125 -0
  31. package/docs/lessons/0001-bare-exception-swallowing.md +34 -0
  32. package/docs/lessons/0002-async-def-without-await.md +28 -0
  33. package/docs/lessons/0003-create-task-without-callback.md +28 -0
  34. package/docs/lessons/0004-hardcoded-test-counts.md +28 -0
  35. package/docs/lessons/0005-sqlite-without-closing.md +33 -0
  36. package/docs/lessons/0006-venv-pip-path.md +27 -0
  37. package/docs/lessons/0007-runner-state-self-rejection.md +35 -0
  38. package/docs/lessons/0008-quality-gate-blind-spot.md +33 -0
  39. package/docs/lessons/0009-parser-overcount-empty-batches.md +36 -0
  40. package/docs/lessons/0010-local-outside-function-bash.md +33 -0
  41. package/docs/lessons/0011-batch-tests-for-unimplemented-code.md +36 -0
  42. package/docs/lessons/0012-api-markdown-unescaped-chars.md +33 -0
  43. package/docs/lessons/0013-export-prefix-env-parsing.md +33 -0
  44. package/docs/lessons/0014-decorator-registry-import-side-effect.md +43 -0
  45. package/docs/lessons/0015-frontend-backend-schema-drift.md +43 -0
  46. package/docs/lessons/0016-event-driven-cold-start-seeding.md +44 -0
  47. package/docs/lessons/0017-copy-paste-logic-diverges.md +43 -0
  48. package/docs/lessons/0018-layer-passes-pipeline-broken.md +45 -0
  49. package/docs/lessons/0019-systemd-envfile-ignores-export.md +41 -0
  50. package/docs/lessons/0020-persist-state-incrementally.md +44 -0
  51. package/docs/lessons/0021-dual-axis-testing.md +48 -0
  52. package/docs/lessons/0022-jsx-factory-shadowing.md +43 -0
  53. package/docs/lessons/0023-static-analysis-spiral.md +51 -0
  54. package/docs/lessons/0024-shared-pipeline-implementation.md +55 -0
  55. package/docs/lessons/0025-defense-in-depth-all-entry-points.md +65 -0
  56. package/docs/lessons/0026-linter-no-rules-false-enforcement.md +54 -0
  57. package/docs/lessons/0027-jsx-silent-prop-drop.md +64 -0
  58. package/docs/lessons/0028-no-infrastructure-in-client-code.md +49 -0
  59. package/docs/lessons/0029-never-write-secrets-to-files.md +61 -0
  60. package/docs/lessons/0030-cache-merge-not-replace.md +62 -0
  61. package/docs/lessons/0031-verify-units-at-boundaries.md +66 -0
  62. package/docs/lessons/0032-module-lifecycle-subscribe-unsubscribe.md +89 -0
  63. package/docs/lessons/0033-async-iteration-mutable-snapshot.md +72 -0
  64. package/docs/lessons/0034-caller-missing-await-silent-discard.md +65 -0
  65. package/docs/lessons/0035-duplicate-registration-silent-overwrite.md +85 -0
  66. package/docs/lessons/0036-websocket-dirty-disconnect.md +33 -0
  67. package/docs/lessons/0037-parallel-agents-worktree-corruption.md +31 -0
  68. package/docs/lessons/0038-subscribe-no-stored-ref.md +36 -0
  69. package/docs/lessons/0039-fallback-or-default-hides-bugs.md +34 -0
  70. package/docs/lessons/0040-event-firehose-filter-first.md +36 -0
  71. package/docs/lessons/0041-ambiguous-base-dir-path-nesting.md +32 -0
  72. package/docs/lessons/0042-spec-compliance-insufficient.md +36 -0
  73. package/docs/lessons/0043-exact-count-extensible-collections.md +32 -0
  74. package/docs/lessons/0044-relative-file-deps-worktree.md +39 -0
  75. package/docs/lessons/0045-iterative-design-improvement.md +33 -0
  76. package/docs/lessons/0046-plan-assertion-math-bugs.md +38 -0
  77. package/docs/lessons/0047-pytest-single-threaded-default.md +37 -0
  78. package/docs/lessons/0048-integration-wiring-batch.md +40 -0
  79. package/docs/lessons/0049-ab-verification.md +41 -0
  80. package/docs/lessons/0050-editing-sourced-files-during-execution.md +33 -0
  81. package/docs/lessons/0051-infrastructure-fixes-cant-self-heal.md +30 -0
  82. package/docs/lessons/0052-uncommitted-changes-poison-quality-gates.md +31 -0
  83. package/docs/lessons/0053-jq-compact-flag-inconsistency.md +31 -0
  84. package/docs/lessons/0054-parser-matches-inside-code-blocks.md +30 -0
  85. package/docs/lessons/0055-agents-compensate-for-garbled-prompts.md +31 -0
  86. package/docs/lessons/0056-grep-count-exit-code-on-zero.md +42 -0
  87. package/docs/lessons/0057-new-artifacts-break-git-clean-gates.md +42 -0
  88. package/docs/lessons/0058-dead-config-keys-never-consumed.md +49 -0
  89. package/docs/lessons/0059-contract-test-shared-structures.md +53 -0
  90. package/docs/lessons/0060-set-e-silent-death-in-runners.md +53 -0
  91. package/docs/lessons/0061-context-injection-dirty-state.md +50 -0
  92. package/docs/lessons/0062-sibling-bug-neighborhood-scan.md +29 -0
  93. package/docs/lessons/0063-one-flag-two-lifetimes.md +31 -0
  94. package/docs/lessons/0064-test-passes-wrong-reason.md +31 -0
  95. package/docs/lessons/0065-pipefail-grep-count-double-output.md +39 -0
  96. package/docs/lessons/0066-local-keyword-outside-function.md +37 -0
  97. package/docs/lessons/0067-stdin-hang-non-interactive-shell.md +36 -0
  98. package/docs/lessons/0068-agent-builds-wrong-thing-correctly.md +31 -0
  99. package/docs/lessons/0069-plan-quality-dominates-execution.md +30 -0
  100. package/docs/lessons/0070-spec-echo-back-prevents-drift.md +31 -0
  101. package/docs/lessons/0071-positive-instructions-outperform-negative.md +30 -0
  102. package/docs/lessons/0072-lost-in-the-middle-context-placement.md +30 -0
  103. package/docs/lessons/0073-unscoped-lessons-cause-false-positives.md +30 -0
  104. package/docs/lessons/0074-stale-context-injection-wrong-batch.md +32 -0
  105. package/docs/lessons/0075-research-artifacts-must-persist.md +32 -0
  106. package/docs/lessons/0076-wrong-decomposition-contaminates-downstream.md +30 -0
  107. package/docs/lessons/0077-cherry-pick-merges-need-manual-resolution.md +30 -0
  108. package/docs/lessons/0078-static-review-without-live-test.md +30 -0
  109. package/docs/lessons/0079-integration-wiring-batch-required.md +32 -0
  110. package/docs/lessons/FRAMEWORK.md +161 -0
  111. package/docs/lessons/SUMMARY.md +201 -0
  112. package/docs/lessons/TEMPLATE.md +85 -0
  113. package/docs/plans/2026-02-21-code-factory-v2-design.md +204 -0
  114. package/docs/plans/2026-02-21-code-factory-v2-implementation-plan.md +2189 -0
  115. package/docs/plans/2026-02-21-code-factory-v2-phase4-design.md +537 -0
  116. package/docs/plans/2026-02-21-code-factory-v2-phase4-implementation-plan.md +2012 -0
  117. package/docs/plans/2026-02-21-hardening-pass-design.md +108 -0
  118. package/docs/plans/2026-02-21-hardening-pass-plan.md +1378 -0
  119. package/docs/plans/2026-02-21-mab-research-report.md +406 -0
  120. package/docs/plans/2026-02-21-marketplace-restructure-design.md +240 -0
  121. package/docs/plans/2026-02-21-marketplace-restructure-plan.md +832 -0
  122. package/docs/plans/2026-02-21-phase4-completion-plan.md +697 -0
  123. package/docs/plans/2026-02-21-validator-suite-design.md +148 -0
  124. package/docs/plans/2026-02-21-validator-suite-plan.md +540 -0
  125. package/docs/plans/2026-02-22-mab-research-round2.md +556 -0
  126. package/docs/plans/2026-02-22-mab-run-design.md +462 -0
  127. package/docs/plans/2026-02-22-mab-run-plan.md +2046 -0
  128. package/docs/plans/2026-02-22-operations-design-methodology-research.md +681 -0
  129. package/docs/plans/2026-02-22-research-agent-failure-taxonomy.md +532 -0
  130. package/docs/plans/2026-02-22-research-code-guideline-policies.md +886 -0
  131. package/docs/plans/2026-02-22-research-codebase-audit-refactoring.md +908 -0
  132. package/docs/plans/2026-02-22-research-coding-standards-documentation.md +541 -0
  133. package/docs/plans/2026-02-22-research-competitive-landscape.md +687 -0
  134. package/docs/plans/2026-02-22-research-comprehensive-testing.md +1076 -0
  135. package/docs/plans/2026-02-22-research-context-utilization.md +459 -0
  136. package/docs/plans/2026-02-22-research-cost-quality-tradeoff.md +548 -0
  137. package/docs/plans/2026-02-22-research-lesson-transferability.md +508 -0
  138. package/docs/plans/2026-02-22-research-multi-agent-coordination.md +312 -0
  139. package/docs/plans/2026-02-22-research-phase-integration.md +602 -0
  140. package/docs/plans/2026-02-22-research-plan-quality.md +428 -0
  141. package/docs/plans/2026-02-22-research-prompt-engineering.md +558 -0
  142. package/docs/plans/2026-02-22-research-unconventional-perspectives.md +528 -0
  143. package/docs/plans/2026-02-22-research-user-adoption.md +638 -0
  144. package/docs/plans/2026-02-22-research-verification-effectiveness.md +433 -0
  145. package/docs/plans/2026-02-23-agent-suite-design.md +299 -0
  146. package/docs/plans/2026-02-23-agent-suite-plan.md +578 -0
  147. package/docs/plans/2026-02-23-phase3-cost-infrastructure-design.md +148 -0
  148. package/docs/plans/2026-02-23-phase3-cost-infrastructure-plan.md +1062 -0
  149. package/docs/plans/2026-02-23-research-bash-expert-agent.md +543 -0
  150. package/docs/plans/2026-02-23-research-dependency-auditor-agent.md +564 -0
  151. package/docs/plans/2026-02-23-research-improving-existing-agents.md +503 -0
  152. package/docs/plans/2026-02-23-research-integration-tester-agent.md +454 -0
  153. package/docs/plans/2026-02-23-research-python-expert-agent.md +429 -0
  154. package/docs/plans/2026-02-23-research-service-monitor-agent.md +425 -0
  155. package/docs/plans/2026-02-23-research-shell-expert-agent.md +533 -0
  156. package/docs/plans/2026-02-23-roadmap-to-completion.md +530 -0
  157. package/docs/plans/2026-02-24-headless-module-split-design.md +98 -0
  158. package/docs/plans/2026-02-24-headless-module-split.md +443 -0
  159. package/docs/plans/2026-02-24-lesson-scope-metadata-design.md +228 -0
  160. package/docs/plans/2026-02-24-lesson-scope-metadata-plan.md +968 -0
  161. package/docs/plans/2026-02-24-npm-packaging-design.md +841 -0
  162. package/docs/plans/2026-02-24-npm-packaging-plan.md +1965 -0
  163. package/docs/plans/audit-findings.md +186 -0
  164. package/docs/telegram-notification-format.md +98 -0
  165. package/examples/example-plan.md +51 -0
  166. package/examples/example-prd.json +72 -0
  167. package/examples/example-roadmap.md +33 -0
  168. package/examples/quickstart-plan.md +63 -0
  169. package/hooks/hooks.json +26 -0
  170. package/hooks/setup-symlinks.sh +48 -0
  171. package/hooks/stop-hook.sh +135 -0
  172. package/package.json +47 -0
  173. package/policies/bash.md +71 -0
  174. package/policies/python.md +71 -0
  175. package/policies/testing.md +61 -0
  176. package/policies/universal.md +60 -0
  177. package/scripts/analyze-report.sh +97 -0
  178. package/scripts/architecture-map.sh +145 -0
  179. package/scripts/auto-compound.sh +273 -0
  180. package/scripts/batch-audit.sh +42 -0
  181. package/scripts/batch-test.sh +101 -0
  182. package/scripts/entropy-audit.sh +221 -0
  183. package/scripts/failure-digest.sh +51 -0
  184. package/scripts/generate-ast-rules.sh +96 -0
  185. package/scripts/init.sh +112 -0
  186. package/scripts/lesson-check.sh +428 -0
  187. package/scripts/lib/common.sh +61 -0
  188. package/scripts/lib/cost-tracking.sh +153 -0
  189. package/scripts/lib/ollama.sh +60 -0
  190. package/scripts/lib/progress-writer.sh +128 -0
  191. package/scripts/lib/run-plan-context.sh +215 -0
  192. package/scripts/lib/run-plan-echo-back.sh +231 -0
  193. package/scripts/lib/run-plan-headless.sh +396 -0
  194. package/scripts/lib/run-plan-notify.sh +57 -0
  195. package/scripts/lib/run-plan-parser.sh +81 -0
  196. package/scripts/lib/run-plan-prompt.sh +215 -0
  197. package/scripts/lib/run-plan-quality-gate.sh +132 -0
  198. package/scripts/lib/run-plan-routing.sh +315 -0
  199. package/scripts/lib/run-plan-sampling.sh +170 -0
  200. package/scripts/lib/run-plan-scoring.sh +146 -0
  201. package/scripts/lib/run-plan-state.sh +142 -0
  202. package/scripts/lib/run-plan-team.sh +199 -0
  203. package/scripts/lib/telegram.sh +54 -0
  204. package/scripts/lib/thompson-sampling.sh +176 -0
  205. package/scripts/license-check.sh +74 -0
  206. package/scripts/mab-run.sh +575 -0
  207. package/scripts/module-size-check.sh +146 -0
  208. package/scripts/patterns/async-no-await.yml +5 -0
  209. package/scripts/patterns/bare-except.yml +6 -0
  210. package/scripts/patterns/empty-catch.yml +6 -0
  211. package/scripts/patterns/hardcoded-localhost.yml +9 -0
  212. package/scripts/patterns/retry-loop-no-backoff.yml +12 -0
  213. package/scripts/pipeline-status.sh +197 -0
  214. package/scripts/policy-check.sh +226 -0
  215. package/scripts/prior-art-search.sh +133 -0
  216. package/scripts/promote-mab-lessons.sh +126 -0
  217. package/scripts/prompts/agent-a-superpowers.md +29 -0
  218. package/scripts/prompts/agent-b-ralph.md +29 -0
  219. package/scripts/prompts/judge-agent.md +61 -0
  220. package/scripts/prompts/planner-agent.md +44 -0
  221. package/scripts/pull-community-lessons.sh +90 -0
  222. package/scripts/quality-gate.sh +266 -0
  223. package/scripts/research-gate.sh +90 -0
  224. package/scripts/run-plan.sh +329 -0
  225. package/scripts/scope-infer.sh +159 -0
  226. package/scripts/setup-ralph-loop.sh +155 -0
  227. package/scripts/telemetry.sh +230 -0
  228. package/scripts/tests/run-all-tests.sh +52 -0
  229. package/scripts/tests/test-act-cli.sh +46 -0
  230. package/scripts/tests/test-agents-md.sh +87 -0
  231. package/scripts/tests/test-analyze-report.sh +114 -0
  232. package/scripts/tests/test-architecture-map.sh +89 -0
  233. package/scripts/tests/test-auto-compound.sh +169 -0
  234. package/scripts/tests/test-batch-test.sh +65 -0
  235. package/scripts/tests/test-benchmark-runner.sh +25 -0
  236. package/scripts/tests/test-common.sh +168 -0
  237. package/scripts/tests/test-cost-tracking.sh +158 -0
  238. package/scripts/tests/test-echo-back.sh +180 -0
  239. package/scripts/tests/test-entropy-audit.sh +146 -0
  240. package/scripts/tests/test-failure-digest.sh +66 -0
  241. package/scripts/tests/test-generate-ast-rules.sh +145 -0
  242. package/scripts/tests/test-helpers.sh +82 -0
  243. package/scripts/tests/test-init.sh +47 -0
  244. package/scripts/tests/test-lesson-check.sh +278 -0
  245. package/scripts/tests/test-lesson-local.sh +55 -0
  246. package/scripts/tests/test-license-check.sh +109 -0
  247. package/scripts/tests/test-mab-run.sh +182 -0
  248. package/scripts/tests/test-ollama-lib.sh +49 -0
  249. package/scripts/tests/test-ollama.sh +60 -0
  250. package/scripts/tests/test-pipeline-status.sh +198 -0
  251. package/scripts/tests/test-policy-check.sh +124 -0
  252. package/scripts/tests/test-prior-art-search.sh +96 -0
  253. package/scripts/tests/test-progress-writer.sh +140 -0
  254. package/scripts/tests/test-promote-mab-lessons.sh +110 -0
  255. package/scripts/tests/test-pull-community-lessons.sh +149 -0
  256. package/scripts/tests/test-quality-gate.sh +241 -0
  257. package/scripts/tests/test-research-gate.sh +132 -0
  258. package/scripts/tests/test-run-plan-cli.sh +86 -0
  259. package/scripts/tests/test-run-plan-context.sh +305 -0
  260. package/scripts/tests/test-run-plan-e2e.sh +153 -0
  261. package/scripts/tests/test-run-plan-headless.sh +424 -0
  262. package/scripts/tests/test-run-plan-notify.sh +124 -0
  263. package/scripts/tests/test-run-plan-parser.sh +217 -0
  264. package/scripts/tests/test-run-plan-prompt.sh +254 -0
  265. package/scripts/tests/test-run-plan-quality-gate.sh +222 -0
  266. package/scripts/tests/test-run-plan-routing.sh +178 -0
  267. package/scripts/tests/test-run-plan-scoring.sh +148 -0
  268. package/scripts/tests/test-run-plan-state.sh +261 -0
  269. package/scripts/tests/test-run-plan-team.sh +157 -0
  270. package/scripts/tests/test-scope-infer.sh +150 -0
  271. package/scripts/tests/test-setup-ralph-loop.sh +63 -0
  272. package/scripts/tests/test-telegram-env.sh +38 -0
  273. package/scripts/tests/test-telegram.sh +121 -0
  274. package/scripts/tests/test-telemetry.sh +46 -0
  275. package/scripts/tests/test-thompson-sampling.sh +139 -0
  276. package/scripts/tests/test-validate-all.sh +60 -0
  277. package/scripts/tests/test-validate-commands.sh +89 -0
  278. package/scripts/tests/test-validate-hooks.sh +98 -0
  279. package/scripts/tests/test-validate-lessons.sh +150 -0
  280. package/scripts/tests/test-validate-plan-quality.sh +235 -0
  281. package/scripts/tests/test-validate-plans.sh +187 -0
  282. package/scripts/tests/test-validate-plugin.sh +106 -0
  283. package/scripts/tests/test-validate-prd.sh +184 -0
  284. package/scripts/tests/test-validate-skills.sh +134 -0
  285. package/scripts/validate-all.sh +57 -0
  286. package/scripts/validate-commands.sh +67 -0
  287. package/scripts/validate-hooks.sh +89 -0
  288. package/scripts/validate-lessons.sh +98 -0
  289. package/scripts/validate-plan-quality.sh +369 -0
  290. package/scripts/validate-plans.sh +120 -0
  291. package/scripts/validate-plugin.sh +86 -0
  292. package/scripts/validate-policies.sh +42 -0
  293. package/scripts/validate-prd.sh +118 -0
  294. package/scripts/validate-skills.sh +96 -0
  295. package/skills/autocode/SKILL.md +285 -0
  296. package/skills/autocode/ab-verification.md +51 -0
  297. package/skills/autocode/code-quality-standards.md +37 -0
  298. package/skills/autocode/competitive-mode.md +364 -0
  299. package/skills/brainstorming/SKILL.md +97 -0
  300. package/skills/capture-lesson/SKILL.md +187 -0
  301. package/skills/check-lessons/SKILL.md +116 -0
  302. package/skills/dispatching-parallel-agents/SKILL.md +110 -0
  303. package/skills/executing-plans/SKILL.md +85 -0
  304. package/skills/finishing-a-development-branch/SKILL.md +201 -0
  305. package/skills/receiving-code-review/SKILL.md +72 -0
  306. package/skills/requesting-code-review/SKILL.md +59 -0
  307. package/skills/requesting-code-review/code-reviewer.md +82 -0
  308. package/skills/research/SKILL.md +145 -0
  309. package/skills/roadmap/SKILL.md +115 -0
  310. package/skills/subagent-driven-development/SKILL.md +98 -0
  311. package/skills/subagent-driven-development/code-quality-reviewer-prompt.md +18 -0
  312. package/skills/subagent-driven-development/implementer-prompt.md +73 -0
  313. package/skills/subagent-driven-development/spec-reviewer-prompt.md +57 -0
  314. package/skills/systematic-debugging/SKILL.md +134 -0
  315. package/skills/systematic-debugging/condition-based-waiting.md +64 -0
  316. package/skills/systematic-debugging/defense-in-depth.md +32 -0
  317. package/skills/systematic-debugging/root-cause-tracing.md +55 -0
  318. package/skills/test-driven-development/SKILL.md +167 -0
  319. package/skills/using-git-worktrees/SKILL.md +219 -0
  320. package/skills/using-superpowers/SKILL.md +54 -0
  321. package/skills/verification-before-completion/SKILL.md +140 -0
  322. package/skills/verify/SKILL.md +82 -0
  323. package/skills/writing-plans/SKILL.md +128 -0
  324. package/skills/writing-skills/SKILL.md +93 -0
@@ -0,0 +1,147 @@
1
+ #!/usr/bin/env bash
2
+ # runner.sh — Benchmark orchestrator for the Autonomous Coding Toolkit
3
+ #
4
+ # Usage:
5
+ # runner.sh run [task-name] Run all or one benchmark
6
+ # runner.sh compare <a> <b> Compare two result files
7
+ # runner.sh list List available benchmarks
8
+ set -euo pipefail
9
+
10
+ SCRIPT_DIR="$(cd "$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")" && pwd)"
11
+ TASKS_DIR="$SCRIPT_DIR/tasks"
12
+ RESULTS_DIR="${BENCHMARK_RESULTS_DIR:-$SCRIPT_DIR/results}"
13
+
14
# usage — print the command summary to stdout, then terminate the script
# with exit status 0. Never returns to the caller.
usage() {
    printf '%s\n' \
        'Usage: runner.sh <run|compare|list> [options]' \
        '' \
        'Commands:' \
        'run [name] Run all benchmarks, or a specific one by directory name' \
        'compare <a> <b> Compare two result JSON files' \
        'list List available benchmark tasks' \
        '' \
        'Options:' \
        '--help, -h Show this help' \
        '' \
        'Results are saved to benchmarks/results/ (gitignored).'
    exit 0
}
30
+
31
+ SUBCOMMAND="${1:-}"
32
+ shift || true
33
+
34
+ case "$SUBCOMMAND" in
35
list)
    # Print one line per task directory: its name and the first line of
    # task.md (with a leading "# " heading marker removed), if present.
    echo "Available benchmarks:"
    for task_dir in "$TASKS_DIR"/*/; do
        if [[ ! -d "$task_dir" ]]; then
            continue
        fi
        name=$(basename "$task_dir")
        desc=""
        if [[ -f "$task_dir/task.md" ]]; then
            desc=$(head -1 "$task_dir/task.md")
            desc=${desc#'# '}
        fi
        echo " $name — $desc"
    done
    ;;
47
+
48
+ run)
49
+ TARGET="${1:-all}"
50
+ mkdir -p "$RESULTS_DIR"
51
+ timestamp=$(date -u +%Y%m%dT%H%M%SZ)
52
+
53
# run_benchmark <task_dir>
#
# Runs <task_dir>/rubric.sh, echoes its output, and scores the task as the
# percentage of "PASS:" verdicts among all "PASS:"/"FAIL:" verdict lines.
# Appends one JSON record (name, score, passed, total, timestamp) to
# "$RESULTS_DIR/$timestamp.jsonl".
#
# Reads from the enclosing scope: RESULTS_DIR, timestamp.
# Returns early (status 0, nothing recorded) when the task has no rubric.sh.
run_benchmark() {
    local task_dir="$1"
    # Declare and assign separately so a failing basename is not masked by
    # the exit status of `local`.
    local name
    name=$(basename "$task_dir")
    echo "=== Benchmark: $name ==="

    if [[ ! -f "$task_dir/rubric.sh" ]]; then
        echo " SKIP: no rubric.sh found"
        return
    fi

    local score=0
    local total=0
    local pass=0
    local line

    # The rubric's stderr is merged into the stream (2>&1), so it can carry
    # diagnostics and tool chatter as well as verdicts. Only lines that start
    # with "PASS:" or "FAIL:" are criteria; everything else is echoed for
    # context but must not dilute the score.
    # `|| [[ -n "$line" ]]` keeps a final line that lacks a trailing newline.
    while IFS= read -r line || [[ -n "$line" ]]; do
        case "$line" in
            PASS:*)
                pass=$((pass + 1))
                total=$((total + 1))
                ;;
            FAIL:*)
                total=$((total + 1))
                ;;
        esac
        echo " $line"
    done < <(bash "$task_dir/rubric.sh" 2>&1 || true)

    if [[ $total -gt 0 ]]; then
        score=$((pass * 100 / total))
    fi
    echo " Score: ${score}% ($pass/$total)"
    echo ""

    # Write result
    jq -n --arg name "$name" --argjson score "$score" \
        --argjson pass "$pass" --argjson total "$total" \
        --arg ts "$timestamp" \
        '{name: $name, score: $score, passed: $pass, total: $total, timestamp: $ts}' \
        >> "$RESULTS_DIR/$timestamp.jsonl"
}
89
+
90
# Run every task directory, or only the one named by $TARGET.
case "$TARGET" in
    all)
        for task_dir in "$TASKS_DIR"/*/; do
            if [[ -d "$task_dir" ]]; then
                run_benchmark "$task_dir"
            fi
        done
        ;;
    *)
        if [[ ! -d "$TASKS_DIR/$TARGET" ]]; then
            echo "Benchmark not found: $TARGET" >&2
            echo "Run 'runner.sh list' to see available benchmarks." >&2
            exit 1
        fi
        run_benchmark "$TASKS_DIR/$TARGET"
        ;;
esac
104
+
105
+ echo "Results saved to: $RESULTS_DIR/$timestamp.jsonl"
106
+ ;;
107
+
108
+ compare)
109
+ FILE_A="${1:-}"
110
+ FILE_B="${2:-}"
111
+ if [[ -z "$FILE_A" || -z "$FILE_B" ]]; then
112
+ echo "Usage: runner.sh compare <result-a.jsonl> <result-b.jsonl>" >&2
113
+ exit 1
114
+ fi
115
+ if [[ ! -f "$FILE_A" || ! -f "$FILE_B" ]]; then
116
+ echo "One or both files not found." >&2
117
+ exit 1
118
+ fi
119
+
120
+ echo "Benchmark Comparison"
121
+ echo "═════════════════════════════════════"
122
+ printf "%-25s %8s %8s %8s\n" "Task" "Before" "After" "Delta"
123
+ echo "─────────────────────────────────────────────"
124
+
125
# Emit one "name|before|after|delta" row per task present in both files.
#  - Rows are paired by task name, not by position, so files that contain
#    different task sets or orderings are still compared correctly.
#  - -r prints raw text; without it jq emits a quoted JSON string and the
#    stray quotes end up in the first and last fields read below.
#  - jq's stderr is left visible so malformed result files are reported.
jq -rn --slurpfile before "$FILE_A" --slurpfile after "$FILE_B" '
    ($after | map({key: .name, value: .score}) | from_entries) as $after_scores
    | $before[]
    | select($after_scores[.name] != null)
    | "\(.name)|\(.score)|\($after_scores[.name])|\($after_scores[.name] - .score)"
' | while IFS='|' read -r name before after delta; do
    sign=""
    if (( delta > 0 )); then
        sign="+"
    fi
    printf "%-25s %7s%% %7s%% %7s%%\n" "$name" "$before" "$after" "${sign}${delta}"
done
135
+
136
+ echo "═════════════════════════════════════"
137
+ ;;
138
+
139
+ help|--help|-h|"")
140
+ usage
141
+ ;;
142
+
143
*)
    echo "Unknown command: $SUBCOMMAND" >&2
    # usage() ends with `exit 0`. Run it in a subshell so that exit only
    # terminates the subshell, then fail explicitly: an unrecognised
    # command must not report success to the caller.
    (usage)
    exit 1
    ;;
147
+ esac
@@ -0,0 +1,29 @@
1
#!/usr/bin/env bash
# Rubric for 01-rest-endpoint benchmark
#
# Prints one "PASS: <desc>" or "FAIL: <desc>" line per criterion on stdout.
# The project under test is $BENCHMARK_PROJECT_ROOT (default: current dir).
set -euo pipefail

PROJECT_ROOT="${BENCHMARK_PROJECT_ROOT:-.}"

# Criterion 1: Health endpoint file exists
# grep -q exits 0 as soon as any match is found, even if one of the two
# directories is missing. The previous `grep -rl ... | head -1` pipeline
# could fail under `pipefail` (missing directory, or SIGPIPE from head)
# although a match existed.
if compgen -G "$PROJECT_ROOT/src/*health*" >/dev/null 2>&1 || \
   compgen -G "$PROJECT_ROOT/app/*health*" >/dev/null 2>&1 || \
   grep -rq "health" "$PROJECT_ROOT/src/" "$PROJECT_ROOT/app/" 2>/dev/null; then
    echo "PASS: Health endpoint file exists"
else
    echo "FAIL: Health endpoint file not found"
fi

# Criterion 2: Test file exists
if compgen -G "$PROJECT_ROOT/tests/*health*" >/dev/null 2>&1 || \
   compgen -G "$PROJECT_ROOT/test/*health*" >/dev/null 2>&1; then
    echo "PASS: Health endpoint test file exists"
else
    echo "FAIL: Health endpoint test file not found"
fi

# Criterion 3: Test passes
# Run in a subshell so the cd does not leak into the rest of the script, and
# silence the test runners completely: this script's stdout is reserved for
# PASS:/FAIL: verdict lines.
if (cd "$PROJECT_ROOT" && { npm test || pytest || make test; }) >/dev/null 2>&1; then
    echo "PASS: Tests pass"
else
    echo "FAIL: Tests do not pass"
fi
@@ -0,0 +1,17 @@
1
+ # Add a REST Endpoint with Tests
2
+
3
+ **Complexity:** Simple (1 batch)
4
+ **Measures:** Basic execution, TDD compliance
5
+
6
+ ## Task
7
+
8
+ Add a `/health` endpoint to the project that:
9
+ 1. Returns HTTP 200 with JSON body `{"status": "ok", "timestamp": "<ISO8601>"}`
10
+ 2. Has a test that verifies the response status and body structure
11
+ 3. All tests pass
12
+
13
+ ## Constraints
14
+
15
+ - Use the project's existing web framework (or add a minimal one if none exists)
16
+ - Follow existing code style and patterns
17
+ - Test must be automated (no manual verification)
@@ -0,0 +1,8 @@
1
+ # Refactor a Module into Two
2
+
3
+ **Complexity:** Medium (2 batches)
4
+ **Measures:** Refactoring quality, test preservation
5
+
6
+ ## Task
7
+
8
+ Split `src/utils.sh` into `src/string-utils.sh` and `src/file-utils.sh`, preserving all existing tests.
@@ -0,0 +1,8 @@
1
+ # Fix an Integration Bug
2
+
3
+ **Complexity:** Medium (2 batches)
4
+ **Measures:** Debugging, root cause analysis
5
+
6
+ ## Task
7
+
8
+ The `/api/users` endpoint returns 500 when the database connection pool is exhausted. Find and fix the root cause.
@@ -0,0 +1,8 @@
1
+ # Add Test Coverage to Untested Module
2
+
3
+ **Complexity:** Medium (2 batches)
4
+ **Measures:** Test quality, edge case discovery
5
+
6
+ ## Task
7
+
8
+ Add comprehensive tests to `src/parser.sh` which currently has 0% coverage. Cover happy path, edge cases, and error conditions.
@@ -0,0 +1,8 @@
1
+ # Multi-File Feature with API + DB + Tests
2
+
3
+ **Complexity:** Complex (4 batches)
4
+ **Measures:** Full pipeline, cross-file coordination
5
+
6
+ ## Task
7
+
8
+ Add a "bookmarks" feature: API endpoints (CRUD), database migration, and integration tests.
package/bin/act.js ADDED
@@ -0,0 +1,238 @@
1
+ #!/usr/bin/env node
2
+ 'use strict';
3
+
4
+ const { execFileSync } = require('child_process');
5
+ const fs = require('fs');
6
+ const path = require('path');
7
+
8
+ // ---------------------------------------------------------------------------
9
+ // Toolkit root: works for npm global, npx, and local clone
10
+ // ---------------------------------------------------------------------------
11
// The toolkit root is the parent of this file's directory; the version is
// taken from the package.json found there. Abort with status 1 if that file
// cannot be read or parsed.
const TOOLKIT_ROOT = path.resolve(__dirname, '..');
let VERSION;
try {
  const manifestPath = path.join(TOOLKIT_ROOT, 'package.json');
  const manifestText = fs.readFileSync(manifestPath, 'utf8');
  ({ version: VERSION } = JSON.parse(manifestText));
} catch (err) {
  console.error('Error: Could not read package.json');
  console.error(` ${err.message}`);
  process.exit(1);
}
21
+
22
+ // ---------------------------------------------------------------------------
23
+ // Platform check — bash required
24
+ // ---------------------------------------------------------------------------
25
/**
 * Abort with exit status 1 when process.platform is 'win32' and
 * /proc/version does not mention "microsoft". Does nothing on any other
 * platform.
 */
function checkPlatform() {
  if (process.platform !== 'win32') {
    return;
  }

  const procVersionPath = '/proc/version';
  let runningUnderWsl = false;
  try {
    if (fs.existsSync(procVersionPath)) {
      const contents = fs.readFileSync(procVersionPath, 'utf8');
      runningUnderWsl = contents.toLowerCase().includes('microsoft');
    }
  } catch (_) {
    // If /proc/version is unreadable, assume not WSL
  }

  if (runningUnderWsl) {
    return;
  }

  console.error(
    'Error: act requires bash, which is not available on native Windows.\n' +
    'Hint: Install WSL2 (https://aka.ms/wsl) and run act from a WSL terminal.'
  );
  process.exit(1);
}
43
+
44
+ // ---------------------------------------------------------------------------
45
+ // Dependency check
46
+ // ---------------------------------------------------------------------------
47
/**
 * Verify that `cmd` can be located with `which`; otherwise print an error
 * and exit with status 1.
 *
 * @param {string} cmd - Executable name to look up.
 */
function checkDep(cmd) {
  let located = true;
  try {
    execFileSync('which', [cmd], { stdio: 'pipe' });
  } catch (_) {
    located = false;
  }
  if (located) {
    return;
  }
  console.error(`Error: Required dependency "${cmd}" not found on PATH.`);
  console.error('Install it and try again.');
  process.exit(1);
}
56
+
57
/**
 * Check, in order, that bash, git and jq are available. The first missing
 * tool terminates the process via checkDep.
 */
function checkDependencies() {
  for (const tool of ['bash', 'git', 'jq']) {
    checkDep(tool);
  }
}
62
+
63
+ // ---------------------------------------------------------------------------
64
+ // Script runner
65
+ // ---------------------------------------------------------------------------
66
/**
 * Build the path of a file inside the toolkit's scripts/ directory.
 *
 * @param {string} name - File name relative to scripts/.
 * @returns {string} TOOLKIT_ROOT joined with 'scripts' and `name`.
 */
function scripts(name) {
  const segments = [TOOLKIT_ROOT, 'scripts', name];
  return path.join(...segments);
}
69
+
70
/**
 * Run a script with bash, sharing this process's stdio.
 *
 * Exits with status 1 if the script file does not exist. If bash fails, the
 * process exits with the child's exit status, or 1 when no status is
 * available.
 *
 * @param {string} scriptPath - Path of the script to execute.
 * @param {string[]} args - Arguments passed through to the script.
 */
function runScript(scriptPath, args) {
  const scriptMissing = !fs.existsSync(scriptPath);
  if (scriptMissing) {
    const messages = [
      `Error: Script not found: ${scriptPath}`,
      'This script may not be included in the current installation.',
      'Try reinstalling: npm install -g autonomous-coding-toolkit',
    ];
    messages.forEach((message) => console.error(message));
    process.exit(1);
  }

  const bashArgs = [scriptPath, ...args];
  try {
    execFileSync('bash', bashArgs, { stdio: 'inherit' });
  } catch (err) {
    const exitCode = err.status == null ? 1 : err.status;
    process.exit(exitCode);
  }
}
83
+
84
+ // ---------------------------------------------------------------------------
85
+ // Help text
86
+ // ---------------------------------------------------------------------------
87
/**
 * Print the CLI help text (version banner, usage line and the command list
 * grouped by category) to stdout with a single console.log call.
 */
function printHelp() {
  const helpLines = [
    `Autonomous Coding Toolkit v${VERSION}`,
    '',
    'Usage: act <command> [options]',
    '',
    'Execution:',
    'plan <file> [flags] Headless/team/MAB batch execution',
    'plan --resume Resume interrupted execution',
    'compound [dir] Full pipeline: report→PRD→execute→PR',
    'mab <flags> Multi-Armed Bandit competing agents',
    '',
    'Quality:',
    'gate [flags] Composite quality gate (lesson-check + tests + memory)',
    'check [files...] Syntactic anti-pattern scan from lesson files',
    'policy [files...] Advisory positive-pattern checker',
    'research-gate [flags] Block PRD if unresolved research issues',
    'validate Run all validators',
    'validate-plan <file> Validate plan quality score',
    'validate-prd [file] Validate PRD shell-command criteria',
    '',
    'Lessons:',
    'lessons pull Pull community lessons from upstream',
    'lessons check List active lesson checks',
    'lessons promote Promote MAB-discovered lessons',
    'lessons infer Infer scope metadata for lesson files',
    '',
    'Analysis:',
    'audit [flags] Entropy audit: doc drift, naming violations',
    'batch-audit [flags] Cross-project audit runner',
    'batch-test [flags] Memory-aware cross-project test runner',
    'analyze [report] Analyze audit/test report',
    'digest [flags] Failure digest from run logs',
    'status [flags] Pipeline status summary',
    'architecture [flags] Generate architecture map',
    '',
    'Telemetry:',
    'telemetry [flags] Telemetry reporting',
    '',
    'Benchmarks:',
    'benchmark [flags] Run benchmark suite',
    '',
    'Setup:',
    'init [flags] Initialize toolkit in current project',
    'license-check [flags] Check dependency licenses',
    'module-size [flags] Check module sizes against budget',
    '',
    'Meta:',
    'version Print version',
    'help Show this help text',
    // Trailing empty entry keeps the text ending in a newline.
    '',
  ];
  console.log(helpLines.join('\n'));
}
138
+
139
+ // ---------------------------------------------------------------------------
140
+ // Command map — top-level `act <command>` names, each mapped to the script path that main() passes to runScript() (which runs it with bash)
141
+ // ---------------------------------------------------------------------------
142
+ const COMMANDS = {
143
+ // Execution
144
+ plan: { script: scripts('run-plan.sh') },
145
+ compound: { script: scripts('auto-compound.sh') },
146
+ mab: { script: scripts('mab-run.sh') },
147
+
148
+ // Quality
149
+ gate: { script: scripts('quality-gate.sh') },
150
+ check: { script: scripts('lesson-check.sh') },
151
+ policy: { script: scripts('policy-check.sh') },
152
+ 'research-gate': { script: scripts('research-gate.sh') },
153
+ validate: { script: scripts('validate-all.sh') },
154
+ 'validate-plan': { script: scripts('validate-plan-quality.sh') },
155
+ 'validate-prd': { script: scripts('validate-prd.sh') },
156
+
157
+ // Analysis
158
+ audit: { script: scripts('entropy-audit.sh') },
159
+ 'batch-audit': { script: scripts('batch-audit.sh') },
160
+ 'batch-test': { script: scripts('batch-test.sh') },
161
+ analyze: { script: scripts('analyze-report.sh') },
162
+ digest: { script: scripts('failure-digest.sh') },
163
+ status: { script: scripts('pipeline-status.sh') },
164
+ architecture: { script: scripts('architecture-map.sh') },
165
+
166
+ // Setup
167
+ init: { script: scripts('init.sh') },
168
+ 'license-check': { script: scripts('license-check.sh') },
169
+ 'module-size': { script: scripts('module-size-check.sh') },
170
+
171
+ // Telemetry
172
+ telemetry: { script: scripts('telemetry.sh') },
173
+
174
+ // Benchmarks (note: under benchmarks/, not scripts/)
175
+ benchmark: { script: path.join(TOOLKIT_ROOT, 'benchmarks', 'runner.sh') },
176
+ };
177
+
178
+ // Lessons sub-dispatch — `act lessons <sub>` subcommands; `args` are fixed arguments main() places ahead of the user's own (e.g. `check` always passes --list)
179
+ const LESSONS_COMMANDS = {
180
+ pull: { script: scripts('pull-community-lessons.sh'), args: [] },
181
+ check: { script: scripts('lesson-check.sh'), args: ['--list'] },
182
+ promote: { script: scripts('promote-mab-lessons.sh'), args: [] },
183
+ infer: { script: scripts('scope-infer.sh'), args: [] },
184
+ };
185
+
186
+ // ---------------------------------------------------------------------------
187
+ // Main
188
+ // ---------------------------------------------------------------------------
189
+ function main() {
190
+ const args = process.argv.slice(2);
191
+ const cmd = args[0];
192
+ const rest = args.slice(1);
193
+
194
+ // Built-in meta commands (no bash needed)
195
+ if (!cmd || cmd === 'help' || cmd === '--help' || cmd === '-h') {
196
+ printHelp();
197
+ process.exit(0);
198
+ }
199
+
200
+ if (cmd === 'version' || cmd === '--version' || cmd === '-v') {
201
+ console.log(`act v${VERSION}`);
202
+ process.exit(0);
203
+ }
204
+
205
+ // Platform + dependency checks for all other commands
206
+ checkPlatform();
207
+ checkDependencies();
208
+
209
+ // Lessons sub-dispatch
210
+ if (cmd === 'lessons') {
211
+ const sub = rest[0];
212
+ const subArgs = rest.slice(1);
213
+ if (!sub) {
214
+ console.error('Error: "lessons" requires a subcommand: pull, check, promote, infer');
215
+ process.exit(1);
216
+ }
217
+ const lessonCmd = LESSONS_COMMANDS[sub];
218
+ if (!lessonCmd) {
219
+ console.error(`Error: Unknown lessons subcommand: ${sub}`);
220
+ console.error('Available: pull, check, promote, infer');
221
+ process.exit(1);
222
+ }
223
+ runScript(lessonCmd.script, [...lessonCmd.args, ...subArgs]);
224
+ return;
225
+ }
226
+
227
+ // Standard command routing
228
+ const entry = COMMANDS[cmd];
229
+ if (!entry) {
230
+ console.error(`Error: Unknown command: ${cmd}`);
231
+ console.error(`Run "act help" to see available commands.`);
232
+ process.exit(1);
233
+ }
234
+
235
+ runScript(entry.script, rest);
236
+ }
237
+
238
+ main();
@@ -0,0 +1,6 @@
1
+ ---
2
+ description: "Run the full autonomous coding pipeline — brainstorm → PRD → plan → execute → verify → finish"
3
+ argument-hint: "<feature description, report path, or issue #>"
4
+ ---
5
+
6
+ Invoke the `autonomous-coding-toolkit:autocode` skill to run the full pipeline for: $ARGUMENTS
@@ -0,0 +1,18 @@
1
+ ---
2
+ description: "Cancel active Ralph Loop"
3
+ allowed-tools: ["Bash(test -f .claude/ralph-loop.local.md:*)", "Bash(rm .claude/ralph-loop.local.md)", "Read(.claude/ralph-loop.local.md)"]
4
+ hide-from-slash-command-tool: "true"
5
+ ---
6
+
7
+ # Cancel Ralph
8
+
9
+ To cancel the Ralph loop:
10
+
11
+ 1. Check if `.claude/ralph-loop.local.md` exists using Bash: `test -f .claude/ralph-loop.local.md && echo "EXISTS" || echo "NOT_FOUND"`
12
+
13
+ 2. **If NOT_FOUND**: Say "No active Ralph loop found."
14
+
15
+ 3. **If EXISTS**:
16
+ - Read `.claude/ralph-loop.local.md` to get the current iteration number from the `iteration:` field
17
+ - Remove the file using Bash: `rm .claude/ralph-loop.local.md`
18
+ - Report: "Cancelled Ralph loop (was at iteration N)" where N is the iteration value
@@ -0,0 +1,53 @@
1
+ ---
2
+ description: "Run the full Code Factory pipeline — brainstorm → PRD → plan → execute → verify → finish"
3
+ argument-hint: "<feature description or report path>"
4
+ ---
5
+
6
+ # Code Factory
7
+
8
+ Run the full agent-driven development pipeline for: $ARGUMENTS
9
+
10
+ ## Pipeline
11
+
12
+ This command orchestrates the superpowers skill chain with Code Factory enhancements integrated at each stage. Follow each stage in order — do not skip any.
13
+
14
+ ### Stage 1: Brainstorming
15
+ Invoke `superpowers:brainstorming` to explore the idea, ask questions, propose approaches, and produce an approved design doc at `docs/plans/YYYY-MM-DD-<topic>-design.md`.
16
+
17
+ ### Stage 2: PRD Generation
18
+ After the design is approved, generate `tasks/prd.json` using the `/create-prd` format:
19
+ - 8-15 granular tasks with machine-verifiable acceptance criteria (shell commands)
20
+ - Separate investigation tasks from implementation tasks
21
+ - Order by dependency
22
+ - Save both `tasks/prd.json` and `tasks/prd-<feature-slug>.md`
23
+
24
+ ### Stage 3: Writing Plans
25
+ Invoke `superpowers:writing-plans` to create the implementation plan. Enhance the plan with:
26
+ - A `## Quality Gates` section listing project checks (auto-detect: pytest, npm test, npm run lint, make test)
27
+ - Cross-references to `tasks/prd.json` task IDs
28
+ - `progress.txt` initialization as the first step
29
+
30
+ ### Stage 4: Execution
31
+ Invoke `superpowers:executing-plans` to execute in batches. Between each batch:
32
+ - Run quality gate commands and report results
33
+ - Update `tasks/prd.json` — mark passing tasks
34
+ - Append batch summary to `progress.txt`
35
+ - Fix any failures before proceeding
36
+
37
+ ### Stage 5: Verification
38
+ Invoke `superpowers:verification-before-completion`:
39
+ - Run ALL `tasks/prd.json` acceptance criteria
40
+ - Confirm every task has `"passes": true`
41
+ - Show quality gate evidence
42
+ - Only claim completion with full evidence
43
+
44
+ ### Stage 6: Finish
45
+ Invoke `superpowers:finishing-a-development-branch` to handle commit, PR, or merge.
46
+
47
+ ## Rules
48
+
49
+ - Never skip a stage. The design must be approved before PRD generation.
50
+ - Every acceptance criterion is a shell command. No vague criteria.
51
+ - Quality gates run between EVERY batch, not just at the end.
52
+ - `progress.txt` is append-only during execution — never truncate it.
53
+ - If the input is a report file path, run `scripts/analyze-report.sh` first to identify the top priority, then use that as the feature description for brainstorming.
@@ -0,0 +1,55 @@
1
+ ---
2
+ description: "Generate a PRD with machine-verifiable acceptance criteria from a feature description"
3
+ argument-hint: "<feature description>"
4
+ ---
5
+
6
+ # Create PRD
7
+
8
+ Generate a Product Requirements Document for the given feature.
9
+
10
+ ## Input
11
+
12
+ The user provides a feature description: $ARGUMENTS
13
+
14
+ ## Process
15
+
16
+ 1. **Understand the feature** — Ask clarifying questions if the description is ambiguous
17
+ 2. **Break into tasks** — Generate 8-15 small, granular tasks (not 3-5 large ones)
18
+ 3. **Machine-verifiable criteria** — Every acceptance criterion must be a command that returns pass/fail:
19
+ - Test commands: `pytest tests/test_feature.py -x`
20
+ - Lint commands: `python3 -m py_compile file.py`
21
+ - Endpoint checks: `curl -sf -o /dev/null http://localhost:8000/endpoint` (`-f` makes curl exit non-zero on HTTP 4xx/5xx responses)
22
+ - File existence: `test -f path/to/file`
23
+ - Pattern checks: `grep -q 'expected_pattern' file`
24
+ 4. **Separate investigation from implementation** — "Research X" and "Implement X" are different tasks
25
+ 5. **Order by dependency** — Tasks should be ordered so each builds on the previous
26
+
27
+ ## Output Format
28
+
29
+ Save to `tasks/prd.json` (create `tasks/` directory if needed):
30
+
31
+ ```json
32
+ [
33
+ {
34
+ "id": 1,
35
+ "title": "Short imperative title",
36
+ "description": "What needs to be done and why",
37
+ "acceptance_criteria": [
38
+ "pytest tests/test_auth.py::test_login -x",
39
+ "test -f src/auth/handler.py"
40
+ ],
41
+ "passes": false,
42
+ "blocked_by": []
43
+ }
44
+ ]
45
+ ```
46
+
47
+ Also save a human-readable version to `tasks/prd-<feature-slug>.md` with full descriptions.
48
+
49
+ ## Rules
50
+
51
+ - Each task should take 1-3 iterations of a Ralph loop to complete
52
+ - Acceptance criteria MUST be shell commands that exit 0 on success, non-zero on failure
53
+ - No vague criteria like "code is clean" or "well-tested" — everything is boolean
54
+ - Include setup tasks (create directories, install deps) as separate tasks
55
+ - Final task should always be "Run full quality gate" with all checks combined
@@ -0,0 +1,18 @@
1
+ ---
2
+ description: "Start Ralph Loop in current session"
3
+ argument-hint: "PROMPT [--max-iterations N] [--completion-promise TEXT]"
4
+ allowed-tools: ["Bash(${CLAUDE_PLUGIN_ROOT}/scripts/setup-ralph-loop.sh:*)"]
5
+ hide-from-slash-command-tool: "true"
6
+ ---
7
+
8
+ # Ralph Loop Command
9
+
10
+ Execute the setup script to initialize the Ralph loop:
11
+
12
+ ```!
13
+ "${CLAUDE_PLUGIN_ROOT}/scripts/setup-ralph-loop.sh" $ARGUMENTS
14
+ ```
15
+
16
+ Please work on the task. When you try to exit, the Ralph loop will feed the SAME PROMPT back to you for the next iteration. You'll see your previous work in files and git history, allowing you to iterate and improve.
17
+
18
+ CRITICAL RULE: If a completion promise is set, you may ONLY output it when the statement is completely and unequivocally TRUE. Do not output false promises to escape the loop, even if you think you're stuck or should exit for other reasons. The loop is designed to continue until genuine completion.