npm - autonomous-coding-toolkit - Versions diffs - 1.0.0 - Mend

autonomous-coding-toolkit 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (324) hide show

package/.claude-plugin/marketplace.json +22 -0
package/.claude-plugin/plugin.json +13 -0
package/LICENSE +21 -0
package/Makefile +21 -0
package/README.md +140 -0
package/SECURITY.md +28 -0
package/agents/bash-expert.md +113 -0
package/agents/dependency-auditor.md +138 -0
package/agents/integration-tester.md +120 -0
package/agents/lesson-scanner.md +149 -0
package/agents/python-expert.md +179 -0
package/agents/service-monitor.md +141 -0
package/agents/shell-expert.md +147 -0
package/benchmarks/runner.sh +147 -0
package/benchmarks/tasks/01-rest-endpoint/rubric.sh +29 -0
package/benchmarks/tasks/01-rest-endpoint/task.md +17 -0
package/benchmarks/tasks/02-refactor-module/task.md +8 -0
package/benchmarks/tasks/03-fix-integration-bug/task.md +8 -0
package/benchmarks/tasks/04-add-test-coverage/task.md +8 -0
package/benchmarks/tasks/05-multi-file-feature/task.md +8 -0
package/bin/act.js +238 -0
package/commands/autocode.md +6 -0
package/commands/cancel-ralph.md +18 -0
package/commands/code-factory.md +53 -0
package/commands/create-prd.md +55 -0
package/commands/ralph-loop.md +18 -0
package/commands/run-plan.md +117 -0
package/commands/submit-lesson.md +122 -0
package/docs/ARCHITECTURE.md +630 -0
package/docs/CONTRIBUTING.md +125 -0
package/docs/lessons/0001-bare-exception-swallowing.md +34 -0
package/docs/lessons/0002-async-def-without-await.md +28 -0
package/docs/lessons/0003-create-task-without-callback.md +28 -0
package/docs/lessons/0004-hardcoded-test-counts.md +28 -0
package/docs/lessons/0005-sqlite-without-closing.md +33 -0
package/docs/lessons/0006-venv-pip-path.md +27 -0
package/docs/lessons/0007-runner-state-self-rejection.md +35 -0
package/docs/lessons/0008-quality-gate-blind-spot.md +33 -0
package/docs/lessons/0009-parser-overcount-empty-batches.md +36 -0
package/docs/lessons/0010-local-outside-function-bash.md +33 -0
package/docs/lessons/0011-batch-tests-for-unimplemented-code.md +36 -0
package/docs/lessons/0012-api-markdown-unescaped-chars.md +33 -0
package/docs/lessons/0013-export-prefix-env-parsing.md +33 -0
package/docs/lessons/0014-decorator-registry-import-side-effect.md +43 -0
package/docs/lessons/0015-frontend-backend-schema-drift.md +43 -0
package/docs/lessons/0016-event-driven-cold-start-seeding.md +44 -0
package/docs/lessons/0017-copy-paste-logic-diverges.md +43 -0
package/docs/lessons/0018-layer-passes-pipeline-broken.md +45 -0
package/docs/lessons/0019-systemd-envfile-ignores-export.md +41 -0
package/docs/lessons/0020-persist-state-incrementally.md +44 -0
package/docs/lessons/0021-dual-axis-testing.md +48 -0
package/docs/lessons/0022-jsx-factory-shadowing.md +43 -0
package/docs/lessons/0023-static-analysis-spiral.md +51 -0
package/docs/lessons/0024-shared-pipeline-implementation.md +55 -0
package/docs/lessons/0025-defense-in-depth-all-entry-points.md +65 -0
package/docs/lessons/0026-linter-no-rules-false-enforcement.md +54 -0
package/docs/lessons/0027-jsx-silent-prop-drop.md +64 -0
package/docs/lessons/0028-no-infrastructure-in-client-code.md +49 -0
package/docs/lessons/0029-never-write-secrets-to-files.md +61 -0
package/docs/lessons/0030-cache-merge-not-replace.md +62 -0
package/docs/lessons/0031-verify-units-at-boundaries.md +66 -0
package/docs/lessons/0032-module-lifecycle-subscribe-unsubscribe.md +89 -0
package/docs/lessons/0033-async-iteration-mutable-snapshot.md +72 -0
package/docs/lessons/0034-caller-missing-await-silent-discard.md +65 -0
package/docs/lessons/0035-duplicate-registration-silent-overwrite.md +85 -0
package/docs/lessons/0036-websocket-dirty-disconnect.md +33 -0
package/docs/lessons/0037-parallel-agents-worktree-corruption.md +31 -0
package/docs/lessons/0038-subscribe-no-stored-ref.md +36 -0
package/docs/lessons/0039-fallback-or-default-hides-bugs.md +34 -0
package/docs/lessons/0040-event-firehose-filter-first.md +36 -0
package/docs/lessons/0041-ambiguous-base-dir-path-nesting.md +32 -0
package/docs/lessons/0042-spec-compliance-insufficient.md +36 -0
package/docs/lessons/0043-exact-count-extensible-collections.md +32 -0
package/docs/lessons/0044-relative-file-deps-worktree.md +39 -0
package/docs/lessons/0045-iterative-design-improvement.md +33 -0
package/docs/lessons/0046-plan-assertion-math-bugs.md +38 -0
package/docs/lessons/0047-pytest-single-threaded-default.md +37 -0
package/docs/lessons/0048-integration-wiring-batch.md +40 -0
package/docs/lessons/0049-ab-verification.md +41 -0
package/docs/lessons/0050-editing-sourced-files-during-execution.md +33 -0
package/docs/lessons/0051-infrastructure-fixes-cant-self-heal.md +30 -0
package/docs/lessons/0052-uncommitted-changes-poison-quality-gates.md +31 -0
package/docs/lessons/0053-jq-compact-flag-inconsistency.md +31 -0
package/docs/lessons/0054-parser-matches-inside-code-blocks.md +30 -0
package/docs/lessons/0055-agents-compensate-for-garbled-prompts.md +31 -0
package/docs/lessons/0056-grep-count-exit-code-on-zero.md +42 -0
package/docs/lessons/0057-new-artifacts-break-git-clean-gates.md +42 -0
package/docs/lessons/0058-dead-config-keys-never-consumed.md +49 -0
package/docs/lessons/0059-contract-test-shared-structures.md +53 -0
package/docs/lessons/0060-set-e-silent-death-in-runners.md +53 -0
package/docs/lessons/0061-context-injection-dirty-state.md +50 -0
package/docs/lessons/0062-sibling-bug-neighborhood-scan.md +29 -0
package/docs/lessons/0063-one-flag-two-lifetimes.md +31 -0
package/docs/lessons/0064-test-passes-wrong-reason.md +31 -0
package/docs/lessons/0065-pipefail-grep-count-double-output.md +39 -0
package/docs/lessons/0066-local-keyword-outside-function.md +37 -0
package/docs/lessons/0067-stdin-hang-non-interactive-shell.md +36 -0
package/docs/lessons/0068-agent-builds-wrong-thing-correctly.md +31 -0
package/docs/lessons/0069-plan-quality-dominates-execution.md +30 -0
package/docs/lessons/0070-spec-echo-back-prevents-drift.md +31 -0
package/docs/lessons/0071-positive-instructions-outperform-negative.md +30 -0
package/docs/lessons/0072-lost-in-the-middle-context-placement.md +30 -0
package/docs/lessons/0073-unscoped-lessons-cause-false-positives.md +30 -0
package/docs/lessons/0074-stale-context-injection-wrong-batch.md +32 -0
package/docs/lessons/0075-research-artifacts-must-persist.md +32 -0
package/docs/lessons/0076-wrong-decomposition-contaminates-downstream.md +30 -0
package/docs/lessons/0077-cherry-pick-merges-need-manual-resolution.md +30 -0
package/docs/lessons/0078-static-review-without-live-test.md +30 -0
package/docs/lessons/0079-integration-wiring-batch-required.md +32 -0
package/docs/lessons/FRAMEWORK.md +161 -0
package/docs/lessons/SUMMARY.md +201 -0
package/docs/lessons/TEMPLATE.md +85 -0
package/docs/plans/2026-02-21-code-factory-v2-design.md +204 -0
package/docs/plans/2026-02-21-code-factory-v2-implementation-plan.md +2189 -0
package/docs/plans/2026-02-21-code-factory-v2-phase4-design.md +537 -0
package/docs/plans/2026-02-21-code-factory-v2-phase4-implementation-plan.md +2012 -0
package/docs/plans/2026-02-21-hardening-pass-design.md +108 -0
package/docs/plans/2026-02-21-hardening-pass-plan.md +1378 -0
package/docs/plans/2026-02-21-mab-research-report.md +406 -0
package/docs/plans/2026-02-21-marketplace-restructure-design.md +240 -0
package/docs/plans/2026-02-21-marketplace-restructure-plan.md +832 -0
package/docs/plans/2026-02-21-phase4-completion-plan.md +697 -0
package/docs/plans/2026-02-21-validator-suite-design.md +148 -0
package/docs/plans/2026-02-21-validator-suite-plan.md +540 -0
package/docs/plans/2026-02-22-mab-research-round2.md +556 -0
package/docs/plans/2026-02-22-mab-run-design.md +462 -0
package/docs/plans/2026-02-22-mab-run-plan.md +2046 -0
package/docs/plans/2026-02-22-operations-design-methodology-research.md +681 -0
package/docs/plans/2026-02-22-research-agent-failure-taxonomy.md +532 -0
package/docs/plans/2026-02-22-research-code-guideline-policies.md +886 -0
package/docs/plans/2026-02-22-research-codebase-audit-refactoring.md +908 -0
package/docs/plans/2026-02-22-research-coding-standards-documentation.md +541 -0
package/docs/plans/2026-02-22-research-competitive-landscape.md +687 -0
package/docs/plans/2026-02-22-research-comprehensive-testing.md +1076 -0
package/docs/plans/2026-02-22-research-context-utilization.md +459 -0
package/docs/plans/2026-02-22-research-cost-quality-tradeoff.md +548 -0
package/docs/plans/2026-02-22-research-lesson-transferability.md +508 -0
package/docs/plans/2026-02-22-research-multi-agent-coordination.md +312 -0
package/docs/plans/2026-02-22-research-phase-integration.md +602 -0
package/docs/plans/2026-02-22-research-plan-quality.md +428 -0
package/docs/plans/2026-02-22-research-prompt-engineering.md +558 -0
package/docs/plans/2026-02-22-research-unconventional-perspectives.md +528 -0
package/docs/plans/2026-02-22-research-user-adoption.md +638 -0
package/docs/plans/2026-02-22-research-verification-effectiveness.md +433 -0
package/docs/plans/2026-02-23-agent-suite-design.md +299 -0
package/docs/plans/2026-02-23-agent-suite-plan.md +578 -0
package/docs/plans/2026-02-23-phase3-cost-infrastructure-design.md +148 -0
package/docs/plans/2026-02-23-phase3-cost-infrastructure-plan.md +1062 -0
package/docs/plans/2026-02-23-research-bash-expert-agent.md +543 -0
package/docs/plans/2026-02-23-research-dependency-auditor-agent.md +564 -0
package/docs/plans/2026-02-23-research-improving-existing-agents.md +503 -0
package/docs/plans/2026-02-23-research-integration-tester-agent.md +454 -0
package/docs/plans/2026-02-23-research-python-expert-agent.md +429 -0
package/docs/plans/2026-02-23-research-service-monitor-agent.md +425 -0
package/docs/plans/2026-02-23-research-shell-expert-agent.md +533 -0
package/docs/plans/2026-02-23-roadmap-to-completion.md +530 -0
package/docs/plans/2026-02-24-headless-module-split-design.md +98 -0
package/docs/plans/2026-02-24-headless-module-split.md +443 -0
package/docs/plans/2026-02-24-lesson-scope-metadata-design.md +228 -0
package/docs/plans/2026-02-24-lesson-scope-metadata-plan.md +968 -0
package/docs/plans/2026-02-24-npm-packaging-design.md +841 -0
package/docs/plans/2026-02-24-npm-packaging-plan.md +1965 -0
package/docs/plans/audit-findings.md +186 -0
package/docs/telegram-notification-format.md +98 -0
package/examples/example-plan.md +51 -0
package/examples/example-prd.json +72 -0
package/examples/example-roadmap.md +33 -0
package/examples/quickstart-plan.md +63 -0
package/hooks/hooks.json +26 -0
package/hooks/setup-symlinks.sh +48 -0
package/hooks/stop-hook.sh +135 -0
package/package.json +47 -0
package/policies/bash.md +71 -0
package/policies/python.md +71 -0
package/policies/testing.md +61 -0
package/policies/universal.md +60 -0
package/scripts/analyze-report.sh +97 -0
package/scripts/architecture-map.sh +145 -0
package/scripts/auto-compound.sh +273 -0
package/scripts/batch-audit.sh +42 -0
package/scripts/batch-test.sh +101 -0
package/scripts/entropy-audit.sh +221 -0
package/scripts/failure-digest.sh +51 -0
package/scripts/generate-ast-rules.sh +96 -0
package/scripts/init.sh +112 -0
package/scripts/lesson-check.sh +428 -0
package/scripts/lib/common.sh +61 -0
package/scripts/lib/cost-tracking.sh +153 -0
package/scripts/lib/ollama.sh +60 -0
package/scripts/lib/progress-writer.sh +128 -0
package/scripts/lib/run-plan-context.sh +215 -0
package/scripts/lib/run-plan-echo-back.sh +231 -0
package/scripts/lib/run-plan-headless.sh +396 -0
package/scripts/lib/run-plan-notify.sh +57 -0
package/scripts/lib/run-plan-parser.sh +81 -0
package/scripts/lib/run-plan-prompt.sh +215 -0
package/scripts/lib/run-plan-quality-gate.sh +132 -0
package/scripts/lib/run-plan-routing.sh +315 -0
package/scripts/lib/run-plan-sampling.sh +170 -0
package/scripts/lib/run-plan-scoring.sh +146 -0
package/scripts/lib/run-plan-state.sh +142 -0
package/scripts/lib/run-plan-team.sh +199 -0
package/scripts/lib/telegram.sh +54 -0
package/scripts/lib/thompson-sampling.sh +176 -0
package/scripts/license-check.sh +74 -0
package/scripts/mab-run.sh +575 -0
package/scripts/module-size-check.sh +146 -0
package/scripts/patterns/async-no-await.yml +5 -0
package/scripts/patterns/bare-except.yml +6 -0
package/scripts/patterns/empty-catch.yml +6 -0
package/scripts/patterns/hardcoded-localhost.yml +9 -0
package/scripts/patterns/retry-loop-no-backoff.yml +12 -0
package/scripts/pipeline-status.sh +197 -0
package/scripts/policy-check.sh +226 -0
package/scripts/prior-art-search.sh +133 -0
package/scripts/promote-mab-lessons.sh +126 -0
package/scripts/prompts/agent-a-superpowers.md +29 -0
package/scripts/prompts/agent-b-ralph.md +29 -0
package/scripts/prompts/judge-agent.md +61 -0
package/scripts/prompts/planner-agent.md +44 -0
package/scripts/pull-community-lessons.sh +90 -0
package/scripts/quality-gate.sh +266 -0
package/scripts/research-gate.sh +90 -0
package/scripts/run-plan.sh +329 -0
package/scripts/scope-infer.sh +159 -0
package/scripts/setup-ralph-loop.sh +155 -0
package/scripts/telemetry.sh +230 -0
package/scripts/tests/run-all-tests.sh +52 -0
package/scripts/tests/test-act-cli.sh +46 -0
package/scripts/tests/test-agents-md.sh +87 -0
package/scripts/tests/test-analyze-report.sh +114 -0
package/scripts/tests/test-architecture-map.sh +89 -0
package/scripts/tests/test-auto-compound.sh +169 -0
package/scripts/tests/test-batch-test.sh +65 -0
package/scripts/tests/test-benchmark-runner.sh +25 -0
package/scripts/tests/test-common.sh +168 -0
package/scripts/tests/test-cost-tracking.sh +158 -0
package/scripts/tests/test-echo-back.sh +180 -0
package/scripts/tests/test-entropy-audit.sh +146 -0
package/scripts/tests/test-failure-digest.sh +66 -0
package/scripts/tests/test-generate-ast-rules.sh +145 -0
package/scripts/tests/test-helpers.sh +82 -0
package/scripts/tests/test-init.sh +47 -0
package/scripts/tests/test-lesson-check.sh +278 -0
package/scripts/tests/test-lesson-local.sh +55 -0
package/scripts/tests/test-license-check.sh +109 -0
package/scripts/tests/test-mab-run.sh +182 -0
package/scripts/tests/test-ollama-lib.sh +49 -0
package/scripts/tests/test-ollama.sh +60 -0
package/scripts/tests/test-pipeline-status.sh +198 -0
package/scripts/tests/test-policy-check.sh +124 -0
package/scripts/tests/test-prior-art-search.sh +96 -0
package/scripts/tests/test-progress-writer.sh +140 -0
package/scripts/tests/test-promote-mab-lessons.sh +110 -0
package/scripts/tests/test-pull-community-lessons.sh +149 -0
package/scripts/tests/test-quality-gate.sh +241 -0
package/scripts/tests/test-research-gate.sh +132 -0
package/scripts/tests/test-run-plan-cli.sh +86 -0
package/scripts/tests/test-run-plan-context.sh +305 -0
package/scripts/tests/test-run-plan-e2e.sh +153 -0
package/scripts/tests/test-run-plan-headless.sh +424 -0
package/scripts/tests/test-run-plan-notify.sh +124 -0
package/scripts/tests/test-run-plan-parser.sh +217 -0
package/scripts/tests/test-run-plan-prompt.sh +254 -0
package/scripts/tests/test-run-plan-quality-gate.sh +222 -0
package/scripts/tests/test-run-plan-routing.sh +178 -0
package/scripts/tests/test-run-plan-scoring.sh +148 -0
package/scripts/tests/test-run-plan-state.sh +261 -0
package/scripts/tests/test-run-plan-team.sh +157 -0
package/scripts/tests/test-scope-infer.sh +150 -0
package/scripts/tests/test-setup-ralph-loop.sh +63 -0
package/scripts/tests/test-telegram-env.sh +38 -0
package/scripts/tests/test-telegram.sh +121 -0
package/scripts/tests/test-telemetry.sh +46 -0
package/scripts/tests/test-thompson-sampling.sh +139 -0
package/scripts/tests/test-validate-all.sh +60 -0
package/scripts/tests/test-validate-commands.sh +89 -0
package/scripts/tests/test-validate-hooks.sh +98 -0
package/scripts/tests/test-validate-lessons.sh +150 -0
package/scripts/tests/test-validate-plan-quality.sh +235 -0
package/scripts/tests/test-validate-plans.sh +187 -0
package/scripts/tests/test-validate-plugin.sh +106 -0
package/scripts/tests/test-validate-prd.sh +184 -0
package/scripts/tests/test-validate-skills.sh +134 -0
package/scripts/validate-all.sh +57 -0
package/scripts/validate-commands.sh +67 -0
package/scripts/validate-hooks.sh +89 -0
package/scripts/validate-lessons.sh +98 -0
package/scripts/validate-plan-quality.sh +369 -0
package/scripts/validate-plans.sh +120 -0
package/scripts/validate-plugin.sh +86 -0
package/scripts/validate-policies.sh +42 -0
package/scripts/validate-prd.sh +118 -0
package/scripts/validate-skills.sh +96 -0
package/skills/autocode/SKILL.md +285 -0
package/skills/autocode/ab-verification.md +51 -0
package/skills/autocode/code-quality-standards.md +37 -0
package/skills/autocode/competitive-mode.md +364 -0
package/skills/brainstorming/SKILL.md +97 -0
package/skills/capture-lesson/SKILL.md +187 -0
package/skills/check-lessons/SKILL.md +116 -0
package/skills/dispatching-parallel-agents/SKILL.md +110 -0
package/skills/executing-plans/SKILL.md +85 -0
package/skills/finishing-a-development-branch/SKILL.md +201 -0
package/skills/receiving-code-review/SKILL.md +72 -0
package/skills/requesting-code-review/SKILL.md +59 -0
package/skills/requesting-code-review/code-reviewer.md +82 -0
package/skills/research/SKILL.md +145 -0
package/skills/roadmap/SKILL.md +115 -0
package/skills/subagent-driven-development/SKILL.md +98 -0
package/skills/subagent-driven-development/code-quality-reviewer-prompt.md +18 -0
package/skills/subagent-driven-development/implementer-prompt.md +73 -0
package/skills/subagent-driven-development/spec-reviewer-prompt.md +57 -0
package/skills/systematic-debugging/SKILL.md +134 -0
package/skills/systematic-debugging/condition-based-waiting.md +64 -0
package/skills/systematic-debugging/defense-in-depth.md +32 -0
package/skills/systematic-debugging/root-cause-tracing.md +55 -0
package/skills/test-driven-development/SKILL.md +167 -0
package/skills/using-git-worktrees/SKILL.md +219 -0
package/skills/using-superpowers/SKILL.md +54 -0
package/skills/verification-before-completion/SKILL.md +140 -0
package/skills/verify/SKILL.md +82 -0
package/skills/writing-plans/SKILL.md +128 -0
package/skills/writing-skills/SKILL.md +93 -0

package/scripts/lib/run-plan-parser.sh ADDED Viewed

@@ -0,0 +1,81 @@
+#!/usr/bin/env bash
+# run-plan-parser.sh — Parse markdown plan files into batch/task structures
+#
+# Plan format:
+#   ## Batch N: Title          <- batch boundary
+#   ### Task M: Name           <- task within batch
+#   [full text...]             <- extracted verbatim
+#
+# Functions:
+#   count_batches <plan_file>                -> number of batches
+#   get_batch_title <plan_file> <batch_num>  -> batch title text
+#   get_batch_text <plan_file> <batch_num>   -> full batch content (tasks + text)
+#   get_batch_task_count <plan_file> <batch_num> -> number of tasks in batch
+#   get_batch_context_refs <plan_file> <batch_num> -> files listed on the batch's "context_refs:" line, one per line
+#   is_critical_batch <plan_file> <batch_num>    -> exit 0 if tagged CRITICAL
+count_batches() {
+    local plan_file="$1"
+    local count
+    count=$(grep -cE '^## Batch [0-9]+' "$plan_file" 2>/dev/null || true)
+    echo "${count:-0}"
+}
+get_batch_title() {
+    local plan_file="$1" batch_num="$2"
+    local line
+    line=$(grep -E "^## Batch ${batch_num}:" "$plan_file" 2>/dev/null | head -1)
+    if [[ -z "$line" ]]; then
+        echo ""
+        return
+    fi
+    # Strip "## Batch N: " prefix using bash parameter expansion
+    echo "${line#\#\# Batch ${batch_num}: }"
+}
+get_batch_text() {
+    local plan_file="$1" batch_num="$2"
+    # Extract everything after "## Batch N" up to the next "## Batch" or EOF
+    # Uses POSIX-compatible awk: $3 + 0 extracts batch number
+    awk -v batch="$batch_num" '
+        /^## Batch [0-9]+/ {
+            n = $3 + 0
+            if (n == batch) { printing = 1; next }
+            else if (printing) { exit }
+        }
+        printing { print }
+    ' "$plan_file"
+}
+get_batch_task_count() {
+    local plan_file="$1" batch_num="$2"
+    local text count
+    text=$(get_batch_text "$plan_file" "$batch_num")
+    if [[ -z "$text" ]]; then
+        echo "0"
+        return
+    fi
+    count=$(echo "$text" | grep -cE '^### Task [0-9]+' 2>/dev/null || true)
+    echo "${count:-0}"
+}
+get_batch_context_refs() {
+    local plan_file="$1" batch_num="$2"
+    local batch_text
+    batch_text=$(get_batch_text "$plan_file" "$batch_num")
+    # Extract "context_refs: file1, file2, ..." line
+    local refs_line
+    refs_line=$(echo "$batch_text" | grep -E '^context_refs:' | head -1 || true)
+    if [[ -z "$refs_line" ]]; then
+        echo ""
+        return
+    fi
+    # Strip "context_refs: " prefix and split on comma
+    echo "${refs_line#context_refs: }" | tr ',' '\n' | sed 's/^[[:space:]]*//' | sed 's/[[:space:]]*$//'
+}
+is_critical_batch() {
+    local plan_file="$1" batch_num="$2"
+    local header
+    header=$(grep -E "^## Batch ${batch_num}:" "$plan_file" 2>/dev/null | head -1)
+    [[ "$header" == *"CRITICAL"* ]]
+}

package/scripts/lib/run-plan-prompt.sh ADDED Viewed

@@ -0,0 +1,215 @@
+#!/usr/bin/env bash
+# run-plan-prompt.sh — Build prompts for headless claude -p batch execution
+#
+# Requires run-plan-parser.sh to be sourced first (provides get_batch_title, get_batch_text, get_batch_context_refs)
+#
+# Functions:
+#   build_stable_prefix <plan_file> <worktree> <python> <quality_gate_cmd>
+#     -> stable portion of the prompt (plan identity, worktree, python, branch, TDD rules)
+#     -> safe to cache across batches; does NOT include prev_test_count or other per-batch data
+#   build_variable_suffix <plan_file> <batch_num> <worktree> <prev_test_count>
+#     -> per-batch portion of the prompt (tasks, commits, progress, gate results, test count)
+#   build_batch_prompt <plan_file> <batch_num> <worktree> <python> <quality_gate_cmd> <prev_test_count>
+#     -> full self-contained prompt (stable prefix + variable suffix) for claude -p
+#   generate_agents_md <plan_file> <worktree> <mode>
+#     -> writes AGENTS.md to worktree for agent team awareness
+# build_stable_prefix — assemble the stable (batch-invariant) portion of a batch prompt.
+#
+# Stability contract: output depends only on plan_file path, worktree path, python path,
+# quality_gate_cmd, and the git branch name. None of these change between batches in a
+# normal run, so the result may be cached and reused across batches.
+#
+# NOTE: prev_test_count intentionally excluded — it changes each batch. It belongs in
+# build_variable_suffix (see issue #48).
+#
+# Args: <plan_file> <worktree> <python> <quality_gate_cmd>
+build_stable_prefix() {
+    local plan_file="$1"
+    local worktree="$2"
+    local python="$3"
+    local quality_gate_cmd="$4"
+    local branch
+    # #46: Check worktree exists before calling git. Log a warning if git fails so
+    # the caller knows the branch name is unreliable rather than silently caching "unknown".
+    if [[ ! -d "$worktree" ]]; then
+        echo "WARNING: worktree directory does not exist: $worktree" >&2
+        branch="unknown"
+    else
+        branch=$(git -C "$worktree" branch --show-current 2>/dev/null) || {
+            echo "WARNING: git branch failed for worktree: $worktree — using 'unknown'" >&2
+            branch="unknown"
+        }
+        # git can succeed but print nothing (detached HEAD)
+        [[ -z "$branch" ]] && branch="unknown"
+    fi
+    cat <<PREFIX
+Working directory: ${worktree}
+Python: ${python}
+Branch: ${branch}
+Requirements:
+- TDD: write test -> verify fail -> implement -> verify pass -> commit each task
+- After all tasks: run quality gate (${quality_gate_cmd})
+- Update progress.txt with batch summary and commit
+PREFIX
+}
+# build_variable_suffix — assemble the per-batch (variable) portion of a batch prompt.
+#
+# Contains everything that can differ between batches: batch number, title, task text,
+# recent commits, progress tail, previous quality gate result, context refs, and the
+# current prev_test_count (which changes from batch to batch).
+#
+# Calls get_batch_title, get_batch_text and get_batch_context_refs, which must already
+# be defined when this function runs. Reads progress.txt and .run-plan-state.json from
+# the worktree when they exist, and invokes git and jq.
+#
+# Args: <plan_file> <batch_num> <worktree> <prev_test_count>
+# Outputs: the suffix text on stdout; warnings on stderr.
+build_variable_suffix() {
+    local plan_file="$1"
+    local batch_num="$2"
+    local worktree="$3"
+    local prev_test_count="$4"
+    local title batch_text recent_commits progress_tail prev_gate
+    title=$(get_batch_title "$plan_file" "$batch_num")
+    batch_text=$(get_batch_text "$plan_file" "$batch_num")
+    # Cross-batch context: recent commits (last 5, one line each); falls back to a
+    # placeholder when git log fails.
+    recent_commits=$(git -C "$worktree" log --oneline -5 2>/dev/null || echo "(no commits)")
+    # Cross-batch context: progress.txt tail (last 20 lines)
+    # #50: File existence is already checked before calling tail.
+    # There is deliberately no 2>/dev/null || true here — permission errors on a
+    # confirmed-existing file should propagate so the caller sees the real error rather
+    # than silently getting no progress.
+    progress_tail=""
+    if [[ -f "$worktree/progress.txt" ]]; then
+        progress_tail=$(tail -20 "$worktree/progress.txt")
+    fi
+    # Cross-batch context: previous quality gate result
+    # #47: Distinguish "no state file / no key" (expected) from "corrupted JSON" (error).
+    # jq returns exit 5 on parse failure. Check exit code and warn on corruption so the
+    # caller knows prev_gate is empty due to an error, not just an absent first batch.
+    prev_gate=""
+    if [[ -f "$worktree/.run-plan-state.json" ]]; then
+        local jq_exit=0
+        prev_gate=$(jq -r '.last_quality_gate // empty' "$worktree/.run-plan-state.json" 2>/dev/null) || jq_exit=$?
+        if [[ $jq_exit -ne 0 ]]; then
+            echo "WARNING: .run-plan-state.json is corrupted (jq exit $jq_exit) — proceeding without previous gate context" >&2
+            prev_gate=""
+        fi
+    fi
+    # Cross-batch context: referenced files from context_refs
+    # Each ref is resolved relative to the worktree; refs that are not regular files are
+    # skipped silently, and only the first 100 lines of each file are included.
+    local context_refs_content=""
+    local refs
+    refs=$(get_batch_context_refs "$plan_file" "$batch_num")
+    if [[ -n "$refs" ]]; then
+        while IFS= read -r ref; do
+            [[ -z "$ref" ]] && continue
+            if [[ -f "$worktree/$ref" ]]; then
+                context_refs_content+="
+--- $ref ---
+$(head -100 "$worktree/$ref")
+"
+            fi
+        done <<< "$refs"
+    fi
+    # Emit the suffix. The optional sections (progress, previous gate, referenced files)
+    # are produced by inline command substitutions that print nothing when their data is
+    # empty; the previous-gate section is also skipped when the value is the literal
+    # string "null".
+    cat <<SUFFIX
+You are implementing Batch ${batch_num}: ${title} from ${plan_file}.
+Tasks in this batch:
+${batch_text}
+Recent commits:
+${recent_commits}
+$(if [[ -n "$progress_tail" ]]; then
+echo "
+Previous progress:
+${progress_tail}"
+fi)
+$(if [[ -n "$prev_gate" && "$prev_gate" != "null" ]]; then
+echo "
+Previous quality gate: ${prev_gate}"
+fi)
+$(if [[ -n "$context_refs_content" ]]; then
+echo "
+Referenced files from prior batches:
+${context_refs_content}"
+fi)
+- All ${prev_test_count}+ tests must pass
+SUFFIX
+}
+# build_batch_prompt — full prompt for a single batch (stable prefix + variable suffix).
+#
+# The stable prefix is emitted FIRST and the per-batch suffix second, separated by one
+# blank line. Keeping the batch-invariant text at the front is what lets the leading
+# part of the prompt stay identical from batch to batch.
+#
+# Callers that run multiple batches should prefer calling build_stable_prefix once and
+# caching the result, then calling build_variable_suffix per batch — see run-plan-headless.sh.
+# This function is a convenience wrapper for single-batch callers and tests.
+#
+# Args: <plan_file> <batch_num> <worktree> <python> <quality_gate_cmd> <prev_test_count>
+# Outputs: the assembled prompt on stdout.
+build_batch_prompt() {
+    local plan_file="$1"
+    local batch_num="$2"
+    local worktree="$3"
+    local python="$4"
+    local quality_gate_cmd="$5"
+    local prev_test_count="$6"
+    local stable_prefix variable_suffix
+    stable_prefix=$(build_stable_prefix "$plan_file" "$worktree" "$python" "$quality_gate_cmd")
+    variable_suffix=$(build_variable_suffix "$plan_file" "$batch_num" "$worktree" "$prev_test_count")
+    # Prefix before suffix: the arguments were previously swapped, which put the
+    # per-batch text ahead of the stable text.
+    printf '%s\n\n%s\n' "$stable_prefix" "$variable_suffix"
+}
+# Generate AGENTS.md in the worktree for agent team awareness.
+# Args: <plan_file> <worktree> <mode>
+generate_agents_md() {
+    local plan_file="$1" worktree="$2" mode="${3:-headless}"
+    # Source parser if needed
+    type count_batches &>/dev/null || source "$(dirname "${BASH_SOURCE[0]}")/run-plan-parser.sh"
+    local total_batches
+    total_batches=$(count_batches "$plan_file")
+    local batch_info=""
+    for ((b = 1; b <= total_batches; b++)); do
+        local title
+        title=$(get_batch_title "$plan_file" "$b")
+        [[ -z "$title" ]] && continue
+        batch_info+="| $b | $title |"$'\n'
+    done
+    cat > "$worktree/AGENTS.md" << EOF
+# Agent Configuration
+**Plan:** $(basename "$plan_file")
+**Mode:** $mode
+**Total:** $total_batches batches
+## Tools Allowed
+Bash, Read, Write, Edit, Grep, Glob
+## Permission Mode
+bypassPermissions
+## Batches
+| # | Title |
+|---|-------|
+${batch_info}
+## Guidelines
+- Run quality gate after each batch
+- Commit after passing gate
+- Append discoveries to progress.txt
+- Do not modify files outside your batch scope
+EOF
+}

package/scripts/lib/run-plan-quality-gate.sh ADDED Viewed

@@ -0,0 +1,132 @@
+#!/usr/bin/env bash
+# run-plan-quality-gate.sh — Quality gate runner for plan execution
+#
+# Wraps quality-gate.sh with test count regression detection + git status check.
+# Runs between every batch in all modes.
+#
+# Functions:
+#   extract_test_count <test_output>                          -> passed-test count parsed from pytest/jest/go test output (-1 if unrecognized)
+#   check_test_count_regression <new_count> <previous_count>  -> 0 if new >= previous, 1 otherwise
+#   check_git_clean <worktree>                                -> 0 if clean, 1 if dirty
+#   run_quality_gate <worktree> <quality_gate_cmd> <batch_num> [duration] -> full gate: cmd + regression + clean + state update
+#
+# Requires: run-plan-state.sh sourced for run_quality_gate (state functions)
+QUALITY_GATE_SCRIPT="${QUALITY_GATE_SCRIPT:-$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/../quality-gate.sh}"
+# Extract passed test count from test output.
+# Supports pytest ("N passed"), jest ("Tests: N passed"), go test ("ok" lines).
+# Args: <test_output>
+# Prints the count on stdout, or -1 if no recognized format is found (signals
+# "skip regression check").
+extract_test_count() {
+    local output="$1"
+    local count
+    # 1. pytest / jest: take the last "N passed" in the output, e.g. "85 passed" in
+    #    "3 failed, 85 passed, 2 skipped in 30.1s" or "45 passed" in
+    #    "Tests:       45 passed, 48 total".
+    #    Jest also prints "Test Suites: N passed" and "Snapshots: N passed" summary
+    #    lines. Those count suites and snapshots, not tests, so they are dropped
+    #    before scanning — otherwise a trailing Snapshots line would be reported as
+    #    the test count.
+    count=$(printf '%s\n' "$output" | grep -vE '^[[:space:]]*(Test Suites|Snapshots):' | grep -oE '[0-9]+ passed' | tail -n 1 | grep -oE '^[0-9]+' || true)
+    if [[ -n "$count" ]]; then
+        echo "$count"
+        return
+    fi
+    # 2. go test: count "ok" lines (each = one passing package)
+    count=$(printf '%s\n' "$output" | grep -c '^ok' || true)
+    if [[ "$count" -gt 0 ]]; then
+        echo "$count"
+        return
+    fi
+    # 3. No recognized format — return -1 to signal "skip regression check"
+    echo "-1"
+}
+# Check for test count regression.
+# Returns 0 if new_count >= previous_count, 1 otherwise.
+# -1 means unrecognized format — skip regression check.
+check_test_count_regression() {
+    local new_count="$1" previous_count="$2"
+    if [[ "$new_count" == "-1" || "$previous_count" == "-1" ]]; then
+        echo "INFO: Skipping test count regression check (unrecognized test format)" >&2
+        return 0
+    fi
+    if [[ "$new_count" -ge "$previous_count" ]]; then
+        return 0
+    else
+        echo "WARNING: Test count regression: $new_count < $previous_count (previous)" >&2
+        return 1
+    fi
+}
+# Check if worktree has uncommitted changes.
+# Returns 0 if clean, 1 if dirty.
+check_git_clean() {
+    local worktree="$1"
+    local status
+    status=$(git -C "$worktree" status --porcelain 2>/dev/null)
+    if [[ -z "$status" ]]; then
+        return 0
+    else
+        echo "WARNING: Worktree has uncommitted changes:" >&2
+        echo "$status" >&2
+        return 1
+    fi
+}
+# Run the full quality gate for a batch.
+# Executes quality_gate_cmd, checks test regression, checks git clean, updates state.
+# Returns 0 on pass, 1 on fail.
+#
+# Requires run-plan-state.sh functions: get_previous_test_count, complete_batch, set_quality_gate
+run_quality_gate() {
+    local worktree="$1" quality_gate_cmd="$2" batch_num="$3" duration="${4:-0}"
+    local gate_output gate_exit test_count previous_count
+    echo "=== Quality Gate: Batch $batch_num ==="
+    # 1. Execute quality gate command in worktree.
+    # Use 'bash -c' rather than 'eval' to avoid command injection from the
+    # user-supplied --quality-gate CLI flag (#3). The command may contain
+    # shell features (pipes, redirects) so a simple array exec isn't viable,
+    # but 'bash -c' still runs in a fresh subshell which limits injection scope.
+    gate_output=$(cd "$worktree" && bash -c "$quality_gate_cmd" 2>&1) && gate_exit=0 || gate_exit=$?
+    echo "$gate_output"
+    if [[ $gate_exit -ne 0 ]]; then
+        echo ""
+        echo "QUALITY GATE FAILED: command exited $gate_exit"
+        set_quality_gate "$worktree" "$batch_num" "false" 0
+        return 1
+    fi
+    # 2. Extract test count from output
+    test_count=$(extract_test_count "$gate_output")
+    echo "Test count: $test_count"
+    # 3. Compare against previous batch count
+    previous_count=$(get_previous_test_count "$worktree")
+    if ! check_test_count_regression "$test_count" "$previous_count"; then
+        echo "QUALITY GATE FAILED: test count regression ($test_count < $previous_count)"
+        set_quality_gate "$worktree" "$batch_num" "false" "$test_count"
+        return 1
+    fi
+    # 4. Check git clean
+    if ! check_git_clean "$worktree"; then
+        echo "QUALITY GATE FAILED: uncommitted changes in worktree"
+        set_quality_gate "$worktree" "$batch_num" "false" "$test_count"
+        return 1
+    fi
+    # 5. Update state — batch complete, gate passed
+    complete_batch "$worktree" "$batch_num" "$test_count" "$duration"
+    set_quality_gate "$worktree" "$batch_num" "true" "$test_count"
+    echo "QUALITY GATE PASSED (batch $batch_num, $test_count tests)"
+    return 0
+}