npm - autonomous-coding-toolkit - Versions diffs - 1.0.0 - Mend

autonomous-coding-toolkit 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (324) hide show

package/.claude-plugin/marketplace.json +22 -0
package/.claude-plugin/plugin.json +13 -0
package/LICENSE +21 -0
package/Makefile +21 -0
package/README.md +140 -0
package/SECURITY.md +28 -0
package/agents/bash-expert.md +113 -0
package/agents/dependency-auditor.md +138 -0
package/agents/integration-tester.md +120 -0
package/agents/lesson-scanner.md +149 -0
package/agents/python-expert.md +179 -0
package/agents/service-monitor.md +141 -0
package/agents/shell-expert.md +147 -0
package/benchmarks/runner.sh +147 -0
package/benchmarks/tasks/01-rest-endpoint/rubric.sh +29 -0
package/benchmarks/tasks/01-rest-endpoint/task.md +17 -0
package/benchmarks/tasks/02-refactor-module/task.md +8 -0
package/benchmarks/tasks/03-fix-integration-bug/task.md +8 -0
package/benchmarks/tasks/04-add-test-coverage/task.md +8 -0
package/benchmarks/tasks/05-multi-file-feature/task.md +8 -0
package/bin/act.js +238 -0
package/commands/autocode.md +6 -0
package/commands/cancel-ralph.md +18 -0
package/commands/code-factory.md +53 -0
package/commands/create-prd.md +55 -0
package/commands/ralph-loop.md +18 -0
package/commands/run-plan.md +117 -0
package/commands/submit-lesson.md +122 -0
package/docs/ARCHITECTURE.md +630 -0
package/docs/CONTRIBUTING.md +125 -0
package/docs/lessons/0001-bare-exception-swallowing.md +34 -0
package/docs/lessons/0002-async-def-without-await.md +28 -0
package/docs/lessons/0003-create-task-without-callback.md +28 -0
package/docs/lessons/0004-hardcoded-test-counts.md +28 -0
package/docs/lessons/0005-sqlite-without-closing.md +33 -0
package/docs/lessons/0006-venv-pip-path.md +27 -0
package/docs/lessons/0007-runner-state-self-rejection.md +35 -0
package/docs/lessons/0008-quality-gate-blind-spot.md +33 -0
package/docs/lessons/0009-parser-overcount-empty-batches.md +36 -0
package/docs/lessons/0010-local-outside-function-bash.md +33 -0
package/docs/lessons/0011-batch-tests-for-unimplemented-code.md +36 -0
package/docs/lessons/0012-api-markdown-unescaped-chars.md +33 -0
package/docs/lessons/0013-export-prefix-env-parsing.md +33 -0
package/docs/lessons/0014-decorator-registry-import-side-effect.md +43 -0
package/docs/lessons/0015-frontend-backend-schema-drift.md +43 -0
package/docs/lessons/0016-event-driven-cold-start-seeding.md +44 -0
package/docs/lessons/0017-copy-paste-logic-diverges.md +43 -0
package/docs/lessons/0018-layer-passes-pipeline-broken.md +45 -0
package/docs/lessons/0019-systemd-envfile-ignores-export.md +41 -0
package/docs/lessons/0020-persist-state-incrementally.md +44 -0
package/docs/lessons/0021-dual-axis-testing.md +48 -0
package/docs/lessons/0022-jsx-factory-shadowing.md +43 -0
package/docs/lessons/0023-static-analysis-spiral.md +51 -0
package/docs/lessons/0024-shared-pipeline-implementation.md +55 -0
package/docs/lessons/0025-defense-in-depth-all-entry-points.md +65 -0
package/docs/lessons/0026-linter-no-rules-false-enforcement.md +54 -0
package/docs/lessons/0027-jsx-silent-prop-drop.md +64 -0
package/docs/lessons/0028-no-infrastructure-in-client-code.md +49 -0
package/docs/lessons/0029-never-write-secrets-to-files.md +61 -0
package/docs/lessons/0030-cache-merge-not-replace.md +62 -0
package/docs/lessons/0031-verify-units-at-boundaries.md +66 -0
package/docs/lessons/0032-module-lifecycle-subscribe-unsubscribe.md +89 -0
package/docs/lessons/0033-async-iteration-mutable-snapshot.md +72 -0
package/docs/lessons/0034-caller-missing-await-silent-discard.md +65 -0
package/docs/lessons/0035-duplicate-registration-silent-overwrite.md +85 -0
package/docs/lessons/0036-websocket-dirty-disconnect.md +33 -0
package/docs/lessons/0037-parallel-agents-worktree-corruption.md +31 -0
package/docs/lessons/0038-subscribe-no-stored-ref.md +36 -0
package/docs/lessons/0039-fallback-or-default-hides-bugs.md +34 -0
package/docs/lessons/0040-event-firehose-filter-first.md +36 -0
package/docs/lessons/0041-ambiguous-base-dir-path-nesting.md +32 -0
package/docs/lessons/0042-spec-compliance-insufficient.md +36 -0
package/docs/lessons/0043-exact-count-extensible-collections.md +32 -0
package/docs/lessons/0044-relative-file-deps-worktree.md +39 -0
package/docs/lessons/0045-iterative-design-improvement.md +33 -0
package/docs/lessons/0046-plan-assertion-math-bugs.md +38 -0
package/docs/lessons/0047-pytest-single-threaded-default.md +37 -0
package/docs/lessons/0048-integration-wiring-batch.md +40 -0
package/docs/lessons/0049-ab-verification.md +41 -0
package/docs/lessons/0050-editing-sourced-files-during-execution.md +33 -0
package/docs/lessons/0051-infrastructure-fixes-cant-self-heal.md +30 -0
package/docs/lessons/0052-uncommitted-changes-poison-quality-gates.md +31 -0
package/docs/lessons/0053-jq-compact-flag-inconsistency.md +31 -0
package/docs/lessons/0054-parser-matches-inside-code-blocks.md +30 -0
package/docs/lessons/0055-agents-compensate-for-garbled-prompts.md +31 -0
package/docs/lessons/0056-grep-count-exit-code-on-zero.md +42 -0
package/docs/lessons/0057-new-artifacts-break-git-clean-gates.md +42 -0
package/docs/lessons/0058-dead-config-keys-never-consumed.md +49 -0
package/docs/lessons/0059-contract-test-shared-structures.md +53 -0
package/docs/lessons/0060-set-e-silent-death-in-runners.md +53 -0
package/docs/lessons/0061-context-injection-dirty-state.md +50 -0
package/docs/lessons/0062-sibling-bug-neighborhood-scan.md +29 -0
package/docs/lessons/0063-one-flag-two-lifetimes.md +31 -0
package/docs/lessons/0064-test-passes-wrong-reason.md +31 -0
package/docs/lessons/0065-pipefail-grep-count-double-output.md +39 -0
package/docs/lessons/0066-local-keyword-outside-function.md +37 -0
package/docs/lessons/0067-stdin-hang-non-interactive-shell.md +36 -0
package/docs/lessons/0068-agent-builds-wrong-thing-correctly.md +31 -0
package/docs/lessons/0069-plan-quality-dominates-execution.md +30 -0
package/docs/lessons/0070-spec-echo-back-prevents-drift.md +31 -0
package/docs/lessons/0071-positive-instructions-outperform-negative.md +30 -0
package/docs/lessons/0072-lost-in-the-middle-context-placement.md +30 -0
package/docs/lessons/0073-unscoped-lessons-cause-false-positives.md +30 -0
package/docs/lessons/0074-stale-context-injection-wrong-batch.md +32 -0
package/docs/lessons/0075-research-artifacts-must-persist.md +32 -0
package/docs/lessons/0076-wrong-decomposition-contaminates-downstream.md +30 -0
package/docs/lessons/0077-cherry-pick-merges-need-manual-resolution.md +30 -0
package/docs/lessons/0078-static-review-without-live-test.md +30 -0
package/docs/lessons/0079-integration-wiring-batch-required.md +32 -0
package/docs/lessons/FRAMEWORK.md +161 -0
package/docs/lessons/SUMMARY.md +201 -0
package/docs/lessons/TEMPLATE.md +85 -0
package/docs/plans/2026-02-21-code-factory-v2-design.md +204 -0
package/docs/plans/2026-02-21-code-factory-v2-implementation-plan.md +2189 -0
package/docs/plans/2026-02-21-code-factory-v2-phase4-design.md +537 -0
package/docs/plans/2026-02-21-code-factory-v2-phase4-implementation-plan.md +2012 -0
package/docs/plans/2026-02-21-hardening-pass-design.md +108 -0
package/docs/plans/2026-02-21-hardening-pass-plan.md +1378 -0
package/docs/plans/2026-02-21-mab-research-report.md +406 -0
package/docs/plans/2026-02-21-marketplace-restructure-design.md +240 -0
package/docs/plans/2026-02-21-marketplace-restructure-plan.md +832 -0
package/docs/plans/2026-02-21-phase4-completion-plan.md +697 -0
package/docs/plans/2026-02-21-validator-suite-design.md +148 -0
package/docs/plans/2026-02-21-validator-suite-plan.md +540 -0
package/docs/plans/2026-02-22-mab-research-round2.md +556 -0
package/docs/plans/2026-02-22-mab-run-design.md +462 -0
package/docs/plans/2026-02-22-mab-run-plan.md +2046 -0
package/docs/plans/2026-02-22-operations-design-methodology-research.md +681 -0
package/docs/plans/2026-02-22-research-agent-failure-taxonomy.md +532 -0
package/docs/plans/2026-02-22-research-code-guideline-policies.md +886 -0
package/docs/plans/2026-02-22-research-codebase-audit-refactoring.md +908 -0
package/docs/plans/2026-02-22-research-coding-standards-documentation.md +541 -0
package/docs/plans/2026-02-22-research-competitive-landscape.md +687 -0
package/docs/plans/2026-02-22-research-comprehensive-testing.md +1076 -0
package/docs/plans/2026-02-22-research-context-utilization.md +459 -0
package/docs/plans/2026-02-22-research-cost-quality-tradeoff.md +548 -0
package/docs/plans/2026-02-22-research-lesson-transferability.md +508 -0
package/docs/plans/2026-02-22-research-multi-agent-coordination.md +312 -0
package/docs/plans/2026-02-22-research-phase-integration.md +602 -0
package/docs/plans/2026-02-22-research-plan-quality.md +428 -0
package/docs/plans/2026-02-22-research-prompt-engineering.md +558 -0
package/docs/plans/2026-02-22-research-unconventional-perspectives.md +528 -0
package/docs/plans/2026-02-22-research-user-adoption.md +638 -0
package/docs/plans/2026-02-22-research-verification-effectiveness.md +433 -0
package/docs/plans/2026-02-23-agent-suite-design.md +299 -0
package/docs/plans/2026-02-23-agent-suite-plan.md +578 -0
package/docs/plans/2026-02-23-phase3-cost-infrastructure-design.md +148 -0
package/docs/plans/2026-02-23-phase3-cost-infrastructure-plan.md +1062 -0
package/docs/plans/2026-02-23-research-bash-expert-agent.md +543 -0
package/docs/plans/2026-02-23-research-dependency-auditor-agent.md +564 -0
package/docs/plans/2026-02-23-research-improving-existing-agents.md +503 -0
package/docs/plans/2026-02-23-research-integration-tester-agent.md +454 -0
package/docs/plans/2026-02-23-research-python-expert-agent.md +429 -0
package/docs/plans/2026-02-23-research-service-monitor-agent.md +425 -0
package/docs/plans/2026-02-23-research-shell-expert-agent.md +533 -0
package/docs/plans/2026-02-23-roadmap-to-completion.md +530 -0
package/docs/plans/2026-02-24-headless-module-split-design.md +98 -0
package/docs/plans/2026-02-24-headless-module-split.md +443 -0
package/docs/plans/2026-02-24-lesson-scope-metadata-design.md +228 -0
package/docs/plans/2026-02-24-lesson-scope-metadata-plan.md +968 -0
package/docs/plans/2026-02-24-npm-packaging-design.md +841 -0
package/docs/plans/2026-02-24-npm-packaging-plan.md +1965 -0
package/docs/plans/audit-findings.md +186 -0
package/docs/telegram-notification-format.md +98 -0
package/examples/example-plan.md +51 -0
package/examples/example-prd.json +72 -0
package/examples/example-roadmap.md +33 -0
package/examples/quickstart-plan.md +63 -0
package/hooks/hooks.json +26 -0
package/hooks/setup-symlinks.sh +48 -0
package/hooks/stop-hook.sh +135 -0
package/package.json +47 -0
package/policies/bash.md +71 -0
package/policies/python.md +71 -0
package/policies/testing.md +61 -0
package/policies/universal.md +60 -0
package/scripts/analyze-report.sh +97 -0
package/scripts/architecture-map.sh +145 -0
package/scripts/auto-compound.sh +273 -0
package/scripts/batch-audit.sh +42 -0
package/scripts/batch-test.sh +101 -0
package/scripts/entropy-audit.sh +221 -0
package/scripts/failure-digest.sh +51 -0
package/scripts/generate-ast-rules.sh +96 -0
package/scripts/init.sh +112 -0
package/scripts/lesson-check.sh +428 -0
package/scripts/lib/common.sh +61 -0
package/scripts/lib/cost-tracking.sh +153 -0
package/scripts/lib/ollama.sh +60 -0
package/scripts/lib/progress-writer.sh +128 -0
package/scripts/lib/run-plan-context.sh +215 -0
package/scripts/lib/run-plan-echo-back.sh +231 -0
package/scripts/lib/run-plan-headless.sh +396 -0
package/scripts/lib/run-plan-notify.sh +57 -0
package/scripts/lib/run-plan-parser.sh +81 -0
package/scripts/lib/run-plan-prompt.sh +215 -0
package/scripts/lib/run-plan-quality-gate.sh +132 -0
package/scripts/lib/run-plan-routing.sh +315 -0
package/scripts/lib/run-plan-sampling.sh +170 -0
package/scripts/lib/run-plan-scoring.sh +146 -0
package/scripts/lib/run-plan-state.sh +142 -0
package/scripts/lib/run-plan-team.sh +199 -0
package/scripts/lib/telegram.sh +54 -0
package/scripts/lib/thompson-sampling.sh +176 -0
package/scripts/license-check.sh +74 -0
package/scripts/mab-run.sh +575 -0
package/scripts/module-size-check.sh +146 -0
package/scripts/patterns/async-no-await.yml +5 -0
package/scripts/patterns/bare-except.yml +6 -0
package/scripts/patterns/empty-catch.yml +6 -0
package/scripts/patterns/hardcoded-localhost.yml +9 -0
package/scripts/patterns/retry-loop-no-backoff.yml +12 -0
package/scripts/pipeline-status.sh +197 -0
package/scripts/policy-check.sh +226 -0
package/scripts/prior-art-search.sh +133 -0
package/scripts/promote-mab-lessons.sh +126 -0
package/scripts/prompts/agent-a-superpowers.md +29 -0
package/scripts/prompts/agent-b-ralph.md +29 -0
package/scripts/prompts/judge-agent.md +61 -0
package/scripts/prompts/planner-agent.md +44 -0
package/scripts/pull-community-lessons.sh +90 -0
package/scripts/quality-gate.sh +266 -0
package/scripts/research-gate.sh +90 -0
package/scripts/run-plan.sh +329 -0
package/scripts/scope-infer.sh +159 -0
package/scripts/setup-ralph-loop.sh +155 -0
package/scripts/telemetry.sh +230 -0
package/scripts/tests/run-all-tests.sh +52 -0
package/scripts/tests/test-act-cli.sh +46 -0
package/scripts/tests/test-agents-md.sh +87 -0
package/scripts/tests/test-analyze-report.sh +114 -0
package/scripts/tests/test-architecture-map.sh +89 -0
package/scripts/tests/test-auto-compound.sh +169 -0
package/scripts/tests/test-batch-test.sh +65 -0
package/scripts/tests/test-benchmark-runner.sh +25 -0
package/scripts/tests/test-common.sh +168 -0
package/scripts/tests/test-cost-tracking.sh +158 -0
package/scripts/tests/test-echo-back.sh +180 -0
package/scripts/tests/test-entropy-audit.sh +146 -0
package/scripts/tests/test-failure-digest.sh +66 -0
package/scripts/tests/test-generate-ast-rules.sh +145 -0
package/scripts/tests/test-helpers.sh +82 -0
package/scripts/tests/test-init.sh +47 -0
package/scripts/tests/test-lesson-check.sh +278 -0
package/scripts/tests/test-lesson-local.sh +55 -0
package/scripts/tests/test-license-check.sh +109 -0
package/scripts/tests/test-mab-run.sh +182 -0
package/scripts/tests/test-ollama-lib.sh +49 -0
package/scripts/tests/test-ollama.sh +60 -0
package/scripts/tests/test-pipeline-status.sh +198 -0
package/scripts/tests/test-policy-check.sh +124 -0
package/scripts/tests/test-prior-art-search.sh +96 -0
package/scripts/tests/test-progress-writer.sh +140 -0
package/scripts/tests/test-promote-mab-lessons.sh +110 -0
package/scripts/tests/test-pull-community-lessons.sh +149 -0
package/scripts/tests/test-quality-gate.sh +241 -0
package/scripts/tests/test-research-gate.sh +132 -0
package/scripts/tests/test-run-plan-cli.sh +86 -0
package/scripts/tests/test-run-plan-context.sh +305 -0
package/scripts/tests/test-run-plan-e2e.sh +153 -0
package/scripts/tests/test-run-plan-headless.sh +424 -0
package/scripts/tests/test-run-plan-notify.sh +124 -0
package/scripts/tests/test-run-plan-parser.sh +217 -0
package/scripts/tests/test-run-plan-prompt.sh +254 -0
package/scripts/tests/test-run-plan-quality-gate.sh +222 -0
package/scripts/tests/test-run-plan-routing.sh +178 -0
package/scripts/tests/test-run-plan-scoring.sh +148 -0
package/scripts/tests/test-run-plan-state.sh +261 -0
package/scripts/tests/test-run-plan-team.sh +157 -0
package/scripts/tests/test-scope-infer.sh +150 -0
package/scripts/tests/test-setup-ralph-loop.sh +63 -0
package/scripts/tests/test-telegram-env.sh +38 -0
package/scripts/tests/test-telegram.sh +121 -0
package/scripts/tests/test-telemetry.sh +46 -0
package/scripts/tests/test-thompson-sampling.sh +139 -0
package/scripts/tests/test-validate-all.sh +60 -0
package/scripts/tests/test-validate-commands.sh +89 -0
package/scripts/tests/test-validate-hooks.sh +98 -0
package/scripts/tests/test-validate-lessons.sh +150 -0
package/scripts/tests/test-validate-plan-quality.sh +235 -0
package/scripts/tests/test-validate-plans.sh +187 -0
package/scripts/tests/test-validate-plugin.sh +106 -0
package/scripts/tests/test-validate-prd.sh +184 -0
package/scripts/tests/test-validate-skills.sh +134 -0
package/scripts/validate-all.sh +57 -0
package/scripts/validate-commands.sh +67 -0
package/scripts/validate-hooks.sh +89 -0
package/scripts/validate-lessons.sh +98 -0
package/scripts/validate-plan-quality.sh +369 -0
package/scripts/validate-plans.sh +120 -0
package/scripts/validate-plugin.sh +86 -0
package/scripts/validate-policies.sh +42 -0
package/scripts/validate-prd.sh +118 -0
package/scripts/validate-skills.sh +96 -0
package/skills/autocode/SKILL.md +285 -0
package/skills/autocode/ab-verification.md +51 -0
package/skills/autocode/code-quality-standards.md +37 -0
package/skills/autocode/competitive-mode.md +364 -0
package/skills/brainstorming/SKILL.md +97 -0
package/skills/capture-lesson/SKILL.md +187 -0
package/skills/check-lessons/SKILL.md +116 -0
package/skills/dispatching-parallel-agents/SKILL.md +110 -0
package/skills/executing-plans/SKILL.md +85 -0
package/skills/finishing-a-development-branch/SKILL.md +201 -0
package/skills/receiving-code-review/SKILL.md +72 -0
package/skills/requesting-code-review/SKILL.md +59 -0
package/skills/requesting-code-review/code-reviewer.md +82 -0
package/skills/research/SKILL.md +145 -0
package/skills/roadmap/SKILL.md +115 -0
package/skills/subagent-driven-development/SKILL.md +98 -0
package/skills/subagent-driven-development/code-quality-reviewer-prompt.md +18 -0
package/skills/subagent-driven-development/implementer-prompt.md +73 -0
package/skills/subagent-driven-development/spec-reviewer-prompt.md +57 -0
package/skills/systematic-debugging/SKILL.md +134 -0
package/skills/systematic-debugging/condition-based-waiting.md +64 -0
package/skills/systematic-debugging/defense-in-depth.md +32 -0
package/skills/systematic-debugging/root-cause-tracing.md +55 -0
package/skills/test-driven-development/SKILL.md +167 -0
package/skills/using-git-worktrees/SKILL.md +219 -0
package/skills/using-superpowers/SKILL.md +54 -0
package/skills/verification-before-completion/SKILL.md +140 -0
package/skills/verify/SKILL.md +82 -0
package/skills/writing-plans/SKILL.md +128 -0
package/skills/writing-skills/SKILL.md +93 -0

package/scripts/tests/test-run-plan-context.sh ADDED Viewed

@@ -0,0 +1,305 @@
+#!/usr/bin/env bash
+set -euo pipefail
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+source "$SCRIPT_DIR/../lib/common.sh"
+source "$SCRIPT_DIR/../lib/run-plan-parser.sh"
+source "$SCRIPT_DIR/../lib/progress-writer.sh"
+source "$SCRIPT_DIR/../lib/run-plan-context.sh"
+FAILURES=0
+TESTS=0
+# assert_eq DESC EXPECTED ACTUAL
+# Counts one test. Prints "PASS: DESC" when EXPECTED and ACTUAL are the same
+# string; otherwise prints "FAIL: DESC" with both values and bumps FAILURES.
+assert_eq() {
+    local label="$1" want="$2" got="$3"
+    TESTS=$((TESTS + 1))
+    if [[ "$want" == "$got" ]]; then
+        printf '%s\n' "PASS: $label"
+        return 0
+    fi
+    printf '%s\n' "FAIL: $label"
+    printf '%s\n' "  expected: $want"
+    printf '%s\n' "  actual:   $got"
+    FAILURES=$((FAILURES + 1))
+}
+assert_contains() {
+    local desc="$1" needle="$2" haystack="$3"
+    TESTS=$((TESTS + 1))
+    if [[ "$haystack" == *"$needle"* ]]; then
+        echo "PASS: $desc"
+    else
+        echo "FAIL: $desc"
+        echo "  expected to contain: $needle"
+        echo "  actual: ${haystack:0:200}..."
+        FAILURES=$((FAILURES + 1))
+    fi
+}
+# assert_not_contains DESC NEEDLE HAYSTACK
+# Counts one test. Passes when NEEDLE does NOT occur literally in HAYSTACK.
+# On failure prints the forbidden needle plus the first 200 characters of
+# HAYSTACK, mirroring the diagnostics assert_contains gives, so a failing run
+# shows what was actually produced.
+assert_not_contains() {
+    local desc="$1" needle="$2" haystack="$3"
+    TESTS=$((TESTS + 1))
+    if [[ "$haystack" != *"$needle"* ]]; then
+        echo "PASS: $desc"
+    else
+        echo "FAIL: $desc"
+        echo "  expected NOT to contain: $needle"
+        echo "  actual: ${haystack:0:200}..."
+        FAILURES=$((FAILURES + 1))
+    fi
+}
+WORK=$(mktemp -d)
+trap 'rm -rf "$WORK"' EXIT  # #59: ensure cleanup on any exit path, including early test failure
+# === Setup test fixtures ===
+# State file
+cat > "$WORK/.run-plan-state.json" << 'JSON'
+{
+  "plan": "test-plan.md",
+  "mode": "headless",
+  "batches": {
+    "1": {"passed": true, "test_count": 50, "duration": 120},
+    "2": {"passed": true, "test_count": 75, "duration": 90}
+  }
+}
+JSON
+# Progress file
+cat > "$WORK/progress.txt" << 'TXT'
+Batch 1: Created shared library
+Batch 2: Fixed test parsing
+Discovery: jest output needs special handling
+TXT
+# Git repo for git log
+cd "$WORK" && git init -q && git commit --allow-empty -m "batch 1: initial" -q && git commit --allow-empty -m "batch 2: add tests" -q
+cd - > /dev/null
+# Plan with context_refs
+cat > "$WORK/test-plan.md" << 'PLAN'
+## Batch 1: Foundation
+### Task 1: Setup
+Create lib.
+## Batch 2: Tests
+### Task 2: Add tests
+context_refs: src/lib.sh
+## Batch 3: Integration
+### Task 3: Wire together
+context_refs: src/lib.sh, tests/test-lib.sh
+PLAN
+# Context ref files
+mkdir -p "$WORK/src" "$WORK/tests"
+echo "#!/bin/bash" > "$WORK/src/lib.sh"
+echo "echo hello" >> "$WORK/src/lib.sh"
+echo "#!/bin/bash" > "$WORK/tests/test-lib.sh"
+# === Tests ===
+# Batch 3 has context_refs and earlier batches recorded in the state file.
+ctx=$(generate_batch_context "$WORK/test-plan.md" 3 "$WORK")
+assert_contains "context: includes quality gate expectation" "tests must stay above 75" "$ctx"
+assert_contains "context: includes prior batch summary" "Batch 2" "$ctx"
+assert_contains "context: includes context_refs content" "echo hello" "$ctx"
+assert_not_contains "context: excludes batch 1 details for batch 3" "Batch 1: Foundation" "$ctx"
+# Batch 1 has nothing before it and no context_refs, so its context should be short.
+ctx=$(generate_batch_context "$WORK/test-plan.md" 1 "$WORK")
+assert_contains "context batch 1: minimal context" "Run-Plan" "$ctx"
+char_count=${#ctx}
+TESTS=$((TESTS + 1))
+if (( char_count >= 2000 )); then
+    echo "FAIL: context batch 1: over 2000 chars ($char_count)"
+    FAILURES=$((FAILURES + 1))
+else
+    echo "PASS: context batch 1: under 2000 chars ($char_count)"
+fi
+# Token budget: the batch 3 context must stay under 6000 chars (~1500 tokens)
+ctx=$(generate_batch_context "$WORK/test-plan.md" 3 "$WORK")
+char_count=${#ctx}
+TESTS=$((TESTS + 1))
+if (( char_count >= 6000 )); then
+    echo "FAIL: context batch 3: over 6000 chars ($char_count)"
+    FAILURES=$((FAILURES + 1))
+else
+    echo "PASS: context batch 3: under 6000 chars ($char_count)"
+fi
+# Failure patterns injection: drop a one-entry patterns file under logs/ and
+# expect its failure_type to show up in the batch 3 context.
+mkdir -p "$WORK/logs"
+printf '%s\n' '[{"batch_title_pattern": "integration", "failure_type": "missing import", "frequency": 3, "winning_fix": "check all imports before running tests"}]' \
+    > "$WORK/logs/failure-patterns.json"
+ctx=$(generate_batch_context "$WORK/test-plan.md" 3 "$WORK")
+assert_contains "context: includes failure pattern warning" "missing import" "$ctx"
+# === MAB lessons injection ===
+# With a lessons file present, both the lesson text and its section header appear.
+printf '%s\n' '[{"pattern": "check imports before tests", "context": "integration", "winner": "superpowers", "occurrences": 3, "promoted": false}]' \
+    > "$WORK/logs/mab-lessons.json"
+ctx_mab=$(generate_batch_context "$WORK/test-plan.md" 3 "$WORK")
+assert_contains "context: MAB lessons injected when file present" "check imports before tests" "$ctx_mab"
+assert_contains "context: MAB lessons section header" "MAB Lessons" "$ctx_mab"
+# Once the lessons file is gone the section must disappear again.
+rm -f "$WORK/logs/mab-lessons.json"
+ctx_no_mab=$(generate_batch_context "$WORK/test-plan.md" 3 "$WORK")
+assert_not_contains "context: no MAB section when file absent" "MAB Lessons" "$ctx_no_mab"
+# === No tail fallback: structured read returns empty, no wrong-batch data injected (#54) ===
+# progress.txt in this workspace holds only unstructured text (no per-batch headers).
+WORK_NOTAIL=$(mktemp -d)
+# Setting a trap replaces the previous EXIT handler rather than adding to it,
+# so $WORK is named again here; otherwise the first workspace would no longer
+# be removed on exit.
+trap 'rm -rf "$WORK" "$WORK_NOTAIL"' EXIT
+cat > "$WORK_NOTAIL/test-plan.md" << 'PLAN_NOTAIL'
+## Batch 1: Alpha
+### Task 1: Do alpha
+Do something.
+## Batch 2: Beta
+### Task 2: Do beta
+Do more.
+PLAN_NOTAIL
+cat > "$WORK_NOTAIL/.run-plan-state.json" << 'JSON_NOTAIL'
+{"plan": "test-plan.md", "mode": "headless", "batches": {}}
+JSON_NOTAIL
+# Write unrelated content to progress.txt (no structured headers)
+echo "some unrelated content from a different run" > "$WORK_NOTAIL/progress.txt"
+echo "batch 99 leftovers here" >> "$WORK_NOTAIL/progress.txt"
+# generate_batch_context for batch 2: progress.txt exists but has no structured batch 1 data
+# Should NOT inject the tail content as "Progress Notes"
+# git runs via -C with a throwaway identity so the commit works without global
+# git config and a failure aborts under `set -e` instead of being skipped.
+git -C "$WORK_NOTAIL" init -q
+git -C "$WORK_NOTAIL" -c user.name="Test" -c user.email="test@test.com" commit --allow-empty -m "init" -q
+ctx_notail=$(generate_batch_context "$WORK_NOTAIL/test-plan.md" 2 "$WORK_NOTAIL")
+assert_not_contains "no-tail-fallback: unrelated progress.txt content not injected" "batch 99 leftovers" "$ctx_notail"
+assert_not_contains "no-tail-fallback: tail content not injected as Progress Notes" "unrelated content from a different run" "$ctx_notail"
+# === git -C fix: git log works without cd (#61) ===
+# The context for the main workspace should carry a recent-commits section even
+# though this script's cwd is not the workspace.
+ctx_gitlog=$(generate_batch_context "$WORK/test-plan.md" 3 "$WORK")
+assert_contains "git-C: recent commits appear in context" "Recent Commits" "$ctx_gitlog"
+# === Failure pattern recording ===
+# Start from a clean slate so the counts below are not skewed by the fixture file.
+rm -f "$WORK/logs/failure-patterns.json"
+record_failure_pattern "$WORK" "Integration Wiring" "missing import" "check imports before tests"
+if [[ -f "$WORK/logs/failure-patterns.json" ]]; then
+    patterns_file_present=true
+else
+    patterns_file_present=false
+fi
+assert_eq "record_failure_pattern: creates file" "true" "$patterns_file_present"
+# Recording the identical pattern a second time should bump its frequency to 2
+record_failure_pattern "$WORK" "Integration Wiring" "missing import" "check imports before tests"
+freq=$(jq '.[0].frequency' "$WORK/logs/failure-patterns.json")
+assert_eq "record_failure_pattern: increments frequency" "2" "$freq"
+# A different pattern should be appended as a second entry
+record_failure_pattern "$WORK" "Test Suite" "flaky assertion" "use deterministic comparisons"
+count=$(jq 'length' "$WORK/logs/failure-patterns.json")
+assert_eq "record_failure_pattern: adds new pattern" "2" "$count"
+# === Bug #60 BEHAVIORAL: whitespace in context_refs is trimmed ===
+# context_refs: " src/lib.sh , tests/test-lib.sh " should resolve both files
+# despite leading/trailing spaces around each path.
+WORK_WS=$(mktemp -d)
+# Setting a trap replaces the previous EXIT handler rather than adding to it,
+# so the scratch directories created earlier are named again here to keep
+# them from leaking.
+trap 'rm -rf "$WORK" "$WORK_NOTAIL" "$WORK_WS"' EXIT
+# Plan with whitespace-padded context_refs
+cat > "$WORK_WS/test-plan.md" << 'PLAN_WS'
+## Batch 1: Setup
+### Task 1: Init
+Do init work.
+## Batch 2: Test whitespace
+### Task 2: Check refs
+context_refs:  src/padded.sh , tests/padded-test.sh
+PLAN_WS
+# Create the referenced files
+mkdir -p "$WORK_WS/src" "$WORK_WS/tests"
+echo "PADDED_CONTENT=true" > "$WORK_WS/src/padded.sh"
+echo "PADDED_TEST=true" > "$WORK_WS/tests/padded-test.sh"
+# State and git setup
+cat > "$WORK_WS/.run-plan-state.json" << 'JSON_WS'
+{"plan": "test-plan.md", "mode": "headless", "batches": {}}
+JSON_WS
+# git runs via -C with a throwaway identity so the commit works without global
+# git config and a failure aborts under `set -e` instead of being skipped.
+git -C "$WORK_WS" init -q
+git -C "$WORK_WS" -c user.name="Test" -c user.email="test@test.com" commit --allow-empty -m "init" -q
+ctx_ws=$(generate_batch_context "$WORK_WS/test-plan.md" 2 "$WORK_WS")
+assert_contains "whitespace-trimmed: padded.sh content included" "PADDED_CONTENT=true" "$ctx_ws"
+assert_contains "whitespace-trimmed: padded-test.sh content included" "PADDED_TEST=true" "$ctx_ws"
+# === Bug #50 BEHAVIORAL: non-readable progress.txt propagates error ===
+# When progress.txt exists but has restricted permissions, the tail call should
+# not silently swallow the error — stderr should show the permission denial.
+WORK_PERM=$(mktemp -d)
+# Setting a trap replaces the previous EXIT handler rather than adding to it.
+# This is the last trap in the script, so it names every scratch directory;
+# with only "$WORK_PERM" here the other three would be left behind on exit.
+trap 'rm -rf "$WORK" "$WORK_NOTAIL" "$WORK_WS" "$WORK_PERM"' EXIT
+mkdir -p "$WORK_PERM/src"
+cat > "$WORK_PERM/test-plan.md" << 'PLAN_PERM'
+## Batch 1: Init
+### Task 1: Do something
+Do something.
+## Batch 2: Next
+### Task 2: Do more
+Do more.
+PLAN_PERM
+cat > "$WORK_PERM/.run-plan-state.json" << 'JSON_PERM'
+{"plan": "test-plan.md", "mode": "headless", "batches": {"1": {"passed": true, "test_count": 10, "duration": 30}}}
+JSON_PERM
+# Create an unreadable progress.txt (note: this only works when not root)
+echo "some progress" > "$WORK_PERM/progress.txt"
+chmod 000 "$WORK_PERM/progress.txt"
+# git runs via -C with a throwaway identity so the commit works without global
+# git config and a failure aborts under `set -e` instead of being skipped.
+git -C "$WORK_PERM" init -q
+git -C "$WORK_PERM" -c user.name="Test" -c user.email="test@test.com" commit --allow-empty -m "init" -q
+# build_variable_suffix should produce stderr when progress.txt is unreadable
+# (the fix removed || true, so tail's permission error is visible).
+# `2>&1 >/dev/null` captures only stderr; `|| true` keeps a non-zero status
+# from tripping `set -e` before the check below runs.
+prompt_stderr=""
+prompt_stderr=$(build_variable_suffix "$WORK_PERM/test-plan.md" 2 "$WORK_PERM" 10 2>&1 >/dev/null) || true
+TESTS=$((TESTS + 1))
+# Only check if we're not root (root can read anything)
+if [[ "$(id -u)" -eq 0 ]]; then
+    echo "PASS: (skipped — running as root, permission test not applicable)"
+else
+    if [[ -n "$prompt_stderr" ]]; then
+        echo "PASS: unreadable progress.txt: error propagated to stderr"
+    else
+        echo "FAIL: unreadable progress.txt: error should propagate (not suppressed by || true)"
+        FAILURES=$((FAILURES + 1))
+    fi
+fi
+# Cleanup: put normal permissions back on the fixture (best effort)
+chmod 644 "$WORK_PERM/progress.txt" 2>/dev/null || true
+# === Summary ===
+echo ""
+echo "Results: $((TESTS - FAILURES))/$TESTS passed"
+if [[ $FAILURES -gt 0 ]]; then
+    echo "FAILURES: $FAILURES"
+    exit 1
+fi
+echo "ALL PASSED"

package/scripts/tests/test-run-plan-e2e.sh ADDED Viewed

@@ -0,0 +1,153 @@
+#!/usr/bin/env bash
+# Test run-plan.sh end-to-end — exercises the full Mode C headless loop
+# with a fake claude binary and fake quality gate (no real API calls).
+set -euo pipefail
+# Locate run-plan.sh relative to this test file so the test works from any cwd.
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+RUN_PLAN="$SCRIPT_DIR/../run-plan.sh"
+# Running tallies updated by assert() further down.
+FAILURES=0
+TESTS=0
+# --- Setup temp workspace ---
+# WORK is the git worktree handed to run-plan.sh; FIXTURES holds the fake
+# executables so they never show up as untracked files inside the worktree.
+WORK=$(mktemp -d)
+FIXTURES=$(mktemp -d)
+trap 'rm -rf "$WORK" "$FIXTURES"' EXIT
+# 1. Git init the worktree
+git -C "$WORK" init -q
+# Repo-local identity so the commits below do not depend on global git config.
+git -C "$WORK" config user.email "test@test.com"
+git -C "$WORK" config user.name "Test"
+# Gitignore run-plan artifacts so check_git_clean passes
+cat > "$WORK/.gitignore" <<'GITIGNORE'
+.run-plan-state.json
+.run-plan-prefix.txt
+AGENTS.md
+logs/
+progress.txt
+GITIGNORE
+git -C "$WORK" add -A
+git -C "$WORK" commit -q -m "init"
+# 2. Create a small plan file (2 batches, 2 tasks each)
+cat > "$WORK/plan.md" <<'PLAN'
+# Test Plan
+## Batch 1: Setup foundation
+### Task 1: Create config module
+Create the config module with defaults.
+### Task 2: Add config tests
+Write tests for the config module.
+## Batch 2: Build feature
+### Task 3: Implement feature
+Build the main feature on top of config.
+### Task 4: Add feature tests
+Write tests for the feature.
+PLAN
+# Commit the plan so the worktree has no uncommitted changes before the run.
+git -C "$WORK" add plan.md
+git -C "$WORK" commit -q -m "add plan"
+# 3. Create fake claude binary (outside worktree to keep git clean)
+FAKE_BIN="$FIXTURES/bin"
+mkdir -p "$FAKE_BIN"
+# The stub prints a pytest-style result line and always exits 0.
+cat > "$FAKE_BIN/claude" <<'FAKECLAUDE'
+#!/usr/bin/env bash
+# Fake claude — simulates batch work without API calls
+echo "Working on batch..."
+echo "42 passed, 0 failed in 1.2s"
+exit 0
+FAKECLAUDE
+chmod +x "$FAKE_BIN/claude"
+# 4. Create fake quality gate script (outside worktree)
+cat > "$FIXTURES/fake-quality-gate.sh" <<'FAKEGATE'
+#!/usr/bin/env bash
+# Fake quality gate — always passes
+echo "42 passed in 1.0s"
+exit 0
+FAKEGATE
+chmod +x "$FIXTURES/fake-quality-gate.sh"
+# 5. Run run-plan.sh with fake claude first on PATH
+export PATH="$FAKE_BIN:$PATH"
+# Capture stdout+stderr in OUTPUT and the exit status in EXIT_CODE; the
+# `&& ... || ...` form keeps a non-zero status from aborting under `set -e`.
+# The `cd` happens inside the command substitution's subshell, so this
+# script's own working directory is unchanged.
+OUTPUT=$(cd "$WORK" && "$RUN_PLAN" "$WORK/plan.md" \
+    --worktree "$WORK" \
+    --quality-gate "$FIXTURES/fake-quality-gate.sh" \
+    --on-failure stop \
+    2>&1) && EXIT_CODE=0 || EXIT_CODE=$?
+# --- Assertions ---
+# assert DESC RESULT
+# Counts one test. Prints "PASS: DESC" only when RESULT is the literal string
+# "true"; any other value prints "FAIL: DESC" and bumps FAILURES.
+assert() {
+    local label="$1" outcome="$2"
+    TESTS=$((TESTS + 1))
+    if [[ "$outcome" != "true" ]]; then
+        printf '%s\n' "FAIL: $label"
+        FAILURES=$((FAILURES + 1))
+        return 0
+    fi
+    printf '%s\n' "PASS: $label"
+}
+# 1. Exit code is 0
+assert "exit code is 0" "$([ "$EXIT_CODE" -eq 0 ] && echo true || echo false)"
+# 2. State file was created
+assert "state file exists" "$([ -f "$WORK/.run-plan-state.json" ] && echo true || echo false)"
+# 3. Both batches in completed_batches
+if [[ -f "$WORK/.run-plan-state.json" ]]; then
+    # If jq exits non-zero (for example the file is not valid JSON, or
+    # .completed_batches is not an array), record "false" instead of letting
+    # `set -e` end the script before the summary and debug output are printed.
+    HAS_BATCH_1=$(jq '.completed_batches | contains([1])' "$WORK/.run-plan-state.json") || HAS_BATCH_1=false
+    HAS_BATCH_2=$(jq '.completed_batches | contains([2])' "$WORK/.run-plan-state.json") || HAS_BATCH_2=false
+    assert "batch 1 in completed_batches" "$HAS_BATCH_1"
+    assert "batch 2 in completed_batches" "$HAS_BATCH_2"
+else
+    assert "batch 1 in completed_batches (no state file)" "false"
+    assert "batch 2 in completed_batches (no state file)" "false"
+fi
+# 4. Log files exist
+assert "batch 1 log exists" "$([ -f "$WORK/logs/batch-1-attempt-1.log" ] && echo true || echo false)"
+assert "batch 2 log exists" "$([ -f "$WORK/logs/batch-2-attempt-1.log" ] && echo true || echo false)"
+# 5. Prefix file was created (#51)
+# The stable prefix is built once before the batch loop and cached to disk.
+# If this file is missing the per-batch prompt assembly would silently use an empty prefix.
+assert "prefix file exists" "$([ -f "$WORK/.run-plan-prefix.txt" ] && echo true || echo false)"
+# 6. Prefix file is non-empty and contains stable content
+if [[ -f "$WORK/.run-plan-prefix.txt" ]]; then
+    PREFIX_CONTENT=$(<"$WORK/.run-plan-prefix.txt")
+    # Literal substring matching in the shell: the worktree path is compared
+    # as text rather than as a regular expression, and no `echo | grep -q`
+    # pipeline is involved, so pipefail cannot turn an early grep exit into a
+    # false negative.
+    assert "prefix file contains TDD rule" "$([[ "$PREFIX_CONTENT" == *TDD* ]] && echo true || echo false)"
+    assert "prefix file contains worktree path" "$([[ "$PREFIX_CONTENT" == *"$WORK"* ]] && echo true || echo false)"
+else
+    assert "prefix file contains TDD rule (no file)" "false"
+    assert "prefix file contains worktree path (no file)" "false"
+fi
+# --- Summary ---
+echo ""
+echo "Results: $((TESTS - FAILURES))/$TESTS passed"
+if [[ $FAILURES -gt 0 ]]; then
+    echo "FAILURES: $FAILURES"
+    echo ""
+    echo "--- Debug output ---"
+    echo "$OUTPUT"
+    if [[ -f "$WORK/.run-plan-state.json" ]]; then
+        echo ""
+        echo "--- State file ---"
+        cat "$WORK/.run-plan-state.json"
+    fi
+    exit 1
+fi
+echo "ALL PASSED"