npm - autonomous-coding-toolkit - Versions diffs - 1.0.0 - Mend

autonomous-coding-toolkit 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (324) hide show

package/.claude-plugin/marketplace.json +22 -0
package/.claude-plugin/plugin.json +13 -0
package/LICENSE +21 -0
package/Makefile +21 -0
package/README.md +140 -0
package/SECURITY.md +28 -0
package/agents/bash-expert.md +113 -0
package/agents/dependency-auditor.md +138 -0
package/agents/integration-tester.md +120 -0
package/agents/lesson-scanner.md +149 -0
package/agents/python-expert.md +179 -0
package/agents/service-monitor.md +141 -0
package/agents/shell-expert.md +147 -0
package/benchmarks/runner.sh +147 -0
package/benchmarks/tasks/01-rest-endpoint/rubric.sh +29 -0
package/benchmarks/tasks/01-rest-endpoint/task.md +17 -0
package/benchmarks/tasks/02-refactor-module/task.md +8 -0
package/benchmarks/tasks/03-fix-integration-bug/task.md +8 -0
package/benchmarks/tasks/04-add-test-coverage/task.md +8 -0
package/benchmarks/tasks/05-multi-file-feature/task.md +8 -0
package/bin/act.js +238 -0
package/commands/autocode.md +6 -0
package/commands/cancel-ralph.md +18 -0
package/commands/code-factory.md +53 -0
package/commands/create-prd.md +55 -0
package/commands/ralph-loop.md +18 -0
package/commands/run-plan.md +117 -0
package/commands/submit-lesson.md +122 -0
package/docs/ARCHITECTURE.md +630 -0
package/docs/CONTRIBUTING.md +125 -0
package/docs/lessons/0001-bare-exception-swallowing.md +34 -0
package/docs/lessons/0002-async-def-without-await.md +28 -0
package/docs/lessons/0003-create-task-without-callback.md +28 -0
package/docs/lessons/0004-hardcoded-test-counts.md +28 -0
package/docs/lessons/0005-sqlite-without-closing.md +33 -0
package/docs/lessons/0006-venv-pip-path.md +27 -0
package/docs/lessons/0007-runner-state-self-rejection.md +35 -0
package/docs/lessons/0008-quality-gate-blind-spot.md +33 -0
package/docs/lessons/0009-parser-overcount-empty-batches.md +36 -0
package/docs/lessons/0010-local-outside-function-bash.md +33 -0
package/docs/lessons/0011-batch-tests-for-unimplemented-code.md +36 -0
package/docs/lessons/0012-api-markdown-unescaped-chars.md +33 -0
package/docs/lessons/0013-export-prefix-env-parsing.md +33 -0
package/docs/lessons/0014-decorator-registry-import-side-effect.md +43 -0
package/docs/lessons/0015-frontend-backend-schema-drift.md +43 -0
package/docs/lessons/0016-event-driven-cold-start-seeding.md +44 -0
package/docs/lessons/0017-copy-paste-logic-diverges.md +43 -0
package/docs/lessons/0018-layer-passes-pipeline-broken.md +45 -0
package/docs/lessons/0019-systemd-envfile-ignores-export.md +41 -0
package/docs/lessons/0020-persist-state-incrementally.md +44 -0
package/docs/lessons/0021-dual-axis-testing.md +48 -0
package/docs/lessons/0022-jsx-factory-shadowing.md +43 -0
package/docs/lessons/0023-static-analysis-spiral.md +51 -0
package/docs/lessons/0024-shared-pipeline-implementation.md +55 -0
package/docs/lessons/0025-defense-in-depth-all-entry-points.md +65 -0
package/docs/lessons/0026-linter-no-rules-false-enforcement.md +54 -0
package/docs/lessons/0027-jsx-silent-prop-drop.md +64 -0
package/docs/lessons/0028-no-infrastructure-in-client-code.md +49 -0
package/docs/lessons/0029-never-write-secrets-to-files.md +61 -0
package/docs/lessons/0030-cache-merge-not-replace.md +62 -0
package/docs/lessons/0031-verify-units-at-boundaries.md +66 -0
package/docs/lessons/0032-module-lifecycle-subscribe-unsubscribe.md +89 -0
package/docs/lessons/0033-async-iteration-mutable-snapshot.md +72 -0
package/docs/lessons/0034-caller-missing-await-silent-discard.md +65 -0
package/docs/lessons/0035-duplicate-registration-silent-overwrite.md +85 -0
package/docs/lessons/0036-websocket-dirty-disconnect.md +33 -0
package/docs/lessons/0037-parallel-agents-worktree-corruption.md +31 -0
package/docs/lessons/0038-subscribe-no-stored-ref.md +36 -0
package/docs/lessons/0039-fallback-or-default-hides-bugs.md +34 -0
package/docs/lessons/0040-event-firehose-filter-first.md +36 -0
package/docs/lessons/0041-ambiguous-base-dir-path-nesting.md +32 -0
package/docs/lessons/0042-spec-compliance-insufficient.md +36 -0
package/docs/lessons/0043-exact-count-extensible-collections.md +32 -0
package/docs/lessons/0044-relative-file-deps-worktree.md +39 -0
package/docs/lessons/0045-iterative-design-improvement.md +33 -0
package/docs/lessons/0046-plan-assertion-math-bugs.md +38 -0
package/docs/lessons/0047-pytest-single-threaded-default.md +37 -0
package/docs/lessons/0048-integration-wiring-batch.md +40 -0
package/docs/lessons/0049-ab-verification.md +41 -0
package/docs/lessons/0050-editing-sourced-files-during-execution.md +33 -0
package/docs/lessons/0051-infrastructure-fixes-cant-self-heal.md +30 -0
package/docs/lessons/0052-uncommitted-changes-poison-quality-gates.md +31 -0
package/docs/lessons/0053-jq-compact-flag-inconsistency.md +31 -0
package/docs/lessons/0054-parser-matches-inside-code-blocks.md +30 -0
package/docs/lessons/0055-agents-compensate-for-garbled-prompts.md +31 -0
package/docs/lessons/0056-grep-count-exit-code-on-zero.md +42 -0
package/docs/lessons/0057-new-artifacts-break-git-clean-gates.md +42 -0
package/docs/lessons/0058-dead-config-keys-never-consumed.md +49 -0
package/docs/lessons/0059-contract-test-shared-structures.md +53 -0
package/docs/lessons/0060-set-e-silent-death-in-runners.md +53 -0
package/docs/lessons/0061-context-injection-dirty-state.md +50 -0
package/docs/lessons/0062-sibling-bug-neighborhood-scan.md +29 -0
package/docs/lessons/0063-one-flag-two-lifetimes.md +31 -0
package/docs/lessons/0064-test-passes-wrong-reason.md +31 -0
package/docs/lessons/0065-pipefail-grep-count-double-output.md +39 -0
package/docs/lessons/0066-local-keyword-outside-function.md +37 -0
package/docs/lessons/0067-stdin-hang-non-interactive-shell.md +36 -0
package/docs/lessons/0068-agent-builds-wrong-thing-correctly.md +31 -0
package/docs/lessons/0069-plan-quality-dominates-execution.md +30 -0
package/docs/lessons/0070-spec-echo-back-prevents-drift.md +31 -0
package/docs/lessons/0071-positive-instructions-outperform-negative.md +30 -0
package/docs/lessons/0072-lost-in-the-middle-context-placement.md +30 -0
package/docs/lessons/0073-unscoped-lessons-cause-false-positives.md +30 -0
package/docs/lessons/0074-stale-context-injection-wrong-batch.md +32 -0
package/docs/lessons/0075-research-artifacts-must-persist.md +32 -0
package/docs/lessons/0076-wrong-decomposition-contaminates-downstream.md +30 -0
package/docs/lessons/0077-cherry-pick-merges-need-manual-resolution.md +30 -0
package/docs/lessons/0078-static-review-without-live-test.md +30 -0
package/docs/lessons/0079-integration-wiring-batch-required.md +32 -0
package/docs/lessons/FRAMEWORK.md +161 -0
package/docs/lessons/SUMMARY.md +201 -0
package/docs/lessons/TEMPLATE.md +85 -0
package/docs/plans/2026-02-21-code-factory-v2-design.md +204 -0
package/docs/plans/2026-02-21-code-factory-v2-implementation-plan.md +2189 -0
package/docs/plans/2026-02-21-code-factory-v2-phase4-design.md +537 -0
package/docs/plans/2026-02-21-code-factory-v2-phase4-implementation-plan.md +2012 -0
package/docs/plans/2026-02-21-hardening-pass-design.md +108 -0
package/docs/plans/2026-02-21-hardening-pass-plan.md +1378 -0
package/docs/plans/2026-02-21-mab-research-report.md +406 -0
package/docs/plans/2026-02-21-marketplace-restructure-design.md +240 -0
package/docs/plans/2026-02-21-marketplace-restructure-plan.md +832 -0
package/docs/plans/2026-02-21-phase4-completion-plan.md +697 -0
package/docs/plans/2026-02-21-validator-suite-design.md +148 -0
package/docs/plans/2026-02-21-validator-suite-plan.md +540 -0
package/docs/plans/2026-02-22-mab-research-round2.md +556 -0
package/docs/plans/2026-02-22-mab-run-design.md +462 -0
package/docs/plans/2026-02-22-mab-run-plan.md +2046 -0
package/docs/plans/2026-02-22-operations-design-methodology-research.md +681 -0
package/docs/plans/2026-02-22-research-agent-failure-taxonomy.md +532 -0
package/docs/plans/2026-02-22-research-code-guideline-policies.md +886 -0
package/docs/plans/2026-02-22-research-codebase-audit-refactoring.md +908 -0
package/docs/plans/2026-02-22-research-coding-standards-documentation.md +541 -0
package/docs/plans/2026-02-22-research-competitive-landscape.md +687 -0
package/docs/plans/2026-02-22-research-comprehensive-testing.md +1076 -0
package/docs/plans/2026-02-22-research-context-utilization.md +459 -0
package/docs/plans/2026-02-22-research-cost-quality-tradeoff.md +548 -0
package/docs/plans/2026-02-22-research-lesson-transferability.md +508 -0
package/docs/plans/2026-02-22-research-multi-agent-coordination.md +312 -0
package/docs/plans/2026-02-22-research-phase-integration.md +602 -0
package/docs/plans/2026-02-22-research-plan-quality.md +428 -0
package/docs/plans/2026-02-22-research-prompt-engineering.md +558 -0
package/docs/plans/2026-02-22-research-unconventional-perspectives.md +528 -0
package/docs/plans/2026-02-22-research-user-adoption.md +638 -0
package/docs/plans/2026-02-22-research-verification-effectiveness.md +433 -0
package/docs/plans/2026-02-23-agent-suite-design.md +299 -0
package/docs/plans/2026-02-23-agent-suite-plan.md +578 -0
package/docs/plans/2026-02-23-phase3-cost-infrastructure-design.md +148 -0
package/docs/plans/2026-02-23-phase3-cost-infrastructure-plan.md +1062 -0
package/docs/plans/2026-02-23-research-bash-expert-agent.md +543 -0
package/docs/plans/2026-02-23-research-dependency-auditor-agent.md +564 -0
package/docs/plans/2026-02-23-research-improving-existing-agents.md +503 -0
package/docs/plans/2026-02-23-research-integration-tester-agent.md +454 -0
package/docs/plans/2026-02-23-research-python-expert-agent.md +429 -0
package/docs/plans/2026-02-23-research-service-monitor-agent.md +425 -0
package/docs/plans/2026-02-23-research-shell-expert-agent.md +533 -0
package/docs/plans/2026-02-23-roadmap-to-completion.md +530 -0
package/docs/plans/2026-02-24-headless-module-split-design.md +98 -0
package/docs/plans/2026-02-24-headless-module-split.md +443 -0
package/docs/plans/2026-02-24-lesson-scope-metadata-design.md +228 -0
package/docs/plans/2026-02-24-lesson-scope-metadata-plan.md +968 -0
package/docs/plans/2026-02-24-npm-packaging-design.md +841 -0
package/docs/plans/2026-02-24-npm-packaging-plan.md +1965 -0
package/docs/plans/audit-findings.md +186 -0
package/docs/telegram-notification-format.md +98 -0
package/examples/example-plan.md +51 -0
package/examples/example-prd.json +72 -0
package/examples/example-roadmap.md +33 -0
package/examples/quickstart-plan.md +63 -0
package/hooks/hooks.json +26 -0
package/hooks/setup-symlinks.sh +48 -0
package/hooks/stop-hook.sh +135 -0
package/package.json +47 -0
package/policies/bash.md +71 -0
package/policies/python.md +71 -0
package/policies/testing.md +61 -0
package/policies/universal.md +60 -0
package/scripts/analyze-report.sh +97 -0
package/scripts/architecture-map.sh +145 -0
package/scripts/auto-compound.sh +273 -0
package/scripts/batch-audit.sh +42 -0
package/scripts/batch-test.sh +101 -0
package/scripts/entropy-audit.sh +221 -0
package/scripts/failure-digest.sh +51 -0
package/scripts/generate-ast-rules.sh +96 -0
package/scripts/init.sh +112 -0
package/scripts/lesson-check.sh +428 -0
package/scripts/lib/common.sh +61 -0
package/scripts/lib/cost-tracking.sh +153 -0
package/scripts/lib/ollama.sh +60 -0
package/scripts/lib/progress-writer.sh +128 -0
package/scripts/lib/run-plan-context.sh +215 -0
package/scripts/lib/run-plan-echo-back.sh +231 -0
package/scripts/lib/run-plan-headless.sh +396 -0
package/scripts/lib/run-plan-notify.sh +57 -0
package/scripts/lib/run-plan-parser.sh +81 -0
package/scripts/lib/run-plan-prompt.sh +215 -0
package/scripts/lib/run-plan-quality-gate.sh +132 -0
package/scripts/lib/run-plan-routing.sh +315 -0
package/scripts/lib/run-plan-sampling.sh +170 -0
package/scripts/lib/run-plan-scoring.sh +146 -0
package/scripts/lib/run-plan-state.sh +142 -0
package/scripts/lib/run-plan-team.sh +199 -0
package/scripts/lib/telegram.sh +54 -0
package/scripts/lib/thompson-sampling.sh +176 -0
package/scripts/license-check.sh +74 -0
package/scripts/mab-run.sh +575 -0
package/scripts/module-size-check.sh +146 -0
package/scripts/patterns/async-no-await.yml +5 -0
package/scripts/patterns/bare-except.yml +6 -0
package/scripts/patterns/empty-catch.yml +6 -0
package/scripts/patterns/hardcoded-localhost.yml +9 -0
package/scripts/patterns/retry-loop-no-backoff.yml +12 -0
package/scripts/pipeline-status.sh +197 -0
package/scripts/policy-check.sh +226 -0
package/scripts/prior-art-search.sh +133 -0
package/scripts/promote-mab-lessons.sh +126 -0
package/scripts/prompts/agent-a-superpowers.md +29 -0
package/scripts/prompts/agent-b-ralph.md +29 -0
package/scripts/prompts/judge-agent.md +61 -0
package/scripts/prompts/planner-agent.md +44 -0
package/scripts/pull-community-lessons.sh +90 -0
package/scripts/quality-gate.sh +266 -0
package/scripts/research-gate.sh +90 -0
package/scripts/run-plan.sh +329 -0
package/scripts/scope-infer.sh +159 -0
package/scripts/setup-ralph-loop.sh +155 -0
package/scripts/telemetry.sh +230 -0
package/scripts/tests/run-all-tests.sh +52 -0
package/scripts/tests/test-act-cli.sh +46 -0
package/scripts/tests/test-agents-md.sh +87 -0
package/scripts/tests/test-analyze-report.sh +114 -0
package/scripts/tests/test-architecture-map.sh +89 -0
package/scripts/tests/test-auto-compound.sh +169 -0
package/scripts/tests/test-batch-test.sh +65 -0
package/scripts/tests/test-benchmark-runner.sh +25 -0
package/scripts/tests/test-common.sh +168 -0
package/scripts/tests/test-cost-tracking.sh +158 -0
package/scripts/tests/test-echo-back.sh +180 -0
package/scripts/tests/test-entropy-audit.sh +146 -0
package/scripts/tests/test-failure-digest.sh +66 -0
package/scripts/tests/test-generate-ast-rules.sh +145 -0
package/scripts/tests/test-helpers.sh +82 -0
package/scripts/tests/test-init.sh +47 -0
package/scripts/tests/test-lesson-check.sh +278 -0
package/scripts/tests/test-lesson-local.sh +55 -0
package/scripts/tests/test-license-check.sh +109 -0
package/scripts/tests/test-mab-run.sh +182 -0
package/scripts/tests/test-ollama-lib.sh +49 -0
package/scripts/tests/test-ollama.sh +60 -0
package/scripts/tests/test-pipeline-status.sh +198 -0
package/scripts/tests/test-policy-check.sh +124 -0
package/scripts/tests/test-prior-art-search.sh +96 -0
package/scripts/tests/test-progress-writer.sh +140 -0
package/scripts/tests/test-promote-mab-lessons.sh +110 -0
package/scripts/tests/test-pull-community-lessons.sh +149 -0
package/scripts/tests/test-quality-gate.sh +241 -0
package/scripts/tests/test-research-gate.sh +132 -0
package/scripts/tests/test-run-plan-cli.sh +86 -0
package/scripts/tests/test-run-plan-context.sh +305 -0
package/scripts/tests/test-run-plan-e2e.sh +153 -0
package/scripts/tests/test-run-plan-headless.sh +424 -0
package/scripts/tests/test-run-plan-notify.sh +124 -0
package/scripts/tests/test-run-plan-parser.sh +217 -0
package/scripts/tests/test-run-plan-prompt.sh +254 -0
package/scripts/tests/test-run-plan-quality-gate.sh +222 -0
package/scripts/tests/test-run-plan-routing.sh +178 -0
package/scripts/tests/test-run-plan-scoring.sh +148 -0
package/scripts/tests/test-run-plan-state.sh +261 -0
package/scripts/tests/test-run-plan-team.sh +157 -0
package/scripts/tests/test-scope-infer.sh +150 -0
package/scripts/tests/test-setup-ralph-loop.sh +63 -0
package/scripts/tests/test-telegram-env.sh +38 -0
package/scripts/tests/test-telegram.sh +121 -0
package/scripts/tests/test-telemetry.sh +46 -0
package/scripts/tests/test-thompson-sampling.sh +139 -0
package/scripts/tests/test-validate-all.sh +60 -0
package/scripts/tests/test-validate-commands.sh +89 -0
package/scripts/tests/test-validate-hooks.sh +98 -0
package/scripts/tests/test-validate-lessons.sh +150 -0
package/scripts/tests/test-validate-plan-quality.sh +235 -0
package/scripts/tests/test-validate-plans.sh +187 -0
package/scripts/tests/test-validate-plugin.sh +106 -0
package/scripts/tests/test-validate-prd.sh +184 -0
package/scripts/tests/test-validate-skills.sh +134 -0
package/scripts/validate-all.sh +57 -0
package/scripts/validate-commands.sh +67 -0
package/scripts/validate-hooks.sh +89 -0
package/scripts/validate-lessons.sh +98 -0
package/scripts/validate-plan-quality.sh +369 -0
package/scripts/validate-plans.sh +120 -0
package/scripts/validate-plugin.sh +86 -0
package/scripts/validate-policies.sh +42 -0
package/scripts/validate-prd.sh +118 -0
package/scripts/validate-skills.sh +96 -0
package/skills/autocode/SKILL.md +285 -0
package/skills/autocode/ab-verification.md +51 -0
package/skills/autocode/code-quality-standards.md +37 -0
package/skills/autocode/competitive-mode.md +364 -0
package/skills/brainstorming/SKILL.md +97 -0
package/skills/capture-lesson/SKILL.md +187 -0
package/skills/check-lessons/SKILL.md +116 -0
package/skills/dispatching-parallel-agents/SKILL.md +110 -0
package/skills/executing-plans/SKILL.md +85 -0
package/skills/finishing-a-development-branch/SKILL.md +201 -0
package/skills/receiving-code-review/SKILL.md +72 -0
package/skills/requesting-code-review/SKILL.md +59 -0
package/skills/requesting-code-review/code-reviewer.md +82 -0
package/skills/research/SKILL.md +145 -0
package/skills/roadmap/SKILL.md +115 -0
package/skills/subagent-driven-development/SKILL.md +98 -0
package/skills/subagent-driven-development/code-quality-reviewer-prompt.md +18 -0
package/skills/subagent-driven-development/implementer-prompt.md +73 -0
package/skills/subagent-driven-development/spec-reviewer-prompt.md +57 -0
package/skills/systematic-debugging/SKILL.md +134 -0
package/skills/systematic-debugging/condition-based-waiting.md +64 -0
package/skills/systematic-debugging/defense-in-depth.md +32 -0
package/skills/systematic-debugging/root-cause-tracing.md +55 -0
package/skills/test-driven-development/SKILL.md +167 -0
package/skills/using-git-worktrees/SKILL.md +219 -0
package/skills/using-superpowers/SKILL.md +54 -0
package/skills/verification-before-completion/SKILL.md +140 -0
package/skills/verify/SKILL.md +82 -0
package/skills/writing-plans/SKILL.md +128 -0
package/skills/writing-skills/SKILL.md +93 -0

package/scripts/tests/test-run-plan-parser.sh ADDED Viewed

@@ -0,0 +1,217 @@
+#!/usr/bin/env bash
+# Test plan parser functions
+set -euo pipefail
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+source "$SCRIPT_DIR/../lib/run-plan-parser.sh"
+FAILURES=0
+TESTS=0
+assert_eq() {
+    local desc="$1" expected="$2" actual="$3"
+    TESTS=$((TESTS + 1))
+    if [[ "$expected" != "$actual" ]]; then
+        echo "FAIL: $desc"
+        echo "  expected: $expected"
+        echo "  actual:   $actual"
+        FAILURES=$((FAILURES + 1))
+    else
+        echo "PASS: $desc"
+    fi
+}
+assert_contains() {
+    local desc="$1" needle="$2" haystack="$3"
+    TESTS=$((TESTS + 1))
+    if [[ "$haystack" != *"$needle"* ]]; then
+        echo "FAIL: $desc"
+        echo "  expected to contain: $needle"
+        echo "  in: ${haystack:0:200}..."
+        FAILURES=$((FAILURES + 1))
+    else
+        echo "PASS: $desc"
+    fi
+}
+# --- Create test fixture ---
+WORK=$(mktemp -d)
+trap 'rm -rf "$WORK"' EXIT
+FIXTURE="$WORK/fixture.md"
+cat > "$FIXTURE" << 'EOF'
+# Feature X Implementation Plan
+**Goal:** Build feature X
+**Tech Stack:** Python, pytest
+---
+## Batch 1: Foundation (Tasks 1-2)
+### Task 1: Create Data Model
+**Files:**
+- Create: `src/models.py`
+- Test: `tests/test_models.py`
+**Step 1: Write the failing test**
+```python
+def test_model():
+    m = Model("test")
+    assert m.name == "test"
+```
+**Step 2: Implement**
+Create the Model class.
+### Task 2: Add Validation
+**Files:**
+- Modify: `src/models.py`
+Add validation to Model.
+## Batch 2: Integration (Tasks 3-4)
+### Task 3: Wire Together
+Wire the models into the API.
+### Task 4: End-to-End Test
+Write integration test.
+## Batch 3: Dashboard ⚠ CRITICAL
+### Task 5: UI Components
+Build the dashboard.
+EOF
+# --- Test: count_batches ---
+# The fixture written to $FIXTURE contains three "## Batch N:" headings.
+count=$(count_batches "$FIXTURE")
+assert_eq "count_batches returns 3" "3" "$count"
+# --- Test: get_batch_title ---
+# Expected titles are the heading text that follows the "## Batch N: " prefix,
+# including a trailing marker such as "⚠ CRITICAL" when the heading has one.
+title=$(get_batch_title "$FIXTURE" 1)
+assert_eq "batch 1 title" "Foundation (Tasks 1-2)" "$title"
+title=$(get_batch_title "$FIXTURE" 2)
+assert_eq "batch 2 title" "Integration (Tasks 3-4)" "$title"
+title=$(get_batch_title "$FIXTURE" 3)
+assert_eq "batch 3 title" "Dashboard ⚠ CRITICAL" "$title"
+# --- Test: get_batch_text ---
+# $text (batch 1) and $text2 (batch 2) are reused by the two leak checks further down.
+text=$(get_batch_text "$FIXTURE" 1)
+assert_contains "batch 1 has Task 1" "Task 1: Create Data Model" "$text"
+assert_contains "batch 1 has Task 2" "Task 2: Add Validation" "$text"
+assert_contains "batch 1 has code" "def test_model" "$text"
+text2=$(get_batch_text "$FIXTURE" 2)
+assert_contains "batch 2 has Task 3" "Task 3: Wire Together" "$text2"
+assert_contains "batch 2 has Task 4" "Task 4: End-to-End Test" "$text2"
+# Batch 2 should NOT contain batch 1 content
+# (this script defines no negative assertion helper, so the counters are updated inline)
+TESTS=$((TESTS + 1))
+if [[ "$text2" == *"Create Data Model"* ]]; then
+    echo "FAIL: batch 2 text should not contain batch 1 content"
+    FAILURES=$((FAILURES + 1))
+else
+    echo "PASS: batch 2 text does not leak batch 1"
+fi
+# Batch 1 should NOT contain batch 2 content
+TESTS=$((TESTS + 1))
+if [[ "$text" == *"Wire Together"* ]]; then
+    echo "FAIL: batch 1 text should not contain batch 2 content"
+    FAILURES=$((FAILURES + 1))
+else
+    echo "PASS: batch 1 text does not leak batch 2"
+fi
+# --- Test: get_batch_task_count ---
+# Counts match the number of "### Task N:" headings under each batch in the fixture.
+tc=$(get_batch_task_count "$FIXTURE" 1)
+assert_eq "batch 1 has 2 tasks" "2" "$tc"
+tc2=$(get_batch_task_count "$FIXTURE" 2)
+assert_eq "batch 2 has 2 tasks" "2" "$tc2"
+tc3=$(get_batch_task_count "$FIXTURE" 3)
+assert_eq "batch 3 has 1 task" "1" "$tc3"
+# --- Test: is_critical_batch ---
+# These checks branch on the function's exit status, not its output.
+# Only the batch 3 heading in the fixture carries the "⚠ CRITICAL" marker.
+TESTS=$((TESTS + 1))
+if is_critical_batch "$FIXTURE" 3; then
+    echo "PASS: batch 3 is critical"
+else
+    echo "FAIL: batch 3 should be critical"
+    FAILURES=$((FAILURES + 1))
+fi
+TESTS=$((TESTS + 1))
+if is_critical_batch "$FIXTURE" 1; then
+    echo "FAIL: batch 1 should not be critical"
+    FAILURES=$((FAILURES + 1))
+else
+    echo "PASS: batch 1 is not critical"
+fi
+TESTS=$((TESTS + 1))
+if is_critical_batch "$FIXTURE" 2; then
+    echo "FAIL: batch 2 should not be critical"
+    FAILURES=$((FAILURES + 1))
+else
+    echo "PASS: batch 2 is not critical"
+fi
+# --- Test: nonexistent batch ---
+# The fixture has no batch 99; text and title are expected to be empty and the count "0".
+# NOTE(review): under `set -e` a non-zero return from any of these calls would abort
+# the script at the assignment — presumably the parser returns 0 here; confirm.
+text_empty=$(get_batch_text "$FIXTURE" 99)
+assert_eq "nonexistent batch returns empty" "" "$text_empty"
+title_empty=$(get_batch_title "$FIXTURE" 99)
+assert_eq "nonexistent batch title returns empty" "" "$title_empty"
+tc_empty=$(get_batch_task_count "$FIXTURE" 99)
+assert_eq "nonexistent batch task count returns 0" "0" "$tc_empty"
+# === get_batch_context_refs tests ===
+# Create a plan with context_refs
+cat > "$WORK/refs-plan.md" << 'PLAN'
+## Batch 1: Setup
+### Task 1: Create base
+Content here.
+## Batch 2: Build on base
+context_refs: src/auth.py, tests/test_auth.py
+### Task 2: Extend
+Uses auth module from batch 1.
+PLAN
+# Batch 1 has no refs
+val=$(get_batch_context_refs "$WORK/refs-plan.md" 1)
+assert_eq "get_batch_context_refs: batch 1 has no refs" "" "$val"
+# Batch 2 has refs
+val=$(get_batch_context_refs "$WORK/refs-plan.md" 2)
+echo "$val" | grep -q "src/auth.py" && echo "PASS: batch 2 refs include src/auth.py" && TESTS=$((TESTS + 1)) || {
+    echo "FAIL: batch 2 refs missing src/auth.py"; TESTS=$((TESTS + 1)); FAILURES=$((FAILURES + 1))
+}
+echo "$val" | grep -q "tests/test_auth.py" && echo "PASS: batch 2 refs include tests/test_auth.py" && TESTS=$((TESTS + 1)) || {
+    echo "FAIL: batch 2 refs missing tests/test_auth.py"; TESTS=$((TESTS + 1)); FAILURES=$((FAILURES + 1))
+}
+echo ""
+echo "Results: $((TESTS - FAILURES))/$TESTS passed"
+if [[ $FAILURES -gt 0 ]]; then
+    echo "FAILURES: $FAILURES"
+    exit 1
+fi
+echo "ALL PASSED"

package/scripts/tests/test-run-plan-prompt.sh ADDED Viewed

@@ -0,0 +1,254 @@
+#!/usr/bin/env bash
+# Test prompt builder functions
+set -euo pipefail
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+source "$SCRIPT_DIR/../lib/run-plan-parser.sh"
+source "$SCRIPT_DIR/../lib/run-plan-prompt.sh"
+FAILURES=0
+TESTS=0
+assert_contains() {
+    local desc="$1" needle="$2" haystack="$3"
+    TESTS=$((TESTS + 1))
+    if [[ "$haystack" != *"$needle"* ]]; then
+        echo "FAIL: $desc"
+        echo "  expected to contain: $needle"
+        echo "  in: ${haystack:0:300}..."
+        FAILURES=$((FAILURES + 1))
+    else
+        echo "PASS: $desc"
+    fi
+}
+assert_not_contains() {
+    local desc="$1" needle="$2" haystack="$3"
+    TESTS=$((TESTS + 1))
+    if [[ "$haystack" == *"$needle"* ]]; then
+        echo "FAIL: $desc"
+        echo "  expected NOT to contain: $needle"
+        echo "  in: ${haystack:0:300}..."
+        FAILURES=$((FAILURES + 1))
+    else
+        echo "PASS: $desc"
+    fi
+}
+assert_eq() {
+    local desc="$1" expected="$2" actual="$3"
+    TESTS=$((TESTS + 1))
+    if [[ "$actual" != "$expected" ]]; then
+        echo "FAIL: $desc"
+        echo "  expected: $expected"
+        echo "  actual:   $actual"
+        FAILURES=$((FAILURES + 1))
+    else
+        echo "PASS: $desc"
+    fi
+}
+# --- Setup: fixture plan + temp git worktree ---
+TMPDIR_ROOT=$(mktemp -d)
+trap 'rm -rf "$TMPDIR_ROOT"' EXIT
+FIXTURE="$TMPDIR_ROOT/plan.md"
+cat > "$FIXTURE" << 'EOF'
+# Feature X Implementation Plan
+**Goal:** Build feature X
+---
+## Batch 1: Foundation (Tasks 1-2)
+### Task 1: Create Data Model
+**Files:**
+- Create: `src/models.py`
+- Test: `tests/test_models.py`
+**Step 1: Write the failing test**
+```python
+def test_model():
+    m = Model("test")
+    assert m.name == "test"
+```
+**Step 2: Implement**
+Create the Model class.
+### Task 2: Add Validation
+**Files:**
+- Modify: `src/models.py`
+Add validation to Model.
+## Batch 2: Integration (Tasks 3-4)
+### Task 3: Wire Together
+Wire the models into the API.
+### Task 4: End-to-End Test
+Write integration test.
+EOF
+# Create a temp git repo so git branch works
+WORKTREE="$TMPDIR_ROOT/worktree"
+mkdir -p "$WORKTREE"
+git -C "$WORKTREE" init -b test-branch --quiet
+git -C "$WORKTREE" config user.email "test@test.com"
+git -C "$WORKTREE" config user.name "Test"
+touch "$WORKTREE/.gitkeep"
+git -C "$WORKTREE" add .gitkeep
+git -C "$WORKTREE" commit -m "init" --quiet
+# --- Test: build_batch_prompt for batch 1 ---
+prompt=$(build_batch_prompt "$FIXTURE" 1 "$WORKTREE" "/usr/bin/python3" "scripts/quality-gate.sh --project-root ." 0)
+assert_contains "has batch number" "Batch 1" "$prompt"
+assert_contains "has batch title" "Foundation (Tasks 1-2)" "$prompt"
+assert_contains "has plan file reference" "plan.md" "$prompt"
+assert_contains "has worktree path" "$WORKTREE" "$prompt"
+assert_contains "has python path" "/usr/bin/python3" "$prompt"
+assert_contains "has branch name" "test-branch" "$prompt"
+assert_contains "has task text - Task 1" "Task 1: Create Data Model" "$prompt"
+assert_contains "has task text - Task 2" "Task 2: Add Validation" "$prompt"
+assert_contains "has TDD instruction" "TDD" "$prompt"
+assert_contains "has quality gate command" "scripts/quality-gate.sh --project-root ." "$prompt"
+assert_contains "has previous test count" "0+" "$prompt"
+assert_contains "has progress.txt instruction" "progress.txt" "$prompt"
+# --- Test: build_batch_prompt for batch 2 ---
+prompt2=$(build_batch_prompt "$FIXTURE" 2 "$WORKTREE" "/opt/python3.12" "make test" 15)
+assert_contains "batch 2 has batch number" "Batch 2" "$prompt2"
+assert_contains "batch 2 has batch title" "Integration (Tasks 3-4)" "$prompt2"
+assert_contains "batch 2 has task text - Task 3" "Task 3: Wire Together" "$prompt2"
+assert_contains "batch 2 has task text - Task 4" "Task 4: End-to-End Test" "$prompt2"
+assert_contains "batch 2 has different python" "/opt/python3.12" "$prompt2"
+assert_contains "batch 2 has different quality gate" "make test" "$prompt2"
+assert_contains "batch 2 has prev test count" "15+" "$prompt2"
+# --- Test: batch 2 does NOT contain batch 1 tasks ---
+TESTS=$((TESTS + 1))
+if [[ "$prompt2" == *"Create Data Model"* ]]; then
+    echo "FAIL: batch 2 prompt should not contain batch 1 tasks"
+    FAILURES=$((FAILURES + 1))
+else
+    echo "PASS: batch 2 prompt does not leak batch 1 tasks"
+fi
+# =============================================================================
+# build_stable_prefix tests
+# =============================================================================
+# Unlike build_batch_prompt, this call takes no batch number and no test count.
+stable=$(build_stable_prefix "$FIXTURE" "$WORKTREE" "/usr/bin/python3" "scripts/quality-gate.sh")
+assert_contains "stable prefix has worktree path" "$WORKTREE" "$stable"
+assert_contains "stable prefix has python path" "/usr/bin/python3" "$stable"
+assert_contains "stable prefix has branch name" "test-branch" "$stable"
+assert_contains "stable prefix has TDD rule" "TDD" "$stable"
+assert_contains "stable prefix has quality gate" "scripts/quality-gate.sh" "$stable"
+assert_contains "stable prefix has progress.txt rule" "progress.txt" "$stable"
+# #48: stable prefix must NOT contain prev_test_count — that belongs in variable suffix
+assert_not_contains "stable prefix does NOT contain test count line" "tests must pass" "$stable"
+# --- Test: build_stable_prefix with bad worktree emits warning but returns 'unknown' ---
+# 2>&1 folds stderr into the captured text so one variable can be checked for both
+# the warning and the fallback branch name (presumably the warning goes to stderr —
+# confirm in run-plan-prompt.sh).
+TESTS=$((TESTS + 1))
+bad_worktree_output=$(build_stable_prefix "$FIXTURE" "/nonexistent/path/xyz" "python3" "gate.sh" 2>&1)
+if [[ "$bad_worktree_output" == *"WARNING"* && "$bad_worktree_output" == *"unknown"* ]]; then
+    echo "PASS: build_stable_prefix warns on missing worktree and uses 'unknown' branch"
+else
+    echo "FAIL: build_stable_prefix should warn on missing worktree"
+    echo "  output: $bad_worktree_output"
+    FAILURES=$((FAILURES + 1))
+fi
+# =============================================================================
+# build_variable_suffix tests
+# =============================================================================
+# Called with plan file, batch number, worktree and a test count (7 here).
+suffix=$(build_variable_suffix "$FIXTURE" 1 "$WORKTREE" 7)
+assert_contains "variable suffix has batch number" "Batch 1" "$suffix"
+assert_contains "variable suffix has batch title" "Foundation" "$suffix"
+assert_contains "variable suffix has task text" "Task 1: Create Data Model" "$suffix"
+# #48: test count is in the variable suffix, not the stable prefix
+assert_contains "variable suffix has test count" "7+" "$suffix"
+# Different test count gives different suffix (confirms test count varies per batch)
+suffix_b2=$(build_variable_suffix "$FIXTURE" 2 "$WORKTREE" 20)
+assert_contains "variable suffix b2 has test count 20+" "20+" "$suffix_b2"
+# =============================================================================
+# Cross-batch context tests
+# =============================================================================
+# --- Setup: add progress.txt and a commit to the worktree ---
+# From here on the worktree has a second commit and a tracked progress.txt, so
+# these checks must run after the assertions above.
+echo "Batch 1: Implemented auth module" > "$WORKTREE/progress.txt"
+echo "code" > "$WORKTREE/code.py"
+git -C "$WORKTREE" add code.py progress.txt
+git -C "$WORKTREE" commit -q -m "feat: add auth"
+# --- Test: prompt includes recent commits ---
+prompt3=$(build_batch_prompt "$FIXTURE" 2 "$WORKTREE" "python3" "scripts/quality-gate.sh" 42)
+assert_contains "cross-batch: has Recent commits" "Recent commits" "$prompt3"
+# --- Test: prompt includes progress.txt content ---
+assert_contains "cross-batch: has Previous progress" "Previous progress" "$prompt3"
+assert_contains "cross-batch: has progress content" "Implemented auth module" "$prompt3"
+# --- Test: prompt includes commit message ---
+assert_contains "cross-batch: has commit in log" "feat: add auth" "$prompt3"
+# =============================================================================
+# #47: corrupted state file — jq failure emits warning, prev_gate stays empty
+# =============================================================================
+# The state file is planted at $WORKTREE/.run-plan-state.json with content that is
+# not valid JSON; stderr is captured together with stdout to look for the warning.
+echo "NOT VALID JSON {{{" > "$WORKTREE/.run-plan-state.json"
+TESTS=$((TESTS + 1))
+jq_warn_output=$(build_variable_suffix "$FIXTURE" 2 "$WORKTREE" 0 2>&1)
+if [[ "$jq_warn_output" == *"WARNING"* && "$jq_warn_output" == *"corrupted"* ]]; then
+    echo "PASS: build_variable_suffix warns on corrupted state file"
+else
+    echo "FAIL: build_variable_suffix should warn on corrupted state file"
+    echo "  output: ${jq_warn_output:0:200}"
+    FAILURES=$((FAILURES + 1))
+fi
+# Clean up corrupted state file
+rm -f "$WORKTREE/.run-plan-state.json"
+# =============================================================================
+# #50: unreadable progress.txt — error is NOT silently swallowed (no 2>/dev/null || true)
+# The fix removes the error suppression so stderr shows the permission denial.
+# We verify by running tail directly under the same condition — the error must be visible.
+# (Command substitution $() cannot propagate exit codes from within sourced functions
+#  reliably across bash versions, so we test the absence of suppression via the source.)
+# =============================================================================
+TESTS=$((TESTS + 1))
+# Check that progress_tail assignment in build_variable_suffix does NOT use || true
+# by inspecting the source code — the fix is the removal of the error suppression.
+PROMPT_SRC="$SCRIPT_DIR/../lib/run-plan-prompt.sh"
+if [[ ! -r "$PROMPT_SRC" ]]; then
+    # An unreadable source must fail loudly: a grep error is indistinguishable
+    # from "pattern absent" and would otherwise be reported as a PASS.
+    echo "FAIL: cannot read $PROMPT_SRC to inspect tail error handling"
+    FAILURES=$((FAILURES + 1))
+elif grep -q -- 'tail.*progress\.txt.*|| true' "$PROMPT_SRC"; then
+    # One pattern is enough: any "tail ... progress.txt ... 2>/dev/null ... || true"
+    # line also matches this broader expression ("|" is literal in a basic regex).
+    echo "FAIL: build_variable_suffix still has suppressed tail error (|| true present)"
+    FAILURES=$((FAILURES + 1))
+else
+    echo "PASS: build_variable_suffix does not suppress tail errors on progress.txt"
+fi
+echo ""
+echo "Results: $((TESTS - FAILURES))/$TESTS passed"
+if [[ $FAILURES -gt 0 ]]; then
+    echo "FAILURES: $FAILURES"
+    exit 1
+fi
+echo "ALL PASSED"

package/scripts/tests/test-run-plan-quality-gate.sh ADDED Viewed

@@ -0,0 +1,222 @@
+#!/usr/bin/env bash
+# Test quality gate runner helper functions
+set -euo pipefail
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+source "$SCRIPT_DIR/../lib/run-plan-quality-gate.sh"
+FAILURES=0
+TESTS=0
+# Compare two strings and record the outcome in the script-level counters.
+#   $1 - description printed after "PASS:" / "FAIL:"
+#   $2 - expected value
+#   $3 - actual value
+# Increments TESTS on every call and FAILURES on a mismatch; always returns 0.
+assert_eq() {
+    local label="$1" want="$2" got="$3"
+    TESTS=$((TESTS + 1))
+    if [[ "$want" == "$got" ]]; then
+        echo "PASS: $label"
+        return 0
+    fi
+    echo "FAIL: $label"
+    echo "  expected: $want"
+    echo "  actual:   $got"
+    FAILURES=$((FAILURES + 1))
+}
+assert_exit() {
+    local desc="$1" expected_exit="$2"
+    shift 2
+    local actual_exit=0
+    "$@" || actual_exit=$?
+    TESTS=$((TESTS + 1))
+    if [[ "$expected_exit" != "$actual_exit" ]]; then
+        echo "FAIL: $desc"
+        echo "  expected exit: $expected_exit"
+        echo "  actual exit:   $actual_exit"
+        FAILURES=$((FAILURES + 1))
+    else
+        echo "PASS: $desc"
+    fi
+}
+# --- Scratch directory for the simulated git repo; removed on any exit path ---
+WORK=$(mktemp -d)
+cleanup_work() { rm -rf "$WORK"; }
+trap cleanup_work EXIT
+# =============================================================================
+# extract_test_count tests
+# =============================================================================
+# --- Test: extract passed count with skipped ---
+val=$(extract_test_count "1953 passed, 15 skipped in 45.2s")
+assert_eq "extract_test_count: passed with skipped" "1953" "$val"
+# --- Test: extract passed count without skipped ---
+val=$(extract_test_count "42 passed in 2.1s")
+assert_eq "extract_test_count: passed without skipped" "42" "$val"
+# --- Test: extract from no tests ran ---
+val=$(extract_test_count "ERROR: no tests ran")
+assert_eq "extract_test_count: no tests ran" "-1" "$val"
+# --- Test: extract from empty string ---
+val=$(extract_test_count "")
+assert_eq "extract_test_count: empty string" "-1" "$val"
+# --- Test: extract from multi-line pytest output ---
+output="============================= test session starts ==============================
+collected 87 items
+tests/test_foo.py ........ [  9%]
+tests/test_bar.py ........ [ 18%]
+============================== 87 passed in 12.34s ==============================="
+val=$(extract_test_count "$output")
+assert_eq "extract_test_count: full pytest output" "87" "$val"
+# --- Test: extract with failures in output ---
+output="3 failed, 85 passed, 2 skipped in 30.1s"
+val=$(extract_test_count "$output")
+assert_eq "extract_test_count: with failures" "85" "$val"
+# --- Test: extract from jest output ---
+output="Tests:       3 failed, 45 passed, 48 total"
+val=$(extract_test_count "$output")
+assert_eq "extract_test_count: jest output" "45" "$val"
+# --- Test: extract from jest all-pass output ---
+output="Tests:       12 passed, 12 total"
+val=$(extract_test_count "$output")
+assert_eq "extract_test_count: jest all-pass" "12" "$val"
+# --- Test: extract from go test output ---
+output="ok  	github.com/foo/bar	0.123s
+ok  	github.com/foo/baz	0.456s
+FAIL	github.com/foo/qux	0.789s"
+val=$(extract_test_count "$output")
+assert_eq "extract_test_count: go test (2 ok of 3)" "2" "$val"
+# --- Test: unrecognized format returns -1 ---
+val=$(extract_test_count "Some random build output with no test results")
+assert_eq "extract_test_count: unrecognized format" "-1" "$val"
+# =============================================================================
+# check_test_count_regression tests
+# =============================================================================
+# As exercised here the arguments are: new count, previous count.
+# --- Test: no regression (increase) ---
+assert_exit "check_test_count_regression: 200 >= 150 passes" 0 \
+    check_test_count_regression 200 150
+# --- Test: no regression (equal) ---
+assert_exit "check_test_count_regression: 150 >= 150 passes" 0 \
+    check_test_count_regression 150 150
+# --- Test: regression detected ---
+assert_exit "check_test_count_regression: 100 < 150 fails" 1 \
+    check_test_count_regression 100 150
+# --- Test: no regression from zero baseline ---
+assert_exit "check_test_count_regression: 50 >= 0 passes" 0 \
+    check_test_count_regression 50 0
+# --- Test: -1 on either side skips the regression check ---
+assert_exit "check_test_count_regression: -1 new skips check" 0 \
+    check_test_count_regression -1 150
+assert_exit "check_test_count_regression: -1 previous skips check" 0 \
+    check_test_count_regression 50 -1
+# =============================================================================
+# check_git_clean tests
+# =============================================================================
+# --- Setup: create a temp git repo with a single committed file ---
+git -C "$WORK" init -q
+git -C "$WORK" config user.email "test@test.com"
+git -C "$WORK" config user.name "Test"
+echo "initial" > "$WORK/file.txt"
+git -C "$WORK" add file.txt
+git -C "$WORK" commit -q -m "initial"
+# --- Test: clean repo passes ---
+assert_exit "check_git_clean: clean repo passes" 0 \
+    check_git_clean "$WORK"
+# --- Test: dirty repo (untracked file) fails ---
+echo "dirty" > "$WORK/untracked.txt"
+assert_exit "check_git_clean: untracked file fails" 1 \
+    check_git_clean "$WORK"
+# --- Test: dirty repo (modified file) fails ---
+rm "$WORK/untracked.txt"
+echo "modified" >> "$WORK/file.txt"
+assert_exit "check_git_clean: modified file fails" 1 \
+    check_git_clean "$WORK"
+# Restore file.txt so the repo is clean again for any subsequent tests
+git -C "$WORK" checkout -- file.txt
+# =============================================================================
+# Security: no eval in quality gate runner (#3)
+# =============================================================================
+# Both checks are static greps over the library source. They use -E because `\|`
+# alternation inside a basic regex is a GNU extension that POSIX leaves undefined,
+# whereas `|` in an extended regex is portable. The patterns match the same lines.
+TESTS=$((TESTS + 1))
+QG_FILE="$SCRIPT_DIR/../lib/run-plan-quality-gate.sh"
+if grep -qE 'eval.*(quality_gate_cmd|\$quality_gate)' "$QG_FILE"; then
+    echo "FAIL: run-plan-quality-gate.sh uses eval on quality_gate_cmd (command injection risk, bug #3)"
+    FAILURES=$((FAILURES + 1))
+else
+    echo "PASS: run-plan-quality-gate.sh does not use eval on quality_gate_cmd"
+fi
+TESTS=$((TESTS + 1))
+if grep -qE 'bash -c.*(quality_gate_cmd|\$quality_gate)' "$QG_FILE"; then
+    echo "PASS: run-plan-quality-gate.sh uses bash -c instead of eval"
+else
+    echo "FAIL: run-plan-quality-gate.sh should use bash -c instead of eval (bug #3)"
+    FAILURES=$((FAILURES + 1))
+fi
+# =============================================================================
+# Bug #3 BEHAVIORAL: bash -c limits injection scope vs eval
+# =============================================================================
+# eval "$cmd" would expand variables/globs in the current shell context.
+# bash -c "$cmd" runs in a separate child process, so shell variables the parent
+# has not exported are invisible to the command.
+# No scratch repo is needed: the command below never touches the filesystem, and
+# creating one only added steps that could abort the run under `set -e`.
+# Start from a known state: `unset` also drops an export attribute inherited from
+# the caller's environment, which would otherwise let the child see the value.
+unset _GATE_SECRET
+# Set a (non-exported) variable in the current shell that bash -c should NOT see
+_GATE_SECRET="LEAKED"
+# A gate command that tries to echo the secret — with bash -c it falls back to "none"
+gate_output=$(bash -c 'echo "secret=${_GATE_SECRET:-none}"' 2>&1) || true
+TESTS=$((TESTS + 1))
+if [[ "$gate_output" == *"secret=none"* ]]; then
+    echo "PASS: bash -c does not leak parent shell variables (injection limited)"
+else
+    echo "FAIL: bash -c should not see parent shell variable _GATE_SECRET"
+    echo "  output: $gate_output"
+    FAILURES=$((FAILURES + 1))
+fi
+# =============================================================================
+# Summary
+# =============================================================================
+echo ""
+echo "Results: $((TESTS - FAILURES))/$TESTS passed"
+if [[ $FAILURES -gt 0 ]]; then
+    echo "FAILURES: $FAILURES"
+    exit 1
+fi
+echo "ALL PASSED"