npm - autonomous-coding-toolkit - Versions diffs - 1.0.0 - Mend

autonomous-coding-toolkit 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (324) hide show

package/.claude-plugin/marketplace.json +22 -0
package/.claude-plugin/plugin.json +13 -0
package/LICENSE +21 -0
package/Makefile +21 -0
package/README.md +140 -0
package/SECURITY.md +28 -0
package/agents/bash-expert.md +113 -0
package/agents/dependency-auditor.md +138 -0
package/agents/integration-tester.md +120 -0
package/agents/lesson-scanner.md +149 -0
package/agents/python-expert.md +179 -0
package/agents/service-monitor.md +141 -0
package/agents/shell-expert.md +147 -0
package/benchmarks/runner.sh +147 -0
package/benchmarks/tasks/01-rest-endpoint/rubric.sh +29 -0
package/benchmarks/tasks/01-rest-endpoint/task.md +17 -0
package/benchmarks/tasks/02-refactor-module/task.md +8 -0
package/benchmarks/tasks/03-fix-integration-bug/task.md +8 -0
package/benchmarks/tasks/04-add-test-coverage/task.md +8 -0
package/benchmarks/tasks/05-multi-file-feature/task.md +8 -0
package/bin/act.js +238 -0
package/commands/autocode.md +6 -0
package/commands/cancel-ralph.md +18 -0
package/commands/code-factory.md +53 -0
package/commands/create-prd.md +55 -0
package/commands/ralph-loop.md +18 -0
package/commands/run-plan.md +117 -0
package/commands/submit-lesson.md +122 -0
package/docs/ARCHITECTURE.md +630 -0
package/docs/CONTRIBUTING.md +125 -0
package/docs/lessons/0001-bare-exception-swallowing.md +34 -0
package/docs/lessons/0002-async-def-without-await.md +28 -0
package/docs/lessons/0003-create-task-without-callback.md +28 -0
package/docs/lessons/0004-hardcoded-test-counts.md +28 -0
package/docs/lessons/0005-sqlite-without-closing.md +33 -0
package/docs/lessons/0006-venv-pip-path.md +27 -0
package/docs/lessons/0007-runner-state-self-rejection.md +35 -0
package/docs/lessons/0008-quality-gate-blind-spot.md +33 -0
package/docs/lessons/0009-parser-overcount-empty-batches.md +36 -0
package/docs/lessons/0010-local-outside-function-bash.md +33 -0
package/docs/lessons/0011-batch-tests-for-unimplemented-code.md +36 -0
package/docs/lessons/0012-api-markdown-unescaped-chars.md +33 -0
package/docs/lessons/0013-export-prefix-env-parsing.md +33 -0
package/docs/lessons/0014-decorator-registry-import-side-effect.md +43 -0
package/docs/lessons/0015-frontend-backend-schema-drift.md +43 -0
package/docs/lessons/0016-event-driven-cold-start-seeding.md +44 -0
package/docs/lessons/0017-copy-paste-logic-diverges.md +43 -0
package/docs/lessons/0018-layer-passes-pipeline-broken.md +45 -0
package/docs/lessons/0019-systemd-envfile-ignores-export.md +41 -0
package/docs/lessons/0020-persist-state-incrementally.md +44 -0
package/docs/lessons/0021-dual-axis-testing.md +48 -0
package/docs/lessons/0022-jsx-factory-shadowing.md +43 -0
package/docs/lessons/0023-static-analysis-spiral.md +51 -0
package/docs/lessons/0024-shared-pipeline-implementation.md +55 -0
package/docs/lessons/0025-defense-in-depth-all-entry-points.md +65 -0
package/docs/lessons/0026-linter-no-rules-false-enforcement.md +54 -0
package/docs/lessons/0027-jsx-silent-prop-drop.md +64 -0
package/docs/lessons/0028-no-infrastructure-in-client-code.md +49 -0
package/docs/lessons/0029-never-write-secrets-to-files.md +61 -0
package/docs/lessons/0030-cache-merge-not-replace.md +62 -0
package/docs/lessons/0031-verify-units-at-boundaries.md +66 -0
package/docs/lessons/0032-module-lifecycle-subscribe-unsubscribe.md +89 -0
package/docs/lessons/0033-async-iteration-mutable-snapshot.md +72 -0
package/docs/lessons/0034-caller-missing-await-silent-discard.md +65 -0
package/docs/lessons/0035-duplicate-registration-silent-overwrite.md +85 -0
package/docs/lessons/0036-websocket-dirty-disconnect.md +33 -0
package/docs/lessons/0037-parallel-agents-worktree-corruption.md +31 -0
package/docs/lessons/0038-subscribe-no-stored-ref.md +36 -0
package/docs/lessons/0039-fallback-or-default-hides-bugs.md +34 -0
package/docs/lessons/0040-event-firehose-filter-first.md +36 -0
package/docs/lessons/0041-ambiguous-base-dir-path-nesting.md +32 -0
package/docs/lessons/0042-spec-compliance-insufficient.md +36 -0
package/docs/lessons/0043-exact-count-extensible-collections.md +32 -0
package/docs/lessons/0044-relative-file-deps-worktree.md +39 -0
package/docs/lessons/0045-iterative-design-improvement.md +33 -0
package/docs/lessons/0046-plan-assertion-math-bugs.md +38 -0
package/docs/lessons/0047-pytest-single-threaded-default.md +37 -0
package/docs/lessons/0048-integration-wiring-batch.md +40 -0
package/docs/lessons/0049-ab-verification.md +41 -0
package/docs/lessons/0050-editing-sourced-files-during-execution.md +33 -0
package/docs/lessons/0051-infrastructure-fixes-cant-self-heal.md +30 -0
package/docs/lessons/0052-uncommitted-changes-poison-quality-gates.md +31 -0
package/docs/lessons/0053-jq-compact-flag-inconsistency.md +31 -0
package/docs/lessons/0054-parser-matches-inside-code-blocks.md +30 -0
package/docs/lessons/0055-agents-compensate-for-garbled-prompts.md +31 -0
package/docs/lessons/0056-grep-count-exit-code-on-zero.md +42 -0
package/docs/lessons/0057-new-artifacts-break-git-clean-gates.md +42 -0
package/docs/lessons/0058-dead-config-keys-never-consumed.md +49 -0
package/docs/lessons/0059-contract-test-shared-structures.md +53 -0
package/docs/lessons/0060-set-e-silent-death-in-runners.md +53 -0
package/docs/lessons/0061-context-injection-dirty-state.md +50 -0
package/docs/lessons/0062-sibling-bug-neighborhood-scan.md +29 -0
package/docs/lessons/0063-one-flag-two-lifetimes.md +31 -0
package/docs/lessons/0064-test-passes-wrong-reason.md +31 -0
package/docs/lessons/0065-pipefail-grep-count-double-output.md +39 -0
package/docs/lessons/0066-local-keyword-outside-function.md +37 -0
package/docs/lessons/0067-stdin-hang-non-interactive-shell.md +36 -0
package/docs/lessons/0068-agent-builds-wrong-thing-correctly.md +31 -0
package/docs/lessons/0069-plan-quality-dominates-execution.md +30 -0
package/docs/lessons/0070-spec-echo-back-prevents-drift.md +31 -0
package/docs/lessons/0071-positive-instructions-outperform-negative.md +30 -0
package/docs/lessons/0072-lost-in-the-middle-context-placement.md +30 -0
package/docs/lessons/0073-unscoped-lessons-cause-false-positives.md +30 -0
package/docs/lessons/0074-stale-context-injection-wrong-batch.md +32 -0
package/docs/lessons/0075-research-artifacts-must-persist.md +32 -0
package/docs/lessons/0076-wrong-decomposition-contaminates-downstream.md +30 -0
package/docs/lessons/0077-cherry-pick-merges-need-manual-resolution.md +30 -0
package/docs/lessons/0078-static-review-without-live-test.md +30 -0
package/docs/lessons/0079-integration-wiring-batch-required.md +32 -0
package/docs/lessons/FRAMEWORK.md +161 -0
package/docs/lessons/SUMMARY.md +201 -0
package/docs/lessons/TEMPLATE.md +85 -0
package/docs/plans/2026-02-21-code-factory-v2-design.md +204 -0
package/docs/plans/2026-02-21-code-factory-v2-implementation-plan.md +2189 -0
package/docs/plans/2026-02-21-code-factory-v2-phase4-design.md +537 -0
package/docs/plans/2026-02-21-code-factory-v2-phase4-implementation-plan.md +2012 -0
package/docs/plans/2026-02-21-hardening-pass-design.md +108 -0
package/docs/plans/2026-02-21-hardening-pass-plan.md +1378 -0
package/docs/plans/2026-02-21-mab-research-report.md +406 -0
package/docs/plans/2026-02-21-marketplace-restructure-design.md +240 -0
package/docs/plans/2026-02-21-marketplace-restructure-plan.md +832 -0
package/docs/plans/2026-02-21-phase4-completion-plan.md +697 -0
package/docs/plans/2026-02-21-validator-suite-design.md +148 -0
package/docs/plans/2026-02-21-validator-suite-plan.md +540 -0
package/docs/plans/2026-02-22-mab-research-round2.md +556 -0
package/docs/plans/2026-02-22-mab-run-design.md +462 -0
package/docs/plans/2026-02-22-mab-run-plan.md +2046 -0
package/docs/plans/2026-02-22-operations-design-methodology-research.md +681 -0
package/docs/plans/2026-02-22-research-agent-failure-taxonomy.md +532 -0
package/docs/plans/2026-02-22-research-code-guideline-policies.md +886 -0
package/docs/plans/2026-02-22-research-codebase-audit-refactoring.md +908 -0
package/docs/plans/2026-02-22-research-coding-standards-documentation.md +541 -0
package/docs/plans/2026-02-22-research-competitive-landscape.md +687 -0
package/docs/plans/2026-02-22-research-comprehensive-testing.md +1076 -0
package/docs/plans/2026-02-22-research-context-utilization.md +459 -0
package/docs/plans/2026-02-22-research-cost-quality-tradeoff.md +548 -0
package/docs/plans/2026-02-22-research-lesson-transferability.md +508 -0
package/docs/plans/2026-02-22-research-multi-agent-coordination.md +312 -0
package/docs/plans/2026-02-22-research-phase-integration.md +602 -0
package/docs/plans/2026-02-22-research-plan-quality.md +428 -0
package/docs/plans/2026-02-22-research-prompt-engineering.md +558 -0
package/docs/plans/2026-02-22-research-unconventional-perspectives.md +528 -0
package/docs/plans/2026-02-22-research-user-adoption.md +638 -0
package/docs/plans/2026-02-22-research-verification-effectiveness.md +433 -0
package/docs/plans/2026-02-23-agent-suite-design.md +299 -0
package/docs/plans/2026-02-23-agent-suite-plan.md +578 -0
package/docs/plans/2026-02-23-phase3-cost-infrastructure-design.md +148 -0
package/docs/plans/2026-02-23-phase3-cost-infrastructure-plan.md +1062 -0
package/docs/plans/2026-02-23-research-bash-expert-agent.md +543 -0
package/docs/plans/2026-02-23-research-dependency-auditor-agent.md +564 -0
package/docs/plans/2026-02-23-research-improving-existing-agents.md +503 -0
package/docs/plans/2026-02-23-research-integration-tester-agent.md +454 -0
package/docs/plans/2026-02-23-research-python-expert-agent.md +429 -0
package/docs/plans/2026-02-23-research-service-monitor-agent.md +425 -0
package/docs/plans/2026-02-23-research-shell-expert-agent.md +533 -0
package/docs/plans/2026-02-23-roadmap-to-completion.md +530 -0
package/docs/plans/2026-02-24-headless-module-split-design.md +98 -0
package/docs/plans/2026-02-24-headless-module-split.md +443 -0
package/docs/plans/2026-02-24-lesson-scope-metadata-design.md +228 -0
package/docs/plans/2026-02-24-lesson-scope-metadata-plan.md +968 -0
package/docs/plans/2026-02-24-npm-packaging-design.md +841 -0
package/docs/plans/2026-02-24-npm-packaging-plan.md +1965 -0
package/docs/plans/audit-findings.md +186 -0
package/docs/telegram-notification-format.md +98 -0
package/examples/example-plan.md +51 -0
package/examples/example-prd.json +72 -0
package/examples/example-roadmap.md +33 -0
package/examples/quickstart-plan.md +63 -0
package/hooks/hooks.json +26 -0
package/hooks/setup-symlinks.sh +48 -0
package/hooks/stop-hook.sh +135 -0
package/package.json +47 -0
package/policies/bash.md +71 -0
package/policies/python.md +71 -0
package/policies/testing.md +61 -0
package/policies/universal.md +60 -0
package/scripts/analyze-report.sh +97 -0
package/scripts/architecture-map.sh +145 -0
package/scripts/auto-compound.sh +273 -0
package/scripts/batch-audit.sh +42 -0
package/scripts/batch-test.sh +101 -0
package/scripts/entropy-audit.sh +221 -0
package/scripts/failure-digest.sh +51 -0
package/scripts/generate-ast-rules.sh +96 -0
package/scripts/init.sh +112 -0
package/scripts/lesson-check.sh +428 -0
package/scripts/lib/common.sh +61 -0
package/scripts/lib/cost-tracking.sh +153 -0
package/scripts/lib/ollama.sh +60 -0
package/scripts/lib/progress-writer.sh +128 -0
package/scripts/lib/run-plan-context.sh +215 -0
package/scripts/lib/run-plan-echo-back.sh +231 -0
package/scripts/lib/run-plan-headless.sh +396 -0
package/scripts/lib/run-plan-notify.sh +57 -0
package/scripts/lib/run-plan-parser.sh +81 -0
package/scripts/lib/run-plan-prompt.sh +215 -0
package/scripts/lib/run-plan-quality-gate.sh +132 -0
package/scripts/lib/run-plan-routing.sh +315 -0
package/scripts/lib/run-plan-sampling.sh +170 -0
package/scripts/lib/run-plan-scoring.sh +146 -0
package/scripts/lib/run-plan-state.sh +142 -0
package/scripts/lib/run-plan-team.sh +199 -0
package/scripts/lib/telegram.sh +54 -0
package/scripts/lib/thompson-sampling.sh +176 -0
package/scripts/license-check.sh +74 -0
package/scripts/mab-run.sh +575 -0
package/scripts/module-size-check.sh +146 -0
package/scripts/patterns/async-no-await.yml +5 -0
package/scripts/patterns/bare-except.yml +6 -0
package/scripts/patterns/empty-catch.yml +6 -0
package/scripts/patterns/hardcoded-localhost.yml +9 -0
package/scripts/patterns/retry-loop-no-backoff.yml +12 -0
package/scripts/pipeline-status.sh +197 -0
package/scripts/policy-check.sh +226 -0
package/scripts/prior-art-search.sh +133 -0
package/scripts/promote-mab-lessons.sh +126 -0
package/scripts/prompts/agent-a-superpowers.md +29 -0
package/scripts/prompts/agent-b-ralph.md +29 -0
package/scripts/prompts/judge-agent.md +61 -0
package/scripts/prompts/planner-agent.md +44 -0
package/scripts/pull-community-lessons.sh +90 -0
package/scripts/quality-gate.sh +266 -0
package/scripts/research-gate.sh +90 -0
package/scripts/run-plan.sh +329 -0
package/scripts/scope-infer.sh +159 -0
package/scripts/setup-ralph-loop.sh +155 -0
package/scripts/telemetry.sh +230 -0
package/scripts/tests/run-all-tests.sh +52 -0
package/scripts/tests/test-act-cli.sh +46 -0
package/scripts/tests/test-agents-md.sh +87 -0
package/scripts/tests/test-analyze-report.sh +114 -0
package/scripts/tests/test-architecture-map.sh +89 -0
package/scripts/tests/test-auto-compound.sh +169 -0
package/scripts/tests/test-batch-test.sh +65 -0
package/scripts/tests/test-benchmark-runner.sh +25 -0
package/scripts/tests/test-common.sh +168 -0
package/scripts/tests/test-cost-tracking.sh +158 -0
package/scripts/tests/test-echo-back.sh +180 -0
package/scripts/tests/test-entropy-audit.sh +146 -0
package/scripts/tests/test-failure-digest.sh +66 -0
package/scripts/tests/test-generate-ast-rules.sh +145 -0
package/scripts/tests/test-helpers.sh +82 -0
package/scripts/tests/test-init.sh +47 -0
package/scripts/tests/test-lesson-check.sh +278 -0
package/scripts/tests/test-lesson-local.sh +55 -0
package/scripts/tests/test-license-check.sh +109 -0
package/scripts/tests/test-mab-run.sh +182 -0
package/scripts/tests/test-ollama-lib.sh +49 -0
package/scripts/tests/test-ollama.sh +60 -0
package/scripts/tests/test-pipeline-status.sh +198 -0
package/scripts/tests/test-policy-check.sh +124 -0
package/scripts/tests/test-prior-art-search.sh +96 -0
package/scripts/tests/test-progress-writer.sh +140 -0
package/scripts/tests/test-promote-mab-lessons.sh +110 -0
package/scripts/tests/test-pull-community-lessons.sh +149 -0
package/scripts/tests/test-quality-gate.sh +241 -0
package/scripts/tests/test-research-gate.sh +132 -0
package/scripts/tests/test-run-plan-cli.sh +86 -0
package/scripts/tests/test-run-plan-context.sh +305 -0
package/scripts/tests/test-run-plan-e2e.sh +153 -0
package/scripts/tests/test-run-plan-headless.sh +424 -0
package/scripts/tests/test-run-plan-notify.sh +124 -0
package/scripts/tests/test-run-plan-parser.sh +217 -0
package/scripts/tests/test-run-plan-prompt.sh +254 -0
package/scripts/tests/test-run-plan-quality-gate.sh +222 -0
package/scripts/tests/test-run-plan-routing.sh +178 -0
package/scripts/tests/test-run-plan-scoring.sh +148 -0
package/scripts/tests/test-run-plan-state.sh +261 -0
package/scripts/tests/test-run-plan-team.sh +157 -0
package/scripts/tests/test-scope-infer.sh +150 -0
package/scripts/tests/test-setup-ralph-loop.sh +63 -0
package/scripts/tests/test-telegram-env.sh +38 -0
package/scripts/tests/test-telegram.sh +121 -0
package/scripts/tests/test-telemetry.sh +46 -0
package/scripts/tests/test-thompson-sampling.sh +139 -0
package/scripts/tests/test-validate-all.sh +60 -0
package/scripts/tests/test-validate-commands.sh +89 -0
package/scripts/tests/test-validate-hooks.sh +98 -0
package/scripts/tests/test-validate-lessons.sh +150 -0
package/scripts/tests/test-validate-plan-quality.sh +235 -0
package/scripts/tests/test-validate-plans.sh +187 -0
package/scripts/tests/test-validate-plugin.sh +106 -0
package/scripts/tests/test-validate-prd.sh +184 -0
package/scripts/tests/test-validate-skills.sh +134 -0
package/scripts/validate-all.sh +57 -0
package/scripts/validate-commands.sh +67 -0
package/scripts/validate-hooks.sh +89 -0
package/scripts/validate-lessons.sh +98 -0
package/scripts/validate-plan-quality.sh +369 -0
package/scripts/validate-plans.sh +120 -0
package/scripts/validate-plugin.sh +86 -0
package/scripts/validate-policies.sh +42 -0
package/scripts/validate-prd.sh +118 -0
package/scripts/validate-skills.sh +96 -0
package/skills/autocode/SKILL.md +285 -0
package/skills/autocode/ab-verification.md +51 -0
package/skills/autocode/code-quality-standards.md +37 -0
package/skills/autocode/competitive-mode.md +364 -0
package/skills/brainstorming/SKILL.md +97 -0
package/skills/capture-lesson/SKILL.md +187 -0
package/skills/check-lessons/SKILL.md +116 -0
package/skills/dispatching-parallel-agents/SKILL.md +110 -0
package/skills/executing-plans/SKILL.md +85 -0
package/skills/finishing-a-development-branch/SKILL.md +201 -0
package/skills/receiving-code-review/SKILL.md +72 -0
package/skills/requesting-code-review/SKILL.md +59 -0
package/skills/requesting-code-review/code-reviewer.md +82 -0
package/skills/research/SKILL.md +145 -0
package/skills/roadmap/SKILL.md +115 -0
package/skills/subagent-driven-development/SKILL.md +98 -0
package/skills/subagent-driven-development/code-quality-reviewer-prompt.md +18 -0
package/skills/subagent-driven-development/implementer-prompt.md +73 -0
package/skills/subagent-driven-development/spec-reviewer-prompt.md +57 -0
package/skills/systematic-debugging/SKILL.md +134 -0
package/skills/systematic-debugging/condition-based-waiting.md +64 -0
package/skills/systematic-debugging/defense-in-depth.md +32 -0
package/skills/systematic-debugging/root-cause-tracing.md +55 -0
package/skills/test-driven-development/SKILL.md +167 -0
package/skills/using-git-worktrees/SKILL.md +219 -0
package/skills/using-superpowers/SKILL.md +54 -0
package/skills/verification-before-completion/SKILL.md +140 -0
package/skills/verify/SKILL.md +82 -0
package/skills/writing-plans/SKILL.md +128 -0
package/skills/writing-skills/SKILL.md +93 -0

package/scripts/tests/test-run-plan-routing.sh ADDED Viewed

@@ -0,0 +1,178 @@
+#!/usr/bin/env bash
+set -euo pipefail
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+source "$SCRIPT_DIR/../lib/run-plan-parser.sh"
+source "$SCRIPT_DIR/../lib/run-plan-routing.sh"
+FAILURES=0
+TESTS=0
+# assert_eq DESC EXPECTED ACTUAL
+# Counts one test in TESTS, then compares EXPECTED and ACTUAL as literal
+# strings. Prints "PASS: DESC" on a match; otherwise prints a FAIL report
+# showing both values and increments FAILURES.
+assert_eq() {
+    local label="$1" want="$2" got="$3"
+    TESTS=$((TESTS + 1))
+    if [[ "$want" == "$got" ]]; then
+        printf '%s\n' "PASS: $label"
+        return 0
+    fi
+    printf '%s\n' "FAIL: $label"
+    printf '%s\n' "  expected: $want"
+    printf '%s\n' "  actual:   $got"
+    FAILURES=$((FAILURES + 1))
+}
+WORK=$(mktemp -d)
+trap 'rm -rf "$WORK"' EXIT
+# Plan with clear parallel batches
+cat > "$WORK/parallel-plan.md" << 'PLAN'
+## Batch 1: Foundation
+**Files:**
+- Create: `src/lib.sh`
+### Task 1: Create lib
+Write lib.
+## Batch 2: Feature A
+**Files:**
+- Create: `src/feature-a.sh`
+context_refs: src/lib.sh
+### Task 2: Build feature A
+## Batch 3: Feature B
+**Files:**
+- Create: `src/feature-b.sh`
+context_refs: src/lib.sh
+### Task 3: Build feature B
+## Batch 4: Integration
+**Files:**
+- Modify: `src/feature-a.sh`
+- Modify: `src/feature-b.sh`
+context_refs: src/feature-a.sh, src/feature-b.sh
+### Task 4: Wire together
+PLAN
+# Test dependency graph building
+deps=$(build_dependency_graph "$WORK/parallel-plan.md")
+assert_eq "dep graph: B2 depends on B1" "true" "$(echo "$deps" | jq '.["2"] | contains(["1"])')"
+assert_eq "dep graph: B3 depends on B1" "true" "$(echo "$deps" | jq '.["3"] | contains(["1"])')"
+assert_eq "dep graph: B4 depends on B2 and B3" "true" "$(echo "$deps" | jq '.["4"] | (contains(["2"]) and contains(["3"]))')"
+# Test parallelism score
+score=$(compute_parallelism_score "$WORK/parallel-plan.md")
+TESTS=$((TESTS + 1))
+if [[ "$score" -gt 40 ]]; then
+    echo "PASS: parallelism score: $score > 40 (batches 2,3 can run parallel)"
+else
+    echo "FAIL: parallelism score: $score <= 40"
+    FAILURES=$((FAILURES + 1))
+fi
+# Test mode recommendation
+mode=$(recommend_execution_mode "$score" "false" 21)
+assert_eq "recommend: team for high score" "team" "$mode"
+# Fixture: three batches that all touch src/main.sh, with batches 2 and 3
+# also naming it in context_refs, so the plan forms a chain.
+cat > "$WORK/sequential-plan.md" << 'PLAN'
+## Batch 1: Setup
+**Files:**
+- Create: `src/main.sh`
+### Task 1: Setup
+## Batch 2: Extend
+**Files:**
+- Modify: `src/main.sh`
+context_refs: src/main.sh
+### Task 2: Extend
+## Batch 3: Finalize
+**Files:**
+- Modify: `src/main.sh`
+context_refs: src/main.sh
+### Task 3: Finalize
+PLAN
+# A chained plan is expected to score below 30.
+score=$(compute_parallelism_score "$WORK/sequential-plan.md")
+TESTS=$((TESTS + 1))
+if [[ "$score" -lt 30 ]]; then
+    printf '%s\n' "PASS: sequential plan score: $score < 30"
+else
+    printf '%s\n' "FAIL: sequential plan score: $score >= 30"
+    FAILURES=$((FAILURES + 1))
+fi
+# The low score should map to the headless mode.
+mode=$(recommend_execution_mode "$score" "false" 21)
+assert_eq "recommend: headless for low score" "headless" "$mode"
+# Model routing: batch 1 of the parallel plan lists a Create file.
+model=$(classify_batch_model "$WORK/parallel-plan.md" 1)
+assert_eq "model: batch with Create files = sonnet" "sonnet" "$model"
+# Fixture: a batch whose steps are only "Run:" commands, no file entries.
+cat > "$WORK/verify-plan.md" << 'PLAN'
+## Batch 1: Verify everything
+### Task 1: Run all tests
+**Step 1: Run tests**
+Run: `bash scripts/tests/run-all-tests.sh`
+**Step 2: Check line counts**
+Run: `wc -l scripts/*.sh`
+PLAN
+model=$(classify_batch_model "$WORK/verify-plan.md" 1)
+assert_eq "model: batch with only Run commands = haiku" "haiku" "$model"
+# === jq timeout guard (bug #15) ===
+# Static check: the routing library must contain the text 'timeout 30 jq'.
+TESTS=$((TESTS + 1))
+if grep -q 'timeout 30 jq' "$SCRIPT_DIR/../lib/run-plan-routing.sh"; then
+    echo "PASS: jq calls wrapped with timeout 30"
+else
+    echo "FAIL: jq calls should be wrapped with timeout 30"
+    FAILURES=$((FAILURES + 1))
+fi
+# Dynamic check: timeout must actually terminate a jq that never sees EOF.
+# A FIFO with an idle writer simulates corrupted/circular graph data.
+TESTS=$((TESTS + 1))
+fifo="$WORK/slow-fifo"
+mkfifo "$fifo"
+# Hold the FIFO open for writing without writing anything, so jq blocks on
+# read instead of hitting EOF.
+sleep 60 > "$fifo" &
+sleep_pid=$!
+start=$(date +%s)
+# Capture the status rather than discarding it with '|| true'. Otherwise a
+# missing timeout/jq binary (127) or a jq that fails immediately also
+# finishes "within 4s" and the test passes without any timeout firing.
+timeout_rc=0
+timeout 2 jq -r '.test' < "$fifo" 2>/dev/null || timeout_rc=$?
+end=$(date +%s)
+elapsed=$((end - start))
+# Best-effort cleanup of the writer; it may already be gone.
+kill "$sleep_pid" 2>/dev/null || true
+wait "$sleep_pid" 2>/dev/null || true
+# 124 is GNU timeout's "command timed out" status. 143 (128 + SIGTERM) is
+# also accepted for implementations that surface the child's signal death.
+if [[ "$elapsed" -gt 4 ]]; then
+    echo "FAIL: timeout did not kill hanging jq (took ${elapsed}s)"
+    FAILURES=$((FAILURES + 1))
+elif [[ "$timeout_rc" -eq 124 || "$timeout_rc" -eq 143 ]]; then
+    echo "PASS: timeout kills hanging jq within 4s"
+else
+    echo "FAIL: jq was not terminated by timeout (exit status ${timeout_rc} after ${elapsed}s)"
+    FAILURES=$((FAILURES + 1))
+fi
+echo ""
+echo "Results: $((TESTS - FAILURES))/$TESTS passed"
+if [[ $FAILURES -gt 0 ]]; then
+    echo "FAILURES: $FAILURES"
+    exit 1
+fi
+echo "ALL PASSED"

package/scripts/tests/test-run-plan-scoring.sh ADDED Viewed

@@ -0,0 +1,148 @@
+#!/usr/bin/env bash
+set -euo pipefail
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+source "$SCRIPT_DIR/../lib/run-plan-parser.sh"
+source "$SCRIPT_DIR/../lib/run-plan-scoring.sh"
+FAILURES=0
+TESTS=0
+# assert_eq DESC EXPECTED ACTUAL
+# Counts one test in TESTS, then compares EXPECTED and ACTUAL as literal
+# strings. Prints "PASS: DESC" on a match; otherwise prints a FAIL report
+# showing both values and increments FAILURES.
+assert_eq() {
+    local label="$1" want="$2" got="$3"
+    TESTS=$((TESTS + 1))
+    if [[ "$want" == "$got" ]]; then
+        printf '%s\n' "PASS: $label"
+        return 0
+    fi
+    printf '%s\n' "FAIL: $label"
+    printf '%s\n' "  expected: $want"
+    printf '%s\n' "  actual:   $got"
+    FAILURES=$((FAILURES + 1))
+}
+# Gate failed = score 0
+score=$(score_candidate 0 50 100 0 0 0)
+assert_eq "score: gate failed = 0" "0" "$score"
+# Gate passed, good metrics
+score=$(score_candidate 1 50 100 2 0 0)
+TESTS=$((TESTS + 1))
+if [[ "$score" -gt 0 ]]; then
+    echo "PASS: score: gate passed = positive ($score)"
+else
+    echo "FAIL: score: gate passed should be positive ($score)"
+    FAILURES=$((FAILURES + 1))
+fi
+# More tests = higher score
+score_a=$(score_candidate 1 50 100 0 0 0)
+score_b=$(score_candidate 1 80 100 0 0 0)
+TESTS=$((TESTS + 1))
+if [[ "$score_b" -gt "$score_a" ]]; then
+    echo "PASS: score: more tests = higher score ($score_b > $score_a)"
+else
+    echo "FAIL: score: more tests should be higher ($score_b <= $score_a)"
+    FAILURES=$((FAILURES + 1))
+fi
+# Lesson violations = penalty
+score_clean=$(score_candidate 1 50 100 0 0 0)
+score_dirty=$(score_candidate 1 50 100 0 2 0)
+TESTS=$((TESTS + 1))
+if [[ "$score_clean" -gt "$score_dirty" ]]; then
+    echo "PASS: score: lesson violations penalized ($score_clean > $score_dirty)"
+else
+    echo "FAIL: score: lesson violations not penalized ($score_clean <= $score_dirty)"
+    FAILURES=$((FAILURES + 1))
+fi
+# select_winner picks highest score
+winner=$(select_winner "500 300 700 0")
+assert_eq "select_winner: picks index of highest" "2" "$winner"
+# select_winner returns -1 when all zero
+winner=$(select_winner "0 0 0")
+assert_eq "select_winner: all zero = -1 (no winner)" "-1" "$winner"
+# === classify_batch_type ===
+# Scratch directory for the plan fixtures; removed when the script exits.
+WORK=$(mktemp -d)
+trap 'rm -rf "$WORK"' EXIT
+# Mostly "Create:" entries -> expected type "new-file".
+cat > "$WORK/plan-new.md" << 'PLAN'
+## Batch 1: Setup
+- Create: `src/lib.py`
+- Create: `src/util.py`
+- Test: `tests/test_lib.py`
+**Step 1:** Write files
+PLAN
+result=$(classify_batch_type "$WORK/plan-new.md" 1)
+assert_eq "classify: Create dominant = new-file" "new-file" "$result"
+# Only "Modify:" entries -> expected type "refactoring".
+cat > "$WORK/plan-refactor.md" << 'PLAN'
+## Batch 1: Refactor auth
+- Modify: `src/auth.py:20-50`
+- Modify: `src/session.py:10-30`
+**Step 1:** Update auth
+PLAN
+result=$(classify_batch_type "$WORK/plan-refactor.md" 1)
+assert_eq "classify: Modify only = refactoring" "refactoring" "$result"
+# Only "Run:" lines, no file entries -> expected type "test-only".
+cat > "$WORK/plan-test.md" << 'PLAN'
+## Batch 1: Verify
+Run: pytest tests/ -v
+Run: bash scripts/quality-gate.sh --project-root .
+**Step 1:** Run tests
+PLAN
+result=$(classify_batch_type "$WORK/plan-test.md" 1)
+assert_eq "classify: Run only = test-only" "test-only" "$result"
+# Batch title contains "Integration" -> expected type "integration", even
+# though the batch lists both Modify and Create entries.
+cat > "$WORK/plan-integ.md" << 'PLAN'
+## Batch 1: Integration Wiring
+- Modify: `src/main.py`
+- Create: `src/glue.py`
+**Step 1:** Wire components
+PLAN
+result=$(classify_batch_type "$WORK/plan-integ.md" 1)
+assert_eq "classify: integration title = integration" "integration" "$result"
+# === get_prompt_variants ===
+# Test: no history = vanilla first
+result=$(get_prompt_variants "new-file" "/nonexistent/outcomes.json" 3)
+first_line=$(echo "$result" | head -1)
+assert_eq "variants: first is vanilla" "vanilla" "$first_line"
+# Test: returns exactly N lines
+count=$(echo "$result" | wc -l)
+assert_eq "variants: returns N lines" "3" "$count"
+# Test: with learned history, slot 2 picks winner
+cat > "$WORK/outcomes.json" << 'JSON'
+[{"batch_type": "new-file", "prompt_variant": "check all imports before running tests", "won": true, "score": 500}]
+JSON
+result=$(get_prompt_variants "new-file" "$WORK/outcomes.json" 3)
+second_line=$(echo "$result" | sed -n '2p')
+assert_eq "variants: learned winner in slot 2" "check all imports before running tests" "$second_line"
+echo ""
+echo "Results: $((TESTS - FAILURES))/$TESTS passed"
+if [[ $FAILURES -gt 0 ]]; then
+    echo "FAILURES: $FAILURES"
+    exit 1
+fi
+echo "ALL PASSED"

package/scripts/tests/test-run-plan-state.sh ADDED Viewed

@@ -0,0 +1,261 @@
+#!/usr/bin/env bash
+# Test state manager functions
+set -euo pipefail
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+source "$SCRIPT_DIR/../lib/run-plan-state.sh"
+FAILURES=0
+TESTS=0
+# assert_eq DESC EXPECTED ACTUAL
+# Counts one test in TESTS, then compares EXPECTED and ACTUAL as strings.
+# Prints "PASS: DESC" on a match; otherwise prints "FAIL: DESC" followed by
+# both values and increments FAILURES.
+assert_eq() {
+    local label="$1"
+    local want="$2"
+    local got="$3"
+    TESTS=$((TESTS + 1))
+    if [[ "$want" == "$got" ]]; then
+        echo "PASS: $label"
+        return 0
+    fi
+    echo "FAIL: $label"
+    echo "  expected: $want"
+    echo "  actual:   $got"
+    FAILURES=$((FAILURES + 1))
+}
+# --- Temp dir for worktree simulation ---
+WORK=$(mktemp -d)
+# Remove the scratch directory when the script exits.
+trap 'rm -rf "$WORK"' EXIT
+# --- Test: init_state creates the file ---
+# Called with: target directory, plan file path, mode (the plan_file and mode
+# reads further down check the last two values).
+init_state "$WORK" "docs/plans/2026-02-20-feature.md" "headless"
+# This is a file-existence check, not a string comparison, so the counters
+# are updated by hand instead of through assert_eq.
+TESTS=$((TESTS + 1))
+if [[ -f "$WORK/.run-plan-state.json" ]]; then
+    echo "PASS: init_state creates state file"
+else
+    echo "FAIL: init_state should create .run-plan-state.json"
+    FAILURES=$((FAILURES + 1))
+fi
+# All reads in this section target the state file created by init_state in
+# "$WORK"; read_state_field is called as: directory, field name.
+# --- Test: read_state_field reads plan_file ---
+val=$(read_state_field "$WORK" "plan_file")
+assert_eq "read plan_file" "docs/plans/2026-02-20-feature.md" "$val"
+# --- Test: read_state_field reads mode ---
+val=$(read_state_field "$WORK" "mode")
+assert_eq "read mode" "headless" "$val"
+# --- Test: read_state_field reads current_batch ---
+val=$(read_state_field "$WORK" "current_batch")
+assert_eq "read current_batch (initial)" "1" "$val"
+# --- Test: read_state_field reads started_at ---
+val=$(read_state_field "$WORK" "started_at")
+# The exact timestamp is not predictable, so only presence is checked:
+# the value must be non-empty and not the literal string "null".
+TESTS=$((TESTS + 1))
+if [[ -n "$val" && "$val" != "null" ]]; then
+    echo "PASS: started_at is set"
+else
+    echo "FAIL: started_at should be a non-null timestamp"
+    FAILURES=$((FAILURES + 1))
+fi
+# --- Test: completed_batches starts empty ---
+# Compared as text: the expected output for an empty list is exactly "[]".
+val=$(read_state_field "$WORK" "completed_batches")
+assert_eq "completed_batches starts empty" "[]" "$val"
+# --- Test: last_quality_gate starts null ---
+# Compared as text: the expected output for an unset gate is exactly "null".
+val=$(read_state_field "$WORK" "last_quality_gate")
+assert_eq "last_quality_gate starts null" "null" "$val"
+# --- Test: complete_batch updates state ---
+complete_batch "$WORK" 1 42
+val=$(read_state_field "$WORK" "current_batch")
+assert_eq "current_batch after completing batch 1" "2" "$val"
+val=$(read_state_field "$WORK" "completed_batches")
+assert_eq "completed_batches has batch 1" "[1]" "$val"
+val=$(jq -r '.test_counts["1"]' "$WORK/.run-plan-state.json")
+assert_eq "test_count for batch 1" "42" "$val"
+# --- Test: multiple complete_batch calls accumulate ---
+complete_batch "$WORK" 2 55
+val=$(read_state_field "$WORK" "current_batch")
+assert_eq "current_batch after completing batch 2" "3" "$val"
+val=$(read_state_field "$WORK" "completed_batches")
+assert_eq "completed_batches has both" "[1,2]" "$val"
+val=$(jq -r '.test_counts["2"]' "$WORK/.run-plan-state.json")
+assert_eq "test_count for batch 2" "55" "$val"
+# Previous batch 1 count still there
+val=$(jq -r '.test_counts["1"]' "$WORK/.run-plan-state.json")
+assert_eq "test_count for batch 1 still intact" "42" "$val"
+# --- Test: get_previous_test_count ---
+val=$(get_previous_test_count "$WORK")
+assert_eq "previous test count after batch 2" "55" "$val"
+# --- Test: get_previous_test_count with no completions ---
+WORK2=$(mktemp -d)
+trap 'rm -rf "$WORK" "$WORK2"' EXIT
+init_state "$WORK2" "plan.md" "team"
+val=$(get_previous_test_count "$WORK2")
+assert_eq "previous test count with no completions" "0" "$val"
+# --- Test: set_quality_gate ---
+# Called with: directory, batch, passed flag, test count. The values are then
+# read back from .last_quality_gate in the state file.
+set_quality_gate "$WORK" 2 "true" 55
+val=$(jq -r '.last_quality_gate.batch' "$WORK/.run-plan-state.json")
+assert_eq "quality gate batch" "2" "$val"
+val=$(jq -r '.last_quality_gate.passed' "$WORK/.run-plan-state.json")
+assert_eq "quality gate passed" "true" "$val"
+val=$(jq -r '.last_quality_gate.test_count' "$WORK/.run-plan-state.json")
+assert_eq "quality gate test_count" "55" "$val"
+# Verify timestamp exists
+val=$(jq -r '.last_quality_gate.timestamp' "$WORK/.run-plan-state.json")
+# jq -r prints "null" for a missing key, hence the explicit "null" check.
+TESTS=$((TESTS + 1))
+if [[ -n "$val" && "$val" != "null" ]]; then
+    echo "PASS: quality gate has timestamp"
+else
+    echo "FAIL: quality gate should have timestamp"
+    FAILURES=$((FAILURES + 1))
+fi
+# --- Test: set_quality_gate with failed gate ---
+# A second call on the same directory; the assertions expect the stored gate
+# to reflect this call (batch 3, passed=false) rather than the previous one.
+set_quality_gate "$WORK" 3 "false" 50
+val=$(jq -r '.last_quality_gate.batch' "$WORK/.run-plan-state.json")
+assert_eq "failed quality gate batch" "3" "$val"
+val=$(jq -r '.last_quality_gate.passed' "$WORK/.run-plan-state.json")
+assert_eq "failed quality gate passed" "false" "$val"
+# --- Test: complete_batch stores duration ---
+WORK3=$(mktemp -d)
+trap 'rm -rf "$WORK" "$WORK2" "$WORK3"' EXIT
+init_state "$WORK3" "plan.md" "headless"
+complete_batch "$WORK3" 1 42 120
+duration=$(jq -r '.durations["1"]' "$WORK3/.run-plan-state.json")
+assert_eq "complete_batch: stores duration" "120" "$duration"
+# --- Test: duration defaults to 0 when not provided ---
+complete_batch "$WORK3" 2 55
+duration=$(jq -r '.durations["2"]' "$WORK3/.run-plan-state.json")
+assert_eq "complete_batch: duration defaults to 0" "0" "$duration"
+# --- Test: init_state includes durations object ---
+val=$(jq -r '.durations | type' "$WORK3/.run-plan-state.json")
+assert_eq "init_state: has durations object" "object" "$val"
+# --- Test: complete_batch with non-numeric batch_num ('final') ---
+WORK4=$(mktemp -d)
+# Re-register cleanup so it also covers the new directory.
+trap 'rm -rf "$WORK" "$WORK2" "$WORK3" "$WORK4"' EXIT
+init_state "$WORK4" "plan.md" "headless"
+# Complete a numeric batch first, then the string-labelled one, so that both
+# kinds of key end up in the same state file.
+complete_batch "$WORK4" 1 42
+complete_batch "$WORK4" "final" 50
+val=$(jq -r '.test_counts["final"]' "$WORK4/.run-plan-state.json")
+assert_eq "complete_batch: non-numeric batch 'final' stores test count" "50" "$val"
+# No duration argument was passed, so 0 is expected here.
+val=$(jq -r '.durations["final"]' "$WORK4/.run-plan-state.json")
+assert_eq "complete_batch: non-numeric batch 'final' stores duration" "0" "$val"
+# 'final' was completed last, so it should be the last list element.
+val=$(jq -r '.completed_batches | last' "$WORK4/.run-plan-state.json")
+assert_eq "complete_batch: non-numeric batch 'final' in completed_batches" "final" "$val"
+# Numeric batches still work after non-numeric
+val=$(jq -r '.test_counts["1"]' "$WORK4/.run-plan-state.json")
+assert_eq "complete_batch: numeric batch still intact after non-numeric" "42" "$val"
+# --- Test: get_previous_test_count returns -1 when key missing ---
+WORK5=$(mktemp -d)
+trap 'rm -rf "$WORK" "$WORK2" "$WORK3" "$WORK4" "$WORK5"' EXIT
+init_state "$WORK5" "plan.md" "headless"
+# Manually add a batch to completed_batches without a corresponding test_count entry
+jq '.completed_batches += [1]' "$WORK5/.run-plan-state.json" > "$WORK5/.tmp.json" && mv "$WORK5/.tmp.json" "$WORK5/.run-plan-state.json"
+val=$(get_previous_test_count "$WORK5")
+assert_eq "get_previous_test_count: returns -1 when key missing" "-1" "$val"
+# --- Test: set_quality_gate with non-numeric batch_num ('final') ---
+WORK6=$(mktemp -d)
+# Re-register cleanup so it also covers the new directory.
+trap 'rm -rf "$WORK" "$WORK2" "$WORK3" "$WORK4" "$WORK5" "$WORK6"' EXIT
+init_state "$WORK6" "plan.md" "headless"
+complete_batch "$WORK6" 1 42
+# The batch label is a string here; it is expected back unchanged in
+# .last_quality_gate.batch.
+set_quality_gate "$WORK6" "final" "true" 99
+val=$(jq -r '.last_quality_gate.batch' "$WORK6/.run-plan-state.json")
+assert_eq "set_quality_gate: non-numeric batch 'final' stored" "final" "$val"
+val=$(jq -r '.last_quality_gate.passed' "$WORK6/.run-plan-state.json")
+assert_eq "set_quality_gate: non-numeric batch passed=true" "true" "$val"
+val=$(jq -r '.last_quality_gate.test_count' "$WORK6/.run-plan-state.json")
+assert_eq "set_quality_gate: non-numeric batch test_count" "99" "$val"
+# --- Test: end-to-end complete_batch 'final' then get_previous_test_count ---
+WORK7=$(mktemp -d)
+trap 'rm -rf "$WORK" "$WORK2" "$WORK3" "$WORK4" "$WORK5" "$WORK6" "$WORK7"' EXIT
+init_state "$WORK7" "plan.md" "headless"
+complete_batch "$WORK7" 1 42
+complete_batch "$WORK7" "final" 99
+val=$(get_previous_test_count "$WORK7")
+assert_eq "e2e: complete_batch 'final' then get_previous_test_count returns 99" "99" "$val"
+# --- Test: init_state includes costs object ---
+WORK_COST=$(mktemp -d)
+# List only directories that have been assigned so far. The trap body is
+# single-quoted and therefore expanded at exit time; under `set -u` a reference
+# to the not-yet-assigned WORK8 would abort the trap on an unbound variable
+# before rm runs if the script died in this section, leaking every temp dir.
+# WORK8 is added to the cleanup list where it is created.
+trap 'rm -rf "$WORK" "$WORK2" "$WORK3" "$WORK4" "$WORK5" "$WORK6" "$WORK7" "$WORK_COST"' EXIT
+init_state "$WORK_COST" "plan.md" "headless"
+# A freshly initialised state file should carry a costs object...
+val=$(jq -r '.costs | type' "$WORK_COST/.run-plan-state.json")
+assert_eq "init_state: has costs object" "object" "$val"
+# ...and a running total that starts at zero.
+val=$(jq -r '.total_cost_usd' "$WORK_COST/.run-plan-state.json")
+assert_eq "init_state: total_cost_usd starts at 0" "0" "$val"
+# --- Test: set_quality_gate normalizes truthy values (#8) ---
+WORK8=$(mktemp -d)
+# Re-register cleanup so it also covers the new directory.
+trap 'rm -rf "$WORK" "$WORK2" "$WORK3" "$WORK4" "$WORK5" "$WORK6" "$WORK7" "$WORK_COST" "$WORK8"' EXIT
+init_state "$WORK8" "plan.md" "headless"
+# Every case below reuses the same directory and batch number and reads
+# .last_quality_gate.passed right after the call, so each assertion checks the
+# value written by the call directly above it.
+# "1" should normalize to true
+set_quality_gate "$WORK8" 1 "1" 10
+val=$(jq -r '.last_quality_gate.passed' "$WORK8/.run-plan-state.json")
+assert_eq "set_quality_gate: '1' normalizes to true" "true" "$val"
+# "0" should normalize to false
+set_quality_gate "$WORK8" 1 "0" 10
+val=$(jq -r '.last_quality_gate.passed' "$WORK8/.run-plan-state.json")
+assert_eq "set_quality_gate: '0' normalizes to false" "false" "$val"
+# "yes" should normalize to true
+set_quality_gate "$WORK8" 1 "yes" 10
+val=$(jq -r '.last_quality_gate.passed' "$WORK8/.run-plan-state.json")
+assert_eq "set_quality_gate: 'yes' normalizes to true" "true" "$val"
+# "no" should normalize to false
+set_quality_gate "$WORK8" 1 "no" 10
+val=$(jq -r '.last_quality_gate.passed' "$WORK8/.run-plan-state.json")
+assert_eq "set_quality_gate: 'no' normalizes to false" "false" "$val"
+# "random" should normalize to false
+# (i.e. an unrecognised value is expected to be stored as false)
+set_quality_gate "$WORK8" 1 "random" 10
+val=$(jq -r '.last_quality_gate.passed' "$WORK8/.run-plan-state.json")
+assert_eq "set_quality_gate: 'random' normalizes to false" "false" "$val"
+echo ""
+echo "Results: $((TESTS - FAILURES))/$TESTS passed"
+if [[ $FAILURES -gt 0 ]]; then
+    echo "FAILURES: $FAILURES"
+    exit 1
+fi
+echo "ALL PASSED"