autonomous-coding-toolkit 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +22 -0
- package/.claude-plugin/plugin.json +13 -0
- package/LICENSE +21 -0
- package/Makefile +21 -0
- package/README.md +140 -0
- package/SECURITY.md +28 -0
- package/agents/bash-expert.md +113 -0
- package/agents/dependency-auditor.md +138 -0
- package/agents/integration-tester.md +120 -0
- package/agents/lesson-scanner.md +149 -0
- package/agents/python-expert.md +179 -0
- package/agents/service-monitor.md +141 -0
- package/agents/shell-expert.md +147 -0
- package/benchmarks/runner.sh +147 -0
- package/benchmarks/tasks/01-rest-endpoint/rubric.sh +29 -0
- package/benchmarks/tasks/01-rest-endpoint/task.md +17 -0
- package/benchmarks/tasks/02-refactor-module/task.md +8 -0
- package/benchmarks/tasks/03-fix-integration-bug/task.md +8 -0
- package/benchmarks/tasks/04-add-test-coverage/task.md +8 -0
- package/benchmarks/tasks/05-multi-file-feature/task.md +8 -0
- package/bin/act.js +238 -0
- package/commands/autocode.md +6 -0
- package/commands/cancel-ralph.md +18 -0
- package/commands/code-factory.md +53 -0
- package/commands/create-prd.md +55 -0
- package/commands/ralph-loop.md +18 -0
- package/commands/run-plan.md +117 -0
- package/commands/submit-lesson.md +122 -0
- package/docs/ARCHITECTURE.md +630 -0
- package/docs/CONTRIBUTING.md +125 -0
- package/docs/lessons/0001-bare-exception-swallowing.md +34 -0
- package/docs/lessons/0002-async-def-without-await.md +28 -0
- package/docs/lessons/0003-create-task-without-callback.md +28 -0
- package/docs/lessons/0004-hardcoded-test-counts.md +28 -0
- package/docs/lessons/0005-sqlite-without-closing.md +33 -0
- package/docs/lessons/0006-venv-pip-path.md +27 -0
- package/docs/lessons/0007-runner-state-self-rejection.md +35 -0
- package/docs/lessons/0008-quality-gate-blind-spot.md +33 -0
- package/docs/lessons/0009-parser-overcount-empty-batches.md +36 -0
- package/docs/lessons/0010-local-outside-function-bash.md +33 -0
- package/docs/lessons/0011-batch-tests-for-unimplemented-code.md +36 -0
- package/docs/lessons/0012-api-markdown-unescaped-chars.md +33 -0
- package/docs/lessons/0013-export-prefix-env-parsing.md +33 -0
- package/docs/lessons/0014-decorator-registry-import-side-effect.md +43 -0
- package/docs/lessons/0015-frontend-backend-schema-drift.md +43 -0
- package/docs/lessons/0016-event-driven-cold-start-seeding.md +44 -0
- package/docs/lessons/0017-copy-paste-logic-diverges.md +43 -0
- package/docs/lessons/0018-layer-passes-pipeline-broken.md +45 -0
- package/docs/lessons/0019-systemd-envfile-ignores-export.md +41 -0
- package/docs/lessons/0020-persist-state-incrementally.md +44 -0
- package/docs/lessons/0021-dual-axis-testing.md +48 -0
- package/docs/lessons/0022-jsx-factory-shadowing.md +43 -0
- package/docs/lessons/0023-static-analysis-spiral.md +51 -0
- package/docs/lessons/0024-shared-pipeline-implementation.md +55 -0
- package/docs/lessons/0025-defense-in-depth-all-entry-points.md +65 -0
- package/docs/lessons/0026-linter-no-rules-false-enforcement.md +54 -0
- package/docs/lessons/0027-jsx-silent-prop-drop.md +64 -0
- package/docs/lessons/0028-no-infrastructure-in-client-code.md +49 -0
- package/docs/lessons/0029-never-write-secrets-to-files.md +61 -0
- package/docs/lessons/0030-cache-merge-not-replace.md +62 -0
- package/docs/lessons/0031-verify-units-at-boundaries.md +66 -0
- package/docs/lessons/0032-module-lifecycle-subscribe-unsubscribe.md +89 -0
- package/docs/lessons/0033-async-iteration-mutable-snapshot.md +72 -0
- package/docs/lessons/0034-caller-missing-await-silent-discard.md +65 -0
- package/docs/lessons/0035-duplicate-registration-silent-overwrite.md +85 -0
- package/docs/lessons/0036-websocket-dirty-disconnect.md +33 -0
- package/docs/lessons/0037-parallel-agents-worktree-corruption.md +31 -0
- package/docs/lessons/0038-subscribe-no-stored-ref.md +36 -0
- package/docs/lessons/0039-fallback-or-default-hides-bugs.md +34 -0
- package/docs/lessons/0040-event-firehose-filter-first.md +36 -0
- package/docs/lessons/0041-ambiguous-base-dir-path-nesting.md +32 -0
- package/docs/lessons/0042-spec-compliance-insufficient.md +36 -0
- package/docs/lessons/0043-exact-count-extensible-collections.md +32 -0
- package/docs/lessons/0044-relative-file-deps-worktree.md +39 -0
- package/docs/lessons/0045-iterative-design-improvement.md +33 -0
- package/docs/lessons/0046-plan-assertion-math-bugs.md +38 -0
- package/docs/lessons/0047-pytest-single-threaded-default.md +37 -0
- package/docs/lessons/0048-integration-wiring-batch.md +40 -0
- package/docs/lessons/0049-ab-verification.md +41 -0
- package/docs/lessons/0050-editing-sourced-files-during-execution.md +33 -0
- package/docs/lessons/0051-infrastructure-fixes-cant-self-heal.md +30 -0
- package/docs/lessons/0052-uncommitted-changes-poison-quality-gates.md +31 -0
- package/docs/lessons/0053-jq-compact-flag-inconsistency.md +31 -0
- package/docs/lessons/0054-parser-matches-inside-code-blocks.md +30 -0
- package/docs/lessons/0055-agents-compensate-for-garbled-prompts.md +31 -0
- package/docs/lessons/0056-grep-count-exit-code-on-zero.md +42 -0
- package/docs/lessons/0057-new-artifacts-break-git-clean-gates.md +42 -0
- package/docs/lessons/0058-dead-config-keys-never-consumed.md +49 -0
- package/docs/lessons/0059-contract-test-shared-structures.md +53 -0
- package/docs/lessons/0060-set-e-silent-death-in-runners.md +53 -0
- package/docs/lessons/0061-context-injection-dirty-state.md +50 -0
- package/docs/lessons/0062-sibling-bug-neighborhood-scan.md +29 -0
- package/docs/lessons/0063-one-flag-two-lifetimes.md +31 -0
- package/docs/lessons/0064-test-passes-wrong-reason.md +31 -0
- package/docs/lessons/0065-pipefail-grep-count-double-output.md +39 -0
- package/docs/lessons/0066-local-keyword-outside-function.md +37 -0
- package/docs/lessons/0067-stdin-hang-non-interactive-shell.md +36 -0
- package/docs/lessons/0068-agent-builds-wrong-thing-correctly.md +31 -0
- package/docs/lessons/0069-plan-quality-dominates-execution.md +30 -0
- package/docs/lessons/0070-spec-echo-back-prevents-drift.md +31 -0
- package/docs/lessons/0071-positive-instructions-outperform-negative.md +30 -0
- package/docs/lessons/0072-lost-in-the-middle-context-placement.md +30 -0
- package/docs/lessons/0073-unscoped-lessons-cause-false-positives.md +30 -0
- package/docs/lessons/0074-stale-context-injection-wrong-batch.md +32 -0
- package/docs/lessons/0075-research-artifacts-must-persist.md +32 -0
- package/docs/lessons/0076-wrong-decomposition-contaminates-downstream.md +30 -0
- package/docs/lessons/0077-cherry-pick-merges-need-manual-resolution.md +30 -0
- package/docs/lessons/0078-static-review-without-live-test.md +30 -0
- package/docs/lessons/0079-integration-wiring-batch-required.md +32 -0
- package/docs/lessons/FRAMEWORK.md +161 -0
- package/docs/lessons/SUMMARY.md +201 -0
- package/docs/lessons/TEMPLATE.md +85 -0
- package/docs/plans/2026-02-21-code-factory-v2-design.md +204 -0
- package/docs/plans/2026-02-21-code-factory-v2-implementation-plan.md +2189 -0
- package/docs/plans/2026-02-21-code-factory-v2-phase4-design.md +537 -0
- package/docs/plans/2026-02-21-code-factory-v2-phase4-implementation-plan.md +2012 -0
- package/docs/plans/2026-02-21-hardening-pass-design.md +108 -0
- package/docs/plans/2026-02-21-hardening-pass-plan.md +1378 -0
- package/docs/plans/2026-02-21-mab-research-report.md +406 -0
- package/docs/plans/2026-02-21-marketplace-restructure-design.md +240 -0
- package/docs/plans/2026-02-21-marketplace-restructure-plan.md +832 -0
- package/docs/plans/2026-02-21-phase4-completion-plan.md +697 -0
- package/docs/plans/2026-02-21-validator-suite-design.md +148 -0
- package/docs/plans/2026-02-21-validator-suite-plan.md +540 -0
- package/docs/plans/2026-02-22-mab-research-round2.md +556 -0
- package/docs/plans/2026-02-22-mab-run-design.md +462 -0
- package/docs/plans/2026-02-22-mab-run-plan.md +2046 -0
- package/docs/plans/2026-02-22-operations-design-methodology-research.md +681 -0
- package/docs/plans/2026-02-22-research-agent-failure-taxonomy.md +532 -0
- package/docs/plans/2026-02-22-research-code-guideline-policies.md +886 -0
- package/docs/plans/2026-02-22-research-codebase-audit-refactoring.md +908 -0
- package/docs/plans/2026-02-22-research-coding-standards-documentation.md +541 -0
- package/docs/plans/2026-02-22-research-competitive-landscape.md +687 -0
- package/docs/plans/2026-02-22-research-comprehensive-testing.md +1076 -0
- package/docs/plans/2026-02-22-research-context-utilization.md +459 -0
- package/docs/plans/2026-02-22-research-cost-quality-tradeoff.md +548 -0
- package/docs/plans/2026-02-22-research-lesson-transferability.md +508 -0
- package/docs/plans/2026-02-22-research-multi-agent-coordination.md +312 -0
- package/docs/plans/2026-02-22-research-phase-integration.md +602 -0
- package/docs/plans/2026-02-22-research-plan-quality.md +428 -0
- package/docs/plans/2026-02-22-research-prompt-engineering.md +558 -0
- package/docs/plans/2026-02-22-research-unconventional-perspectives.md +528 -0
- package/docs/plans/2026-02-22-research-user-adoption.md +638 -0
- package/docs/plans/2026-02-22-research-verification-effectiveness.md +433 -0
- package/docs/plans/2026-02-23-agent-suite-design.md +299 -0
- package/docs/plans/2026-02-23-agent-suite-plan.md +578 -0
- package/docs/plans/2026-02-23-phase3-cost-infrastructure-design.md +148 -0
- package/docs/plans/2026-02-23-phase3-cost-infrastructure-plan.md +1062 -0
- package/docs/plans/2026-02-23-research-bash-expert-agent.md +543 -0
- package/docs/plans/2026-02-23-research-dependency-auditor-agent.md +564 -0
- package/docs/plans/2026-02-23-research-improving-existing-agents.md +503 -0
- package/docs/plans/2026-02-23-research-integration-tester-agent.md +454 -0
- package/docs/plans/2026-02-23-research-python-expert-agent.md +429 -0
- package/docs/plans/2026-02-23-research-service-monitor-agent.md +425 -0
- package/docs/plans/2026-02-23-research-shell-expert-agent.md +533 -0
- package/docs/plans/2026-02-23-roadmap-to-completion.md +530 -0
- package/docs/plans/2026-02-24-headless-module-split-design.md +98 -0
- package/docs/plans/2026-02-24-headless-module-split.md +443 -0
- package/docs/plans/2026-02-24-lesson-scope-metadata-design.md +228 -0
- package/docs/plans/2026-02-24-lesson-scope-metadata-plan.md +968 -0
- package/docs/plans/2026-02-24-npm-packaging-design.md +841 -0
- package/docs/plans/2026-02-24-npm-packaging-plan.md +1965 -0
- package/docs/plans/audit-findings.md +186 -0
- package/docs/telegram-notification-format.md +98 -0
- package/examples/example-plan.md +51 -0
- package/examples/example-prd.json +72 -0
- package/examples/example-roadmap.md +33 -0
- package/examples/quickstart-plan.md +63 -0
- package/hooks/hooks.json +26 -0
- package/hooks/setup-symlinks.sh +48 -0
- package/hooks/stop-hook.sh +135 -0
- package/package.json +47 -0
- package/policies/bash.md +71 -0
- package/policies/python.md +71 -0
- package/policies/testing.md +61 -0
- package/policies/universal.md +60 -0
- package/scripts/analyze-report.sh +97 -0
- package/scripts/architecture-map.sh +145 -0
- package/scripts/auto-compound.sh +273 -0
- package/scripts/batch-audit.sh +42 -0
- package/scripts/batch-test.sh +101 -0
- package/scripts/entropy-audit.sh +221 -0
- package/scripts/failure-digest.sh +51 -0
- package/scripts/generate-ast-rules.sh +96 -0
- package/scripts/init.sh +112 -0
- package/scripts/lesson-check.sh +428 -0
- package/scripts/lib/common.sh +61 -0
- package/scripts/lib/cost-tracking.sh +153 -0
- package/scripts/lib/ollama.sh +60 -0
- package/scripts/lib/progress-writer.sh +128 -0
- package/scripts/lib/run-plan-context.sh +215 -0
- package/scripts/lib/run-plan-echo-back.sh +231 -0
- package/scripts/lib/run-plan-headless.sh +396 -0
- package/scripts/lib/run-plan-notify.sh +57 -0
- package/scripts/lib/run-plan-parser.sh +81 -0
- package/scripts/lib/run-plan-prompt.sh +215 -0
- package/scripts/lib/run-plan-quality-gate.sh +132 -0
- package/scripts/lib/run-plan-routing.sh +315 -0
- package/scripts/lib/run-plan-sampling.sh +170 -0
- package/scripts/lib/run-plan-scoring.sh +146 -0
- package/scripts/lib/run-plan-state.sh +142 -0
- package/scripts/lib/run-plan-team.sh +199 -0
- package/scripts/lib/telegram.sh +54 -0
- package/scripts/lib/thompson-sampling.sh +176 -0
- package/scripts/license-check.sh +74 -0
- package/scripts/mab-run.sh +575 -0
- package/scripts/module-size-check.sh +146 -0
- package/scripts/patterns/async-no-await.yml +5 -0
- package/scripts/patterns/bare-except.yml +6 -0
- package/scripts/patterns/empty-catch.yml +6 -0
- package/scripts/patterns/hardcoded-localhost.yml +9 -0
- package/scripts/patterns/retry-loop-no-backoff.yml +12 -0
- package/scripts/pipeline-status.sh +197 -0
- package/scripts/policy-check.sh +226 -0
- package/scripts/prior-art-search.sh +133 -0
- package/scripts/promote-mab-lessons.sh +126 -0
- package/scripts/prompts/agent-a-superpowers.md +29 -0
- package/scripts/prompts/agent-b-ralph.md +29 -0
- package/scripts/prompts/judge-agent.md +61 -0
- package/scripts/prompts/planner-agent.md +44 -0
- package/scripts/pull-community-lessons.sh +90 -0
- package/scripts/quality-gate.sh +266 -0
- package/scripts/research-gate.sh +90 -0
- package/scripts/run-plan.sh +329 -0
- package/scripts/scope-infer.sh +159 -0
- package/scripts/setup-ralph-loop.sh +155 -0
- package/scripts/telemetry.sh +230 -0
- package/scripts/tests/run-all-tests.sh +52 -0
- package/scripts/tests/test-act-cli.sh +46 -0
- package/scripts/tests/test-agents-md.sh +87 -0
- package/scripts/tests/test-analyze-report.sh +114 -0
- package/scripts/tests/test-architecture-map.sh +89 -0
- package/scripts/tests/test-auto-compound.sh +169 -0
- package/scripts/tests/test-batch-test.sh +65 -0
- package/scripts/tests/test-benchmark-runner.sh +25 -0
- package/scripts/tests/test-common.sh +168 -0
- package/scripts/tests/test-cost-tracking.sh +158 -0
- package/scripts/tests/test-echo-back.sh +180 -0
- package/scripts/tests/test-entropy-audit.sh +146 -0
- package/scripts/tests/test-failure-digest.sh +66 -0
- package/scripts/tests/test-generate-ast-rules.sh +145 -0
- package/scripts/tests/test-helpers.sh +82 -0
- package/scripts/tests/test-init.sh +47 -0
- package/scripts/tests/test-lesson-check.sh +278 -0
- package/scripts/tests/test-lesson-local.sh +55 -0
- package/scripts/tests/test-license-check.sh +109 -0
- package/scripts/tests/test-mab-run.sh +182 -0
- package/scripts/tests/test-ollama-lib.sh +49 -0
- package/scripts/tests/test-ollama.sh +60 -0
- package/scripts/tests/test-pipeline-status.sh +198 -0
- package/scripts/tests/test-policy-check.sh +124 -0
- package/scripts/tests/test-prior-art-search.sh +96 -0
- package/scripts/tests/test-progress-writer.sh +140 -0
- package/scripts/tests/test-promote-mab-lessons.sh +110 -0
- package/scripts/tests/test-pull-community-lessons.sh +149 -0
- package/scripts/tests/test-quality-gate.sh +241 -0
- package/scripts/tests/test-research-gate.sh +132 -0
- package/scripts/tests/test-run-plan-cli.sh +86 -0
- package/scripts/tests/test-run-plan-context.sh +305 -0
- package/scripts/tests/test-run-plan-e2e.sh +153 -0
- package/scripts/tests/test-run-plan-headless.sh +424 -0
- package/scripts/tests/test-run-plan-notify.sh +124 -0
- package/scripts/tests/test-run-plan-parser.sh +217 -0
- package/scripts/tests/test-run-plan-prompt.sh +254 -0
- package/scripts/tests/test-run-plan-quality-gate.sh +222 -0
- package/scripts/tests/test-run-plan-routing.sh +178 -0
- package/scripts/tests/test-run-plan-scoring.sh +148 -0
- package/scripts/tests/test-run-plan-state.sh +261 -0
- package/scripts/tests/test-run-plan-team.sh +157 -0
- package/scripts/tests/test-scope-infer.sh +150 -0
- package/scripts/tests/test-setup-ralph-loop.sh +63 -0
- package/scripts/tests/test-telegram-env.sh +38 -0
- package/scripts/tests/test-telegram.sh +121 -0
- package/scripts/tests/test-telemetry.sh +46 -0
- package/scripts/tests/test-thompson-sampling.sh +139 -0
- package/scripts/tests/test-validate-all.sh +60 -0
- package/scripts/tests/test-validate-commands.sh +89 -0
- package/scripts/tests/test-validate-hooks.sh +98 -0
- package/scripts/tests/test-validate-lessons.sh +150 -0
- package/scripts/tests/test-validate-plan-quality.sh +235 -0
- package/scripts/tests/test-validate-plans.sh +187 -0
- package/scripts/tests/test-validate-plugin.sh +106 -0
- package/scripts/tests/test-validate-prd.sh +184 -0
- package/scripts/tests/test-validate-skills.sh +134 -0
- package/scripts/validate-all.sh +57 -0
- package/scripts/validate-commands.sh +67 -0
- package/scripts/validate-hooks.sh +89 -0
- package/scripts/validate-lessons.sh +98 -0
- package/scripts/validate-plan-quality.sh +369 -0
- package/scripts/validate-plans.sh +120 -0
- package/scripts/validate-plugin.sh +86 -0
- package/scripts/validate-policies.sh +42 -0
- package/scripts/validate-prd.sh +118 -0
- package/scripts/validate-skills.sh +96 -0
- package/skills/autocode/SKILL.md +285 -0
- package/skills/autocode/ab-verification.md +51 -0
- package/skills/autocode/code-quality-standards.md +37 -0
- package/skills/autocode/competitive-mode.md +364 -0
- package/skills/brainstorming/SKILL.md +97 -0
- package/skills/capture-lesson/SKILL.md +187 -0
- package/skills/check-lessons/SKILL.md +116 -0
- package/skills/dispatching-parallel-agents/SKILL.md +110 -0
- package/skills/executing-plans/SKILL.md +85 -0
- package/skills/finishing-a-development-branch/SKILL.md +201 -0
- package/skills/receiving-code-review/SKILL.md +72 -0
- package/skills/requesting-code-review/SKILL.md +59 -0
- package/skills/requesting-code-review/code-reviewer.md +82 -0
- package/skills/research/SKILL.md +145 -0
- package/skills/roadmap/SKILL.md +115 -0
- package/skills/subagent-driven-development/SKILL.md +98 -0
- package/skills/subagent-driven-development/code-quality-reviewer-prompt.md +18 -0
- package/skills/subagent-driven-development/implementer-prompt.md +73 -0
- package/skills/subagent-driven-development/spec-reviewer-prompt.md +57 -0
- package/skills/systematic-debugging/SKILL.md +134 -0
- package/skills/systematic-debugging/condition-based-waiting.md +64 -0
- package/skills/systematic-debugging/defense-in-depth.md +32 -0
- package/skills/systematic-debugging/root-cause-tracing.md +55 -0
- package/skills/test-driven-development/SKILL.md +167 -0
- package/skills/using-git-worktrees/SKILL.md +219 -0
- package/skills/using-superpowers/SKILL.md +54 -0
- package/skills/verification-before-completion/SKILL.md +140 -0
- package/skills/verify/SKILL.md +82 -0
- package/skills/writing-plans/SKILL.md +128 -0
- package/skills/writing-skills/SKILL.md +93 -0
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# Test shared common.sh functions
|
|
3
|
+
set -euo pipefail
|
|
4
|
+
|
|
5
|
+
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
|
6
|
+
source "$SCRIPT_DIR/../lib/common.sh"
|
|
7
|
+
|
|
8
|
+
FAILURES=0
|
|
9
|
+
TESTS=0
|
|
10
|
+
|
|
11
|
+
# Compare two strings and record the outcome in the global counters.
# Globals:   TESTS (incremented on every call), FAILURES (incremented on mismatch)
# Arguments: $1 - test description
#            $2 - expected value
#            $3 - actual value
# Outputs:   a PASS line, or a FAIL line followed by the expected/actual values
assert_eq() {
  local label="$1"
  local want="$2"
  local got="$3"

  TESTS=$((TESTS + 1))

  # Happy path first: report and leave.
  if [[ "$want" == "$got" ]]; then
    echo "PASS: $label"
    return
  fi

  echo "FAIL: $label"
  echo " expected: $want"
  echo " actual: $got"
  FAILURES=$((FAILURES + 1))
}
|
|
23
|
+
|
|
24
|
+
# Run a command and check its exit status against an expected value.
# Globals:   TESTS (incremented on every call), FAILURES (incremented on mismatch)
# Arguments: $1 - test description
#            $2 - expected exit status
#            $3.. - command (and its arguments) to execute
# Outputs:   a PASS line, or a FAIL line followed by expected/actual statuses;
#            anything the command itself prints is passed through
assert_exit() {
  local test_name="$1"
  local want_rc="$2"
  shift 2

  # Running the command on the left of `||` keeps a non-zero status from
  # aborting the script under `set -e` while still capturing it.
  local got_rc=0
  "$@" || got_rc=$?

  TESTS=$((TESTS + 1))

  if [[ "$want_rc" == "$got_rc" ]]; then
    echo "PASS: $test_name"
    return
  fi

  echo "FAIL: $test_name"
  echo " expected exit: $want_rc"
  echo " actual exit: $got_rc"
  FAILURES=$((FAILURES + 1))
}
|
|
39
|
+
|
|
40
|
+
WORK=$(mktemp -d)
|
|
41
|
+
trap 'rm -rf "$WORK"' EXIT
|
|
42
|
+
|
|
43
|
+
# === detect_project_type tests ===
|
|
44
|
+
|
|
45
|
+
# Python project (pyproject.toml)
|
|
46
|
+
mkdir -p "$WORK/py-proj"
|
|
47
|
+
touch "$WORK/py-proj/pyproject.toml"
|
|
48
|
+
val=$(detect_project_type "$WORK/py-proj")
|
|
49
|
+
assert_eq "detect_project_type: pyproject.toml -> python" "python" "$val"
|
|
50
|
+
|
|
51
|
+
# Python project (setup.py)
|
|
52
|
+
mkdir -p "$WORK/py-setup"
|
|
53
|
+
touch "$WORK/py-setup/setup.py"
|
|
54
|
+
val=$(detect_project_type "$WORK/py-setup")
|
|
55
|
+
assert_eq "detect_project_type: setup.py -> python" "python" "$val"
|
|
56
|
+
|
|
57
|
+
# Node project (package.json)
|
|
58
|
+
mkdir -p "$WORK/node-proj"
|
|
59
|
+
echo '{"name":"test"}' > "$WORK/node-proj/package.json"
|
|
60
|
+
val=$(detect_project_type "$WORK/node-proj")
|
|
61
|
+
assert_eq "detect_project_type: package.json -> node" "node" "$val"
|
|
62
|
+
|
|
63
|
+
# Makefile project
|
|
64
|
+
mkdir -p "$WORK/make-proj"
|
|
65
|
+
echo 'test:' > "$WORK/make-proj/Makefile"
|
|
66
|
+
val=$(detect_project_type "$WORK/make-proj")
|
|
67
|
+
assert_eq "detect_project_type: Makefile -> make" "make" "$val"
|
|
68
|
+
|
|
69
|
+
# Unknown project
|
|
70
|
+
mkdir -p "$WORK/empty"
|
|
71
|
+
val=$(detect_project_type "$WORK/empty")
|
|
72
|
+
assert_eq "detect_project_type: empty -> unknown" "unknown" "$val"
|
|
73
|
+
|
|
74
|
+
# Bash project detection (run-all-tests.sh)
|
|
75
|
+
mkdir -p "$WORK/bash-proj/scripts/tests"
|
|
76
|
+
echo '#!/bin/bash' > "$WORK/bash-proj/scripts/tests/run-all-tests.sh"
|
|
77
|
+
chmod +x "$WORK/bash-proj/scripts/tests/run-all-tests.sh"
|
|
78
|
+
val=$(detect_project_type "$WORK/bash-proj")
|
|
79
|
+
assert_eq "detect_project_type: bash project with run-all-tests.sh" "bash" "$val"
|
|
80
|
+
|
|
81
|
+
# Bash project with test-*.sh glob
|
|
82
|
+
mkdir -p "$WORK/bash-proj2/scripts/tests"
|
|
83
|
+
touch "$WORK/bash-proj2/scripts/tests/test-foo.sh"
|
|
84
|
+
val=$(detect_project_type "$WORK/bash-proj2")
|
|
85
|
+
assert_eq "detect_project_type: bash project with test-*.sh files" "bash" "$val"
|
|
86
|
+
|
|
87
|
+
# === strip_json_fences tests ===
|
|
88
|
+
|
|
89
|
+
val=$(echo '```json
|
|
90
|
+
{"key":"value"}
|
|
91
|
+
```' | strip_json_fences)
|
|
92
|
+
assert_eq "strip_json_fences: removes fences" '{"key":"value"}' "$val"
|
|
93
|
+
|
|
94
|
+
val=$(echo '{"key":"value"}' | strip_json_fences)
|
|
95
|
+
assert_eq "strip_json_fences: plain JSON unchanged" '{"key":"value"}' "$val"
|
|
96
|
+
|
|
97
|
+
# === check_memory_available tests ===
|
|
98
|
+
|
|
99
|
+
# This test just verifies the function exists and returns 0/1
|
|
100
|
+
# We can't control actual memory, so test the interface
|
|
101
|
+
assert_exit "check_memory_available: runs without error" 0 \
|
|
102
|
+
check_memory_available 0
|
|
103
|
+
|
|
104
|
+
# Test 1GB threshold: should also always pass on any real system
|
|
105
|
+
assert_exit "check_memory_available: threshold 1 always passes" 0 \
|
|
106
|
+
check_memory_available 1
|
|
107
|
+
|
|
108
|
+
# Test that check_memory_available uses MB internally (not GB)
|
|
109
|
+
# Verify it doesn't use free -g (which truncates)
|
|
110
|
+
TESTS=$((TESTS + 1))
|
|
111
|
+
if grep -q 'free -g' "$SCRIPT_DIR/../lib/common.sh"; then
|
|
112
|
+
echo "FAIL: check_memory_available should use free -m, not free -g"
|
|
113
|
+
FAILURES=$((FAILURES + 1))
|
|
114
|
+
else
|
|
115
|
+
echo "PASS: check_memory_available uses free -m (no free -g in common.sh)"
|
|
116
|
+
fi
|
|
117
|
+
|
|
118
|
+
# Test that check_memory_available returns exit 2 when free is unavailable
|
|
119
|
+
# Create a wrapper that hides the real free command
|
|
120
|
+
# Run `check_memory_available 4` with a stub `free` that prints nothing.
#
# The stub directory is prepended to PATH (not substituted for it), so every
# other tool such as awk still resolves normally. All work happens in a
# subshell so the PATH change and the EXIT trap do not leak into the caller.
#
# Returns: the exit status of `check_memory_available 4`
#          (1 if the scratch directory cannot be created).
_test_no_free() {
  (
    local fake_bin
    fake_bin=$(mktemp -d) || exit 1
    # Remove the scratch directory when the subshell exits. An EXIT trap that
    # does not call `exit` leaves the subshell's exit status untouched, so the
    # caller still sees the status of check_memory_available.
    trap 'rm -rf -- "$fake_bin"' EXIT
    cat > "$fake_bin/free" <<'EOF'
#!/bin/bash
# Output nothing — simulates unavailability without breaking awk in PATH
EOF
    chmod +x "$fake_bin/free"
    PATH="$fake_bin:$PATH" check_memory_available 4
  )
}
|
|
133
|
+
assert_exit "check_memory_available: returns 2 when free unavailable" 2 \
|
|
134
|
+
_test_no_free
|
|
135
|
+
|
|
136
|
+
# === detect_project_type nullglob safety (#24) ===
|
|
137
|
+
|
|
138
|
+
# Test that bash detection works even with nullglob set
|
|
139
|
+
mkdir -p "$WORK/bash-nullglob/scripts/tests"
|
|
140
|
+
touch "$WORK/bash-nullglob/scripts/tests/test-bar.sh"
|
|
141
|
+
val=$(shopt -s nullglob; detect_project_type "$WORK/bash-nullglob")
|
|
142
|
+
assert_eq "detect_project_type: bash detection works with nullglob set" "bash" "$val"
|
|
143
|
+
|
|
144
|
+
# Test that compgen -G is used instead of ls for glob detection
|
|
145
|
+
TESTS=$((TESTS + 1))
|
|
146
|
+
if grep -q 'compgen -G' "$SCRIPT_DIR/../lib/common.sh"; then
|
|
147
|
+
echo "PASS: detect_project_type uses compgen -G (nullglob-safe)"
|
|
148
|
+
else
|
|
149
|
+
echo "FAIL: detect_project_type should use compgen -G, not ls"
|
|
150
|
+
FAILURES=$((FAILURES + 1))
|
|
151
|
+
fi
|
|
152
|
+
|
|
153
|
+
# === require_command tests ===
|
|
154
|
+
|
|
155
|
+
assert_exit "require_command: bash exists" 0 \
|
|
156
|
+
require_command "bash"
|
|
157
|
+
|
|
158
|
+
assert_exit "require_command: nonexistent-binary-xyz fails" 1 \
|
|
159
|
+
require_command "nonexistent-binary-xyz"
|
|
160
|
+
|
|
161
|
+
# === Summary ===
|
|
162
|
+
echo ""
|
|
163
|
+
echo "Results: $((TESTS - FAILURES))/$TESTS passed"
|
|
164
|
+
if [[ $FAILURES -gt 0 ]]; then
|
|
165
|
+
echo "FAILURES: $FAILURES"
|
|
166
|
+
exit 1
|
|
167
|
+
fi
|
|
168
|
+
echo "ALL PASSED"
|
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# Test cost tracking functions
|
|
3
|
+
set -euo pipefail
|
|
4
|
+
|
|
5
|
+
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
|
6
|
+
source "$SCRIPT_DIR/test-helpers.sh"
|
|
7
|
+
source "$SCRIPT_DIR/../lib/run-plan-state.sh"
|
|
8
|
+
source "$SCRIPT_DIR/../lib/cost-tracking.sh"
|
|
9
|
+
|
|
10
|
+
# --- Setup ---
|
|
11
|
+
WORK=$(mktemp -d)
|
|
12
|
+
trap 'rm -rf "$WORK"' EXIT
|
|
13
|
+
|
|
14
|
+
# Create mock JSONL session directory
|
|
15
|
+
MOCK_SESSION_DIR="$WORK/.claude/projects/test-project"
|
|
16
|
+
mkdir -p "$MOCK_SESSION_DIR"
|
|
17
|
+
|
|
18
|
+
# Mock session JSONL with token usage data
|
|
19
|
+
MOCK_SESSION_ID="test-session-abc-123"
|
|
20
|
+
cat > "$MOCK_SESSION_DIR/${MOCK_SESSION_ID}.jsonl" << 'JSONL'
|
|
21
|
+
{"type":"summary","costUSD":0.0423,"durationMs":12345,"inputTokens":8500,"outputTokens":2100,"cacheReadTokens":3200,"cacheWriteTokens":1000,"model":"claude-sonnet-4-6"}
|
|
22
|
+
JSONL
|
|
23
|
+
|
|
24
|
+
# --- Test: find_session_jsonl locates file ---
|
|
25
|
+
result=$(find_session_jsonl "$MOCK_SESSION_ID" "$WORK/.claude")
|
|
26
|
+
assert_contains "find_session_jsonl: returns path" "$MOCK_SESSION_ID" "$result"
|
|
27
|
+
|
|
28
|
+
# --- Test: find_session_jsonl returns empty for missing session ---
|
|
29
|
+
result=$(find_session_jsonl "nonexistent-session" "$WORK/.claude")
|
|
30
|
+
assert_eq "find_session_jsonl: empty for missing" "" "$result"
|
|
31
|
+
|
|
32
|
+
# --- Test: extract_session_cost returns JSON with token fields ---
|
|
33
|
+
cost_json=$(extract_session_cost "$MOCK_SESSION_ID" "$WORK/.claude")
|
|
34
|
+
assert_contains "extract: has input_tokens" "input_tokens" "$cost_json"
|
|
35
|
+
assert_contains "extract: has output_tokens" "output_tokens" "$cost_json"
|
|
36
|
+
assert_contains "extract: has cache_read_tokens" "cache_read_tokens" "$cost_json"
|
|
37
|
+
assert_contains "extract: has estimated_cost_usd" "estimated_cost_usd" "$cost_json"
|
|
38
|
+
|
|
39
|
+
input_tokens=$(echo "$cost_json" | jq -r '.input_tokens')
|
|
40
|
+
assert_eq "extract: input_tokens value" "8500" "$input_tokens"
|
|
41
|
+
|
|
42
|
+
output_tokens=$(echo "$cost_json" | jq -r '.output_tokens')
|
|
43
|
+
assert_eq "extract: output_tokens value" "2100" "$output_tokens"
|
|
44
|
+
|
|
45
|
+
cache_read=$(echo "$cost_json" | jq -r '.cache_read_tokens')
|
|
46
|
+
assert_eq "extract: cache_read_tokens value" "3200" "$cache_read"
|
|
47
|
+
|
|
48
|
+
cost_usd=$(echo "$cost_json" | jq -r '.estimated_cost_usd')
|
|
49
|
+
assert_eq "extract: cost from JSONL summary" "0.0423" "$cost_usd"
|
|
50
|
+
|
|
51
|
+
# Fix #39: tracking_status field — found when real summary data exists
|
|
52
|
+
tracking_status=$(echo "$cost_json" | jq -r '.tracking_status')
|
|
53
|
+
assert_eq "extract: tracking_status is found for real session" "found" "$tracking_status"
|
|
54
|
+
|
|
55
|
+
# --- Test: extract_session_cost handles missing session ---
|
|
56
|
+
cost_json=$(extract_session_cost "nonexistent" "$WORK/.claude" 2>/dev/null)
|
|
57
|
+
input_tokens=$(echo "$cost_json" | jq -r '.input_tokens')
|
|
58
|
+
assert_eq "extract: missing session returns 0 input_tokens" "0" "$input_tokens"
|
|
59
|
+
|
|
60
|
+
# Fix #39: tracking_status:"missing_file" distinguishes broken tracking from true $0 cost
|
|
61
|
+
tracking_status=$(echo "$cost_json" | jq -r '.tracking_status')
|
|
62
|
+
assert_eq "extract: missing session tracking_status is missing_file" "missing_file" "$tracking_status"
|
|
63
|
+
|
|
64
|
+
# Fix #36: session_id with special chars must not corrupt JSON
|
|
65
|
+
SPECIAL_SID='injected"value\with\backslash'
|
|
66
|
+
MOCK_SPECIAL_DIR="$WORK/.claude/projects/test-project"
|
|
67
|
+
# No JSONL for this session — tests injection-safe fallback path
|
|
68
|
+
cost_json=$(extract_session_cost "$SPECIAL_SID" "$WORK/.claude" 2>/dev/null)
|
|
69
|
+
assert_eq "extract: special chars in session_id produce valid JSON" "0" "$(echo "$cost_json" | jq -r '.input_tokens')"
|
|
70
|
+
assert_eq "extract: tracking_status for special-char session" "missing_file" "$(echo "$cost_json" | jq -r '.tracking_status')"
|
|
71
|
+
|
|
72
|
+
# Fix #39: tracking_status:"no_summary" for JSONL with no summary line
|
|
73
|
+
NO_SUMMARY_SID="no-summary-session"
|
|
74
|
+
cat > "$MOCK_SESSION_DIR/${NO_SUMMARY_SID}.jsonl" << 'JSONL'
|
|
75
|
+
{"type":"user","message":"hello"}
|
|
76
|
+
{"type":"assistant","message":"hi"}
|
|
77
|
+
JSONL
|
|
78
|
+
cost_json=$(extract_session_cost "$NO_SUMMARY_SID" "$WORK/.claude" 2>/dev/null)
|
|
79
|
+
tracking_status=$(echo "$cost_json" | jq -r '.tracking_status')
|
|
80
|
+
assert_eq "extract: no summary line tracking_status is no_summary" "no_summary" "$tracking_status"
|
|
81
|
+
assert_eq "extract: no summary line returns 0 cost" "0" "$(echo "$cost_json" | jq -r '.estimated_cost_usd')"
|
|
82
|
+
|
|
83
|
+
# Fix #35: grep on file with no summary line must not kill set -e callers
|
|
84
|
+
# (tested implicitly above — test suite uses set -euo pipefail and did not die)
|
|
85
|
+
|
|
86
|
+
# --- Test: record_batch_cost writes to state ---
|
|
87
|
+
init_state "$WORK" "plan.md" "headless"
|
|
88
|
+
record_batch_cost "$WORK" 1 "$MOCK_SESSION_ID" "$WORK/.claude"
|
|
89
|
+
|
|
90
|
+
costs_batch_1=$(jq -r '.costs["1"].input_tokens' "$WORK/.run-plan-state.json")
|
|
91
|
+
assert_eq "record: batch 1 input_tokens in state" "8500" "$costs_batch_1"
|
|
92
|
+
|
|
93
|
+
cost_usd=$(jq -r '.costs["1"].estimated_cost_usd' "$WORK/.run-plan-state.json")
|
|
94
|
+
assert_eq "record: batch 1 cost_usd in state" "0.0423" "$cost_usd"
|
|
95
|
+
|
|
96
|
+
session_id=$(jq -r '.costs["1"].session_id' "$WORK/.run-plan-state.json")
|
|
97
|
+
assert_eq "record: batch 1 session_id in state" "$MOCK_SESSION_ID" "$session_id"
|
|
98
|
+
|
|
99
|
+
total_cost=$(jq -r '.total_cost_usd' "$WORK/.run-plan-state.json")
|
|
100
|
+
assert_eq "record: total_cost_usd updated" "0.0423" "$total_cost"
|
|
101
|
+
|
|
102
|
+
# Fix #41: total_cost_usd must be 0 (not null) when costs object is empty
WORK2=$(mktemp -d)
# `trap ... EXIT` replaces the previously registered EXIT handler, so list
# $WORK here as well; otherwise the first temp dir would never be removed.
trap 'rm -rf "$WORK" "$WORK2"' EXIT
init_state "$WORK2" "plan.md" "headless"
# Manually inject an empty costs object then verify // 0 guard.
# jq and mv are separate statements so that a jq failure aborts the script
# under `set -e` instead of being hidden on the left-hand side of `&&`.
jq '.costs = {}' "$WORK2/.run-plan-state.json" > "$WORK2/.run-plan-state.json.tmp"
mv "$WORK2/.run-plan-state.json.tmp" "$WORK2/.run-plan-state.json"
# The session id below has no JSONL file, so this exercises the fallback path.
record_batch_cost "$WORK2" 1 "nonexistent-for-null-test" "$WORK2/.claude" 2>/dev/null
null_guard=$(jq -r '.total_cost_usd' "$WORK2/.run-plan-state.json")
assert_eq "record: total_cost_usd is 0 not null for empty costs" "0" "$null_guard"
|
|
111
|
+
|
|
112
|
+
# --- Test: record_batch_cost accumulates across batches ---
|
|
113
|
+
MOCK_SESSION_ID_2="test-session-def-456"
|
|
114
|
+
cat > "$MOCK_SESSION_DIR/${MOCK_SESSION_ID_2}.jsonl" << 'JSONL'
|
|
115
|
+
{"type":"summary","costUSD":0.031,"durationMs":9000,"inputTokens":7200,"outputTokens":1800,"cacheReadTokens":5000,"cacheWriteTokens":500,"model":"claude-sonnet-4-6"}
|
|
116
|
+
JSONL
|
|
117
|
+
|
|
118
|
+
record_batch_cost "$WORK" 2 "$MOCK_SESSION_ID_2" "$WORK/.claude"
|
|
119
|
+
|
|
120
|
+
total_cost=$(jq -r '.total_cost_usd' "$WORK/.run-plan-state.json")
|
|
121
|
+
# 0.0423 + 0.031 = 0.0733
|
|
122
|
+
assert_eq "record: total_cost accumulates" "0.0733" "$total_cost"
|
|
123
|
+
|
|
124
|
+
# --- Test: check_budget returns 0 when under budget ---
|
|
125
|
+
assert_exit "check_budget: under budget returns 0" 0 check_budget "$WORK" "1.00"
|
|
126
|
+
|
|
127
|
+
# --- Test: check_budget returns 1 when over budget ---
|
|
128
|
+
assert_exit "check_budget: over budget returns 1" 1 check_budget "$WORK" "0.05"
|
|
129
|
+
|
|
130
|
+
# Fix #40: check_budget awk fallback — run check_budget in a shell whose PATH
# contains awk but not bc, so the non-bc comparison path is the one exercised.
# Build a directory of symlinks to everything in /usr/bin and /bin except bc.
NO_BC_PATH_DIR=$(mktemp -d)
# `trap ... EXIT` replaces any earlier EXIT handler, so every temp dir created
# so far is listed again rather than only the newest one.
trap 'rm -rf "$WORK" "$WORK2" "$NO_BC_PATH_DIR"' EXIT
for _f in /usr/bin/* /bin/*; do
  _bn=${_f##*/}
  [[ "$_bn" == "bc" ]] && continue
  # First match wins, mirroring PATH lookup order (/usr/bin before /bin).
  [[ -e "$NO_BC_PATH_DIR/$_bn" ]] && continue
  ln -sf "$_f" "$NO_BC_PATH_DIR/$_bn" 2>/dev/null || true
done
# NOTE: only /usr/bin and /bin are mirrored. Tools installed elsewhere
# (nvm node, linuxbrew, .local/bin) are NOT reachable through this PATH, so
# the libraries under test must find everything they need in those two dirs.
WORK3=$(mktemp -d)
trap 'rm -rf "$WORK" "$WORK2" "$NO_BC_PATH_DIR" "$WORK3"' EXIT
# Seed $WORK3 with one recorded batch, using the bc-less PATH for the whole
# child shell.
PATH="$NO_BC_PATH_DIR" bash -c "
source '$SCRIPT_DIR/../lib/run-plan-state.sh'
source '$SCRIPT_DIR/../lib/cost-tracking.sh'
init_state '$WORK3' 'plan.md' 'headless'
record_batch_cost '$WORK3' 1 '$MOCK_SESSION_ID' '$WORK/.claude' 2>/dev/null
" 2>/dev/null
# The restricted PATH must apply to the entire child shell. Writing
# `PATH=... source file; check_budget ...` inside the -c string would scope the
# assignment to the `source` builtin only, and check_budget would then run
# with the full PATH (bc visible). `env` sets PATH for the whole bash process.
assert_exit "check_budget: awk fallback under budget returns 0" 0 \
  env PATH="$NO_BC_PATH_DIR" bash -c "source '$SCRIPT_DIR/../lib/cost-tracking.sh' 2>/dev/null; check_budget '$WORK3' '1.00'" 2>/dev/null
assert_exit "check_budget: awk fallback over budget returns 1" 1 \
  env PATH="$NO_BC_PATH_DIR" bash -c "source '$SCRIPT_DIR/../lib/cost-tracking.sh' 2>/dev/null; check_budget '$WORK3' '0.01'" 2>/dev/null
|
|
153
|
+
|
|
154
|
+
# --- Test: get_total_cost returns accumulated cost ---
|
|
155
|
+
total=$(get_total_cost "$WORK")
|
|
156
|
+
assert_eq "get_total_cost: returns accumulated" "0.0733" "$total"
|
|
157
|
+
|
|
158
|
+
report_results
|
|
@@ -0,0 +1,180 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# Test spec echo-back gate function
|
|
3
|
+
set -euo pipefail
|
|
4
|
+
|
|
5
|
+
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
source "$SCRIPT_DIR/../lib/run-plan-parser.sh"
# Loading the echo-back library is best-effort: its stderr is discarded and a
# failing source is tolerated so the guard below can report the problem.
source "$SCRIPT_DIR/../lib/run-plan-echo-back.sh" 2>/dev/null || true

# The whole suite exercises echo_back_check; stop right away if sourcing did
# not define it.
if ! type echo_back_check &>/dev/null; then
  echo "ERROR: echo_back_check function not available"
  exit 1
fi
|
|
15
|
+
|
|
16
|
+
FAILURES=0
|
|
17
|
+
TESTS=0
|
|
18
|
+
|
|
19
|
+
# assert_eq DESC EXPECTED ACTUAL
# Compares EXPECTED and ACTUAL as plain strings.
# Globals:  TESTS (incremented on every call), FAILURES (incremented on mismatch)
# Outputs:  one "PASS: ..." or "FAIL: ..." line on stdout
assert_eq() {
  local desc="$1" expected="$2" actual="$3"
  TESTS=$((TESTS + 1))
  # printf rather than echo: values are printed verbatim regardless of
  # echo's backslash/xpg_echo handling.
  if [[ "$expected" == "$actual" ]]; then
    printf 'PASS: %s\n' "$desc"
  else
    printf 'FAIL: %s (expected: %s, got: %s)\n' "$desc" "$expected" "$actual"
    FAILURES=$((FAILURES + 1))
  fi
}
|
|
29
|
+
|
|
30
|
+
# Scratch root for everything this suite writes; removed when the script exits.
TMPDIR_ROOT=$(mktemp -d)
trap 'rm -rf "$TMPDIR_ROOT"' EXIT

LOG_DIR="$TMPDIR_ROOT/logs"
mkdir -p "$LOG_DIR"

# =============================================================================
# Mock claude command: returns canned responses based on prompt content
# =============================================================================

MOCK_SCRIPT="$TMPDIR_ROOT/mock-claude"
# Quoted delimiter: the script below is written out literally, nothing is
# expanded by this shell.
cat > "$MOCK_SCRIPT" << 'MOCK'
#!/usr/bin/env bash
# Mock claude CLI for testing echo-back
# Reads the prompt from -p and the model from --model, then prints a canned
# response selected by substring matches on the prompt.

prompt=""
model=""
while [[ $# -gt 0 ]]; do
  case "$1" in
    # Bounded shift: when a flag is the last argument (no value), a plain
    # `shift 2` fails without shifting and this loop would spin forever.
    -p)      prompt="${2-}"; shift "$(( $# > 1 ? 2 : 1 ))" ;;
    --model) model="${2-}";  shift "$(( $# > 1 ? 2 : 1 ))" ;;
    *)       shift ;;
  esac
done

# Retry restatement (check BEFORE initial restatement — both contain "restate")
if [[ "$prompt" == *"Re-read the specification"* && -z "$model" ]]; then
  echo "This batch builds the authentication system as specified."
  exit 0
fi

# Echo-back restatement request → return matching restatement
if [[ "$prompt" == *"restate in one paragraph"* && -z "$model" ]]; then
  if [[ "$prompt" == *"data model"* ]]; then
    echo "This batch creates a data model with validation and tests."
  elif [[ "$prompt" == *"MISMATCH_TEST"* ]]; then
    echo "This batch does something completely unrelated to the spec."
  else
    echo "This batch implements the specified functionality."
  fi
  exit 0
fi

# Haiku verification → answer NO only when the prompt mentions "unrelated"
if [[ "$model" == "haiku" ]]; then
  if [[ "$prompt" == *"unrelated"* ]]; then
    echo "NO - The restatement does not match the original spec."
  else
    echo "YES - The restatement captures the key goals."
  fi
  exit 0
fi

echo "Unknown mock scenario"
exit 0
MOCK
chmod +x "$MOCK_SCRIPT"
|
|
88
|
+
|
|
89
|
+
# =============================================================================
# Test 1: Matching restatement passes
# =============================================================================

batch_text="Create data model with validation.
Add tests for the model."

# Capture the status through an if/else so a non-zero return is recorded
# instead of aborting the script under `set -e`.
if echo_back_check "$batch_text" "$LOG_DIR" 1 "$MOCK_SCRIPT" >/dev/null 2>&1; then
  exit_code=0
else
  exit_code=$?
fi
assert_eq "matching restatement passes" "0" "$exit_code"

# =============================================================================
# Test 2: Mismatched restatement triggers retry
# =============================================================================

mismatch_text="MISMATCH_TEST: Build the authentication system."

if echo_back_check "$mismatch_text" "$LOG_DIR" 2 "$MOCK_SCRIPT" >/dev/null 2>&1; then
  exit_code=0
else
  exit_code=$?
fi
# The retry restatement doesn't mention "unrelated", so haiku says YES → passes on retry
assert_eq "mismatch with successful retry passes" "0" "$exit_code"
|
|
110
|
+
|
|
111
|
+
# =============================================================================
# Test 3: Empty restatement skips check
# =============================================================================

# A mock that prints nothing and exits 0 for every invocation.
EMPTY_MOCK="$TMPDIR_ROOT/mock-empty"
printf '%s\n' \
  '#!/usr/bin/env bash' \
  '# Returns empty for all calls' \
  'exit 0' > "$EMPTY_MOCK"
chmod +x "$EMPTY_MOCK"

if echo_back_check "some batch text" "$LOG_DIR" 3 "$EMPTY_MOCK" >/dev/null 2>&1; then
  exit_code=0
else
  exit_code=$?
fi
assert_eq "empty restatement skips gracefully" "0" "$exit_code"
|
|
126
|
+
|
|
127
|
+
# =============================================================================
# Test 4: Log file is created
# =============================================================================

# Checks for the log of batch 1 (the batch number used by Test 1).
TESTS=$((TESTS + 1))
if [[ ! -f "$LOG_DIR/batch-1-echo-back.log" ]]; then
  echo "FAIL: echo-back log file not created"
  FAILURES=$((FAILURES + 1))
else
  echo "PASS: echo-back log file created"
fi
|
|
138
|
+
|
|
139
|
+
# =============================================================================
# Test 5: Persistent failure mock (both attempts fail)
# =============================================================================

FAIL_MOCK="$TMPDIR_ROOT/mock-fail"
# Quoted delimiter: the mock below is written out literally.
cat > "$FAIL_MOCK" << 'MOCK'
#!/usr/bin/env bash
# Mock claude CLI whose restatements are always wrong and whose haiku
# verification always answers NO.
prompt=""
model=""
while [[ $# -gt 0 ]]; do
  case "$1" in
    # Bounded shift: when a flag is the last argument (no value), a plain
    # `shift 2` fails without shifting and this loop would spin forever.
    -p)      prompt="${2-}"; shift "$(( $# > 1 ? 2 : 1 ))" ;;
    --model) model="${2-}";  shift "$(( $# > 1 ? 2 : 1 ))" ;;
    *)       shift ;;
  esac
done

if [[ "$prompt" == *"restate"* || "$prompt" == *"Re-read"* ]]; then
  echo "This is completely wrong and unrelated."
  exit 0
fi

if [[ "$model" == "haiku" ]]; then
  echo "NO - The restatement is completely wrong."
  exit 0
fi

exit 0
MOCK
chmod +x "$FAIL_MOCK"

# `|| exit_code=$?` records the failure instead of tripping `set -e`.
exit_code=0
echo_back_check "Build the user dashboard" "$LOG_DIR" 5 "$FAIL_MOCK" >/dev/null 2>&1 || exit_code=$?
assert_eq "persistent mismatch fails" "1" "$exit_code"
|
|
173
|
+
|
|
174
|
+
# Final tally: any recorded failure turns into exit status 1.
echo ""
echo "Results: $((TESTS - FAILURES))/$TESTS passed"
if (( FAILURES > 0 )); then
  echo "FAILURES: $FAILURES"
  exit 1
fi
echo "ALL PASSED"
|
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# Test entropy-audit.sh — verifies refactored behavior
|
|
3
|
+
set -euo pipefail
|
|
4
|
+
|
|
5
|
+
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
|
6
|
+
EA="$SCRIPT_DIR/../entropy-audit.sh"
|
|
7
|
+
|
|
8
|
+
FAILURES=0
|
|
9
|
+
TESTS=0
|
|
10
|
+
|
|
11
|
+
# assert_eq DESC EXPECTED ACTUAL
# Compares EXPECTED and ACTUAL as plain strings.
# Globals:  TESTS (incremented on every call), FAILURES (incremented on mismatch)
# Outputs:  "PASS: DESC", or "FAIL: DESC" followed by the expected and actual
#           values, on stdout
assert_eq() {
  local desc="$1" expected="$2" actual="$3"
  TESTS=$((TESTS + 1))
  # printf rather than echo: values are printed verbatim regardless of
  # echo's backslash/xpg_echo handling.
  if [[ "$expected" == "$actual" ]]; then
    printf 'PASS: %s\n' "$desc"
  else
    printf 'FAIL: %s\n' "$desc"
    printf ' expected: %s\n' "$expected"
    printf ' actual: %s\n' "$actual"
    FAILURES=$((FAILURES + 1))
  fi
}
|
|
23
|
+
|
|
24
|
+
# assert_exit DESC EXPECTED_EXIT CMD [ARGS...]
# Runs CMD with its stdout and stderr discarded and compares its exit status
# with EXPECTED_EXIT.
# Globals:  TESTS (incremented on every call), FAILURES (incremented on mismatch)
# Outputs:  "PASS: DESC", or "FAIL: DESC" followed by the expected and actual
#           exit codes, on stdout
assert_exit() {
  local desc="$1" expected_exit="$2"
  shift 2
  local actual_exit=0
  # `|| actual_exit=$?` records a failing status without tripping `set -e`.
  "$@" >/dev/null 2>&1 || actual_exit=$?
  TESTS=$((TESTS + 1))
  # printf rather than echo, matching the other assertion helpers.
  if [[ "$expected_exit" == "$actual_exit" ]]; then
    printf 'PASS: %s\n' "$desc"
  else
    printf 'FAIL: %s\n' "$desc"
    printf ' expected exit: %s\n' "$expected_exit"
    printf ' actual exit: %s\n' "$actual_exit"
    FAILURES=$((FAILURES + 1))
  fi
}
|
|
39
|
+
|
|
40
|
+
# assert_contains DESC NEEDLE HAYSTACK
# Passes when NEEDLE occurs in HAYSTACK as a fixed string (no regex).
# Globals:  TESTS (incremented on every call), FAILURES (incremented on a miss)
# Outputs:  "PASS: DESC", or "FAIL: DESC" followed by the missing needle, on
#           stdout
assert_contains() {
  local desc="$1" needle="$2" haystack="$3"
  TESTS=$((TESTS + 1))
  # Here-string instead of `echo | grep -q`: with `set -o pipefail`, grep -q
  # exiting at the first match can kill echo with SIGPIPE on a large haystack
  # and turn a real match into a failure. `--` keeps a needle that starts
  # with '-' from being parsed as a grep option.
  if grep -qF -- "$needle" <<<"$haystack"; then
    printf 'PASS: %s\n' "$desc"
  else
    printf 'FAIL: %s\n' "$desc"
    printf ' expected to contain: %s\n' "$needle"
    FAILURES=$((FAILURES + 1))
  fi
}
|
|
51
|
+
|
|
52
|
+
WORK=$(mktemp -d)
|
|
53
|
+
trap 'rm -rf "$WORK"' EXIT
|
|
54
|
+
|
|
55
|
+
# The four checks below inspect the text of entropy-audit.sh with grep; they
# do not execute it.

# === Sources common.sh ===

TESTS=$((TESTS + 1))
if grep -q 'source.*lib/common.sh' "$EA"; then
  echo "PASS: entropy-audit.sh sources lib/common.sh"
else
  echo "FAIL: entropy-audit.sh sources lib/common.sh"
  FAILURES=$((FAILURES + 1))
fi

# === No hardcoded $HOME/Documents/projects ===

TESTS=$((TESTS + 1))
if grep -q 'PROJECTS_DIR="\$HOME/Documents/projects"' "$EA"; then
  echo "FAIL: still has hardcoded PROJECTS_DIR"
  FAILURES=$((FAILURES + 1))
else
  echo "PASS: no hardcoded PROJECTS_DIR"
fi

# === Accepts --projects-dir argument ===

TESTS=$((TESTS + 1))
# Fixed-string match with `--` ending option parsing: escaping '-' as '\-' in
# a regex is unspecified and newer GNU grep warns about the stray backslash.
if grep -qF -- '--projects-dir' "$EA"; then
  echo "PASS: accepts --projects-dir argument"
else
  echo "FAIL: should accept --projects-dir argument"
  FAILURES=$((FAILURES + 1))
fi

# === Uses env var with default ===

TESTS=$((TESTS + 1))
if grep -qE 'PROJECTS_DIR="\$\{PROJECTS_DIR:-' "$EA"; then
  echo "PASS: uses PROJECTS_DIR env var with default"
else
  echo "FAIL: should use PROJECTS_DIR env var with default"
  FAILURES=$((FAILURES + 1))
fi
|
|
94
|
+
|
|
95
|
+
# === CLI tests ===

assert_exit "--help exits 0" 0 bash "$EA" --help

# === --projects-dir overrides default ===

# Minimal fake project: a .git directory plus a CLAUDE.md.
mkdir -p "$WORK/test-proj/.git"
printf '%s\n' "# Test" > "$WORK/test-proj/CLAUDE.md"
# The audit's exit status is deliberately ignored; only its output is checked.
output=$(bash "$EA" --projects-dir "$WORK" --project test-proj 2>&1 || true)
assert_contains "custom projects-dir used" "Auditing test-proj" "$output"

# === PROJECTS_DIR env var works ===

output=$(PROJECTS_DIR="$WORK" bash "$EA" --project test-proj 2>&1 || true)
assert_contains "env var PROJECTS_DIR works" "Auditing test-proj" "$output"
|
|
110
|
+
|
|
111
|
+
# === Empty find produces zero iterations (bug #13) ===
# Create a project with no code files — only a CLAUDE.md
mkdir -p "$WORK/empty-proj"
echo "# Empty project" > "$WORK/empty-proj/CLAUDE.md"
# The audit's exit status is ignored; the generated report is inspected below.
bash "$EA" --projects-dir "$WORK" --project empty-proj 2>&1 || true

# Find the newest report (audit writes to a timestamped dir).
# A glob loop is used instead of `ls -t ... | head -1`: under
# `set -euo pipefail` a failing ls (no report at all) would make the
# assignment fail and abort the script before the FAIL branch below could
# report it.
# NOTE(review): reports left in /tmp by earlier runs also match this glob;
# the newest file wins, as it did with `ls -t`.
report=""
for _candidate in /tmp/entropy-audit-*/empty-proj.md; do
  # An unmatched glob stays literal; skip anything that is not a real file.
  [[ -f "$_candidate" ]] || continue
  if [[ -z "$report" || "$_candidate" -nt "$report" ]]; then
    report="$_candidate"
  fi
done

TESTS=$((TESTS + 1))
if [[ -z "$report" ]]; then
  echo "FAIL: empty find: no report file generated"
  FAILURES=$((FAILURES + 1))
else
  echo "PASS: empty find: report file generated"
  report_content=$(<"$report")
  # Should report "All files within limit" (zero violations, zero iterations)
  assert_contains "empty find: no size violations" "All files within limit" "$report_content"
  # Should NOT contain any file-specific warnings (phantom iteration would produce one)
  TESTS=$((TESTS + 1))
  # Here-string instead of `echo | grep -q`, which can fail spuriously under
  # pipefail when grep exits before echo has finished writing.
  if grep -qE '⚠️.*lines$' <<<"$report_content"; then
    echo "FAIL: empty find produced phantom file size warning"
    FAILURES=$((FAILURES + 1))
  else
    echo "PASS: empty find: no phantom file size warnings"
  fi
  # Naming check should also report clean
  assert_contains "empty find: no naming drift" "No naming drift detected" "$report_content"
fi
|
|
138
|
+
|
|
139
|
+
# === Summary ===
# Print the tally; any recorded failure makes the suite exit with status 1.
echo ""
echo "Results: $((TESTS - FAILURES))/$TESTS passed"
(( FAILURES == 0 )) || {
  echo "FAILURES: $FAILURES"
  exit 1
}
echo "ALL PASSED"
|