autonomous-coding-toolkit 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +22 -0
- package/.claude-plugin/plugin.json +13 -0
- package/LICENSE +21 -0
- package/Makefile +21 -0
- package/README.md +140 -0
- package/SECURITY.md +28 -0
- package/agents/bash-expert.md +113 -0
- package/agents/dependency-auditor.md +138 -0
- package/agents/integration-tester.md +120 -0
- package/agents/lesson-scanner.md +149 -0
- package/agents/python-expert.md +179 -0
- package/agents/service-monitor.md +141 -0
- package/agents/shell-expert.md +147 -0
- package/benchmarks/runner.sh +147 -0
- package/benchmarks/tasks/01-rest-endpoint/rubric.sh +29 -0
- package/benchmarks/tasks/01-rest-endpoint/task.md +17 -0
- package/benchmarks/tasks/02-refactor-module/task.md +8 -0
- package/benchmarks/tasks/03-fix-integration-bug/task.md +8 -0
- package/benchmarks/tasks/04-add-test-coverage/task.md +8 -0
- package/benchmarks/tasks/05-multi-file-feature/task.md +8 -0
- package/bin/act.js +238 -0
- package/commands/autocode.md +6 -0
- package/commands/cancel-ralph.md +18 -0
- package/commands/code-factory.md +53 -0
- package/commands/create-prd.md +55 -0
- package/commands/ralph-loop.md +18 -0
- package/commands/run-plan.md +117 -0
- package/commands/submit-lesson.md +122 -0
- package/docs/ARCHITECTURE.md +630 -0
- package/docs/CONTRIBUTING.md +125 -0
- package/docs/lessons/0001-bare-exception-swallowing.md +34 -0
- package/docs/lessons/0002-async-def-without-await.md +28 -0
- package/docs/lessons/0003-create-task-without-callback.md +28 -0
- package/docs/lessons/0004-hardcoded-test-counts.md +28 -0
- package/docs/lessons/0005-sqlite-without-closing.md +33 -0
- package/docs/lessons/0006-venv-pip-path.md +27 -0
- package/docs/lessons/0007-runner-state-self-rejection.md +35 -0
- package/docs/lessons/0008-quality-gate-blind-spot.md +33 -0
- package/docs/lessons/0009-parser-overcount-empty-batches.md +36 -0
- package/docs/lessons/0010-local-outside-function-bash.md +33 -0
- package/docs/lessons/0011-batch-tests-for-unimplemented-code.md +36 -0
- package/docs/lessons/0012-api-markdown-unescaped-chars.md +33 -0
- package/docs/lessons/0013-export-prefix-env-parsing.md +33 -0
- package/docs/lessons/0014-decorator-registry-import-side-effect.md +43 -0
- package/docs/lessons/0015-frontend-backend-schema-drift.md +43 -0
- package/docs/lessons/0016-event-driven-cold-start-seeding.md +44 -0
- package/docs/lessons/0017-copy-paste-logic-diverges.md +43 -0
- package/docs/lessons/0018-layer-passes-pipeline-broken.md +45 -0
- package/docs/lessons/0019-systemd-envfile-ignores-export.md +41 -0
- package/docs/lessons/0020-persist-state-incrementally.md +44 -0
- package/docs/lessons/0021-dual-axis-testing.md +48 -0
- package/docs/lessons/0022-jsx-factory-shadowing.md +43 -0
- package/docs/lessons/0023-static-analysis-spiral.md +51 -0
- package/docs/lessons/0024-shared-pipeline-implementation.md +55 -0
- package/docs/lessons/0025-defense-in-depth-all-entry-points.md +65 -0
- package/docs/lessons/0026-linter-no-rules-false-enforcement.md +54 -0
- package/docs/lessons/0027-jsx-silent-prop-drop.md +64 -0
- package/docs/lessons/0028-no-infrastructure-in-client-code.md +49 -0
- package/docs/lessons/0029-never-write-secrets-to-files.md +61 -0
- package/docs/lessons/0030-cache-merge-not-replace.md +62 -0
- package/docs/lessons/0031-verify-units-at-boundaries.md +66 -0
- package/docs/lessons/0032-module-lifecycle-subscribe-unsubscribe.md +89 -0
- package/docs/lessons/0033-async-iteration-mutable-snapshot.md +72 -0
- package/docs/lessons/0034-caller-missing-await-silent-discard.md +65 -0
- package/docs/lessons/0035-duplicate-registration-silent-overwrite.md +85 -0
- package/docs/lessons/0036-websocket-dirty-disconnect.md +33 -0
- package/docs/lessons/0037-parallel-agents-worktree-corruption.md +31 -0
- package/docs/lessons/0038-subscribe-no-stored-ref.md +36 -0
- package/docs/lessons/0039-fallback-or-default-hides-bugs.md +34 -0
- package/docs/lessons/0040-event-firehose-filter-first.md +36 -0
- package/docs/lessons/0041-ambiguous-base-dir-path-nesting.md +32 -0
- package/docs/lessons/0042-spec-compliance-insufficient.md +36 -0
- package/docs/lessons/0043-exact-count-extensible-collections.md +32 -0
- package/docs/lessons/0044-relative-file-deps-worktree.md +39 -0
- package/docs/lessons/0045-iterative-design-improvement.md +33 -0
- package/docs/lessons/0046-plan-assertion-math-bugs.md +38 -0
- package/docs/lessons/0047-pytest-single-threaded-default.md +37 -0
- package/docs/lessons/0048-integration-wiring-batch.md +40 -0
- package/docs/lessons/0049-ab-verification.md +41 -0
- package/docs/lessons/0050-editing-sourced-files-during-execution.md +33 -0
- package/docs/lessons/0051-infrastructure-fixes-cant-self-heal.md +30 -0
- package/docs/lessons/0052-uncommitted-changes-poison-quality-gates.md +31 -0
- package/docs/lessons/0053-jq-compact-flag-inconsistency.md +31 -0
- package/docs/lessons/0054-parser-matches-inside-code-blocks.md +30 -0
- package/docs/lessons/0055-agents-compensate-for-garbled-prompts.md +31 -0
- package/docs/lessons/0056-grep-count-exit-code-on-zero.md +42 -0
- package/docs/lessons/0057-new-artifacts-break-git-clean-gates.md +42 -0
- package/docs/lessons/0058-dead-config-keys-never-consumed.md +49 -0
- package/docs/lessons/0059-contract-test-shared-structures.md +53 -0
- package/docs/lessons/0060-set-e-silent-death-in-runners.md +53 -0
- package/docs/lessons/0061-context-injection-dirty-state.md +50 -0
- package/docs/lessons/0062-sibling-bug-neighborhood-scan.md +29 -0
- package/docs/lessons/0063-one-flag-two-lifetimes.md +31 -0
- package/docs/lessons/0064-test-passes-wrong-reason.md +31 -0
- package/docs/lessons/0065-pipefail-grep-count-double-output.md +39 -0
- package/docs/lessons/0066-local-keyword-outside-function.md +37 -0
- package/docs/lessons/0067-stdin-hang-non-interactive-shell.md +36 -0
- package/docs/lessons/0068-agent-builds-wrong-thing-correctly.md +31 -0
- package/docs/lessons/0069-plan-quality-dominates-execution.md +30 -0
- package/docs/lessons/0070-spec-echo-back-prevents-drift.md +31 -0
- package/docs/lessons/0071-positive-instructions-outperform-negative.md +30 -0
- package/docs/lessons/0072-lost-in-the-middle-context-placement.md +30 -0
- package/docs/lessons/0073-unscoped-lessons-cause-false-positives.md +30 -0
- package/docs/lessons/0074-stale-context-injection-wrong-batch.md +32 -0
- package/docs/lessons/0075-research-artifacts-must-persist.md +32 -0
- package/docs/lessons/0076-wrong-decomposition-contaminates-downstream.md +30 -0
- package/docs/lessons/0077-cherry-pick-merges-need-manual-resolution.md +30 -0
- package/docs/lessons/0078-static-review-without-live-test.md +30 -0
- package/docs/lessons/0079-integration-wiring-batch-required.md +32 -0
- package/docs/lessons/FRAMEWORK.md +161 -0
- package/docs/lessons/SUMMARY.md +201 -0
- package/docs/lessons/TEMPLATE.md +85 -0
- package/docs/plans/2026-02-21-code-factory-v2-design.md +204 -0
- package/docs/plans/2026-02-21-code-factory-v2-implementation-plan.md +2189 -0
- package/docs/plans/2026-02-21-code-factory-v2-phase4-design.md +537 -0
- package/docs/plans/2026-02-21-code-factory-v2-phase4-implementation-plan.md +2012 -0
- package/docs/plans/2026-02-21-hardening-pass-design.md +108 -0
- package/docs/plans/2026-02-21-hardening-pass-plan.md +1378 -0
- package/docs/plans/2026-02-21-mab-research-report.md +406 -0
- package/docs/plans/2026-02-21-marketplace-restructure-design.md +240 -0
- package/docs/plans/2026-02-21-marketplace-restructure-plan.md +832 -0
- package/docs/plans/2026-02-21-phase4-completion-plan.md +697 -0
- package/docs/plans/2026-02-21-validator-suite-design.md +148 -0
- package/docs/plans/2026-02-21-validator-suite-plan.md +540 -0
- package/docs/plans/2026-02-22-mab-research-round2.md +556 -0
- package/docs/plans/2026-02-22-mab-run-design.md +462 -0
- package/docs/plans/2026-02-22-mab-run-plan.md +2046 -0
- package/docs/plans/2026-02-22-operations-design-methodology-research.md +681 -0
- package/docs/plans/2026-02-22-research-agent-failure-taxonomy.md +532 -0
- package/docs/plans/2026-02-22-research-code-guideline-policies.md +886 -0
- package/docs/plans/2026-02-22-research-codebase-audit-refactoring.md +908 -0
- package/docs/plans/2026-02-22-research-coding-standards-documentation.md +541 -0
- package/docs/plans/2026-02-22-research-competitive-landscape.md +687 -0
- package/docs/plans/2026-02-22-research-comprehensive-testing.md +1076 -0
- package/docs/plans/2026-02-22-research-context-utilization.md +459 -0
- package/docs/plans/2026-02-22-research-cost-quality-tradeoff.md +548 -0
- package/docs/plans/2026-02-22-research-lesson-transferability.md +508 -0
- package/docs/plans/2026-02-22-research-multi-agent-coordination.md +312 -0
- package/docs/plans/2026-02-22-research-phase-integration.md +602 -0
- package/docs/plans/2026-02-22-research-plan-quality.md +428 -0
- package/docs/plans/2026-02-22-research-prompt-engineering.md +558 -0
- package/docs/plans/2026-02-22-research-unconventional-perspectives.md +528 -0
- package/docs/plans/2026-02-22-research-user-adoption.md +638 -0
- package/docs/plans/2026-02-22-research-verification-effectiveness.md +433 -0
- package/docs/plans/2026-02-23-agent-suite-design.md +299 -0
- package/docs/plans/2026-02-23-agent-suite-plan.md +578 -0
- package/docs/plans/2026-02-23-phase3-cost-infrastructure-design.md +148 -0
- package/docs/plans/2026-02-23-phase3-cost-infrastructure-plan.md +1062 -0
- package/docs/plans/2026-02-23-research-bash-expert-agent.md +543 -0
- package/docs/plans/2026-02-23-research-dependency-auditor-agent.md +564 -0
- package/docs/plans/2026-02-23-research-improving-existing-agents.md +503 -0
- package/docs/plans/2026-02-23-research-integration-tester-agent.md +454 -0
- package/docs/plans/2026-02-23-research-python-expert-agent.md +429 -0
- package/docs/plans/2026-02-23-research-service-monitor-agent.md +425 -0
- package/docs/plans/2026-02-23-research-shell-expert-agent.md +533 -0
- package/docs/plans/2026-02-23-roadmap-to-completion.md +530 -0
- package/docs/plans/2026-02-24-headless-module-split-design.md +98 -0
- package/docs/plans/2026-02-24-headless-module-split.md +443 -0
- package/docs/plans/2026-02-24-lesson-scope-metadata-design.md +228 -0
- package/docs/plans/2026-02-24-lesson-scope-metadata-plan.md +968 -0
- package/docs/plans/2026-02-24-npm-packaging-design.md +841 -0
- package/docs/plans/2026-02-24-npm-packaging-plan.md +1965 -0
- package/docs/plans/audit-findings.md +186 -0
- package/docs/telegram-notification-format.md +98 -0
- package/examples/example-plan.md +51 -0
- package/examples/example-prd.json +72 -0
- package/examples/example-roadmap.md +33 -0
- package/examples/quickstart-plan.md +63 -0
- package/hooks/hooks.json +26 -0
- package/hooks/setup-symlinks.sh +48 -0
- package/hooks/stop-hook.sh +135 -0
- package/package.json +47 -0
- package/policies/bash.md +71 -0
- package/policies/python.md +71 -0
- package/policies/testing.md +61 -0
- package/policies/universal.md +60 -0
- package/scripts/analyze-report.sh +97 -0
- package/scripts/architecture-map.sh +145 -0
- package/scripts/auto-compound.sh +273 -0
- package/scripts/batch-audit.sh +42 -0
- package/scripts/batch-test.sh +101 -0
- package/scripts/entropy-audit.sh +221 -0
- package/scripts/failure-digest.sh +51 -0
- package/scripts/generate-ast-rules.sh +96 -0
- package/scripts/init.sh +112 -0
- package/scripts/lesson-check.sh +428 -0
- package/scripts/lib/common.sh +61 -0
- package/scripts/lib/cost-tracking.sh +153 -0
- package/scripts/lib/ollama.sh +60 -0
- package/scripts/lib/progress-writer.sh +128 -0
- package/scripts/lib/run-plan-context.sh +215 -0
- package/scripts/lib/run-plan-echo-back.sh +231 -0
- package/scripts/lib/run-plan-headless.sh +396 -0
- package/scripts/lib/run-plan-notify.sh +57 -0
- package/scripts/lib/run-plan-parser.sh +81 -0
- package/scripts/lib/run-plan-prompt.sh +215 -0
- package/scripts/lib/run-plan-quality-gate.sh +132 -0
- package/scripts/lib/run-plan-routing.sh +315 -0
- package/scripts/lib/run-plan-sampling.sh +170 -0
- package/scripts/lib/run-plan-scoring.sh +146 -0
- package/scripts/lib/run-plan-state.sh +142 -0
- package/scripts/lib/run-plan-team.sh +199 -0
- package/scripts/lib/telegram.sh +54 -0
- package/scripts/lib/thompson-sampling.sh +176 -0
- package/scripts/license-check.sh +74 -0
- package/scripts/mab-run.sh +575 -0
- package/scripts/module-size-check.sh +146 -0
- package/scripts/patterns/async-no-await.yml +5 -0
- package/scripts/patterns/bare-except.yml +6 -0
- package/scripts/patterns/empty-catch.yml +6 -0
- package/scripts/patterns/hardcoded-localhost.yml +9 -0
- package/scripts/patterns/retry-loop-no-backoff.yml +12 -0
- package/scripts/pipeline-status.sh +197 -0
- package/scripts/policy-check.sh +226 -0
- package/scripts/prior-art-search.sh +133 -0
- package/scripts/promote-mab-lessons.sh +126 -0
- package/scripts/prompts/agent-a-superpowers.md +29 -0
- package/scripts/prompts/agent-b-ralph.md +29 -0
- package/scripts/prompts/judge-agent.md +61 -0
- package/scripts/prompts/planner-agent.md +44 -0
- package/scripts/pull-community-lessons.sh +90 -0
- package/scripts/quality-gate.sh +266 -0
- package/scripts/research-gate.sh +90 -0
- package/scripts/run-plan.sh +329 -0
- package/scripts/scope-infer.sh +159 -0
- package/scripts/setup-ralph-loop.sh +155 -0
- package/scripts/telemetry.sh +230 -0
- package/scripts/tests/run-all-tests.sh +52 -0
- package/scripts/tests/test-act-cli.sh +46 -0
- package/scripts/tests/test-agents-md.sh +87 -0
- package/scripts/tests/test-analyze-report.sh +114 -0
- package/scripts/tests/test-architecture-map.sh +89 -0
- package/scripts/tests/test-auto-compound.sh +169 -0
- package/scripts/tests/test-batch-test.sh +65 -0
- package/scripts/tests/test-benchmark-runner.sh +25 -0
- package/scripts/tests/test-common.sh +168 -0
- package/scripts/tests/test-cost-tracking.sh +158 -0
- package/scripts/tests/test-echo-back.sh +180 -0
- package/scripts/tests/test-entropy-audit.sh +146 -0
- package/scripts/tests/test-failure-digest.sh +66 -0
- package/scripts/tests/test-generate-ast-rules.sh +145 -0
- package/scripts/tests/test-helpers.sh +82 -0
- package/scripts/tests/test-init.sh +47 -0
- package/scripts/tests/test-lesson-check.sh +278 -0
- package/scripts/tests/test-lesson-local.sh +55 -0
- package/scripts/tests/test-license-check.sh +109 -0
- package/scripts/tests/test-mab-run.sh +182 -0
- package/scripts/tests/test-ollama-lib.sh +49 -0
- package/scripts/tests/test-ollama.sh +60 -0
- package/scripts/tests/test-pipeline-status.sh +198 -0
- package/scripts/tests/test-policy-check.sh +124 -0
- package/scripts/tests/test-prior-art-search.sh +96 -0
- package/scripts/tests/test-progress-writer.sh +140 -0
- package/scripts/tests/test-promote-mab-lessons.sh +110 -0
- package/scripts/tests/test-pull-community-lessons.sh +149 -0
- package/scripts/tests/test-quality-gate.sh +241 -0
- package/scripts/tests/test-research-gate.sh +132 -0
- package/scripts/tests/test-run-plan-cli.sh +86 -0
- package/scripts/tests/test-run-plan-context.sh +305 -0
- package/scripts/tests/test-run-plan-e2e.sh +153 -0
- package/scripts/tests/test-run-plan-headless.sh +424 -0
- package/scripts/tests/test-run-plan-notify.sh +124 -0
- package/scripts/tests/test-run-plan-parser.sh +217 -0
- package/scripts/tests/test-run-plan-prompt.sh +254 -0
- package/scripts/tests/test-run-plan-quality-gate.sh +222 -0
- package/scripts/tests/test-run-plan-routing.sh +178 -0
- package/scripts/tests/test-run-plan-scoring.sh +148 -0
- package/scripts/tests/test-run-plan-state.sh +261 -0
- package/scripts/tests/test-run-plan-team.sh +157 -0
- package/scripts/tests/test-scope-infer.sh +150 -0
- package/scripts/tests/test-setup-ralph-loop.sh +63 -0
- package/scripts/tests/test-telegram-env.sh +38 -0
- package/scripts/tests/test-telegram.sh +121 -0
- package/scripts/tests/test-telemetry.sh +46 -0
- package/scripts/tests/test-thompson-sampling.sh +139 -0
- package/scripts/tests/test-validate-all.sh +60 -0
- package/scripts/tests/test-validate-commands.sh +89 -0
- package/scripts/tests/test-validate-hooks.sh +98 -0
- package/scripts/tests/test-validate-lessons.sh +150 -0
- package/scripts/tests/test-validate-plan-quality.sh +235 -0
- package/scripts/tests/test-validate-plans.sh +187 -0
- package/scripts/tests/test-validate-plugin.sh +106 -0
- package/scripts/tests/test-validate-prd.sh +184 -0
- package/scripts/tests/test-validate-skills.sh +134 -0
- package/scripts/validate-all.sh +57 -0
- package/scripts/validate-commands.sh +67 -0
- package/scripts/validate-hooks.sh +89 -0
- package/scripts/validate-lessons.sh +98 -0
- package/scripts/validate-plan-quality.sh +369 -0
- package/scripts/validate-plans.sh +120 -0
- package/scripts/validate-plugin.sh +86 -0
- package/scripts/validate-policies.sh +42 -0
- package/scripts/validate-prd.sh +118 -0
- package/scripts/validate-skills.sh +96 -0
- package/skills/autocode/SKILL.md +285 -0
- package/skills/autocode/ab-verification.md +51 -0
- package/skills/autocode/code-quality-standards.md +37 -0
- package/skills/autocode/competitive-mode.md +364 -0
- package/skills/brainstorming/SKILL.md +97 -0
- package/skills/capture-lesson/SKILL.md +187 -0
- package/skills/check-lessons/SKILL.md +116 -0
- package/skills/dispatching-parallel-agents/SKILL.md +110 -0
- package/skills/executing-plans/SKILL.md +85 -0
- package/skills/finishing-a-development-branch/SKILL.md +201 -0
- package/skills/receiving-code-review/SKILL.md +72 -0
- package/skills/requesting-code-review/SKILL.md +59 -0
- package/skills/requesting-code-review/code-reviewer.md +82 -0
- package/skills/research/SKILL.md +145 -0
- package/skills/roadmap/SKILL.md +115 -0
- package/skills/subagent-driven-development/SKILL.md +98 -0
- package/skills/subagent-driven-development/code-quality-reviewer-prompt.md +18 -0
- package/skills/subagent-driven-development/implementer-prompt.md +73 -0
- package/skills/subagent-driven-development/spec-reviewer-prompt.md +57 -0
- package/skills/systematic-debugging/SKILL.md +134 -0
- package/skills/systematic-debugging/condition-based-waiting.md +64 -0
- package/skills/systematic-debugging/defense-in-depth.md +32 -0
- package/skills/systematic-debugging/root-cause-tracing.md +55 -0
- package/skills/test-driven-development/SKILL.md +167 -0
- package/skills/using-git-worktrees/SKILL.md +219 -0
- package/skills/using-superpowers/SKILL.md +54 -0
- package/skills/verification-before-completion/SKILL.md +140 -0
- package/skills/verify/SKILL.md +82 -0
- package/skills/writing-plans/SKILL.md +128 -0
- package/skills/writing-skills/SKILL.md +93 -0
|
@@ -0,0 +1,231 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# run-plan-echo-back.sh — Spec echo-back gate for verifying agent understanding
|
|
3
|
+
#
|
|
4
|
+
# Standalone module: can be sourced by any execution mode (headless, team, ralph).
|
|
5
|
+
# No dependencies on batch loop state — only reads SKIP_ECHO_BACK and STRICT_ECHO_BACK globals.
|
|
6
|
+
#
|
|
7
|
+
# Functions:
|
|
8
|
+
# _echo_back_check <batch_text> <log_file>
|
|
9
|
+
# Lightweight keyword-match gate on agent output. Non-blocking by default.
|
|
10
|
+
# echo_back_check <batch_text> <log_dir> <batch_num> [claude_cmd]
|
|
11
|
+
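# Full spec verification: agent restatement → haiku verdict → retry once.
# run_echo_back_tier2 <batch_text> <agent_summary>
# Tier 2 LLM semantic verification of the agent's understanding against the spec.
# should_run_tier2 [batch_number] [batch_type] [strict]
# Returns 0 when tier 2 should activate (batch 1, integration batch, or strict mode).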
|
|
12
|
+
#
|
|
13
|
+
# Globals (read-only): SKIP_ECHO_BACK, STRICT_ECHO_BACK
|
|
14
|
+
#
|
|
15
|
+
# Echo-back gate behavior (--strict-echo-back / --skip-echo-back):
|
|
16
|
+
# Default: NON-BLOCKING — prints a WARNING if agent echo-back looks wrong, then continues.
|
|
17
|
+
# --skip-echo-back: disables the echo-back check entirely (no prompt, no warning).
|
|
18
|
+
# --strict-echo-back: makes the echo-back check BLOCKING — returns 1 on mismatch, aborting the batch.
|
|
19
|
+
|
|
20
|
+
# Echo-back gate: ask agent to restate the batch intent, check for gross misalignment.
|
|
21
|
+
# Behavior controlled by SKIP_ECHO_BACK and STRICT_ECHO_BACK globals.
|
|
22
|
+
# Non-blocking by default (warns only). --strict-echo-back makes it blocking.
|
|
23
|
+
# Args: <batch_text> <log_file>
|
|
24
|
+
# Returns: 0 always (non-blocking default), or 1 on mismatch with --strict-echo-back
|
|
25
|
+
_echo_back_check() {
  # Keyword-alignment gate over an agent's log output.
  #
  # Args:    $1 - batch text (the spec the agent was given)
  #          $2 - path to the log file holding the agent's output
  # Globals: SKIP_ECHO_BACK (read), STRICT_ECHO_BACK (read)
  # Outputs: WARNING (and, in strict mode, ERROR) on stderr when no keyword
  #          from the batch text appears in the log
  # Returns: 1 only when no keyword matched AND STRICT_ECHO_BACK is "true";
  #          0 in every other case
  local batch_text="$1"
  local log_file="$2"

  # The gate is a no-op when it is disabled or there is no log to inspect.
  if [[ "${SKIP_ECHO_BACK:-false}" == "true" || ! -f "$log_file" ]]; then
    return 0
  fi

  # Up to ten unique words of 4+ letters from the first five lines of the
  # batch text, joined with '|'. Because the words are purely alphabetic the
  # joined string is also a valid extended-regex alternation.
  local intent_pattern
  intent_pattern=$(echo "$batch_text" | head -5 | grep -oE '\b[A-Za-z]{4,}\b' | sort -u | head -10 | tr '\n' '|' | sed 's/|$//' || true)

  # No usable words in the batch text: nothing to compare against.
  [[ -n "$intent_pattern" ]] || return 0

  # A single case-insensitive hit on any keyword counts as aligned.
  if grep -qiE "$intent_pattern" "$log_file" 2>/dev/null; then
    return 0
  fi

  echo "WARNING: Echo-back check: agent output may not address the batch intent (keywords not found: $intent_pattern)" >&2

  # Only strict mode turns the warning into a failure.
  if [[ "${STRICT_ECHO_BACK:-false}" == "true" ]]; then
    echo "ERROR: --strict-echo-back is set. Aborting batch due to spec misalignment." >&2
    return 1
  fi

  return 0
}
|
|
70
|
+
|
|
71
|
+
# echo_back_check — Verify agent understands the batch spec before execution
|
|
72
|
+
# Args: <batch_text> <log_dir> <batch_num> [claude_cmd]
|
|
73
|
+
# Returns: 0 if restatement matches spec, 1 if mismatch after retry
|
|
74
|
+
# The optional claude_cmd parameter allows test injection of a mock.
|
|
75
|
+
# _eb_first_paragraph — print the first paragraph of the text on stdin.
# Leading blank lines are skipped; output stops at the first blank
# (empty or whitespace-only) line that follows some text.
_eb_first_paragraph() {
  awk 'NF { seen = 1; print; next } seen { exit }'
}

# _eb_verdict_is_yes <verdict> — succeed only when the first standalone
# YES/NO word in the verdict (case-insensitive) is YES. A verdict such as
# "NO — ... yes ..." or one that merely contains "eyes" does not pass.
_eb_verdict_is_yes() {
  local first_token
  # grep exits non-zero when there is no YES/NO word at all; treat as "not yes".
  first_token=$(printf '%s\n' "$1" | grep -oiwE 'yes|no' | head -n 1) || true
  case "$first_token" in
    [Yy][Ee][Ss]) return 0 ;;
    *) return 1 ;;
  esac
}

# _eb_verify_prompt <batch_text> <restatement> — print the reviewer prompt
# that asks whether the restatement captures the spec.
_eb_verify_prompt() {
  printf '%s\n\nORIGINAL SPEC:\n%s\n\nRESTATEMENT:\n%s' \
    "Compare these two texts. Does the RESTATEMENT accurately capture the key goals of the ORIGINAL SPEC? Answer YES or NO followed by a brief reason." \
    "$1" "$2"
}

# echo_back_check — ask the agent to restate the batch spec, have a second
# model judge the restatement, and retry once on mismatch.
#
# Args:    $1 - batch text (the specification)
#          $2 - log directory (must exist; stderr of every call is written
#               to <log_dir>/batch-<batch_num>-echo-back.log)
#          $3 - batch number (used only in the log file name)
#          $4 - optional command to run instead of `claude` (test injection)
# Outputs: PASSED messages on stdout; progress/failure messages on stderr
# Returns: 0 when a restatement is judged YES, or when the first agent call
#          fails or yields no text (check skipped);
#          1 when the verdict is still not YES after one retry
echo_back_check() {
  local batch_text="$1"
  local log_dir="$2"
  local batch_num="$3"
  local claude_cmd="${4:-claude}"

  local echo_prompt restatement verify_prompt verdict
  local echo_log="$log_dir/batch-${batch_num}-echo-back.log"

  # Step 1: Ask the agent to restate the batch spec
  echo_prompt="Before implementing, restate in one paragraph what this batch must accomplish. Do not write any code. Just describe the goal and key deliverables.

The batch specification is:
${batch_text}"

  # CLAUDECODE is set to empty for the child process on every call.
  local claude_exit=0
  restatement=$(CLAUDECODE='' "$claude_cmd" -p "$echo_prompt" \
    --allowedTools "" \
    --permission-mode bypassPermissions \
    2>"$echo_log") || claude_exit=$?

  if [[ $claude_exit -ne 0 ]]; then
    echo " Echo-back: claude failed (exit $claude_exit) — see $echo_log" >&2
    return 0
  fi

  # Keep only the first paragraph; leading blank lines in the model output
  # are skipped so they cannot blank out the whole restatement.
  restatement=$(printf '%s\n' "$restatement" | _eb_first_paragraph)

  if [[ -z "$restatement" ]]; then
    echo " Echo-back: no restatement received (skipping check)" >&2
    return 0
  fi

  # Step 2: Lightweight comparison via haiku
  verify_prompt=$(_eb_verify_prompt "$batch_text" "$restatement")

  # A failed verifier call leaves the verdict empty, which counts as a mismatch.
  verdict=$(CLAUDECODE='' "$claude_cmd" -p "$verify_prompt" \
    --model haiku \
    --allowedTools "" \
    --permission-mode bypassPermissions \
    2>>"$echo_log") || true

  if _eb_verdict_is_yes "$verdict"; then
    echo " Echo-back: PASSED (spec understood)"
    return 0
  fi

  # Step 3: Retry once with clarified prompt
  echo " Echo-back: MISMATCH — retrying with clarified prompt" >&2
  local reason
  reason=$(printf '%s\n' "$verdict" | head -2)

  local retry_prompt="Your previous restatement did not match the spec. The reviewer said: ${reason}

Re-read the specification carefully and restate in one paragraph what this batch must accomplish:
${batch_text}"

  local retry_restatement
  retry_restatement=$(CLAUDECODE='' "$claude_cmd" -p "$retry_prompt" \
    --allowedTools "" \
    --permission-mode bypassPermissions \
    2>>"$echo_log") || true

  retry_restatement=$(printf '%s\n' "$retry_restatement" | _eb_first_paragraph)

  local retry_verify
  retry_verify=$(_eb_verify_prompt "$batch_text" "$retry_restatement")

  local retry_verdict
  retry_verdict=$(CLAUDECODE='' "$claude_cmd" -p "$retry_verify" \
    --model haiku \
    --allowedTools "" \
    --permission-mode bypassPermissions \
    2>>"$echo_log") || true

  if _eb_verdict_is_yes "$retry_verdict"; then
    echo " Echo-back: PASSED on retry (spec understood)"
    return 0
  fi

  echo " Echo-back: FAILED after retry (spec not understood)" >&2
  return 1
}
|
|
170
|
+
|
|
171
|
+
# --- Tier 2: LLM semantic verification ---
|
|
172
|
+
# Activates on batch 1, integration batches, or --strict-echo-back
|
|
173
|
+
# Requires: claude CLI available
|
|
174
|
+
run_echo_back_tier2() {
  # Tier 2 echo-back: ask the claude CLI whether the agent's summary matches
  # the batch specification.
  #
  # Args:    $1 - batch text (the specification)
  #          $2 - the agent's summary of what it understood
  # Outputs: "echo-back-tier2: PASSED" or "echo-back-tier2: FAILED — <result>"
  #          on stdout; skip/failure diagnostics on stderr
  # Returns: 1 when the reviewer output has a line starting with FAIL;
  #          0 otherwise, including when the CLI is missing or the call fails
  local batch_text="$1"
  local agent_summary="$2"

  if ! command -v claude >/dev/null 2>&1; then
    echo "echo-back-tier2: claude CLI not available — skipping" >&2
    return 0
  fi

  # Plain assignment: same text the reviewer receives, without spawning cat.
  local prompt="You are a specification compliance reviewer. Compare:

SPECIFICATION:
$batch_text

AGENT'S UNDERSTANDING:
$agent_summary

Does the agent's understanding match the specification? Flag any:
- Missing requirements
- Added requirements not in spec
- Misinterpreted requirements
- Ambiguous interpretations

Output exactly one line: PASS or FAIL followed by a colon and explanation."

  # CLAUDECODE is set to empty for the child, matching the other claude
  # invocations in this file.
  # NOTE(review): confirm the installed claude CLI accepts --max-tokens; if it
  # rejects the flag, the call fails and this gate is skipped every time.
  local result
  local claude_exit=0
  result=$(printf '%s\n' "$prompt" | CLAUDECODE='' claude -p --max-tokens 200 2>/dev/null) || claude_exit=$?

  if [[ $claude_exit -ne 0 ]]; then
    # Still non-blocking, but no longer silent: say that the gate did not run.
    echo "echo-back-tier2: claude call failed (exit $claude_exit) — skipping" >&2
    result="PASS: echo-back tier2 unavailable"
  fi

  # Tolerate leading whitespace or markdown emphasis before the FAIL keyword.
  if printf '%s\n' "$result" | grep -qiE '^[[:space:]*_#>]*FAIL'; then
    echo "echo-back-tier2: FAILED — $result"
    return 1
  else
    echo "echo-back-tier2: PASSED"
    return 0
  fi
}
|
|
214
|
+
|
|
215
|
+
# Determine if tier 2 should activate
|
|
216
|
+
should_run_tier2() {
  # Decide whether the tier 2 echo-back check should run.
  #
  # Args:    $1 - batch number (default 0)
  #          $2 - batch type (default "unknown")
  #          $3 - strict flag (default "false")
  # Returns: 0 when the batch number is "1", the type is "integration", or the
  #          strict flag is "true"; 1 otherwise
  local num="${1:-0}"
  local kind="${2:-unknown}"
  local strict_flag="${3:-false}"

  if [[ "$num" == "1" || "$kind" == "integration" || "$strict_flag" == "true" ]]; then
    return 0
  fi

  return 1
}
|
|
@@ -0,0 +1,396 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# run-plan-headless.sh — Headless batch execution loop for run-plan
|
|
3
|
+
#
|
|
4
|
+
# Requires globals: WORKTREE, RESUME, START_BATCH, END_BATCH, NOTIFY,
|
|
5
|
+
# PLAN_FILE, QUALITY_GATE_CMD, PYTHON, MAX_RETRIES, ON_FAILURE, VERIFY, MODE,
|
|
6
|
+
# SKIP_ECHO_BACK, STRICT_ECHO_BACK
|
|
7
|
+
# Requires libs: run-plan-parser, state, quality-gate, notify, prompt, scoring, echo-back
|
|
8
|
+
|
|
9
|
+
# Run plan batches START_BATCH..END_BATCH headlessly: one `claude -p` call per
# attempt, each attempt validated by run_quality_gate.
#
# Globals read:    WORKTREE, RESUME, START_BATCH, END_BATCH, NOTIFY, PLAN_FILE,
#                  QUALITY_GATE_CMD, PYTHON, MAX_RETRIES, ON_FAILURE, VERIFY,
#                  MODE, SCRIPT_DIR; optionally MAB, MAX_BUDGET, SKIP_ECHO_BACK,
#                  STRICT_ECHO_BACK, SAMPLE_ON_RETRY, SAMPLE_ON_CRITICAL,
#                  SAMPLE_DEFAULT_COUNT.
# Globals written: SAMPLE_COUNT (reset to the caller's value for every batch).
# Files written:   $WORKTREE/logs/batch-<n>-attempt-<m>.log,
#                  $WORKTREE/.run-plan-prefix.txt, and a temporarily modified
#                  $WORKTREE/CLAUDE.md (restored after each attempt).
# Exits:           calls `exit 1` (terminating the shell, not just returning) on
#                  prefix-write failure, budget exhaustion, a batch that never
#                  passes (unless ON_FAILURE=skip), or failed final verification.
run_mode_headless() {
    mkdir -p "$WORKTREE/logs"

    # Initialize state if not resuming
    if [[ "$RESUME" != true ]]; then
        init_state "$WORKTREE" "$PLAN_FILE" "$MODE"

        # Mark earlier batches as completed (if --start-batch > 1)
        if [[ "$START_BATCH" -gt 1 ]]; then
            for ((b = 1; b < START_BATCH; b++)); do
                complete_batch "$WORKTREE" "$b" 0
            done
        fi
    fi

    # Generate AGENTS.md for agent awareness
    generate_agents_md "$PLAN_FILE" "$WORKTREE" "$MODE"

    # Load telegram credentials if notifications enabled
    if [[ "$NOTIFY" == true ]]; then
        _load_telegram_env || echo "WARNING: Telegram notifications unavailable" >&2
    fi

    local plan_name
    plan_name=$(basename "$PLAN_FILE" .md)

    # Build the stable prefix ONCE before the batch loop and cache it to disk.
    # The stable prefix contains plan identity, worktree path, python, branch, and TDD rules —
    # none of which change between batches. prev_test_count is intentionally excluded because
    # it increases after each batch; it lives in the variable suffix (#48).
    #
    # #45: Check that the write succeeded. A silent failure here would leave all subsequent
    # batches with a missing/stale prefix file — fail fast instead.
    local stable_prefix
    stable_prefix=$(build_stable_prefix "$PLAN_FILE" "$WORKTREE" "$PYTHON" "$QUALITY_GATE_CMD")
    echo "$stable_prefix" > "$WORKTREE/.run-plan-prefix.txt" || {
        echo "ERROR: Failed to write prefix file $WORKTREE/.run-plan-prefix.txt" >&2
        exit 1
    }

    # Preserve user's --sample value before batch loop so per-batch reset doesn't clobber it (#16/#28)
    local SAMPLE_DEFAULT=${SAMPLE_COUNT:-0}

    for ((batch = START_BATCH; batch <= END_BATCH; batch++)); do
        # Reset sampling count each batch — prevents leak from prior batch's retry/critical trigger (#16/#28)
        SAMPLE_COUNT=$SAMPLE_DEFAULT

        # Budget enforcement
        if [[ -n "${MAX_BUDGET:-}" ]]; then
            if ! check_budget "$WORKTREE" "$MAX_BUDGET"; then
                echo "STOPPING: Budget limit reached (\$${MAX_BUDGET})"
                exit 1
            fi
        fi

        local title
        title=$(get_batch_title "$PLAN_FILE" "$batch")
        echo ""
        echo "================================================================"
        echo "  Batch $batch: $title"
        echo "================================================================"

        local batch_text
        batch_text=$(get_batch_text "$PLAN_FILE" "$batch")
        if [[ -z "$batch_text" ]]; then
            echo "  (empty batch -- skipping)"
            continue
        fi

        # Declare batch_passed before MAB routing — the MAB path never enters
        # the retry loop, so the flag must exist before that branch (#4A review).
        local batch_passed=false

        # MAB routing (when --mab flag set)
        if [[ "${MAB:-false}" == "true" ]]; then
            local batch_type_for_route
            batch_type_for_route=$(classify_batch_type "$PLAN_FILE" "$batch")
            local perf_file="$WORKTREE/logs/strategy-perf.json"
            [[ ! -f "$perf_file" ]] && init_strategy_perf "$perf_file"

            local mab_route
            mab_route=$(thompson_route "$batch_type_for_route" "$perf_file")
            echo "  [MAB] type=$batch_type_for_route → route=$mab_route"

            if [[ "$mab_route" == "mab" ]]; then
                local mab_exit=0
                "$SCRIPT_DIR/mab-run.sh" \
                    --plan "$PLAN_FILE" --batch "$batch" \
                    --work-unit "$title" --worktree "$WORKTREE" \
                    --quality-gate "$QUALITY_GATE_CMD" || mab_exit=$?

                if [[ $mab_exit -eq 0 ]]; then
                    local new_tc; new_tc=$(get_previous_test_count "$WORKTREE")
                    complete_batch "$WORKTREE" "$batch" "$new_tc"
                    batch_passed=true
                else
                    echo "MAB batch $batch failed (exit $mab_exit)"
                fi

                # The MAB run replaces normal headless execution for this batch.
                # `continue` jumps straight to the next batch and would bypass
                # the batch_passed check at the bottom of the loop, so the same
                # check is applied here; otherwise a failed MAB batch would be
                # silently treated as done.
                if [[ "$batch_passed" != true && "$ON_FAILURE" != "skip" ]]; then
                    echo "Batch $batch never passed. Exiting."
                    exit 1
                fi
                continue
            fi
        fi

        # Write batch header to progress.txt at the start of each batch (#53)
        # Non-fatal: progress tracking failure must not kill the run
        if type write_batch_progress &>/dev/null; then
            write_batch_progress "$WORKTREE" "$batch" "$title" || \
                echo "WARNING: Failed to write batch progress header (non-fatal)" >&2
        fi

        # Generate and inject per-batch context into CLAUDE.md
        # Guard all CLAUDE.md manipulation — failures here must not kill the run
        local batch_context="" _claude_md_existed=false _claude_md_backup=""
        batch_context=$(generate_batch_context "$PLAN_FILE" "$batch" "$WORKTREE" 2>/dev/null || true)
        if [[ -n "$batch_context" ]]; then
            {
                local claude_md="$WORKTREE/CLAUDE.md"
                if [[ -f "$claude_md" ]]; then
                    _claude_md_existed=true
                    _claude_md_backup=$(cat "$claude_md")
                fi
                # Remove previous run-plan context section if present.
                # awk approach avoids the sed range-deletion bug (#4): if
                # "## Run-Plan:" is the LAST section in CLAUDE.md, the sed
                # pattern '/^## Run-Plan:/,/^## [^R]/' has no closing anchor
                # and deletes from Run-Plan to EOF — eating the entire file.
                # awk prints everything before the Run-Plan section, skips
                # lines until the next ## header (or EOF), then resumes.
                if [[ -f "$claude_md" ]] && grep -q "^## Run-Plan:" "$claude_md"; then
                    local tmp
                    tmp=$(mktemp)
                    awk '
                        /^## Run-Plan:/ { in_section=1; next }
                        in_section && /^## / { in_section=0 }
                        !in_section { print }
                    ' "$claude_md" > "$tmp"
                    mv "$tmp" "$claude_md"
                fi
                # Append new context
                echo "" >> "$claude_md"
                echo "$batch_context" >> "$claude_md"
            } || echo "WARNING: Failed to inject batch context into CLAUDE.md (non-fatal)" >&2
        fi

        # Fetch the current test count INSIDE the loop — it increases after each batch.
        # Combine the cached stable prefix with the per-batch variable suffix so the
        # prompt always reflects the actual current test count (#48).
        local prev_test_count
        prev_test_count=$(get_previous_test_count "$WORKTREE")

        # NOTE(review): the variable suffix is emitted BEFORE the stable prefix
        # here, although the names suggest the opposite order — confirm this is
        # intentional before changing it.
        local prompt
        prompt=$(printf '%s\n\n%s\n' \
            "$(build_variable_suffix "$PLAN_FILE" "$batch" "$WORKTREE" "$prev_test_count")" \
            "$stable_prefix")

        # Spec echo-back gate: verify agent understands the batch before executing
        if [[ "${SKIP_ECHO_BACK:-false}" != "true" ]]; then
            if ! echo_back_check "$batch_text" "$WORKTREE/logs" "$batch"; then
                echo "WARNING: Echo-back check failed for batch $batch (proceeding anyway)" >&2
            fi
        fi

        local max_attempts=$((MAX_RETRIES + 1))
        local attempt=0

        while [[ $attempt -lt $max_attempts ]]; do
            attempt=$((attempt + 1))
            local log_file="$WORKTREE/logs/batch-${batch}-attempt-${attempt}.log"
            local batch_start
            batch_start=$(date +%s)

            echo ""
            echo "--- Attempt $attempt of $max_attempts ---"

            # Auto-sample on retry if configured
            if [[ "${SAMPLE_ON_RETRY:-false}" == "true" && "${SAMPLE_COUNT:-0}" -eq 0 && $attempt -ge 2 ]]; then
                SAMPLE_COUNT="${SAMPLE_DEFAULT_COUNT:-3}"
                echo "  Auto-enabling sampling ($SAMPLE_COUNT candidates) for retry"
            fi

            # Auto-sample on critical batches
            if [[ "${SAMPLE_ON_CRITICAL:-false}" == "true" && "${SAMPLE_COUNT:-0}" -eq 0 && $attempt -eq 1 ]]; then
                if is_critical_batch "$PLAN_FILE" "$batch"; then
                    SAMPLE_COUNT="${SAMPLE_DEFAULT_COUNT:-3}"
                    echo "  Auto-enabling sampling ($SAMPLE_COUNT candidates) for critical batch"
                fi
            fi

            # Memory guard for sampling
            if [[ "${SAMPLE_COUNT:-0}" -gt 0 ]]; then
                check_memory_for_sampling || true
            fi

            # If sampling enabled and this is a retry, use parallel candidates
            if [[ "${SAMPLE_COUNT:-0}" -gt 0 && $attempt -ge 2 ]]; then
                if run_sampling_candidates "$WORKTREE" "$PLAN_FILE" "$batch" "$prompt" "$QUALITY_GATE_CMD"; then
                    batch_passed=true
                    break
                fi
                continue  # Skip normal retry path below
            fi

            # Build escalation context for retries
            local full_prompt="$prompt"
            if [[ $attempt -eq 2 ]]; then
                local prev_log="$WORKTREE/logs/batch-${batch}-attempt-$((attempt - 1)).log"
                full_prompt="$prompt

IMPORTANT: Previous attempt failed. Review the quality gate output and fix the issues.
The previous attempt log is available at: $prev_log"
            elif [[ $attempt -ge 3 ]]; then
                local prev_log="$WORKTREE/logs/batch-${batch}-attempt-$((attempt - 1)).log"
                local log_digest=""
                if [[ -f "$prev_log" ]]; then
                    log_digest=$("$SCRIPT_DIR/../failure-digest.sh" "$prev_log" 2>/dev/null || tail -50 "$prev_log" 2>/dev/null || true)
                fi
                full_prompt="$prompt

IMPORTANT: Previous attempts failed ($((attempt - 1)) so far). This is attempt $attempt.
Failure digest from previous attempt:
\`\`\`
$log_digest
\`\`\`
Focus on fixing the root cause. Check test output carefully."
            fi

            # Run claude headless (unset CLAUDECODE to allow nested invocation)
            # Use --output-format json to capture session_id for cost tracking
            # NOTE: this sacrifices real-time streaming — if streaming is needed,
            # remove --output-format json and use tee instead (#38).
            local claude_exit=0
            local claude_json_output=""
            claude_json_output=$(CLAUDECODE='' claude -p "$full_prompt" \
                --allowedTools "Bash,Read,Write,Edit,Grep,Glob" \
                --permission-mode bypassPermissions \
                --output-format json \
                2>"$log_file.stderr") || claude_exit=$?

            # Extract session_id and result from JSON output
            local batch_session_id=""
            if [[ -n "$claude_json_output" ]]; then
                batch_session_id=$(echo "$claude_json_output" | jq -r '.session_id // empty' 2>/dev/null || true)
                # Write result text to log file (was previously done by tee)
                echo "$claude_json_output" | jq -r '.result // empty' 2>/dev/null > "$log_file" || true
            fi
            # Append stderr to the log even when stdout was empty: when claude
            # crashes without output, stderr is the only diagnostic and must
            # not be deleted unread.
            cat "$log_file.stderr" >> "$log_file" 2>/dev/null || true
            rm -f "$log_file.stderr"

            if [[ $claude_exit -ne 0 ]]; then
                echo "WARNING: claude exited with code $claude_exit"
            fi

            # Diagnostic: if log file is empty or missing, claude likely crashed with no output (#38)
            if [[ ! -s "$log_file" ]]; then
                echo "WARNING: claude produced no output (log file empty or missing). Claude may have crashed." >&2
                echo "  Log path: $log_file" >&2
                echo "  Exit code: $claude_exit" >&2
                echo "[run-plan] claude produced no output for batch $batch attempt $attempt (exit=$claude_exit)" >> "$log_file"
            fi

            # Echo-back gate: check agent output reflects batch intent (#30)
            # NON-BLOCKING by default; use --strict-echo-back to make it blocking.
            # In strict mode the failure is recorded in echo_back_blocked so the
            # attempt is routed through the failure handling below instead of
            # being allowed to pass on the quality gate alone.
            local echo_back_blocked=false
            _echo_back_check "$batch_text" "$log_file" || {
                if [[ "${STRICT_ECHO_BACK:-false}" == "true" ]]; then
                    echo "Batch $batch FAILED on attempt $attempt: echo-back gate (strict mode)"
                    echo_back_blocked=true
                fi
            }

            # Restore CLAUDE.md after context injection (prevent git-clean failure)
            # Try git checkout first (works when CLAUDE.md is tracked).
            # Fallback: if file didn't exist before injection, remove it;
            # if it did exist, restore from backup.
            if [[ -n "$batch_context" ]]; then
                {
                    git -C "$WORKTREE" checkout -- CLAUDE.md 2>/dev/null
                } || {
                    if [[ "$_claude_md_existed" == false ]]; then
                        rm -f "$WORKTREE/CLAUDE.md"
                    elif [[ -n "$_claude_md_backup" ]]; then
                        printf '%s\n' "$_claude_md_backup" > "$WORKTREE/CLAUDE.md"
                    fi
                } || echo "WARNING: Failed to restore CLAUDE.md (non-fatal)" >&2
            fi

            # Compute duration before quality gate (includes claude time, not gate time)
            local batch_end
            batch_end=$(date +%s)
            local duration_secs="$((batch_end - batch_start))"
            local duration="${duration_secs}s"

            # Run quality gate (passes duration for state tracking).
            # A strict echo-back failure fails the attempt outright; the gate is
            # skipped so it cannot mark the batch as passed.
            local gate_exit=0
            local fail_reason="Quality gate failed"
            if [[ "$echo_back_blocked" == true ]]; then
                gate_exit=1
                fail_reason="Echo-back gate failed (strict mode)"
            else
                run_quality_gate "$WORKTREE" "$QUALITY_GATE_CMD" "$batch" "$duration_secs" || gate_exit=$?
            fi

            if [[ $gate_exit -eq 0 ]]; then
                echo "Batch $batch PASSED (${duration})"
                batch_passed=true

                # Record cost for this batch
                if [[ -n "${batch_session_id:-}" ]]; then
                    record_batch_cost "$WORKTREE" "$batch" "$batch_session_id" || \
                        echo "WARNING: Failed to record batch cost (non-fatal)" >&2
                fi

                # Append State section to progress.txt after quality gate passes (#53)
                # Records test count, duration, and cost for cross-context memory.
                if type append_progress_section &>/dev/null; then
                    {
                        local _state_test_count
                        _state_test_count=$(get_previous_test_count "$WORKTREE" 2>/dev/null || echo "0")
                        local _state_cost=""
                        _state_cost=$(jq -r ".costs[\"$batch\"].estimated_cost_usd // empty" "$WORKTREE/.run-plan-state.json" 2>/dev/null || true)
                        local _state_content="- Tests: ${_state_test_count} passing"$'\n'"- Duration: ${duration}"
                        [[ -n "$_state_cost" ]] && _state_content+=$'\n'"- Cost: \$${_state_cost}"
                        append_progress_section "$WORKTREE" "State" "$_state_content"
                    } || echo "WARNING: Failed to append progress State section (non-fatal)" >&2
                fi

                if [[ "$NOTIFY" == true ]]; then
                    {
                        local new_test_count
                        new_test_count=$(get_previous_test_count "$WORKTREE")
                        # Build summary from git log (commits in this batch).
                        # awk joins the lines with "; " — tr maps single
                        # characters only, so it cannot emit a two-character
                        # separator or be trimmed by a trailing-"; " sed.
                        local batch_summary=""
                        batch_summary=$(cd "$WORKTREE" && git log --oneline -5 2>/dev/null | head -3 | sed 's/^[a-f0-9]* /• /' | awk 'NR > 1 { printf "; " } { printf "%s", $0 }') || true
                        local batch_cost=""
                        batch_cost=$(jq -r ".costs[\"$batch\"].estimated_cost_usd // empty" "$WORKTREE/.run-plan-state.json" 2>/dev/null || true)
                        notify_success "$plan_name" "$batch" "$END_BATCH" "$title" "$new_test_count" "$prev_test_count" "$duration" "$MODE" "$batch_summary" "$batch_cost"
                    } || echo "WARNING: Telegram notification failed (non-fatal)" >&2
                fi
                break
            else
                echo "Batch $batch FAILED on attempt $attempt (${duration})"

                if [[ "$NOTIFY" == true ]]; then
                    notify_failure "$plan_name" "$batch" "$END_BATCH" "$title" "0" "?" "$fail_reason" "$ON_FAILURE" || echo "WARNING: Telegram notification failed (non-fatal)" >&2
                fi

                # Record failure pattern for cross-run learning
                {
                    local fail_type="quality gate failure"
                    if [[ -f "$log_file" ]]; then
                        fail_type=$(grep -oE "(FAIL|ERROR|FAILED).*" "$log_file" | head -1 | cut -c1-80 || echo "quality gate failure")
                        [[ -z "$fail_type" ]] && fail_type="quality gate failure"
                    fi
                    record_failure_pattern "$WORKTREE" "$title" "$fail_type" ""
                } || echo "WARNING: Failed to record failure pattern (non-fatal)" >&2

                # Handle failure mode
                if [[ "$ON_FAILURE" == "stop" ]]; then
                    echo "STOPPING: --on-failure=stop. Fix issues and use --resume to continue."
                    exit 1
                elif [[ "$ON_FAILURE" == "skip" ]]; then
                    echo "SKIPPING: Batch $batch failed, moving to next batch."
                    break
                elif [[ "$ON_FAILURE" == "retry" ]]; then
                    if [[ $attempt -ge $max_attempts ]]; then
                        echo "EXHAUSTED: All $max_attempts attempts failed for batch $batch."
                        echo "STOPPING: No more retries."
                        exit 1
                    fi
                    echo "RETRYING: Attempt $((attempt + 1)) of $max_attempts..."
                fi
            fi
        done

        if [[ "$batch_passed" != true && "$ON_FAILURE" != "skip" ]]; then
            echo "Batch $batch never passed. Exiting."
            exit 1
        fi
    done

    echo ""
    echo "================================================================"
    echo "  All batches complete ($START_BATCH → $END_BATCH)"
    echo "================================================================"

    if [[ "$VERIFY" == true ]]; then
        echo ""
        echo "Running final verification..."
        run_quality_gate "$WORKTREE" "$QUALITY_GATE_CMD" "final" || {
            echo "FINAL VERIFICATION FAILED"
            exit 1
        }
    fi
}
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# run-plan-notify.sh — Telegram notification helpers for run-plan
|
|
3
|
+
#
|
|
4
|
+
# Functions:
|
|
5
|
+
# format_success_message <plan_name> <batch_num> <total_batches> <batch_title> <test_count> <prev_count> <duration> <mode> [summary] [cost]
|
|
6
|
+
# format_failure_message <plan_name> <batch_num> <total_batches> <batch_title> <test_count> <failing_count> <error> <action>
|
|
7
|
+
# notify_success (same args as format_success_message) — format + send
|
|
8
|
+
# notify_failure (same args as format_failure_message) — format + send
|
|
9
|
+
|
|
10
|
+
# Source shared telegram functions
|
|
11
|
+
_NOTIFY_SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
|
12
|
+
source "$_NOTIFY_SCRIPT_DIR/telegram.sh"
|
|
13
|
+
|
|
14
|
+
format_success_message() {
    # Build the success notification text for one completed batch.
    #
    # Arguments: $1 plan name      $2 batch number   $3 total batches
    #            $4 batch title    $5 test count     $6 previous test count
    #            $7 duration       $8 mode
    #            $9 summary (optional)   $10 cost (optional)
    # Outputs:   the message on stdout, terminated by a newline.
    #
    # The test count is printed exactly as given. Only the delta is computed
    # from sanitized values, so a missing or non-numeric count ("" or "?")
    # gives a delta based on 0 instead of an arithmetic error.
    local plan_name="$1" batch_num="$2" total_batches="$3" batch_title="$4"
    local test_count="$5" prev_count="$6" duration="$7" mode="$8"
    local summary="${9:-}" cost="${10:-}"

    # Accept optionally-signed decimal integers only. 10# forces base 10 so a
    # leading zero ("08") is not parsed as an invalid octal constant.
    local current=0 previous=0
    if [[ "$test_count" =~ ^(-?)([0-9]+)$ ]]; then
        current=$(( ${BASH_REMATCH[1]}10#${BASH_REMATCH[2]} ))
    fi
    if [[ "$prev_count" =~ ^(-?)([0-9]+)$ ]]; then
        previous=$(( ${BASH_REMATCH[1]}10#${BASH_REMATCH[2]} ))
    fi
    local delta=$(( current - previous ))

    local msg
    msg=$(printf '%s — Batch %s/%s ✓\n*%s*\nTests: %s (↑%s) | %s | %s' \
        "$plan_name" "$batch_num" "$total_batches" "$batch_title" \
        "$test_count" "$delta" "$duration" "$mode")

    # Cost is shown only when present and not the literal "0".
    if [[ -n "$cost" && "$cost" != "0" ]]; then
        msg+=" | \$${cost}"
    fi

    if [[ -n "$summary" ]]; then
        msg+=$'\n'"$summary"
    fi

    printf '%s\n' "$msg"
}
|
|
35
|
+
|
|
36
|
+
format_failure_message() {
    # Build the failure notification text for one batch.
    #
    # Arguments: $1 plan name     $2 batch number    $3 total batches
    #            $4 batch title   $5 test count      $6 failing count
    #            $7 error text    $8 action taken
    # Outputs:   the message on stdout, with NO trailing newline.
    local plan="$1"
    local num="$2"
    local total="$3"
    local title="$4"
    local tests="$5"
    local failing="$6"
    local issue="$7"
    local next_step="$8"

    # Assemble the message line by line, then emit it verbatim.
    local text="${plan} — Batch ${num}/${total} ✗"
    text+=$'\n'"*${title}*"
    text+=$'\n'"Tests: ${tests} (${failing} failing)"
    text+=$'\n'"Issue: ${issue}"
    text+=$'\n'"Action: ${next_step}"

    printf '%s' "$text"
}
|
|
44
|
+
|
|
45
|
+
notify_success() {
    # Format a success message from the given arguments (forwarded unchanged to
    # format_success_message) and send it via _send_telegram.
    # A failed send only produces a warning on stderr — it never aborts the
    # pipeline.
    local text
    text=$(format_success_message "$@")

    if ! _send_telegram "$text"; then
        echo "WARNING: notify_success: Telegram send failed (non-fatal)" >&2
    fi
}
|
|
51
|
+
|
|
52
|
+
notify_failure() {
    # Format a failure message from the given arguments (forwarded unchanged to
    # format_failure_message) and send it via _send_telegram.
    # A failed send only produces a warning on stderr — it never aborts the
    # pipeline.
    local text
    text=$(format_failure_message "$@")

    if ! _send_telegram "$text"; then
        echo "WARNING: notify_failure: Telegram send failed (non-fatal)" >&2
    fi
}
|