autonomous-coding-toolkit 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +22 -0
- package/.claude-plugin/plugin.json +13 -0
- package/LICENSE +21 -0
- package/Makefile +21 -0
- package/README.md +140 -0
- package/SECURITY.md +28 -0
- package/agents/bash-expert.md +113 -0
- package/agents/dependency-auditor.md +138 -0
- package/agents/integration-tester.md +120 -0
- package/agents/lesson-scanner.md +149 -0
- package/agents/python-expert.md +179 -0
- package/agents/service-monitor.md +141 -0
- package/agents/shell-expert.md +147 -0
- package/benchmarks/runner.sh +147 -0
- package/benchmarks/tasks/01-rest-endpoint/rubric.sh +29 -0
- package/benchmarks/tasks/01-rest-endpoint/task.md +17 -0
- package/benchmarks/tasks/02-refactor-module/task.md +8 -0
- package/benchmarks/tasks/03-fix-integration-bug/task.md +8 -0
- package/benchmarks/tasks/04-add-test-coverage/task.md +8 -0
- package/benchmarks/tasks/05-multi-file-feature/task.md +8 -0
- package/bin/act.js +238 -0
- package/commands/autocode.md +6 -0
- package/commands/cancel-ralph.md +18 -0
- package/commands/code-factory.md +53 -0
- package/commands/create-prd.md +55 -0
- package/commands/ralph-loop.md +18 -0
- package/commands/run-plan.md +117 -0
- package/commands/submit-lesson.md +122 -0
- package/docs/ARCHITECTURE.md +630 -0
- package/docs/CONTRIBUTING.md +125 -0
- package/docs/lessons/0001-bare-exception-swallowing.md +34 -0
- package/docs/lessons/0002-async-def-without-await.md +28 -0
- package/docs/lessons/0003-create-task-without-callback.md +28 -0
- package/docs/lessons/0004-hardcoded-test-counts.md +28 -0
- package/docs/lessons/0005-sqlite-without-closing.md +33 -0
- package/docs/lessons/0006-venv-pip-path.md +27 -0
- package/docs/lessons/0007-runner-state-self-rejection.md +35 -0
- package/docs/lessons/0008-quality-gate-blind-spot.md +33 -0
- package/docs/lessons/0009-parser-overcount-empty-batches.md +36 -0
- package/docs/lessons/0010-local-outside-function-bash.md +33 -0
- package/docs/lessons/0011-batch-tests-for-unimplemented-code.md +36 -0
- package/docs/lessons/0012-api-markdown-unescaped-chars.md +33 -0
- package/docs/lessons/0013-export-prefix-env-parsing.md +33 -0
- package/docs/lessons/0014-decorator-registry-import-side-effect.md +43 -0
- package/docs/lessons/0015-frontend-backend-schema-drift.md +43 -0
- package/docs/lessons/0016-event-driven-cold-start-seeding.md +44 -0
- package/docs/lessons/0017-copy-paste-logic-diverges.md +43 -0
- package/docs/lessons/0018-layer-passes-pipeline-broken.md +45 -0
- package/docs/lessons/0019-systemd-envfile-ignores-export.md +41 -0
- package/docs/lessons/0020-persist-state-incrementally.md +44 -0
- package/docs/lessons/0021-dual-axis-testing.md +48 -0
- package/docs/lessons/0022-jsx-factory-shadowing.md +43 -0
- package/docs/lessons/0023-static-analysis-spiral.md +51 -0
- package/docs/lessons/0024-shared-pipeline-implementation.md +55 -0
- package/docs/lessons/0025-defense-in-depth-all-entry-points.md +65 -0
- package/docs/lessons/0026-linter-no-rules-false-enforcement.md +54 -0
- package/docs/lessons/0027-jsx-silent-prop-drop.md +64 -0
- package/docs/lessons/0028-no-infrastructure-in-client-code.md +49 -0
- package/docs/lessons/0029-never-write-secrets-to-files.md +61 -0
- package/docs/lessons/0030-cache-merge-not-replace.md +62 -0
- package/docs/lessons/0031-verify-units-at-boundaries.md +66 -0
- package/docs/lessons/0032-module-lifecycle-subscribe-unsubscribe.md +89 -0
- package/docs/lessons/0033-async-iteration-mutable-snapshot.md +72 -0
- package/docs/lessons/0034-caller-missing-await-silent-discard.md +65 -0
- package/docs/lessons/0035-duplicate-registration-silent-overwrite.md +85 -0
- package/docs/lessons/0036-websocket-dirty-disconnect.md +33 -0
- package/docs/lessons/0037-parallel-agents-worktree-corruption.md +31 -0
- package/docs/lessons/0038-subscribe-no-stored-ref.md +36 -0
- package/docs/lessons/0039-fallback-or-default-hides-bugs.md +34 -0
- package/docs/lessons/0040-event-firehose-filter-first.md +36 -0
- package/docs/lessons/0041-ambiguous-base-dir-path-nesting.md +32 -0
- package/docs/lessons/0042-spec-compliance-insufficient.md +36 -0
- package/docs/lessons/0043-exact-count-extensible-collections.md +32 -0
- package/docs/lessons/0044-relative-file-deps-worktree.md +39 -0
- package/docs/lessons/0045-iterative-design-improvement.md +33 -0
- package/docs/lessons/0046-plan-assertion-math-bugs.md +38 -0
- package/docs/lessons/0047-pytest-single-threaded-default.md +37 -0
- package/docs/lessons/0048-integration-wiring-batch.md +40 -0
- package/docs/lessons/0049-ab-verification.md +41 -0
- package/docs/lessons/0050-editing-sourced-files-during-execution.md +33 -0
- package/docs/lessons/0051-infrastructure-fixes-cant-self-heal.md +30 -0
- package/docs/lessons/0052-uncommitted-changes-poison-quality-gates.md +31 -0
- package/docs/lessons/0053-jq-compact-flag-inconsistency.md +31 -0
- package/docs/lessons/0054-parser-matches-inside-code-blocks.md +30 -0
- package/docs/lessons/0055-agents-compensate-for-garbled-prompts.md +31 -0
- package/docs/lessons/0056-grep-count-exit-code-on-zero.md +42 -0
- package/docs/lessons/0057-new-artifacts-break-git-clean-gates.md +42 -0
- package/docs/lessons/0058-dead-config-keys-never-consumed.md +49 -0
- package/docs/lessons/0059-contract-test-shared-structures.md +53 -0
- package/docs/lessons/0060-set-e-silent-death-in-runners.md +53 -0
- package/docs/lessons/0061-context-injection-dirty-state.md +50 -0
- package/docs/lessons/0062-sibling-bug-neighborhood-scan.md +29 -0
- package/docs/lessons/0063-one-flag-two-lifetimes.md +31 -0
- package/docs/lessons/0064-test-passes-wrong-reason.md +31 -0
- package/docs/lessons/0065-pipefail-grep-count-double-output.md +39 -0
- package/docs/lessons/0066-local-keyword-outside-function.md +37 -0
- package/docs/lessons/0067-stdin-hang-non-interactive-shell.md +36 -0
- package/docs/lessons/0068-agent-builds-wrong-thing-correctly.md +31 -0
- package/docs/lessons/0069-plan-quality-dominates-execution.md +30 -0
- package/docs/lessons/0070-spec-echo-back-prevents-drift.md +31 -0
- package/docs/lessons/0071-positive-instructions-outperform-negative.md +30 -0
- package/docs/lessons/0072-lost-in-the-middle-context-placement.md +30 -0
- package/docs/lessons/0073-unscoped-lessons-cause-false-positives.md +30 -0
- package/docs/lessons/0074-stale-context-injection-wrong-batch.md +32 -0
- package/docs/lessons/0075-research-artifacts-must-persist.md +32 -0
- package/docs/lessons/0076-wrong-decomposition-contaminates-downstream.md +30 -0
- package/docs/lessons/0077-cherry-pick-merges-need-manual-resolution.md +30 -0
- package/docs/lessons/0078-static-review-without-live-test.md +30 -0
- package/docs/lessons/0079-integration-wiring-batch-required.md +32 -0
- package/docs/lessons/FRAMEWORK.md +161 -0
- package/docs/lessons/SUMMARY.md +201 -0
- package/docs/lessons/TEMPLATE.md +85 -0
- package/docs/plans/2026-02-21-code-factory-v2-design.md +204 -0
- package/docs/plans/2026-02-21-code-factory-v2-implementation-plan.md +2189 -0
- package/docs/plans/2026-02-21-code-factory-v2-phase4-design.md +537 -0
- package/docs/plans/2026-02-21-code-factory-v2-phase4-implementation-plan.md +2012 -0
- package/docs/plans/2026-02-21-hardening-pass-design.md +108 -0
- package/docs/plans/2026-02-21-hardening-pass-plan.md +1378 -0
- package/docs/plans/2026-02-21-mab-research-report.md +406 -0
- package/docs/plans/2026-02-21-marketplace-restructure-design.md +240 -0
- package/docs/plans/2026-02-21-marketplace-restructure-plan.md +832 -0
- package/docs/plans/2026-02-21-phase4-completion-plan.md +697 -0
- package/docs/plans/2026-02-21-validator-suite-design.md +148 -0
- package/docs/plans/2026-02-21-validator-suite-plan.md +540 -0
- package/docs/plans/2026-02-22-mab-research-round2.md +556 -0
- package/docs/plans/2026-02-22-mab-run-design.md +462 -0
- package/docs/plans/2026-02-22-mab-run-plan.md +2046 -0
- package/docs/plans/2026-02-22-operations-design-methodology-research.md +681 -0
- package/docs/plans/2026-02-22-research-agent-failure-taxonomy.md +532 -0
- package/docs/plans/2026-02-22-research-code-guideline-policies.md +886 -0
- package/docs/plans/2026-02-22-research-codebase-audit-refactoring.md +908 -0
- package/docs/plans/2026-02-22-research-coding-standards-documentation.md +541 -0
- package/docs/plans/2026-02-22-research-competitive-landscape.md +687 -0
- package/docs/plans/2026-02-22-research-comprehensive-testing.md +1076 -0
- package/docs/plans/2026-02-22-research-context-utilization.md +459 -0
- package/docs/plans/2026-02-22-research-cost-quality-tradeoff.md +548 -0
- package/docs/plans/2026-02-22-research-lesson-transferability.md +508 -0
- package/docs/plans/2026-02-22-research-multi-agent-coordination.md +312 -0
- package/docs/plans/2026-02-22-research-phase-integration.md +602 -0
- package/docs/plans/2026-02-22-research-plan-quality.md +428 -0
- package/docs/plans/2026-02-22-research-prompt-engineering.md +558 -0
- package/docs/plans/2026-02-22-research-unconventional-perspectives.md +528 -0
- package/docs/plans/2026-02-22-research-user-adoption.md +638 -0
- package/docs/plans/2026-02-22-research-verification-effectiveness.md +433 -0
- package/docs/plans/2026-02-23-agent-suite-design.md +299 -0
- package/docs/plans/2026-02-23-agent-suite-plan.md +578 -0
- package/docs/plans/2026-02-23-phase3-cost-infrastructure-design.md +148 -0
- package/docs/plans/2026-02-23-phase3-cost-infrastructure-plan.md +1062 -0
- package/docs/plans/2026-02-23-research-bash-expert-agent.md +543 -0
- package/docs/plans/2026-02-23-research-dependency-auditor-agent.md +564 -0
- package/docs/plans/2026-02-23-research-improving-existing-agents.md +503 -0
- package/docs/plans/2026-02-23-research-integration-tester-agent.md +454 -0
- package/docs/plans/2026-02-23-research-python-expert-agent.md +429 -0
- package/docs/plans/2026-02-23-research-service-monitor-agent.md +425 -0
- package/docs/plans/2026-02-23-research-shell-expert-agent.md +533 -0
- package/docs/plans/2026-02-23-roadmap-to-completion.md +530 -0
- package/docs/plans/2026-02-24-headless-module-split-design.md +98 -0
- package/docs/plans/2026-02-24-headless-module-split.md +443 -0
- package/docs/plans/2026-02-24-lesson-scope-metadata-design.md +228 -0
- package/docs/plans/2026-02-24-lesson-scope-metadata-plan.md +968 -0
- package/docs/plans/2026-02-24-npm-packaging-design.md +841 -0
- package/docs/plans/2026-02-24-npm-packaging-plan.md +1965 -0
- package/docs/plans/audit-findings.md +186 -0
- package/docs/telegram-notification-format.md +98 -0
- package/examples/example-plan.md +51 -0
- package/examples/example-prd.json +72 -0
- package/examples/example-roadmap.md +33 -0
- package/examples/quickstart-plan.md +63 -0
- package/hooks/hooks.json +26 -0
- package/hooks/setup-symlinks.sh +48 -0
- package/hooks/stop-hook.sh +135 -0
- package/package.json +47 -0
- package/policies/bash.md +71 -0
- package/policies/python.md +71 -0
- package/policies/testing.md +61 -0
- package/policies/universal.md +60 -0
- package/scripts/analyze-report.sh +97 -0
- package/scripts/architecture-map.sh +145 -0
- package/scripts/auto-compound.sh +273 -0
- package/scripts/batch-audit.sh +42 -0
- package/scripts/batch-test.sh +101 -0
- package/scripts/entropy-audit.sh +221 -0
- package/scripts/failure-digest.sh +51 -0
- package/scripts/generate-ast-rules.sh +96 -0
- package/scripts/init.sh +112 -0
- package/scripts/lesson-check.sh +428 -0
- package/scripts/lib/common.sh +61 -0
- package/scripts/lib/cost-tracking.sh +153 -0
- package/scripts/lib/ollama.sh +60 -0
- package/scripts/lib/progress-writer.sh +128 -0
- package/scripts/lib/run-plan-context.sh +215 -0
- package/scripts/lib/run-plan-echo-back.sh +231 -0
- package/scripts/lib/run-plan-headless.sh +396 -0
- package/scripts/lib/run-plan-notify.sh +57 -0
- package/scripts/lib/run-plan-parser.sh +81 -0
- package/scripts/lib/run-plan-prompt.sh +215 -0
- package/scripts/lib/run-plan-quality-gate.sh +132 -0
- package/scripts/lib/run-plan-routing.sh +315 -0
- package/scripts/lib/run-plan-sampling.sh +170 -0
- package/scripts/lib/run-plan-scoring.sh +146 -0
- package/scripts/lib/run-plan-state.sh +142 -0
- package/scripts/lib/run-plan-team.sh +199 -0
- package/scripts/lib/telegram.sh +54 -0
- package/scripts/lib/thompson-sampling.sh +176 -0
- package/scripts/license-check.sh +74 -0
- package/scripts/mab-run.sh +575 -0
- package/scripts/module-size-check.sh +146 -0
- package/scripts/patterns/async-no-await.yml +5 -0
- package/scripts/patterns/bare-except.yml +6 -0
- package/scripts/patterns/empty-catch.yml +6 -0
- package/scripts/patterns/hardcoded-localhost.yml +9 -0
- package/scripts/patterns/retry-loop-no-backoff.yml +12 -0
- package/scripts/pipeline-status.sh +197 -0
- package/scripts/policy-check.sh +226 -0
- package/scripts/prior-art-search.sh +133 -0
- package/scripts/promote-mab-lessons.sh +126 -0
- package/scripts/prompts/agent-a-superpowers.md +29 -0
- package/scripts/prompts/agent-b-ralph.md +29 -0
- package/scripts/prompts/judge-agent.md +61 -0
- package/scripts/prompts/planner-agent.md +44 -0
- package/scripts/pull-community-lessons.sh +90 -0
- package/scripts/quality-gate.sh +266 -0
- package/scripts/research-gate.sh +90 -0
- package/scripts/run-plan.sh +329 -0
- package/scripts/scope-infer.sh +159 -0
- package/scripts/setup-ralph-loop.sh +155 -0
- package/scripts/telemetry.sh +230 -0
- package/scripts/tests/run-all-tests.sh +52 -0
- package/scripts/tests/test-act-cli.sh +46 -0
- package/scripts/tests/test-agents-md.sh +87 -0
- package/scripts/tests/test-analyze-report.sh +114 -0
- package/scripts/tests/test-architecture-map.sh +89 -0
- package/scripts/tests/test-auto-compound.sh +169 -0
- package/scripts/tests/test-batch-test.sh +65 -0
- package/scripts/tests/test-benchmark-runner.sh +25 -0
- package/scripts/tests/test-common.sh +168 -0
- package/scripts/tests/test-cost-tracking.sh +158 -0
- package/scripts/tests/test-echo-back.sh +180 -0
- package/scripts/tests/test-entropy-audit.sh +146 -0
- package/scripts/tests/test-failure-digest.sh +66 -0
- package/scripts/tests/test-generate-ast-rules.sh +145 -0
- package/scripts/tests/test-helpers.sh +82 -0
- package/scripts/tests/test-init.sh +47 -0
- package/scripts/tests/test-lesson-check.sh +278 -0
- package/scripts/tests/test-lesson-local.sh +55 -0
- package/scripts/tests/test-license-check.sh +109 -0
- package/scripts/tests/test-mab-run.sh +182 -0
- package/scripts/tests/test-ollama-lib.sh +49 -0
- package/scripts/tests/test-ollama.sh +60 -0
- package/scripts/tests/test-pipeline-status.sh +198 -0
- package/scripts/tests/test-policy-check.sh +124 -0
- package/scripts/tests/test-prior-art-search.sh +96 -0
- package/scripts/tests/test-progress-writer.sh +140 -0
- package/scripts/tests/test-promote-mab-lessons.sh +110 -0
- package/scripts/tests/test-pull-community-lessons.sh +149 -0
- package/scripts/tests/test-quality-gate.sh +241 -0
- package/scripts/tests/test-research-gate.sh +132 -0
- package/scripts/tests/test-run-plan-cli.sh +86 -0
- package/scripts/tests/test-run-plan-context.sh +305 -0
- package/scripts/tests/test-run-plan-e2e.sh +153 -0
- package/scripts/tests/test-run-plan-headless.sh +424 -0
- package/scripts/tests/test-run-plan-notify.sh +124 -0
- package/scripts/tests/test-run-plan-parser.sh +217 -0
- package/scripts/tests/test-run-plan-prompt.sh +254 -0
- package/scripts/tests/test-run-plan-quality-gate.sh +222 -0
- package/scripts/tests/test-run-plan-routing.sh +178 -0
- package/scripts/tests/test-run-plan-scoring.sh +148 -0
- package/scripts/tests/test-run-plan-state.sh +261 -0
- package/scripts/tests/test-run-plan-team.sh +157 -0
- package/scripts/tests/test-scope-infer.sh +150 -0
- package/scripts/tests/test-setup-ralph-loop.sh +63 -0
- package/scripts/tests/test-telegram-env.sh +38 -0
- package/scripts/tests/test-telegram.sh +121 -0
- package/scripts/tests/test-telemetry.sh +46 -0
- package/scripts/tests/test-thompson-sampling.sh +139 -0
- package/scripts/tests/test-validate-all.sh +60 -0
- package/scripts/tests/test-validate-commands.sh +89 -0
- package/scripts/tests/test-validate-hooks.sh +98 -0
- package/scripts/tests/test-validate-lessons.sh +150 -0
- package/scripts/tests/test-validate-plan-quality.sh +235 -0
- package/scripts/tests/test-validate-plans.sh +187 -0
- package/scripts/tests/test-validate-plugin.sh +106 -0
- package/scripts/tests/test-validate-prd.sh +184 -0
- package/scripts/tests/test-validate-skills.sh +134 -0
- package/scripts/validate-all.sh +57 -0
- package/scripts/validate-commands.sh +67 -0
- package/scripts/validate-hooks.sh +89 -0
- package/scripts/validate-lessons.sh +98 -0
- package/scripts/validate-plan-quality.sh +369 -0
- package/scripts/validate-plans.sh +120 -0
- package/scripts/validate-plugin.sh +86 -0
- package/scripts/validate-policies.sh +42 -0
- package/scripts/validate-prd.sh +118 -0
- package/scripts/validate-skills.sh +96 -0
- package/skills/autocode/SKILL.md +285 -0
- package/skills/autocode/ab-verification.md +51 -0
- package/skills/autocode/code-quality-standards.md +37 -0
- package/skills/autocode/competitive-mode.md +364 -0
- package/skills/brainstorming/SKILL.md +97 -0
- package/skills/capture-lesson/SKILL.md +187 -0
- package/skills/check-lessons/SKILL.md +116 -0
- package/skills/dispatching-parallel-agents/SKILL.md +110 -0
- package/skills/executing-plans/SKILL.md +85 -0
- package/skills/finishing-a-development-branch/SKILL.md +201 -0
- package/skills/receiving-code-review/SKILL.md +72 -0
- package/skills/requesting-code-review/SKILL.md +59 -0
- package/skills/requesting-code-review/code-reviewer.md +82 -0
- package/skills/research/SKILL.md +145 -0
- package/skills/roadmap/SKILL.md +115 -0
- package/skills/subagent-driven-development/SKILL.md +98 -0
- package/skills/subagent-driven-development/code-quality-reviewer-prompt.md +18 -0
- package/skills/subagent-driven-development/implementer-prompt.md +73 -0
- package/skills/subagent-driven-development/spec-reviewer-prompt.md +57 -0
- package/skills/systematic-debugging/SKILL.md +134 -0
- package/skills/systematic-debugging/condition-based-waiting.md +64 -0
- package/skills/systematic-debugging/defense-in-depth.md +32 -0
- package/skills/systematic-debugging/root-cause-tracing.md +55 -0
- package/skills/test-driven-development/SKILL.md +167 -0
- package/skills/using-git-worktrees/SKILL.md +219 -0
- package/skills/using-superpowers/SKILL.md +54 -0
- package/skills/verification-before-completion/SKILL.md +140 -0
- package/skills/verify/SKILL.md +82 -0
- package/skills/writing-plans/SKILL.md +128 -0
- package/skills/writing-skills/SKILL.md +93 -0
|
@@ -0,0 +1,315 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# run-plan-routing.sh — Plan analysis, dependency graph, and execution mode routing
|
|
3
|
+
#
|
|
4
|
+
# Analyzes plan structure to determine optimal execution mode.
|
|
5
|
+
# Builds dependency graphs from Files/context_refs metadata.
|
|
6
|
+
#
|
|
7
|
+
# Functions:
|
|
8
|
+
# build_dependency_graph <plan_file> -> JSON {batch: [deps]}
|
|
9
|
+
# compute_parallelism_score <plan_file> -> 0-100 score
|
|
10
|
+
# recommend_execution_mode <score> <teams_avail> <mem_gb> -> headless|team
|
|
11
|
+
# classify_batch_model <plan_file> <batch_num> -> sonnet|haiku|opus
|
|
12
|
+
# generate_routing_plan <plan_file> <score> <teams_avail> <mem_gb> <mode> -> printed plan
|
|
13
|
+
# log_routing_decision <worktree> <category> <message>
|
|
14
|
+
|
|
15
|
+
# --- Configuration ---
|
|
16
|
+
PARALLELISM_THRESHOLD_TEAM=40 # Score at or above this recommends team mode (memory permitting)
|
|
17
|
+
MIN_MEMORY_TEAM_GB=8 # Minimum memory for team mode
|
|
18
|
+
# shellcheck disable=SC2034 # MIN_BATCHES_TEAM reserved for future use
|
|
19
|
+
MIN_BATCHES_TEAM=3 # Need at least 3 batches to justify team mode
|
|
20
|
+
|
|
21
|
+
# --- Sampling configuration ---
|
|
22
|
+
# shellcheck disable=SC2034 # consumed by run-plan-headless.sh
|
|
23
|
+
SAMPLE_ON_RETRY=true # auto-sample when batch fails first attempt
|
|
24
|
+
# shellcheck disable=SC2034
|
|
25
|
+
SAMPLE_ON_CRITICAL=true # auto-sample for CRITICAL batches
|
|
26
|
+
# shellcheck disable=SC2034
|
|
27
|
+
SAMPLE_DEFAULT_COUNT=3 # default candidate count
|
|
28
|
+
# shellcheck disable=SC2034
|
|
29
|
+
SAMPLE_MAX_COUNT=5 # hard cap
|
|
30
|
+
# shellcheck disable=SC2034
|
|
31
|
+
SAMPLE_MIN_MEMORY_PER_GB=4 # per-candidate memory requirement (GB)
|
|
32
|
+
|
|
33
|
+
# --- Extract files touched by a batch ---
|
|
34
|
+
# Returns lines like "Create:src/lib.sh" or "Modify:src/lib.sh"
|
|
35
|
+
_get_batch_files() {
    # List the files a batch declares it will create or modify.
    #
    # Arguments: $1 - plan file, $2 - batch number (both forwarded to get_batch_text)
    # Outputs:   one "Create:<path>" or "Modify:<path>" line per backtick-quoted
    #            `Create: `…`` / `Modify: `…`` entry found in the batch text
    # Returns:   always 0 — a batch with no file entries simply prints nothing
    local plan_file="$1"
    local batch_num="$2"
    local section
    section=$(get_batch_text "$plan_file" "$batch_num")

    # grep exits non-zero when nothing matches; swallow that so callers running
    # with errexit/pipefail are not aborted by a batch without file entries.
    {
        grep -oE '(Create|Modify): `[^`]+`' <<< "$section" \
            | sed -e 's/`//g' -e 's/: /:/g'
    } || true
}
|
|
41
|
+
|
|
42
|
+
# --- Build dependency graph ---
|
|
43
|
+
# Returns JSON: {"1": [], "2": ["1"], "3": ["1"], "4": ["2","3"]}
|
|
44
|
+
build_dependency_graph() {
    # Build the batch dependency graph for a plan.
    #
    # Arguments: $1 - plan file
    # Outputs:   compact JSON object on stdout mapping each batch number to the
    #            list of batch numbers it depends on, e.g.
    #            {"1":[],"2":["1"],"3":["1"],"4":["2","3"]}
    # Requires:  count_batches, _get_batch_files, get_batch_context_refs
    #
    # A batch B depends on batch X when:
    #   - B modifies a file that X creates (X != B), or
    #   - B modifies a file that an earlier batch X (< B) creates or modifies, or
    #   - B lists a context ref that X creates (X != B).
    # NOTE(review): the "creates" rules are not restricted to earlier batches,
    # so a plan that modifies a file before creating it yields a forward edge.
    local plan_file="$1"
    local total
    total=$(count_batches "$plan_file")
    # A missing/garbled count would otherwise break the arithmetic loops below.
    [[ "$total" =~ ^[0-9]+$ ]] || total=0

    local -A creates=()      # file -> batch that creates it (last one wins)
    local -A modifies=()     # file -> space-separated batches touching it
    local -a batch_files=()  # batch -> cached _get_batch_files output
    # Every loop/scratch variable is local so nothing leaks into the caller
    # when this function is invoked directly rather than via $(...).
    local b i line action file creator touchers t refs ref d
    local -a toucher_list deps unique_deps
    local seen deps_json

    # Phase 1: collect files each batch creates/modifies
    for ((b = 1; b <= total; b++)); do
        batch_files[b]=$(_get_batch_files "$plan_file" "$b")
        while IFS= read -r line; do
            [[ -z "$line" ]] && continue
            action="${line%%:*}"
            file="${line#*:}"
            if [[ "$action" == "Create" ]]; then
                creates["$file"]="$b"
            fi
            # Track all batches that touch each file (Create or Modify)
            modifies["$file"]="${modifies[$file]:-} $b"
        done <<< "${batch_files[b]}"
    done

    # Phase 2: find dependencies (Create→Modify, Modify→Modify, context_refs)
    local graph="{"
    for ((b = 1; b <= total; b++)); do
        deps=()

        # Reuse the file list gathered in phase 1 instead of re-parsing the plan.
        while IFS= read -r line; do
            [[ -z "$line" ]] && continue
            action="${line%%:*}"
            file="${line#*:}"
            [[ "$action" == "Modify" ]] || continue

            creator="${creates[$file]:-}"
            if [[ -n "$creator" && "$creator" != "$b" ]]; then
                deps+=("$creator")
            fi

            # Split explicitly on spaces so the result does not depend on the
            # caller's IFS (strict-mode scripts often drop the space).
            touchers="${modifies[$file]:-}"
            toucher_list=()
            IFS=' ' read -r -a toucher_list <<< "$touchers"
            for t in "${toucher_list[@]+"${toucher_list[@]}"}"; do
                if [[ "$t" -lt "$b" ]]; then
                    deps+=("$t")
                fi
            done
        done <<< "${batch_files[b]}"

        # Check context_refs (lookup failures are treated as "no refs")
        refs=$(get_batch_context_refs "$plan_file" "$b" 2>/dev/null || true)
        while IFS= read -r ref; do
            # Trim surrounding whitespace in-shell: no fork per ref, and paths
            # containing quotes or backslashes are compared literally.
            ref="${ref#"${ref%%[![:space:]]*}"}"
            ref="${ref%"${ref##*[![:space:]]}"}"
            [[ -z "$ref" ]] && continue
            creator="${creates[$ref]:-}"
            if [[ -n "$creator" && "$creator" != "$b" ]]; then
                deps+=("$creator")
            fi
        done <<< "$refs"

        # Deduplicate deps, preserving first-seen order
        unique_deps=()
        seen=""
        for d in "${deps[@]+"${deps[@]}"}"; do
            if [[ "$seen" != *"|$d|"* ]]; then
                unique_deps+=("$d")
                seen+="|$d|"
            fi
        done

        # Build JSON array of quoted batch numbers
        deps_json="[]"
        if [[ ${#unique_deps[@]} -gt 0 ]]; then
            deps_json="["
            for ((i = 0; i < ${#unique_deps[@]}; i++)); do
                if ((i > 0)); then
                    deps_json+=","
                fi
                deps_json+="\"${unique_deps[$i]}\""
            done
            deps_json+="]"
        fi

        if ((b > 1)); then
            graph+=","
        fi
        graph+="\"$b\":$deps_json"
    done
    graph+="}"

    echo "$graph"
}
|
|
133
|
+
|
|
134
|
+
# --- Compute parallelism score (0-100) ---
|
|
135
|
+
# Higher = more batches can run in parallel
|
|
136
|
+
compute_parallelism_score() {
    # Score (0-100) how much of a plan can run in parallel.
    #
    # Arguments: $1 - plan file
    # Outputs:   integer score on stdout; "0" when the plan has at most one batch
    # Requires:  count_batches, build_dependency_graph, jq, timeout
    #
    # Batches are layered into rounds: a batch joins the current round once all
    # of its dependencies completed in an earlier round. The score combines the
    # widest round (70%) with the number of rounds saved versus fully
    # sequential execution (30%).
    local plan_file="$1"
    local total
    total=$(count_batches "$plan_file")

    if [[ "$total" -le 1 ]]; then
        echo "0"
        return
    fi

    local graph
    graph=$(build_dependency_graph "$plan_file")

    # Resolve each batch's dependency list once up front; it does not change
    # between rounds, so there is no need to re-run jq on every pass.
    local -a batch_deps=()
    local b dep rc
    for ((b = 1; b <= total; b++)); do
        rc=0
        batch_deps[b]=$(timeout 30 jq -r ".\"$b\"[]" 2>/dev/null <<< "$graph") || rc=$?
        if [[ "$rc" -ne 0 ]]; then
            # Any jq failure degrades to "no deps"; only a timeout is reported.
            [[ "$rc" -eq 124 ]] && echo "[WARN] jq timeout on batch $b — treating as no deps" >&2
            batch_deps[b]=""
        fi
    done

    # Topological sort into parallel groups
    local completed="" new_completed
    local groups=0
    local max_group_size=0 group_size
    local remaining="$total"
    local all_met

    while [[ "$remaining" -gt 0 ]]; do
        groups=$((groups + 1))
        group_size=0
        new_completed=""

        for ((b = 1; b <= total; b++)); do
            # Skip already completed
            [[ "$completed" == *"|$b|"* ]] && continue

            all_met=true
            while IFS= read -r dep; do
                [[ -z "$dep" ]] && continue
                if [[ "$completed" != *"|$dep|"* ]]; then
                    all_met=false
                    break
                fi
            done <<< "${batch_deps[b]}"

            if [[ "$all_met" == true ]]; then
                # Recorded separately so batches in the same round cannot
                # satisfy each other's dependencies.
                new_completed+="|$b|"
                group_size=$((group_size + 1))
                remaining=$((remaining - 1))
            fi
        done

        completed+="$new_completed"

        if [[ "$group_size" -gt "$max_group_size" ]]; then
            max_group_size=$group_size
        fi

        # No progress means the remaining batches can never be scheduled
        # (e.g. a dependency cycle); stop instead of looping forever.
        if [[ "$group_size" -eq 0 ]]; then
            break
        fi
    done

    # Score: weighted parallel_ratio (70%) + group_savings (30%)
    local parallel_ratio=$(( (max_group_size * 100) / total ))
    local denom=$(( total - 1 ))
    [[ "$denom" -lt 1 ]] && denom=1
    local group_savings=$(( (total - groups) * 100 / denom ))
    local score=$(( (parallel_ratio * 7 + group_savings * 3) / 10 ))

    # Clamp to 0-100
    [[ "$score" -gt 100 ]] && score=100
    [[ "$score" -lt 0 ]] && score=0

    echo "$score"
}
|
|
209
|
+
|
|
210
|
+
# --- Recommend execution mode ---
|
|
211
|
+
recommend_execution_mode() {
    # Pick the execution mode for a plan.
    #
    # Arguments: $1 - parallelism score
    #            $2 - teams-available flag (default "false"); accepted for
    #                 interface compatibility but not consulted by the decision
    #            $3 - available memory in GB (default 0)
    # Globals:   PARALLELISM_THRESHOLD_TEAM, MIN_MEMORY_TEAM_GB (read)
    # Outputs:   "team" when both the score and the memory meet their minimums,
    #            otherwise "headless"
    local score="$1"
    local teams_available="${2:-false}"
    local available_mem_gb="${3:-0}"

    local mode="headless"
    if [[ "$score" -ge "$PARALLELISM_THRESHOLD_TEAM" ]]; then
        if [[ "$available_mem_gb" -ge "$MIN_MEMORY_TEAM_GB" ]]; then
            mode="team"
        fi
    fi

    echo "$mode"
}
|
|
222
|
+
|
|
223
|
+
# --- Classify batch model (sonnet/haiku/opus) ---
|
|
224
|
+
classify_batch_model() {
    # Choose the model tier for a batch from the shape of its plan text.
    #
    # Arguments: $1 - plan file, $2 - batch number
    # Outputs:   "sonnet", "haiku" or "opus" on stdout
    # Requires:  get_batch_text, get_batch_title
    #
    # Rules are evaluated in order; the first match wins:
    #   1. a **Files:** section plus any "Create:" entry -> sonnet
    #   2. any "Modify:" entry                           -> sonnet
    #   3. at least as many "Run: " lines as steps       -> haiku
    #   4. "CRITICAL" in the batch title                 -> opus
    #   5. otherwise                                     -> sonnet
    # NOTE(review): because of this ordering a CRITICAL batch that creates or
    # modifies files is classified as sonnet — confirm that is intended.
    #
    # The text is fed to grep via here-strings rather than `echo … | grep -q`:
    # grep -q exits on the first match, and under `set -o pipefail` a large
    # batch text could then make the pipeline report SIGPIPE (141) and be
    # mistaken for "no match".
    local plan_file="$1" batch_num="$2"
    local batch_text
    batch_text=$(get_batch_text "$plan_file" "$batch_num")

    # Check for Create files — needs implementation skill = sonnet
    if grep -qE -- '^\*\*Files:\*\*' <<< "$batch_text" \
        && grep -qE -- 'Create:' <<< "$batch_text"; then
        echo "sonnet"
        return
    fi

    # Check for Modify files — needs understanding + editing = sonnet
    if grep -qE -- 'Modify:' <<< "$batch_text"; then
        echo "sonnet"
        return
    fi

    # Check if batch is mostly Run commands (verification) = haiku.
    # grep -c prints 0 but exits 1 when nothing matches, hence `|| true`.
    local total_steps run_steps
    total_steps=$(grep -cE -- '^\*\*Step [0-9]+' 2>/dev/null <<< "$batch_text" || true)
    total_steps=${total_steps:-0}
    run_steps=$(grep -cE -- '^Run: ' 2>/dev/null <<< "$batch_text" || true)
    run_steps=${run_steps:-0}
    if [[ "$total_steps" -gt 0 && "$run_steps" -ge "$total_steps" ]]; then
        echo "haiku"
        return
    fi

    # Check for CRITICAL tag = opus
    local title
    title=$(get_batch_title "$plan_file" "$batch_num")
    if [[ "$title" == *"CRITICAL"* ]]; then
        echo "opus"
        return
    fi

    # Default: sonnet
    echo "sonnet"
}
|
|
264
|
+
|
|
265
|
+
# --- Generate human-readable routing plan ---
|
|
266
|
+
generate_routing_plan() {
    # Print a human-readable routing summary for a plan.
    #
    # Arguments: $1 - plan file
    #            $2 - parallelism score (shown as "<score>/100")
    #            $3 - teams-available flag (displayed; forwarded to
    #                 recommend_execution_mode)
    #            $4 - available memory in GB
    #            $5 - currently selected mode ("auto" or an explicit mode)
    # Outputs:   summary header, one line per batch with its title, model and
    #            dependencies, then the recommended mode — all on stdout
    # Requires:  count_batches, build_dependency_graph, get_batch_title,
    #            classify_batch_model, recommend_execution_mode, jq, timeout
    local plan_file="$1" score="$2" teams_available="$3" mem_gb="$4" current_mode="$5"
    local total
    total=$(count_batches "$plan_file")

    echo ""
    echo "=== Routing Analysis ==="
    echo " Batches: $total"
    echo " Parallelism score: $score/100"
    echo " Teams available: $teams_available"
    echo " Memory: ${mem_gb}GB"
    echo ""

    # Show dependency graph
    local graph
    graph=$(build_dependency_graph "$plan_file")
    echo " Dependency graph:"

    # The loop counter is local: this function prints directly to stdout, so it
    # is usable without a subshell and must not overwrite a caller's variable.
    local b deps title model rc
    for ((b = 1; b <= total; b++)); do
        rc=0
        deps=$(timeout 30 jq -r ".\"$b\" | join(\",\")" 2>/dev/null <<< "$graph") || rc=$?
        if [[ "$rc" -ne 0 ]]; then
            # Any jq failure degrades to "no deps"; only a timeout is reported.
            [[ "$rc" -eq 124 ]] && echo "[WARN] jq timeout on batch $b — treating as no deps" >&2
            deps=""
        fi

        title=$(get_batch_title "$plan_file" "$b")
        model=$(classify_batch_model "$plan_file" "$b")
        if [[ -z "$deps" ]]; then
            echo " Batch $b: $title [$model] (no deps)"
        else
            echo " Batch $b: $title [$model] (depends on: $deps)"
        fi
    done

    echo ""
    local recommended
    recommended=$(recommend_execution_mode "$score" "$teams_available" "$mem_gb")
    echo " Recommended mode: $recommended"
    # Call out when an explicit --mode disagrees with the recommendation.
    if [[ "$current_mode" != "auto" && "$current_mode" != "$recommended" ]]; then
        echo " (overridden by --mode $current_mode)"
    fi
}
|
|
308
|
+
|
|
309
|
+
# --- Routing decision logger ---
|
|
310
|
+
log_routing_decision() {
    # Append one timestamped routing decision to the worktree's log.
    #
    # Arguments: $1 - worktree root
    #            $2 - decision category
    #            $3 - free-form message
    # Effects:   creates <worktree>/logs if needed and appends a line of the
    #            form "[HH:MM:SS] <category>: <message>" to
    #            <worktree>/logs/routing-decisions.log
    local worktree="$1"
    local category="$2"
    local message="$3"
    local log_dir="$worktree/logs"

    mkdir -p "$log_dir"
    printf '[%s] %s: %s\n' "$(date '+%H:%M:%S')" "$category" "$message" \
        >> "$log_dir/routing-decisions.log"
}
|
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# run-plan-sampling.sh — Parallel candidate sampling for batch execution
|
|
3
|
+
#
|
|
4
|
+
# Standalone module: spawns N parallel candidates with prompt variants,
|
|
5
|
+
# scores each via quality gate, picks the winner. Uses patch files (not stash)
|
|
6
|
+
# to manage worktree state across candidates.
|
|
7
|
+
#
|
|
8
|
+
# Functions:
|
|
9
|
+
# check_memory_for_sampling
|
|
10
|
+
# Returns 0 if enough memory for SAMPLE_COUNT candidates, 1 otherwise.
|
|
11
|
+
# run_sampling_candidates <worktree> <plan_file> <batch> <prompt> <quality_gate_cmd>
|
|
12
|
+
# Spawns SAMPLE_COUNT candidates, scores them, applies winner's patch.
|
|
13
|
+
# Returns 0 if winner found, 1 if no candidate passed.
|
|
14
|
+
#
|
|
15
|
+
# Globals: SAMPLE_COUNT (read; reset to 0 by check_memory_for_sampling when it falls back), SAMPLE_MIN_MEMORY_PER_GB (read-only)
|
|
16
|
+
# Requires libs: run-plan-scoring (score_candidate, select_winner, classify_batch_type, get_prompt_variants)
|
|
17
|
+
# run-plan-quality-gate (run_quality_gate)
|
|
18
|
+
# run-plan-state (get_previous_test_count)
|
|
19
|
+
|
|
20
|
+
# check_memory_for_sampling
# Compares column 7 of the "Mem:" row printed by `free -m` against
# SAMPLE_COUNT * SAMPLE_MIN_MEMORY_PER_GB (default 4) gigabytes.
# Globals: SAMPLE_COUNT (read; set to 0 on either fallback path),
#          SAMPLE_MIN_MEMORY_PER_GB (read).
# Outputs: a WARNING line on stdout when falling back.
# Returns: 0 when enough memory is reported, 1 otherwise (including when
#          `free` yields nothing usable).
check_memory_for_sampling() {
    local avail_mb
    avail_mb=$(free -m 2>/dev/null | awk '/Mem:/{print $7}')

    # No reading at all (e.g. `free` missing): disable sampling.
    if [[ -z "$avail_mb" ]]; then
        echo " WARNING: Cannot determine available memory. Falling back to single attempt."
        SAMPLE_COUNT=0
        return 1
    fi

    local needed_gb=$(( SAMPLE_COUNT * ${SAMPLE_MIN_MEMORY_PER_GB:-4} ))
    local needed_mb=$(( needed_gb * 1024 ))

    if (( avail_mb >= needed_mb )); then
        return 0
    fi

    # Report available memory in GB with one decimal; the requirement is a whole number of GB.
    local avail_display
    avail_display=$(awk -v mb="$avail_mb" 'BEGIN {printf "%.1f", mb / 1024}')
    echo " WARNING: Not enough memory for sampling (${avail_display}G < ${needed_gb}G needed). Falling back to single attempt."
    SAMPLE_COUNT=0
    return 1
}
|
|
40
|
+
|
|
41
|
+
# run_sampling_candidates <worktree> <plan_file> <batch> <prompt> <quality_gate_cmd>
#
# Runs one `claude -p` attempt per prompt variant (from get_prompt_variants),
# one after another inside <worktree>. Each attempt starts from the same
# baseline diff, is scored with run_quality_gate + score_candidate, and has its
# resulting `git diff` saved as a named patch. The highest-scoring passing
# candidate (per select_winner) is re-applied and its outcome appended to
# <worktree>/logs/sampling-outcomes.json.
#
# Globals:  SAMPLE_COUNT (read)
# Outputs:  progress lines on stdout, warnings/errors on stderr
# Returns:  0 if a winner was selected and restored,
#           1 if no candidate passed, the winner's patch failed to apply,
#             or the temporary patch directory could not be created.
run_sampling_candidates() {
    local worktree="$1"
    local plan_file="$2"
    local batch="$3"
    local prompt="$4"
    local quality_gate_cmd="$5"

    echo " Sampling $SAMPLE_COUNT candidates for batch $batch..."
    local scores=""

    # Candidate logs and the outcomes file live here. Best-effort: the log
    # redirect below already tolerates failure.
    mkdir -p "$worktree/logs" 2>/dev/null || true

    # Patches go in a private mktemp directory rather than predictable
    # /tmp/<name>-$$ paths, which other local users could pre-create or read.
    local _patch_dir
    if ! _patch_dir=$(mktemp -d "${TMPDIR:-/tmp}/run-plan-sampling.XXXXXX"); then
        echo " ERROR: Cannot create temporary directory for sampling patches" >&2
        return 1
    fi

    # Save baseline state using a patch file rather than git stash.
    # stash pop always restores the top entry, so interleaved stash calls
    # across candidates can restore the wrong state (#2). Named patch files
    # give explicit state snapshots instead.
    # NOTE(review): `git diff` only covers tracked files and `git checkout .`
    # does not delete untracked ones, so files newly created by a candidate are
    # neither captured in its patch nor removed before the next candidate —
    # confirm whether that is acceptable.
    local _baseline_patch="$_patch_dir/baseline.diff"
    (cd "$worktree" && git diff > "$_baseline_patch" 2>/dev/null || true)

    # Classify batch and get type-aware prompt variants
    local batch_type
    batch_type=$(classify_batch_type "$plan_file" "$batch")
    local variants
    variants=$(get_prompt_variants "$batch_type" "$worktree/logs/sampling-outcomes.json" "$SAMPLE_COUNT")

    local c=0
    local variant_name
    while IFS= read -r variant_name; do
        local variant_suffix=""
        if [[ "$variant_name" != "vanilla" ]]; then
            variant_suffix=$'\nIMPORTANT: '"$variant_name"
        fi

        local candidate_log="$worktree/logs/batch-${batch}-candidate-${c}.log"

        # Restore clean baseline for each candidate: reset tracked changes,
        # then re-apply the saved baseline diff.
        (cd "$worktree" && git checkout . 2>/dev/null || true)
        if [[ -s "$_baseline_patch" ]]; then
            if ! (cd "$worktree" && git apply "$_baseline_patch" 2>/dev/null); then
                echo " WARNING: Failed to restore baseline patch for candidate $c — starting from clean state" >&2
            fi
        fi

        # A failing candidate run is not fatal; the quality gate decides.
        CLAUDECODE='' claude -p "${prompt}${variant_suffix}" \
            --allowedTools "Bash,Read,Write,Edit,Grep,Glob" \
            --permission-mode bypassPermissions \
            > "$candidate_log" 2>&1 || true

        # Score this candidate
        local gate_exit=0
        run_quality_gate "$worktree" "$quality_gate_cmd" "sample-$c" "0" || gate_exit=$?
        local gate_passed=0
        if [[ $gate_exit -eq 0 ]]; then
            gate_passed=1
        fi

        local new_tests
        new_tests=$(get_previous_test_count "$worktree")
        # NOTE(review): the first number on the last `git diff --stat` line is
        # the count of files changed, while score_candidate names this argument
        # diff_lines — confirm which metric is intended before changing it.
        local diff_size
        diff_size=$(cd "$worktree" && git diff --stat HEAD 2>/dev/null | tail -1 | grep -oE '[0-9]+' | head -1 || echo "100")

        local score
        score=$(score_candidate "$gate_passed" "${new_tests:-0}" "${diff_size:-100}" "0" "0" "0")
        scores+="$score "

        echo " Candidate $c: score=$score (gate=$gate_passed, tests=${new_tests:-0})"

        # Every passing candidate gets its own patch file keyed by index, so
        # whichever index select_winner returns can be restored later
        # (select_winner compares with -gt, so ties go to the earliest one).
        if [[ $gate_passed -eq 1 ]]; then
            (cd "$worktree" && git diff > "$_patch_dir/winner-${c}.diff" 2>/dev/null || true)
        fi

        # Reset worktree for next candidate iteration
        (cd "$worktree" && git checkout . 2>/dev/null || true)

        c=$((c + 1))
    done <<< "$variants"

    # Pick winner; treat an empty answer as "no winner".
    local winner
    winner=$(select_winner "$scores")
    winner="${winner:--1}"

    if [[ "$winner" -ge 0 ]]; then
        echo " Winner: candidate $winner (scores: $scores)"

        # Restore winner's patch — explicit named file, no LIFO ordering risk
        local _apply_patch="$_patch_dir/winner-${winner}.diff"
        if [[ -s "$_apply_patch" ]]; then
            if ! (cd "$worktree" && git apply "$_apply_patch"); then
                echo " ERROR: Failed to apply winning candidate $winner patch — sampling result lost" >&2
                rm -rf -- "$_patch_dir" 2>/dev/null || true
                return 1
            fi
        fi

        # Log sampling outcome
        local outcomes_file="$worktree/logs/sampling-outcomes.json"
        mkdir -p "$(dirname "$outcomes_file")"
        if [[ ! -f "$outcomes_file" ]]; then
            echo "[]" > "$outcomes_file"
        fi

        # Winning variant name = line (winner + 1) of the variants list
        local winning_variant
        winning_variant=$(echo "$variants" | sed -n "$((winner + 1))p")
        winning_variant="${winning_variant:-vanilla}"

        # Record the WINNER's score (the scores string is space-separated and
        # in candidate order), not the first candidate's.
        local -a _score_list=()
        IFS=' ' read -ra _score_list <<< "$scores"
        local winning_score="${_score_list[$winner]:-0}"

        # Logging is best-effort: a jq failure must not fail the batch, and a
        # half-written temp file is removed instead of being left behind.
        if jq --arg bt "$batch_type" --arg vn "$winning_variant" --arg sc "$winning_score" \
            '. += [{"batch_type": $bt, "prompt_variant": $vn, "won": true, "score": ($sc | tonumber), "timestamp": now | tostring}]' \
            "$outcomes_file" > "$outcomes_file.tmp"; then
            mv "$outcomes_file.tmp" "$outcomes_file" || true
        else
            rm -f "$outcomes_file.tmp" 2>/dev/null || true
        fi

        # Clean up temp patch files
        rm -rf -- "$_patch_dir" 2>/dev/null || true
        return 0
    fi

    echo " No candidate passed quality gate"
    # Restore baseline state for the normal retry path
    if [[ -s "$_baseline_patch" ]]; then
        if ! (cd "$worktree" && git apply "$_baseline_patch" 2>/dev/null); then
            echo " WARNING: Failed to restore baseline after sampling — continuing from clean state" >&2
        fi
    fi

    # Clean up temp patch files
    rm -rf -- "$_patch_dir" 2>/dev/null || true

    return 1
}
|
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# run-plan-scoring.sh — Candidate scoring for parallel patch sampling
|
|
3
|
+
#
|
|
4
|
+
# Functions:
|
|
5
|
+
# score_candidate <gate_passed> <test_count> <diff_lines> <lint_warnings> <lesson_violations> <ast_violations>
|
|
6
|
+
# select_winner <scores_string> -> index of highest score (0-based), -1 if all zero
|
|
7
|
+
|
|
8
|
+
# score_candidate <gate_passed> <test_count> <diff_lines> <lint_warnings> <lesson_violations> <ast_violations>
# Prints an integer score on stdout.
#   - gate_passed != 1          -> 0
#   - otherwise                 -> tests*10 + 10000/(diff+1) + 1000/(lint+1)
#                                  - lessons*200 - ast*100, floored at 1
# All divisions are integer divisions. Missing arguments default to
# 0 (diff_lines defaults to 1).
score_candidate() {
    local passed="${1:-0}" tests="${2:-0}" changed="${3:-1}"
    local lint="${4:-0}" lessons="${5:-0}" ast="${6:-0}"

    # A candidate that failed the quality gate can never win.
    if (( passed != 1 )); then
        echo 0
        return
    fi

    # Clamp the diff size to at least 1.
    if (( changed < 1 )); then
        changed=1
    fi

    local reward=$(( tests * 10 + 10000 / (changed + 1) + 1000 / (lint + 1) ))
    local penalty=$(( lessons * 200 + ast * 100 ))
    local total=$(( reward - penalty ))

    # Gate passed => the score is always positive.
    if (( total < 1 )); then
        total=1
    fi
    echo "$total"
}
|
|
30
|
+
|
|
31
|
+
# select_winner <scores_string>
# <scores_string> is a whitespace-separated list of integer scores in
# candidate order. Prints the 0-based index of the highest score that is
# greater than zero, or -1 when there is none (empty list or all zero).
# Ties go to the earliest candidate because the comparison is strict.
select_winner() {
    local scores_str="$1"
    local max_score=0
    local max_idx=-1
    local idx=0
    # Declared local so the loop variable does not leak into (or clobber a
    # variable of the same name in) the caller's scope.
    local score

    # Word splitting of the scores string is intentional here.
    for score in $scores_str; do
        # Non-integer tokens still occupy an index but can never win; this
        # avoids arithmetic errors from [[ -gt ]] on malformed input.
        if [[ "$score" =~ ^-?[0-9]+$ ]] && [[ "$score" -gt "$max_score" ]]; then
            max_score="$score"
            max_idx=$idx
        fi
        idx=$((idx + 1))
    done

    echo "$max_idx"
}
|
|
47
|
+
|
|
48
|
+
# Classify a plan batch by the kind of work that dominates it.
# Args:   <plan_file> <batch_num>
# Output: one of new-file | refactoring | integration | test-only | unknown
# Rules, checked in order:
#   1. title matches integrat|wir|connect|glue (case-insensitive) -> integration
#   2. no "- Create:" / "- Modify:" lines but at least one "Run:" -> test-only
#   3. more "- Create:" lines than "- Modify:" lines               -> new-file
#   4. at least one "- Modify:" line                               -> refactoring
#   5. anything else                                               -> unknown
classify_batch_type() {
    local plan_file="$1" batch_num="$2"

    # Load the plan parser lazily when get_batch_text is not defined yet.
    if ! type get_batch_text &>/dev/null; then
        source "$(dirname "${BASH_SOURCE[0]}")/run-plan-parser.sh"
    fi

    local body heading
    body=$(get_batch_text "$plan_file" "$batch_num" 2>/dev/null || true)
    heading=$(get_batch_title "$plan_file" "$batch_num" 2>/dev/null || true)

    # Title keywords take precedence over the action counts.
    if grep -qiE 'integrat|wir|connect|glue' <<< "$heading"; then
        echo "integration"
        return
    fi

    # grep -c exits non-zero on zero matches, hence the `|| true`.
    local n_create n_modify n_run
    n_create=$(grep -cE '^\s*-\s*Create:' <<< "$body" || true)
    n_modify=$(grep -cE '^\s*-\s*Modify:' <<< "$body" || true)
    n_run=$(grep -cE '^Run:' <<< "$body" || true)
    n_create=${n_create:-0}
    n_modify=${n_modify:-0}
    n_run=${n_run:-0}

    if (( n_create == 0 && n_modify == 0 && n_run > 0 )); then
        echo "test-only"
    elif (( n_create > n_modify )); then
        echo "new-file"
    elif (( n_modify > 0 )); then
        echo "refactoring"
    else
        echo "unknown"
    fi
}
|
|
94
|
+
|
|
95
|
+
# Get prompt variant names for a batch type.
# Uses learned outcomes if available, otherwise built-in defaults.
# Args:   <batch_type> <outcomes_file> <count>   (count defaults to 3)
# Output: one variant per line:
#   line 1   always "vanilla" (callers treat it as "no extra suffix")
#   line 2   best-scoring learned non-vanilla winner for this batch type,
#            or the first default for the type          (only if count >= 2)
#   line 3+  remaining defaults, then a generic filler, up to <count> lines
# An empty or unrecognised batch type uses the "unknown" defaults.
get_prompt_variants() {
    # Default to "unknown": an empty key is a bad associative-array subscript.
    local batch_type="${1:-unknown}"
    local outcomes_file="$2"
    local count="${3:-3}"

    # Default variants per batch type ('|' separates the alternatives)
    local -A type_variants
    type_variants[new-file]="check all imports before running tests|write tests first then implement"
    type_variants[refactoring]="minimal change only|run tests after each edit"
    type_variants[integration]="trace end-to-end before declaring done|check every import and export"
    type_variants[test-only]="use real objects not mocks|focus on edge cases only"
    type_variants[unknown]="try a different approach|make the minimum possible change"

    local defaults="${type_variants[$batch_type]:-${type_variants[unknown]}}"

    # Slot 1: always vanilla
    echo "vanilla"

    # Check for learned winners. "vanilla" is excluded: it already occupies
    # slot 1, so picking it again would spend a second candidate on the
    # identical prompt. jq failures are tolerated (fall back to defaults).
    local learned_variant=""
    if [[ -f "$outcomes_file" ]]; then
        learned_variant=$(jq -r --arg bt "$batch_type" \
            '[.[] | select(.batch_type == $bt and .won == true and .prompt_variant != "vanilla")] | sort_by(.score) | reverse | .[0].prompt_variant // empty' \
            "$outcomes_file" 2>/dev/null || true)
    fi

    # Slot 2: learned winner or first default
    local variant2="${learned_variant:-${defaults%%|*}}"
    if [[ "$count" -ge 2 ]]; then
        echo "$variant2"
    fi

    # Slot 3+: remaining defaults (exploration), skipping whatever slot 2 used
    local slot=3
    local -a parts=()
    local part
    IFS='|' read -ra parts <<< "$defaults"
    for part in "${parts[@]}"; do
        [[ "$slot" -gt "$count" ]] && break
        [[ "$part" == "$variant2" ]] && continue
        echo "$part"
        slot=$((slot + 1))
    done

    # Fill remaining slots with generic variants
    while [[ "$slot" -le "$count" ]]; do
        echo "try a fundamentally different approach"
        slot=$((slot + 1))
    done
}
|