shipwright-cli 3.1.0 → 3.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/agents/code-reviewer.md +2 -0
- package/.claude/agents/devops-engineer.md +2 -0
- package/.claude/agents/doc-fleet-agent.md +2 -0
- package/.claude/agents/pipeline-agent.md +2 -0
- package/.claude/agents/shell-script-specialist.md +2 -0
- package/.claude/agents/test-specialist.md +2 -0
- package/.claude/hooks/agent-crash-capture.sh +32 -0
- package/.claude/hooks/post-tool-use.sh +3 -2
- package/.claude/hooks/pre-tool-use.sh +35 -3
- package/README.md +22 -8
- package/claude-code/hooks/config-change.sh +18 -0
- package/claude-code/hooks/instructions-reloaded.sh +7 -0
- package/claude-code/hooks/worktree-create.sh +25 -0
- package/claude-code/hooks/worktree-remove.sh +20 -0
- package/config/code-constitution.json +130 -0
- package/config/defaults.json +25 -2
- package/config/policy.json +1 -1
- package/dashboard/middleware/auth.ts +134 -0
- package/dashboard/middleware/constants.ts +21 -0
- package/dashboard/public/index.html +8 -6
- package/dashboard/public/styles.css +176 -97
- package/dashboard/routes/auth.ts +38 -0
- package/dashboard/server.ts +117 -25
- package/dashboard/services/config.ts +26 -0
- package/dashboard/services/db.ts +118 -0
- package/dashboard/src/canvas/pixel-agent.ts +298 -0
- package/dashboard/src/canvas/pixel-sprites.ts +440 -0
- package/dashboard/src/canvas/shipyard-effects.ts +367 -0
- package/dashboard/src/canvas/shipyard-scene.ts +616 -0
- package/dashboard/src/canvas/submarine-layout.ts +267 -0
- package/dashboard/src/components/header.ts +8 -7
- package/dashboard/src/core/api.ts +5 -0
- package/dashboard/src/core/router.ts +1 -0
- package/dashboard/src/design/submarine-theme.ts +253 -0
- package/dashboard/src/main.ts +2 -0
- package/dashboard/src/types/api.ts +12 -1
- package/dashboard/src/views/activity.ts +2 -1
- package/dashboard/src/views/metrics.ts +69 -1
- package/dashboard/src/views/shipyard.ts +39 -0
- package/dashboard/types/index.ts +166 -0
- package/docs/plans/2026-02-28-compound-audit-and-shipyard-design.md +186 -0
- package/docs/plans/2026-02-28-skipper-shipwright-implementation-plan.md +1182 -0
- package/docs/plans/2026-02-28-skipper-shipwright-integration-design.md +531 -0
- package/docs/plans/2026-03-01-ai-powered-skill-injection-design.md +298 -0
- package/docs/plans/2026-03-01-ai-powered-skill-injection-plan.md +1109 -0
- package/docs/plans/2026-03-01-capabilities-cleanup-plan.md +658 -0
- package/docs/plans/2026-03-01-clean-architecture-plan.md +924 -0
- package/docs/plans/2026-03-01-compound-audit-cascade-design.md +191 -0
- package/docs/plans/2026-03-01-compound-audit-cascade-plan.md +921 -0
- package/docs/plans/2026-03-01-deep-integration-plan.md +851 -0
- package/docs/plans/2026-03-01-pipeline-audit-trail-design.md +145 -0
- package/docs/plans/2026-03-01-pipeline-audit-trail-plan.md +770 -0
- package/docs/plans/2026-03-01-refined-depths-brand-design.md +382 -0
- package/docs/plans/2026-03-01-refined-depths-implementation.md +599 -0
- package/docs/plans/2026-03-01-skipper-kernel-integration-design.md +203 -0
- package/docs/plans/2026-03-01-unified-platform-design.md +272 -0
- package/docs/plans/2026-03-07-claude-code-feature-integration-design.md +189 -0
- package/docs/plans/2026-03-07-claude-code-feature-integration-plan.md +1165 -0
- package/docs/research/BACKLOG_QUICK_REFERENCE.md +352 -0
- package/docs/research/CUTTING_EDGE_RESEARCH_2026.md +546 -0
- package/docs/research/RESEARCH_INDEX.md +439 -0
- package/docs/research/RESEARCH_SOURCES.md +440 -0
- package/docs/research/RESEARCH_SUMMARY.txt +275 -0
- package/docs/superpowers/specs/2026-03-10-pipeline-quality-revolution-design.md +341 -0
- package/package.json +2 -2
- package/scripts/lib/adaptive-model.sh +427 -0
- package/scripts/lib/adaptive-timeout.sh +316 -0
- package/scripts/lib/audit-trail.sh +309 -0
- package/scripts/lib/auto-recovery.sh +471 -0
- package/scripts/lib/bandit-selector.sh +431 -0
- package/scripts/lib/bootstrap.sh +104 -2
- package/scripts/lib/causal-graph.sh +455 -0
- package/scripts/lib/compat.sh +126 -0
- package/scripts/lib/compound-audit.sh +337 -0
- package/scripts/lib/constitutional.sh +454 -0
- package/scripts/lib/context-budget.sh +359 -0
- package/scripts/lib/convergence.sh +594 -0
- package/scripts/lib/cost-optimizer.sh +634 -0
- package/scripts/lib/daemon-adaptive.sh +14 -2
- package/scripts/lib/daemon-dispatch.sh +106 -17
- package/scripts/lib/daemon-failure.sh +34 -4
- package/scripts/lib/daemon-patrol.sh +25 -4
- package/scripts/lib/daemon-poll-github.sh +361 -0
- package/scripts/lib/daemon-poll-health.sh +299 -0
- package/scripts/lib/daemon-poll.sh +27 -611
- package/scripts/lib/daemon-state.sh +119 -66
- package/scripts/lib/daemon-triage.sh +10 -0
- package/scripts/lib/dod-scorecard.sh +442 -0
- package/scripts/lib/error-actionability.sh +300 -0
- package/scripts/lib/formal-spec.sh +461 -0
- package/scripts/lib/helpers.sh +180 -5
- package/scripts/lib/intent-analysis.sh +409 -0
- package/scripts/lib/loop-convergence.sh +350 -0
- package/scripts/lib/loop-iteration.sh +682 -0
- package/scripts/lib/loop-progress.sh +48 -0
- package/scripts/lib/loop-restart.sh +185 -0
- package/scripts/lib/memory-effectiveness.sh +506 -0
- package/scripts/lib/mutation-executor.sh +352 -0
- package/scripts/lib/outcome-feedback.sh +521 -0
- package/scripts/lib/pipeline-cli.sh +336 -0
- package/scripts/lib/pipeline-commands.sh +1216 -0
- package/scripts/lib/pipeline-detection.sh +101 -3
- package/scripts/lib/pipeline-execution.sh +897 -0
- package/scripts/lib/pipeline-github.sh +28 -3
- package/scripts/lib/pipeline-intelligence-compound.sh +431 -0
- package/scripts/lib/pipeline-intelligence-scoring.sh +407 -0
- package/scripts/lib/pipeline-intelligence-skip.sh +181 -0
- package/scripts/lib/pipeline-intelligence.sh +104 -1138
- package/scripts/lib/pipeline-quality-bash-compat.sh +182 -0
- package/scripts/lib/pipeline-quality-checks.sh +17 -711
- package/scripts/lib/pipeline-quality-gates.sh +563 -0
- package/scripts/lib/pipeline-stages-build.sh +730 -0
- package/scripts/lib/pipeline-stages-delivery.sh +965 -0
- package/scripts/lib/pipeline-stages-intake.sh +1133 -0
- package/scripts/lib/pipeline-stages-monitor.sh +407 -0
- package/scripts/lib/pipeline-stages-review.sh +1022 -0
- package/scripts/lib/pipeline-stages.sh +161 -2901
- package/scripts/lib/pipeline-state.sh +36 -5
- package/scripts/lib/pipeline-util.sh +487 -0
- package/scripts/lib/policy-learner.sh +438 -0
- package/scripts/lib/process-reward.sh +493 -0
- package/scripts/lib/project-detect.sh +649 -0
- package/scripts/lib/quality-profile.sh +334 -0
- package/scripts/lib/recruit-commands.sh +885 -0
- package/scripts/lib/recruit-learning.sh +739 -0
- package/scripts/lib/recruit-roles.sh +648 -0
- package/scripts/lib/reward-aggregator.sh +458 -0
- package/scripts/lib/rl-optimizer.sh +362 -0
- package/scripts/lib/root-cause.sh +427 -0
- package/scripts/lib/scope-enforcement.sh +445 -0
- package/scripts/lib/session-restart.sh +493 -0
- package/scripts/lib/skill-memory.sh +300 -0
- package/scripts/lib/skill-registry.sh +775 -0
- package/scripts/lib/spec-driven.sh +476 -0
- package/scripts/lib/test-helpers.sh +18 -7
- package/scripts/lib/test-holdout.sh +429 -0
- package/scripts/lib/test-optimizer.sh +511 -0
- package/scripts/shipwright-file-suggest.sh +45 -0
- package/scripts/skills/adversarial-quality.md +61 -0
- package/scripts/skills/api-design.md +44 -0
- package/scripts/skills/architecture-design.md +50 -0
- package/scripts/skills/brainstorming.md +43 -0
- package/scripts/skills/data-pipeline.md +44 -0
- package/scripts/skills/deploy-safety.md +64 -0
- package/scripts/skills/documentation.md +38 -0
- package/scripts/skills/frontend-design.md +45 -0
- package/scripts/skills/generated/.gitkeep +0 -0
- package/scripts/skills/generated/_refinements/.gitkeep +0 -0
- package/scripts/skills/generated/_refinements/adversarial-quality.patch.md +3 -0
- package/scripts/skills/generated/_refinements/architecture-design.patch.md +3 -0
- package/scripts/skills/generated/_refinements/brainstorming.patch.md +3 -0
- package/scripts/skills/generated/cli-version-management.md +29 -0
- package/scripts/skills/generated/collection-system-validation.md +99 -0
- package/scripts/skills/generated/large-scale-c-refactoring-coordination.md +97 -0
- package/scripts/skills/generated/pattern-matching-similarity-scoring.md +195 -0
- package/scripts/skills/generated/test-parallelization-detection.md +65 -0
- package/scripts/skills/observability.md +79 -0
- package/scripts/skills/performance.md +48 -0
- package/scripts/skills/pr-quality.md +49 -0
- package/scripts/skills/product-thinking.md +43 -0
- package/scripts/skills/security-audit.md +49 -0
- package/scripts/skills/systematic-debugging.md +40 -0
- package/scripts/skills/testing-strategy.md +47 -0
- package/scripts/skills/two-stage-review.md +52 -0
- package/scripts/skills/validation-thoroughness.md +55 -0
- package/scripts/sw +9 -3
- package/scripts/sw-activity.sh +9 -8
- package/scripts/sw-adaptive.sh +8 -7
- package/scripts/sw-adversarial.sh +2 -1
- package/scripts/sw-architecture-enforcer.sh +3 -1
- package/scripts/sw-auth.sh +12 -2
- package/scripts/sw-autonomous.sh +5 -1
- package/scripts/sw-changelog.sh +4 -1
- package/scripts/sw-checkpoint.sh +2 -1
- package/scripts/sw-ci.sh +15 -6
- package/scripts/sw-cleanup.sh +4 -26
- package/scripts/sw-code-review.sh +45 -20
- package/scripts/sw-connect.sh +2 -1
- package/scripts/sw-context.sh +2 -1
- package/scripts/sw-cost.sh +107 -5
- package/scripts/sw-daemon.sh +71 -11
- package/scripts/sw-dashboard.sh +3 -1
- package/scripts/sw-db.sh +71 -20
- package/scripts/sw-decide.sh +8 -2
- package/scripts/sw-decompose.sh +360 -17
- package/scripts/sw-deps.sh +4 -1
- package/scripts/sw-developer-simulation.sh +4 -1
- package/scripts/sw-discovery.sh +378 -5
- package/scripts/sw-doc-fleet.sh +4 -1
- package/scripts/sw-docs-agent.sh +3 -1
- package/scripts/sw-docs.sh +2 -1
- package/scripts/sw-doctor.sh +453 -2
- package/scripts/sw-dora.sh +4 -1
- package/scripts/sw-durable.sh +12 -7
- package/scripts/sw-e2e-orchestrator.sh +17 -16
- package/scripts/sw-eventbus.sh +13 -4
- package/scripts/sw-evidence.sh +364 -12
- package/scripts/sw-feedback.sh +550 -9
- package/scripts/sw-fix.sh +20 -1
- package/scripts/sw-fleet-discover.sh +6 -2
- package/scripts/sw-fleet-viz.sh +9 -4
- package/scripts/sw-fleet.sh +5 -1
- package/scripts/sw-github-app.sh +18 -4
- package/scripts/sw-github-checks.sh +3 -2
- package/scripts/sw-github-deploy.sh +3 -2
- package/scripts/sw-github-graphql.sh +18 -7
- package/scripts/sw-guild.sh +5 -1
- package/scripts/sw-heartbeat.sh +5 -30
- package/scripts/sw-hello.sh +67 -0
- package/scripts/sw-hygiene.sh +10 -3
- package/scripts/sw-incident.sh +273 -5
- package/scripts/sw-init.sh +18 -2
- package/scripts/sw-instrument.sh +10 -2
- package/scripts/sw-intelligence.sh +44 -7
- package/scripts/sw-jira.sh +5 -1
- package/scripts/sw-launchd.sh +2 -1
- package/scripts/sw-linear.sh +4 -1
- package/scripts/sw-logs.sh +4 -1
- package/scripts/sw-loop.sh +436 -1076
- package/scripts/sw-memory.sh +357 -3
- package/scripts/sw-mission-control.sh +6 -1
- package/scripts/sw-model-router.sh +483 -27
- package/scripts/sw-otel.sh +15 -4
- package/scripts/sw-oversight.sh +14 -5
- package/scripts/sw-patrol-meta.sh +334 -0
- package/scripts/sw-pipeline-composer.sh +7 -1
- package/scripts/sw-pipeline-vitals.sh +12 -6
- package/scripts/sw-pipeline.sh +54 -2653
- package/scripts/sw-pm.sh +16 -8
- package/scripts/sw-pr-lifecycle.sh +2 -1
- package/scripts/sw-predictive.sh +17 -5
- package/scripts/sw-prep.sh +185 -2
- package/scripts/sw-ps.sh +5 -25
- package/scripts/sw-public-dashboard.sh +17 -4
- package/scripts/sw-quality.sh +14 -6
- package/scripts/sw-reaper.sh +8 -25
- package/scripts/sw-recruit.sh +156 -2303
- package/scripts/sw-regression.sh +19 -12
- package/scripts/sw-release-manager.sh +3 -1
- package/scripts/sw-release.sh +4 -1
- package/scripts/sw-remote.sh +3 -1
- package/scripts/sw-replay.sh +7 -1
- package/scripts/sw-retro.sh +158 -1
- package/scripts/sw-review-rerun.sh +3 -1
- package/scripts/sw-scale.sh +14 -5
- package/scripts/sw-security-audit.sh +6 -1
- package/scripts/sw-self-optimize.sh +173 -6
- package/scripts/sw-session.sh +9 -3
- package/scripts/sw-setup.sh +3 -1
- package/scripts/sw-stall-detector.sh +406 -0
- package/scripts/sw-standup.sh +15 -7
- package/scripts/sw-status.sh +3 -1
- package/scripts/sw-strategic.sh +14 -6
- package/scripts/sw-stream.sh +13 -4
- package/scripts/sw-swarm.sh +20 -7
- package/scripts/sw-team-stages.sh +13 -6
- package/scripts/sw-templates.sh +7 -31
- package/scripts/sw-testgen.sh +17 -6
- package/scripts/sw-tmux-pipeline.sh +4 -1
- package/scripts/sw-tmux-role-color.sh +2 -0
- package/scripts/sw-tmux-status.sh +1 -1
- package/scripts/sw-tmux.sh +37 -1
- package/scripts/sw-trace.sh +3 -1
- package/scripts/sw-tracker-github.sh +3 -0
- package/scripts/sw-tracker-jira.sh +3 -0
- package/scripts/sw-tracker-linear.sh +3 -0
- package/scripts/sw-tracker.sh +3 -1
- package/scripts/sw-triage.sh +3 -2
- package/scripts/sw-upgrade.sh +3 -1
- package/scripts/sw-ux.sh +5 -2
- package/scripts/sw-webhook.sh +5 -2
- package/scripts/sw-widgets.sh +9 -4
- package/scripts/sw-worktree.sh +15 -3
- package/scripts/test-skill-injection.sh +1233 -0
- package/templates/pipelines/autonomous.json +27 -3
- package/templates/pipelines/cost-aware.json +34 -8
- package/templates/pipelines/deployed.json +12 -0
- package/templates/pipelines/enterprise.json +12 -0
- package/templates/pipelines/fast.json +6 -0
- package/templates/pipelines/full.json +27 -3
- package/templates/pipelines/hotfix.json +6 -0
- package/templates/pipelines/standard.json +12 -0
- package/templates/pipelines/tdd.json +12 -0
|
@@ -0,0 +1,429 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# Module guard - prevent double-sourcing
|
|
3
|
+
[[ -n "${_TEST_HOLDOUT_LOADED:-}" ]] && return 0
_TEST_HOLDOUT_LOADED=1

# ╔═══════════════════════════════════════════════════════════════════════════╗
# ║ shipwright test-holdout — Test-as-Holdout Validation System ║
# ║ Prevents agent overfitting by partitioning tests into visible/sealed ║
# ║ Based on StrongDM pattern: agents can't game what they can't see ║
# ╚═══════════════════════════════════════════════════════════════════════════╝

# shellcheck disable=SC2034
VERSION="3.3.0"

# ─── Output Helpers (fallback if not already loaded) ─────────────────────────
# Each helper is defined only when the sourcing script has not already
# provided a function of that name. info/success/warn print to stdout,
# error prints to stderr.
[[ "$(type -t info 2>/dev/null)" == "function" ]] || info() { echo -e "\033[38;2;0;212;255m\033[1m▸\033[0m $*"; }
[[ "$(type -t success 2>/dev/null)" == "function" ]] || success() { echo -e "\033[38;2;74;222;128m\033[1m✓\033[0m $*"; }
[[ "$(type -t warn 2>/dev/null)" == "function" ]] || warn() { echo -e "\033[38;2;250;204;21m\033[1m⚠\033[0m $*"; }
[[ "$(type -t error 2>/dev/null)" == "function" ]] || error() { echo -e "\033[38;2;248;113;113m\033[1m✗\033[0m $*" >&2; }

# The two time helpers are guarded independently: a caller that preloaded only
# now_iso must still get a now_epoch fallback (and vice versa).
[[ "$(type -t now_iso 2>/dev/null)" == "function" ]] || now_iso() { date -u +"%Y-%m-%dT%H:%M:%SZ"; }
[[ "$(type -t now_epoch 2>/dev/null)" == "function" ]] || now_epoch() { date +%s; }

# ─── Configuration ───────────────────────────────────────────────────────────
# HOLDOUT_RATIO and HOLDOUT_DIR may be overridden from the environment; the
# remaining paths are always derived from HOLDOUT_DIR.

HOLDOUT_RATIO="${HOLDOUT_RATIO:-30}" # % of tests to hold out (default 30%)
HOLDOUT_DIR="${HOLDOUT_DIR:-.claude/test-holdout}"
HOLDOUT_SEALED_DIR="${HOLDOUT_DIR}/.sealed"
HOLDOUT_MANIFEST="${HOLDOUT_DIR}/manifest.json"
HOLDOUT_RESULTS="${HOLDOUT_DIR}/results.json"
|
|
32
|
+
|
|
33
|
+
# ─── Test Discovery ─────────────────────────────────────────────────────────
|
|
34
|
+
# Find test files in a project. Returns newline-separated list of test file paths.
|
|
35
|
+
|
|
36
|
+
#######################################
# List candidate test files under a project directory, one path per line.
# Globals:
#   none
# Arguments:
#   $1 - project directory to scan (default: .)
#   $2 - language: typescript|javascript|python|go|rust. When empty and a
#        detect_primary_language function is available, that function is asked;
#        any other value falls back to a generic name-based search.
# Outputs:
#   Matching paths on stdout, sorted byte-wise (C locale) so the order does
#   not change with the caller's locale.
# Returns:
#   Exit status of the final sort.
#######################################
holdout_discover_tests() {
  local project_dir="${1:-.}"
  local language="${2:-}"

  # Auto-detect language if not provided
  if [[ -z "$language" ]] && type detect_primary_language >/dev/null 2>&1; then
    language=$(detect_primary_language "$project_dir")
  fi

  # find errors (unreadable or missing directories) are deliberately discarded:
  # an unscannable tree simply yields no tests.
  case "$language" in
    typescript|javascript)
      find "$project_dir" \
        -type f \( -name "*.test.ts" -o -name "*.test.js" \
          -o -name "*.spec.ts" -o -name "*.spec.js" \
          -o -name "*.test.tsx" -o -name "*.test.jsx" \
          -o -name "*.spec.tsx" -o -name "*.spec.jsx" \) \
        ! -path "*/node_modules/*" ! -path "*/.claude/*" \
        2>/dev/null
      ;;
    python)
      find "$project_dir" \
        -type f \( -name "test_*.py" -o -name "*_test.py" \) \
        ! -path "*/__pycache__/*" ! -path "*/.claude/*" \
        2>/dev/null
      ;;
    go)
      find "$project_dir" \
        -type f -name "*_test.go" \
        ! -path "*/.claude/*" \
        2>/dev/null
      ;;
    rust)
      # Only files under a tests/ directory are listed; tests embedded in
      # regular source files are not discoverable by name.
      find "$project_dir" \
        -type f -name "*.rs" -path "*/tests/*" \
        ! -path "*/.claude/*" \
        2>/dev/null
      ;;
    *)
      # Generic: any file with "test" or "spec" in its name, minus docs/config
      find "$project_dir" \
        -type f \( -name "*test*" -o -name "*spec*" \) \
        ! -path "*/node_modules/*" ! -path "*/.claude/*" ! -path "*/.git/*" \
        ! -name "*.md" ! -name "*.json" ! -name "*.yml" \
        2>/dev/null
      ;;
  esac | LC_ALL=C sort
}
|
|
81
|
+
|
|
82
|
+
# ─── Partition ───────────────────────────────────────────────────────────────
|
|
83
|
+
# Split discovered tests into visible (agent can see) and holdout (sealed).
|
|
84
|
+
# Uses deterministic hashing so same files always get same partition.
|
|
85
|
+
|
|
86
|
+
#######################################
# Split discovered tests into a visible set and a sealed holdout set.
# A file is held out when the last byte of the MD5 digest of its path is below
# 256 * ratio / 100, so a given path always lands in the same set.
# Globals:
#   HOLDOUT_RATIO (read)  default percentage when $3 is omitted
#   HOLDOUT_DIR   (read)  directory that receives visible-tests.txt
#   HOLDOUT_VISIBLE_TESTS, HOLDOUT_SEALED_TESTS (written) newline-separated paths
#   HOLDOUT_TOTAL, HOLDOUT_VISIBLE_COUNT, HOLDOUT_SEALED_COUNT (written)
# Arguments:
#   $1 - project directory (default: .)
#   $2 - language, passed through to holdout_discover_tests (optional)
#   $3 - holdout percentage, integer 0-100 (default: $HOLDOUT_RATIO)
# Outputs:
#   A one-line summary via info; visible paths to $HOLDOUT_DIR/visible-tests.txt
# Returns:
#   0 on success; 1 on an invalid ratio, when no tests are found, or when the
#   holdout directory cannot be written.
#######################################
holdout_partition() {
  local project_dir="${1:-.}"
  local language="${2:-}"
  local ratio="${3:-${HOLDOUT_RATIO:-30}}"

  # The ratio ends up inside arithmetic expansion, which would evaluate any
  # expression it is given; accept plain decimal digits only.
  if [[ ! "$ratio" =~ ^[0-9]+$ ]] || (( 10#$ratio > 100 )); then
    error "Invalid holdout ratio '${ratio}' (expected an integer from 0 to 100)"
    return 1
  fi
  ratio=$(( 10#$ratio )) # force base 10 so "030" is 30, not octal

  local all_tests
  all_tests=$(holdout_discover_tests "$project_dir" "$language")
  if [[ -z "$all_tests" ]]; then
    warn "No test files discovered in $project_dir"
    return 1
  fi

  # Probe for the hashing tool once rather than once per file.
  local hash_tool=""
  if command -v md5 >/dev/null 2>&1; then
    hash_tool="md5"
  elif command -v md5sum >/dev/null 2>&1; then
    hash_tool="md5sum"
  fi

  local nl=$'\n'
  local threshold=$(( 256 * ratio / 100 ))
  local visible_tests="" holdout_tests=""
  local visible_n=0 holdout_n=0 idx=0
  local test_file digest hash_num

  while IFS= read -r test_file; do
    digest=""
    case "$hash_tool" in
      md5) digest=$(printf '%s' "$test_file" | md5 -q 2>/dev/null) ;;
      md5sum) digest=$(printf '%s' "$test_file" | md5sum 2>/dev/null | cut -d' ' -f1) ;;
    esac

    # Last two hex chars -> 0..255. With no usable digest the value stays at
    # 256, which is never below the threshold, so the file remains visible.
    hash_num=256
    if [[ "$digest" =~ ^[0-9a-fA-F]{32}$ ]]; then
      hash_num=$(( 16#${digest:30:2} ))
    fi

    # The first file is never held out, which guarantees a non-empty visible set.
    if (( idx > 0 && hash_num < threshold )); then
      holdout_tests+="${holdout_tests:+$nl}${test_file}"
      holdout_n=$(( holdout_n + 1 ))
    else
      visible_tests+="${visible_tests:+$nl}${test_file}"
      visible_n=$(( visible_n + 1 ))
    fi
    idx=$(( idx + 1 ))
  done <<< "$all_tests"

  local total_tests="$idx"

  # Ensure at least one holdout when there are two or more tests: move the
  # last visible test across. Everything is visible here, so with >= 2 tests
  # the list is guaranteed to contain a newline to split on.
  if [[ -z "$holdout_tests" ]] && (( total_tests >= 2 )); then
    holdout_tests=${visible_tests##*"$nl"}
    visible_tests=${visible_tests%"$nl"*}
    holdout_n=1
    visible_n=$(( visible_n - 1 ))
  fi

  info "Test partition: ${visible_n} visible, ${holdout_n} holdout (${ratio}% target)"

  # Store partition info
  if ! mkdir -p -- "$HOLDOUT_DIR"; then
    error "Cannot create holdout directory: ${HOLDOUT_DIR}"
    return 1
  fi
  if ! printf '%s\n' "$visible_tests" > "$HOLDOUT_DIR/visible-tests.txt"; then
    error "Cannot write ${HOLDOUT_DIR}/visible-tests.txt"
    return 1
  fi

  # Results for callers (plain shell globals, e.g. consumed by holdout_seal)
  HOLDOUT_VISIBLE_TESTS="$visible_tests"
  HOLDOUT_SEALED_TESTS="$holdout_tests"
  HOLDOUT_TOTAL="$total_tests"
  HOLDOUT_VISIBLE_COUNT="$visible_n"
  HOLDOUT_SEALED_COUNT="$holdout_n"

  return 0
}
|
|
176
|
+
|
|
177
|
+
# ─── Seal ────────────────────────────────────────────────────────────────────
|
|
178
|
+
# Copy holdout tests into the sealed directory, which agents are not meant to read (the originals stay in place).
|
|
179
|
+
# Creates a manifest tracking original locations for restoration.
|
|
180
|
+
|
|
181
|
+
# Escape a string for embedding between double quotes in a JSON document.
# Handles backslash, double quote, tab, carriage return and newline.
_holdout_json_escape() {
  local s=$1
  s=${s//\\/\\\\}
  s=${s//\"/\\\"}
  s=${s//$'\t'/\\t}
  s=${s//$'\r'/\\r}
  s=${s//$'\n'/\\n}
  printf '%s' "$s"
}

#######################################
# Copy every holdout test into the sealed directory and write a manifest that
# records, per test, its project-relative path, its sealed path and an MD5
# digest of its content. The original files are left in place.
# Globals:
#   HOLDOUT_SEALED_TESTS (read)  newline-separated paths from holdout_partition
#   HOLDOUT_SEALED_DIR, HOLDOUT_MANIFEST (read)  output locations
#   HOLDOUT_RATIO, HOLDOUT_TOTAL, HOLDOUT_VISIBLE_COUNT (read)  recorded as-is
# Arguments:
#   $1 - project directory the test paths are relative to (default: .)
# Outputs:
#   Status via success/warn/error; emits "test_holdout_sealed" when an
#   emit_event function is available.
# Returns:
#   0 on success; 1 when there is nothing to seal or the sealed directory or
#   manifest cannot be written.
#######################################
holdout_seal() {
  local project_dir="${1:-.}"

  if [[ -z "${HOLDOUT_SEALED_TESTS:-}" ]]; then
    error "No holdout tests to seal. Run holdout_partition first."
    return 1
  fi

  if ! mkdir -p -- "$HOLDOUT_SEALED_DIR"; then
    error "Cannot create sealed directory: ${HOLDOUT_SEALED_DIR}"
    return 1
  fi

  # Prefix to strip from each path; tolerate a trailing slash on project_dir.
  local prefix="${project_dir%/}/"
  local manifest_entries="" sealed_count=0
  local test_file rel_path sealed_path file_hash entry

  while IFS= read -r test_file; do
    [[ -z "$test_file" ]] && continue
    [[ ! -f "$test_file" ]] && continue

    # Literal prefix removal: the quoted pattern is not treated as a glob, so
    # a project path containing regex/glob characters cannot mis-match.
    rel_path=${test_file#"$prefix"}
    sealed_path="${HOLDOUT_SEALED_DIR}/${rel_path}"

    # Copy test to sealed location (don't move — agent might notice missing
    # files). A file that cannot be copied is skipped rather than recorded as
    # sealed.
    if ! mkdir -p -- "${sealed_path%/*}" || ! cp -- "$test_file" "$sealed_path"; then
      warn "Could not seal ${test_file} — skipping"
      continue
    fi

    # md5 (BSD/macOS) first, md5sum (GNU) as the fallback.
    file_hash=$(md5 -q "$test_file" 2>/dev/null \
      || md5sum < "$test_file" 2>/dev/null | cut -d' ' -f1)

    entry=$(printf '{"original":"%s","sealed":"%s","hash":"%s"}' \
      "$(_holdout_json_escape "$rel_path")" \
      "$(_holdout_json_escape "$sealed_path")" \
      "$(_holdout_json_escape "$file_hash")")

    manifest_entries+="${manifest_entries:+,}${entry}"
    sealed_count=$(( sealed_count + 1 ))
  done <<< "$HOLDOUT_SEALED_TESTS"

  # Write manifest
  if ! cat > "$HOLDOUT_MANIFEST" <<EOF
{
  "created": "$(now_iso)",
  "ratio": ${HOLDOUT_RATIO:-0},
  "total_tests": ${HOLDOUT_TOTAL:-0},
  "visible_count": ${HOLDOUT_VISIBLE_COUNT:-0},
  "sealed_count": ${sealed_count},
  "tests": [${manifest_entries}]
}
EOF
  then
    error "Cannot write holdout manifest: ${HOLDOUT_MANIFEST}"
    return 1
  fi

  # Add sealed directory to .gitignore if not already there. Only an existing
  # .gitignore is touched; -F makes the probe a literal string match.
  local gitignore="${project_dir}/.gitignore"
  if [[ -f "$gitignore" ]] \
    && ! grep -qF -- "test-holdout/.sealed" "$gitignore" 2>/dev/null; then
    {
      echo ""
      echo "# Shipwright test holdout (sealed tests hidden from agents)"
      echo ".claude/test-holdout/.sealed/"
    } >> "$gitignore"
  fi

  success "Sealed ${sealed_count} holdout tests"

  if type emit_event >/dev/null 2>&1; then
    emit_event "test_holdout_sealed" \
      "total=${HOLDOUT_TOTAL:-0}" \
      "visible=${HOLDOUT_VISIBLE_COUNT:-0}" \
      "sealed=${sealed_count}" \
      "ratio=${HOLDOUT_RATIO:-0}"
  fi

  return 0
}
|
|
258
|
+
|
|
259
|
+
# ─── Validate ────────────────────────────────────────────────────────────────
|
|
260
|
+
# Run holdout tests AFTER agent claims completion. This is the critical gate.
|
|
261
|
+
# Returns 0 if all holdout tests pass, 1 if any fail.
|
|
262
|
+
|
|
263
|
+
#######################################
# Run every test listed in the holdout manifest, one invocation per file, and
# record the outcome in the results file.
# Globals:
#   HOLDOUT_MANIFEST (read)    manifest produced by holdout_seal
#   HOLDOUT_RESULTS  (written) JSON summary of this run
# Arguments:
#   $1 - project directory the manifest paths are relative to (default: .)
#   $2 - test command; each test path is appended as its last argument. When
#        empty, detect_test_framework (if available) selects one.
# Outputs:
#   Status via info/warn/error/success; emits "test_holdout_validated" when an
#   emit_event function is available.
# Returns:
#   0 when all holdout tests pass, and also when validation is skipped (no
#   manifest, no test command, or an empty manifest); 1 when any test fails or
#   is missing, or when an existing manifest cannot be parsed.
#
# NOTE(review): tests run from their location in the working tree; the content
# hash recorded in the manifest is not compared here — confirm whether
# tamper detection is expected at this gate.
# NOTE(review): the path is appended verbatim for every framework; verify that
# "go test <file>" and "cargo test <path>" select the intended test.
#######################################
holdout_validate() {
  local project_dir="${1:-.}"
  local test_cmd="${2:-}"

  if [[ ! -f "$HOLDOUT_MANIFEST" ]]; then
    warn "No holdout manifest found — skipping holdout validation"
    return 0
  fi

  # Auto-detect test command
  if [[ -z "$test_cmd" ]] && type detect_test_framework >/dev/null 2>&1; then
    case "$(detect_test_framework "$project_dir")" in
      vitest) test_cmd="npx vitest run" ;;
      jest) test_cmd="npx jest" ;;
      pytest) test_cmd="pytest" ;;
      "go test") test_cmd="go test" ;;
      "cargo test") test_cmd="cargo test" ;;
      *) test_cmd="" ;;
    esac
  fi

  if [[ -z "$test_cmd" ]]; then
    warn "No test command available — cannot validate holdout tests"
    return 0
  fi

  info "Running holdout validation (sealed tests the agent never saw)..."

  # A manifest that exists but cannot be parsed (invalid JSON, or jq missing)
  # must fail the gate instead of being mistaken for "nothing to validate".
  local sealed_tests
  if ! sealed_tests=$(jq -r '.tests[].original' "$HOLDOUT_MANIFEST" 2>/dev/null); then
    error "Holdout manifest is unreadable (invalid JSON or jq unavailable): ${HOLDOUT_MANIFEST}"
    return 1
  fi

  if [[ -z "$sealed_tests" ]]; then
    warn "No sealed tests in manifest"
    return 0
  fi

  local pass_count=0 fail_count=0 total_count=0
  local failed_tests=""
  local test_file full_path quoted_path json_name passed

  while IFS= read -r test_file; do
    [[ -z "$test_file" ]] && continue
    total_count=$(( total_count + 1 ))

    full_path="${project_dir}/${test_file}"
    passed=0

    if [[ ! -f "$full_path" ]]; then
      warn "Holdout test missing: ${test_file} (may have been deleted by agent)"
    else
      # test_cmd is a shell command line by design, hence eval; the path is
      # shell-quoted with %q so that spaces, quotes or $(...) in a file name
      # stay data and are never executed.
      printf -v quoted_path '%q' "$full_path"
      if eval "${test_cmd} ${quoted_path}" >/dev/null 2>&1; then
        passed=1
      fi
    fi

    if (( passed )); then
      pass_count=$(( pass_count + 1 ))
    else
      fail_count=$(( fail_count + 1 ))
      # Escape backslashes and double quotes so the results file stays valid JSON.
      json_name=${test_file//\\/\\\\}
      json_name=${json_name//\"/\\\"}
      failed_tests+="${failed_tests:+,}\"${json_name}\""
    fi
  done <<< "$sealed_tests"

  local pass_rate=0
  if (( total_count > 0 )); then
    pass_rate=$(( pass_count * 100 / total_count ))
  fi

  # Write results
  cat > "$HOLDOUT_RESULTS" <<EOF
{
  "validated_at": "$(now_iso)",
  "total": ${total_count},
  "passed": ${pass_count},
  "failed": ${fail_count},
  "pass_rate": ${pass_rate},
  "failed_tests": [${failed_tests}]
}
EOF

  if type emit_event >/dev/null 2>&1; then
    emit_event "test_holdout_validated" \
      "total=${total_count}" \
      "passed=${pass_count}" \
      "failed=${fail_count}" \
      "pass_rate=${pass_rate}"
  fi

  if (( fail_count > 0 )); then
    error "Holdout validation FAILED: ${fail_count}/${total_count} sealed tests failed"
    error "Failed tests: ${failed_tests}"
    return 1
  fi

  success "Holdout validation PASSED: ${pass_count}/${total_count} sealed tests passed"
  return 0
}
|
|
368
|
+
|
|
369
|
+
# ─── Reveal ──────────────────────────────────────────────────────────────────
|
|
370
|
+
# Show holdout results (removing the sealed directory is handled by holdout_cleanup).
|
|
371
|
+
|
|
372
|
+
#######################################
# Print a human-readable summary of the last holdout validation run.
# Globals:
#   HOLDOUT_RESULTS (read)  JSON results file written by holdout_validate
# Arguments:
#   none
# Outputs:
#   One summary line via success (no failures) or error (failures), followed
#   by the failed test names on stdout; a warning when the results file is
#   missing or cannot be parsed.
# Returns:
#   0 in all handled cases; reporting never fails the caller.
#######################################
holdout_reveal() {
  if [[ ! -f "$HOLDOUT_RESULTS" ]]; then
    warn "No holdout results available"
    return 0
  fi

  # Read all four counters in one jq pass. A results file that cannot be
  # parsed is reported as such rather than as a "0/0 passed" success.
  local summary
  if ! summary=$(jq -r '[.pass_rate // 0, .passed // 0, .failed // 0, .total // 0] | @tsv' \
    "$HOLDOUT_RESULTS" 2>/dev/null); then
    warn "Holdout results are unreadable: ${HOLDOUT_RESULTS}"
    return 0
  fi

  local pass_rate passed failed total
  IFS=$'\t' read -r pass_rate passed failed total <<< "$summary"
  # Guard the numeric comparison below against a non-integer "failed" field.
  [[ "$failed" =~ ^[0-9]+$ ]] || failed=0

  if (( failed == 0 )); then
    success "Holdout Results: ${passed}/${total} passed (${pass_rate}%)"
    return 0
  fi

  error "Holdout Results: ${passed}/${total} passed (${pass_rate}%)"

  local failed_list t
  failed_list=$(jq -r '.failed_tests[]' "$HOLDOUT_RESULTS" 2>/dev/null || true)
  if [[ -n "$failed_list" ]]; then
    echo " Failed tests:"
    while IFS= read -r t; do
      printf ' - %s\n' "$t"
    done <<< "$failed_list"
  fi
  return 0
}
|
|
398
|
+
|
|
399
|
+
# ─── Cleanup ─────────────────────────────────────────────────────────────────
|
|
400
|
+
# Remove sealed tests and holdout artifacts.
|
|
401
|
+
|
|
402
|
+
#######################################
# Remove the sealed test copies and every holdout artifact file.
# Globals:
#   HOLDOUT_SEALED_DIR (read)  directory removed recursively
#   HOLDOUT_MANIFEST, HOLDOUT_RESULTS, HOLDOUT_DIR (read)  files removed
# Arguments:
#   none
# Returns:
#   Status of the last removal attempted (0 when nothing needed removing).
#   HOLDOUT_DIR itself is left in place.
#######################################
holdout_cleanup() {
  # "--" ends option parsing, so a holdout path that begins with "-" is
  # removed instead of being read as rm options.
  if [[ -d "$HOLDOUT_SEALED_DIR" ]]; then
    rm -rf -- "$HOLDOUT_SEALED_DIR"
  fi

  local artifact
  for artifact in \
    "$HOLDOUT_MANIFEST" \
    "$HOLDOUT_RESULTS" \
    "$HOLDOUT_DIR/visible-tests.txt"; do
    # Only regular files are removed; anything else at that path is left alone.
    if [[ -f "$artifact" ]]; then
      rm -f -- "$artifact"
    fi
  done
}
|
|
416
|
+
|
|
417
|
+
# ─── Status ──────────────────────────────────────────────────────────────────
|
|
418
|
+
|
|
419
|
+
#######################################
# Print a one-line, key=value summary of the holdout state.
# Globals:
#   HOLDOUT_MANIFEST (read)  manifest whose counters are reported
# Arguments:
#   none
# Outputs:
#   "holdout_active=false" when no manifest exists; otherwise
#   "holdout_active=true" followed by the sealed/visible counts and the ratio.
#   Fields that jq cannot read fall back to 0 (30 for the ratio).
#######################################
holdout_status() {
  # No manifest means holdout was never sealed (or was cleaned up).
  if [[ ! -f "$HOLDOUT_MANIFEST" ]]; then
    echo "holdout_active=false"
    return
  fi

  local n_sealed
  local n_visible
  local pct
  n_sealed=$(jq -r '.sealed_count // 0' "$HOLDOUT_MANIFEST" 2>/dev/null || echo "0")
  n_visible=$(jq -r '.visible_count // 0' "$HOLDOUT_MANIFEST" 2>/dev/null || echo "0")
  pct=$(jq -r '.ratio // 30' "$HOLDOUT_MANIFEST" 2>/dev/null || echo "30")

  echo "holdout_active=true sealed=${n_sealed} visible=${n_visible} ratio=${pct}%"
}
|