shipwright-cli 3.2.0 → 3.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/agents/code-reviewer.md +2 -0
- package/.claude/agents/devops-engineer.md +2 -0
- package/.claude/agents/doc-fleet-agent.md +2 -0
- package/.claude/agents/pipeline-agent.md +2 -0
- package/.claude/agents/shell-script-specialist.md +2 -0
- package/.claude/agents/test-specialist.md +2 -0
- package/.claude/hooks/agent-crash-capture.sh +32 -0
- package/.claude/hooks/post-tool-use.sh +3 -2
- package/.claude/hooks/pre-tool-use.sh +35 -3
- package/README.md +4 -4
- package/claude-code/hooks/config-change.sh +18 -0
- package/claude-code/hooks/instructions-reloaded.sh +7 -0
- package/claude-code/hooks/worktree-create.sh +25 -0
- package/claude-code/hooks/worktree-remove.sh +20 -0
- package/config/code-constitution.json +130 -0
- package/dashboard/middleware/auth.ts +134 -0
- package/dashboard/middleware/constants.ts +21 -0
- package/dashboard/public/index.html +2 -6
- package/dashboard/public/styles.css +100 -97
- package/dashboard/routes/auth.ts +38 -0
- package/dashboard/server.ts +66 -25
- package/dashboard/services/config.ts +26 -0
- package/dashboard/services/db.ts +118 -0
- package/dashboard/src/canvas/pixel-agent.ts +298 -0
- package/dashboard/src/canvas/pixel-sprites.ts +440 -0
- package/dashboard/src/canvas/shipyard-effects.ts +367 -0
- package/dashboard/src/canvas/shipyard-scene.ts +616 -0
- package/dashboard/src/canvas/submarine-layout.ts +267 -0
- package/dashboard/src/components/header.ts +8 -7
- package/dashboard/src/core/router.ts +1 -0
- package/dashboard/src/design/submarine-theme.ts +253 -0
- package/dashboard/src/main.ts +2 -0
- package/dashboard/src/types/api.ts +2 -1
- package/dashboard/src/views/activity.ts +2 -1
- package/dashboard/src/views/shipyard.ts +39 -0
- package/dashboard/types/index.ts +166 -0
- package/docs/plans/2026-02-28-compound-audit-and-shipyard-design.md +186 -0
- package/docs/plans/2026-02-28-skipper-shipwright-implementation-plan.md +1182 -0
- package/docs/plans/2026-02-28-skipper-shipwright-integration-design.md +531 -0
- package/docs/plans/2026-03-01-ai-powered-skill-injection-design.md +298 -0
- package/docs/plans/2026-03-01-ai-powered-skill-injection-plan.md +1109 -0
- package/docs/plans/2026-03-01-capabilities-cleanup-plan.md +658 -0
- package/docs/plans/2026-03-01-clean-architecture-plan.md +924 -0
- package/docs/plans/2026-03-01-compound-audit-cascade-design.md +191 -0
- package/docs/plans/2026-03-01-compound-audit-cascade-plan.md +921 -0
- package/docs/plans/2026-03-01-deep-integration-plan.md +851 -0
- package/docs/plans/2026-03-01-pipeline-audit-trail-design.md +145 -0
- package/docs/plans/2026-03-01-pipeline-audit-trail-plan.md +770 -0
- package/docs/plans/2026-03-01-refined-depths-brand-design.md +382 -0
- package/docs/plans/2026-03-01-refined-depths-implementation.md +599 -0
- package/docs/plans/2026-03-01-skipper-kernel-integration-design.md +203 -0
- package/docs/plans/2026-03-01-unified-platform-design.md +272 -0
- package/docs/plans/2026-03-07-claude-code-feature-integration-design.md +189 -0
- package/docs/plans/2026-03-07-claude-code-feature-integration-plan.md +1165 -0
- package/docs/research/BACKLOG_QUICK_REFERENCE.md +352 -0
- package/docs/research/CUTTING_EDGE_RESEARCH_2026.md +546 -0
- package/docs/research/RESEARCH_INDEX.md +439 -0
- package/docs/research/RESEARCH_SOURCES.md +440 -0
- package/docs/research/RESEARCH_SUMMARY.txt +275 -0
- package/docs/superpowers/specs/2026-03-10-pipeline-quality-revolution-design.md +341 -0
- package/package.json +2 -2
- package/scripts/lib/adaptive-model.sh +427 -0
- package/scripts/lib/adaptive-timeout.sh +316 -0
- package/scripts/lib/audit-trail.sh +309 -0
- package/scripts/lib/auto-recovery.sh +471 -0
- package/scripts/lib/bandit-selector.sh +431 -0
- package/scripts/lib/bootstrap.sh +104 -2
- package/scripts/lib/causal-graph.sh +455 -0
- package/scripts/lib/compat.sh +126 -0
- package/scripts/lib/compound-audit.sh +337 -0
- package/scripts/lib/constitutional.sh +454 -0
- package/scripts/lib/context-budget.sh +359 -0
- package/scripts/lib/convergence.sh +594 -0
- package/scripts/lib/cost-optimizer.sh +634 -0
- package/scripts/lib/daemon-adaptive.sh +10 -0
- package/scripts/lib/daemon-dispatch.sh +106 -17
- package/scripts/lib/daemon-failure.sh +34 -4
- package/scripts/lib/daemon-patrol.sh +23 -2
- package/scripts/lib/daemon-poll-github.sh +361 -0
- package/scripts/lib/daemon-poll-health.sh +299 -0
- package/scripts/lib/daemon-poll.sh +27 -611
- package/scripts/lib/daemon-state.sh +112 -66
- package/scripts/lib/daemon-triage.sh +10 -0
- package/scripts/lib/dod-scorecard.sh +442 -0
- package/scripts/lib/error-actionability.sh +300 -0
- package/scripts/lib/formal-spec.sh +461 -0
- package/scripts/lib/helpers.sh +177 -4
- package/scripts/lib/intent-analysis.sh +409 -0
- package/scripts/lib/loop-convergence.sh +350 -0
- package/scripts/lib/loop-iteration.sh +682 -0
- package/scripts/lib/loop-progress.sh +48 -0
- package/scripts/lib/loop-restart.sh +185 -0
- package/scripts/lib/memory-effectiveness.sh +506 -0
- package/scripts/lib/mutation-executor.sh +352 -0
- package/scripts/lib/outcome-feedback.sh +521 -0
- package/scripts/lib/pipeline-cli.sh +336 -0
- package/scripts/lib/pipeline-commands.sh +1216 -0
- package/scripts/lib/pipeline-detection.sh +100 -2
- package/scripts/lib/pipeline-execution.sh +897 -0
- package/scripts/lib/pipeline-github.sh +28 -3
- package/scripts/lib/pipeline-intelligence-compound.sh +431 -0
- package/scripts/lib/pipeline-intelligence-scoring.sh +407 -0
- package/scripts/lib/pipeline-intelligence-skip.sh +181 -0
- package/scripts/lib/pipeline-intelligence.sh +100 -1136
- package/scripts/lib/pipeline-quality-bash-compat.sh +182 -0
- package/scripts/lib/pipeline-quality-checks.sh +17 -715
- package/scripts/lib/pipeline-quality-gates.sh +563 -0
- package/scripts/lib/pipeline-stages-build.sh +730 -0
- package/scripts/lib/pipeline-stages-delivery.sh +965 -0
- package/scripts/lib/pipeline-stages-intake.sh +1133 -0
- package/scripts/lib/pipeline-stages-monitor.sh +407 -0
- package/scripts/lib/pipeline-stages-review.sh +1022 -0
- package/scripts/lib/pipeline-stages.sh +59 -2929
- package/scripts/lib/pipeline-state.sh +36 -5
- package/scripts/lib/pipeline-util.sh +487 -0
- package/scripts/lib/policy-learner.sh +438 -0
- package/scripts/lib/process-reward.sh +493 -0
- package/scripts/lib/project-detect.sh +649 -0
- package/scripts/lib/quality-profile.sh +334 -0
- package/scripts/lib/recruit-commands.sh +885 -0
- package/scripts/lib/recruit-learning.sh +739 -0
- package/scripts/lib/recruit-roles.sh +648 -0
- package/scripts/lib/reward-aggregator.sh +458 -0
- package/scripts/lib/rl-optimizer.sh +362 -0
- package/scripts/lib/root-cause.sh +427 -0
- package/scripts/lib/scope-enforcement.sh +445 -0
- package/scripts/lib/session-restart.sh +493 -0
- package/scripts/lib/skill-memory.sh +300 -0
- package/scripts/lib/skill-registry.sh +775 -0
- package/scripts/lib/spec-driven.sh +476 -0
- package/scripts/lib/test-helpers.sh +18 -7
- package/scripts/lib/test-holdout.sh +429 -0
- package/scripts/lib/test-optimizer.sh +511 -0
- package/scripts/shipwright-file-suggest.sh +45 -0
- package/scripts/skills/adversarial-quality.md +61 -0
- package/scripts/skills/api-design.md +44 -0
- package/scripts/skills/architecture-design.md +50 -0
- package/scripts/skills/brainstorming.md +43 -0
- package/scripts/skills/data-pipeline.md +44 -0
- package/scripts/skills/deploy-safety.md +64 -0
- package/scripts/skills/documentation.md +38 -0
- package/scripts/skills/frontend-design.md +45 -0
- package/scripts/skills/generated/.gitkeep +0 -0
- package/scripts/skills/generated/_refinements/.gitkeep +0 -0
- package/scripts/skills/generated/_refinements/adversarial-quality.patch.md +3 -0
- package/scripts/skills/generated/_refinements/architecture-design.patch.md +3 -0
- package/scripts/skills/generated/_refinements/brainstorming.patch.md +3 -0
- package/scripts/skills/generated/cli-version-management.md +29 -0
- package/scripts/skills/generated/collection-system-validation.md +99 -0
- package/scripts/skills/generated/large-scale-c-refactoring-coordination.md +97 -0
- package/scripts/skills/generated/pattern-matching-similarity-scoring.md +195 -0
- package/scripts/skills/generated/test-parallelization-detection.md +65 -0
- package/scripts/skills/observability.md +79 -0
- package/scripts/skills/performance.md +48 -0
- package/scripts/skills/pr-quality.md +49 -0
- package/scripts/skills/product-thinking.md +43 -0
- package/scripts/skills/security-audit.md +49 -0
- package/scripts/skills/systematic-debugging.md +40 -0
- package/scripts/skills/testing-strategy.md +47 -0
- package/scripts/skills/two-stage-review.md +52 -0
- package/scripts/skills/validation-thoroughness.md +55 -0
- package/scripts/sw +9 -3
- package/scripts/sw-activity.sh +9 -2
- package/scripts/sw-adaptive.sh +2 -1
- package/scripts/sw-adversarial.sh +2 -1
- package/scripts/sw-architecture-enforcer.sh +3 -1
- package/scripts/sw-auth.sh +12 -2
- package/scripts/sw-autonomous.sh +5 -1
- package/scripts/sw-changelog.sh +4 -1
- package/scripts/sw-checkpoint.sh +2 -1
- package/scripts/sw-ci.sh +5 -1
- package/scripts/sw-cleanup.sh +4 -26
- package/scripts/sw-code-review.sh +10 -4
- package/scripts/sw-connect.sh +2 -1
- package/scripts/sw-context.sh +2 -1
- package/scripts/sw-cost.sh +48 -3
- package/scripts/sw-daemon.sh +66 -9
- package/scripts/sw-dashboard.sh +3 -1
- package/scripts/sw-db.sh +59 -16
- package/scripts/sw-decide.sh +8 -2
- package/scripts/sw-decompose.sh +360 -17
- package/scripts/sw-deps.sh +4 -1
- package/scripts/sw-developer-simulation.sh +4 -1
- package/scripts/sw-discovery.sh +325 -2
- package/scripts/sw-doc-fleet.sh +4 -1
- package/scripts/sw-docs-agent.sh +3 -1
- package/scripts/sw-docs.sh +2 -1
- package/scripts/sw-doctor.sh +453 -2
- package/scripts/sw-dora.sh +4 -1
- package/scripts/sw-durable.sh +4 -3
- package/scripts/sw-e2e-orchestrator.sh +17 -16
- package/scripts/sw-eventbus.sh +7 -1
- package/scripts/sw-evidence.sh +364 -12
- package/scripts/sw-feedback.sh +550 -9
- package/scripts/sw-fix.sh +20 -1
- package/scripts/sw-fleet-discover.sh +6 -2
- package/scripts/sw-fleet-viz.sh +4 -1
- package/scripts/sw-fleet.sh +5 -1
- package/scripts/sw-github-app.sh +16 -3
- package/scripts/sw-github-checks.sh +3 -2
- package/scripts/sw-github-deploy.sh +3 -2
- package/scripts/sw-github-graphql.sh +18 -7
- package/scripts/sw-guild.sh +5 -1
- package/scripts/sw-heartbeat.sh +5 -30
- package/scripts/sw-hello.sh +67 -0
- package/scripts/sw-hygiene.sh +6 -1
- package/scripts/sw-incident.sh +265 -1
- package/scripts/sw-init.sh +18 -2
- package/scripts/sw-instrument.sh +10 -2
- package/scripts/sw-intelligence.sh +42 -6
- package/scripts/sw-jira.sh +5 -1
- package/scripts/sw-launchd.sh +2 -1
- package/scripts/sw-linear.sh +4 -1
- package/scripts/sw-logs.sh +4 -1
- package/scripts/sw-loop.sh +432 -1128
- package/scripts/sw-memory.sh +356 -2
- package/scripts/sw-mission-control.sh +6 -1
- package/scripts/sw-model-router.sh +481 -26
- package/scripts/sw-otel.sh +13 -4
- package/scripts/sw-oversight.sh +14 -5
- package/scripts/sw-patrol-meta.sh +334 -0
- package/scripts/sw-pipeline-composer.sh +5 -1
- package/scripts/sw-pipeline-vitals.sh +2 -1
- package/scripts/sw-pipeline.sh +53 -2664
- package/scripts/sw-pm.sh +12 -5
- package/scripts/sw-pr-lifecycle.sh +2 -1
- package/scripts/sw-predictive.sh +7 -1
- package/scripts/sw-prep.sh +185 -2
- package/scripts/sw-ps.sh +5 -25
- package/scripts/sw-public-dashboard.sh +15 -3
- package/scripts/sw-quality.sh +2 -1
- package/scripts/sw-reaper.sh +8 -25
- package/scripts/sw-recruit.sh +156 -2303
- package/scripts/sw-regression.sh +19 -12
- package/scripts/sw-release-manager.sh +3 -1
- package/scripts/sw-release.sh +4 -1
- package/scripts/sw-remote.sh +3 -1
- package/scripts/sw-replay.sh +7 -1
- package/scripts/sw-retro.sh +158 -1
- package/scripts/sw-review-rerun.sh +3 -1
- package/scripts/sw-scale.sh +10 -3
- package/scripts/sw-security-audit.sh +6 -1
- package/scripts/sw-self-optimize.sh +6 -3
- package/scripts/sw-session.sh +9 -3
- package/scripts/sw-setup.sh +3 -1
- package/scripts/sw-stall-detector.sh +406 -0
- package/scripts/sw-standup.sh +15 -7
- package/scripts/sw-status.sh +3 -1
- package/scripts/sw-strategic.sh +4 -1
- package/scripts/sw-stream.sh +7 -1
- package/scripts/sw-swarm.sh +18 -6
- package/scripts/sw-team-stages.sh +13 -6
- package/scripts/sw-templates.sh +5 -29
- package/scripts/sw-testgen.sh +7 -1
- package/scripts/sw-tmux-pipeline.sh +4 -1
- package/scripts/sw-tmux-role-color.sh +2 -0
- package/scripts/sw-tmux-status.sh +1 -1
- package/scripts/sw-tmux.sh +3 -1
- package/scripts/sw-trace.sh +3 -1
- package/scripts/sw-tracker-github.sh +3 -0
- package/scripts/sw-tracker-jira.sh +3 -0
- package/scripts/sw-tracker-linear.sh +3 -0
- package/scripts/sw-tracker.sh +3 -1
- package/scripts/sw-triage.sh +2 -1
- package/scripts/sw-upgrade.sh +3 -1
- package/scripts/sw-ux.sh +5 -2
- package/scripts/sw-webhook.sh +3 -1
- package/scripts/sw-widgets.sh +3 -1
- package/scripts/sw-worktree.sh +15 -3
- package/scripts/test-skill-injection.sh +1233 -0
- package/templates/pipelines/autonomous.json +27 -3
- package/templates/pipelines/cost-aware.json +34 -8
- package/templates/pipelines/deployed.json +12 -0
- package/templates/pipelines/enterprise.json +12 -0
- package/templates/pipelines/fast.json +6 -0
- package/templates/pipelines/full.json +27 -3
- package/templates/pipelines/hotfix.json +6 -0
- package/templates/pipelines/standard.json +12 -0
- package/templates/pipelines/tdd.json +12 -0
|
@@ -0,0 +1,1233 @@
|
|
|
1
|
+
#!/usr/bin/env bash
# test-skill-injection.sh — Verify Dynamic Skill Injection System
# Tests: skill registry, issue type classification, retry context, two-stage review
set -euo pipefail

# Absolute path of the directory containing this script; used to locate
# lib/skill-registry.sh, sw-intelligence.sh and the skills/*.md fixtures.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# Snapshot of PATH taken before any test mutates the environment
# (not referenced in this chunk — presumably restored later; TODO confirm).
ORIGINAL_PATH="$PATH"
# Global pass/fail counters and an accumulated error summary, maintained
# by the pass()/fail() helpers below.
PASS=0
FAIL=0
ERRORS=""

# ─── Helpers ──────────────────────────────────────────────────────────────────
# pass LABEL — record a passing check: bump the PASS counter and print the
# label with a check mark.
pass() {
  PASS=$((PASS + 1))
  printf ' ✓ %s\n' "$1"
}

# fail LABEL — record a failing check: bump the FAIL counter, append the
# label to the ERRORS summary (with a literal '\n' separator, expanded
# later when the summary is printed), and print the label in red.
fail() {
  FAIL=$((FAIL + 1))
  ERRORS="${ERRORS}\n ✗ $1"
  printf ' \033[31m✗ %s\033[0m\n' "$1"
}
# assert_eq ACTUAL EXPECTED LABEL — pass when the two strings compare equal,
# otherwise fail with both values included in the message.
assert_eq() {
  local got="$1" want="$2" label="$3"
  if [[ "$got" != "$want" ]]; then
    fail "$label (expected '$want', got '$got')"
    return
  fi
  pass "$label"
}
# assert_contains HAYSTACK NEEDLE LABEL — pass when HAYSTACK contains NEEDLE
# as a literal substring.
# Fixes vs. the original: grep -F treats the needle as a fixed string (the
# unescaped form let '.' in needles like "plan.md" match any character), and
# '--' protects needles that begin with a dash from being parsed as options.
assert_contains() {
  local haystack="$1" needle="$2" msg="$3"
  if printf '%s\n' "$haystack" | grep -qF -- "$needle"; then
    pass "$msg"
  else
    fail "$msg (expected to contain '$needle')"
  fi
}
# assert_not_empty VALUE LABEL — pass when VALUE is a non-empty string.
assert_not_empty() {
  local value="$1" label="$2"
  if [[ -z "$value" ]]; then
    fail "$label (was empty)"
  else
    pass "$label"
  fi
}
# assert_file_exists PATH LABEL — pass when PATH names an existing regular
# file; the failure message echoes the missing path.
assert_file_exists() {
  local target="$1" label="$2"
  if [[ ! -f "$target" ]]; then
    fail "$label (file not found: $target)"
    return
  fi
  pass "$label"
}
# assert_exit_zero LABEL CMD [ARGS…] — run CMD silently and pass when it
# exits 0; on failure the command's exit code is included in the message.
assert_exit_zero() {
  local label="$1"
  shift
  local rc=0
  "$@" >/dev/null 2>&1 || rc=$?
  if (( rc == 0 )); then
    pass "$label"
  else
    fail "$label (exit code: $rc)"
  fi
}
# assert_exit_nonzero LABEL CMD [ARGS…] — run CMD silently and pass when it
# exits with a non-zero status (i.e. the command is expected to fail).
assert_exit_nonzero() {
  local label="$1"
  shift
  if ! "$@" >/dev/null 2>&1; then
    pass "$label"
  else
    fail "$label (expected non-zero exit, got 0)"
  fi
}
# assert_true EXPR LABEL — eval EXPR as a shell expression and pass when it
# succeeds. eval is deliberate here: callers hand in test expressions such
# as '[[ -d "$dir" ]]'; evaluation errors are suppressed and count as false.
assert_true() {
  local expression="$1" label="$2"
  if ! eval "$expression" 2>/dev/null; then
    fail "$label (expression was false)"
    return
  fi
  pass "$label"
}
# assert_not_contains HAYSTACK NEEDLE LABEL — pass when HAYSTACK does NOT
# contain NEEDLE as a literal substring.
# Fixes vs. the original: grep -F makes the needle a fixed string (previously
# interpreted as a BRE) and '--' protects needles beginning with a dash.
assert_not_contains() {
  local haystack="$1" needle="$2" msg="$3"
  if printf '%s\n' "$haystack" | grep -qF -- "$needle"; then
    fail "$msg (should NOT contain '$needle')"
  else
    pass "$msg"
  fi
}
# ═══════════════════════════════════════════════════════════════════════════════
# TEST SUITE 1: Skill Registry
# ═══════════════════════════════════════════════════════════════════════════════
# Exercises the issue-type × pipeline-stage → skill-file mapping exposed by
# lib/skill-registry.sh. The library is defined outside this file; these
# checks pin its observable contract (TODO confirm exact API there).

echo ""
echo "═══ Suite 1: Skill Registry ═══"

source "$SCRIPT_DIR/lib/skill-registry.sh"

# --- skill_get_prompts: plan stage ---
# Every planning-capable type is expected to include brainstorming plus its
# specialty skill file(s).
echo ""
echo " ── Plan stage mappings ──"

plan_frontend=$(skill_get_prompts "frontend" "plan")
assert_contains "$plan_frontend" "brainstorming.md" "frontend/plan includes brainstorming"
assert_contains "$plan_frontend" "frontend-design.md" "frontend/plan includes frontend-design"
assert_contains "$plan_frontend" "product-thinking.md" "frontend/plan includes product-thinking"

plan_api=$(skill_get_prompts "api" "plan")
assert_contains "$plan_api" "brainstorming.md" "api/plan includes brainstorming"
assert_contains "$plan_api" "api-design.md" "api/plan includes api-design"

plan_database=$(skill_get_prompts "database" "plan")
assert_contains "$plan_database" "brainstorming.md" "database/plan includes brainstorming"
assert_contains "$plan_database" "data-pipeline.md" "database/plan includes data-pipeline"

plan_security=$(skill_get_prompts "security" "plan")
assert_contains "$plan_security" "brainstorming.md" "security/plan includes brainstorming"
assert_contains "$plan_security" "security-audit.md" "security/plan includes security-audit"

plan_performance=$(skill_get_prompts "performance" "plan")
assert_contains "$plan_performance" "brainstorming.md" "performance/plan includes brainstorming"
assert_contains "$plan_performance" "performance.md" "performance/plan includes performance"

plan_testing=$(skill_get_prompts "testing" "plan")
assert_contains "$plan_testing" "testing-strategy.md" "testing/plan includes testing-strategy"

plan_docs=$(skill_get_prompts "documentation" "plan")
assert_contains "$plan_docs" "documentation.md" "documentation/plan includes documentation"

plan_backend=$(skill_get_prompts "backend" "plan")
assert_contains "$plan_backend" "brainstorming.md" "backend/plan includes brainstorming"

plan_refactor=$(skill_get_prompts "refactor" "plan")
assert_contains "$plan_refactor" "brainstorming.md" "refactor/plan includes brainstorming"

plan_infra=$(skill_get_prompts "infrastructure" "plan")
assert_contains "$plan_infra" "brainstorming.md" "infrastructure/plan includes brainstorming"

# --- skill_get_prompts: build stage ---
# Build-stage skills are sparser; backend and refactor intentionally map to
# nothing at build time.
echo ""
echo " ── Build stage mappings ──"

build_frontend=$(skill_get_prompts "frontend" "build")
assert_contains "$build_frontend" "frontend-design.md" "frontend/build includes frontend-design"

build_api=$(skill_get_prompts "api" "build")
assert_contains "$build_api" "api-design.md" "api/build includes api-design"

build_security=$(skill_get_prompts "security" "build")
assert_contains "$build_security" "security-audit.md" "security/build includes security-audit"

build_backend=$(skill_get_prompts "backend" "build")
assert_eq "$build_backend" "" "backend/build returns no skills (empty)"

build_refactor=$(skill_get_prompts "refactor" "build")
assert_eq "$build_refactor" "" "refactor/build returns no skills (empty)"

# --- skill_get_prompts: review stage ---
echo ""
echo " ── Review stage mappings ──"

review_frontend=$(skill_get_prompts "frontend" "review")
assert_contains "$review_frontend" "two-stage-review.md" "frontend/review includes two-stage-review"

review_api=$(skill_get_prompts "api" "review")
assert_contains "$review_api" "two-stage-review.md" "api/review includes two-stage-review"
assert_contains "$review_api" "security-audit.md" "api/review includes security-audit"

review_security=$(skill_get_prompts "security" "review")
assert_contains "$review_security" "two-stage-review.md" "security/review includes two-stage-review"
assert_contains "$review_security" "security-audit.md" "security/review includes security-audit"

review_docs=$(skill_get_prompts "documentation" "review")
assert_eq "$review_docs" "" "documentation/review returns no skills (empty)"

# --- skill_get_prompts: unknown stage ---
echo ""
echo " ── Edge cases ──"

# Unknown stage → empty; unknown issue type → falls back to the plan-stage
# default (brainstorming).
unknown_stage=$(skill_get_prompts "frontend" "nonexistent_stage")
assert_eq "$unknown_stage" "" "unknown stage returns empty"

unknown_type=$(skill_get_prompts "aliens" "plan")
assert_contains "$unknown_type" "brainstorming.md" "unknown type defaults to brainstorming in plan"

# --- skill_load_prompts ---
# Same mappings, but returning the concatenated file CONTENTS rather than
# the file names; assertions check signature phrases from each skill file.
echo ""
echo " ── skill_load_prompts ──"

loaded_frontend=$(skill_load_prompts "frontend" "plan")
assert_contains "$loaded_frontend" "Socratic Design Refinement" "frontend/plan loads brainstorming content"
assert_contains "$loaded_frontend" "Accessibility" "frontend/plan loads frontend-design content"
assert_contains "$loaded_frontend" "User Stories" "frontend/plan loads product-thinking content"

loaded_backend_build=$(skill_load_prompts "backend" "build")
assert_eq "$loaded_backend_build" "" "backend/build loads no content (empty)"

loaded_api_review=$(skill_load_prompts "api" "review")
assert_contains "$loaded_api_review" "Two-Stage Code Review" "api/review loads two-stage-review content"
assert_contains "$loaded_api_review" "OWASP" "api/review loads security-audit content"

# --- skill_has_two_stage_review ---
# Predicate (exit status) for whether a type's review stage uses the
# two-stage review skill.
echo ""
echo " ── skill_has_two_stage_review ──"

assert_exit_zero "frontend has two-stage review" skill_has_two_stage_review "frontend"
assert_exit_zero "api has two-stage review" skill_has_two_stage_review "api"
assert_exit_zero "backend has two-stage review" skill_has_two_stage_review "backend"
assert_exit_zero "security has two-stage review" skill_has_two_stage_review "security"
assert_exit_nonzero "documentation has NO two-stage review" skill_has_two_stage_review "documentation"
# ═══════════════════════════════════════════════════════════════════════════════
# TEST SUITE 2: Issue Type Classification (Fallback Heuristic)
# ═══════════════════════════════════════════════════════════════════════════════
# Verifies _intelligence_fallback_analyze (defined in sw-intelligence.sh),
# the label-driven heuristic used when the AI classifier is disabled.
# Output is JSON; jq extracts .issue_type for comparison.

echo ""
echo "═══ Suite 2: Issue Type Classification ═══"

# We need to source sw-intelligence.sh to get _intelligence_fallback_analyze.
# It requires some functions/vars — stub them out.
emit_event() { :; }
warn() { :; }
info() { :; }
error() { :; }
now_epoch() { date +%s; }
compute_md5() { echo "test"; }
export INTELLIGENCE_ENABLED="false" # force fallback path
export -f emit_event warn info error now_epoch compute_md5

# Source intelligence (it checks for functions, so stub what's needed)
# These stubs disable the AI path and its cache so only the fallback
# heuristic is exercised.
_intelligence_enabled() { return 1; }
_intelligence_cache_get() { return 1; }
_intelligence_cache_set() { :; }
intelligence_github_enrich() { echo "$1"; }
export -f _intelligence_enabled _intelligence_cache_get _intelligence_cache_set intelligence_github_enrich

# Best-effort source: failures are tolerated here and surface later as
# "command not found" test failures instead.
source "$SCRIPT_DIR/sw-intelligence.sh" 2>/dev/null || true

echo ""
echo " ── Label-based issue_type detection ──"

# Test the fallback analyzer directly
# Args appear to be: title, body, comma-separated labels — TODO confirm
# against _intelligence_fallback_analyze's definition.
result_frontend=$(_intelligence_fallback_analyze "Fix CSS layout" "The sidebar is broken on mobile" "ui, bug")
type_frontend=$(echo "$result_frontend" | jq -r '.issue_type' 2>/dev/null)
assert_eq "$type_frontend" "frontend" "labels 'ui, bug' → frontend"

result_api=$(_intelligence_fallback_analyze "Add REST endpoint" "New /api/users endpoint" "api, feature")
type_api=$(echo "$result_api" | jq -r '.issue_type' 2>/dev/null)
assert_eq "$type_api" "api" "labels 'api, feature' → api"

result_db=$(_intelligence_fallback_analyze "Add migration" "Need new schema" "db, migration")
type_db=$(echo "$result_db" | jq -r '.issue_type' 2>/dev/null)
assert_eq "$type_db" "database" "labels 'db, migration' → database"

result_sec=$(_intelligence_fallback_analyze "Fix auth bypass" "XSS vulnerability" "security")
type_sec=$(echo "$result_sec" | jq -r '.issue_type' 2>/dev/null)
assert_eq "$type_sec" "security" "labels 'security' → security"

result_perf=$(_intelligence_fallback_analyze "Slow query" "latency is 5s" "perf, backend")
type_perf=$(echo "$result_perf" | jq -r '.issue_type' 2>/dev/null)
assert_eq "$type_perf" "performance" "labels 'perf, backend' → performance"

result_test=$(_intelligence_fallback_analyze "Add tests" "Improve coverage" "test, quality")
type_test=$(echo "$result_test" | jq -r '.issue_type' 2>/dev/null)
assert_eq "$type_test" "testing" "labels 'test, quality' → testing"

result_docs=$(_intelligence_fallback_analyze "Update README" "Outdated docs" "docs")
type_docs=$(echo "$result_docs" | jq -r '.issue_type' 2>/dev/null)
assert_eq "$type_docs" "documentation" "labels 'docs' → documentation"

result_infra=$(_intelligence_fallback_analyze "Fix CI" "Pipeline broken" "ci, infra")
type_infra=$(echo "$result_infra" | jq -r '.issue_type' 2>/dev/null)
assert_eq "$type_infra" "infrastructure" "labels 'ci, infra' → infrastructure"

result_refactor=$(_intelligence_fallback_analyze "Refactor auth" "Clean up module" "refactor")
type_refactor=$(echo "$result_refactor" | jq -r '.issue_type' 2>/dev/null)
assert_eq "$type_refactor" "refactor" "labels 'refactor' → refactor"

# With no recognized label, the heuristic falls back to "backend".
result_default=$(_intelligence_fallback_analyze "Some task" "Do the thing" "enhancement")
type_default=$(echo "$result_default" | jq -r '.issue_type' 2>/dev/null)
assert_eq "$type_default" "backend" "labels 'enhancement' → backend (default)"
# ═══════════════════════════════════════════════════════════════════════════════
# TEST SUITE 3: Skill Files Integrity
# ═══════════════════════════════════════════════════════════════════════════════
# Confirms every skill file exists under $SCRIPT_DIR/skills/ and still
# carries the signature phrase the registry/tests rely on.

echo ""
echo "═══ Suite 3: Skill File Integrity ═══"

# The canonical list of shipped skill files (without the .md extension).
EXPECTED_SKILLS=(
  brainstorming
  systematic-debugging
  two-stage-review
  frontend-design
  api-design
  data-pipeline
  security-audit
  performance
  testing-strategy
  product-thinking
  documentation
)

for skill in "${EXPECTED_SKILLS[@]}"; do
  path="$SCRIPT_DIR/skills/${skill}.md"
  assert_file_exists "$path" "skill file exists: ${skill}.md"
done

# Verify content signatures (each file should have specific content)
echo ""
echo " ── Content verification ──"

assert_contains "$(cat "$SCRIPT_DIR/skills/brainstorming.md")" "Socratic" "brainstorming.md mentions Socratic"
assert_contains "$(cat "$SCRIPT_DIR/skills/systematic-debugging.md")" "Root Cause" "systematic-debugging.md mentions Root Cause"
assert_contains "$(cat "$SCRIPT_DIR/skills/two-stage-review.md")" "Pass 1" "two-stage-review.md has Pass 1"
assert_contains "$(cat "$SCRIPT_DIR/skills/two-stage-review.md")" "Pass 2" "two-stage-review.md has Pass 2"
assert_contains "$(cat "$SCRIPT_DIR/skills/frontend-design.md")" "Accessibility" "frontend-design.md mentions Accessibility"
assert_contains "$(cat "$SCRIPT_DIR/skills/api-design.md")" "RESTful" "api-design.md mentions RESTful"
assert_contains "$(cat "$SCRIPT_DIR/skills/data-pipeline.md")" "Migration" "data-pipeline.md mentions Migration"
assert_contains "$(cat "$SCRIPT_DIR/skills/security-audit.md")" "OWASP" "security-audit.md mentions OWASP"
assert_contains "$(cat "$SCRIPT_DIR/skills/performance.md")" "Profiling" "performance.md mentions Profiling"
assert_contains "$(cat "$SCRIPT_DIR/skills/testing-strategy.md")" "Test Pyramid" "testing-strategy.md mentions Test Pyramid"
assert_contains "$(cat "$SCRIPT_DIR/skills/product-thinking.md")" "User Stories" "product-thinking.md mentions User Stories"
assert_contains "$(cat "$SCRIPT_DIR/skills/documentation.md")" "Skip Heavy Stages" "documentation.md mentions Skip Heavy Stages"
# ═══════════════════════════════════════════════════════════════════════════════
# TEST SUITE 4: Retry Context Mechanics
# ═══════════════════════════════════════════════════════════════════════════════
# Re-creates (in a temp dir) the retry-context file that sw-pipeline.sh
# writes on a failed stage, and checks its format, the plan-skip threshold,
# and the consume-then-delete lifecycle.
# Fixes vs. the original:
#   - trap body is single-quoted and the path is quoted/`--`-guarded, so
#     cleanup survives temp paths containing spaces or glob characters
#     (the old double-quoted trap expanded the path unquoted at trap time);
#   - _retry_hints is initialized up front so the later assert_eq cannot
#     trip `set -u` if the consumption branch is ever skipped.

echo ""
echo "═══ Suite 4: Retry Context Mechanics ═══"

TMPDIR_TEST=$(mktemp -d)
trap 'rm -rf -- "$TMPDIR_TEST"' EXIT

# Test: retry context file format
echo ""
echo " ── Retry context file creation ──"

# Simulate what sw-pipeline.sh writes
_retry_ctx_file="${TMPDIR_TEST}/.retry-context-build.md"
error_class="logic"
attempt=1
max_retries=2
_log_file="${TMPDIR_TEST}/build-log.txt"
echo "Error: Cannot find module './foo'" > "$_log_file"
echo "TypeError: undefined is not a function" >> "$_log_file"

ARTIFACTS_DIR="$TMPDIR_TEST"
# Create some fake artifacts: a plan.md with 16 lines (header + 15 tasks),
# deliberately above the >10-line skip threshold tested below.
echo "# Plan" > "${ARTIFACTS_DIR}/plan.md"
for i in $(seq 1 15); do echo "- Task $i" >> "${ARTIFACTS_DIR}/plan.md"; done

# Build the retry-context markdown exactly as the pipeline does; the whole
# group is best-effort (`|| true`) because a write failure here should show
# up as assertion failures, not abort the suite.
{
  echo "## Previous Attempt Failed"
  echo ""
  echo "**Error classification:** ${error_class}"
  echo "**Attempt:** ${attempt} of $((max_retries + 1))"
  echo ""
  echo "### Error Output (last 30 lines)"
  echo '```'
  tail -30 "$_log_file" 2>/dev/null || echo "(no log available)"
  echo '```'
  echo ""
  existing_artifacts=""
  for _af in plan.md design.md test-results.log; do
    if [[ -s "${ARTIFACTS_DIR}/${_af}" ]]; then
      # xargs trims the whitespace some `wc` implementations emit.
      _af_lines=$(wc -l < "${ARTIFACTS_DIR}/${_af}" 2>/dev/null | xargs)
      existing_artifacts="${existing_artifacts} - ${_af} (${_af_lines} lines)\n"
    fi
  done
  if [[ -n "$existing_artifacts" ]]; then
    echo "### Existing Artifacts (PRESERVE these)"
    echo -e "$existing_artifacts"
  fi
  echo "### Investigation Required"
  echo "1. Read the error output above carefully"
} > "$_retry_ctx_file" 2>/dev/null || true

assert_file_exists "$_retry_ctx_file" "retry context file created"
assert_contains "$(cat "$_retry_ctx_file")" "Previous Attempt Failed" "retry context has header"
assert_contains "$(cat "$_retry_ctx_file")" "logic" "retry context has error class"
assert_contains "$(cat "$_retry_ctx_file")" "Cannot find module" "retry context captures error output"
assert_contains "$(cat "$_retry_ctx_file")" "plan.md" "retry context lists existing artifacts"
assert_contains "$(cat "$_retry_ctx_file")" "Investigation Required" "retry context has investigation section"

# Test: plan artifact skip logic — a plan longer than 10 lines is treated
# as substantial enough to preserve on retry.
echo ""
echo " ── Plan artifact skip logic ──"

plan_artifact="${TMPDIR_TEST}/plan.md"
existing_lines=$(wc -l < "$plan_artifact" 2>/dev/null | xargs)
existing_lines="${existing_lines:-0}"
if [[ "$existing_lines" -gt 10 ]]; then
  plan_skip="yes"
else
  plan_skip="no"
fi
assert_eq "$plan_skip" "yes" "plan with ${existing_lines} lines skips retry (>10)"

# Test with short plan
echo "# Short plan" > "${TMPDIR_TEST}/short-plan.md"
short_lines=$(wc -l < "${TMPDIR_TEST}/short-plan.md" 2>/dev/null | xargs)
if [[ "$short_lines" -gt 10 ]]; then
  short_skip="yes"
else
  short_skip="no"
fi
assert_eq "$short_skip" "no" "plan with ${short_lines} lines does NOT skip retry (<=10)"

# Test: retry context consumption — the context is read once, then deleted.
echo ""
echo " ── Retry context consumption ──"

echo "Debug info here" > "${TMPDIR_TEST}/.retry-context-plan.md"
_retry_ctx="${TMPDIR_TEST}/.retry-context-plan.md"
_retry_hints=""
if [[ -s "$_retry_ctx" ]]; then
  _retry_hints=$(cat "$_retry_ctx" 2>/dev/null || true)
  rm -f "$_retry_ctx"
fi
assert_eq "$_retry_hints" "Debug info here" "retry context consumed correctly"
if [[ ! -f "$_retry_ctx" ]]; then
  pass "retry context file deleted after consumption"
else
  fail "retry context file should be deleted after consumption"
fi
# ═══════════════════════════════════════════════════════════════════════════════
# TEST SUITE 5: Integration — End-to-End Skill Flow
# ═══════════════════════════════════════════════════════════════════════════════

echo ""
echo "═══ Suite 5: Integration — End-to-End Skill Flow ═══"

echo ""
echo " ── Frontend issue → full skill chain ──"

# Simulate: frontend issue classified, skills loaded for each stage.
# Assertions probe for marker strings unique to each skill file
# (e.g. "Socratic" only appears in brainstorming.md).
export INTELLIGENCE_ISSUE_TYPE="frontend"

plan_skills=$(skill_load_prompts "$INTELLIGENCE_ISSUE_TYPE" "plan")
assert_contains "$plan_skills" "Socratic" "frontend plan gets brainstorming"
assert_contains "$plan_skills" "Accessibility" "frontend plan gets frontend-design"
assert_contains "$plan_skills" "User Stories" "frontend plan gets product-thinking"

build_skills=$(skill_load_prompts "$INTELLIGENCE_ISSUE_TYPE" "build")
assert_contains "$build_skills" "Responsive Design" "frontend build gets frontend-design"

review_skills=$(skill_load_prompts "$INTELLIGENCE_ISSUE_TYPE" "review")
assert_contains "$review_skills" "Two-Stage" "frontend review gets two-stage-review"

echo ""
echo " ── API issue → security in review ──"

export INTELLIGENCE_ISSUE_TYPE="api"
api_review=$(skill_load_prompts "$INTELLIGENCE_ISSUE_TYPE" "review")
assert_contains "$api_review" "Two-Stage" "api review gets two-stage-review"
assert_contains "$api_review" "OWASP" "api review gets security-audit"

echo ""
echo " ── Documentation issue → lightweight ──"

# Documentation issues get lightweight guidance and no review skills at all.
export INTELLIGENCE_ISSUE_TYPE="documentation"
doc_plan=$(skill_load_prompts "$INTELLIGENCE_ISSUE_TYPE" "plan")
assert_contains "$doc_plan" "Skip Heavy Stages" "documentation plan gets lightweight guidance"

doc_review=$(skill_load_prompts "$INTELLIGENCE_ISSUE_TYPE" "review")
assert_eq "$doc_review" "" "documentation review gets NO review skills"

assert_exit_nonzero "documentation has no two-stage review" skill_has_two_stage_review "documentation"

echo ""
echo " ── Security issue → double security ──"

# Security issues carry security-audit through plan, build AND review.
export INTELLIGENCE_ISSUE_TYPE="security"
sec_plan=$(skill_load_prompts "$INTELLIGENCE_ISSUE_TYPE" "plan")
assert_contains "$sec_plan" "OWASP" "security plan gets security-audit"

sec_build=$(skill_load_prompts "$INTELLIGENCE_ISSUE_TYPE" "build")
assert_contains "$sec_build" "OWASP" "security build gets security-audit"

sec_review=$(skill_load_prompts "$INTELLIGENCE_ISSUE_TYPE" "review")
assert_contains "$sec_review" "OWASP" "security review gets security-audit"
assert_contains "$sec_review" "Two-Stage" "security review also gets two-stage-review"
|
|
490
|
+
|
|
491
|
+
|
|
492
|
+
# ═══════════════════════════════════════════════════════════════════════════════
# TEST SUITE 6: New Skill Files (PDLC Expansion)
# ═══════════════════════════════════════════════════════════════════════════════

echo ""
echo "═══ Suite 6: New PDLC Skill Files ═══"

# The six skills added in the PDLC expansion; each must exist on disk.
NEW_SKILLS=(
  architecture-design
  adversarial-quality
  pr-quality
  deploy-safety
  validation-thoroughness
  observability
)

for skill in "${NEW_SKILLS[@]}"; do
  path="$SCRIPT_DIR/skills/${skill}.md"
  assert_file_exists "$path" "new skill file exists: ${skill}.md"
done

echo ""
echo " ── Content verification ──"

# Spot-check each new skill file for its distinguishing section headings.
assert_contains "$(cat "$SCRIPT_DIR/skills/architecture-design.md")" "Component Decomposition" "architecture-design.md has component decomposition"
assert_contains "$(cat "$SCRIPT_DIR/skills/architecture-design.md")" "Interface Contracts" "architecture-design.md has interface contracts"
assert_contains "$(cat "$SCRIPT_DIR/skills/adversarial-quality.md")" "Failure Mode" "adversarial-quality.md has failure mode analysis"
assert_contains "$(cat "$SCRIPT_DIR/skills/adversarial-quality.md")" "Negative Testing" "adversarial-quality.md has negative testing"
assert_contains "$(cat "$SCRIPT_DIR/skills/pr-quality.md")" "Commit Hygiene" "pr-quality.md has commit hygiene"
assert_contains "$(cat "$SCRIPT_DIR/skills/pr-quality.md")" "Reviewer Empathy" "pr-quality.md has reviewer empathy"
assert_contains "$(cat "$SCRIPT_DIR/skills/deploy-safety.md")" "Rollback" "deploy-safety.md has rollback strategy"
assert_contains "$(cat "$SCRIPT_DIR/skills/deploy-safety.md")" "Blue-Green" "deploy-safety.md has blue-green strategy"
assert_contains "$(cat "$SCRIPT_DIR/skills/validation-thoroughness.md")" "Smoke Test" "validation-thoroughness.md has smoke test design"
assert_contains "$(cat "$SCRIPT_DIR/skills/validation-thoroughness.md")" "Health Check Layers" "validation-thoroughness.md has health check layers"
assert_contains "$(cat "$SCRIPT_DIR/skills/observability.md")" "Anomaly Detection" "observability.md has anomaly detection"
assert_contains "$(cat "$SCRIPT_DIR/skills/observability.md")" "Auto-Rollback Triggers" "observability.md has auto-rollback triggers"
|
|
528
|
+
|
|
529
|
+
|
|
530
|
+
# ═══════════════════════════════════════════════════════════════════════════════
# TEST SUITE 7: New Stage Mappings in Registry
# ═══════════════════════════════════════════════════════════════════════════════
# skill_get_prompts returns the newline-separated skill FILE PATHS mapped to
# (issue_type, stage); assertions here check membership by filename.

echo ""
echo "═══ Suite 7: New Stage Mappings ═══"

echo ""
echo " ── Design stage ──"

design_frontend=$(skill_get_prompts "frontend" "design")
assert_contains "$design_frontend" "architecture-design.md" "frontend/design includes architecture-design"
assert_contains "$design_frontend" "frontend-design.md" "frontend/design includes frontend-design"

design_api=$(skill_get_prompts "api" "design")
assert_contains "$design_api" "architecture-design.md" "api/design includes architecture-design"
assert_contains "$design_api" "api-design.md" "api/design includes api-design"

design_security=$(skill_get_prompts "security" "design")
assert_contains "$design_security" "security-audit.md" "security/design includes security-audit"

# Documentation is intentionally unmapped at design time.
design_docs=$(skill_get_prompts "documentation" "design")
assert_eq "$design_docs" "" "documentation/design returns empty"

design_backend=$(skill_get_prompts "backend" "design")
assert_contains "$design_backend" "architecture-design.md" "backend/design includes architecture-design"

echo ""
echo " ── Compound quality stage ──"

cq_frontend=$(skill_get_prompts "frontend" "compound_quality")
assert_contains "$cq_frontend" "adversarial-quality.md" "frontend/compound_quality includes adversarial-quality"
assert_contains "$cq_frontend" "testing-strategy.md" "frontend/compound_quality includes testing-strategy"

cq_api=$(skill_get_prompts "api" "compound_quality")
assert_contains "$cq_api" "adversarial-quality.md" "api/compound_quality includes adversarial-quality"
assert_contains "$cq_api" "security-audit.md" "api/compound_quality includes security-audit"

cq_docs=$(skill_get_prompts "documentation" "compound_quality")
assert_eq "$cq_docs" "" "documentation/compound_quality returns empty"

echo ""
echo " ── PR stage ──"

# pr-quality applies to every issue type, including documentation.
pr_any=$(skill_get_prompts "backend" "pr")
assert_contains "$pr_any" "pr-quality.md" "backend/pr includes pr-quality"

pr_docs=$(skill_get_prompts "documentation" "pr")
assert_contains "$pr_docs" "pr-quality.md" "documentation/pr includes pr-quality"

echo ""
echo " ── Deploy stage ──"

deploy_api=$(skill_get_prompts "api" "deploy")
assert_contains "$deploy_api" "deploy-safety.md" "api/deploy includes deploy-safety"
assert_contains "$deploy_api" "security-audit.md" "api/deploy includes security-audit"

deploy_db=$(skill_get_prompts "database" "deploy")
assert_contains "$deploy_db" "deploy-safety.md" "database/deploy includes deploy-safety"
assert_contains "$deploy_db" "data-pipeline.md" "database/deploy includes data-pipeline"

deploy_docs=$(skill_get_prompts "documentation" "deploy")
assert_eq "$deploy_docs" "" "documentation/deploy returns empty"

echo ""
echo " ── Validate stage ──"

validate_api=$(skill_get_prompts "api" "validate")
assert_contains "$validate_api" "validation-thoroughness.md" "api/validate includes validation-thoroughness"
assert_contains "$validate_api" "security-audit.md" "api/validate includes security-audit"

validate_docs=$(skill_get_prompts "documentation" "validate")
assert_eq "$validate_docs" "" "documentation/validate returns empty"

echo ""
echo " ── Monitor stage ──"

monitor_perf=$(skill_get_prompts "performance" "monitor")
assert_contains "$monitor_perf" "observability.md" "performance/monitor includes observability"
assert_contains "$monitor_perf" "performance.md" "performance/monitor includes performance (double!)"

monitor_docs=$(skill_get_prompts "documentation" "monitor")
assert_eq "$monitor_docs" "" "documentation/monitor returns empty"

monitor_api=$(skill_get_prompts "api" "monitor")
assert_contains "$monitor_api" "observability.md" "api/monitor includes observability"
|
|
616
|
+
|
|
617
|
+
|
|
618
|
+
# ═══════════════════════════════════════════════════════════════════════════════
# TEST SUITE 8: Full PDLC Integration — Every Stage Covered
# ═══════════════════════════════════════════════════════════════════════════════

echo ""
echo "═══ Suite 8: Full PDLC Integration ═══"

echo ""
echo " ── API issue → all 9 stages ──"
export INTELLIGENCE_ISSUE_TYPE="api"

api_plan=$(skill_load_prompts "api" "plan")
assert_not_empty "$api_plan" "api/plan has skills"

api_design=$(skill_load_prompts "api" "design")
assert_contains "$api_design" "Architecture" "api/design has architecture guidance"
assert_contains "$api_design" "RESTful" "api/design has API patterns"

api_build=$(skill_load_prompts "api" "build")
assert_not_empty "$api_build" "api/build has skills"

api_review=$(skill_load_prompts "api" "review")
assert_not_empty "$api_review" "api/review has skills"

api_cq=$(skill_load_prompts "api" "compound_quality")
assert_contains "$api_cq" "Failure Mode" "api/compound_quality has adversarial thinking"

api_pr=$(skill_load_prompts "api" "pr")
assert_contains "$api_pr" "Commit Hygiene" "api/pr has PR quality"

api_deploy=$(skill_load_prompts "api" "deploy")
assert_contains "$api_deploy" "Rollback" "api/deploy has deploy safety"

api_validate=$(skill_load_prompts "api" "validate")
assert_contains "$api_validate" "Smoke Test" "api/validate has validation"

api_monitor=$(skill_load_prompts "api" "monitor")
assert_contains "$api_monitor" "Anomaly" "api/monitor has observability"

echo ""
echo " ── Documentation issue → lightweight everywhere ──"
export INTELLIGENCE_ISSUE_TYPE="documentation"

# Count how many of the 9 stages return any skill content for documentation.
ALL_STAGES=(plan design build review compound_quality pr deploy validate monitor)
doc_nonempty=0
doc_empty=0
for stage in "${ALL_STAGES[@]}"; do
  content=$(skill_load_prompts "documentation" "$stage")
  if [[ -n "$content" ]]; then
    doc_nonempty=$((doc_nonempty + 1))
  else
    doc_empty=$((doc_empty + 1))
  fi
done
# Documentation should have skills in plan, build, pr only (3 non-empty)
assert_eq "$doc_nonempty" "3" "documentation has exactly 3 stages with skills (plan, build, pr)"
assert_eq "$doc_empty" "6" "documentation skips 6 stages (design, review, compound_quality, deploy, validate, monitor)"

echo ""
echo " ── Total skill file count ──"
# xargs trims wc's leading whitespace so assert_eq compares a bare number.
total_skills=$(ls "$SCRIPT_DIR/skills/"*.md 2>/dev/null | wc -l | xargs)
assert_eq "$total_skills" "17" "17 total skill files (11 original + 6 new)"
|
|
680
|
+
|
|
681
|
+
|
|
682
|
+
# ═══════════════════════════════════════════════════════════════════════════════
# TEST SUITE 9: Adaptive Skill Selection (Level 2)
# ═══════════════════════════════════════════════════════════════════════════════

echo ""
echo "═══ Suite 9: Adaptive Skill Selection ═══"

# Test: skill_detect_from_body — accessibility keywords
echo ""
echo " ── Body analysis: accessibility ──"

body_accessibility="Fix WCAG compliance issues. The interface needs better keyboard navigation and ARIA labels for screen readers."
detected_a11y=$(skill_detect_from_body "$body_accessibility" "plan")
assert_contains "$detected_a11y" "frontend-design.md" "accessibility keywords detected"

# Test: skill_detect_from_body — API keywords
echo ""
echo " ── Body analysis: API/endpoint ──"

body_api="Design new REST endpoint for user management. Need GraphQL mutation support."
detected_api=$(skill_detect_from_body "$body_api" "plan")
assert_contains "$detected_api" "api-design.md" "API keywords detected"

# Test: skill_detect_from_body — security keywords
echo ""
echo " ── Body analysis: security ──"

body_security="Fix XSS vulnerability in user input. Implement OWASP Top 10 mitigations."
detected_sec=$(skill_detect_from_body "$body_security" "plan")
assert_contains "$detected_sec" "security-audit.md" "security keywords detected"

# Test: skill_detect_from_body — performance keywords
echo ""
echo " ── Body analysis: performance ──"

body_perf="Query is too slow. P95 latency is 5 seconds. Need to optimize and add caching."
detected_perf=$(skill_detect_from_body "$body_perf" "plan")
assert_contains "$detected_perf" "performance.md" "performance keywords detected"

# Test: skill_detect_from_body — migration keywords
echo ""
echo " ── Body analysis: database migration ──"

body_migration="Database schema refactor needed. Add new column and create migration."
detected_db=$(skill_detect_from_body "$body_migration" "plan")
assert_contains "$detected_db" "data-pipeline.md" "migration keywords detected"

# Test: skill_detect_from_body — empty body returns empty
echo ""
echo " ── Body analysis: edge cases ──"

detected_empty=$(skill_detect_from_body "" "plan")
assert_eq "$detected_empty" "" "empty body returns empty (no extra skills)"

# Test: skill_detect_from_body — multiple patterns in one body
body_multi="Improve accessibility (ARIA labels) and add API endpoint (REST). Also need security audit for OWASP compliance."
detected_multi=$(skill_detect_from_body "$body_multi" "plan")
assert_contains "$detected_multi" "frontend-design.md" "multiple patterns: accessibility detected"
assert_contains "$detected_multi" "api-design.md" "multiple patterns: API detected"
assert_contains "$detected_multi" "security-audit.md" "multiple patterns: security detected"

# Test: skill_weight_by_complexity — simple issues (1-3) reduce to first skill
echo ""
echo " ── Complexity weighting: simple (1-3) ──"

skills_sample="$(printf '%s\n%s\n%s' "$SCRIPT_DIR/skills/brainstorming.md" "$SCRIPT_DIR/skills/frontend-design.md" "$SCRIPT_DIR/skills/product-thinking.md")"
weighted_simple=$(skill_weight_by_complexity "1" "$skills_sample")
# NOTE: grep -c prints "0" itself when nothing matches (and exits 1), so the
# fallback must be '|| true' — '|| echo 0' would yield the two-line value "0\n0".
simple_count=$(echo "$weighted_simple" | grep -c "^.*\.md$" 2>/dev/null || true)
assert_eq "$simple_count" "1" "complexity 1 keeps only 1 skill (essential)"

weighted_simple_3=$(skill_weight_by_complexity "3" "$skills_sample")
simple_3_count=$(echo "$weighted_simple_3" | grep -c "^.*\.md$" 2>/dev/null || true)
assert_eq "$simple_3_count" "1" "complexity 3 keeps only 1 skill (essential)"

# Test: skill_weight_by_complexity — standard issues (4-7) keep all skills
echo ""
echo " ── Complexity weighting: standard (4-7) ──"

weighted_std=$(skill_weight_by_complexity "5" "$skills_sample")
std_count=$(echo "$weighted_std" | grep -c "^.*\.md$" 2>/dev/null || true)
assert_eq "$std_count" "3" "complexity 5 keeps all 3 skills (standard)"

weighted_std_7=$(skill_weight_by_complexity "7" "$skills_sample")
std_7_count=$(echo "$weighted_std_7" | grep -c "^.*\.md$" 2>/dev/null || true)
assert_eq "$std_7_count" "3" "complexity 7 keeps all 3 skills (standard)"

# Test: skill_weight_by_complexity — complex issues (8-10) add cross-cutting concerns
echo ""
echo " ── Complexity weighting: complex (8-10) ──"

weighted_complex=$(skill_weight_by_complexity "9" "$skills_sample")
# Should have original 3 + security-audit (if not present) + performance (if not present)
assert_contains "$weighted_complex" "brainstorming.md" "complexity 9: includes original skills"
assert_contains "$weighted_complex" "security-audit.md" "complexity 9: adds security-audit"
assert_contains "$weighted_complex" "performance.md" "complexity 9: adds performance"

# Test: skill_select_adaptive — combines all signals
echo ""
echo " ── Adaptive selection: full integration ──"

export INTELLIGENCE_ISSUE_TYPE="api"
body_for_adaptive="Add new REST API endpoint. Need to ensure WCAG accessibility. Consider OWASP security."
complexity_level=6
adaptive_result=$(skill_select_adaptive "api" "plan" "$body_for_adaptive" "$complexity_level")
assert_contains "$adaptive_result" "brainstorming.md" "adaptive: base skills included"
assert_contains "$adaptive_result" "api-design.md" "adaptive: issue-type skill included"
assert_contains "$adaptive_result" "frontend-design.md" "adaptive: body analysis detects accessibility"
assert_contains "$adaptive_result" "security-audit.md" "adaptive: body analysis detects security"

# Test: deduplication in adaptive selection
echo ""
echo " ── Adaptive selection: deduplication ──"

# If api-design is in base skills and body mentions API, should only appear once
adaptive_dup=$(skill_select_adaptive "api" "design" "Improve REST API design with GraphQL support" "5")
api_count=$(echo "$adaptive_dup" | grep -c "api-design.md" 2>/dev/null || true)
assert_eq "$api_count" "1" "adaptive: duplicate skills deduplicated"

# Test: graceful degradation when adaptive unavailable
echo ""
echo " ── Fallback behavior ──"

# This tests that pipeline-stages.sh correctly falls back to skill_load_prompts
# if skill_select_adaptive is unavailable. We simulate by checking that
# skill_load_prompts still works standalone.
fallback_result=$(skill_load_prompts "frontend" "plan")
assert_contains "$fallback_result" "Accessibility" "fallback: skill_load_prompts still functional"

echo ""
echo " ── Adaptive with zero complexity ──"

# Edge case: complexity 0 should be normalized to 1
weighted_zero=$(skill_weight_by_complexity "0" "$skills_sample")
zero_count=$(echo "$weighted_zero" | grep -c "^.*\.md$" 2>/dev/null || true)
assert_eq "$zero_count" "1" "complexity 0 normalized to 1 (essential only)"

# Edge case: complexity > 10 should be capped
weighted_high=$(skill_weight_by_complexity "99" "$skills_sample")
assert_contains "$weighted_high" "security-audit.md" "complexity 99 capped to 10 (adds cross-cutting)"
|
|
820
|
+
|
|
821
|
+
|
|
822
|
+
# ═══════════════════════════════════════════════════════════════════════════════
# TEST SUITE 10: Skill Memory & Learning
# ═══════════════════════════════════════════════════════════════════════════════

echo ""
echo "═══ Suite 10: Skill Memory & Learning ═══"

# Load skill memory module first (once)
source "$SCRIPT_DIR/lib/skill-memory.sh"

# --- Test 1: Recording a success outcome creates/updates JSON ---
echo ""
echo " ── Recording outcomes ──"

# Use temporary file for testing (don't pollute real memory)
_TEST_MEMORY_FILE=$(mktemp)
export SKILL_MEMORY_FILE="$_TEST_MEMORY_FILE"

skill_memory_record "backend" "plan" "brainstorming,frontend-design" "success" "1" >/dev/null 2>&1 || true
assert_file_exists "$SKILL_MEMORY_FILE" "Memory file created on first write"

result=$(jq '.records | length' "$SKILL_MEMORY_FILE" 2>/dev/null || echo "0")
assert_eq "$result" "1" "First record written to memory"

# --- Test 2: Recording a failure outcome ---
skill_memory_record "frontend" "build" "frontend-design" "failure" "1" >/dev/null 2>&1 || true
result=$(jq '.records | length' "$SKILL_MEMORY_FILE" 2>/dev/null || echo "0")
assert_eq "$result" "2" "Second record (failure) appended"

# Clean up from test 1-2 before test 3 (the .lock file is the memory module's
# write lock — remove it alongside the data file)
rm -f "$SKILL_MEMORY_FILE" "${SKILL_MEMORY_FILE}.lock"
# Re-initialize after cleanup
_skill_memory_ensure_file

# --- Test 3: Success rate calculation (2 success, 1 failure = 67%, rounded to 66) ---
skill_memory_record "api" "review" "two-stage-review,security-audit" "success" "1" >/dev/null 2>&1 || true
skill_memory_record "api" "review" "two-stage-review,security-audit" "success" "1" >/dev/null 2>&1 || true
skill_memory_record "api" "review" "two-stage-review,security-audit" "failure" "1" >/dev/null 2>&1 || true

rate=$(skill_memory_get_success_rate "api" "review" "two-stage-review" 2>/dev/null || echo "0")
assert_eq "$rate" "66" "Success rate: 2 success + 1 failure = 66%"

# --- Test 4: Recommendations return skills sorted by success rate ---
skill_memory_record "backend" "plan" "brainstorming" "success" "1" >/dev/null 2>&1 || true
skill_memory_record "backend" "plan" "brainstorming" "success" "1" >/dev/null 2>&1 || true
skill_memory_record "backend" "plan" "brainstorming" "success" "1" >/dev/null 2>&1 || true

skill_memory_record "backend" "plan" "architecture-design" "success" "1" >/dev/null 2>&1 || true
skill_memory_record "backend" "plan" "architecture-design" "failure" "1" >/dev/null 2>&1 || true

recommendations=$(skill_memory_get_recommendations "backend" "plan" 2>/dev/null || echo "")
# brainstorming should rank higher (100% success vs 50%)
if [[ "$recommendations" == *"brainstorming"* ]]; then
  pass "Recommendations returned for backend/plan"
else
  fail "Recommendations should not be empty for backend/plan"
fi

# Clean up from test 1-4
rm -f "$SKILL_MEMORY_FILE" "${SKILL_MEMORY_FILE}.lock"

# --- Test 5: Empty memory returns empty recommendations ---
_TEST_MEMORY_FILE=$(mktemp)
export SKILL_MEMORY_FILE="$_TEST_MEMORY_FILE"
recommendations=$(skill_memory_get_recommendations "frontend" "test" 2>/dev/null || echo "")
assert_eq "$recommendations" "" "Empty memory returns empty recommendations"
rm -f "$SKILL_MEMORY_FILE" "${SKILL_MEMORY_FILE}.lock"

# --- Test 6: Memory file created lazily ---
_TEST_MEMORY_FILE=$(mktemp)
rm -f "$_TEST_MEMORY_FILE"
export SKILL_MEMORY_FILE="$_TEST_MEMORY_FILE"
# Plain 'if' instead of '[[ ... ]] && pass' so a pre-existing file can't trip
# errexit (a failing '&&' chain at top level aborts the script under 'set -e').
if [[ ! -f "$SKILL_MEMORY_FILE" ]]; then
  pass "Temp file doesn't exist initially"
fi

skill_memory_record "testing" "plan" "testing-strategy" "success" "1" >/dev/null 2>&1 || true
assert_file_exists "$SKILL_MEMORY_FILE" "Memory file created lazily on first record"
rm -f "$SKILL_MEMORY_FILE" "${SKILL_MEMORY_FILE}.lock"
|
|
899
|
+
|
|
900
|
+
# --- Test 7: Graceful handling when jq is unavailable ---
# NOTE(review): prepending /nonexistent does not actually hide an installed jq;
# the 'else' branch below acknowledges that. TODO confirm whether a true
# jq-less run is exercised anywhere else.
export PATH="/nonexistent:$PATH"
if ! command -v jq &>/dev/null; then
  _TEST_MEMORY_FILE=$(mktemp)
  export SKILL_MEMORY_FILE="$_TEST_MEMORY_FILE"
  # BUGFIX: the original wrote 'cmd || true; fail_code=$?', which always
  # captures 0 (the exit status of 'true'), so the pass branch could never be
  # taken. Capture the real status via '|| fail_code=$?' instead — this also
  # keeps 'set -e' from aborting on the expected failure.
  fail_code=0
  skill_memory_record "database" "design" "data-pipeline" "success" "1" >/dev/null 2>&1 || fail_code=$?
  if [[ $fail_code -ne 0 ]]; then
    pass "Graceful failure when jq unavailable"
  else
    fail "Should return error when jq unavailable"
  fi
  rm -f "$SKILL_MEMORY_FILE" "${SKILL_MEMORY_FILE}.lock"
else
  pass "jq is available (can't fully test unavailable case)"
fi
export PATH="$ORIGINAL_PATH"
|
|
917
|
+
|
|
918
|
+
# --- Test 8: Max records limit (pruning) ---
# Write 250 records; the memory module is expected to prune down to 200.
_TEST_MEMORY_FILE=$(mktemp)
export SKILL_MEMORY_FILE="$_TEST_MEMORY_FILE"
for i in {1..250}; do
  skill_memory_record "refactor" "plan" "brainstorming" "success" "1" >/dev/null 2>&1 || true
done
record_count=$(jq '.records | length' "$SKILL_MEMORY_FILE" 2>/dev/null || echo "0")
if [[ "$record_count" -le 200 ]]; then
  pass "Records pruned to max 200 (got $record_count)"
else
  fail "Records not pruned (expected ≤200, got $record_count)"
fi
rm -f "$SKILL_MEMORY_FILE" "${SKILL_MEMORY_FILE}.lock"

# --- Test 9: Skill stats function ---
_TEST_MEMORY_FILE=$(mktemp)
export SKILL_MEMORY_FILE="$_TEST_MEMORY_FILE"
skill_memory_record "performance" "compound_quality" "adversarial-quality,performance" "success" "1" >/dev/null 2>&1 || true
skill_memory_record "performance" "compound_quality" "adversarial-quality,performance" "success" "1" >/dev/null 2>&1 || true
skill_memory_record "performance" "compound_quality" "adversarial-quality,performance" "failure" "1" >/dev/null 2>&1 || true

# skill_memory_stats emits JSON with success_count/failure_count fields.
stats=$(skill_memory_stats "performance" "compound_quality" "adversarial-quality" 2>/dev/null || echo "")
success_count=$(echo "$stats" | jq '.success_count' 2>/dev/null || echo "0")
failure_count=$(echo "$stats" | jq '.failure_count' 2>/dev/null || echo "0")

assert_eq "$success_count" "2" "Stats: 2 successes for adversarial-quality"
assert_eq "$failure_count" "1" "Stats: 1 failure for adversarial-quality"
rm -f "$SKILL_MEMORY_FILE" "${SKILL_MEMORY_FILE}.lock"

# --- Test 10: Export and import functionality ---
_TEST_MEMORY_FILE=$(mktemp)
_TEST_IMPORT_FILE=$(mktemp)
export SKILL_MEMORY_FILE="$_TEST_MEMORY_FILE"

skill_memory_record "documentation" "pr" "pr-quality" "success" "1" >/dev/null 2>&1 || true
skill_memory_record "documentation" "pr" "pr-quality" "success" "1" >/dev/null 2>&1 || true

skill_memory_export > "$_TEST_IMPORT_FILE" 2>/dev/null || true
import_records=$(jq '.records | length' "$_TEST_IMPORT_FILE" 2>/dev/null || echo "0")
assert_eq "$import_records" "2" "Export has correct record count"

rm -f "$SKILL_MEMORY_FILE" "${SKILL_MEMORY_FILE}.lock" "$_TEST_IMPORT_FILE"
|
|
960
|
+
|
|
961
|
+
|
|
# ═══════════════════════════════════════════════════════════════════════════════
# TEST SUITE 11: Skill Catalog Builder
# ═══════════════════════════════════════════════════════════════════════════════

echo ""
echo "═══ Suite 11: Skill Catalog Builder ═══"
echo ""

# Test: catalog includes curated skills
echo " ── Curated skills in catalog ──"
_catalog=$(skill_build_catalog 2>/dev/null || true)
for _curated in brainstorming frontend-design security-audit; do
  assert_contains "$_catalog" "$_curated" "catalog includes $_curated"
done

# Test: catalog includes one-line descriptions
assert_contains "$_catalog" "Socratic" "brainstorming has description"

# Test: catalog includes generated skills when they exist
_gen_dir="${SKILLS_DIR}/generated"
mkdir -p "$_gen_dir"
printf '%s\n%s\n' "## Test Generated Skill" "Test content for generated skill." > "$_gen_dir/test-gen-skill.md"
_catalog=$(skill_build_catalog 2>/dev/null || true)
assert_contains "$_catalog" "test-gen-skill" "catalog includes generated skill"
assert_contains "$_catalog" "[generated]" "generated skill is tagged"
rm -f "$_gen_dir/test-gen-skill.md"

# Test: catalog includes memory context when available
skill_memory_clear 2>/dev/null || true
for _ in 1 2; do
  skill_memory_record "frontend" "plan" "brainstorming" "success" "1" >/dev/null 2>&1 || true
done
_catalog=$(skill_build_catalog "frontend" "plan" 2>/dev/null || true)
assert_contains "$_catalog" "success" "catalog includes memory context"
skill_memory_clear 2>/dev/null || true

echo ""
echo " ── LLM skill analysis (mock) ──"

# We can't test real LLM calls in unit tests, so test the JSON parsing/artifact writing
# Mock: simulate skill_analyze_issue writing skill-plan.json
_test_artifacts=$(mktemp -d)

_mock_plan='{"issue_type":"frontend","confidence":0.92,"secondary_domains":["accessibility"],"complexity_assessment":{"score":6,"reasoning":"moderate"},"skill_plan":{"plan":["brainstorming","frontend-design"],"build":["frontend-design"],"review":["two-stage-review"]},"skill_rationale":{"frontend-design":"ARIA progressbar needed","brainstorming":"Task decomposition required"},"generated_skills":[],"review_focus":["accessibility"],"risk_areas":["ETA accuracy"]}'
printf '%s\n' "$_mock_plan" > "$_test_artifacts/skill-plan.json"

# Verify skill-plan.json is valid JSON
assert_true "jq '.' '$_test_artifacts/skill-plan.json' >/dev/null 2>&1" "skill-plan.json is valid JSON"

# Verify we can extract skills for a stage (comma-joined, no trailing comma)
_plan_skills=$(jq -r '.skill_plan.plan | join(",")' "$_test_artifacts/skill-plan.json" 2>/dev/null)
assert_eq "$_plan_skills" "brainstorming,frontend-design" "plan stage skills extracted correctly"

# Verify rationale extraction
_rationale=$(jq -r '.skill_rationale["frontend-design"]' "$_test_artifacts/skill-plan.json" 2>/dev/null)
assert_contains "$_rationale" "ARIA" "rationale extracted correctly"

rm -rf "$_test_artifacts"
# ═══════════════════════════════════════════════════════════════════════════════
# TEST SUITE 12: Plan-Based Skill Loading
# ═══════════════════════════════════════════════════════════════════════════════

echo ""
echo "═══ Suite 12: Plan-Based Skill Loading ═══"
echo ""

_plan_dir=$(mktemp -d)

# Write a mock skill-plan.json (quoted delimiter: no expansion inside)
cat > "$_plan_dir/skill-plan.json" << 'PLAN_EOF'
{
  "issue_type": "frontend",
  "skill_plan": {
    "plan": ["brainstorming", "frontend-design"],
    "build": ["frontend-design"],
    "review": ["two-stage-review"],
    "deploy": []
  },
  "skill_rationale": {
    "brainstorming": "Task decomposition for progress bar feature",
    "frontend-design": "ARIA progressbar role and responsive CSS needed",
    "two-stage-review": "Spec compliance check against plan.md"
  },
  "generated_skills": []
}
PLAN_EOF

echo " ── Loading skills from plan ──"

# Test: load plan stage skills (skills plus their rationale text)
plan_content=$(ARTIFACTS_DIR="$_plan_dir" skill_load_from_plan "plan" 2>/dev/null || true)
assert_contains "$plan_content" "brainstorming" "plan stage loads brainstorming skill"
assert_contains "$plan_content" "frontend-design" "plan stage loads frontend-design skill content"
assert_contains "$plan_content" "ARIA progressbar" "plan stage includes rationale"
assert_contains "$plan_content" "Task decomposition" "plan stage includes brainstorming rationale"

# Test: load build stage skills — only build-stage skills, nothing from plan
build_content=$(ARTIFACTS_DIR="$_plan_dir" skill_load_from_plan "build" 2>/dev/null || true)
assert_contains "$build_content" "frontend-design" "build stage loads frontend-design"
assert_not_contains "$build_content" "brainstorming" "build stage does NOT load brainstorming"

# Test: empty stage returns empty (whitespace-only output counts as empty)
deploy_content=$(ARTIFACTS_DIR="$_plan_dir" skill_load_from_plan "deploy" 2>/dev/null || true)
assert_eq "" "$(tr -d '[:space:]' <<<"$deploy_content")" "empty stage returns empty"

# Test: missing skill-plan.json falls back to skill_select_adaptive
_no_plan_dir=$(mktemp -d)
fallback_content=$(ARTIFACTS_DIR="$_no_plan_dir" INTELLIGENCE_ISSUE_TYPE="frontend" skill_load_from_plan "plan" 2>/dev/null || true)
assert_contains "$fallback_content" "brainstorming\|frontend\|Socratic" "fallback to adaptive when no plan"
rm -rf "$_no_plan_dir"

# Test: refinements are appended
mkdir -p "$SKILLS_DIR/generated/_refinements"
printf '%s\n' "REFINEMENT: Always check stat-bar CSS pattern reuse." > "$SKILLS_DIR/generated/_refinements/frontend-design.patch.md"
plan_content=$(ARTIFACTS_DIR="$_plan_dir" skill_load_from_plan "plan" 2>/dev/null || true)
assert_contains "$plan_content" "REFINEMENT" "refinement patch appended to skill"
rm -f "$SKILLS_DIR/generated/_refinements/frontend-design.patch.md"

rm -rf "$_plan_dir"
# ═══════════════════════════════════════════════════════════════════════════════
# TEST SUITE 13: Outcome Learning Loop
# ═══════════════════════════════════════════════════════════════════════════════

echo ""
echo "═══ Suite 13: Outcome Learning Loop ═══"
echo ""

_outcome_dir=$(mktemp -d)

# Write a mock skill-plan.json (quoted delimiter: no expansion inside)
cat > "$_outcome_dir/skill-plan.json" << 'PLAN_EOF'
{
  "issue_type": "frontend",
  "skill_plan": {
    "plan": ["brainstorming", "frontend-design"],
    "build": ["frontend-design"],
    "review": ["two-stage-review"]
  },
  "skill_rationale": {
    "frontend-design": "ARIA progressbar needed"
  },
  "generated_skills": []
}
PLAN_EOF

echo " ── Outcome JSON parsing ──"

# Test: parse a mock outcome response
_mock_outcome='{"skill_effectiveness":{"frontend-design":{"verdict":"effective","evidence":"ARIA section in plan","learning":"stat-bar reuse hint followed"}},"refinements":[{"skill":"frontend-design","addition":"For dashboard features, mention existing CSS patterns"}],"generated_skill_verdict":{}}'
printf '%s\n' "$_mock_outcome" > "$_outcome_dir/skill-outcome.json"

# Verify outcome JSON is valid
assert_true "jq '.' '$_outcome_dir/skill-outcome.json' >/dev/null 2>&1" "outcome JSON is valid"

# Verify verdict extraction
_got_verdict=$(jq -r '.skill_effectiveness["frontend-design"].verdict' "$_outcome_dir/skill-outcome.json" 2>/dev/null)
assert_eq "effective" "$_got_verdict" "verdict extracted correctly"

# Verify refinement extraction
_got_ref_skill=$(jq -r '.refinements[0].skill' "$_outcome_dir/skill-outcome.json" 2>/dev/null)
assert_eq "frontend-design" "$_got_ref_skill" "refinement skill extracted"

echo ""
echo " ── Refinement file writing ──"

# Test: skill_apply_refinements writes patch files
_ref_dir="${SKILLS_DIR}/generated/_refinements"
mkdir -p "$_ref_dir"
skill_apply_refinements "$_outcome_dir/skill-outcome.json" 2>/dev/null || true
assert_true "[[ -f '$_ref_dir/frontend-design.patch.md' ]]" "refinement patch file created"
_patch_text=$(cat "$_ref_dir/frontend-design.patch.md" 2>/dev/null || true)
assert_contains "$_patch_text" "dashboard" "refinement content written"
rm -f "$_ref_dir/frontend-design.patch.md"

echo ""
echo " ── Generated skill lifecycle ──"

# Test: prune verdict deletes generated skill
mkdir -p "${SKILLS_DIR}/generated"
printf '%s\n' "## Temp Skill" > "${SKILLS_DIR}/generated/temp-skill.md"
_prune_outcome='{"skill_effectiveness":{},"refinements":[],"generated_skill_verdict":{"temp-skill":"prune"}}'
printf '%s\n' "$_prune_outcome" > "$_outcome_dir/skill-outcome.json"
skill_apply_lifecycle_verdicts "$_outcome_dir/skill-outcome.json" 2>/dev/null || true
assert_true "[[ ! -f '${SKILLS_DIR}/generated/temp-skill.md' ]]" "pruned skill deleted"

rm -rf "$_outcome_dir"
# ═══════════════════════════════════════════════════════════════════════════════
# Suite 14: Full AI Integration
# ═══════════════════════════════════════════════════════════════════════════════

echo ""
echo "═══ Suite 14: Full AI Integration ═══"
echo ""

echo " ── End-to-end skill flow ──"

# Test: catalog → plan → load → outcome cycle
_e2e_dir=$(mktemp -d)

# 1. Build catalog (should include all 17 curated skills)
_catalog=$(skill_build_catalog 2>/dev/null || true)
# BUGFIX: `grep -c` prints "0" AND exits 1 when nothing matches, so the old
# `|| echo "0"` fallback produced a two-line "0\n0" value that broke the
# numeric [[ -ge ]] test below. grep -c always prints a count, so only the
# non-zero exit status needs absorbing.
_catalog_lines=$(grep -c -- '^-' <<<"$_catalog" || true)
assert_true "[[ $_catalog_lines -ge 17 ]]" "catalog has at least 17 skills (got $_catalog_lines)"

# 2. Write a skill plan (simulating what skill_analyze_issue would produce)
cat > "$_e2e_dir/skill-plan.json" << 'E2E_PLAN'
{
  "issue_type": "api",
  "confidence": 0.88,
  "skill_plan": {
    "plan": ["brainstorming", "api-design"],
    "build": ["api-design"],
    "review": ["two-stage-review", "security-audit"]
  },
  "skill_rationale": {
    "api-design": "REST endpoint versioning needed",
    "brainstorming": "Multiple valid API approaches",
    "two-stage-review": "Spec compliance for API contract",
    "security-audit": "Auth endpoint requires security review"
  },
  "generated_skills": []
}
E2E_PLAN

# 3. Load from plan for each stage.
# BUGFIX: the old `ARTIFACTS_DIR=... _plan_out=$(...)` form is a pure-assignment
# statement, so ARTIFACTS_DIR was permanently set in the current shell and
# leaked into every later test. The env prefix must live inside the command
# substitution (as Suite 12 already does) to scope it to the call only.
_plan_out=$(ARTIFACTS_DIR="$_e2e_dir" skill_load_from_plan "plan" 2>/dev/null || true)
_build_out=$(ARTIFACTS_DIR="$_e2e_dir" skill_load_from_plan "build" 2>/dev/null || true)
_review_out=$(ARTIFACTS_DIR="$_e2e_dir" skill_load_from_plan "review" 2>/dev/null || true)

assert_contains "$_plan_out" "api-design" "plan loads api-design skill"
assert_contains "$_plan_out" "REST endpoint" "plan includes rationale"
assert_contains "$_build_out" "api-design" "build loads api-design"
assert_not_contains "$_build_out" "brainstorming" "build doesn't load plan-only skills"
assert_contains "$_review_out" "two-stage-review" "review loads two-stage-review"
assert_contains "$_review_out" "security-audit" "review loads security-audit"

# 4. Test fallback chain (no plan → adaptive → static)
_no_plan_dir=$(mktemp -d)
_fallback_out=$(ARTIFACTS_DIR="$_no_plan_dir" INTELLIGENCE_ISSUE_TYPE="frontend" skill_load_from_plan "plan" 2>/dev/null || true)
assert_contains "$_fallback_out" "brainstorming\|frontend\|Socratic" "fallback produces output when no plan exists"

# 5. Verify generated skill directory structure
assert_true "[[ -d '$SKILLS_DIR/generated' ]]" "generated skills directory exists"
assert_true "[[ -d '$SKILLS_DIR/generated/_refinements' ]]" "refinements directory exists"

rm -rf "$_e2e_dir" "$_no_plan_dir"
# ═══════════════════════════════════════════════════════════════════════════════
# RESULTS
# ═══════════════════════════════════════════════════════════════════════════════

echo ""
echo "═══════════════════════════════════════════"
TOTAL=$((PASS + FAIL))
if [[ "$FAIL" -eq 0 ]]; then
  printf "\033[32m ALL %d TESTS PASSED ✓\033[0m\n" "$TOTAL"
else
  printf "\033[31m %d/%d PASSED, %d FAILED\033[0m\n" "$PASS" "$TOTAL" "$FAIL"
  echo ""
  echo " Failures:"
  # BUGFIX: `echo -e` is non-portable and misparses a value beginning with
  # a dash; printf '%b' gives the same backslash-escape expansion safely.
  printf '%b\n' "$ERRORS"
fi
echo "═══════════════════════════════════════════"
echo ""

# BUGFIX: exit codes are taken modulo 256, so `exit "$FAIL"` with 256
# failures would report success. Clamp to 255 to keep "non-zero iff any
# failure" while preserving the exact count below the wrap point.
exit "$(( FAIL > 255 ? 255 : FAIL ))"