workerssuper 5.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +20 -0
- package/.claude-plugin/plugin.json +13 -0
- package/.codex/INSTALL.md +67 -0
- package/.cursor-plugin/plugin.json +18 -0
- package/.gitattributes +18 -0
- package/.github/FUNDING.yml +3 -0
- package/.github/ISSUE_TEMPLATE/bug_report.md +52 -0
- package/.github/ISSUE_TEMPLATE/config.yml +5 -0
- package/.github/ISSUE_TEMPLATE/feature_request.md +34 -0
- package/.github/ISSUE_TEMPLATE/platform_support.md +23 -0
- package/.github/PULL_REQUEST_TEMPLATE.md +87 -0
- package/.opencode/INSTALL.md +83 -0
- package/.opencode/plugins/superpowers.js +107 -0
- package/CHANGELOG.md +13 -0
- package/CODE_OF_CONDUCT.md +128 -0
- package/GEMINI.md +2 -0
- package/LICENSE +21 -0
- package/README.md +187 -0
- package/RELEASE-NOTES.md +1057 -0
- package/agents/code-reviewer.md +48 -0
- package/commands/brainstorm.md +5 -0
- package/commands/execute-plan.md +5 -0
- package/commands/write-plan.md +5 -0
- package/docs/README.codex.md +126 -0
- package/docs/README.opencode.md +130 -0
- package/docs/plans/2025-11-22-opencode-support-design.md +294 -0
- package/docs/plans/2025-11-22-opencode-support-implementation.md +1095 -0
- package/docs/plans/2025-11-28-skills-improvements-from-user-feedback.md +711 -0
- package/docs/plans/2026-01-17-visual-brainstorming.md +571 -0
- package/docs/superpowers/plans/2026-01-22-document-review-system.md +301 -0
- package/docs/superpowers/plans/2026-02-19-visual-brainstorming-refactor.md +523 -0
- package/docs/superpowers/plans/2026-03-11-zero-dep-brainstorm-server.md +479 -0
- package/docs/superpowers/specs/2026-01-22-document-review-system-design.md +136 -0
- package/docs/superpowers/specs/2026-02-19-visual-brainstorming-refactor-design.md +162 -0
- package/docs/superpowers/specs/2026-03-11-zero-dep-brainstorm-server-design.md +118 -0
- package/docs/testing.md +303 -0
- package/docs/windows/polyglot-hooks.md +212 -0
- package/gemini-extension.json +6 -0
- package/hooks/hooks-cursor.json +10 -0
- package/hooks/hooks.json +16 -0
- package/hooks/run-hook.cmd +46 -0
- package/hooks/session-start +57 -0
- package/package.json +5 -0
- package/skills/brainstorming/SKILL.md +164 -0
- package/skills/brainstorming/scripts/frame-template.html +214 -0
- package/skills/brainstorming/scripts/helper.js +88 -0
- package/skills/brainstorming/scripts/server.cjs +338 -0
- package/skills/brainstorming/scripts/start-server.sh +153 -0
- package/skills/brainstorming/scripts/stop-server.sh +55 -0
- package/skills/brainstorming/spec-document-reviewer-prompt.md +49 -0
- package/skills/brainstorming/visual-companion.md +286 -0
- package/skills/dispatching-parallel-agents/SKILL.md +182 -0
- package/skills/executing-plans/SKILL.md +70 -0
- package/skills/finishing-a-development-branch/SKILL.md +200 -0
- package/skills/receiving-code-review/SKILL.md +213 -0
- package/skills/requesting-code-review/SKILL.md +105 -0
- package/skills/requesting-code-review/code-reviewer.md +146 -0
- package/skills/subagent-driven-development/SKILL.md +277 -0
- package/skills/subagent-driven-development/code-quality-reviewer-prompt.md +26 -0
- package/skills/subagent-driven-development/implementer-prompt.md +113 -0
- package/skills/subagent-driven-development/spec-reviewer-prompt.md +61 -0
- package/skills/systematic-debugging/CREATION-LOG.md +119 -0
- package/skills/systematic-debugging/SKILL.md +296 -0
- package/skills/systematic-debugging/condition-based-waiting-example.ts +158 -0
- package/skills/systematic-debugging/condition-based-waiting.md +115 -0
- package/skills/systematic-debugging/defense-in-depth.md +122 -0
- package/skills/systematic-debugging/find-polluter.sh +63 -0
- package/skills/systematic-debugging/root-cause-tracing.md +169 -0
- package/skills/systematic-debugging/test-academic.md +14 -0
- package/skills/systematic-debugging/test-pressure-1.md +58 -0
- package/skills/systematic-debugging/test-pressure-2.md +68 -0
- package/skills/systematic-debugging/test-pressure-3.md +69 -0
- package/skills/test-driven-development/SKILL.md +371 -0
- package/skills/test-driven-development/testing-anti-patterns.md +299 -0
- package/skills/using-git-worktrees/SKILL.md +218 -0
- package/skills/using-superpowers/SKILL.md +115 -0
- package/skills/using-superpowers/references/codex-tools.md +25 -0
- package/skills/using-superpowers/references/gemini-tools.md +33 -0
- package/skills/verification-before-completion/SKILL.md +139 -0
- package/skills/writing-plans/SKILL.md +145 -0
- package/skills/writing-plans/plan-document-reviewer-prompt.md +49 -0
- package/skills/writing-skills/SKILL.md +655 -0
- package/skills/writing-skills/anthropic-best-practices.md +1150 -0
- package/skills/writing-skills/examples/CLAUDE_MD_TESTING.md +189 -0
- package/skills/writing-skills/graphviz-conventions.dot +172 -0
- package/skills/writing-skills/persuasion-principles.md +187 -0
- package/skills/writing-skills/render-graphs.js +168 -0
- package/skills/writing-skills/testing-skills-with-subagents.md +384 -0
- package/tests/brainstorm-server/package-lock.json +36 -0
- package/tests/brainstorm-server/package.json +10 -0
- package/tests/brainstorm-server/server.test.js +424 -0
- package/tests/brainstorm-server/windows-lifecycle.test.sh +351 -0
- package/tests/brainstorm-server/ws-protocol.test.js +392 -0
- package/tests/claude-code/README.md +158 -0
- package/tests/claude-code/analyze-token-usage.py +168 -0
- package/tests/claude-code/run-skill-tests.sh +187 -0
- package/tests/claude-code/test-document-review-system.sh +177 -0
- package/tests/claude-code/test-helpers.sh +202 -0
- package/tests/claude-code/test-subagent-driven-development-integration.sh +314 -0
- package/tests/claude-code/test-subagent-driven-development.sh +165 -0
- package/tests/explicit-skill-requests/prompts/action-oriented.txt +3 -0
- package/tests/explicit-skill-requests/prompts/after-planning-flow.txt +17 -0
- package/tests/explicit-skill-requests/prompts/claude-suggested-it.txt +11 -0
- package/tests/explicit-skill-requests/prompts/i-know-what-sdd-means.txt +8 -0
- package/tests/explicit-skill-requests/prompts/mid-conversation-execute-plan.txt +3 -0
- package/tests/explicit-skill-requests/prompts/please-use-brainstorming.txt +1 -0
- package/tests/explicit-skill-requests/prompts/skip-formalities.txt +3 -0
- package/tests/explicit-skill-requests/prompts/subagent-driven-development-please.txt +1 -0
- package/tests/explicit-skill-requests/prompts/use-systematic-debugging.txt +1 -0
- package/tests/explicit-skill-requests/run-all.sh +70 -0
- package/tests/explicit-skill-requests/run-claude-describes-sdd.sh +100 -0
- package/tests/explicit-skill-requests/run-extended-multiturn-test.sh +113 -0
- package/tests/explicit-skill-requests/run-haiku-test.sh +144 -0
- package/tests/explicit-skill-requests/run-multiturn-test.sh +143 -0
- package/tests/explicit-skill-requests/run-test.sh +136 -0
- package/tests/opencode/run-tests.sh +163 -0
- package/tests/opencode/setup.sh +73 -0
- package/tests/opencode/test-plugin-loading.sh +72 -0
- package/tests/opencode/test-priority.sh +198 -0
- package/tests/opencode/test-tools.sh +104 -0
- package/tests/skill-triggering/prompts/dispatching-parallel-agents.txt +8 -0
- package/tests/skill-triggering/prompts/executing-plans.txt +1 -0
- package/tests/skill-triggering/prompts/requesting-code-review.txt +3 -0
- package/tests/skill-triggering/prompts/systematic-debugging.txt +11 -0
- package/tests/skill-triggering/prompts/test-driven-development.txt +7 -0
- package/tests/skill-triggering/prompts/writing-plans.txt +10 -0
- package/tests/skill-triggering/run-all.sh +60 -0
- package/tests/skill-triggering/run-test.sh +88 -0
- package/tests/subagent-driven-dev/go-fractals/design.md +81 -0
- package/tests/subagent-driven-dev/go-fractals/plan.md +172 -0
- package/tests/subagent-driven-dev/go-fractals/scaffold.sh +45 -0
- package/tests/subagent-driven-dev/run-test.sh +106 -0
- package/tests/subagent-driven-dev/svelte-todo/design.md +70 -0
- package/tests/subagent-driven-dev/svelte-todo/plan.md +222 -0
- package/tests/subagent-driven-dev/svelte-todo/scaffold.sh +46 -0
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
[Previous assistant message]:
|
|
2
|
+
Plan complete and saved to docs/superpowers/plans/auth-system.md.
|
|
3
|
+
|
|
4
|
+
Two execution options:
|
|
5
|
+
1. Subagent-Driven (this session) - I dispatch a fresh subagent per task, review between tasks, fast iteration within this conversation
|
|
6
|
+
2. Parallel Session (separate) - Open a new Claude Code session with the execute-plan skill, batch execution with review checkpoints
|
|
7
|
+
|
|
8
|
+
Which approach do you want to use for implementation?
|
|
9
|
+
|
|
10
|
+
[Your response]:
|
|
11
|
+
subagent-driven-development, please
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
I have my implementation plan ready at docs/superpowers/plans/auth-system.md.
|
|
2
|
+
|
|
3
|
+
I want to use subagent-driven-development to execute it. That means:
|
|
4
|
+
- Dispatch a fresh subagent for each task in the plan
|
|
5
|
+
- Review the output between tasks
|
|
6
|
+
- Keep iteration fast within this conversation
|
|
7
|
+
|
|
8
|
+
Let's start - please read the plan and begin dispatching subagents for each task.
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
please use the brainstorming skill to help me think through this feature
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
subagent-driven-development, please
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
use systematic-debugging to figure out what's wrong
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
#!/usr/bin/env bash
# Runs every explicit-skill-request case through run-test.sh and prints a
# pass/fail summary.
#
# Usage: ./run-all.sh
# Exit status: 1 when at least one case failed, 0 otherwise.

set -e

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROMPTS_DIR="$SCRIPT_DIR/prompts"

echo "=== Running All Explicit Skill Request Tests ==="
echo ""

PASSED=0
FAILED=0
RESULTS=""

# One entry per case, in execution order: "<expected skill>|<case name>".
# The case name doubles as the prompt file's basename under $PROMPTS_DIR.
CASES=(
  "subagent-driven-development|subagent-driven-development-please"
  "systematic-debugging|use-systematic-debugging"
  "brainstorming|please-use-brainstorming"
  "subagent-driven-development|mid-conversation-execute-plan"
)

case_number=0
for entry in "${CASES[@]}"; do
  case_number=$((case_number + 1))
  expected_skill="${entry%%|*}"
  case_name="${entry#*|}"

  echo ">>> Test ${case_number}: ${case_name}"
  # Running the test inside `if` keeps a failing case from tripping `set -e`.
  if "$SCRIPT_DIR/run-test.sh" "$expected_skill" "$PROMPTS_DIR/${case_name}.txt"; then
    PASSED=$((PASSED + 1))
    RESULTS="$RESULTS\nPASS: ${case_name}"
  else
    FAILED=$((FAILED + 1))
    RESULTS="$RESULTS\nFAIL: ${case_name}"
  fi
  echo ""
done

echo "=== Summary ==="
# RESULTS holds literal "\n" separators; -e turns them into line breaks.
echo -e "$RESULTS"
echo ""
echo "Passed: $PASSED"
echo "Failed: $FAILED"
echo "Total: $((PASSED + FAILED))"

if (( FAILED > 0 )); then
  exit 1
fi
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
#!/usr/bin/env bash
# Test where Claude explicitly describes subagent-driven-development before
# the user requests it. This mimics the original failure scenario.
#
# Usage: ./run-claude-describes-sdd.sh
# Exit status: 0 when the final turn's log contains a Skill tool call for
#   subagent-driven-development, 1 otherwise.

set -e

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PLUGIN_DIR="$(cd "$SCRIPT_DIR/../.." && pwd)"

TIMESTAMP=$(date +%s)
OUTPUT_DIR="/tmp/superpowers-tests/${TIMESTAMP}/explicit-skill-requests/claude-describes"
mkdir -p "$OUTPUT_DIR"

PROJECT_DIR="$OUTPUT_DIR/project"
mkdir -p "$PROJECT_DIR/docs/superpowers/plans"

# Runs one `claude -p` turn with the haiku model and writes stdout+stderr to a log.
# Arguments: $1 - log file, $2 - --max-turns value, $3 - prompt,
#            $4... - extra claude flags (e.g. --continue)
# A non-zero exit from claude is ignored on purpose; the verdict comes from the log.
run_turn() {
  local log=$1 max_turns=$2 prompt=$3
  shift 3
  claude -p "$prompt" "$@" \
    --model haiku \
    --plugin-dir "$PLUGIN_DIR" \
    --dangerously-skip-permissions \
    --max-turns "$max_turns" \
    --output-format stream-json \
    > "$log" 2>&1 || true
}

# Copies stdin to stdout, or prints the fallback text $1 when stdin is empty.
# A trailing `|| echo ...` cannot do this job: without pipefail a pipeline's
# status is that of its last command (head/sort), which succeeds on empty input.
print_or_fallback() {
  local fallback=$1 captured
  captured=$(cat)
  if [ -n "$captured" ]; then
    printf '%s\n' "$captured"
  else
    printf '%s\n' "$fallback"
  fi
}

# Prints at most $2 bytes of the first assistant message in stream-json log $1.
# Prints nothing when there is no such line or jq cannot parse it.
assistant_excerpt() {
  local log=$1 limit=$2
  grep '"type":"assistant"' "$log" \
    | head -1 \
    | jq -r '.message.content[0].text // .message.content' 2>/dev/null \
    | head -c "$limit"
}

echo "=== Test: Claude Describes SDD First ==="
echo "Output dir: $OUTPUT_DIR"
echo ""

cd "$PROJECT_DIR"

# Create a plan
cat > "$PROJECT_DIR/docs/superpowers/plans/auth-system.md" << 'EOF'
# Auth System Implementation Plan

## Task 1: Add User Model
Create user model with email and password fields.

## Task 2: Add Auth Routes
Create login and register endpoints.

## Task 3: Add JWT Middleware
Protect routes with JWT validation.
EOF

# Turn 1: Have Claude describe execution options including SDD
echo ">>> Turn 1: Ask Claude to describe execution options..."
run_turn "$OUTPUT_DIR/turn1.json" 3 \
  "I have a plan at docs/superpowers/plans/auth-system.md. Tell me about my options for executing it, including what subagent-driven-development means and how it works."
echo "Done."

# Turn 2: THE CRITICAL TEST - now that Claude has explained it
echo ">>> Turn 2: Request subagent-driven-development..."
FINAL_LOG="$OUTPUT_DIR/turn2.json"
run_turn "$FINAL_LOG" 2 "subagent-driven-development, please" --continue
echo "Done."
echo ""

echo "=== Results ==="

# Check Turn 1 to see if Claude described SDD
echo "Turn 1 - Claude's description of options (excerpt):"
assistant_excerpt "$OUTPUT_DIR/turn1.json" 800 | print_or_fallback " (could not extract)"
echo ""
echo "---"
echo ""

# Check final turn: the skill name may carry an optional "<namespace>:" prefix.
SKILL_PATTERN='"skill":"([^"]*:)?subagent-driven-development"'
if grep -q '"name":"Skill"' "$FINAL_LOG" && grep -qE "$SKILL_PATTERN" "$FINAL_LOG"; then
  echo "PASS: Skill was triggered after Claude described it"
  TRIGGERED=true
else
  echo "FAIL: Skill was NOT triggered (Claude may have thought it already knew)"
  TRIGGERED=false

  echo ""
  echo "Tools invoked in final turn:"
  grep '"type":"tool_use"' "$FINAL_LOG" \
    | grep -o '"name":"[^"]*"' \
    | sort -u \
    | head -10 \
    | print_or_fallback " (none)"

  echo ""
  echo "Final turn response:"
  assistant_excerpt "$FINAL_LOG" 800 | print_or_fallback " (could not extract)"
fi

echo ""
echo "Skills triggered in final turn:"
grep -o '"skill":"[^"]*"' "$FINAL_LOG" 2>/dev/null \
  | sort -u \
  | print_or_fallback " (none)"

echo ""
echo "Logs in: $OUTPUT_DIR"

if [ "$TRIGGERED" = "true" ]; then
  exit 0
else
  exit 1
fi
|
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
#!/usr/bin/env bash
# Extended multi-turn test with more conversation history.
# This tries to reproduce the failure by building more context before the
# explicit skill request is made in the final turn.
#
# Usage: ./run-extended-multiturn-test.sh
# Exit status: 0 when the final turn's log contains a Skill tool call for
#   subagent-driven-development, 1 otherwise.

set -e

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PLUGIN_DIR="$(cd "$SCRIPT_DIR/../.." && pwd)"

TIMESTAMP=$(date +%s)
OUTPUT_DIR="/tmp/superpowers-tests/${TIMESTAMP}/explicit-skill-requests/extended-multiturn"
mkdir -p "$OUTPUT_DIR"

PROJECT_DIR="$OUTPUT_DIR/project"
mkdir -p "$PROJECT_DIR/docs/superpowers/plans"

# Runs one `claude -p` turn and writes stdout+stderr to a log.
# Arguments: $1 - log file, $2 - --max-turns value, $3 - prompt,
#            $4... - extra claude flags (e.g. --continue)
# A non-zero exit from claude is ignored on purpose; the verdict comes from the log.
run_turn() {
  local log=$1 max_turns=$2 prompt=$3
  shift 3
  claude -p "$prompt" "$@" \
    --plugin-dir "$PLUGIN_DIR" \
    --dangerously-skip-permissions \
    --max-turns "$max_turns" \
    --output-format stream-json \
    > "$log" 2>&1 || true
}

# Copies stdin to stdout, or prints the fallback text $1 when stdin is empty.
# A trailing `|| echo ...` cannot do this job: without pipefail a pipeline's
# status is that of its last command (head/sort), which succeeds on empty input.
print_or_fallback() {
  local fallback=$1 captured
  captured=$(cat)
  if [ -n "$captured" ]; then
    printf '%s\n' "$captured"
  else
    printf '%s\n' "$fallback"
  fi
}

# Prints up to ten tool names used in stream-json log $1.
# jq is tried first, reading the same .message.content array that the response
# excerpt below reads; when that yields nothing (jq missing, parse error, no
# match) it falls back to grepping every "name" field in the log.
list_tool_names() {
  local log=$1 names
  names=$(grep '"type":"tool_use"' "$log" \
    | jq -r '.message.content[]? | select(.type? == "tool_use") | .name' 2>/dev/null \
    | head -10)
  if [ -z "$names" ]; then
    names=$(grep -o '"name":"[^"]*"' "$log" | head -10)
  fi
  printf '%s' "$names"
}

echo "=== Extended Multi-Turn Test ==="
echo "Output dir: $OUTPUT_DIR"
echo "Plugin dir: $PLUGIN_DIR"
echo ""

cd "$PROJECT_DIR"

# Turn 1: Start brainstorming
echo ">>> Turn 1: Brainstorming request..."
run_turn "$OUTPUT_DIR/turn1.json" 3 \
  "I want to add user authentication to my app. Help me think through this."
echo "Done."

# Turn 2: Answer a brainstorming question
echo ">>> Turn 2: Answering questions..."
run_turn "$OUTPUT_DIR/turn2.json" 3 \
  "Let's use JWT tokens with 24-hour expiry. Email/password registration." \
  --continue
echo "Done."

# Turn 3: Ask to write a plan
echo ">>> Turn 3: Requesting plan..."
run_turn "$OUTPUT_DIR/turn3.json" 3 \
  "Great, write this up as an implementation plan." \
  --continue
echo "Done."

# Turn 4: Confirm plan looks good
echo ">>> Turn 4: Confirming plan..."
run_turn "$OUTPUT_DIR/turn4.json" 2 \
  "The plan looks good. What are my options for executing it?" \
  --continue
echo "Done."

# Turn 5: THE CRITICAL TEST
echo ">>> Turn 5: Requesting subagent-driven-development..."
FINAL_LOG="$OUTPUT_DIR/turn5.json"
run_turn "$FINAL_LOG" 2 "subagent-driven-development, please" --continue
echo "Done."
echo ""

echo "=== Results ==="

# Check final turn: the skill name may carry an optional "<namespace>:" prefix.
SKILL_PATTERN='"skill":"([^"]*:)?subagent-driven-development"'
if grep -q '"name":"Skill"' "$FINAL_LOG" && grep -qE "$SKILL_PATTERN" "$FINAL_LOG"; then
  echo "PASS: Skill was triggered"
  TRIGGERED=true
else
  echo "FAIL: Skill was NOT triggered"
  TRIGGERED=false

  # Show what was invoked instead
  echo ""
  echo "Tools invoked in final turn:"
  list_tool_names "$FINAL_LOG" | print_or_fallback " (none found)"
fi

echo ""
echo "Skills triggered:"
grep -o '"skill":"[^"]*"' "$FINAL_LOG" 2>/dev/null \
  | sort -u \
  | print_or_fallback " (none)"

echo ""
echo "Final turn response (first 500 chars):"
grep '"type":"assistant"' "$FINAL_LOG" \
  | head -1 \
  | jq -r '.message.content[0].text // .message.content' 2>/dev/null \
  | head -c 500 \
  | print_or_fallback " (could not extract)"

echo ""
echo "Logs in: $OUTPUT_DIR"

if [ "$TRIGGERED" = "true" ]; then
  exit 0
else
  exit 1
fi
|
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
#!/usr/bin/env bash
# Test with haiku model and user's CLAUDE.md.
# This tests whether a cheaper/faster model fails more easily.
#
# Usage: ./run-haiku-test.sh
# Exit status: 0 when the final turn's log contains a Skill tool call for
#   subagent-driven-development, 1 otherwise.

set -e

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PLUGIN_DIR="$(cd "$SCRIPT_DIR/../.." && pwd)"

TIMESTAMP=$(date +%s)
OUTPUT_DIR="/tmp/superpowers-tests/${TIMESTAMP}/explicit-skill-requests/haiku"
mkdir -p "$OUTPUT_DIR"

PROJECT_DIR="$OUTPUT_DIR/project"
mkdir -p "$PROJECT_DIR/docs/superpowers/plans"
mkdir -p "$PROJECT_DIR/.claude"

# Runs one `claude -p` turn with the haiku model and writes stdout+stderr to a log.
# Arguments: $1 - log file, $2 - --max-turns value, $3 - prompt,
#            $4... - extra claude flags (e.g. --continue)
# A non-zero exit from claude is ignored on purpose; the verdict comes from the log.
run_turn() {
  local log=$1 max_turns=$2 prompt=$3
  shift 3
  claude -p "$prompt" "$@" \
    --model haiku \
    --plugin-dir "$PLUGIN_DIR" \
    --dangerously-skip-permissions \
    --max-turns "$max_turns" \
    --output-format stream-json \
    > "$log" 2>&1 || true
}

# Copies stdin to stdout, or prints the fallback text $1 when stdin is empty.
# A trailing `|| echo ...` cannot do this job: without pipefail a pipeline's
# status is that of its last command (head/sort), which succeeds on empty input.
print_or_fallback() {
  local fallback=$1 captured
  captured=$(cat)
  if [ -n "$captured" ]; then
    printf '%s\n' "$captured"
  else
    printf '%s\n' "$fallback"
  fi
}

echo "=== Haiku Model Test with User CLAUDE.md ==="
echo "Output dir: $OUTPUT_DIR"
echo "Plugin dir: $PLUGIN_DIR"
echo ""

cd "$PROJECT_DIR"

# Copy user's CLAUDE.md to simulate real environment
if [ -f "$HOME/.claude/CLAUDE.md" ]; then
  cp "$HOME/.claude/CLAUDE.md" "$PROJECT_DIR/.claude/CLAUDE.md"
  echo "Copied user CLAUDE.md"
else
  echo "No user CLAUDE.md found, proceeding without"
fi

# Create a dummy plan file
cat > "$PROJECT_DIR/docs/superpowers/plans/auth-system.md" << 'EOF'
# Auth System Implementation Plan

## Task 1: Add User Model
Create user model with email and password fields.

## Task 2: Add Auth Routes
Create login and register endpoints.

## Task 3: Add JWT Middleware
Protect routes with JWT validation.

## Task 4: Write Tests
Add comprehensive test coverage.
EOF

echo ""

# Turn 1: Start brainstorming
echo ">>> Turn 1: Brainstorming request..."
run_turn "$OUTPUT_DIR/turn1.json" 3 \
  "I want to add user authentication to my app. Help me think through this."
echo "Done."

# Turn 2: Answer questions
echo ">>> Turn 2: Answering questions..."
run_turn "$OUTPUT_DIR/turn2.json" 3 \
  "Let's use JWT tokens with 24-hour expiry. Email/password registration." \
  --continue
echo "Done."

# Turn 3: Ask to write a plan
echo ">>> Turn 3: Requesting plan..."
run_turn "$OUTPUT_DIR/turn3.json" 3 \
  "Great, write this up as an implementation plan." \
  --continue
echo "Done."

# Turn 4: Confirm plan looks good
echo ">>> Turn 4: Confirming plan..."
run_turn "$OUTPUT_DIR/turn4.json" 2 \
  "The plan looks good. What are my options for executing it?" \
  --continue
echo "Done."

# Turn 5: THE CRITICAL TEST
echo ">>> Turn 5: Requesting subagent-driven-development..."
FINAL_LOG="$OUTPUT_DIR/turn5.json"
run_turn "$FINAL_LOG" 2 "subagent-driven-development, please" --continue
echo "Done."
echo ""

echo "=== Results (Haiku) ==="

# Check final turn: the skill name may carry an optional "<namespace>:" prefix.
SKILL_PATTERN='"skill":"([^"]*:)?subagent-driven-development"'
if grep -q '"name":"Skill"' "$FINAL_LOG" && grep -qE "$SKILL_PATTERN" "$FINAL_LOG"; then
  echo "PASS: Skill was triggered"
  TRIGGERED=true
else
  echo "FAIL: Skill was NOT triggered"
  TRIGGERED=false

  echo ""
  echo "Tools invoked in final turn:"
  grep '"type":"tool_use"' "$FINAL_LOG" \
    | grep -o '"name":"[^"]*"' \
    | head -10 \
    | print_or_fallback " (none)"
fi

echo ""
echo "Skills triggered:"
grep -o '"skill":"[^"]*"' "$FINAL_LOG" 2>/dev/null \
  | sort -u \
  | print_or_fallback " (none)"

echo ""
echo "Final turn response (first 500 chars):"
grep '"type":"assistant"' "$FINAL_LOG" \
  | head -1 \
  | jq -r '.message.content[0].text // .message.content' 2>/dev/null \
  | head -c 500 \
  | print_or_fallback " (could not extract)"

echo ""
echo "Logs in: $OUTPUT_DIR"

if [ "$TRIGGERED" = "true" ]; then
  exit 0
else
  exit 1
fi
|
|
@@ -0,0 +1,143 @@
|
|
|
1
|
+
#!/usr/bin/env bash
# Test explicit skill requests in multi-turn conversations
# Usage: ./run-multiturn-test.sh
#
# This test builds actual conversation history to reproduce the failure mode
# where Claude skips skill invocation after extended conversation.
#
# Exit status: 0 when the Turn 3 log contains a Skill tool call for
#   subagent-driven-development, 1 otherwise. The premature-tool check only
#   prints warnings; it does not affect the exit status.

set -e

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PLUGIN_DIR="$(cd "$SCRIPT_DIR/../.." && pwd)"

TIMESTAMP=$(date +%s)
OUTPUT_DIR="/tmp/superpowers-tests/${TIMESTAMP}/explicit-skill-requests/multiturn"
mkdir -p "$OUTPUT_DIR"

# Create project directory (conversation is cwd-based)
PROJECT_DIR="$OUTPUT_DIR/project"
mkdir -p "$PROJECT_DIR/docs/superpowers/plans"

# Runs one `claude -p` turn (max 2 agent turns) and writes stdout+stderr to a log.
# Arguments: $1 - log file, $2 - prompt, $3... - extra claude flags (e.g. --continue)
# A non-zero exit from claude is ignored on purpose; the verdict comes from the log.
run_turn() {
  local log=$1 prompt=$2
  shift 2
  claude -p "$prompt" "$@" \
    --plugin-dir "$PLUGIN_DIR" \
    --dangerously-skip-permissions \
    --max-turns 2 \
    --output-format stream-json \
    > "$log" 2>&1 || true
}

# Copies stdin to stdout, or prints the fallback text $1 when stdin is empty.
# A trailing `|| echo ...` cannot do this job: without pipefail a pipeline's
# status is that of its last command (head/sort), which succeeds on empty input.
print_or_fallback() {
  local fallback=$1 captured
  captured=$(cat)
  if [ -n "$captured" ]; then
    printf '%s\n' "$captured"
  else
    printf '%s\n' "$fallback"
  fi
}

echo "=== Multi-Turn Explicit Skill Request Test ==="
echo "Output dir: $OUTPUT_DIR"
echo "Project dir: $PROJECT_DIR"
echo "Plugin dir: $PLUGIN_DIR"
echo ""

cd "$PROJECT_DIR"

# Create a dummy plan file
cat > "$PROJECT_DIR/docs/superpowers/plans/auth-system.md" << 'EOF'
# Auth System Implementation Plan

## Task 1: Add User Model
Create user model with email and password fields.

## Task 2: Add Auth Routes
Create login and register endpoints.

## Task 3: Add JWT Middleware
Protect routes with JWT validation.

## Task 4: Write Tests
Add comprehensive test coverage.
EOF

# Turn 1: Start a planning conversation
echo ">>> Turn 1: Starting planning conversation..."
TURN1_LOG="$OUTPUT_DIR/turn1.json"
run_turn "$TURN1_LOG" \
  "I need to implement an authentication system. Let's plan this out. The requirements are: user registration with email/password, JWT tokens, and protected routes."

echo "Turn 1 complete."
echo ""

# Turn 2: Continue with more planning detail
echo ">>> Turn 2: Continuing planning..."
TURN2_LOG="$OUTPUT_DIR/turn2.json"
run_turn "$TURN2_LOG" \
  "Good analysis. I've already written the plan to docs/superpowers/plans/auth-system.md. Now I'm ready to implement. What are my options for execution?" \
  --continue

echo "Turn 2 complete."
echo ""

# Turn 3: The critical test - ask for subagent-driven-development
echo ">>> Turn 3: Requesting subagent-driven-development..."
TURN3_LOG="$OUTPUT_DIR/turn3.json"
run_turn "$TURN3_LOG" "subagent-driven-development, please" --continue

echo "Turn 3 complete."
echo ""

echo "=== Results ==="

# Check if skill was triggered in Turn 3; the skill name may carry an
# optional "<namespace>:" prefix.
SKILL_PATTERN='"skill":"([^"]*:)?subagent-driven-development"'
if grep -q '"name":"Skill"' "$TURN3_LOG" && grep -qE "$SKILL_PATTERN" "$TURN3_LOG"; then
  echo "PASS: Skill 'subagent-driven-development' was triggered in Turn 3"
  TRIGGERED=true
else
  echo "FAIL: Skill 'subagent-driven-development' was NOT triggered in Turn 3"
  TRIGGERED=false
fi

# Show what skills were triggered
echo ""
echo "Skills triggered in Turn 3:"
grep -o '"skill":"[^"]*"' "$TURN3_LOG" 2>/dev/null \
  | sort -u \
  | print_or_fallback " (none)"

# Check for premature action in Turn 3
echo ""
echo "Checking for premature action in Turn 3..."
FIRST_SKILL_LINE=$(grep -n '"name":"Skill"' "$TURN3_LOG" | head -1 | cut -d: -f1)
if [ -n "$FIRST_SKILL_LINE" ]; then
  # Look only at log lines up to the first Skill call; Skill and TodoWrite
  # calls themselves are not counted as premature.
  PREMATURE_TOOLS=$(head -n "$FIRST_SKILL_LINE" "$TURN3_LOG" | \
    grep '"type":"tool_use"' | \
    grep -v '"name":"Skill"' | \
    grep -v '"name":"TodoWrite"' || true)
  if [ -n "$PREMATURE_TOOLS" ]; then
    echo "WARNING: Tools invoked BEFORE Skill tool in Turn 3:"
    echo "$PREMATURE_TOOLS" | head -5
  else
    echo "OK: No premature tool invocations detected"
  fi
else
  echo "WARNING: No Skill invocation found in Turn 3"
  # Show what WAS invoked
  echo ""
  echo "Tools invoked in Turn 3:"
  grep '"type":"tool_use"' "$TURN3_LOG" \
    | grep -o '"name":"[^"]*"' \
    | head -10 \
    | print_or_fallback " (none)"
fi

# Show Turn 3 assistant response
echo ""
echo "Turn 3 first assistant response (truncated):"
grep '"type":"assistant"' "$TURN3_LOG" \
  | head -1 \
  | jq -r '.message.content[0].text // .message.content' 2>/dev/null \
  | head -c 500 \
  | print_or_fallback " (could not extract)"

echo ""
echo "Logs:"
echo " Turn 1: $TURN1_LOG"
echo " Turn 2: $TURN2_LOG"
echo " Turn 3: $TURN3_LOG"
echo "Timestamp: $TIMESTAMP"

if [ "$TRIGGERED" = "true" ]; then
  exit 0
else
  exit 1
fi
|