workerssuper 5.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (135):
  1. package/.claude-plugin/marketplace.json +20 -0
  2. package/.claude-plugin/plugin.json +13 -0
  3. package/.codex/INSTALL.md +67 -0
  4. package/.cursor-plugin/plugin.json +18 -0
  5. package/.gitattributes +18 -0
  6. package/.github/FUNDING.yml +3 -0
  7. package/.github/ISSUE_TEMPLATE/bug_report.md +52 -0
  8. package/.github/ISSUE_TEMPLATE/config.yml +5 -0
  9. package/.github/ISSUE_TEMPLATE/feature_request.md +34 -0
  10. package/.github/ISSUE_TEMPLATE/platform_support.md +23 -0
  11. package/.github/PULL_REQUEST_TEMPLATE.md +87 -0
  12. package/.opencode/INSTALL.md +83 -0
  13. package/.opencode/plugins/superpowers.js +107 -0
  14. package/CHANGELOG.md +13 -0
  15. package/CODE_OF_CONDUCT.md +128 -0
  16. package/GEMINI.md +2 -0
  17. package/LICENSE +21 -0
  18. package/README.md +187 -0
  19. package/RELEASE-NOTES.md +1057 -0
  20. package/agents/code-reviewer.md +48 -0
  21. package/commands/brainstorm.md +5 -0
  22. package/commands/execute-plan.md +5 -0
  23. package/commands/write-plan.md +5 -0
  24. package/docs/README.codex.md +126 -0
  25. package/docs/README.opencode.md +130 -0
  26. package/docs/plans/2025-11-22-opencode-support-design.md +294 -0
  27. package/docs/plans/2025-11-22-opencode-support-implementation.md +1095 -0
  28. package/docs/plans/2025-11-28-skills-improvements-from-user-feedback.md +711 -0
  29. package/docs/plans/2026-01-17-visual-brainstorming.md +571 -0
  30. package/docs/superpowers/plans/2026-01-22-document-review-system.md +301 -0
  31. package/docs/superpowers/plans/2026-02-19-visual-brainstorming-refactor.md +523 -0
  32. package/docs/superpowers/plans/2026-03-11-zero-dep-brainstorm-server.md +479 -0
  33. package/docs/superpowers/specs/2026-01-22-document-review-system-design.md +136 -0
  34. package/docs/superpowers/specs/2026-02-19-visual-brainstorming-refactor-design.md +162 -0
  35. package/docs/superpowers/specs/2026-03-11-zero-dep-brainstorm-server-design.md +118 -0
  36. package/docs/testing.md +303 -0
  37. package/docs/windows/polyglot-hooks.md +212 -0
  38. package/gemini-extension.json +6 -0
  39. package/hooks/hooks-cursor.json +10 -0
  40. package/hooks/hooks.json +16 -0
  41. package/hooks/run-hook.cmd +46 -0
  42. package/hooks/session-start +57 -0
  43. package/package.json +5 -0
  44. package/skills/brainstorming/SKILL.md +164 -0
  45. package/skills/brainstorming/scripts/frame-template.html +214 -0
  46. package/skills/brainstorming/scripts/helper.js +88 -0
  47. package/skills/brainstorming/scripts/server.cjs +338 -0
  48. package/skills/brainstorming/scripts/start-server.sh +153 -0
  49. package/skills/brainstorming/scripts/stop-server.sh +55 -0
  50. package/skills/brainstorming/spec-document-reviewer-prompt.md +49 -0
  51. package/skills/brainstorming/visual-companion.md +286 -0
  52. package/skills/dispatching-parallel-agents/SKILL.md +182 -0
  53. package/skills/executing-plans/SKILL.md +70 -0
  54. package/skills/finishing-a-development-branch/SKILL.md +200 -0
  55. package/skills/receiving-code-review/SKILL.md +213 -0
  56. package/skills/requesting-code-review/SKILL.md +105 -0
  57. package/skills/requesting-code-review/code-reviewer.md +146 -0
  58. package/skills/subagent-driven-development/SKILL.md +277 -0
  59. package/skills/subagent-driven-development/code-quality-reviewer-prompt.md +26 -0
  60. package/skills/subagent-driven-development/implementer-prompt.md +113 -0
  61. package/skills/subagent-driven-development/spec-reviewer-prompt.md +61 -0
  62. package/skills/systematic-debugging/CREATION-LOG.md +119 -0
  63. package/skills/systematic-debugging/SKILL.md +296 -0
  64. package/skills/systematic-debugging/condition-based-waiting-example.ts +158 -0
  65. package/skills/systematic-debugging/condition-based-waiting.md +115 -0
  66. package/skills/systematic-debugging/defense-in-depth.md +122 -0
  67. package/skills/systematic-debugging/find-polluter.sh +63 -0
  68. package/skills/systematic-debugging/root-cause-tracing.md +169 -0
  69. package/skills/systematic-debugging/test-academic.md +14 -0
  70. package/skills/systematic-debugging/test-pressure-1.md +58 -0
  71. package/skills/systematic-debugging/test-pressure-2.md +68 -0
  72. package/skills/systematic-debugging/test-pressure-3.md +69 -0
  73. package/skills/test-driven-development/SKILL.md +371 -0
  74. package/skills/test-driven-development/testing-anti-patterns.md +299 -0
  75. package/skills/using-git-worktrees/SKILL.md +218 -0
  76. package/skills/using-superpowers/SKILL.md +115 -0
  77. package/skills/using-superpowers/references/codex-tools.md +25 -0
  78. package/skills/using-superpowers/references/gemini-tools.md +33 -0
  79. package/skills/verification-before-completion/SKILL.md +139 -0
  80. package/skills/writing-plans/SKILL.md +145 -0
  81. package/skills/writing-plans/plan-document-reviewer-prompt.md +49 -0
  82. package/skills/writing-skills/SKILL.md +655 -0
  83. package/skills/writing-skills/anthropic-best-practices.md +1150 -0
  84. package/skills/writing-skills/examples/CLAUDE_MD_TESTING.md +189 -0
  85. package/skills/writing-skills/graphviz-conventions.dot +172 -0
  86. package/skills/writing-skills/persuasion-principles.md +187 -0
  87. package/skills/writing-skills/render-graphs.js +168 -0
  88. package/skills/writing-skills/testing-skills-with-subagents.md +384 -0
  89. package/tests/brainstorm-server/package-lock.json +36 -0
  90. package/tests/brainstorm-server/package.json +10 -0
  91. package/tests/brainstorm-server/server.test.js +424 -0
  92. package/tests/brainstorm-server/windows-lifecycle.test.sh +351 -0
  93. package/tests/brainstorm-server/ws-protocol.test.js +392 -0
  94. package/tests/claude-code/README.md +158 -0
  95. package/tests/claude-code/analyze-token-usage.py +168 -0
  96. package/tests/claude-code/run-skill-tests.sh +187 -0
  97. package/tests/claude-code/test-document-review-system.sh +177 -0
  98. package/tests/claude-code/test-helpers.sh +202 -0
  99. package/tests/claude-code/test-subagent-driven-development-integration.sh +314 -0
  100. package/tests/claude-code/test-subagent-driven-development.sh +165 -0
  101. package/tests/explicit-skill-requests/prompts/action-oriented.txt +3 -0
  102. package/tests/explicit-skill-requests/prompts/after-planning-flow.txt +17 -0
  103. package/tests/explicit-skill-requests/prompts/claude-suggested-it.txt +11 -0
  104. package/tests/explicit-skill-requests/prompts/i-know-what-sdd-means.txt +8 -0
  105. package/tests/explicit-skill-requests/prompts/mid-conversation-execute-plan.txt +3 -0
  106. package/tests/explicit-skill-requests/prompts/please-use-brainstorming.txt +1 -0
  107. package/tests/explicit-skill-requests/prompts/skip-formalities.txt +3 -0
  108. package/tests/explicit-skill-requests/prompts/subagent-driven-development-please.txt +1 -0
  109. package/tests/explicit-skill-requests/prompts/use-systematic-debugging.txt +1 -0
  110. package/tests/explicit-skill-requests/run-all.sh +70 -0
  111. package/tests/explicit-skill-requests/run-claude-describes-sdd.sh +100 -0
  112. package/tests/explicit-skill-requests/run-extended-multiturn-test.sh +113 -0
  113. package/tests/explicit-skill-requests/run-haiku-test.sh +144 -0
  114. package/tests/explicit-skill-requests/run-multiturn-test.sh +143 -0
  115. package/tests/explicit-skill-requests/run-test.sh +136 -0
  116. package/tests/opencode/run-tests.sh +163 -0
  117. package/tests/opencode/setup.sh +73 -0
  118. package/tests/opencode/test-plugin-loading.sh +72 -0
  119. package/tests/opencode/test-priority.sh +198 -0
  120. package/tests/opencode/test-tools.sh +104 -0
  121. package/tests/skill-triggering/prompts/dispatching-parallel-agents.txt +8 -0
  122. package/tests/skill-triggering/prompts/executing-plans.txt +1 -0
  123. package/tests/skill-triggering/prompts/requesting-code-review.txt +3 -0
  124. package/tests/skill-triggering/prompts/systematic-debugging.txt +11 -0
  125. package/tests/skill-triggering/prompts/test-driven-development.txt +7 -0
  126. package/tests/skill-triggering/prompts/writing-plans.txt +10 -0
  127. package/tests/skill-triggering/run-all.sh +60 -0
  128. package/tests/skill-triggering/run-test.sh +88 -0
  129. package/tests/subagent-driven-dev/go-fractals/design.md +81 -0
  130. package/tests/subagent-driven-dev/go-fractals/plan.md +172 -0
  131. package/tests/subagent-driven-dev/go-fractals/scaffold.sh +45 -0
  132. package/tests/subagent-driven-dev/run-test.sh +106 -0
  133. package/tests/subagent-driven-dev/svelte-todo/design.md +70 -0
  134. package/tests/subagent-driven-dev/svelte-todo/plan.md +222 -0
  135. package/tests/subagent-driven-dev/svelte-todo/scaffold.sh +46 -0
@@ -0,0 +1,168 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Analyze token usage from Claude Code session transcripts.
4
+ Breaks down usage by main session and individual subagents.
5
+ """
6
+
7
+ import json
8
+ import sys
9
+ from pathlib import Path
10
+ from collections import defaultdict
11
+
12
def _add_usage(totals, usage):
    """Add one API ``usage`` mapping onto a running ``totals`` dict.

    Missing or ``null`` counters are treated as 0 so that a single absent
    field does not discard the other counters of the same record.
    """
    totals['input_tokens'] += usage.get('input_tokens') or 0
    totals['output_tokens'] += usage.get('output_tokens') or 0
    totals['cache_creation'] += usage.get('cache_creation_input_tokens') or 0
    totals['cache_read'] += usage.get('cache_read_input_tokens') or 0


def analyze_main_session(filepath):
    """Analyze a session file and return token usage broken down by agent.

    The file is read as JSON Lines (one JSON object per line).

    Args:
        filepath: Path of the session transcript to read.

    Returns:
        A ``(main_usage, subagent_usage)`` tuple.  ``main_usage`` is a dict
        with the keys ``input_tokens``, ``output_tokens``, ``cache_creation``,
        ``cache_read`` and ``messages``.  ``subagent_usage`` maps each agent id
        to a dict with the same keys plus ``description``.

    Raises:
        OSError: If the file cannot be opened.
    """
    main_usage = {
        'input_tokens': 0,
        'output_tokens': 0,
        'cache_creation': 0,
        'cache_read': 0,
        'messages': 0
    }

    # Track usage per subagent
    subagent_usage = defaultdict(lambda: {
        'input_tokens': 0,
        'output_tokens': 0,
        'cache_creation': 0,
        'cache_read': 0,
        'messages': 0,
        'description': None
    })

    # Decode explicitly as UTF-8 instead of the locale default: decoding
    # happens while iterating the file, outside the per-line try below, so a
    # locale mismatch would otherwise abort the whole analysis.
    with open(filepath, 'r', encoding='utf-8', errors='replace') as f:
        for line in f:
            try:
                data = json.loads(line)

                # Main session assistant messages
                if data.get('type') == 'assistant' and 'message' in data:
                    main_usage['messages'] += 1
                    _add_usage(main_usage, data['message'].get('usage') or {})

                # Subagent tool results
                if data.get('type') == 'user' and 'toolUseResult' in data:
                    result = data['toolUseResult']
                    if isinstance(result, dict) and 'usage' in result and 'agentId' in result:
                        agent_id = result['agentId']
                        entry = subagent_usage[agent_id]

                        # Get description from prompt if available
                        if entry['description'] is None:
                            prompt = result.get('prompt', '')
                            # Extract first line as description
                            first_line = prompt.split('\n')[0] if prompt else f"agent-{agent_id}"
                            if first_line.startswith('You are '):
                                first_line = first_line[8:]  # Remove "You are "
                            entry['description'] = first_line[:60]

                        entry['messages'] += 1
                        _add_usage(entry, result['usage'] or {})
            except Exception:
                # Deliberately best-effort: blank lines, lines that are not
                # valid JSON and records with an unexpected shape are skipped
                # so one bad line cannot abort the analysis.
                continue

    return main_usage, dict(subagent_usage)
71
+
72
def format_tokens(n):
    """Render a token count with comma thousands separators (1234567 -> '1,234,567')."""
    return format(n, ',')
75
+
76
def calculate_cost(usage, input_cost_per_m=3.0, output_cost_per_m=15.0):
    """Estimate the dollar cost of a usage record.

    Plain input, cache-creation and cache-read tokens are all charged at
    ``input_cost_per_m`` (dollars per million tokens); output tokens are
    charged at ``output_cost_per_m``.
    """
    input_side_keys = ('input_tokens', 'cache_creation', 'cache_read')
    billable_input = sum(usage[key] for key in input_side_keys)
    dollars_in = billable_input * input_cost_per_m / 1_000_000
    dollars_out = usage['output_tokens'] * output_cost_per_m / 1_000_000
    return dollars_in + dollars_out
82
+
83
def _print_usage_row(agent, description, usage):
    """Print one row of the usage table for a single agent.

    The description is cut to the 35-character column width so that long
    prompt-derived descriptions cannot push the numeric columns out of line.
    The "Cache" column shows cache-read tokens only.
    """
    cost = calculate_cost(usage)
    print(f"{agent:<15} {description[:35]:<35} "
          f"{usage['messages']:>5} "
          f"{format_tokens(usage['input_tokens']):>10} "
          f"{format_tokens(usage['output_tokens']):>10} "
          f"{format_tokens(usage['cache_read']):>10} "
          f"${cost:>7.2f}")


def main():
    """Command-line entry point: print a token usage report for one session file.

    Reads the session file path from ``sys.argv[1]``.  Exits with status 1
    (message on stderr) when the argument is missing or the file does not
    exist; otherwise prints a per-agent table followed by overall totals.
    """
    if len(sys.argv) < 2:
        # Diagnostics go to stderr so they never mix into a captured report.
        print("Usage: analyze-token-usage.py <session-file.jsonl>", file=sys.stderr)
        sys.exit(1)

    main_session_file = sys.argv[1]

    if not Path(main_session_file).exists():
        print(f"Error: Session file not found: {main_session_file}", file=sys.stderr)
        sys.exit(1)

    # Analyze the session
    main_usage, subagent_usage = analyze_main_session(main_session_file)

    print("=" * 100)
    print("TOKEN USAGE ANALYSIS")
    print("=" * 100)
    print()

    # Print breakdown
    print("Usage Breakdown:")
    print("-" * 100)
    print(f"{'Agent':<15} {'Description':<35} {'Msgs':>5} {'Input':>10} {'Output':>10} {'Cache':>10} {'Cost':>8}")
    print("-" * 100)

    # Main session
    _print_usage_row('main', 'Main session (coordinator)', main_usage)

    # Subagents (sorted by agent ID)
    for agent_id in sorted(subagent_usage.keys()):
        usage = subagent_usage[agent_id]
        _print_usage_row(agent_id, usage['description'] or f"agent-{agent_id}", usage)

    print("-" * 100)

    # Calculate totals: start from the main session, then add every subagent.
    counters = ('input_tokens', 'output_tokens', 'cache_creation', 'cache_read', 'messages')
    total_usage = {key: main_usage[key] for key in counters}
    for usage in subagent_usage.values():
        for key in counters:
            total_usage[key] += usage[key]

    total_input = total_usage['input_tokens'] + total_usage['cache_creation'] + total_usage['cache_read']
    total_tokens = total_input + total_usage['output_tokens']
    total_cost = calculate_cost(total_usage)

    print()
    print("TOTALS:")
    print(f" Total messages: {format_tokens(total_usage['messages'])}")
    print(f" Input tokens: {format_tokens(total_usage['input_tokens'])}")
    print(f" Output tokens: {format_tokens(total_usage['output_tokens'])}")
    print(f" Cache creation tokens: {format_tokens(total_usage['cache_creation'])}")
    print(f" Cache read tokens: {format_tokens(total_usage['cache_read'])}")
    print()
    print(f" Total input (incl cache): {format_tokens(total_input)}")
    print(f" Total tokens: {format_tokens(total_tokens)}")
    print()
    print(f" Estimated cost: ${total_cost:.2f}")
    print(" (at $3/$15 per M tokens for input/output)")
    print()
    print("=" * 100)


if __name__ == '__main__':
    main()
@@ -0,0 +1,187 @@
1
#!/usr/bin/env bash
# Test runner for Claude Code skills
# Tests skills by invoking Claude Code CLI and verifying behavior
#
# Options: --verbose/-v, --test/-t NAME, --timeout SECONDS,
#          --integration/-i, --help/-h
# Exit status: 0 when no test failed; 1 when a test failed, on bad usage,
# or when a required tool (claude, timeout) is missing.
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
cd "$SCRIPT_DIR"

echo "========================================"
echo " Claude Code Skills Test Suite"
echo "========================================"
echo ""
echo "Repository: $(cd ../.. && pwd)"
echo "Test time: $(date)"
echo "Claude version: $(claude --version 2>/dev/null || echo 'not found')"
echo ""

# Print the help text to stdout.
usage() {
  echo "Usage: $0 [options]"
  echo ""
  echo "Options:"
  echo " --verbose, -v Show verbose output"
  echo " --test, -t NAME Run only the specified test"
  echo " --timeout SECONDS Set timeout per test (default: 300)"
  echo " --integration, -i Run integration tests (slow, 10-30 min)"
  echo " --help, -h Show this help"
  echo ""
  echo "Tests:"
  echo " test-subagent-driven-development.sh Test skill loading and requirements"
  echo ""
  echo "Integration Tests (use --integration):"
  echo " test-subagent-driven-development-integration.sh Full workflow execution"
}

# Abort with a readable message when an option that takes a value is the last
# argument. Without this, "$2" trips `set -u` with a bare "unbound variable".
# Arguments: the remaining command line ("$@"), option first.
require_value() {
  if [[ $# -lt 2 ]]; then
    echo "Option $1 requires an argument" >&2
    echo "Use --help for usage information" >&2
    exit 1
  fi
}

# Parse command line arguments (before the tool checks, so --help always works)
VERBOSE=false
SPECIFIC_TEST=""
TIMEOUT=300 # Default 5 minute timeout per test
RUN_INTEGRATION=false

while [[ $# -gt 0 ]]; do
  case "$1" in
    --verbose|-v)
      VERBOSE=true
      shift
      ;;
    --test|-t)
      require_value "$@"
      SPECIFIC_TEST="$2"
      shift 2
      ;;
    --timeout)
      require_value "$@"
      TIMEOUT="$2"
      shift 2
      ;;
    --integration|-i)
      RUN_INTEGRATION=true
      shift
      ;;
    --help|-h)
      usage
      exit 0
      ;;
    *)
      echo "Unknown option: $1"
      echo "Use --help for usage information"
      exit 1
      ;;
  esac
done

# Check if Claude Code is available
if ! command -v claude &> /dev/null; then
  echo "ERROR: Claude Code CLI not found"
  echo "Install Claude Code first: https://code.claude.com"
  exit 1
fi

# Locate a timeout implementation. Some systems ship GNU coreutils with a
# "g" prefix (gtimeout); without either, every test would be reported as
# failed with status 127 instead of a clear error.
if command -v timeout > /dev/null 2>&1; then
  TIMEOUT_CMD=timeout
elif command -v gtimeout > /dev/null 2>&1; then
  TIMEOUT_CMD=gtimeout
else
  echo "ERROR: 'timeout' command not found (provided by GNU coreutils)" >&2
  exit 1
fi

# List of skill tests to run (fast unit tests)
tests=(
  "test-subagent-driven-development.sh"
)

# Integration tests (slow, full execution)
integration_tests=(
  "test-subagent-driven-development-integration.sh"
)

# Add integration tests if requested
if [ "$RUN_INTEGRATION" = true ]; then
  tests+=("${integration_tests[@]}")
fi

# Filter to specific test if requested
if [ -n "$SPECIFIC_TEST" ]; then
  tests=("$SPECIFIC_TEST")
fi

# Track results
passed=0
failed=0
skipped=0

#######################################
# Run one test script under the timeout and record the outcome.
# Globals:   SCRIPT_DIR, VERBOSE, TIMEOUT, TIMEOUT_CMD (read);
#            passed, failed, skipped (written)
# Arguments: $1 - test file name, relative to SCRIPT_DIR
# Outputs:   progress and result lines on stdout; in non-verbose mode the
#            captured test output is shown only when the test fails
#######################################
run_test() {
  local name=$1
  local test_path="$SCRIPT_DIR/$name"
  local status=0
  local output=""
  local label=""
  local start_time end_time duration

  echo "----------------------------------------"
  echo "Running: $name"
  echo "----------------------------------------"

  if [ ! -f "$test_path" ]; then
    echo " [SKIP] Test file not found: $name"
    skipped=$((skipped + 1))
    return 0
  fi

  if [ ! -x "$test_path" ]; then
    echo " Making $name executable..."
    chmod +x "$test_path"
  fi

  start_time=$(date +%s)

  if [ "$VERBOSE" = true ]; then
    # Stream the test's output live; result lines carry the test name.
    "$TIMEOUT_CMD" "$TIMEOUT" bash "$test_path" || status=$?
    label="$name "
    echo ""
  else
    # Capture output for non-verbose mode
    output=$("$TIMEOUT_CMD" "$TIMEOUT" bash "$test_path" 2>&1) || status=$?
  fi

  end_time=$(date +%s)
  duration=$((end_time - start_time))

  if [ "$status" -eq 0 ]; then
    echo " [PASS] ${label}(${duration}s)"
    passed=$((passed + 1))
  else
    # timeout(1) exits with 124 when it had to kill the command.
    if [ "$status" -eq 124 ]; then
      echo " [FAIL] ${label}(timeout after ${TIMEOUT}s)"
    else
      echo " [FAIL] ${label}(${duration}s)"
    fi
    if [ "$VERBOSE" != true ]; then
      echo ""
      echo " Output:"
      echo "$output" | sed 's/^/ /'
    fi
    failed=$((failed + 1))
  fi

  echo ""
}

# Run each test
for test_name in "${tests[@]}"; do
  run_test "$test_name"
done

# Print summary
echo "========================================"
echo " Test Results Summary"
echo "========================================"
echo ""
echo " Passed: $passed"
echo " Failed: $failed"
echo " Skipped: $skipped"
echo ""

if [ "$RUN_INTEGRATION" = false ] && [ ${#integration_tests[@]} -gt 0 ]; then
  echo "Note: Integration tests were not run (they take 10-30 minutes)."
  echo "Use --integration flag to run full workflow execution tests."
  echo ""
fi

if [ "$failed" -gt 0 ]; then
  echo "STATUS: FAILED"
  exit 1
else
  echo "STATUS: PASSED"
  exit 0
fi
@@ -0,0 +1,177 @@
1
#!/usr/bin/env bash
# Integration Test: Document Review System
# Actually runs spec/plan review and verifies reviewers catch issues
#
# Writes a spec with known defects into a throwaway project, asks Claude to
# review it, then greps the review for evidence that the defects were found.
# Exit status: 0 when every verification passes, 1 otherwise.
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
source "$SCRIPT_DIR/test-helpers.sh"

echo "========================================"
echo " Integration Test: Document Review System"
echo "========================================"
echo ""
echo "This test verifies the document review system by:"
echo " 1. Creating a spec with intentional errors"
echo " 2. Running the spec document reviewer"
echo " 3. Verifying the reviewer catches the errors"
echo ""

# Create test project
TEST_PROJECT=$(create_test_project)
echo "Test project: $TEST_PROJECT"

# Trap to cleanup. Single quotes defer expansion until the trap fires and keep
# the path as one quoted argument, so a path containing spaces or glob
# characters is passed intact.
trap 'cleanup_test_project "$TEST_PROJECT"' EXIT

cd "$TEST_PROJECT"

# Create directory structure
mkdir -p docs/superpowers/specs

# Create a spec document WITH INTENTIONAL ERRORS for the reviewer to catch
cat > docs/superpowers/specs/test-feature-design.md <<'EOF'
# Test Feature Design

## Overview

This is a test feature that does something useful.

## Requirements

1. The feature should work correctly
2. It should be fast
3. TODO: Add more requirements here

## Architecture

The feature will use a simple architecture with:
- A frontend component
- A backend service
- Error handling will be specified later once we understand the failure modes better

## Data Flow

Data flows from the frontend to the backend.

## Testing Strategy

Tests will be written to cover the main functionality.
EOF

# Initialize git repo
git init --quiet
git config user.email "test@test.com"
git config user.name "Test User"
git add .
git commit -m "Initial commit with test spec" --quiet

echo ""
echo "Created test spec with intentional errors:"
echo " - TODO placeholder in Requirements section"
echo " - 'specified later' deferral in Architecture section"
echo ""
echo "Running spec document reviewer..."
echo ""

# Run Claude to review the spec
# NOTE(review): OUTPUT_FILE lives inside TEST_PROJECT. If cleanup_test_project
# (defined in test-helpers.sh) removes that directory on exit, the
# "Output saved to" path printed on failure will no longer exist - confirm.
OUTPUT_FILE="$TEST_PROJECT/claude-output.txt"

PROMPT="You are testing the spec document reviewer.

Read the spec-document-reviewer-prompt.md template in skills/brainstorming/ to understand the review format.

Then review the spec at $TEST_PROJECT/docs/superpowers/specs/test-feature-design.md using the criteria from that template.

Look for:
- TODOs, placeholders, 'TBD', incomplete sections
- Sections saying 'to be defined later' or 'will spec when X is done'
- Sections noticeably less detailed than others

Output your review in the format specified in the template."

echo "================================================================================"
# Record the failing status immediately: reading $? after the echo calls
# below would report the status of echo (always 0), not of the run. With
# pipefail the pipeline fails if either claude/timeout or tee fails.
run_status=0
cd "$SCRIPT_DIR/../.." && timeout 120 claude -p "$PROMPT" --permission-mode bypassPermissions 2>&1 | tee "$OUTPUT_FILE" || run_status=$?
if [ "$run_status" -ne 0 ]; then
  echo ""
  echo "================================================================================"
  echo "EXECUTION FAILED (exit code: $run_status)"
  exit 1
fi
echo "================================================================================"

echo ""
echo "Analyzing reviewer output..."
echo ""

# Verification tests
FAILED=0

echo "=== Verification Tests ==="
echo ""

# Test 1: Reviewer found the TODO
echo "Test 1: Reviewer found TODO..."
if grep -qi "TODO" "$OUTPUT_FILE" && grep -qi "requirements\|Requirements" "$OUTPUT_FILE"; then
  echo " [PASS] Reviewer identified TODO in Requirements section"
else
  echo " [FAIL] Reviewer did not identify TODO"
  FAILED=$((FAILED + 1))
fi
echo ""

# Test 2: Reviewer found the "specified later" deferral
echo "Test 2: Reviewer found 'specified later' deferral..."
if grep -qi "specified later\|later\|defer\|incomplete\|error handling" "$OUTPUT_FILE"; then
  echo " [PASS] Reviewer identified deferred content"
else
  echo " [FAIL] Reviewer did not identify deferred content"
  FAILED=$((FAILED + 1))
fi
echo ""

# Test 3: Reviewer output includes Issues section
echo "Test 3: Review output format..."
if grep -qi "issues\|Issues" "$OUTPUT_FILE"; then
  echo " [PASS] Review includes Issues section"
else
  echo " [FAIL] Review missing Issues section"
  FAILED=$((FAILED + 1))
fi
echo ""

# Test 4: Reviewer did NOT approve (found issues)
# Only an explicit approval with no "issues found" marker counts as a failure;
# output that matches neither pattern is treated as a pass.
echo "Test 4: Reviewer verdict..."
if grep -qi "Issues Found\|❌\|not approved\|issues found" "$OUTPUT_FILE"; then
  echo " [PASS] Reviewer correctly found issues (not approved)"
elif grep -qi "Approved\|✅" "$OUTPUT_FILE" && ! grep -qi "Issues Found\|❌" "$OUTPUT_FILE"; then
  echo " [FAIL] Reviewer incorrectly approved spec with errors"
  FAILED=$((FAILED + 1))
else
  echo " [PASS] Reviewer identified problems (ambiguous format but found issues)"
fi
echo ""

# Summary
echo "========================================"
echo " Test Summary"
echo "========================================"
echo ""

if [ "$FAILED" -eq 0 ]; then
  echo "STATUS: PASSED"
  echo "All verification tests passed!"
  echo ""
  echo "The spec document reviewer correctly:"
  echo " ✓ Found TODO placeholder"
  echo " ✓ Found 'specified later' deferral"
  echo " ✓ Produced properly formatted review"
  echo " ✓ Did not approve spec with errors"
  exit 0
else
  echo "STATUS: FAILED"
  echo "Failed $FAILED verification tests"
  echo ""
  echo "Output saved to: $OUTPUT_FILE"
  echo ""
  echo "Review the output to see what went wrong."
  exit 1
fi