workerssuper 5.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +20 -0
- package/.claude-plugin/plugin.json +13 -0
- package/.codex/INSTALL.md +67 -0
- package/.cursor-plugin/plugin.json +18 -0
- package/.gitattributes +18 -0
- package/.github/FUNDING.yml +3 -0
- package/.github/ISSUE_TEMPLATE/bug_report.md +52 -0
- package/.github/ISSUE_TEMPLATE/config.yml +5 -0
- package/.github/ISSUE_TEMPLATE/feature_request.md +34 -0
- package/.github/ISSUE_TEMPLATE/platform_support.md +23 -0
- package/.github/PULL_REQUEST_TEMPLATE.md +87 -0
- package/.opencode/INSTALL.md +83 -0
- package/.opencode/plugins/superpowers.js +107 -0
- package/CHANGELOG.md +13 -0
- package/CODE_OF_CONDUCT.md +128 -0
- package/GEMINI.md +2 -0
- package/LICENSE +21 -0
- package/README.md +187 -0
- package/RELEASE-NOTES.md +1057 -0
- package/agents/code-reviewer.md +48 -0
- package/commands/brainstorm.md +5 -0
- package/commands/execute-plan.md +5 -0
- package/commands/write-plan.md +5 -0
- package/docs/README.codex.md +126 -0
- package/docs/README.opencode.md +130 -0
- package/docs/plans/2025-11-22-opencode-support-design.md +294 -0
- package/docs/plans/2025-11-22-opencode-support-implementation.md +1095 -0
- package/docs/plans/2025-11-28-skills-improvements-from-user-feedback.md +711 -0
- package/docs/plans/2026-01-17-visual-brainstorming.md +571 -0
- package/docs/superpowers/plans/2026-01-22-document-review-system.md +301 -0
- package/docs/superpowers/plans/2026-02-19-visual-brainstorming-refactor.md +523 -0
- package/docs/superpowers/plans/2026-03-11-zero-dep-brainstorm-server.md +479 -0
- package/docs/superpowers/specs/2026-01-22-document-review-system-design.md +136 -0
- package/docs/superpowers/specs/2026-02-19-visual-brainstorming-refactor-design.md +162 -0
- package/docs/superpowers/specs/2026-03-11-zero-dep-brainstorm-server-design.md +118 -0
- package/docs/testing.md +303 -0
- package/docs/windows/polyglot-hooks.md +212 -0
- package/gemini-extension.json +6 -0
- package/hooks/hooks-cursor.json +10 -0
- package/hooks/hooks.json +16 -0
- package/hooks/run-hook.cmd +46 -0
- package/hooks/session-start +57 -0
- package/package.json +5 -0
- package/skills/brainstorming/SKILL.md +164 -0
- package/skills/brainstorming/scripts/frame-template.html +214 -0
- package/skills/brainstorming/scripts/helper.js +88 -0
- package/skills/brainstorming/scripts/server.cjs +338 -0
- package/skills/brainstorming/scripts/start-server.sh +153 -0
- package/skills/brainstorming/scripts/stop-server.sh +55 -0
- package/skills/brainstorming/spec-document-reviewer-prompt.md +49 -0
- package/skills/brainstorming/visual-companion.md +286 -0
- package/skills/dispatching-parallel-agents/SKILL.md +182 -0
- package/skills/executing-plans/SKILL.md +70 -0
- package/skills/finishing-a-development-branch/SKILL.md +200 -0
- package/skills/receiving-code-review/SKILL.md +213 -0
- package/skills/requesting-code-review/SKILL.md +105 -0
- package/skills/requesting-code-review/code-reviewer.md +146 -0
- package/skills/subagent-driven-development/SKILL.md +277 -0
- package/skills/subagent-driven-development/code-quality-reviewer-prompt.md +26 -0
- package/skills/subagent-driven-development/implementer-prompt.md +113 -0
- package/skills/subagent-driven-development/spec-reviewer-prompt.md +61 -0
- package/skills/systematic-debugging/CREATION-LOG.md +119 -0
- package/skills/systematic-debugging/SKILL.md +296 -0
- package/skills/systematic-debugging/condition-based-waiting-example.ts +158 -0
- package/skills/systematic-debugging/condition-based-waiting.md +115 -0
- package/skills/systematic-debugging/defense-in-depth.md +122 -0
- package/skills/systematic-debugging/find-polluter.sh +63 -0
- package/skills/systematic-debugging/root-cause-tracing.md +169 -0
- package/skills/systematic-debugging/test-academic.md +14 -0
- package/skills/systematic-debugging/test-pressure-1.md +58 -0
- package/skills/systematic-debugging/test-pressure-2.md +68 -0
- package/skills/systematic-debugging/test-pressure-3.md +69 -0
- package/skills/test-driven-development/SKILL.md +371 -0
- package/skills/test-driven-development/testing-anti-patterns.md +299 -0
- package/skills/using-git-worktrees/SKILL.md +218 -0
- package/skills/using-superpowers/SKILL.md +115 -0
- package/skills/using-superpowers/references/codex-tools.md +25 -0
- package/skills/using-superpowers/references/gemini-tools.md +33 -0
- package/skills/verification-before-completion/SKILL.md +139 -0
- package/skills/writing-plans/SKILL.md +145 -0
- package/skills/writing-plans/plan-document-reviewer-prompt.md +49 -0
- package/skills/writing-skills/SKILL.md +655 -0
- package/skills/writing-skills/anthropic-best-practices.md +1150 -0
- package/skills/writing-skills/examples/CLAUDE_MD_TESTING.md +189 -0
- package/skills/writing-skills/graphviz-conventions.dot +172 -0
- package/skills/writing-skills/persuasion-principles.md +187 -0
- package/skills/writing-skills/render-graphs.js +168 -0
- package/skills/writing-skills/testing-skills-with-subagents.md +384 -0
- package/tests/brainstorm-server/package-lock.json +36 -0
- package/tests/brainstorm-server/package.json +10 -0
- package/tests/brainstorm-server/server.test.js +424 -0
- package/tests/brainstorm-server/windows-lifecycle.test.sh +351 -0
- package/tests/brainstorm-server/ws-protocol.test.js +392 -0
- package/tests/claude-code/README.md +158 -0
- package/tests/claude-code/analyze-token-usage.py +168 -0
- package/tests/claude-code/run-skill-tests.sh +187 -0
- package/tests/claude-code/test-document-review-system.sh +177 -0
- package/tests/claude-code/test-helpers.sh +202 -0
- package/tests/claude-code/test-subagent-driven-development-integration.sh +314 -0
- package/tests/claude-code/test-subagent-driven-development.sh +165 -0
- package/tests/explicit-skill-requests/prompts/action-oriented.txt +3 -0
- package/tests/explicit-skill-requests/prompts/after-planning-flow.txt +17 -0
- package/tests/explicit-skill-requests/prompts/claude-suggested-it.txt +11 -0
- package/tests/explicit-skill-requests/prompts/i-know-what-sdd-means.txt +8 -0
- package/tests/explicit-skill-requests/prompts/mid-conversation-execute-plan.txt +3 -0
- package/tests/explicit-skill-requests/prompts/please-use-brainstorming.txt +1 -0
- package/tests/explicit-skill-requests/prompts/skip-formalities.txt +3 -0
- package/tests/explicit-skill-requests/prompts/subagent-driven-development-please.txt +1 -0
- package/tests/explicit-skill-requests/prompts/use-systematic-debugging.txt +1 -0
- package/tests/explicit-skill-requests/run-all.sh +70 -0
- package/tests/explicit-skill-requests/run-claude-describes-sdd.sh +100 -0
- package/tests/explicit-skill-requests/run-extended-multiturn-test.sh +113 -0
- package/tests/explicit-skill-requests/run-haiku-test.sh +144 -0
- package/tests/explicit-skill-requests/run-multiturn-test.sh +143 -0
- package/tests/explicit-skill-requests/run-test.sh +136 -0
- package/tests/opencode/run-tests.sh +163 -0
- package/tests/opencode/setup.sh +73 -0
- package/tests/opencode/test-plugin-loading.sh +72 -0
- package/tests/opencode/test-priority.sh +198 -0
- package/tests/opencode/test-tools.sh +104 -0
- package/tests/skill-triggering/prompts/dispatching-parallel-agents.txt +8 -0
- package/tests/skill-triggering/prompts/executing-plans.txt +1 -0
- package/tests/skill-triggering/prompts/requesting-code-review.txt +3 -0
- package/tests/skill-triggering/prompts/systematic-debugging.txt +11 -0
- package/tests/skill-triggering/prompts/test-driven-development.txt +7 -0
- package/tests/skill-triggering/prompts/writing-plans.txt +10 -0
- package/tests/skill-triggering/run-all.sh +60 -0
- package/tests/skill-triggering/run-test.sh +88 -0
- package/tests/subagent-driven-dev/go-fractals/design.md +81 -0
- package/tests/subagent-driven-dev/go-fractals/plan.md +172 -0
- package/tests/subagent-driven-dev/go-fractals/scaffold.sh +45 -0
- package/tests/subagent-driven-dev/run-test.sh +106 -0
- package/tests/subagent-driven-dev/svelte-todo/design.md +70 -0
- package/tests/subagent-driven-dev/svelte-todo/plan.md +222 -0
- package/tests/subagent-driven-dev/svelte-todo/scaffold.sh +46 -0
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Analyze token usage from Claude Code session transcripts.
|
|
4
|
+
Breaks down usage by main session and individual subagents.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import json
|
|
8
|
+
import sys
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from collections import defaultdict
|
|
11
|
+
|
|
12
|
+
def analyze_main_session(filepath):
    """Analyze a session JSONL transcript and tally token usage per agent.

    Args:
        filepath: Path to a Claude Code session transcript — one JSON object
            per line.

    Returns:
        Tuple ``(main_usage, subagent_usage)`` where ``main_usage`` is a dict
        of counters (input/output/cache tokens, message count) for the main
        coordinating session, and ``subagent_usage`` maps agentId -> counter
        dict plus a short 'description' derived from the agent's prompt.
    """
    main_usage = {
        'input_tokens': 0,
        'output_tokens': 0,
        'cache_creation': 0,
        'cache_read': 0,
        'messages': 0
    }

    # Track usage per subagent; defaultdict so the first sighting of an
    # agentId creates a zeroed counter dict automatically.
    subagent_usage = defaultdict(lambda: {
        'input_tokens': 0,
        'output_tokens': 0,
        'cache_creation': 0,
        'cache_read': 0,
        'messages': 0,
        'description': None
    })

    with open(filepath, 'r', encoding='utf-8') as f:
        for line in f:
            # Skip blank lines without paying for a decode exception.
            if not line.strip():
                continue
            try:
                data = json.loads(line)

                # Main session assistant messages carry a 'usage' sub-dict.
                if data.get('type') == 'assistant' and 'message' in data:
                    main_usage['messages'] += 1
                    msg_usage = data['message'].get('usage', {})
                    main_usage['input_tokens'] += msg_usage.get('input_tokens', 0)
                    main_usage['output_tokens'] += msg_usage.get('output_tokens', 0)
                    main_usage['cache_creation'] += msg_usage.get('cache_creation_input_tokens', 0)
                    main_usage['cache_read'] += msg_usage.get('cache_read_input_tokens', 0)

                # Subagent usage arrives as tool results on 'user' records.
                if data.get('type') == 'user' and 'toolUseResult' in data:
                    result = data['toolUseResult']
                    if 'usage' in result and 'agentId' in result:
                        agent_id = result['agentId']
                        usage = result['usage']

                        # Derive a human-readable description from the first
                        # line of the agent's prompt, once per agent.
                        if subagent_usage[agent_id]['description'] is None:
                            prompt = result.get('prompt', '')
                            first_line = prompt.split('\n')[0] if prompt else f"agent-{agent_id}"
                            if first_line.startswith('You are '):
                                first_line = first_line[8:]  # Remove "You are "
                            subagent_usage[agent_id]['description'] = first_line[:60]

                        subagent_usage[agent_id]['messages'] += 1
                        subagent_usage[agent_id]['input_tokens'] += usage.get('input_tokens', 0)
                        subagent_usage[agent_id]['output_tokens'] += usage.get('output_tokens', 0)
                        subagent_usage[agent_id]['cache_creation'] += usage.get('cache_creation_input_tokens', 0)
                        subagent_usage[agent_id]['cache_read'] += usage.get('cache_read_input_tokens', 0)
            except (json.JSONDecodeError, TypeError, AttributeError, KeyError):
                # Tolerate malformed or unexpectedly-shaped lines rather than
                # aborting the whole analysis. The original swallowed *all*
                # exceptions, which also hid genuine programming errors.
                continue

    return main_usage, dict(subagent_usage)
|
|
71
|
+
|
|
72
|
+
def format_tokens(n):
    """Render a token count with comma thousands separators (1234 -> '1,234')."""
    return format(n, ',')
|
|
75
|
+
|
|
76
|
+
def calculate_cost(usage, input_cost_per_m=3.0, output_cost_per_m=15.0):
    """Calculate estimated cost in dollars for one usage-counter dict.

    NOTE(review): cache-creation and cache-read tokens are billed here at the
    full input rate — a deliberate simplification of real cache pricing.
    """
    billed_input = sum(usage[key] for key in ('input_tokens', 'cache_creation', 'cache_read'))
    return (billed_input * input_cost_per_m / 1_000_000
            + usage['output_tokens'] * output_cost_per_m / 1_000_000)
|
|
82
|
+
|
|
83
|
+
def main():
    """CLI entry point: print a per-agent token-usage table plus totals.

    Usage: analyze-token-usage.py <session-file.jsonl>
    Exits with status 1 on a missing argument or nonexistent file.
    """
    if len(sys.argv) < 2:
        print("Usage: analyze-token-usage.py <session-file.jsonl>")
        sys.exit(1)

    main_session_file = sys.argv[1]

    if not Path(main_session_file).exists():
        print(f"Error: Session file not found: {main_session_file}")
        sys.exit(1)

    # Tally usage for the coordinator session and every subagent.
    main_usage, subagent_usage = analyze_main_session(main_session_file)

    heavy_rule = "=" * 100
    light_rule = "-" * 100

    print(heavy_rule)
    print("TOKEN USAGE ANALYSIS")
    print(heavy_rule)
    print()

    print("Usage Breakdown:")
    print(light_rule)
    print(f"{'Agent':<15} {'Description':<35} {'Msgs':>5} {'Input':>10} {'Output':>10} {'Cache':>10} {'Cost':>8}")
    print(light_rule)

    def print_row(agent, description, usage):
        # One aligned table row; the Cache column shows cache *reads* only.
        row_cost = calculate_cost(usage)
        print(f"{agent:<15} {description:<35} "
              f"{usage['messages']:>5} "
              f"{format_tokens(usage['input_tokens']):>10} "
              f"{format_tokens(usage['output_tokens']):>10} "
              f"{format_tokens(usage['cache_read']):>10} "
              f"${row_cost:>7.2f}")

    # Main (coordinator) session first, then subagents in sorted-id order.
    print_row('main', 'Main session (coordinator)', main_usage)
    for agent_id in sorted(subagent_usage.keys()):
        usage = subagent_usage[agent_id]
        print_row(agent_id, usage['description'] or f"agent-{agent_id}", usage)

    print(light_rule)

    # Grand totals across the main session and every subagent; iterate the
    # fixed counter keys so the subagents' extra 'description' key is skipped.
    counters = ('input_tokens', 'output_tokens', 'cache_creation', 'cache_read', 'messages')
    total_usage = {key: main_usage[key] for key in counters}
    for usage in subagent_usage.values():
        for key in counters:
            total_usage[key] += usage[key]

    total_input = total_usage['input_tokens'] + total_usage['cache_creation'] + total_usage['cache_read']
    total_tokens = total_input + total_usage['output_tokens']
    total_cost = calculate_cost(total_usage)

    print()
    print("TOTALS:")
    print(f" Total messages: {format_tokens(total_usage['messages'])}")
    print(f" Input tokens: {format_tokens(total_usage['input_tokens'])}")
    print(f" Output tokens: {format_tokens(total_usage['output_tokens'])}")
    print(f" Cache creation tokens: {format_tokens(total_usage['cache_creation'])}")
    print(f" Cache read tokens: {format_tokens(total_usage['cache_read'])}")
    print()
    print(f" Total input (incl cache): {format_tokens(total_input)}")
    print(f" Total tokens: {format_tokens(total_tokens)}")
    print()
    print(f" Estimated cost: ${total_cost:.2f}")
    print(" (at $3/$15 per M tokens for input/output)")
    print()
    print(heavy_rule)

if __name__ == '__main__':
    main()
|
|
@@ -0,0 +1,187 @@
|
|
|
1
|
+
#!/usr/bin/env bash
# Test runner for Claude Code skills.
# Tests skills by invoking the Claude Code CLI and verifying behavior.
set -euo pipefail

# Resolve this script's directory and run everything relative to it.
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
cd "$SCRIPT_DIR"

printf '%s\n' \
  "========================================" \
  " Claude Code Skills Test Suite" \
  "========================================" \
  ""
echo "Repository: $(cd ../.. && pwd)"
echo "Test time: $(date)"
echo "Claude version: $(claude --version 2>/dev/null || echo 'not found')"
echo ""

# Bail out early when the Claude Code CLI is not installed.
command -v claude >/dev/null 2>&1 || {
  echo "ERROR: Claude Code CLI not found"
  echo "Install Claude Code first: https://code.claude.com"
  exit 1
}
|
|
24
|
+
|
|
25
|
+
# ---- Command-line option parsing ----
VERBOSE=false          # -v/--verbose: stream test output live
SPECIFIC_TEST=""       # -t/--test NAME: run only the named test
TIMEOUT=300            # --timeout SECONDS: per-test limit (default 5 minutes)
RUN_INTEGRATION=false  # -i/--integration: include the slow integration suite

while [[ $# -gt 0 ]]; do
  case "$1" in
    -v|--verbose) VERBOSE=true; shift ;;
    -t|--test) SPECIFIC_TEST="$2"; shift 2 ;;
    --timeout) TIMEOUT="$2"; shift 2 ;;
    -i|--integration) RUN_INTEGRATION=true; shift ;;
    -h|--help)
      echo "Usage: $0 [options]"
      echo ""
      echo "Options:"
      echo " --verbose, -v Show verbose output"
      echo " --test, -t NAME Run only the specified test"
      echo " --timeout SECONDS Set timeout per test (default: 300)"
      echo " --integration, -i Run integration tests (slow, 10-30 min)"
      echo " --help, -h Show this help"
      echo ""
      echo "Tests:"
      echo " test-subagent-driven-development.sh Test skill loading and requirements"
      echo ""
      echo "Integration Tests (use --integration):"
      echo " test-subagent-driven-development-integration.sh Full workflow execution"
      exit 0
      ;;
    *)
      echo "Unknown option: $1"
      echo "Use --help for usage information"
      exit 1
      ;;
  esac
done
|
|
73
|
+
|
|
74
|
+
# ---- Test selection ----
# Fast unit tests that always run.
tests=("test-subagent-driven-development.sh")

# Slow, full-execution integration tests (opt-in via --integration).
integration_tests=("test-subagent-driven-development-integration.sh")

# Fold the integration suite into the run when requested.
if [[ "$RUN_INTEGRATION" == true ]]; then
  tests+=("${integration_tests[@]}")
fi

# A --test NAME filter replaces the whole list with that single test.
if [[ -n "$SPECIFIC_TEST" ]]; then
  tests=("$SPECIFIC_TEST")
fi
|
|
93
|
+
|
|
94
|
+
# Track results
passed=0
failed=0
skipped=0

# Run each test
for test in "${tests[@]}"; do
  echo "----------------------------------------"
  echo "Running: $test"
  echo "----------------------------------------"

  test_path="$SCRIPT_DIR/$test"

  # A missing test file is counted as skipped rather than failing the run.
  if [ ! -f "$test_path" ]; then
    echo " [SKIP] Test file not found: $test"
    skipped=$((skipped + 1))
    continue
  fi

  # Tests are invoked via `bash` below, but fix the executable bit anyway
  # so they can also be run directly later.
  if [ ! -x "$test_path" ]; then
    echo " Making $test executable..."
    chmod +x "$test_path"
  fi

  start_time=$(date +%s)

  if [ "$VERBOSE" = true ]; then
    # Verbose mode: stream the test's output straight to the terminal.
    if timeout "$TIMEOUT" bash "$test_path"; then
      end_time=$(date +%s)
      duration=$((end_time - start_time))
      echo ""
      echo " [PASS] $test (${duration}s)"
      passed=$((passed + 1))
    else
      # $? here is the exit status of the failed `timeout` command above.
      exit_code=$?
      end_time=$(date +%s)
      duration=$((end_time - start_time))
      echo ""
      # timeout(1) exits with 124 when the command hit the time limit.
      if [ $exit_code -eq 124 ]; then
        echo " [FAIL] $test (timeout after ${TIMEOUT}s)"
      else
        echo " [FAIL] $test (${duration}s)"
      fi
      failed=$((failed + 1))
    fi
  else
    # Capture output for non-verbose mode
    if output=$(timeout "$TIMEOUT" bash "$test_path" 2>&1); then
      end_time=$(date +%s)
      duration=$((end_time - start_time))
      echo " [PASS] (${duration}s)"
      passed=$((passed + 1))
    else
      exit_code=$?
      end_time=$(date +%s)
      duration=$((end_time - start_time))
      if [ $exit_code -eq 124 ]; then
        echo " [FAIL] (timeout after ${TIMEOUT}s)"
      else
        echo " [FAIL] (${duration}s)"
      fi
      echo ""
      echo " Output:"
      # Indent the captured output so it reads as part of this result block.
      echo "$output" | sed 's/^/ /'
      failed=$((failed + 1))
    fi
  fi

  echo ""
done
|
|
164
|
+
|
|
165
|
+
# ---- Summary ----
printf '%s\n' \
  "========================================" \
  " Test Results Summary" \
  "========================================" \
  ""
echo " Passed: $passed"
echo " Failed: $failed"
echo " Skipped: $skipped"
echo ""

# Remind the user about the opt-in integration suite when it was skipped.
if [[ "$RUN_INTEGRATION" == false && ${#integration_tests[@]} -gt 0 ]]; then
  echo "Note: Integration tests were not run (they take 10-30 minutes)."
  echo "Use --integration flag to run full workflow execution tests."
  echo ""
fi

# Overall exit status mirrors whether any test failed.
if (( failed > 0 )); then
  echo "STATUS: FAILED"
  exit 1
fi
echo "STATUS: PASSED"
exit 0
|
|
@@ -0,0 +1,177 @@
|
|
|
1
|
+
#!/usr/bin/env bash
# Integration Test: Document Review System
# Actually runs spec/plan review and verifies reviewers catch issues
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
# Provides create_test_project / cleanup_test_project helpers.
source "$SCRIPT_DIR/test-helpers.sh"

echo "========================================"
echo " Integration Test: Document Review System"
echo "========================================"
echo ""
echo "This test verifies the document review system by:"
echo " 1. Creating a spec with intentional errors"
echo " 2. Running the spec document reviewer"
echo " 3. Verifying the reviewer catches the errors"
echo ""

# Create test project
TEST_PROJECT=$(create_test_project)
echo "Test project: $TEST_PROJECT"

# Clean up on any exit path. Single-quote the trap so $TEST_PROJECT expands
# when the trap fires, and quote it there so paths with spaces survive
# (the original double-quoted, unquoted form broke on such paths).
trap 'cleanup_test_project "$TEST_PROJECT"' EXIT

cd "$TEST_PROJECT"

# Create directory structure
mkdir -p docs/superpowers/specs

# Create a spec document WITH INTENTIONAL ERRORS for the reviewer to catch
cat > docs/superpowers/specs/test-feature-design.md <<'EOF'
# Test Feature Design

## Overview

This is a test feature that does something useful.

## Requirements

1. The feature should work correctly
2. It should be fast
3. TODO: Add more requirements here

## Architecture

The feature will use a simple architecture with:
- A frontend component
- A backend service
- Error handling will be specified later once we understand the failure modes better

## Data Flow

Data flows from the frontend to the backend.

## Testing Strategy

Tests will be written to cover the main functionality.
EOF

# Initialize git repo
git init --quiet
git config user.email "test@test.com"
git config user.name "Test User"
git add .
git commit -m "Initial commit with test spec" --quiet

echo ""
echo "Created test spec with intentional errors:"
echo " - TODO placeholder in Requirements section"
echo " - 'specified later' deferral in Architecture section"
echo ""
echo "Running spec document reviewer..."
echo ""
|
|
75
|
+
|
|
76
|
+
# Run Claude to review the spec
OUTPUT_FILE="$TEST_PROJECT/claude-output.txt"

PROMPT="You are testing the spec document reviewer.

Read the spec-document-reviewer-prompt.md template in skills/brainstorming/ to understand the review format.

Then review the spec at $TEST_PROJECT/docs/superpowers/specs/test-feature-design.md using the criteria from that template.

Look for:
- TODOs, placeholders, 'TBD', incomplete sections
- Sections saying 'to be defined later' or 'will spec when X is done'
- Sections noticeably less detailed than others

Output your review in the format specified in the template."

echo "================================================================================"
cd "$SCRIPT_DIR/../.." && timeout 120 claude -p "$PROMPT" --permission-mode bypassPermissions 2>&1 | tee "$OUTPUT_FILE" || {
  # Capture the pipeline's status immediately; the original read $? only
  # after two echo commands, so it always reported exit code 0.
  rc=$?
  echo ""
  echo "================================================================================"
  echo "EXECUTION FAILED (exit code: $rc)"
  exit 1
}
echo "================================================================================"

echo ""
echo "Analyzing reviewer output..."
echo ""
|
|
104
|
+
|
|
105
|
+
# ---- Verification checks against the captured reviewer output ----
FAILED=0

echo "=== Verification Tests ==="
echo ""

# Test 1: the reviewer must flag the TODO in the Requirements section.
echo "Test 1: Reviewer found TODO..."
if ! grep -qi "TODO" "$OUTPUT_FILE" || ! grep -qi "requirements\|Requirements" "$OUTPUT_FILE"; then
  echo " [FAIL] Reviewer did not identify TODO"
  FAILED=$((FAILED + 1))
else
  echo " [PASS] Reviewer identified TODO in Requirements section"
fi
echo ""

# Test 2: the reviewer must flag the deferred error-handling content.
echo "Test 2: Reviewer found 'specified later' deferral..."
if ! grep -qi "specified later\|later\|defer\|incomplete\|error handling" "$OUTPUT_FILE"; then
  echo " [FAIL] Reviewer did not identify deferred content"
  FAILED=$((FAILED + 1))
else
  echo " [PASS] Reviewer identified deferred content"
fi
echo ""

# Test 3: the review must use the expected format (an Issues section).
echo "Test 3: Review output format..."
if ! grep -qi "issues\|Issues" "$OUTPUT_FILE"; then
  echo " [FAIL] Review missing Issues section"
  FAILED=$((FAILED + 1))
else
  echo " [PASS] Review includes Issues section"
fi
echo ""

# Test 4: the reviewer must not approve a spec containing known errors.
echo "Test 4: Reviewer verdict..."
if grep -qi "Issues Found\|❌\|not approved\|issues found" "$OUTPUT_FILE"; then
  echo " [PASS] Reviewer correctly found issues (not approved)"
elif grep -qi "Approved\|✅" "$OUTPUT_FILE" && ! grep -qi "Issues Found\|❌" "$OUTPUT_FILE"; then
  echo " [FAIL] Reviewer incorrectly approved spec with errors"
  FAILED=$((FAILED + 1))
else
  echo " [PASS] Reviewer identified problems (ambiguous format but found issues)"
fi
echo ""
|
|
152
|
+
|
|
153
|
+
# ---- Summary ----
printf '%s\n' \
  "========================================" \
  " Test Summary" \
  "========================================" \
  ""

# Guard clause: report and bail on any verification failure first.
if (( FAILED > 0 )); then
  echo "STATUS: FAILED"
  echo "Failed $FAILED verification tests"
  echo ""
  echo "Output saved to: $OUTPUT_FILE"
  echo ""
  echo "Review the output to see what went wrong."
  exit 1
fi

echo "STATUS: PASSED"
echo "All verification tests passed!"
echo ""
echo "The spec document reviewer correctly:"
echo " ✓ Found TODO placeholder"
echo " ✓ Found 'specified later' deferral"
echo " ✓ Produced properly formatted review"
echo " ✓ Did not approve spec with errors"
exit 0
|