@hustle-together/api-dev-tools 1.2.1 → 1.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +48 -5
- package/bin/cli.js +27 -27
- package/demo/workflow-demo.html +1945 -0
- package/hooks/api-workflow-check.py +135 -26
- package/hooks/enforce-interview.py +183 -0
- package/hooks/track-tool-use.py +38 -3
- package/hooks/verify-implementation.py +225 -0
- package/package.json +2 -1
- package/templates/settings.json +23 -1
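
All of the hooks below coordinate through a single state file, `.claude/api-dev-state.json`, referenced as STATE_FILE in each script. For orientation, here is a minimal sketch of what that file might look like mid-workflow; the field names are the ones the hooks read and write, but the values are invented for illustration:

    {
      "phases": {
        "research_initial": { "status": "complete", "sources": ["https://..."] },
        "interview": {
          "status": "in_progress",
          "user_question_count": 2,
          "questions": [
            {
              "question": "What is the primary purpose of this endpoint?",
              "timestamp": "2025-01-01T12:00:00",
              "tool_used": true
            }
          ]
        },
        "tdd_red": { "status": "not_started" },
        "tdd_green": { "status": "not_started" }
      },
      "files_created": ["app/api/example/route.ts"],
      "files_modified": [],
      "verification_warnings": []
    }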
package/hooks/api-workflow-check.py
CHANGED

@@ -6,33 +6,104 @@ Purpose: Check if all required phases are complete before allowing stop
 This hook runs when Claude tries to stop/end the conversation.
 It checks api-dev-state.json to ensure critical workflow phases completed.
 
+Gap Fixes Applied:
+- Gap 2: Shows files_created vs files_modified to verify all claimed changes
+- Gap 3: Warns if there are verification_warnings that weren't addressed
+- Gap 4: Requires explicit verification that implementation matches interview
+
 Returns:
 - {"decision": "approve"} - Allow stopping
 - {"decision": "block", "reason": "..."} - Prevent stopping with explanation
 """
 import json
 import sys
+import subprocess
 from pathlib import Path
 
 # State file is in .claude/ directory (sibling to hooks/)
 STATE_FILE = Path(__file__).parent.parent / "api-dev-state.json"
 
 # Phases that MUST be complete before stopping
-# These are the critical phases - others are optional
 REQUIRED_PHASES = [
     ("research_initial", "Initial research (Context7/WebSearch)"),
+    ("interview", "User interview"),
     ("tdd_red", "TDD Red phase (failing tests written)"),
     ("tdd_green", "TDD Green phase (tests passing)"),
 ]
 
 # Phases that SHOULD be complete (warning but don't block)
 RECOMMENDED_PHASES = [
-    ("interview", "User interview"),
     ("schema_creation", "Schema creation"),
+    ("tdd_refactor", "TDD Refactor phase"),
     ("documentation", "Documentation updates"),
 ]
 
 
+def get_git_modified_files() -> list[str]:
+    """Get list of modified files from git.
+
+    Gap 2 Fix: Verify which files actually changed.
+    """
+    try:
+        result = subprocess.run(
+            ["git", "diff", "--name-only", "HEAD"],
+            capture_output=True,
+            text=True,
+            cwd=STATE_FILE.parent.parent  # Project root
+        )
+        if result.returncode == 0:
+            return [f.strip() for f in result.stdout.strip().split("\n") if f.strip()]
+    except Exception:
+        pass
+    return []
+
+
+def check_verification_warnings(state: dict) -> list[str]:
+    """Check for unaddressed verification warnings.
+
+    Gap 3 Fix: Don't accept "skipped" or warnings without explanation.
+    """
+    warnings = state.get("verification_warnings", [])
+    if warnings:
+        return [
+            "⚠️ Unaddressed verification warnings:",
+            *[f"  - {w}" for w in warnings[-5:]],  # Show last 5
+            "",
+            "Please review and address these warnings before completing."
+        ]
+    return []
+
+
+def check_interview_implementation_match(state: dict) -> list[str]:
+    """Verify implementation matches interview requirements.
+
+    Gap 4 Fix: Define specific "done" criteria based on interview.
+    """
+    issues = []
+
+    interview = state.get("phases", {}).get("interview", {})
+    questions = interview.get("questions", [])
+
+    # Extract key requirements from interview
+    all_text = " ".join(str(q) for q in questions)
+
+    # Check files_created includes expected patterns
+    files_created = state.get("files_created", [])
+
+    # Look for route files if interview mentioned endpoints
+    if "endpoint" in all_text.lower() or "/api/" in all_text.lower():
+        route_files = [f for f in files_created if "route.ts" in f]
+        if not route_files:
+            issues.append("⚠️ Interview mentioned endpoints but no route.ts files were created")
+
+    # Look for test files
+    test_files = [f for f in files_created if ".test." in f or "__tests__" in f]
+    if not test_files:
+        issues.append("⚠️ No test files tracked in files_created")
+
+    return issues
+
+
 def main():
     # If no state file, we're not in an API workflow - allow stop
     if not STATE_FILE.exists():

@@ -56,6 +127,9 @@ def main():
         print(json.dumps({"decision": "approve"}))
         sys.exit(0)
 
+    # Collect all issues
+    all_issues = []
+
     # Check required phases
     incomplete_required = []
     for phase_key, phase_name in REQUIRED_PHASES:

@@ -64,6 +138,10 @@ def main():
         if status != "complete":
             incomplete_required.append(f"  - {phase_name} ({status})")
 
+    if incomplete_required:
+        all_issues.append("❌ REQUIRED phases incomplete:")
+        all_issues.extend(incomplete_required)
+
     # Check recommended phases
     incomplete_recommended = []
     for phase_key, phase_name in RECOMMENDED_PHASES:

@@ -72,42 +150,73 @@ def main():
         if status != "complete":
             incomplete_recommended.append(f"  - {phase_name} ({status})")
 
+    # Gap 2: Check git diff vs tracked files
+    git_files = get_git_modified_files()
+    tracked_files = state.get("files_created", []) + state.get("files_modified", [])
+
+    if git_files and tracked_files:
+        # Find files in git but not tracked
+        untracked_changes = []
+        for gf in git_files:
+            if not any(gf.endswith(tf) or tf in gf for tf in tracked_files):
+                if gf.endswith(".ts") and ("/api/" in gf or "/lib/" in gf):
+                    untracked_changes.append(gf)
+
+        if untracked_changes:
+            all_issues.append("\n⚠️ Gap 2: Files changed but not tracked:")
+            all_issues.extend([f"  - {f}" for f in untracked_changes[:5]])
+
+    # Gap 3: Check for unaddressed warnings
+    warning_issues = check_verification_warnings(state)
+    if warning_issues:
+        all_issues.append("\n" + "\n".join(warning_issues))
+
+    # Gap 4: Check interview-implementation match
+    match_issues = check_interview_implementation_match(state)
+    if match_issues:
+        all_issues.append("\n⚠️ Gap 4: Implementation verification:")
+        all_issues.extend([f"  {i}" for i in match_issues])
+
     # Block if required phases incomplete
     if incomplete_required:
-
-
-
-
-        reason_parts.append("\n\n⚠️ Also recommended but not complete:")
-        reason_parts.extend(incomplete_recommended)
-
-        reason_parts.append("\n\nTo continue:")
-        reason_parts.append("  1. Complete required phases above")
-        reason_parts.append("  2. Use /api-status to see detailed progress")
-        reason_parts.append("  3. Or manually mark phases complete in .claude/api-dev-state.json")
+        all_issues.append("\n\nTo continue:")
+        all_issues.append("  1. Complete required phases above")
+        all_issues.append("  2. Use /api-status to see detailed progress")
+        all_issues.append("  3. Run `git diff --name-only` to verify changes")
 
         print(json.dumps({
             "decision": "block",
-            "reason": "\n".join(
+            "reason": "\n".join(all_issues)
         }))
         sys.exit(0)
 
-    #
-
-    # Allow but the reason will be shown to user
-    print(json.dumps({
-        "decision": "approve",
-        "message": f"""⚠️ API workflow completing with optional phases pending:
-{chr(10).join(incomplete_recommended)}
+    # Build completion message
+    message_parts = ["✅ API workflow completing"]
 
-
-
-
+    if incomplete_recommended:
+        message_parts.append("\n⚠️ Optional phases skipped:")
+        message_parts.extend(incomplete_recommended)
+
+    # Show summary of tracked files
+    files_created = state.get("files_created", [])
+    if files_created:
+        message_parts.append(f"\n📁 Files created: {len(files_created)}")
+        for f in files_created[:5]:
+            message_parts.append(f"  - {f}")
+        if len(files_created) > 5:
+            message_parts.append(f"  ... and {len(files_created) - 5} more")
+
+    # Show any remaining warnings
+    if warning_issues or match_issues:
+        message_parts.append("\n⚠️ Review suggested:")
+        if warning_issues:
+            message_parts.extend(warning_issues[:3])
+        if match_issues:
+            message_parts.extend(match_issues[:3])
 
-    # All phases complete
     print(json.dumps({
         "decision": "approve",
-        "message": "
+        "message": "\n".join(message_parts)
     }))
     sys.exit(0)
 
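For reference, the Stop hook communicates with Claude Code through a single JSON object on stdout. When required phases are missing, the output produced by the code above would look roughly like this (the reason text varies with which phases and warnings are outstanding):

    {"decision": "block", "reason": "❌ REQUIRED phases incomplete:\n  - User interview (not_started)\n  - TDD Green phase (tests passing) (in_progress)\n\n\nTo continue:\n  1. Complete required phases above\n  2. Use /api-status to see detailed progress\n  3. Run `git diff --name-only` to verify changes"}

When everything required is complete, it instead approves with the "✅ API workflow completing" summary built from message_parts.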
package/hooks/enforce-interview.py
ADDED

@@ -0,0 +1,183 @@
+#!/usr/bin/env python3
+"""
+Hook: PreToolUse for Write/Edit
+Purpose: Block proceeding to schema/TDD if interview has no USER answers
+
+This hook ensures Claude actually asks the user questions and records
+their answers, rather than self-answering the interview.
+
+It checks:
+1. Interview status is "complete"
+2. There are actual questions with answers
+3. Answers don't look auto-generated (contain user-specific details)
+
+Returns:
+- {"permissionDecision": "allow"} - Let the tool run
+- {"permissionDecision": "deny", "reason": "..."} - Block with explanation
+"""
+import json
+import sys
+from pathlib import Path
+
+# State file is in .claude/ directory (sibling to hooks/)
+STATE_FILE = Path(__file__).parent.parent / "api-dev-state.json"
+
+# Minimum questions required for a valid interview
+MIN_QUESTIONS = 3
+
+# Phrases that indicate self-answered (not real user input)
+SELF_ANSWER_INDICATORS = [
+    "based on common",
+    "self-answered",
+    "assumed",
+    "typical use case",
+    "standard implementation",
+    "common pattern",
+]
+
+
+def main():
+    # Read hook input from stdin
+    try:
+        input_data = json.load(sys.stdin)
+    except json.JSONDecodeError:
+        print(json.dumps({"permissionDecision": "allow"}))
+        sys.exit(0)
+
+    tool_input = input_data.get("tool_input", {})
+    file_path = tool_input.get("file_path", "")
+
+    # Enforce for ANY file in /api/ directory (not just route.ts)
+    # This forces Claude to stop and interview before ANY API work
+    is_api_file = "/api/" in file_path and file_path.endswith(".ts")
+    is_schema_file = "/schemas/" in file_path and file_path.endswith(".ts")
+
+    # Skip test files - those are allowed during TDD
+    is_test_file = ".test." in file_path or "/__tests__/" in file_path or ".spec." in file_path
+
+    if is_test_file:
+        print(json.dumps({"permissionDecision": "allow"}))
+        sys.exit(0)
+
+    if not is_schema_file and not is_api_file:
+        print(json.dumps({"permissionDecision": "allow"}))
+        sys.exit(0)
+
+    # Check if state file exists
+    if not STATE_FILE.exists():
+        print(json.dumps({
+            "permissionDecision": "deny",
+            "reason": """❌ API workflow not started.
+
+Run /api-create [endpoint-name] to begin the interview-driven workflow."""
+        }))
+        sys.exit(0)
+
+    # Load state
+    try:
+        state = json.loads(STATE_FILE.read_text())
+    except json.JSONDecodeError:
+        print(json.dumps({"permissionDecision": "allow"}))
+        sys.exit(0)
+
+    phases = state.get("phases", {})
+    interview = phases.get("interview", {})
+    interview_status = interview.get("status", "not_started")
+    interview_desc = interview.get("description", "").lower()
+    questions = interview.get("questions", [])
+
+    # Check 1: Interview must be complete
+    if interview_status != "complete":
+        print(json.dumps({
+            "permissionDecision": "deny",
+            "reason": f"""❌ BLOCKED: Interview phase not complete.
+
+Current status: {interview_status}
+AskUserQuestion calls: {interview.get('user_question_count', 0)}
+
+═══════════════════════════════════════════════════════════
+⚠️ YOU MUST STOP AND ASK THE USER QUESTIONS NOW
+═══════════════════════════════════════════════════════════
+
+Use the AskUserQuestion tool to ask EACH of these questions ONE AT A TIME:
+
+1. "What is the primary purpose of this endpoint?"
+2. "Who will use it and how?"
+3. "What parameters are essential vs optional?"
+
+WAIT for the user's response after EACH question before continuing.
+
+DO NOT:
+❌ Make up answers yourself
+❌ Assume what the user wants
+❌ Mark the interview as complete without asking
+❌ Try to write any code until you have real answers
+
+The system is tracking your AskUserQuestion calls. You need at least 3
+actual calls with user responses to proceed."""
+        }))
+        sys.exit(0)
+
+    # Check 2: Must have minimum questions
+    if len(questions) < MIN_QUESTIONS:
+        print(json.dumps({
+            "permissionDecision": "deny",
+            "reason": f"""❌ Interview incomplete - not enough questions asked.
+
+Questions recorded: {len(questions)}
+Minimum required: {MIN_QUESTIONS}
+
+You must ask the user more questions about their requirements.
+DO NOT proceed without understanding the user's actual needs."""
+        }))
+        sys.exit(0)
+
+    # Check 2.5: Verify AskUserQuestion tool was actually used
+    user_question_count = interview.get("user_question_count", 0)
+    tool_used_count = sum(1 for q in questions if q.get("tool_used", False))
+
+    if tool_used_count < MIN_QUESTIONS:
+        print(json.dumps({
+            "permissionDecision": "deny",
+            "reason": f"""❌ Interview not conducted properly.
+
+AskUserQuestion tool uses tracked: {tool_used_count}
+Minimum required: {MIN_QUESTIONS}
+
+You MUST use the AskUserQuestion tool to ask the user directly.
+Do NOT make up answers or mark the interview as complete without
+actually asking the user and receiving their responses.
+
+The system tracks when AskUserQuestion is used. Self-answering
+will be detected and blocked."""
+        }))
+        sys.exit(0)
+
+    # Check 3: Look for self-answer indicators
+    for indicator in SELF_ANSWER_INDICATORS:
+        if indicator in interview_desc:
+            print(json.dumps({
+                "permissionDecision": "deny",
+                "reason": f"""❌ Interview appears to be self-answered.
+
+Detected: "{indicator}" in interview description.
+
+You MUST actually ask the user questions using AskUserQuestion.
+Self-answering the interview defeats its purpose.
+
+Reset the interview phase and ask the user directly:
+1. What do you want this endpoint to do?
+2. Which providers/models should it support?
+3. What parameters matter most to you?
+
+Wait for their real answers before proceeding."""
+            }))
+            sys.exit(0)
+
+    # All checks passed
+    print(json.dumps({"permissionDecision": "allow"}))
+    sys.exit(0)
+
+
+if __name__ == "__main__":
+    main()
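enforce-interview.py and verify-implementation.py only take effect once they are registered as PreToolUse hooks, and api-workflow-check.py as a Stop hook; that wiring lives in the updated package/templates/settings.json (+23 -1, not shown in this excerpt). A rough sketch of such a registration, assuming Claude Code's standard hooks settings shape and hypothetical command paths (track-tool-use.py, which reads tool_output, would presumably register under PostToolUse and is omitted here):

    {
      "hooks": {
        "PreToolUse": [
          {
            "matcher": "Write|Edit",
            "hooks": [
              { "type": "command", "command": "python3 .claude/hooks/enforce-interview.py" },
              { "type": "command", "command": "python3 .claude/hooks/verify-implementation.py" }
            ]
          }
        ],
        "Stop": [
          {
            "hooks": [
              { "type": "command", "command": "python3 .claude/hooks/api-workflow-check.py" }
            ]
          }
        ]
      }
    }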
package/hooks/track-tool-use.py
CHANGED

@@ -34,11 +34,12 @@ def main():
     tool_input = input_data.get("tool_input", {})
     tool_output = input_data.get("tool_output", {})
 
-    #
+    # Track research tools AND user questions
     research_tools = ["WebSearch", "WebFetch", "mcp__context7"]
     is_research_tool = any(t in tool_name for t in research_tools)
+    is_user_question = tool_name == "AskUserQuestion"
 
-    if not is_research_tool:
+    if not is_research_tool and not is_user_question:
         print(json.dumps({"continue": True}))
         sys.exit(0)
 

@@ -51,8 +52,42 @@ def main():
     else:
         state = create_initial_state()
 
-    # Get
+    # Get phases
     phases = state.setdefault("phases", {})
+
+    # Handle AskUserQuestion separately - track in interview phase
+    if is_user_question:
+        interview = phases.setdefault("interview", {
+            "status": "not_started",
+            "questions": [],
+            "user_question_count": 0
+        })
+
+        # Track the question
+        questions = interview.setdefault("questions", [])
+        user_count = interview.get("user_question_count", 0) + 1
+        interview["user_question_count"] = user_count
+
+        question_entry = {
+            "question": tool_input.get("question", ""),
+            "timestamp": datetime.now().isoformat(),
+            "tool_used": True  # Proves AskUserQuestion was actually called
+        }
+        questions.append(question_entry)
+
+        # Update interview status
+        if interview.get("status") == "not_started":
+            interview["status"] = "in_progress"
+            interview["started_at"] = datetime.now().isoformat()
+
+        interview["last_activity"] = datetime.now().isoformat()
+
+        # Save and exit
+        STATE_FILE.write_text(json.dumps(state, indent=2))
+        print(json.dumps({"continue": True}))
+        sys.exit(0)
+
+    # Get or create research phase (for research tools)
     research = phases.setdefault("research_initial", {
         "status": "in_progress",
         "sources": [],
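A quick way to exercise the new AskUserQuestion branch is to pipe a hand-built payload into the hook. This is only a sketch: the field names come from the hook itself, the real payload Claude Code sends carries additional keys, and running it will create or update .claude/api-dev-state.json next to the hooks directory.

    # smoke_test_track_tool_use.py (hypothetical helper, not part of the package)
    import json
    import subprocess

    payload = {
        "tool_name": "AskUserQuestion",
        "tool_input": {"question": "What is the primary purpose of this endpoint?"},
        "tool_output": {},
    }

    result = subprocess.run(
        ["python3", ".claude/hooks/track-tool-use.py"],
        input=json.dumps(payload),
        capture_output=True,
        text=True,
    )

    print(result.stdout)  # expected: {"continue": true}
    # The interview phase in api-dev-state.json should now show
    # user_question_count == 1 and a question entry with "tool_used": true.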
package/hooks/verify-implementation.py
ADDED

@@ -0,0 +1,225 @@
+#!/usr/bin/env python3
+"""
+Hook: PreToolUse for Write/Edit (runs AFTER enforce-research and enforce-interview)
+Purpose: Verify implementation matches interview requirements
+
+This hook addresses these gaps:
+1. AI uses exact user terminology when researching (not paraphrasing)
+2. All changed files are tracked and verified
+3. Test files use same patterns as production code
+
+Returns:
+- {"permissionDecision": "allow"} - Let the tool run
+- {"permissionDecision": "deny", "reason": "..."} - Block with explanation
+"""
+import json
+import sys
+import re
+from pathlib import Path
+
+# State file is in .claude/ directory (sibling to hooks/)
+STATE_FILE = Path(__file__).parent.parent / "api-dev-state.json"
+
+
+def extract_key_terms(text: str) -> list[str]:
+    """Extract likely important terms from interview answers.
+
+    These are terms that should appear in research and implementation:
+    - Proper nouns (capitalized multi-word phrases)
+    - Technical terms (SDK names, API names, etc.)
+    - Specific patterns (e.g., "via X", "using X", "with X")
+    """
+    terms = []
+
+    # Look for "via X", "using X", "with X" patterns
+    via_patterns = re.findall(r'(?:via|using|with|through)\s+([A-Z][A-Za-z0-9\s]+?)(?:[,.\n]|$)', text)
+    terms.extend(via_patterns)
+
+    # Look for capitalized phrases (likely proper nouns/product names)
+    # e.g., "Vercel AI Gateway", "OpenAI API"
+    proper_nouns = re.findall(r'[A-Z][a-z]+(?:\s+[A-Z][a-z]+)+', text)
+    terms.extend(proper_nouns)
+
+    # Clean up and dedupe
+    terms = [t.strip() for t in terms if len(t.strip()) > 3]
+    return list(set(terms))
+
+
+def check_research_used_exact_terms(state: dict) -> list[str]:
+    """Verify research sources used the exact terms from interview.
+
+    Gap 1 Fix: When user provides a term, use THAT EXACT TERM to search.
+    """
+    issues = []
+
+    interview = state.get("phases", {}).get("interview", {})
+    research = state.get("phases", {}).get("research_initial", {})
+    deep_research = state.get("phases", {}).get("research_deep", {})
+
+    questions = interview.get("questions", [])
+    if isinstance(questions, list) and len(questions) > 0:
+        # Extract key terms from all interview answers
+        all_text = " ".join(str(q) for q in questions)
+        key_terms = extract_key_terms(all_text)
+
+        # Check if these terms appear in research sources
+        research_sources = research.get("sources", []) + deep_research.get("sources", [])
+        research_text = " ".join(str(s) for s in research_sources).lower()
+
+        missing_terms = []
+        for term in key_terms:
+            # Check if term or close variant appears in research
+            term_lower = term.lower()
+            if term_lower not in research_text:
+                # Check for partial matches (e.g., "AI Gateway" in "Vercel AI Gateway")
+                words = term_lower.split()
+                if not any(all(w in research_text for w in words) for _ in [1]):
+                    missing_terms.append(term)
+
+        if missing_terms:
+            issues.append(
+                f"⚠️ Gap 1 Warning: User-specified terms not found in research:\n"
+                f"   Terms from interview: {missing_terms}\n"
+                f"   These EXACT terms should have been searched."
+            )
+
+    return issues
+
+
+def check_files_tracked(state: dict, file_path: str) -> list[str]:
+    """Verify we're tracking all files being modified.
+
+    Gap 2 Fix: Track files as they're modified, not after claiming completion.
+    """
+    issues = []
+
+    files_created = state.get("files_created", [])
+    files_modified = state.get("files_modified", [])
+    all_tracked = files_created + files_modified
+
+    # Normalize paths for comparison
+    normalized_path = file_path.replace("\\", "/")
+
+    # Check if this file is a test file
+    is_test = ".test." in file_path or "/__tests__/" in file_path or ".spec." in file_path
+
+    # For non-test files in api/ or lib/, they should be tracked
+    is_trackable = ("/api/" in file_path or "/lib/" in file_path) and file_path.endswith(".ts")
+
+    if is_trackable and not is_test:
+        # Check if any tracked file matches this one
+        found = False
+        for tracked in all_tracked:
+            if normalized_path.endswith(tracked) or tracked in normalized_path:
+                found = True
+                break
+
+        # Don't block, but log that this file should be tracked
+        if not found:
+            state.setdefault("files_modified", []).append(normalized_path.split("/src/")[-1] if "/src/" in normalized_path else normalized_path)
+            STATE_FILE.write_text(json.dumps(state, indent=2))
+
+    return issues
+
+
+def check_test_production_alignment(state: dict, file_path: str, content: str = "") -> list[str]:
+    """Verify test files use same patterns as production code.
+
+    Gap 5 Fix: Test files must use the same patterns as production code.
+    """
+    issues = []
+
+    is_test = ".test." in file_path or "/__tests__/" in file_path or ".spec." in file_path
+
+    if not is_test:
+        return issues
+
+    # Check interview for key configuration patterns
+    interview = state.get("phases", {}).get("interview", {})
+    questions = interview.get("questions", [])
+    all_text = " ".join(str(q) for q in questions)
+
+    # Look for environment variable patterns mentioned in interview
+    env_patterns = re.findall(r'[A-Z_]+_(?:KEY|API_KEY|TOKEN|SECRET)', all_text)
+
+    if env_patterns and content:
+        # If interview mentions specific env vars, test should check those
+        for pattern in env_patterns:
+            if pattern in content:
+                # Good - test is checking the right env var
+                pass
+
+    # Look for mismatches - e.g., checking OPENAI_API_KEY when we said "single gateway key"
+    if "gateway" in all_text.lower() or "single key" in all_text.lower():
+        # Interview mentioned gateway/single key - tests shouldn't check individual provider keys
+        old_patterns = ["OPENAI_API_KEY", "ANTHROPIC_API_KEY", "GOOGLE_API_KEY", "PERPLEXITY_API_KEY"]
+        found_old = [p for p in old_patterns if p in content]
+
+        if found_old and "AI_GATEWAY" not in content:
+            issues.append(
+                f"⚠️ Gap 5 Warning: Test may be checking wrong environment variables.\n"
+                f"   Interview mentioned: gateway/single key pattern\n"
+                f"   Test checks: {found_old}\n"
+                f"   Consider: Should test check AI_GATEWAY_API_KEY instead?"
+            )
+
+    return issues
+
+
+def main():
+    # Read hook input from stdin
+    try:
+        input_data = json.load(sys.stdin)
+    except json.JSONDecodeError:
+        print(json.dumps({"permissionDecision": "allow"}))
+        sys.exit(0)
+
+    tool_input = input_data.get("tool_input", {})
+    file_path = tool_input.get("file_path", "")
+    new_content = tool_input.get("content", "") or tool_input.get("new_string", "")
+
+    # Only check for API/schema/lib files
+    is_api_file = "/api/" in file_path and file_path.endswith(".ts")
+    is_lib_file = "/lib/" in file_path and file_path.endswith(".ts")
+
+    if not is_api_file and not is_lib_file:
+        print(json.dumps({"permissionDecision": "allow"}))
+        sys.exit(0)
+
+    # Load state
+    if not STATE_FILE.exists():
+        print(json.dumps({"permissionDecision": "allow"}))
+        sys.exit(0)
+
+    try:
+        state = json.loads(STATE_FILE.read_text())
+    except json.JSONDecodeError:
+        print(json.dumps({"permissionDecision": "allow"}))
+        sys.exit(0)
+
+    # Run verification checks
+    all_issues = []
+
+    # Check 1: Research used exact terms from interview
+    all_issues.extend(check_research_used_exact_terms(state))
+
+    # Check 2: Track this file
+    all_issues.extend(check_files_tracked(state, file_path))
+
+    # Check 5: Test/production alignment
+    all_issues.extend(check_test_production_alignment(state, file_path, new_content))
+
+    # If there are issues, warn but don't block (these are warnings)
+    # The user can review these in the state file
+    if all_issues:
+        # Store warnings in state for later review
+        state.setdefault("verification_warnings", []).extend(all_issues)
+        STATE_FILE.write_text(json.dumps(state, indent=2))
+
+    # Allow the operation - these are warnings, not blockers
+    print(json.dumps({"permissionDecision": "allow"}))
+    sys.exit(0)
+
+
+if __name__ == "__main__":
+    main()
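To make the Gap 1 check concrete, here is a worked example of extract_key_terms() using the two regexes copied from the hook above; the sample answer text is invented for illustration:

    import re

    text = "We call providers via Vercel AI Gateway, and expose it through a Next Route Handler."

    # "via/using/with/through <Capitalized phrase>" -> captures "Vercel AI Gateway"
    via_terms = re.findall(r'(?:via|using|with|through)\s+([A-Z][A-Za-z0-9\s]+?)(?:[,.\n]|$)', text)

    # Capitalized multi-word phrases -> captures "Next Route Handler"
    proper_nouns = re.findall(r'[A-Z][a-z]+(?:\s+[A-Z][a-z]+)+', text)

    print(via_terms)     # ['Vercel AI Gateway']
    print(proper_nouns)  # ['Next Route Handler']

check_research_used_exact_terms() then warns if those strings never appear in the recorded research sources, which is the nudge toward searching the user's exact wording rather than a paraphrase.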