npm - tribunal-kit - Versions diffs - 4.0.1 → 4.2.0 - Mend

tribunal-kit 4.0.1 → 4.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (108) hide show

package/.agent/GEMINI.md +4 -2
package/.agent/agents/api-architect.md +66 -0
package/.agent/agents/db-latency-auditor.md +216 -0
package/.agent/agents/precedence-reviewer.md +41 -4
package/.agent/agents/resilience-reviewer.md +88 -0
package/.agent/agents/schema-reviewer.md +67 -0
package/.agent/agents/throughput-optimizer.md +299 -0
package/.agent/agents/vitals-reviewer.md +223 -0
package/.agent/history/case-law/cases/case-0001.json +33 -0
package/.agent/history/case-law/index.json +35 -0
package/.agent/rules/GEMINI.md +20 -3
package/.agent/scripts/case_law_manager.py +237 -7
package/.agent/skills/agent-organizer/SKILL.md +42 -0
package/.agent/skills/agentic-patterns/SKILL.md +42 -0
package/.agent/skills/ai-prompt-injection-defense/SKILL.md +42 -0
package/.agent/skills/api-patterns/SKILL.md +42 -0
package/.agent/skills/api-security-auditor/SKILL.md +42 -0
package/.agent/skills/app-builder/SKILL.md +42 -0
package/.agent/skills/app-builder/templates/SKILL.md +70 -0
package/.agent/skills/appflow-wireframe/SKILL.md +42 -0
package/.agent/skills/architecture/SKILL.md +42 -0
package/.agent/skills/authentication-best-practices/SKILL.md +42 -0
package/.agent/skills/bash-linux/SKILL.md +42 -0
package/.agent/skills/behavioral-modes/SKILL.md +42 -0
package/.agent/skills/brainstorming/SKILL.md +42 -0
package/.agent/skills/building-native-ui/SKILL.md +42 -0
package/.agent/skills/clean-code/SKILL.md +42 -0
package/.agent/skills/code-review-checklist/SKILL.md +42 -0
package/.agent/skills/config-validator/SKILL.md +42 -0
package/.agent/skills/csharp-developer/SKILL.md +42 -0
package/.agent/skills/data-validation-schemas/SKILL.md +320 -0
package/.agent/skills/database-design/SKILL.md +42 -0
package/.agent/skills/deployment-procedures/SKILL.md +42 -0
package/.agent/skills/devops-engineer/SKILL.md +42 -0
package/.agent/skills/devops-incident-responder/SKILL.md +42 -0
package/.agent/skills/documentation-templates/SKILL.md +42 -0
package/.agent/skills/edge-computing/SKILL.md +42 -0
package/.agent/skills/error-resilience/SKILL.md +420 -0
package/.agent/skills/extract-design-system/SKILL.md +42 -0
package/.agent/skills/framer-motion-expert/SKILL.md +42 -0
package/.agent/skills/frontend-design/SKILL.md +42 -0
package/.agent/skills/game-design-expert/SKILL.md +42 -0
package/.agent/skills/game-engineering-expert/SKILL.md +42 -0
package/.agent/skills/geo-fundamentals/SKILL.md +42 -0
package/.agent/skills/github-operations/SKILL.md +42 -0
package/.agent/skills/gsap-core/SKILL.md +302 -0
package/.agent/skills/gsap-frameworks/SKILL.md +201 -0
package/.agent/skills/gsap-performance/SKILL.md +127 -0
package/.agent/skills/gsap-plugins/SKILL.md +474 -0
package/.agent/skills/gsap-react/SKILL.md +183 -0
package/.agent/skills/gsap-scrolltrigger/SKILL.md +344 -0
package/.agent/skills/gsap-timeline/SKILL.md +155 -0
package/.agent/skills/gsap-utils/SKILL.md +332 -0
package/.agent/skills/i18n-localization/SKILL.md +42 -0
package/.agent/skills/intelligent-routing/SKILL.md +72 -1
package/.agent/skills/lint-and-validate/SKILL.md +42 -0
package/.agent/skills/llm-engineering/SKILL.md +42 -0
package/.agent/skills/local-first/SKILL.md +42 -0
package/.agent/skills/mcp-builder/SKILL.md +42 -0
package/.agent/skills/mobile-design/SKILL.md +42 -0
package/.agent/skills/monorepo-management/SKILL.md +326 -0
package/.agent/skills/motion-engineering/SKILL.md +42 -0
package/.agent/skills/nextjs-react-expert/SKILL.md +42 -0
package/.agent/skills/nodejs-best-practices/SKILL.md +42 -0
package/.agent/skills/observability/SKILL.md +42 -0
package/.agent/skills/parallel-agents/SKILL.md +42 -0
package/.agent/skills/performance-profiling/SKILL.md +42 -0
package/.agent/skills/plan-writing/SKILL.md +42 -0
package/.agent/skills/platform-engineer/SKILL.md +42 -0
package/.agent/skills/playwright-best-practices/SKILL.md +42 -0
package/.agent/skills/powershell-windows/SKILL.md +42 -0
package/.agent/skills/project-idioms/SKILL.md +42 -0
package/.agent/skills/python-patterns/SKILL.md +42 -0
package/.agent/skills/python-pro/SKILL.md +42 -0
package/.agent/skills/react-specialist/SKILL.md +42 -0
package/.agent/skills/readme-builder/SKILL.md +42 -0
package/.agent/skills/realtime-patterns/SKILL.md +42 -0
package/.agent/skills/red-team-tactics/SKILL.md +42 -0
package/.agent/skills/rust-pro/SKILL.md +42 -0
package/.agent/skills/seo-fundamentals/SKILL.md +42 -0
package/.agent/skills/server-management/SKILL.md +42 -0
package/.agent/skills/shadcn-ui-expert/SKILL.md +42 -0
package/.agent/skills/skill-creator/SKILL.md +42 -0
package/.agent/skills/sql-pro/SKILL.md +42 -0
package/.agent/skills/supabase-postgres-best-practices/SKILL.md +42 -0
package/.agent/skills/swiftui-expert/SKILL.md +42 -0
package/.agent/skills/systematic-debugging/SKILL.md +42 -0
package/.agent/skills/tailwind-patterns/SKILL.md +42 -0
package/.agent/skills/tdd-workflow/SKILL.md +42 -0
package/.agent/skills/test-result-analyzer/SKILL.md +42 -0
package/.agent/skills/testing-patterns/SKILL.md +42 -0
package/.agent/skills/trend-researcher/SKILL.md +42 -0
package/.agent/skills/typescript-advanced/SKILL.md +327 -0
package/.agent/skills/ui-ux-pro-max/SKILL.md +42 -0
package/.agent/skills/ui-ux-researcher/SKILL.md +42 -0
package/.agent/skills/vue-expert/SKILL.md +42 -0
package/.agent/skills/vulnerability-scanner/SKILL.md +42 -0
package/.agent/skills/web-accessibility-auditor/SKILL.md +42 -0
package/.agent/skills/web-design-guidelines/SKILL.md +42 -0
package/.agent/skills/webapp-testing/SKILL.md +42 -0
package/.agent/skills/whimsy-injector/SKILL.md +42 -0
package/.agent/skills/workflow-optimizer/SKILL.md +42 -0
package/.agent/workflows/tribunal-backend.md +13 -2
package/.agent/workflows/tribunal-full.md +15 -8
package/.agent/workflows/tribunal-speed.md +183 -0
package/bin/tribunal-kit.js +10 -2
package/package.json +2 -2
package/.agent/skills/gsap-expert/SKILL.md +0 -194

package/.agent/scripts/case_law_manager.py CHANGED Viewed

@@ -22,9 +22,11 @@ import os
 import sys
 import json
 import hashlib
+import math
 import re
 from pathlib import Path
 from datetime import datetime
+from collections import Counter
 # ── Colours ──────────────────────────────────────────────────────────────────
 GREEN  = "\033[92m"
@@ -59,7 +61,28 @@ VALID_DOMAINS = {
     "performance", "mobile", "testing", "devops", "general"
 }
-VALID_VERDICTS = {"REJECTED", "APPROVED_WITH_CONDITIONS", "PRECEDENT_SET"}
+VALID_VERDICTS = {"REJECTED", "APPROVED_WITH_CONDITIONS", "PRECEDENT_SET", "OVERRULED"}
+# ── Noise filter (skip trivial rejections during auto-record) ────────────────
+NOISE_PATTERNS = [
+    # Pure layout / formatting complaints
+    r"\bformatting\b",
+    r"\bwhitespace\b",
+    r"\bindent(ation)?\b",
+    # Import ordering and punctuation nits
+    r"\bimport\s+order\b",
+    r"\btrailing\s+(comma|space|whitespace)\b",
+    r"\bsemicolon\b",
+    # Findings that name formatter / linter tooling
+    r"\bprettier\b",
+    r"\beslint.*fix\b",
+    r"\blint.*only\b",
+]
+def is_noise_rejection(reason: str) -> bool:
+    """Tell whether a rejection reason is cosmetic only.
+    The reason is lower-cased and searched against every regex in
+    NOISE_PATTERNS; a single hit is enough to classify it as noise.
+    """
+    normalized = reason.lower()
+    return any(re.search(noise_rx, normalized) for noise_rx in NOISE_PATTERNS)
 # ── Trivial-change filter (Semantic Delta) ────────────────────────────────────
 TRIVIAL_PATTERNS = [
@@ -158,9 +181,54 @@ def extract_tags(text: str) -> list[str]:
             break
     return tags
-# ── Similarity scoring ────────────────────────────────────────────────────────
+# ── Similarity scoring (TF-IDF Cosine — token-free) ──────────────────────────
+def _build_idf(corpus: list[list[str]]) -> dict[str, float]:
+    """Derive smoothed IDF weights from a corpus of tag lists.
+    Each inner list is one document, and a tag counts once per document no
+    matter how often it repeats there. The weight of a tag is
+    ``log((N + 1) / (df + 1)) + 1`` where N is the number of documents and df
+    the number of documents containing the tag. An empty corpus yields ``{}``.
+    """
+    total_docs = len(corpus)
+    if not total_docs:
+        return {}
+    # De-duplicate inside each document so repeats do not inflate document frequency.
+    containing_docs = Counter(tag for doc_tags in corpus for tag in set(doc_tags))
+    weights: dict[str, float] = {}
+    for tag, df in containing_docs.items():
+        weights[tag] = math.log((total_docs + 1) / (df + 1)) + 1.0
+    return weights
+def tfidf_cosine_similarity(query_tags: list[str], case_tags: list[str],
+                            idf: dict[str, float]) -> float:
+    """
+    Cosine similarity between two tag lists under TF-IDF weighting.
+    A tag's weight is its count in the list multiplied by ``idf[tag]``; tags
+    missing from ``idf`` fall back to a weight of 1.0. Returns 0.0 when either
+    list is empty or either weighted vector has zero magnitude.
+    """
+    if not (query_tags and case_tags):
+        return 0.0
+    query_counts = Counter(query_tags)
+    case_counts = Counter(case_tags)
+    dot_product = 0.0
+    query_norm_sq = 0.0
+    case_norm_sq = 0.0
+    # Walk the union of both vocabularies; a tag absent from one side counts as zero.
+    for tag in set(query_counts) | set(case_counts):
+        query_weight = query_counts.get(tag, 0) * idf.get(tag, 1.0)
+        case_weight = case_counts.get(tag, 0) * idf.get(tag, 1.0)
+        dot_product += query_weight * case_weight
+        query_norm_sq += query_weight ** 2
+        case_norm_sq += case_weight ** 2
+    if query_norm_sq == 0 or case_norm_sq == 0:
+        return 0.0
+    return dot_product / (math.sqrt(query_norm_sq) * math.sqrt(case_norm_sq))
+# Backward-compatibility alias
 def jaccard_similarity(tags_a: list[str], tags_b: list[str]) -> float:
-    """Simple token overlap — no LLM required."""
+    """Legacy fallback — kept for compatibility but no longer primary."""
     if not tags_a or not tags_b:
         return 0.0
     set_a, set_b = set(tags_a), set(tags_b)
@@ -271,10 +339,14 @@ def cmd_search_cases(args: list[str]) -> None:
         print(f"{YELLOW}No cases recorded yet. Use 'add-case' to record your first rejection.{RESET}")
         return
-    # Score every case
+    # Build corpus IDF from all stored cases
+    corpus = [entry.get("tags", []) for entry in index["cases"]]
+    idf = _build_idf(corpus)
+    # Score every case with TF-IDF cosine
     scored = []
     for entry in index["cases"]:
-        score = jaccard_similarity(query_tags, entry.get("tags", []))
+        score = tfidf_cosine_similarity(query_tags, entry.get("tags", []), idf)
         if score > 0.0:
             scored.append((score, entry))
@@ -438,6 +510,160 @@ def cmd_stats(args: list[str]) -> None:
     print(f"{CYAN}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━{RESET}\n")
+def cmd_auto_record(args: list[str]) -> None:
+    """
+    Non-interactive auto-recording for AI-driven case creation.
+    Called by the precedence-reviewer after a Tribunal rejection.
+    Flags are read from ``args``. Only when ``args`` is empty does the command
+    fall back to the process command line (``sys.argv[1:]``), so programmatic
+    callers get the same behaviour as the CLI.
+    Recognised flags: --diff and --reason (both required and non-blank),
+    --domain (default "general"), --verdict (default "REJECTED"),
+    --reviewer and --pr-ref (optional). An unknown domain or verdict is
+    replaced by its default rather than rejected.
+    Exits with status 1 when --diff or --reason is missing or blank. Returns
+    without recording when the reason matches the noise filter or when a case
+    with the same fingerprint is already in the index.
+    Usage:
+      python case_law_manager.py auto-record \\
+        --diff "code snippet" \\
+        --reason "why rejected" \\
+        --domain security \\
+        --verdict REJECTED \\
+        --reviewer security-auditor
+    """
+    # Prefer the argument list handed to this command (as cmd_overrule does);
+    # consult the process command line only when no list was supplied.
+    flag_source = args if args else sys.argv[1:]
+    def get_flag(name: str) -> str:
+        """Return the token following --<name>, or "" if the flag is absent or last."""
+        flag = f"--{name}"
+        if flag in flag_source:
+            idx = flag_source.index(flag)
+            if idx + 1 < len(flag_source):
+                return flag_source[idx + 1]
+        return ""
+    diff_text = get_flag("diff")
+    reason    = get_flag("reason")
+    domain    = get_flag("domain") or "general"
+    verdict   = get_flag("verdict") or "REJECTED"
+    reviewer  = get_flag("reviewer") or None
+    pr_ref    = get_flag("pr-ref") or None
+    # Whitespace-only values would be stored as empty strings after .strip(),
+    # so treat them the same as a missing flag.
+    if not diff_text.strip() or not reason.strip():
+        print(f"{RED}✖ auto-record requires --diff and --reason flags.{RESET}")
+        print(f"  Usage: auto-record --diff \"code\" --reason \"why\" --domain security --reviewer agent-name")
+        sys.exit(1)
+    # Noise filter — skip trivial rejections
+    if is_noise_rejection(reason):
+        print(f"{DIM}⊘ Skipped: trivial rejection (noise filter matched).{RESET}")
+        return
+    # Unknown values fall back to defaults so an automated caller never aborts here.
+    if domain not in VALID_DOMAINS:
+        domain = "general"
+    if verdict not in VALID_VERDICTS:
+        verdict = "REJECTED"
+    # Duplicate check: fingerprint match
+    fingerprint = content_hash(diff_text)
+    index = load_index()
+    for existing in index["cases"]:
+        if existing.get("fingerprint") == fingerprint:
+            print(f"{YELLOW}⊘ Duplicate: Case #{existing['id']:04d} already records this pattern.{RESET}")
+            return
+    # Build and persist
+    delta = semantic_delta(diff_text)
+    tags = extract_tags(diff_text + " " + reason)
+    case_id = index["next_id"]
+    case_record = {
+        "id": case_id,
+        "fingerprint": fingerprint,
+        "timestamp": datetime.now().isoformat(timespec="seconds"),
+        "domain": domain,
+        "verdict": verdict,
+        "reason": reason.strip(),
+        "pr_ref": pr_ref,
+        "reviewer": reviewer,
+        "tags": tags,
+        "diff_raw": diff_text.strip(),
+        "diff_delta": delta,
+        "auto_recorded": True
+    }
+    # The full record goes to its own case file; the index keeps a short summary.
+    save_case(case_record)
+    index["cases"].append({
+        "id": case_id,
+        "fingerprint": fingerprint,
+        "domain": domain,
+        "verdict": verdict,
+        "tags": tags,
+        "timestamp": case_record["timestamp"],
+        "reason_summary": reason.strip()[:120]
+    })
+    index["next_id"] = case_id + 1
+    save_index(index)
+    print(f"{GREEN}✔ Auto-recorded Case #{case_id:04d}{RESET} [{verdict}] domain={domain}")
+    print(f"  {DIM}Reason: {reason[:80]}{RESET}")
+def cmd_overrule(args: list[str]) -> None:
+    """
+    Mark an existing precedent as OVERRULED without deleting it.
+    Flags (read from ``args``):
+      --id <N>         case to overrule (required, must parse as an integer)
+      --reason <text>  justification; requested via prompt_line when omitted
+    The case record keeps its history: the previous verdict, the overrule
+    reason and a timestamp are stored on it, and the verdict of the matching
+    index entry is updated. Exits with status 1 when the id is missing or not
+    an integer, when the case cannot be loaded, or when no reason is given.
+    Returns early if the case is already OVERRULED.
+    """
+    def _value_after(flag: str):
+        """Return the token that follows ``flag`` in args, or None if there is none."""
+        if flag not in args:
+            return None
+        position = args.index(flag) + 1
+        return args[position] if position < len(args) else None
+    raw_id = _value_after("--id")
+    try:
+        case_id = None if raw_id is None else int(raw_id)
+    except ValueError:
+        case_id = None
+    if case_id is None:
+        print(f"{RED}✖ Provide a case ID: overrule --id 7{RESET}")
+        sys.exit(1)
+    case_record = load_case(case_id)
+    if not case_record:
+        print(f"{RED}✖ Case #{case_id:04d} not found.{RESET}")
+        sys.exit(1)
+    old_verdict = case_record["verdict"]
+    if old_verdict == "OVERRULED":
+        print(f"{YELLOW}Case #{case_id:04d} is already OVERRULED.{RESET}")
+        return
+    # Ask interactively only when the flag supplied nothing usable.
+    reason = _value_after("--reason") or prompt_line("Reason for overruling this precedent:")
+    if not (reason and reason.strip()):
+        print(f"{RED}✖ An overrule reason is required.{RESET}")
+        sys.exit(1)
+    # Record the overrule on the case itself so the earlier verdict stays visible.
+    case_record.update({
+        "verdict": "OVERRULED",
+        "overruled_at": datetime.now().isoformat(timespec="seconds"),
+        "overrule_reason": reason.strip(),
+        "previous_verdict": old_verdict,
+    })
+    save_case(case_record)
+    # Mirror the new verdict onto the first index entry with this id, if any.
+    index = load_index()
+    index_entry = next((item for item in index["cases"] if item["id"] == case_id), None)
+    if index_entry is not None:
+        index_entry["verdict"] = "OVERRULED"
+    save_index(index)
+    print(f"\n{GREEN}✔ Case #{case_id:04d} OVERRULED{RESET}")
+    print(f"  {DIM}Previous verdict : {old_verdict}{RESET}")
+    print(f"  {DIM}Overrule reason  : {reason.strip()}{RESET}")
+    print(f"  {DIM}The case is preserved in history but no longer blocks reviews.{RESET}")
+    print()
 # ── Input helpers ─────────────────────────────────────────────────────────────
 def prompt_multiline(prompt: str, sentinel: str) -> str:
     print(f"  {BOLD}{prompt}{RESET}")
@@ -480,9 +706,11 @@ def prompt_choice(label: str, choices: list[str], default: str) -> str:
 # ── Main ──────────────────────────────────────────────────────────────────────
 COMMANDS = {
     "add-case":     cmd_add_case,
+    "auto-record":  cmd_auto_record,
     "search-cases": cmd_search_cases,
     "list":         cmd_list,
     "show":         cmd_show,
+    "overrule":     cmd_overrule,
     "export":       cmd_export,
     "stats":        cmd_stats,
 }
@@ -498,10 +726,12 @@ def main() -> None:
 {BOLD}case_law_manager.py{RESET} — Tribunal Case Law Engine
 {BOLD}Commands:{RESET}
-  add-case                      Record a new rejected pattern
-  search-cases --query <text>   Find relevant precedents (token-free)
+  add-case                      Record a new rejected pattern (interactive)
+  auto-record --diff --reason   Record a rejection (non-interactive, for AI agents)
+  search-cases --query <text>   Find relevant precedents (TF-IDF cosine, token-free)
   list [--domain <domain>]      List all recorded cases
   show --id <N>                 Show full diff for a case
+  overrule --id <N>             Formally overrule a past precedent
   export [--stdout]             Export all cases to Markdown
   stats                         Show breakdown by domain/verdict

package/.agent/skills/agent-organizer/SKILL.md CHANGED Viewed

@@ -98,3 +98,45 @@ Automation without oversight is reckless. The Organizer manages when to pause an
 2. **Recovery Gate (After 3 Failures):** "The database migration script has failed 3 times. I am halting. How would you like to proceed?"
 ---
+---
+## 🤖 LLM-Specific Traps
+AI coding assistants often fall into specific bad habits when dealing with this domain. These are strictly forbidden:
+1. **Over-engineering:** Proposing complex abstractions or distributed systems when a simpler approach suffices.
+2. **Hallucinated Libraries/Methods:** Using non-existent methods or packages. Always `// VERIFY` or check `package.json` / `requirements.txt`.
+3. **Skipping Edge Cases:** Writing the "happy path" and ignoring error handling, timeouts, or data validation.
+4. **Context Amnesia:** Forgetting the user's constraints and offering generic advice instead of tailored solutions.
+5. **Silent Degradation:** Catching and suppressing errors without logging or re-raising.
+---
+## 🏛️ Tribunal Integration (Anti-Hallucination)
+**Slash command: `/review` or `/tribunal-full`**
+**Active reviewers: `logic-reviewer` · `security-auditor`**
+### ❌ Forbidden AI Tropes
+1. **Blind Assumptions:** Never make an assumption without documenting it clearly with `// VERIFY: [reason]`.
+2. **Silent Degradation:** Catching and suppressing errors without logging or handling.
+3. **Context Amnesia:** Forgetting the user's constraints and offering generic advice instead of tailored solutions.
+### ✅ Pre-Flight Self-Audit
+Review these questions before confirming output:
+```
+✅ Did I rely ONLY on real, verified tools and methods?
+✅ Is this solution appropriately scoped to the user's constraints?
+✅ Did I handle potential failure modes and edge cases?
+✅ Have I avoided generic boilerplate that doesn't add value?
+```
+### 🛑 Verification-Before-Completion (VBC) Protocol
+**CRITICAL:** You must follow a strict "evidence-based closeout" state machine.
+- ❌ **Forbidden:** Declaring a task complete because the output "looks correct."
+- ✅ **Required:** You are explicitly forbidden from finalizing any task without providing **concrete evidence** (terminal output, passing tests, compile success, or equivalent proof) that your output works as intended.

package/.agent/skills/agentic-patterns/SKILL.md CHANGED Viewed

@@ -263,3 +263,45 @@ Evidence:    [link to terminal output, test result, or file diff]
 ```
 ---
+---
+## 🤖 LLM-Specific Traps
+AI coding assistants often fall into specific bad habits when dealing with this domain. These are strictly forbidden:
+1. **Over-engineering:** Proposing complex abstractions or distributed systems when a simpler approach suffices.
+2. **Hallucinated Libraries/Methods:** Using non-existent methods or packages. Always `// VERIFY` or check `package.json` / `requirements.txt`.
+3. **Skipping Edge Cases:** Writing the "happy path" and ignoring error handling, timeouts, or data validation.
+4. **Context Amnesia:** Forgetting the user's constraints and offering generic advice instead of tailored solutions.
+5. **Silent Degradation:** Catching and suppressing errors without logging or re-raising.
+---
+## 🏛️ Tribunal Integration (Anti-Hallucination)
+**Slash command: `/review` or `/tribunal-full`**
+**Active reviewers: `logic-reviewer` · `security-auditor`**
+### ❌ Forbidden AI Tropes
+1. **Blind Assumptions:** Never make an assumption without documenting it clearly with `// VERIFY: [reason]`.
+2. **Silent Degradation:** Catching and suppressing errors without logging or handling.
+3. **Context Amnesia:** Forgetting the user's constraints and offering generic advice instead of tailored solutions.
+### ✅ Pre-Flight Self-Audit
+Review these questions before confirming output:
+```
+✅ Did I rely ONLY on real, verified tools and methods?
+✅ Is this solution appropriately scoped to the user's constraints?
+✅ Did I handle potential failure modes and edge cases?
+✅ Have I avoided generic boilerplate that doesn't add value?
+```
+### 🛑 Verification-Before-Completion (VBC) Protocol
+**CRITICAL:** You must follow a strict "evidence-based closeout" state machine.
+- ❌ **Forbidden:** Declaring a task complete because the output "looks correct."
+- ✅ **Required:** You are explicitly forbidden from finalizing any task without providing **concrete evidence** (terminal output, passing tests, compile success, or equivalent proof) that your output works as intended.

package/.agent/skills/ai-prompt-injection-defense/SKILL.md CHANGED Viewed

@@ -132,3 +132,45 @@ Many injections occur because the LLM includes malicious data in its output, whi
 - **Enforce JSON Schemas.** If the LLM goes off-script and starts blabbering, Zod validation should instantly fail the parsing and reject the output.
 ---
+---
+## 🤖 LLM-Specific Traps
+AI coding assistants often fall into specific bad habits when dealing with this domain. These are strictly forbidden:
+1. **Over-engineering:** Proposing complex abstractions or distributed systems when a simpler approach suffices.
+2. **Hallucinated Libraries/Methods:** Using non-existent methods or packages. Always `// VERIFY` or check `package.json` / `requirements.txt`.
+3. **Skipping Edge Cases:** Writing the "happy path" and ignoring error handling, timeouts, or data validation.
+4. **Context Amnesia:** Forgetting the user's constraints and offering generic advice instead of tailored solutions.
+5. **Silent Degradation:** Catching and suppressing errors without logging or re-raising.
+---
+## 🏛️ Tribunal Integration (Anti-Hallucination)
+**Slash command: `/review` or `/tribunal-full`**
+**Active reviewers: `logic-reviewer` · `security-auditor`**
+### ❌ Forbidden AI Tropes
+1. **Blind Assumptions:** Never make an assumption without documenting it clearly with `// VERIFY: [reason]`.
+2. **Silent Degradation:** Catching and suppressing errors without logging or handling.
+3. **Context Amnesia:** Forgetting the user's constraints and offering generic advice instead of tailored solutions.
+### ✅ Pre-Flight Self-Audit
+Review these questions before confirming output:
+```
+✅ Did I rely ONLY on real, verified tools and methods?
+✅ Is this solution appropriately scoped to the user's constraints?
+✅ Did I handle potential failure modes and edge cases?
+✅ Have I avoided generic boilerplate that doesn't add value?
+```
+### 🛑 Verification-Before-Completion (VBC) Protocol
+**CRITICAL:** You must follow a strict "evidence-based closeout" state machine.
+- ❌ **Forbidden:** Declaring a task complete because the output "looks correct."
+- ✅ **Required:** You are explicitly forbidden from finalizing any task without providing **concrete evidence** (terminal output, passing tests, compile success, or equivalent proof) that your output works as intended.

package/.agent/skills/api-patterns/SKILL.md CHANGED Viewed

@@ -195,3 +195,45 @@ Protect against:
 | **OAuth 2.0 / OIDC** | Third-party login, delegated access |
 | **API Key** | Server-to-server, public API consumers |
 | **Passkey (WebAuthn)** | Modern passwordless (2026+) |
+---
+## 🤖 LLM-Specific Traps
+AI coding assistants often fall into specific bad habits when dealing with this domain. These are strictly forbidden:
+1. **Over-engineering:** Proposing complex abstractions or distributed systems when a simpler approach suffices.
+2. **Hallucinated Libraries/Methods:** Using non-existent methods or packages. Always `// VERIFY` or check `package.json` / `requirements.txt`.
+3. **Skipping Edge Cases:** Writing the "happy path" and ignoring error handling, timeouts, or data validation.
+4. **Context Amnesia:** Forgetting the user's constraints and offering generic advice instead of tailored solutions.
+5. **Silent Degradation:** Catching and suppressing errors without logging or re-raising.
+---
+## 🏛️ Tribunal Integration (Anti-Hallucination)
+**Slash command: `/review` or `/tribunal-full`**
+**Active reviewers: `logic-reviewer` · `security-auditor`**
+### ❌ Forbidden AI Tropes
+1. **Blind Assumptions:** Never make an assumption without documenting it clearly with `// VERIFY: [reason]`.
+2. **Silent Degradation:** Catching and suppressing errors without logging or handling.
+3. **Context Amnesia:** Forgetting the user's constraints and offering generic advice instead of tailored solutions.
+### ✅ Pre-Flight Self-Audit
+Review these questions before confirming output:
+```
+✅ Did I rely ONLY on real, verified tools and methods?
+✅ Is this solution appropriately scoped to the user's constraints?
+✅ Did I handle potential failure modes and edge cases?
+✅ Have I avoided generic boilerplate that doesn't add value?
+```
+### 🛑 Verification-Before-Completion (VBC) Protocol
+**CRITICAL:** You must follow a strict "evidence-based closeout" state machine.
+- ❌ **Forbidden:** Declaring a task complete because the output "looks correct."
+- ✅ **Required:** You are explicitly forbidden from finalizing any task without providing **concrete evidence** (terminal output, passing tests, compile success, or equivalent proof) that your output works as intended.

package/.agent/skills/api-security-auditor/SKILL.md CHANGED Viewed

@@ -141,3 +141,45 @@ const server = new ApolloServer({
 ```
 ---
+---
+## 🤖 LLM-Specific Traps
+AI coding assistants often fall into specific bad habits when dealing with this domain. These are strictly forbidden:
+1. **Over-engineering:** Proposing complex abstractions or distributed systems when a simpler approach suffices.
+2. **Hallucinated Libraries/Methods:** Using non-existent methods or packages. Always `// VERIFY` or check `package.json` / `requirements.txt`.
+3. **Skipping Edge Cases:** Writing the "happy path" and ignoring error handling, timeouts, or data validation.
+4. **Context Amnesia:** Forgetting the user's constraints and offering generic advice instead of tailored solutions.
+5. **Silent Degradation:** Catching and suppressing errors without logging or re-raising.
+---
+## 🏛️ Tribunal Integration (Anti-Hallucination)
+**Slash command: `/review` or `/tribunal-full`**
+**Active reviewers: `logic-reviewer` · `security-auditor`**
+### ❌ Forbidden AI Tropes
+1. **Blind Assumptions:** Never make an assumption without documenting it clearly with `// VERIFY: [reason]`.
+2. **Silent Degradation:** Catching and suppressing errors without logging or handling.
+3. **Context Amnesia:** Forgetting the user's constraints and offering generic advice instead of tailored solutions.
+### ✅ Pre-Flight Self-Audit
+Review these questions before confirming output:
+```
+✅ Did I rely ONLY on real, verified tools and methods?
+✅ Is this solution appropriately scoped to the user's constraints?
+✅ Did I handle potential failure modes and edge cases?
+✅ Have I avoided generic boilerplate that doesn't add value?
+```
+### 🛑 Verification-Before-Completion (VBC) Protocol
+**CRITICAL:** You must follow a strict "evidence-based closeout" state machine.
+- ❌ **Forbidden:** Declaring a task complete because the output "looks correct."
+- ✅ **Required:** You are explicitly forbidden from finalizing any task without providing **concrete evidence** (terminal output, passing tests, compile success, or equivalent proof) that your output works as intended.

package/.agent/skills/app-builder/SKILL.md CHANGED Viewed

@@ -520,3 +520,45 @@ Monorepo:
 |Payment|Stripe|LemonSqueezy, Paddle|
 |Email|-|Resend, SendGrid|
 |Search|-|Algolia, Typesense|
+---
+## 🤖 LLM-Specific Traps
+AI coding assistants often fall into specific bad habits when dealing with this domain. These are strictly forbidden:
+1. **Over-engineering:** Proposing complex abstractions or distributed systems when a simpler approach suffices.
+2. **Hallucinated Libraries/Methods:** Using non-existent methods or packages. Always `// VERIFY` or check `package.json` / `requirements.txt`.
+3. **Skipping Edge Cases:** Writing the "happy path" and ignoring error handling, timeouts, or data validation.
+4. **Context Amnesia:** Forgetting the user's constraints and offering generic advice instead of tailored solutions.
+5. **Silent Degradation:** Catching and suppressing errors without logging or re-raising.
+---
+## 🏛️ Tribunal Integration (Anti-Hallucination)
+**Slash command: `/review` or `/tribunal-full`**
+**Active reviewers: `logic-reviewer` · `security-auditor`**
+### ❌ Forbidden AI Tropes
+1. **Blind Assumptions:** Never make an assumption without documenting it clearly with `// VERIFY: [reason]`.
+2. **Silent Degradation:** Catching and suppressing errors without logging or handling.
+3. **Context Amnesia:** Forgetting the user's constraints and offering generic advice instead of tailored solutions.
+### ✅ Pre-Flight Self-Audit
+Review these questions before confirming output:
+```
+✅ Did I rely ONLY on real, verified tools and methods?
+✅ Is this solution appropriately scoped to the user's constraints?
+✅ Did I handle potential failure modes and edge cases?
+✅ Have I avoided generic boilerplate that doesn't add value?
+```
+### 🛑 Verification-Before-Completion (VBC) Protocol
+**CRITICAL:** You must follow a strict "evidence-based closeout" state machine.
+- ❌ **Forbidden:** Declaring a task complete because the output "looks correct."
+- ✅ **Required:** You are explicitly forbidden from finalizing any task without providing **concrete evidence** (terminal output, passing tests, compile success, or equivalent proof) that your output works as intended.

package/.agent/skills/app-builder/templates/SKILL.md CHANGED Viewed

@@ -35,3 +35,73 @@ allowed-tools: Read, Glob, Grep
 2. Match to appropriate template
 3. Read ONLY that template's TEMPLATE.md
 4. Follow its tech stack and structure
+---
+## 🚨 LLM Trap Table
+|Pattern|What AI Does Wrong|What Is Actually Correct|
+|:---|:---|:---|
+|[domain-specific trap 1]|[hallucination]|[correct behavior]|
+|[domain-specific trap 2]|[hallucination]|[correct behavior]|
+|[domain-specific trap 3]|[hallucination]|[correct behavior]|
+---
+## ✅ Pre-Flight Self-Audit
+Before producing any output, verify:
+```
+✅ Did I read the actual files before making claims about them?
+✅ Did I verify all method names against official documentation?
+✅ Did I add // VERIFY: on any uncertain API calls?
+✅ Are all imports from packages that actually exist in package.json?
+✅ Did I test my logic with edge cases (null, empty, 0, max)?
+✅ Did I avoid generating code for more than one module at a time?
+✅ Am I working from evidence, not assumption?
+```
+---
+## 🔁 VBC Protocol (Verify → Build → Confirm)
+```
+VERIFY:  Read the actual codebase before writing anything
+BUILD:   Generate the smallest meaningful unit of code
+CONFIRM: Verify the output is correct before presenting
+```
+---
+## 🚨 LLM Trap Table
+|Pattern|What AI Does Wrong|What Is Actually Correct|
+|:---|:---|:---|
+|[domain-specific trap 1]|[hallucination]|[correct behavior]|
+|[domain-specific trap 2]|[hallucination]|[correct behavior]|
+|[domain-specific trap 3]|[hallucination]|[correct behavior]|
+---
+## ✅ Pre-Flight Self-Audit
+Before producing any output, verify:
+```
+✅ Did I read the actual files before making claims about them?
+✅ Did I verify all method names against official documentation?
+✅ Did I add // VERIFY: on any uncertain API calls?
+✅ Are all imports from packages that actually exist in package.json?
+✅ Did I test my logic with edge cases (null, empty, 0, max)?
+✅ Did I avoid generating code for more than one module at a time?
+✅ Am I working from evidence, not assumption?
+```
+---
+## 🔁 VBC Protocol (Verify → Build → Confirm)
+```
+VERIFY:  Read the actual codebase before writing anything
+BUILD:   Generate the smallest meaningful unit of code
+CONFIRM: Verify the output is correct before presenting
+```