npm - tribunal-kit - Versions diffs - 3.0.0 → 3.1.0 - Mend

tribunal-kit 3.0.0 → 3.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (226)

package/.agent/ARCHITECTURE.md +99 -99
package/.agent/GEMINI.md +52 -52
package/.agent/agents/accessibility-reviewer.md +187 -220
package/.agent/agents/ai-code-reviewer.md +199 -233
package/.agent/agents/backend-specialist.md +215 -238
package/.agent/agents/code-archaeologist.md +161 -181
package/.agent/agents/database-architect.md +184 -207
package/.agent/agents/debugger.md +191 -218
package/.agent/agents/dependency-reviewer.md +103 -136
package/.agent/agents/devops-engineer.md +218 -238
package/.agent/agents/documentation-writer.md +201 -221
package/.agent/agents/explorer-agent.md +160 -180
package/.agent/agents/frontend-reviewer.md +160 -194
package/.agent/agents/frontend-specialist.md +248 -237
package/.agent/agents/game-developer.md +48 -52
package/.agent/agents/logic-reviewer.md +116 -149
package/.agent/agents/mobile-developer.md +200 -223
package/.agent/agents/mobile-reviewer.md +162 -195
package/.agent/agents/orchestrator.md +181 -211
package/.agent/agents/penetration-tester.md +157 -174
package/.agent/agents/performance-optimizer.md +183 -203
package/.agent/agents/performance-reviewer.md +178 -211
package/.agent/agents/product-manager.md +142 -162
package/.agent/agents/product-owner.md +6 -25
package/.agent/agents/project-planner.md +142 -162
package/.agent/agents/qa-automation-engineer.md +225 -242
package/.agent/agents/security-auditor.md +174 -194
package/.agent/agents/seo-specialist.md +193 -213
package/.agent/agents/sql-reviewer.md +161 -194
package/.agent/agents/supervisor-agent.md +184 -203
package/.agent/agents/swarm-worker-contracts.md +17 -17
package/.agent/agents/swarm-worker-registry.md +46 -46
package/.agent/agents/test-coverage-reviewer.md +160 -193
package/.agent/agents/test-engineer.md +0 -21
package/.agent/agents/type-safety-reviewer.md +175 -208
package/.agent/patterns/generator.md +9 -9
package/.agent/patterns/inversion.md +12 -12
package/.agent/patterns/pipeline.md +9 -9
package/.agent/patterns/reviewer.md +13 -13
package/.agent/patterns/tool-wrapper.md +9 -9
package/.agent/rules/GEMINI.md +63 -63
package/.agent/scripts/compress_skills.py +167 -0
package/.agent/scripts/consolidate_skills.py +173 -0
package/.agent/scripts/deep_compress.py +202 -0
package/.agent/scripts/minify_context.py +80 -0
package/.agent/scripts/security_scan.py +1 -1
package/.agent/scripts/strip_tribunal.py +41 -0
package/.agent/skills/agent-organizer/SKILL.md +92 -126
package/.agent/skills/agentic-patterns/SKILL.md +0 -70
package/.agent/skills/ai-prompt-injection-defense/SKILL.md +126 -160
package/.agent/skills/api-patterns/SKILL.md +123 -215
package/.agent/skills/api-security-auditor/SKILL.md +143 -177
package/.agent/skills/app-builder/SKILL.md +326 -50
package/.agent/skills/app-builder/templates/SKILL.md +13 -15
package/.agent/skills/app-builder/templates/astro-static/TEMPLATE.md +16 -16
package/.agent/skills/app-builder/templates/chrome-extension/TEMPLATE.md +22 -22
package/.agent/skills/app-builder/templates/cli-tool/TEMPLATE.md +18 -18
package/.agent/skills/app-builder/templates/electron-desktop/TEMPLATE.md +20 -20
package/.agent/skills/app-builder/templates/express-api/TEMPLATE.md +17 -17
package/.agent/skills/app-builder/templates/flutter-app/TEMPLATE.md +18 -18
package/.agent/skills/app-builder/templates/monorepo-turborepo/TEMPLATE.md +21 -21
package/.agent/skills/app-builder/templates/nextjs-fullstack/TEMPLATE.md +19 -19
package/.agent/skills/app-builder/templates/nextjs-saas/TEMPLATE.md +26 -26
package/.agent/skills/app-builder/templates/nextjs-static/TEMPLATE.md +26 -26
package/.agent/skills/app-builder/templates/nuxt-app/TEMPLATE.md +19 -19
package/.agent/skills/app-builder/templates/python-fastapi/TEMPLATE.md +18 -18
package/.agent/skills/app-builder/templates/react-native-app/TEMPLATE.md +20 -20
package/.agent/skills/appflow-wireframe/SKILL.md +87 -121
package/.agent/skills/architecture/SKILL.md +82 -252
package/.agent/skills/authentication-best-practices/SKILL.md +139 -173
package/.agent/skills/bash-linux/SKILL.md +120 -154
package/.agent/skills/behavioral-modes/SKILL.md +8 -69
package/.agent/skills/brainstorming/SKILL.md +428 -104
package/.agent/skills/building-native-ui/SKILL.md +143 -174
package/.agent/skills/clean-code/SKILL.md +323 -360
package/.agent/skills/code-review-checklist/SKILL.md +0 -62
package/.agent/skills/config-validator/SKILL.md +107 -141
package/.agent/skills/csharp-developer/SKILL.md +468 -528
package/.agent/skills/database-design/SKILL.md +104 -369
package/.agent/skills/deployment-procedures/SKILL.md +111 -145
package/.agent/skills/devops-engineer/SKILL.md +295 -332
package/.agent/skills/devops-incident-responder/SKILL.md +79 -113
package/.agent/skills/doc.md +5 -5
package/.agent/skills/documentation-templates/SKILL.md +19 -63
package/.agent/skills/edge-computing/SKILL.md +123 -157
package/.agent/skills/extract-design-system/SKILL.md +100 -134
package/.agent/skills/framer-motion-expert/SKILL.md +111 -855
package/.agent/skills/frontend-design/SKILL.md +151 -499
package/.agent/skills/game-design-expert/SKILL.md +71 -105
package/.agent/skills/game-engineering-expert/SKILL.md +88 -122
package/.agent/skills/geo-fundamentals/SKILL.md +89 -124
package/.agent/skills/github-operations/SKILL.md +279 -314
package/.agent/skills/gsap-expert/SKILL.md +119 -826
package/.agent/skills/i18n-localization/SKILL.md +104 -138
package/.agent/skills/intelligent-routing/SKILL.md +159 -127
package/.agent/skills/lint-and-validate/SKILL.md +8 -52
package/.agent/skills/llm-engineering/SKILL.md +344 -357
package/.agent/skills/local-first/SKILL.md +120 -154
package/.agent/skills/mcp-builder/SKILL.md +84 -118
package/.agent/skills/mobile-design/SKILL.md +213 -219
package/.agent/skills/motion-engineering/SKILL.md +184 -0
package/.agent/skills/nextjs-react-expert/SKILL.md +99 -698
package/.agent/skills/nodejs-best-practices/SKILL.md +498 -559
package/.agent/skills/observability/SKILL.md +293 -330
package/.agent/skills/parallel-agents/SKILL.md +88 -122
package/.agent/skills/performance-profiling/SKILL.md +217 -254
package/.agent/skills/plan-writing/SKILL.md +84 -118
package/.agent/skills/platform-engineer/SKILL.md +89 -123
package/.agent/skills/playwright-best-practices/SKILL.md +128 -162
package/.agent/skills/powershell-windows/SKILL.md +112 -146
package/.agent/skills/python-patterns/SKILL.md +7 -35
package/.agent/skills/python-pro/SKILL.md +148 -754
package/.agent/skills/react-specialist/SKILL.md +123 -827
package/.agent/skills/readme-builder/SKILL.md +15 -85
package/.agent/skills/realtime-patterns/SKILL.md +269 -304
package/.agent/skills/red-team-tactics/SKILL.md +10 -51
package/.agent/skills/rust-pro/SKILL.md +623 -701
package/.agent/skills/seo-fundamentals/SKILL.md +120 -154
package/.agent/skills/server-management/SKILL.md +156 -190
package/.agent/skills/shadcn-ui-expert/SKILL.md +172 -206
package/.agent/skills/skill-creator/SKILL.md +18 -58
package/.agent/skills/sql-pro/SKILL.md +579 -633
package/.agent/skills/supabase-postgres-best-practices/SKILL.md +28 -68
package/.agent/skills/swiftui-expert/SKILL.md +142 -176
package/.agent/skills/systematic-debugging/SKILL.md +84 -118
package/.agent/skills/tailwind-patterns/SKILL.md +516 -576
package/.agent/skills/tdd-workflow/SKILL.md +103 -137
package/.agent/skills/test-result-analyzer/SKILL.md +33 -73
package/.agent/skills/testing-patterns/SKILL.md +512 -573
package/.agent/skills/trend-researcher/SKILL.md +30 -71
package/.agent/skills/ui-ux-pro-max/SKILL.md +0 -41
package/.agent/skills/ui-ux-researcher/SKILL.md +51 -91
package/.agent/skills/vue-expert/SKILL.md +127 -866
package/.agent/skills/vulnerability-scanner/SKILL.md +354 -269
package/.agent/skills/web-accessibility-auditor/SKILL.md +159 -193
package/.agent/skills/web-design-guidelines/SKILL.md +17 -61
package/.agent/skills/webapp-testing/SKILL.md +111 -145
package/.agent/skills/whimsy-injector/SKILL.md +58 -132
package/.agent/skills/workflow-optimizer/SKILL.md +28 -68
package/.agent/workflows/api-tester.md +151 -151
package/.agent/workflows/audit.md +127 -138
package/.agent/workflows/brainstorm.md +110 -110
package/.agent/workflows/changelog.md +112 -112
package/.agent/workflows/create.md +124 -124
package/.agent/workflows/debug.md +165 -189
package/.agent/workflows/deploy.md +180 -189
package/.agent/workflows/enhance.md +128 -151
package/.agent/workflows/fix.md +114 -135
package/.agent/workflows/generate.md +12 -4
package/.agent/workflows/migrate.md +160 -160
package/.agent/workflows/orchestrate.md +168 -168
package/.agent/workflows/performance-benchmarker.md +114 -123
package/.agent/workflows/plan.md +173 -173
package/.agent/workflows/preview.md +80 -80
package/.agent/workflows/refactor.md +161 -183
package/.agent/workflows/review-ai.md +101 -129
package/.agent/workflows/review.md +116 -116
package/.agent/workflows/session.md +94 -94
package/.agent/workflows/status.md +79 -79
package/.agent/workflows/strengthen-skills.md +138 -139
package/.agent/workflows/swarm.md +179 -179
package/.agent/workflows/test.md +189 -211
package/.agent/workflows/tribunal-backend.md +93 -113
package/.agent/workflows/tribunal-database.md +94 -115
package/.agent/workflows/tribunal-frontend.md +95 -118
package/.agent/workflows/tribunal-full.md +92 -133
package/.agent/workflows/tribunal-mobile.md +94 -119
package/.agent/workflows/tribunal-performance.md +109 -133
package/.agent/workflows/ui-ux-pro-max.md +122 -143
package/package.json +1 -1
package/.agent/skills/api-patterns/api-style.md +0 -42
package/.agent/skills/api-patterns/auth.md +0 -24
package/.agent/skills/api-patterns/documentation.md +0 -26
package/.agent/skills/api-patterns/graphql.md +0 -41
package/.agent/skills/api-patterns/rate-limiting.md +0 -31
package/.agent/skills/api-patterns/response.md +0 -37
package/.agent/skills/api-patterns/rest.md +0 -40
package/.agent/skills/api-patterns/security-testing.md +0 -122
package/.agent/skills/api-patterns/trpc.md +0 -41
package/.agent/skills/api-patterns/versioning.md +0 -22
package/.agent/skills/app-builder/agent-coordination.md +0 -71
package/.agent/skills/app-builder/feature-building.md +0 -53
package/.agent/skills/app-builder/project-detection.md +0 -34
package/.agent/skills/app-builder/scaffolding.md +0 -118
package/.agent/skills/app-builder/tech-stack.md +0 -40
package/.agent/skills/architecture/context-discovery.md +0 -43
package/.agent/skills/architecture/examples.md +0 -94
package/.agent/skills/architecture/pattern-selection.md +0 -68
package/.agent/skills/architecture/patterns-reference.md +0 -50
package/.agent/skills/architecture/trade-off-analysis.md +0 -77
package/.agent/skills/brainstorming/dynamic-questioning.md +0 -360
package/.agent/skills/database-design/database-selection.md +0 -43
package/.agent/skills/database-design/indexing.md +0 -39
package/.agent/skills/database-design/migrations.md +0 -48
package/.agent/skills/database-design/optimization.md +0 -36
package/.agent/skills/database-design/orm-selection.md +0 -30
package/.agent/skills/database-design/schema-design.md +0 -56
package/.agent/skills/frontend-design/animation-guide.md +0 -331
package/.agent/skills/frontend-design/color-system.md +0 -329
package/.agent/skills/frontend-design/decision-trees.md +0 -418
package/.agent/skills/frontend-design/motion-graphics.md +0 -306
package/.agent/skills/frontend-design/typography-system.md +0 -363
package/.agent/skills/frontend-design/ux-psychology.md +0 -1116
package/.agent/skills/frontend-design/visual-effects.md +0 -383
package/.agent/skills/intelligent-routing/router-manifest.md +0 -65
package/.agent/skills/mobile-design/decision-trees.md +0 -516
package/.agent/skills/mobile-design/mobile-backend.md +0 -491
package/.agent/skills/mobile-design/mobile-color-system.md +0 -420
package/.agent/skills/mobile-design/mobile-debugging.md +0 -122
package/.agent/skills/mobile-design/mobile-design-thinking.md +0 -357
package/.agent/skills/mobile-design/mobile-navigation.md +0 -458
package/.agent/skills/mobile-design/mobile-performance.md +0 -767
package/.agent/skills/mobile-design/mobile-testing.md +0 -356
package/.agent/skills/mobile-design/mobile-typography.md +0 -433
package/.agent/skills/mobile-design/platform-android.md +0 -666
package/.agent/skills/mobile-design/platform-ios.md +0 -561
package/.agent/skills/mobile-design/touch-psychology.md +0 -537
package/.agent/skills/nextjs-react-expert/1-async-eliminating-waterfalls.md +0 -312
package/.agent/skills/nextjs-react-expert/2-bundle-bundle-size-optimization.md +0 -240
package/.agent/skills/nextjs-react-expert/3-server-server-side-performance.md +0 -490
package/.agent/skills/nextjs-react-expert/4-client-client-side-data-fetching.md +0 -264
package/.agent/skills/nextjs-react-expert/5-rerender-re-render-optimization.md +0 -581
package/.agent/skills/nextjs-react-expert/6-rendering-rendering-performance.md +0 -432
package/.agent/skills/nextjs-react-expert/7-js-javascript-performance.md +0 -684
package/.agent/skills/nextjs-react-expert/8-advanced-advanced-patterns.md +0 -150
package/.agent/skills/vulnerability-scanner/checklists.md +0 -121

package/.agent/skills/llm-engineering/SKILL.md CHANGED Viewed

@@ -1,357 +1,344 @@
----
-name: llm-engineering
-description: LLM engineering mastery for production AI systems. Prompt engineering, RAG pipeline design, vector store selection, embedding strategies, chunking, reranking, structured output, function calling, streaming, evals, guard-rails, cost optimization, and LLMOps. Use when building AI features, chat interfaces, semantic search, or any system calling an LLM API.
-allowed-tools: Read, Write, Edit, Glob, Grep
-version: 2.0.0
-last-updated: 2026-04-01
-applies-to-model: gemini-2.5-pro, claude-3-7-sonnet
----
-# LLM Engineering — Production AI Systems Mastery
-> An LLM without guardrails is a liability generator.
-> Every prompt is a contract. Every response is untrusted. Every token costs money.
----
-## Model Selection
-```
-Model                    │ Use Case                              │ Cost Tier
-─────────────────────────┼───────────────────────────────────────┼──────────
-GPT-4o                   │ Complex reasoning, code generation    │ $$$
-GPT-4o-mini              │ Classification, summaries, chat       │ $
-Claude 3.7 Sonnet        │ Long documents, analysis, code        │ $$$
-Claude 3.5 Haiku         │ Fast responses, simple tasks          │ $
-Gemini 2.5 Pro           │ Large context, multimodal, code       │ $$$
-Gemini 2.5 Flash         │ High throughput, cost-efficient       │ $
-Llama 3.3 70B (open)     │ Self-hosted, data privacy             │ Free*
-Mistral Large            │ European data residency, code         │ $$
-* = compute costs only
-Selection rules:
-1. Start with the cheapest model that works
-2. Upgrade only when eval scores require it
-3. Use large models for complex reasoning, small models for classification
-4. Fine-tune ONLY after prompt engineering and RAG are exhausted
-```
----
-## Prompt Engineering
-### System Prompt Design
-```typescript
-const SYSTEM_PROMPT = `You are a customer support agent for Acme Corp.
-## Rules
-1. Answer ONLY questions about Acme products and services.
-2. If you don't know the answer, say "I'll connect you with a specialist."
-3. Never discuss competitors.
-4. Never make up product features or pricing.
-5. Keep responses under 200 words.
-## Response Format
-- Use bullet points for lists
-- Include product links when relevant
-- End with a follow-up question
-## Context
-Current date: ${new Date().toISOString().split("T")[0]}
-User plan: {{user_plan}}
-`;
-// ❌ HALLUCINATION TRAP: System prompts are NOT secrets
-// Users can extract system prompts with jailbreak techniques
-// Never put API keys, internal URLs, or secrets in system prompts
-```
-### Structured Output (JSON Mode)
-```typescript
-import { z } from "zod";
-import OpenAI from "openai";
-const SentimentSchema = z.object({
-  sentiment: z.enum(["positive", "negative", "neutral"]),
-  confidence: z.number().min(0).max(1),
-  reasoning: z.string(),
-  topics: z.array(z.string()),
-});
-type Sentiment = z.infer<typeof SentimentSchema>;
-async function analyzeSentiment(text: string): Promise<Sentiment> {
-  const response = await openai.chat.completions.create({
-    model: "gpt-4o-mini",
-    response_format: { type: "json_object" },
-    messages: [
-      {
-        role: "system",
-        content: `Analyze the sentiment of the given text.
-Respond with JSON matching this schema:
-{
-  "sentiment": "positive" | "negative" | "neutral",
-  "confidence": 0-1,
-  "reasoning": "brief explanation",
-  "topics": ["topic1", "topic2"]
-}`,
-      },
-      { role: "user", content: text },
-    ],
-  });
-  const raw = JSON.parse(response.choices[0].message.content ?? "{}");
-  return SentimentSchema.parse(raw); // Zod validates the LLM response
-}
-// ❌ HALLUCINATION TRAP: Always validate LLM JSON output with Zod/schema
-// LLMs produce malformed JSON, wrong types, missing fields
-// ❌ const result = JSON.parse(response); // trust blindly
-// ✅ const result = Schema.parse(JSON.parse(response)); // validate
-```
-### Function Calling / Tool Use
-```typescript
-const tools: OpenAI.ChatCompletionTool[] = [
-  {
-    type: "function",
-    function: {
-      name: "search_products",
-      description: "Search products by name, category, or price range",
-      parameters: {
-        type: "object",
-        properties: {
-          query: { type: "string", description: "Search query" },
-          category: { type: "string", enum: ["electronics", "clothing", "home"] },
-          max_price: { type: "number", description: "Maximum price in USD" },
-        },
-        required: ["query"],
-      },
-    },
-  },
-  {
-    type: "function",
-    function: {
-      name: "get_order_status",
-      description: "Get the status of an order by order ID",
-      parameters: {
-        type: "object",
-        properties: {
-          order_id: { type: "string", description: "The order ID (e.g., ORD-12345)" },
-        },
-        required: ["order_id"],
-      },
-    },
-  },
-];
-// Tool execution loop
-async function chatWithTools(userMessage: string) {
-  const messages: OpenAI.ChatCompletionMessageParam[] = [
-    { role: "system", content: SYSTEM_PROMPT },
-    { role: "user", content: userMessage },
-  ];
-  let response = await openai.chat.completions.create({
-    model: "gpt-4o-mini",
-    messages,
-    tools,
-  });
-  // Process tool calls
-  while (response.choices[0].finish_reason === "tool_calls") {
-    const toolCalls = response.choices[0].message.tool_calls ?? [];
-    messages.push(response.choices[0].message);
-    for (const call of toolCalls) {
-      const args = JSON.parse(call.function.arguments);
-      const result = await executeFunction(call.function.name, args);
-      messages.push({
-        role: "tool",
-        tool_call_id: call.id,
-        content: JSON.stringify(result),
-      });
-    }
-    response = await openai.chat.completions.create({
-      model: "gpt-4o-mini",
-      messages,
-      tools,
-    });
-  }
-  return response.choices[0].message.content;
-}
-```
----
-## RAG (Retrieval-Augmented Generation)
-### Pipeline
-```
-User Query
-    ↓
-[1] Embed query → vector
-    ↓
-[2] Search vector DB → top K chunks
-    ↓
-[3] (Optional) Rerank results → top N
-    ↓
-[4] Build prompt: system + context chunks + query
-    ↓
-[5] LLM generates answer with citations
-    ↓
-[6] Validate response (hallucination check)
-```
-### Chunking Strategy
-```typescript
-// ❌ BAD: Arbitrary character splitting
-const chunks = text.match(/.{1,1000}/g); // breaks mid-sentence, mid-word
-// ✅ GOOD: Semantic chunking with overlap
-function chunkDocument(text: string, options: ChunkOptions = {}): Chunk[] {
-  const {
-    maxTokens = 512,      // chunk size
-    overlapTokens = 50,    // overlap between chunks
-    separator = "\n\n",    // split on paragraph boundaries first
-  } = options;
-  const paragraphs = text.split(separator);
-  const chunks: Chunk[] = [];
-  let current = "";
-  for (const para of paragraphs) {
-    if (tokenCount(current + para) > maxTokens && current) {
-      chunks.push({ text: current.trim(), tokens: tokenCount(current) });
-      // Keep overlap from previous chunk
-      const words = current.split(" ");
-      current = words.slice(-overlapTokens).join(" ") + separator + para;
-    } else {
-      current += separator + para;
-    }
-  }
-  if (current.trim()) chunks.push({ text: current.trim(), tokens: tokenCount(current) });
-  return chunks;
-}
-// Chunk size guidelines:
-// 256-512 tokens → precise retrieval (Q&A, support)
-// 512-1024 tokens → balanced (general RAG)
-// 1024-2048 tokens → broad context (summarization)
-```
-### Vector Store Selection
-```
-pgvector (PostgreSQL)  → Already using Postgres, <10M vectors, simple
-Pinecone               → Managed, serverless, easy scaling
-Weaviate               → Hybrid search (vector + keyword), multi-model
-Qdrant                 → High performance, Rust-based, self-hostable
-Chroma                 → Local development, prototyping
-Milvus                 → Enterprise scale, GPU acceleration
-// ❌ HALLUCINATION TRAP: Vector search is NOT keyword search
-// "Apple CEO" might not find "Tim Cook runs Apple Inc."
-// Use HYBRID search (vector + BM25 keyword) for production
-```
----
-## Streaming
-```typescript
-// Server-Sent Events for AI token streaming
-app.get("/api/chat", async (req, res) => {
-  res.setHeader("Content-Type", "text/event-stream");
-  res.setHeader("Cache-Control", "no-cache");
-  res.setHeader("Connection", "keep-alive");
-  const stream = await openai.chat.completions.create({
-    model: "gpt-4o-mini",
-    messages: [{ role: "user", content: req.query.message as string }],
-    stream: true,
-  });
-  for await (const chunk of stream) {
-    const content = chunk.choices[0]?.delta?.content;
-    if (content) {
-      res.write(`data: ${JSON.stringify({ content })}\n\n`);
-    }
-  }
-  res.write("data: [DONE]\n\n");
-  res.end();
-});
-// Client-side consumption
-const eventSource = new EventSource(`/api/chat?message=${encodeURIComponent(msg)}`);
-eventSource.onmessage = (event) => {
-  if (event.data === "[DONE]") { eventSource.close(); return; }
-  const { content } = JSON.parse(event.data);
-  appendToChat(content);
-};
-```
----
-## Cost Optimization
-```
-1. Prompt caching        → Cache system prompts (OpenAI, Anthropic support this)
-2. Output token limiting → Set max_tokens to prevent runaway responses
-3. Tiered models         → Use cheap models for classification, expensive for reasoning
-4. Batch processing      → Use batch APIs for offline processing (50% discount)
-5. Chunked context       → Send only relevant chunks, not entire documents
-6. Response streaming    → Stream to reduce TTFT (time to first token)
-7. Structured output     → Shorter JSON responses vs verbose prose
-// Cost estimation:
-// GPT-4o: ~$2.50/1M input, ~$10/1M output
-// GPT-4o-mini: ~$0.15/1M input, ~$0.60/1M output
-// 1M tokens ≈ 750,000 words ≈ 3,000 pages
-```
----
-## 🤖 LLM-Specific Traps
-1. **Trusting LLM JSON Output:** Always validate with Zod/schema. LLMs produce malformed JSON.
-2. **Secrets in System Prompts:** System prompts can be extracted. Never include API keys or internal URLs.
-3. **Fixed Character Chunking:** Splitting at 1000 chars breaks sentences. Use semantic/paragraph chunking.
-4. **Vector-Only Search:** Pure vector search misses exact matches. Use hybrid search for production.
-5. **No Token Limits:** Without `max_tokens`, models can generate 4000+ token responses. Set limits.
-6. **Single Model for Everything:** Use tiered models — cheap for simple tasks, expensive for reasoning.
-7. **No Eval Suite:** Deploying AI without evaluations is deploying untested code. Build evals.
-8. **Prompt Injection Blindness:** User input can override system instructions. Always sanitize and delimit.
-9. **Infinite Tool Loops:** Tool-calling agents can loop forever. Set max iterations (3-5).
-10. **No Rate Limiting:** API calls without rate limiting = surprise $10,000 bill. Set spend limits.
----
-## 🏛️ Tribunal Integration
-**Slash command: `/review-ai`**
-### ✅ Pre-Flight Self-Audit
-```
-✅ Am I validating all LLM responses with a schema?
-✅ Are there no secrets in system prompts?
-✅ Is user input delimited from system instructions?
-✅ Did I set max_tokens on all completions?
-✅ Is there rate limiting and cost monitoring?
-✅ Am I using the cheapest model that works?
-✅ Is chunking semantic (not fixed-character)?
-✅ Is search hybrid (vector + keyword)?
-✅ Do tool-calling loops have a max iteration limit?
-✅ Did I build evaluation tests for AI quality?
-```
+---
+name: llm-engineering
+description: LLM engineering mastery for production AI systems. Prompt engineering, RAG pipeline design, vector store selection, embedding strategies, chunking, reranking, structured output, function calling, streaming, evals, guard-rails, cost optimization, and LLMOps. Use when building AI features, chat interfaces, semantic search, or any system calling an LLM API.
+allowed-tools: Read, Write, Edit, Glob, Grep
+version: 3.2.0
+last-updated: 2026-04-07
+applies-to-model: gemini-3-1-pro, claude-3-7-sonnet
+---
+# LLM Engineering — Production AI Systems Mastery
+---
+## Model Selection
+```
+Model                    │ Use Case                              │ Cost Tier
+─────────────────────────┼───────────────────────────────────────┼──────────
+GPT-4o                   │ Complex reasoning, vision, code       │ $$$
+GPT-4o-mini              │ Classification, summaries, chat       │ $
+o3-mini                  │ Deep reasoning, math, code review     │ $$
+Claude 3.7 Sonnet        │ Long documents, analysis, code        │ $$$
+Claude 3.5 Haiku         │ Fast responses, simple tasks          │ $
+Gemini 3.1 Pro (High)    │ Large context, multimodal, code       │ $$$
+Gemini 3.0 Flash         │ High throughput, cost-efficient       │ $
+Llama 3.3 70B (open)     │ Self-hosted, data privacy             │ Free*
+Mistral Large 2          │ European data residency, code         │ $$
+* = compute costs only
+Selection rules:
+1. Start with the cheapest model that passes your evals
+2. Upgrade only when eval scores require it
+3. Use large models for complex reasoning, small for classification/routing
+4. Fine-tune ONLY after prompt engineering and RAG are exhausted
+5. ❌ HALLUCINATION TRAP: Model names change frequently — always verify current names
+   from provider docs before hardcoding (e.g. "gpt-4o" vs "gpt-4o-2024-11-20")
+```
+---
+## Prompt Engineering
+### System Prompt Design
+```typescript
+const SYSTEM_PROMPT = `You are a customer support agent for Acme Corp.
+## Rules
+1. Answer ONLY questions about Acme products and services.
+2. If you don't know the answer, say "I'll connect you with a specialist."
+3. Never discuss competitors.
+4. Never make up product features or pricing.
+5. Keep responses under 200 words.
+## Response Format
+- Use bullet points for lists
+- Include product links when relevant
+- End with a follow-up question
+## Context
+Current date: ${new Date().toISOString().split("T")[0]}
+User plan: {{user_plan}}
+`;
+// ❌ HALLUCINATION TRAP: System prompts are NOT secrets
+// Users can extract system prompts with jailbreak techniques
+// Never put API keys, internal URLs, or secrets in system prompts
+```
+### Structured Output (JSON Mode)
+```typescript
+import { z } from "zod";
+import OpenAI from "openai";
+const SentimentSchema = z.object({
+  sentiment: z.enum(["positive", "negative", "neutral"]),
+  confidence: z.number().min(0).max(1),
+  reasoning: z.string(),
+  topics: z.array(z.string()),
+});
+// OpenAI — json_schema mode (strict = true enforces schema exactly)
+async function analyzeSentiment(text: string) {
+  const response = await openai.chat.completions.create({
+    model: "gpt-4o-mini",
+    response_format: {
+      type: "json_schema",
+      json_schema: {
+        name: "sentiment_analysis",
+        strict: true,
+        schema: {
+          type: "object",
+          properties: {
+            sentiment: { type: "string", enum: ["positive", "negative", "neutral"] },
+            confidence: { type: "number" },
+            reasoning: { type: "string" },
+            topics: { type: "array", items: { type: "string" } },
+          },
+          required: ["sentiment", "confidence", "reasoning", "topics"],
+          additionalProperties: false, // required for strict mode
+        },
+      },
+    },
+    messages: [{ role: "system", content: "Analyze sentiment." }, { role: "user", content: text }],
+  });
+  const raw = JSON.parse(response.choices[0].message.content ?? "{}");
+  return SentimentSchema.parse(raw); // always validate with Zod even in strict mode
+}
+// Gemini — generationConfig.responseMimeType + responseSchema (response_mime_type / response_schema)
+import { GoogleGenerativeAI, SchemaType } from "@google/generative-ai";
+const genAI = new GoogleGenerativeAI(process.env.GEMINI_API_KEY!);
+const model = genAI.getGenerativeModel({
+  model: "gemini-2.0-flash",
+  generationConfig: {
+    responseMimeType: "application/json",
+    responseSchema: {
+      type: SchemaType.OBJECT,
+      properties: {
+        sentiment: { type: SchemaType.STRING, enum: ["positive", "negative", "neutral"] },
+        confidence: { type: SchemaType.NUMBER },
+        topics: { type: SchemaType.ARRAY, items: { type: SchemaType.STRING } },
+      },
+      required: ["sentiment", "confidence", "topics"],
+    },
+  },
+});
+// ❌ HALLUCINATION TRAP: Always validate LLM JSON output with Zod/schema
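+// LLMs produce malformed JSON, wrong types, missing fields; even with strict mode the output can be
+// a refusal, truncated by the token limit, or outside ranges the JSON schema omits (e.g. confidence 0-1)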
+// ❌ const result = JSON.parse(response); // trust blindly
+// ✅ const result = Schema.parse(JSON.parse(response)); // validate always
+```
+### Function Calling / Tool Use
+```typescript
+const tools: OpenAI.ChatCompletionTool[] = [
+  {
+    type: "function",
+    function: {
+      name: "search_products",
+      description: "Search products by name, category, or price range",
+      parameters: {
+        type: "object",
+        properties: {
+          query: { type: "string", description: "Search query" },
+          category: { type: "string", enum: ["electronics", "clothing", "home"] },
+          max_price: { type: "number", description: "Maximum price in USD" },
+        },
+        required: ["query"],
+      },
+    },
+  },
+  {
+    type: "function",
+    function: {
+      name: "get_order_status",
+      description: "Get the status of an order by order ID",
+      parameters: {
+        type: "object",
+        properties: {
+          order_id: { type: "string", description: "The order ID (e.g., ORD-12345)" },
+        },
+        required: ["order_id"],
+      },
+    },
+  },
+];
+// Tool execution loop
+/**
+ * Sends a user message to the model and keeps resolving tool calls until the
+ * model returns a final (non-tool) answer.
+ *
+ * @param userMessage - The end user's message for this turn.
+ * @param maxToolRounds - Upper bound on model round-trips that request tools;
+ *   prevents an unbounded (and billed) loop if the model never stops calling tools.
+ * @returns The content of the model's final message (may be null).
+ * @throws Error when the model is still requesting tools after `maxToolRounds` rounds.
+ */
+async function chatWithTools(userMessage: string, maxToolRounds = 5) {
+  const messages: OpenAI.ChatCompletionMessageParam[] = [
+    { role: "system", content: SYSTEM_PROMPT },
+    { role: "user", content: userMessage },
+  ];
+  let response = await openai.chat.completions.create({
+    model: "gpt-4o-mini",
+    messages,
+    tools,
+  });
+  // Process tool calls
+  let rounds = 0;
+  while (response.choices[0].finish_reason === "tool_calls") {
+    rounds += 1;
+    if (rounds > maxToolRounds) {
+      throw new Error(`Tool-call loop exceeded ${maxToolRounds} rounds`);
+    }
+    const assistantMessage = response.choices[0].message;
+    const toolCalls = assistantMessage.tool_calls ?? [];
+    // The assistant message carrying tool_calls must precede its tool results.
+    messages.push(assistantMessage);
+    for (const call of toolCalls) {
+      // Arguments are model-generated text and may be malformed JSON.
+      let args;
+      try {
+        args = JSON.parse(call.function.arguments);
+      } catch {
+        // Every tool_call_id still needs a reply; report the problem to the
+        // model so it can retry with valid arguments instead of crashing the turn.
+        messages.push({
+          role: "tool",
+          tool_call_id: call.id,
+          content: JSON.stringify({ error: "Tool arguments were not valid JSON" }),
+        });
+        continue;
+      }
+      const result = await executeFunction(call.function.name, args);
+      messages.push({
+        role: "tool",
+        tool_call_id: call.id,
+        content: JSON.stringify(result),
+      });
+    }
+    response = await openai.chat.completions.create({
+      model: "gpt-4o-mini",
+      messages,
+      tools,
+    });
+  }
+  return response.choices[0].message.content;
+}
+```
+---
+## RAG (Retrieval-Augmented Generation)
+### Pipeline
+```
+User Query
+    ↓
+[1] Embed query → vector
+    ↓
+[2] Search vector DB → top K chunks
+    ↓
+[3] (Optional) Rerank results → top N
+    ↓
+[4] Build prompt: system + context chunks + query
+    ↓
+[5] LLM generates answer with citations
+    ↓
+[6] Validate response (hallucination check)
+```
+### Chunking Strategy
+```typescript
+// ❌ BAD: Arbitrary character splitting
+const chunks = text.match(/.{1,1000}/g); // breaks mid-sentence, mid-word
+// ✅ GOOD: Semantic chunking with overlap
+/**
+ * Splits `text` into chunks on `separator` boundaries, packing consecutive
+ * paragraphs together until adding the next one would exceed `maxTokens`.
+ * Each new chunk starts with a tail of the previous chunk as overlap.
+ *
+ * Notes:
+ * - Overlap is approximated in space-separated words, not tokenizer tokens.
+ * - A single paragraph larger than `maxTokens` is emitted as one oversized
+ *   chunk; it is not split further.
+ *
+ * @param text - Document text to chunk.
+ * @param options - Optional `maxTokens`, `overlapTokens`, and `separator` overrides.
+ * @returns Chunks whose `tokens` value is the token count of the trimmed `text`.
+ */
+function chunkDocument(text: string, options: ChunkOptions = {}): Chunk[] {
+  const {
+    maxTokens = 512,      // chunk size
+    overlapTokens = 50,    // overlap between chunks
+    separator = "\n\n",    // split on paragraph boundaries first
+  } = options;
+  const chunks: Chunk[] = [];
+  // Count tokens on the trimmed text so `tokens` always matches `text`,
+  // and never emit a whitespace-only chunk.
+  const pushChunk = (raw: string): void => {
+    const trimmed = raw.trim();
+    if (trimmed) chunks.push({ text: trimmed, tokens: tokenCount(trimmed) });
+  };
+  let current = "";
+  for (const para of text.split(separator)) {
+    // Measure exactly what would be stored, separator included, and avoid
+    // prefixing the very first paragraph with a stray separator.
+    const candidate = current ? current + separator + para : para;
+    if (current && tokenCount(candidate) > maxTokens) {
+      pushChunk(current);
+      // Keep overlap from previous chunk. slice(-0) is slice(0) and would copy
+      // the entire chunk, so a zero (or negative) overlap must be special-cased.
+      const overlap =
+        overlapTokens > 0 ? current.split(" ").slice(-overlapTokens).join(" ") : "";
+      current = overlap ? overlap + separator + para : para;
+    } else {
+      current = candidate;
+    }
+  }
+  pushChunk(current);
+  return chunks;
+}
+// Chunk size guidelines:
+// 256-512 tokens → precise retrieval (Q&A, support)
+// 512-1024 tokens → balanced (general RAG)
+// 1024-2048 tokens → broad context (summarization)
+```
+### Vector Store Selection
+```
+pgvector (PostgreSQL)  → Already using Postgres, <10M vectors, simple
+Pinecone               → Managed, serverless, easy scaling
+Weaviate               → Hybrid search (vector + keyword), multi-model
+Qdrant                 → High performance, Rust-based, self-hostable
+Chroma                 → Local development, prototyping
+Milvus                 → Enterprise scale, GPU acceleration
+// ❌ HALLUCINATION TRAP: Vector search is NOT keyword search
+// "Apple CEO" might not find "Tim Cook runs Apple Inc."
+// Use HYBRID search (vector + BM25 keyword) for production
+```
+---
+## Streaming
+```typescript
+// Server-Sent Events for AI token streaming
+// Streams model output to the browser as SSE `data:` frames, ending with a
+// literal "[DONE]" frame so the client knows to close its EventSource.
+app.get("/api/chat", async (req, res) => {
+  // Query values are not guaranteed to be strings (could be missing or repeated),
+  // so validate at runtime instead of asserting the type.
+  const message = req.query.message;
+  if (typeof message !== "string" || message.trim() === "") {
+    res.status(400).json({ error: "Query parameter 'message' must be a non-empty string" });
+    return;
+  }
+  res.setHeader("Content-Type", "text/event-stream");
+  res.setHeader("Cache-Control", "no-cache");
+  res.setHeader("Connection", "keep-alive");
+  let clientDisconnected = false;
+  try {
+    const stream = await openai.chat.completions.create({
+      model: "gpt-4o-mini",
+      messages: [{ role: "user", content: message }],
+      stream: true,
+    });
+    // Stop generating (and paying for) tokens nobody will read once the
+    // browser goes away before the response has finished.
+    res.on("close", () => {
+      if (!res.writableEnded) {
+        clientDisconnected = true;
+        stream.controller.abort();
+      }
+    });
+    for await (const chunk of stream) {
+      const content = chunk.choices[0]?.delta?.content;
+      if (content) {
+        res.write(`data: ${JSON.stringify({ content })}\n\n`);
+      }
+    }
+  } catch (err: unknown) {
+    if (!clientDisconnected) {
+      console.error("Chat stream failed", err);
+      // Named event: plain `onmessage` consumers ignore it, richer clients can
+      // subscribe with addEventListener("stream-error", ...).
+      res.write(`event: stream-error\ndata: ${JSON.stringify({ message: "Stream failed" })}\n\n`);
+    }
+  } finally {
+    // Always terminate the stream explicitly; otherwise EventSource treats the
+    // closed connection as a drop and reconnects, re-sending the prompt.
+    if (!clientDisconnected) res.write("data: [DONE]\n\n");
+    res.end();
+  }
+});
+// Client-side consumption
+const eventSource = new EventSource(`/api/chat?message=${encodeURIComponent(msg)}`);
+eventSource.onmessage = (event) => {
+  // The server marks the end of the answer with a literal "[DONE]" frame.
+  if (event.data === "[DONE]") { eventSource.close(); return; }
+  const { content } = JSON.parse(event.data);
+  // Only append real text; skip frames that carry no string content.
+  if (typeof content === "string") appendToChat(content);
+};
+// EventSource reconnects automatically after a dropped connection, which would
+// re-send the prompt and start another billed completion. Close it instead.
+eventSource.onerror = () => {
+  eventSource.close();
+};
+```
+---
+## Cost Optimization
+```
+1. Prompt caching        → Cache system prompts (OpenAI, Anthropic support this)
+2. Output token limiting → Set max_tokens to prevent runaway responses
+3. Tiered models         → Use cheap models for classification, expensive for reasoning
+4. Batch processing      → Use batch APIs for offline processing (50% discount)
+5. Chunked context       → Send only relevant chunks, not entire documents
+6. Response streaming    → Stream to reduce TTFT (time to first token); improves perceived latency, not token cost
+7. Structured output     → Shorter JSON responses vs verbose prose
+// Cost estimation:
+// GPT-4o: ~$2.50/1M input, ~$10/1M output
+// GPT-4o-mini: ~$0.15/1M input, ~$0.60/1M output
+// 1M tokens ≈ 750,000 words ≈ 3,000 pages
+```
+---