tribunal-kit 1.0.0 → 2.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (125) hide show
  1. package/.agent/.shared/ui-ux-pro-max/README.md +3 -3
  2. package/.agent/ARCHITECTURE.md +205 -10
  3. package/.agent/GEMINI.md +37 -7
  4. package/.agent/agents/accessibility-reviewer.md +134 -0
  5. package/.agent/agents/ai-code-reviewer.md +129 -0
  6. package/.agent/agents/frontend-specialist.md +3 -0
  7. package/.agent/agents/game-developer.md +21 -21
  8. package/.agent/agents/logic-reviewer.md +12 -0
  9. package/.agent/agents/mobile-reviewer.md +79 -0
  10. package/.agent/agents/orchestrator.md +56 -26
  11. package/.agent/agents/performance-reviewer.md +36 -0
  12. package/.agent/agents/supervisor-agent.md +156 -0
  13. package/.agent/agents/swarm-worker-contracts.md +166 -0
  14. package/.agent/agents/swarm-worker-registry.md +92 -0
  15. package/.agent/rules/GEMINI.md +134 -5
  16. package/.agent/scripts/bundle_analyzer.py +259 -0
  17. package/.agent/scripts/dependency_analyzer.py +247 -0
  18. package/.agent/scripts/lint_runner.py +188 -0
  19. package/.agent/scripts/patch_skills_meta.py +177 -0
  20. package/.agent/scripts/patch_skills_output.py +285 -0
  21. package/.agent/scripts/schema_validator.py +279 -0
  22. package/.agent/scripts/security_scan.py +224 -0
  23. package/.agent/scripts/session_manager.py +144 -3
  24. package/.agent/scripts/skill_integrator.py +234 -0
  25. package/.agent/scripts/strengthen_skills.py +220 -0
  26. package/.agent/scripts/swarm_dispatcher.py +317 -0
  27. package/.agent/scripts/test_runner.py +192 -0
  28. package/.agent/scripts/test_swarm_dispatcher.py +163 -0
  29. package/.agent/skills/agent-organizer/SKILL.md +132 -0
  30. package/.agent/skills/agentic-patterns/SKILL.md +335 -0
  31. package/.agent/skills/api-patterns/SKILL.md +226 -50
  32. package/.agent/skills/app-builder/SKILL.md +215 -52
  33. package/.agent/skills/architecture/SKILL.md +176 -31
  34. package/.agent/skills/bash-linux/SKILL.md +150 -134
  35. package/.agent/skills/behavioral-modes/SKILL.md +152 -160
  36. package/.agent/skills/brainstorming/SKILL.md +148 -101
  37. package/.agent/skills/brainstorming/dynamic-questioning.md +10 -0
  38. package/.agent/skills/clean-code/SKILL.md +139 -134
  39. package/.agent/skills/code-review-checklist/SKILL.md +177 -80
  40. package/.agent/skills/config-validator/SKILL.md +165 -0
  41. package/.agent/skills/csharp-developer/SKILL.md +107 -0
  42. package/.agent/skills/database-design/SKILL.md +252 -29
  43. package/.agent/skills/deployment-procedures/SKILL.md +122 -175
  44. package/.agent/skills/devops-engineer/SKILL.md +134 -0
  45. package/.agent/skills/devops-incident-responder/SKILL.md +98 -0
  46. package/.agent/skills/documentation-templates/SKILL.md +175 -121
  47. package/.agent/skills/dotnet-core-expert/SKILL.md +103 -0
  48. package/.agent/skills/edge-computing/SKILL.md +213 -0
  49. package/.agent/skills/frontend-design/SKILL.md +76 -0
  50. package/.agent/skills/frontend-design/color-system.md +18 -0
  51. package/.agent/skills/frontend-design/typography-system.md +18 -0
  52. package/.agent/skills/game-development/SKILL.md +69 -0
  53. package/.agent/skills/geo-fundamentals/SKILL.md +158 -99
  54. package/.agent/skills/i18n-localization/SKILL.md +158 -96
  55. package/.agent/skills/intelligent-routing/SKILL.md +89 -285
  56. package/.agent/skills/intelligent-routing/router-manifest.md +65 -0
  57. package/.agent/skills/lint-and-validate/SKILL.md +229 -27
  58. package/.agent/skills/llm-engineering/SKILL.md +258 -0
  59. package/.agent/skills/local-first/SKILL.md +203 -0
  60. package/.agent/skills/mcp-builder/SKILL.md +159 -111
  61. package/.agent/skills/mobile-design/SKILL.md +102 -282
  62. package/.agent/skills/nextjs-react-expert/SKILL.md +143 -227
  63. package/.agent/skills/nodejs-best-practices/SKILL.md +201 -254
  64. package/.agent/skills/observability/SKILL.md +285 -0
  65. package/.agent/skills/parallel-agents/SKILL.md +124 -118
  66. package/.agent/skills/performance-profiling/SKILL.md +143 -89
  67. package/.agent/skills/plan-writing/SKILL.md +133 -97
  68. package/.agent/skills/platform-engineer/SKILL.md +135 -0
  69. package/.agent/skills/powershell-windows/SKILL.md +167 -104
  70. package/.agent/skills/python-patterns/SKILL.md +149 -361
  71. package/.agent/skills/python-pro/SKILL.md +114 -0
  72. package/.agent/skills/react-specialist/SKILL.md +107 -0
  73. package/.agent/skills/realtime-patterns/SKILL.md +296 -0
  74. package/.agent/skills/red-team-tactics/SKILL.md +136 -134
  75. package/.agent/skills/rust-pro/SKILL.md +237 -173
  76. package/.agent/skills/seo-fundamentals/SKILL.md +134 -82
  77. package/.agent/skills/server-management/SKILL.md +155 -104
  78. package/.agent/skills/sql-pro/SKILL.md +104 -0
  79. package/.agent/skills/systematic-debugging/SKILL.md +156 -79
  80. package/.agent/skills/tailwind-patterns/SKILL.md +163 -205
  81. package/.agent/skills/tdd-workflow/SKILL.md +148 -88
  82. package/.agent/skills/test-result-analyzer/SKILL.md +299 -0
  83. package/.agent/skills/testing-patterns/SKILL.md +141 -114
  84. package/.agent/skills/trend-researcher/SKILL.md +228 -0
  85. package/.agent/skills/ui-ux-pro-max/SKILL.md +107 -0
  86. package/.agent/skills/ui-ux-researcher/SKILL.md +234 -0
  87. package/.agent/skills/vue-expert/SKILL.md +118 -0
  88. package/.agent/skills/vulnerability-scanner/SKILL.md +228 -188
  89. package/.agent/skills/web-design-guidelines/SKILL.md +148 -33
  90. package/.agent/skills/webapp-testing/SKILL.md +171 -122
  91. package/.agent/skills/whimsy-injector/SKILL.md +349 -0
  92. package/.agent/skills/workflow-optimizer/SKILL.md +219 -0
  93. package/.agent/workflows/api-tester.md +279 -0
  94. package/.agent/workflows/audit.md +168 -0
  95. package/.agent/workflows/brainstorm.md +65 -19
  96. package/.agent/workflows/changelog.md +144 -0
  97. package/.agent/workflows/create.md +67 -14
  98. package/.agent/workflows/debug.md +122 -30
  99. package/.agent/workflows/deploy.md +82 -31
  100. package/.agent/workflows/enhance.md +59 -27
  101. package/.agent/workflows/fix.md +143 -0
  102. package/.agent/workflows/generate.md +84 -20
  103. package/.agent/workflows/migrate.md +163 -0
  104. package/.agent/workflows/orchestrate.md +66 -17
  105. package/.agent/workflows/performance-benchmarker.md +305 -0
  106. package/.agent/workflows/plan.md +76 -33
  107. package/.agent/workflows/preview.md +73 -17
  108. package/.agent/workflows/refactor.md +153 -0
  109. package/.agent/workflows/review-ai.md +140 -0
  110. package/.agent/workflows/review.md +83 -16
  111. package/.agent/workflows/session.md +154 -0
  112. package/.agent/workflows/status.md +74 -18
  113. package/.agent/workflows/strengthen-skills.md +99 -0
  114. package/.agent/workflows/swarm.md +194 -0
  115. package/.agent/workflows/test.md +80 -31
  116. package/.agent/workflows/tribunal-backend.md +55 -13
  117. package/.agent/workflows/tribunal-database.md +62 -18
  118. package/.agent/workflows/tribunal-frontend.md +58 -12
  119. package/.agent/workflows/tribunal-full.md +70 -11
  120. package/.agent/workflows/tribunal-mobile.md +123 -0
  121. package/.agent/workflows/tribunal-performance.md +152 -0
  122. package/.agent/workflows/ui-ux-pro-max.md +100 -82
  123. package/README.md +117 -62
  124. package/bin/tribunal-kit.js +329 -75
  125. package/package.json +10 -6
@@ -1,45 +1,247 @@
1
1
  ---
2
2
  name: lint-and-validate
3
- description: Automatic quality control, linting, and static analysis procedures. Use after every code modification to ensure syntax correctness and project standards. Triggers onKeywords: lint, format, check, validate, types, static analysis.
4
- allowed-tools: Read, Glob, Grep, Bash
3
+ description: Linting and validation principles for code quality enforcement.
4
+ allowed-tools: Read, Write, Edit, Glob, Grep
5
+ version: 1.0.0
6
+ last-updated: 2026-03-12
7
+ applies-to-model: gemini-2.5-pro, claude-3-7-sonnet
5
8
  ---
6
9
 
7
- # Lint and Validate Skill
10
+ # Linting & Validation
8
11
 
9
- > **MANDATORY:** Run appropriate validation tools after EVERY code change. Do not finish a task until the code is error-free.
12
+ > A linter is an automated code reviewer that never gets tired, never gets distracted,
13
+ > and catches the same class of problems every single time.
10
14
 
11
- ### Procedures by Ecosystem
15
+ ---
16
+
17
+ ## Why Linting Matters
18
+
19
+ Linting catches problems that code review misses:
20
+ - Unused variables left in after refactoring
21
+ - Missing `await` on async functions (silently returns a Promise instead of the value)
22
+ - Inconsistent code style that makes diffs hard to read
23
+ - Known dangerous patterns (e.g., `==` instead of `===` in JS)
24
+
25
+ Run linting in CI. Every PR that merges should pass lint. A lint check that doesn't block the build is decoration.
26
+
27
+ ---
28
+
29
+ ## JavaScript / TypeScript (ESLint + Prettier)
30
+
31
+ ```bash
32
+ # Install
33
+ npm install -D eslint @typescript-eslint/eslint-plugin @typescript-eslint/parser prettier
34
+
35
+ # Run
36
+ npx eslint . --ext .ts,.tsx
37
+ npx prettier --check .
38
+
39
+ # Fix auto-fixable issues
40
+ npx eslint . --ext .ts,.tsx --fix
41
+ npx prettier --write .
42
+ ```
43
+
44
+ **Recommended rules to enforce:**
45
+
46
+ ```json
47
+ // .eslintrc.json
48
+ {
49
+ "extends": [
50
+ "eslint:recommended",
51
+ "plugin:@typescript-eslint/recommended",
52
+ "plugin:@typescript-eslint/recommended-requiring-type-checking"
53
+ ],
54
+ "rules": {
55
+ "@typescript-eslint/no-explicit-any": "error",
56
+ "@typescript-eslint/no-floating-promises": "error",
57
+ "@typescript-eslint/await-thenable": "error",
58
+ "no-console": ["warn", { "allow": ["warn", "error"] }],
59
+ "eqeqeq": ["error", "always"]
60
+ }
61
+ }
62
+ ```
63
+
64
+ **Key rules explained:**
65
+
66
+ | Rule | Why It Matters |
67
+ |---|---|
68
+ | `no-floating-promises` | Missing `await` on async call = silent bug |
69
+ | `no-explicit-any` | `any` switches off type checking for every value it touches |
70
+ | `eqeqeq` | `==` has coercion surprises; `===` is always explicit |
71
+ | `await-thenable` | Flags `await` on a value that is not a Promise/thenable (the `await` does nothing — almost always a mistake) |
12
72
 
13
- #### Node.js / TypeScript
14
- 1. **Lint/Fix:** `npm run lint` or `npx eslint "path" --fix`
15
- 2. **Types:** `npx tsc --noEmit`
16
- 3. **Security:** `npm audit --audit-level=high`
73
+ ---
74
+
75
+ ## Python (Ruff)
76
+
77
+ Ruff replaces flake8, black, isort, and pyupgrade in one fast tool:
78
+
79
+ ```bash
80
+ # Install
81
+ pip install ruff
82
+
83
+ # Check
84
+ ruff check .
85
+
86
+ # Fix auto-fixable
87
+ ruff check . --fix
88
+
89
+ # Format (replaces black)
90
+ ruff format .
17
91
 
18
- #### Python
19
- 1. **Linter (Ruff):** `ruff check "path" --fix` (Fast & Modern)
20
- 2. **Security (Bandit):** `bandit -r "path" -ll`
21
- 3. **Types (MyPy):** `mypy "path"`
92
+ # Pre-commit config
93
+ # .pre-commit-config.yaml
94
+ - repo: https://github.com/astral-sh/ruff-pre-commit
95
+ hooks:
96
+ - id: ruff
97
+ args: [--fix]
98
+ - id: ruff-format
99
+ ```
22
100
 
23
- ## The Quality Loop
24
- 1. **Write/Edit Code**
25
- 2. **Run Audit:** `npm run lint && npx tsc --noEmit`
26
- 3. **Analyze Report:** Check the "FINAL AUDIT REPORT" section.
27
- 4. **Fix & Repeat:** Submitting code with "FINAL AUDIT" failures is NOT allowed.
101
+ ```toml
102
+ # pyproject.toml
103
+ [tool.ruff]
104
+ line-length = 100
105
+ target-version = "py311"
28
106
 
29
- ## Error Handling
30
- - If `lint` fails: Fix the style or syntax issues immediately.
31
- - If `tsc` fails: Correct type mismatches before proceeding.
32
- - If no tool is configured: Check the project root for `.eslintrc`, `tsconfig.json`, `pyproject.toml` and suggest creating one.
107
+ [tool.ruff.lint]
108
+ select = ["E", "F", "I", "N", "UP", "B", "SIM", "ANN"]
109
+ # E: pycodestyle, F: pyflakes, I: isort, N: naming, UP: pyupgrade
110
+ # B: bugbear (common bugs), SIM: simplify, ANN: annotations
111
+ ```
33
112
 
34
113
  ---
35
- **Strict Rule:** No code should be committed or reported as "done" without passing these checks.
114
+
115
+ ## Type Checking
116
+
117
+ Linting and type checking catch different things. Run both.
118
+
119
+ **TypeScript:**
120
+ ```bash
121
+ npx tsc --noEmit # type check without emitting files
122
+ ```
123
+
124
+ **Python:**
125
+ ```bash
126
+ mypy src/ --ignore-missing-imports
127
+ # or
128
+ pyright src/
129
+ ```
130
+
131
+ **Required compiler options (TypeScript):**
132
+ ```json
133
+ {
134
+ "compilerOptions": {
135
+ "strict": true, // enables all strict checks
136
+ "noImplicitAny": true,
137
+ "noUncheckedIndexedAccess": true, // index access can be undefined
138
+ "exactOptionalPropertyTypes": true
139
+ }
140
+ }
141
+ ```
142
+
143
+ ---
144
+
145
+ ## Pre-commit Integration
146
+
147
+ Run linting automatically before every commit:
148
+
149
+ ```yaml
150
+ # .pre-commit-config.yaml
151
+ repos:
152
+ - repo: https://github.com/pre-commit/pre-commit-hooks
153
+ hooks:
154
+ - id: check-merge-conflict
155
+ - id: check-added-large-files
156
+ - id: end-of-file-fixer
157
+ - id: trailing-whitespace
158
+
159
+ - repo: local
160
+ hooks:
161
+ - id: eslint
162
+ name: ESLint
163
+ language: node
164
+ entry: npx eslint --ext .ts,.tsx
165
+ types_or: [javascript, jsx, ts, tsx]
166
+
167
+ - id: tsc
168
+ name: TypeScript
169
+ language: node
170
+ entry: npx tsc --noEmit
171
+ pass_filenames: false
172
+ ```
36
173
 
37
174
  ---
38
175
 
39
176
  ## Scripts
40
177
 
41
- | Script | Purpose | Command |
42
- |--------|---------|---------|
43
- | `scripts/lint_runner.py` | Unified lint check | `python scripts/lint_runner.py <project_path>` |
44
- | `scripts/type_coverage.py` | Type coverage analysis | `python scripts/type_coverage.py <project_path>` |
178
+ | Script | Purpose | Run With |
179
+ |---|---|---|
180
+ | `scripts/lint_runner.py` | Runs project linting and reports findings | `python scripts/lint_runner.py <project_path>` |
181
+ | `scripts/type_coverage.py` | Measures TypeScript type coverage | `python scripts/type_coverage.py <project_path>` |
182
+
183
+ ---
184
+
185
+ ## Output Format
186
+
187
+ When this skill produces or reviews code, structure your output as follows:
188
+
189
+ ```
190
+ ━━━ Lint And Validate Report ━━━━━━━━━━━━━━━━━━━━━━━━
191
+ Skill: Lint And Validate
192
+ Language: [detected language / framework]
193
+ Scope: [N files · N functions]
194
+ ─────────────────────────────────────────────────
195
+ ✅ Passed: [checks that passed, or "All clean"]
196
+ ⚠️ Warnings: [non-blocking issues, or "None"]
197
+ ❌ Blocked: [blocking issues requiring fix, or "None"]
198
+ ─────────────────────────────────────────────────
199
+ VBC status: PENDING → VERIFIED
200
+ Evidence: [test output / lint pass / compile success]
201
+ ```
202
+
203
+ **VBC (Verification-Before-Completion) is mandatory.**
204
+ Do not mark status as VERIFIED until concrete terminal evidence is provided.
205
+
206
+
207
+
208
+ ---
209
+
210
+ ## 🤖 LLM-Specific Traps
211
+
212
+ AI coding assistants often fall into specific bad habits when dealing with this domain. These are strictly forbidden:
213
+
214
+ 1. **Over-engineering:** Proposing complex abstractions or distributed systems when a simpler approach suffices.
215
+ 2. **Hallucinated Libraries/Methods:** Using non-existent methods or packages. Always `// VERIFY` or check `package.json` / `requirements.txt`.
216
+ 3. **Skipping Edge Cases:** Writing the "happy path" and ignoring error handling, timeouts, or data validation.
217
+ 4. **Context Amnesia:** Forgetting the user's constraints and offering generic advice instead of tailored solutions.
218
+ 5. **Silent Degradation:** Catching and suppressing errors without logging or re-raising.
219
+
220
+ ---
221
+
222
+ ## 🏛️ Tribunal Integration (Anti-Hallucination)
223
+
224
+ **Slash command: `/review` or `/tribunal-full`**
225
+ **Active reviewers: `logic-reviewer` · `security-auditor`**
226
+
227
+ ### ❌ Forbidden AI Tropes
228
+
229
+ 1. **Blind Assumptions:** Never make an assumption without documenting it clearly with `// VERIFY: [reason]`.
230
+ 2. **Silent Degradation:** Catching and suppressing errors without logging or handling.
231
+ 3. **Context Amnesia:** Forgetting the user's constraints and offering generic advice instead of tailored solutions.
232
+
233
+ ### ✅ Pre-Flight Self-Audit
234
+
235
+ Review these questions before confirming output:
236
+ ```
237
+ ✅ Did I rely ONLY on real, verified tools and methods?
238
+ ✅ Is this solution appropriately scoped to the user's constraints?
239
+ ✅ Did I handle potential failure modes and edge cases?
240
+ ✅ Have I avoided generic boilerplate that doesn't add value?
241
+ ```
242
+
243
+ ### 🛑 Verification-Before-Completion (VBC) Protocol
45
244
 
245
+ **CRITICAL:** You must follow a strict "evidence-based closeout" state machine.
246
+ - ❌ **Forbidden:** Declaring a task complete because the output "looks correct."
247
+ - ✅ **Required:** You are explicitly forbidden from finalizing any task without providing **concrete evidence** (terminal output, passing tests, compile success, or equivalent proof) that your output works as intended.
@@ -0,0 +1,258 @@
1
+ ---
2
+ name: llm-engineering
3
+ description: LLM engineering principles for production AI systems. RAG pipeline design, vector store selection, prompt engineering, evals, and LLMOps. Use when building AI features, chat interfaces, semantic search, or any system calling an LLM API.
4
+ allowed-tools: Read, Write, Edit, Glob, Grep
5
+ version: 1.0.0
6
+ last-updated: 2026-03-12
7
+ applies-to-model: gemini-2.5-pro, claude-3-7-sonnet
8
+ ---
9
+
10
+ # LLM Engineering Principles
11
+
12
+ > An LLM is a probabilistic function, not a deterministic API.
13
+ > Design your system to be correct despite that — not because you got lucky.
14
+
15
+ ---
16
+
17
+ ## When This Skill Activates
18
+
19
+ - Adding AI chat, completion, or summarization to an app
20
+ - Building a RAG (Retrieval-Augmented Generation) pipeline
21
+ - Integrating with OpenAI, Anthropic, Google Gemini, or local models
22
+ - Designing semantic search
23
+ - Setting up AI evals or monitoring
24
+
25
+ ---
26
+
27
+ ## Core Architecture Decision: What Pattern?
28
+
29
+ | Pattern | Use When | Avoid When |
30
+ |---|---|---|
31
+ | **Simple prompt** | Single-turn, no user docs | Needs accuracy on user data |
32
+ | **RAG** | Answers must cite user/company docs | Data changes every second |
33
+ | **Fine-tuning** | Consistent tone/style at scale | You have < 1000 examples |
34
+ | **Agent loop** | Multi-step tasks, tool use | Single-answer questions |
35
+ | **Hybrid** | RAG + agent (most production apps) | Over-engineering simple use case |
36
+
37
+ ---
38
+
39
+ ## RAG Pipeline Design
40
+
41
+ The core pattern for grounding LLMs in real data:
42
+
43
+ ```
44
+ INGEST RETRIEVE GENERATE
45
+ ───────── ───────── ─────────
46
+ Documents User query Retrieved chunks
47
+ │ │ │
48
+ ▼ ▼ ▼
49
+ Chunk (512 tokens) Embed query vector Rerank by relevance
50
+ │ │ │
51
+ ▼ ▼ ▼
52
+ Embed chunks ANN search in Build prompt:
53
+ │ vector store [system] + [chunks] + [query]
54
+ ▼ │ │
55
+ Store in vector DB Top-K results Call LLM → stream response
56
+ ```
57
+
58
+ ### Chunking Strategy
59
+
60
+ ```ts
61
+ // ❌ Fixed-size chunks break semantic units
62
+ chunk(document, { size: 512 }); // Splits mid-sentence
63
+
64
+ // ✅ Semantic chunking — split at natural boundaries
65
+ chunk(document, {
66
+ strategy: 'markdown-headers', // Or 'sentence', 'paragraph'
67
+ maxTokens: 512,
68
+ overlap: 64, // Overlap to preserve context at boundaries
69
+ });
70
+ ```
71
+
72
+ ### Embedding Model Selection
73
+
74
+ | Use Case | Model | Dimensions | Notes |
75
+ |---|---|---|---|
76
+ | General English | `text-embedding-3-small` | 1536 | Best quality/cost ratio |
77
+ | Multilingual | `multilingual-e5-large` | 1024 | Open source, self-hostable |
78
+ | Code | `text-embedding-3-large` | 3072 | Higher cost, better code retrieval |
79
+ | Local/private | `nomic-embed-text` | 768 | Runs on CPU via Ollama |
80
+
81
+ ---
82
+
83
+ ## Vector Store Selection
84
+
85
+ | Need | Choose | Why |
86
+ |---|---|---|
87
+ | Already on PostgreSQL | `pgvector` | Zero infra, SQL joins with metadata |
88
+ | Managed, billion-scale | Pinecone | Hosted ANN, hybrid search built-in |
89
+ | Open source, self-hosted | Qdrant | Rust-native, fast, rich filtering |
90
+ | Multimodal search or GraphQL-first API | Weaviate | GraphQL API, multimodal support |
91
+ | Embedded/local | ChromaDB | Zero infra, great for prototyping |
92
+
93
+ ```ts
94
+ // pgvector — stays inside your existing PostgreSQL
95
+ import pgvector from 'pgvector/pg'; // npm package `pgvector`; provides registerTypes() / toSql()
96
+
97
+ // Store
98
+ await db.query(
99
+ 'INSERT INTO documents (content, embedding) VALUES ($1, $2)',
100
+ [text, JSON.stringify(embedding)] // embedding is float[]
101
+ );
102
+
103
+ // Query — cosine similarity
104
+ await db.query(
105
+ 'SELECT content FROM documents ORDER BY embedding <=> $1 LIMIT 5',
106
+ [JSON.stringify(queryEmbedding)]
107
+ );
108
+ ```
109
+
110
+ ---
111
+
112
+ ## Prompt Engineering Principles
113
+
114
+ ### Message Structure
115
+
116
+ ```ts
117
+ const messages = [
118
+ {
119
+ role: 'system',
120
+ content: `You are a helpful assistant for [Company].
121
+ You ONLY answer questions based on the provided context.
122
+ If the answer is not in the context, say "I don't have that information."
123
+ Do NOT make up information.`,
124
+ },
125
+ {
126
+ // Retrieved chunks injected here — NOT into system prompt
127
+ role: 'user',
128
+ content: `Context:\n${retrievedChunks.join('\n\n')}\n\nQuestion: ${userQuery}`,
129
+ },
130
+ ];
131
+ ```
132
+
133
+ ### Few-Shot Examples
134
+
135
+ ```ts
136
+ // ❌ Zero-shot on complex tasks — model guesses the format
137
+ "Extract entities from: John called Mary at 5pm"
138
+
139
+ // ✅ Few-shot — show the expected output format
140
+ `Extract entities. Output as JSON array.
141
+
142
+ Example:
143
+ Input: "Alice met Bob in London"
144
+ Output: [{"name":"Alice","type":"person"},{"name":"Bob","type":"person"},{"name":"London","type":"location"}]
145
+
146
+ Input: "${userText}"
147
+ Output:`
148
+ ```
149
+
150
+ ---
151
+
152
+ ## Evals: How to Know If It's Working
153
+
154
+ ```
155
+ Deterministic evals: Output matches expected exactly → code comparison
156
+ LLM-as-judge evals: Another LLM grades the output (1-5 scale)
157
+ Human evals: Gold standard, expensive, for calibration
158
+ A/B testing: Compare model/prompt versions on live traffic
159
+ ```
160
+
161
+ ### Eval Categories
162
+
163
+ | Category | What It Measures | Tooling |
164
+ |---|---|---|
165
+ | **Faithfulness** | Does answer match sources? | Ragas, ARES |
166
+ | **Relevance** | Does answer address the question? | LLM-as-judge |
167
+ | **Completeness** | Missing important info? | Human + LLM |
168
+ | **Groundedness** | Hallucination rate | Ragas |
169
+ | **Latency** | p50/p95 response time | OpenTelemetry |
170
+
171
+ ---
172
+
173
+ ## LLMOps: Production Concerns
174
+
175
+ ### Cost Control
176
+
177
+ ```ts
178
+ // Track tokens per request
179
+ const response = await openai.chat.completions.create({ ... });
180
+ const { prompt_tokens, completion_tokens } = response.usage;
181
+ logger.info({ prompt_tokens, completion_tokens, model: 'gpt-4o', cost_usd: calcCost() });
182
+
183
+ // Cache identical prompts — LLMs are deterministic at temp=0
184
+ const cacheKey = hash(systemPrompt + userQuery);
185
+ const cached = await cache.get(cacheKey);
186
+ if (cached) return cached;
187
+ ```
188
+
189
+ ### Retry with Exponential Backoff
190
+
191
+ ```ts
192
+ async function callWithRetry(fn: () => Promise<any>, maxRetries = 3) {
193
+ for (let attempt = 0; attempt < maxRetries; attempt++) {
194
+ try {
195
+ return await fn();
196
+ } catch (err: any) {
197
+ if (err.status === 429 || err.status === 503) {
198
+ const delay = Math.pow(2, attempt) * 1000 + Math.random() * 500;
199
+ await sleep(delay);
200
+ continue;
201
+ }
202
+ throw err; // Non-retryable errors bubble up immediately
203
+ }
204
+ }
205
+ throw new Error('Max retries exceeded');
206
+ }
207
+ ```
208
+
209
+ ---
210
+
211
+ ## Output Format
212
+
213
+ When this skill produces or reviews code, structure your output as follows:
214
+
215
+ ```
216
+ ━━━ LLM Engineering Report ━━━━━━━━━━━━━━━━━━━━━━━━
217
+ Skill: LLM Engineering
218
+ Language: [detected language / framework]
219
+ Scope: [N files · N functions]
220
+ ─────────────────────────────────────────────────
221
+ ✅ Passed: [checks that passed, or "All clean"]
222
+ ⚠️ Warnings: [non-blocking issues, or "None"]
223
+ ❌ Blocked: [blocking issues requiring fix, or "None"]
224
+ ─────────────────────────────────────────────────
225
+ VBC status: PENDING → VERIFIED
226
+ Evidence: [test output / lint pass / compile success]
227
+ ```
228
+
229
+ **VBC (Verification-Before-Completion) is mandatory.**
230
+ Do not mark status as VERIFIED until concrete terminal evidence is provided.
231
+
232
+
233
+ ---
234
+
235
+ ## 🏛️ Tribunal Integration (Anti-Hallucination)
236
+
237
+ **Slash command: `/review-ai`**
238
+ **Active reviewers: `logic` · `security` · `ai-code-reviewer`**
239
+
240
+ ### ❌ Forbidden AI Tropes in LLM Engineering
241
+
242
+ 1. **Hallucinated model names** — `gpt-5`, `claude-4`, `gemini-ultra-3` — verify against current provider docs.
243
+ 2. **Prompt injection via concatenation** — never `systemPrompt + userInput`. Use separate message roles.
244
+ 3. **No eval strategy** — shipping LLM features with zero eval coverage is shipping blind.
245
+ 4. **Ignoring token limits** — a prompt that exceeds the model's context window is rejected or truncated; `max_tokens` caps only the generated output, which is cut off mid-answer when the cap is hit.
246
+ 5. **No cost tracking** — LLM costs compound at scale — always instrument from day one.
247
+ 6. **Synchronous LLM calls** — all LLM API calls are async. Never block the event loop waiting for them.
248
+
249
+ ### ✅ Pre-Flight Self-Audit
250
+
251
+ ```
252
+ ✅ Are all model names verified against current provider documentation?
253
+ ✅ Is user input isolated in role:"user" messages, never concatenated into system prompt?
254
+ ✅ Is there retry logic with backoff for 429 / 503 errors?
255
+ ✅ Is token usage logged per request for cost tracking?
256
+ ✅ Is there an eval strategy (even minimal) to detect regressions?
257
+ ✅ Are context windows respected — chunked or summarized if approaching limits?
258
+ ```