tribunal-kit 3.0.0 → 3.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (226) hide show
  1. package/.agent/ARCHITECTURE.md +99 -99
  2. package/.agent/GEMINI.md +52 -52
  3. package/.agent/agents/accessibility-reviewer.md +187 -220
  4. package/.agent/agents/ai-code-reviewer.md +199 -233
  5. package/.agent/agents/backend-specialist.md +215 -238
  6. package/.agent/agents/code-archaeologist.md +161 -181
  7. package/.agent/agents/database-architect.md +184 -207
  8. package/.agent/agents/debugger.md +191 -218
  9. package/.agent/agents/dependency-reviewer.md +103 -136
  10. package/.agent/agents/devops-engineer.md +218 -238
  11. package/.agent/agents/documentation-writer.md +201 -221
  12. package/.agent/agents/explorer-agent.md +160 -180
  13. package/.agent/agents/frontend-reviewer.md +160 -194
  14. package/.agent/agents/frontend-specialist.md +248 -237
  15. package/.agent/agents/game-developer.md +48 -52
  16. package/.agent/agents/logic-reviewer.md +116 -149
  17. package/.agent/agents/mobile-developer.md +200 -223
  18. package/.agent/agents/mobile-reviewer.md +162 -195
  19. package/.agent/agents/orchestrator.md +181 -211
  20. package/.agent/agents/penetration-tester.md +157 -174
  21. package/.agent/agents/performance-optimizer.md +183 -203
  22. package/.agent/agents/performance-reviewer.md +178 -211
  23. package/.agent/agents/product-manager.md +142 -162
  24. package/.agent/agents/product-owner.md +6 -25
  25. package/.agent/agents/project-planner.md +142 -162
  26. package/.agent/agents/qa-automation-engineer.md +225 -242
  27. package/.agent/agents/security-auditor.md +174 -194
  28. package/.agent/agents/seo-specialist.md +193 -213
  29. package/.agent/agents/sql-reviewer.md +161 -194
  30. package/.agent/agents/supervisor-agent.md +184 -203
  31. package/.agent/agents/swarm-worker-contracts.md +17 -17
  32. package/.agent/agents/swarm-worker-registry.md +46 -46
  33. package/.agent/agents/test-coverage-reviewer.md +160 -193
  34. package/.agent/agents/test-engineer.md +0 -21
  35. package/.agent/agents/type-safety-reviewer.md +175 -208
  36. package/.agent/patterns/generator.md +9 -9
  37. package/.agent/patterns/inversion.md +12 -12
  38. package/.agent/patterns/pipeline.md +9 -9
  39. package/.agent/patterns/reviewer.md +13 -13
  40. package/.agent/patterns/tool-wrapper.md +9 -9
  41. package/.agent/rules/GEMINI.md +63 -63
  42. package/.agent/scripts/compress_skills.py +167 -0
  43. package/.agent/scripts/consolidate_skills.py +173 -0
  44. package/.agent/scripts/deep_compress.py +202 -0
  45. package/.agent/scripts/minify_context.py +80 -0
  46. package/.agent/scripts/security_scan.py +1 -1
  47. package/.agent/scripts/strip_tribunal.py +41 -0
  48. package/.agent/skills/agent-organizer/SKILL.md +92 -126
  49. package/.agent/skills/agentic-patterns/SKILL.md +0 -70
  50. package/.agent/skills/ai-prompt-injection-defense/SKILL.md +126 -160
  51. package/.agent/skills/api-patterns/SKILL.md +123 -215
  52. package/.agent/skills/api-security-auditor/SKILL.md +143 -177
  53. package/.agent/skills/app-builder/SKILL.md +326 -50
  54. package/.agent/skills/app-builder/templates/SKILL.md +13 -15
  55. package/.agent/skills/app-builder/templates/astro-static/TEMPLATE.md +16 -16
  56. package/.agent/skills/app-builder/templates/chrome-extension/TEMPLATE.md +22 -22
  57. package/.agent/skills/app-builder/templates/cli-tool/TEMPLATE.md +18 -18
  58. package/.agent/skills/app-builder/templates/electron-desktop/TEMPLATE.md +20 -20
  59. package/.agent/skills/app-builder/templates/express-api/TEMPLATE.md +17 -17
  60. package/.agent/skills/app-builder/templates/flutter-app/TEMPLATE.md +18 -18
  61. package/.agent/skills/app-builder/templates/monorepo-turborepo/TEMPLATE.md +21 -21
  62. package/.agent/skills/app-builder/templates/nextjs-fullstack/TEMPLATE.md +19 -19
  63. package/.agent/skills/app-builder/templates/nextjs-saas/TEMPLATE.md +26 -26
  64. package/.agent/skills/app-builder/templates/nextjs-static/TEMPLATE.md +26 -26
  65. package/.agent/skills/app-builder/templates/nuxt-app/TEMPLATE.md +19 -19
  66. package/.agent/skills/app-builder/templates/python-fastapi/TEMPLATE.md +18 -18
  67. package/.agent/skills/app-builder/templates/react-native-app/TEMPLATE.md +20 -20
  68. package/.agent/skills/appflow-wireframe/SKILL.md +87 -121
  69. package/.agent/skills/architecture/SKILL.md +82 -252
  70. package/.agent/skills/authentication-best-practices/SKILL.md +139 -173
  71. package/.agent/skills/bash-linux/SKILL.md +120 -154
  72. package/.agent/skills/behavioral-modes/SKILL.md +8 -69
  73. package/.agent/skills/brainstorming/SKILL.md +428 -104
  74. package/.agent/skills/building-native-ui/SKILL.md +143 -174
  75. package/.agent/skills/clean-code/SKILL.md +323 -360
  76. package/.agent/skills/code-review-checklist/SKILL.md +0 -62
  77. package/.agent/skills/config-validator/SKILL.md +107 -141
  78. package/.agent/skills/csharp-developer/SKILL.md +468 -528
  79. package/.agent/skills/database-design/SKILL.md +104 -369
  80. package/.agent/skills/deployment-procedures/SKILL.md +111 -145
  81. package/.agent/skills/devops-engineer/SKILL.md +295 -332
  82. package/.agent/skills/devops-incident-responder/SKILL.md +79 -113
  83. package/.agent/skills/doc.md +5 -5
  84. package/.agent/skills/documentation-templates/SKILL.md +19 -63
  85. package/.agent/skills/edge-computing/SKILL.md +123 -157
  86. package/.agent/skills/extract-design-system/SKILL.md +100 -134
  87. package/.agent/skills/framer-motion-expert/SKILL.md +111 -855
  88. package/.agent/skills/frontend-design/SKILL.md +151 -499
  89. package/.agent/skills/game-design-expert/SKILL.md +71 -105
  90. package/.agent/skills/game-engineering-expert/SKILL.md +88 -122
  91. package/.agent/skills/geo-fundamentals/SKILL.md +89 -124
  92. package/.agent/skills/github-operations/SKILL.md +279 -314
  93. package/.agent/skills/gsap-expert/SKILL.md +119 -826
  94. package/.agent/skills/i18n-localization/SKILL.md +104 -138
  95. package/.agent/skills/intelligent-routing/SKILL.md +159 -127
  96. package/.agent/skills/lint-and-validate/SKILL.md +8 -52
  97. package/.agent/skills/llm-engineering/SKILL.md +344 -357
  98. package/.agent/skills/local-first/SKILL.md +120 -154
  99. package/.agent/skills/mcp-builder/SKILL.md +84 -118
  100. package/.agent/skills/mobile-design/SKILL.md +213 -219
  101. package/.agent/skills/motion-engineering/SKILL.md +184 -0
  102. package/.agent/skills/nextjs-react-expert/SKILL.md +99 -698
  103. package/.agent/skills/nodejs-best-practices/SKILL.md +498 -559
  104. package/.agent/skills/observability/SKILL.md +293 -330
  105. package/.agent/skills/parallel-agents/SKILL.md +88 -122
  106. package/.agent/skills/performance-profiling/SKILL.md +217 -254
  107. package/.agent/skills/plan-writing/SKILL.md +84 -118
  108. package/.agent/skills/platform-engineer/SKILL.md +89 -123
  109. package/.agent/skills/playwright-best-practices/SKILL.md +128 -162
  110. package/.agent/skills/powershell-windows/SKILL.md +112 -146
  111. package/.agent/skills/python-patterns/SKILL.md +7 -35
  112. package/.agent/skills/python-pro/SKILL.md +148 -754
  113. package/.agent/skills/react-specialist/SKILL.md +123 -827
  114. package/.agent/skills/readme-builder/SKILL.md +15 -85
  115. package/.agent/skills/realtime-patterns/SKILL.md +269 -304
  116. package/.agent/skills/red-team-tactics/SKILL.md +10 -51
  117. package/.agent/skills/rust-pro/SKILL.md +623 -701
  118. package/.agent/skills/seo-fundamentals/SKILL.md +120 -154
  119. package/.agent/skills/server-management/SKILL.md +156 -190
  120. package/.agent/skills/shadcn-ui-expert/SKILL.md +172 -206
  121. package/.agent/skills/skill-creator/SKILL.md +18 -58
  122. package/.agent/skills/sql-pro/SKILL.md +579 -633
  123. package/.agent/skills/supabase-postgres-best-practices/SKILL.md +28 -68
  124. package/.agent/skills/swiftui-expert/SKILL.md +142 -176
  125. package/.agent/skills/systematic-debugging/SKILL.md +84 -118
  126. package/.agent/skills/tailwind-patterns/SKILL.md +516 -576
  127. package/.agent/skills/tdd-workflow/SKILL.md +103 -137
  128. package/.agent/skills/test-result-analyzer/SKILL.md +33 -73
  129. package/.agent/skills/testing-patterns/SKILL.md +512 -573
  130. package/.agent/skills/trend-researcher/SKILL.md +30 -71
  131. package/.agent/skills/ui-ux-pro-max/SKILL.md +0 -41
  132. package/.agent/skills/ui-ux-researcher/SKILL.md +51 -91
  133. package/.agent/skills/vue-expert/SKILL.md +127 -866
  134. package/.agent/skills/vulnerability-scanner/SKILL.md +354 -269
  135. package/.agent/skills/web-accessibility-auditor/SKILL.md +159 -193
  136. package/.agent/skills/web-design-guidelines/SKILL.md +17 -61
  137. package/.agent/skills/webapp-testing/SKILL.md +111 -145
  138. package/.agent/skills/whimsy-injector/SKILL.md +58 -132
  139. package/.agent/skills/workflow-optimizer/SKILL.md +28 -68
  140. package/.agent/workflows/api-tester.md +151 -151
  141. package/.agent/workflows/audit.md +127 -138
  142. package/.agent/workflows/brainstorm.md +110 -110
  143. package/.agent/workflows/changelog.md +112 -112
  144. package/.agent/workflows/create.md +124 -124
  145. package/.agent/workflows/debug.md +165 -189
  146. package/.agent/workflows/deploy.md +180 -189
  147. package/.agent/workflows/enhance.md +128 -151
  148. package/.agent/workflows/fix.md +114 -135
  149. package/.agent/workflows/generate.md +12 -4
  150. package/.agent/workflows/migrate.md +160 -160
  151. package/.agent/workflows/orchestrate.md +168 -168
  152. package/.agent/workflows/performance-benchmarker.md +114 -123
  153. package/.agent/workflows/plan.md +173 -173
  154. package/.agent/workflows/preview.md +80 -80
  155. package/.agent/workflows/refactor.md +161 -183
  156. package/.agent/workflows/review-ai.md +101 -129
  157. package/.agent/workflows/review.md +116 -116
  158. package/.agent/workflows/session.md +94 -94
  159. package/.agent/workflows/status.md +79 -79
  160. package/.agent/workflows/strengthen-skills.md +138 -139
  161. package/.agent/workflows/swarm.md +179 -179
  162. package/.agent/workflows/test.md +189 -211
  163. package/.agent/workflows/tribunal-backend.md +93 -113
  164. package/.agent/workflows/tribunal-database.md +94 -115
  165. package/.agent/workflows/tribunal-frontend.md +95 -118
  166. package/.agent/workflows/tribunal-full.md +92 -133
  167. package/.agent/workflows/tribunal-mobile.md +94 -119
  168. package/.agent/workflows/tribunal-performance.md +109 -133
  169. package/.agent/workflows/ui-ux-pro-max.md +122 -143
  170. package/package.json +1 -1
  171. package/.agent/skills/api-patterns/api-style.md +0 -42
  172. package/.agent/skills/api-patterns/auth.md +0 -24
  173. package/.agent/skills/api-patterns/documentation.md +0 -26
  174. package/.agent/skills/api-patterns/graphql.md +0 -41
  175. package/.agent/skills/api-patterns/rate-limiting.md +0 -31
  176. package/.agent/skills/api-patterns/response.md +0 -37
  177. package/.agent/skills/api-patterns/rest.md +0 -40
  178. package/.agent/skills/api-patterns/security-testing.md +0 -122
  179. package/.agent/skills/api-patterns/trpc.md +0 -41
  180. package/.agent/skills/api-patterns/versioning.md +0 -22
  181. package/.agent/skills/app-builder/agent-coordination.md +0 -71
  182. package/.agent/skills/app-builder/feature-building.md +0 -53
  183. package/.agent/skills/app-builder/project-detection.md +0 -34
  184. package/.agent/skills/app-builder/scaffolding.md +0 -118
  185. package/.agent/skills/app-builder/tech-stack.md +0 -40
  186. package/.agent/skills/architecture/context-discovery.md +0 -43
  187. package/.agent/skills/architecture/examples.md +0 -94
  188. package/.agent/skills/architecture/pattern-selection.md +0 -68
  189. package/.agent/skills/architecture/patterns-reference.md +0 -50
  190. package/.agent/skills/architecture/trade-off-analysis.md +0 -77
  191. package/.agent/skills/brainstorming/dynamic-questioning.md +0 -360
  192. package/.agent/skills/database-design/database-selection.md +0 -43
  193. package/.agent/skills/database-design/indexing.md +0 -39
  194. package/.agent/skills/database-design/migrations.md +0 -48
  195. package/.agent/skills/database-design/optimization.md +0 -36
  196. package/.agent/skills/database-design/orm-selection.md +0 -30
  197. package/.agent/skills/database-design/schema-design.md +0 -56
  198. package/.agent/skills/frontend-design/animation-guide.md +0 -331
  199. package/.agent/skills/frontend-design/color-system.md +0 -329
  200. package/.agent/skills/frontend-design/decision-trees.md +0 -418
  201. package/.agent/skills/frontend-design/motion-graphics.md +0 -306
  202. package/.agent/skills/frontend-design/typography-system.md +0 -363
  203. package/.agent/skills/frontend-design/ux-psychology.md +0 -1116
  204. package/.agent/skills/frontend-design/visual-effects.md +0 -383
  205. package/.agent/skills/intelligent-routing/router-manifest.md +0 -65
  206. package/.agent/skills/mobile-design/decision-trees.md +0 -516
  207. package/.agent/skills/mobile-design/mobile-backend.md +0 -491
  208. package/.agent/skills/mobile-design/mobile-color-system.md +0 -420
  209. package/.agent/skills/mobile-design/mobile-debugging.md +0 -122
  210. package/.agent/skills/mobile-design/mobile-design-thinking.md +0 -357
  211. package/.agent/skills/mobile-design/mobile-navigation.md +0 -458
  212. package/.agent/skills/mobile-design/mobile-performance.md +0 -767
  213. package/.agent/skills/mobile-design/mobile-testing.md +0 -356
  214. package/.agent/skills/mobile-design/mobile-typography.md +0 -433
  215. package/.agent/skills/mobile-design/platform-android.md +0 -666
  216. package/.agent/skills/mobile-design/platform-ios.md +0 -561
  217. package/.agent/skills/mobile-design/touch-psychology.md +0 -537
  218. package/.agent/skills/nextjs-react-expert/1-async-eliminating-waterfalls.md +0 -312
  219. package/.agent/skills/nextjs-react-expert/2-bundle-bundle-size-optimization.md +0 -240
  220. package/.agent/skills/nextjs-react-expert/3-server-server-side-performance.md +0 -490
  221. package/.agent/skills/nextjs-react-expert/4-client-client-side-data-fetching.md +0 -264
  222. package/.agent/skills/nextjs-react-expert/5-rerender-re-render-optimization.md +0 -581
  223. package/.agent/skills/nextjs-react-expert/6-rendering-rendering-performance.md +0 -432
  224. package/.agent/skills/nextjs-react-expert/7-js-javascript-performance.md +0 -684
  225. package/.agent/skills/nextjs-react-expert/8-advanced-advanced-patterns.md +0 -150
  226. package/.agent/skills/vulnerability-scanner/checklists.md +0 -121
@@ -1,357 +1,344 @@
1
- ---
2
- name: llm-engineering
3
- description: LLM engineering mastery for production AI systems. Prompt engineering, RAG pipeline design, vector store selection, embedding strategies, chunking, reranking, structured output, function calling, streaming, evals, guard-rails, cost optimization, and LLMOps. Use when building AI features, chat interfaces, semantic search, or any system calling an LLM API.
4
- allowed-tools: Read, Write, Edit, Glob, Grep
5
- version: 2.0.0
6
- last-updated: 2026-04-01
7
- applies-to-model: gemini-2.5-pro, claude-3-7-sonnet
8
- ---
9
-
10
- # LLM Engineering — Production AI Systems Mastery
11
-
12
- > An LLM without guardrails is a liability generator.
13
- > Every prompt is a contract. Every response is untrusted. Every token costs money.
14
-
15
- ---
16
-
17
- ## Model Selection
18
-
19
- ```
20
- Model │ Use Case │ Cost Tier
21
- ─────────────────────────┼───────────────────────────────────────┼──────────
22
- GPT-4o │ Complex reasoning, code generation │ $$$
23
- GPT-4o-mini │ Classification, summaries, chat │ $
24
- Claude 3.7 Sonnet │ Long documents, analysis, code │ $$$
25
- Claude 3.5 Haiku │ Fast responses, simple tasks │ $
26
- Gemini 2.5 Pro │ Large context, multimodal, code │ $$$
27
- Gemini 2.5 Flash │ High throughput, cost-efficient │ $
28
- Llama 3.3 70B (open) │ Self-hosted, data privacy │ Free*
29
- Mistral Large │ European data residency, code │ $$
30
-
31
- * = compute costs only
32
-
33
- Selection rules:
34
- 1. Start with the cheapest model that works
35
- 2. Upgrade only when eval scores require it
36
- 3. Use large models for complex reasoning, small models for classification
37
- 4. Fine-tune ONLY after prompt engineering and RAG are exhausted
38
- ```
39
-
40
- ---
41
-
42
- ## Prompt Engineering
43
-
44
- ### System Prompt Design
45
-
46
- ```typescript
47
- const SYSTEM_PROMPT = `You are a customer support agent for Acme Corp.
48
-
49
- ## Rules
50
- 1. Answer ONLY questions about Acme products and services.
51
- 2. If you don't know the answer, say "I'll connect you with a specialist."
52
- 3. Never discuss competitors.
53
- 4. Never make up product features or pricing.
54
- 5. Keep responses under 200 words.
55
-
56
- ## Response Format
57
- - Use bullet points for lists
58
- - Include product links when relevant
59
- - End with a follow-up question
60
-
61
- ## Context
62
- Current date: ${new Date().toISOString().split("T")[0]}
63
- User plan: {{user_plan}}
64
- `;
65
-
66
- // ❌ HALLUCINATION TRAP: System prompts are NOT secrets
67
- // Users can extract system prompts with jailbreak techniques
68
- // Never put API keys, internal URLs, or secrets in system prompts
69
- ```
70
-
71
- ### Structured Output (JSON Mode)
72
-
73
- ```typescript
74
- import { z } from "zod";
75
- import OpenAI from "openai";
76
-
77
- const SentimentSchema = z.object({
78
- sentiment: z.enum(["positive", "negative", "neutral"]),
79
- confidence: z.number().min(0).max(1),
80
- reasoning: z.string(),
81
- topics: z.array(z.string()),
82
- });
83
-
84
- type Sentiment = z.infer<typeof SentimentSchema>;
85
-
86
- async function analyzeSentiment(text: string): Promise<Sentiment> {
87
- const response = await openai.chat.completions.create({
88
- model: "gpt-4o-mini",
89
- response_format: { type: "json_object" },
90
- messages: [
91
- {
92
- role: "system",
93
- content: `Analyze the sentiment of the given text.
94
- Respond with JSON matching this schema:
95
- {
96
- "sentiment": "positive" | "negative" | "neutral",
97
- "confidence": 0-1,
98
- "reasoning": "brief explanation",
99
- "topics": ["topic1", "topic2"]
100
- }`,
101
- },
102
- { role: "user", content: text },
103
- ],
104
- });
105
-
106
- const raw = JSON.parse(response.choices[0].message.content ?? "{}");
107
- return SentimentSchema.parse(raw); // Zod validates the LLM response
108
- }
109
-
110
- // ❌ HALLUCINATION TRAP: Always validate LLM JSON output with Zod/schema
111
- // LLMs produce malformed JSON, wrong types, missing fields
112
- // const result = JSON.parse(response); // trust blindly
113
- // const result = Schema.parse(JSON.parse(response)); // validate
114
- ```
115
-
116
- ### Function Calling / Tool Use
117
-
118
- ```typescript
119
- const tools: OpenAI.ChatCompletionTool[] = [
120
- {
121
- type: "function",
122
- function: {
123
- name: "search_products",
124
- description: "Search products by name, category, or price range",
125
- parameters: {
126
- type: "object",
127
- properties: {
128
- query: { type: "string", description: "Search query" },
129
- category: { type: "string", enum: ["electronics", "clothing", "home"] },
130
- max_price: { type: "number", description: "Maximum price in USD" },
131
- },
132
- required: ["query"],
133
- },
134
- },
135
- },
136
- {
137
- type: "function",
138
- function: {
139
- name: "get_order_status",
140
- description: "Get the status of an order by order ID",
141
- parameters: {
142
- type: "object",
143
- properties: {
144
- order_id: { type: "string", description: "The order ID (e.g., ORD-12345)" },
145
- },
146
- required: ["order_id"],
147
- },
148
- },
149
- },
150
- ];
151
-
152
- // Tool execution loop
153
- async function chatWithTools(userMessage: string) {
154
- const messages: OpenAI.ChatCompletionMessageParam[] = [
155
- { role: "system", content: SYSTEM_PROMPT },
156
- { role: "user", content: userMessage },
157
- ];
158
-
159
- let response = await openai.chat.completions.create({
160
- model: "gpt-4o-mini",
161
- messages,
162
- tools,
163
- });
164
-
165
- // Process tool calls
166
- while (response.choices[0].finish_reason === "tool_calls") {
167
- const toolCalls = response.choices[0].message.tool_calls ?? [];
168
- messages.push(response.choices[0].message);
169
-
170
- for (const call of toolCalls) {
171
- const args = JSON.parse(call.function.arguments);
172
- const result = await executeFunction(call.function.name, args);
173
- messages.push({
174
- role: "tool",
175
- tool_call_id: call.id,
176
- content: JSON.stringify(result),
177
- });
178
- }
179
-
180
- response = await openai.chat.completions.create({
181
- model: "gpt-4o-mini",
182
- messages,
183
- tools,
184
- });
185
- }
186
-
187
- return response.choices[0].message.content;
188
- }
189
- ```
190
-
191
- ---
192
-
193
- ## RAG (Retrieval-Augmented Generation)
194
-
195
- ### Pipeline
196
-
197
- ```
198
- User Query
199
-
200
- [1] Embed query → vector
201
-
202
- [2] Search vector DB → top K chunks
203
-
204
- [3] (Optional) Rerank results → top N
205
-
206
- [4] Build prompt: system + context chunks + query
207
-
208
- [5] LLM generates answer with citations
209
-
210
- [6] Validate response (hallucination check)
211
- ```
212
-
213
- ### Chunking Strategy
214
-
215
- ```typescript
216
- // ❌ BAD: Arbitrary character splitting
217
- const chunks = text.match(/.{1,1000}/g); // breaks mid-sentence, mid-word
218
-
219
- // ✅ GOOD: Semantic chunking with overlap
220
- function chunkDocument(text: string, options: ChunkOptions = {}): Chunk[] {
221
- const {
222
- maxTokens = 512, // chunk size
223
- overlapTokens = 50, // overlap between chunks
224
- separator = "\n\n", // split on paragraph boundaries first
225
- } = options;
226
-
227
- const paragraphs = text.split(separator);
228
- const chunks: Chunk[] = [];
229
- let current = "";
230
-
231
- for (const para of paragraphs) {
232
- if (tokenCount(current + para) > maxTokens && current) {
233
- chunks.push({ text: current.trim(), tokens: tokenCount(current) });
234
- // Keep overlap from previous chunk
235
- const words = current.split(" ");
236
- current = words.slice(-overlapTokens).join(" ") + separator + para;
237
- } else {
238
- current += separator + para;
239
- }
240
- }
241
- if (current.trim()) chunks.push({ text: current.trim(), tokens: tokenCount(current) });
242
-
243
- return chunks;
244
- }
245
-
246
- // Chunk size guidelines:
247
- // 256-512 tokens → precise retrieval (Q&A, support)
248
- // 512-1024 tokens → balanced (general RAG)
249
- // 1024-2048 tokens → broad context (summarization)
250
- ```
251
-
252
- ### Vector Store Selection
253
-
254
- ```
255
- pgvector (PostgreSQL) → Already using Postgres, <10M vectors, simple
256
- Pinecone → Managed, serverless, easy scaling
257
- Weaviate → Hybrid search (vector + keyword), multi-model
258
- Qdrant → High performance, Rust-based, self-hostable
259
- Chroma → Local development, prototyping
260
- Milvus → Enterprise scale, GPU acceleration
261
-
262
- // HALLUCINATION TRAP: Vector search is NOT keyword search
263
- // "Apple CEO" might not find "Tim Cook runs Apple Inc."
264
- // Use HYBRID search (vector + BM25 keyword) for production
265
- ```
266
-
267
- ---
268
-
269
- ## Streaming
270
-
271
- ```typescript
272
- // Server-Sent Events for AI token streaming
273
- app.get("/api/chat", async (req, res) => {
274
- res.setHeader("Content-Type", "text/event-stream");
275
- res.setHeader("Cache-Control", "no-cache");
276
- res.setHeader("Connection", "keep-alive");
277
-
278
- const stream = await openai.chat.completions.create({
279
- model: "gpt-4o-mini",
280
- messages: [{ role: "user", content: req.query.message as string }],
281
- stream: true,
282
- });
283
-
284
- for await (const chunk of stream) {
285
- const content = chunk.choices[0]?.delta?.content;
286
- if (content) {
287
- res.write(`data: ${JSON.stringify({ content })}\n\n`);
288
- }
289
- }
290
-
291
- res.write("data: [DONE]\n\n");
292
- res.end();
293
- });
294
-
295
- // Client-side consumption
296
- const eventSource = new EventSource(`/api/chat?message=${encodeURIComponent(msg)}`);
297
- eventSource.onmessage = (event) => {
298
- if (event.data === "[DONE]") { eventSource.close(); return; }
299
- const { content } = JSON.parse(event.data);
300
- appendToChat(content);
301
- };
302
- ```
303
-
304
- ---
305
-
306
- ## Cost Optimization
307
-
308
- ```
309
- 1. Prompt caching → Cache system prompts (OpenAI, Anthropic support this)
310
- 2. Output token limiting → Set max_tokens to prevent runaway responses
311
- 3. Tiered models → Use cheap models for classification, expensive for reasoning
312
- 4. Batch processing → Use batch APIs for offline processing (50% discount)
313
- 5. Chunked context → Send only relevant chunks, not entire documents
314
- 6. Response streaming → Stream to reduce TTFT (time to first token)
315
- 7. Structured output → Shorter JSON responses vs verbose prose
316
-
317
- // Cost estimation:
318
- // GPT-4o: ~$2.50/1M input, ~$10/1M output
319
- // GPT-4o-mini: ~$0.15/1M input, ~$0.60/1M output
320
- // 1M tokens ≈ 750,000 words ≈ 3,000 pages
321
- ```
322
-
323
- ---
324
-
325
- ## 🤖 LLM-Specific Traps
326
-
327
- 1. **Trusting LLM JSON Output:** Always validate with Zod/schema. LLMs produce malformed JSON.
328
- 2. **Secrets in System Prompts:** System prompts can be extracted. Never include API keys or internal URLs.
329
- 3. **Fixed Character Chunking:** Splitting at 1000 chars breaks sentences. Use semantic/paragraph chunking.
330
- 4. **Vector-Only Search:** Pure vector search misses exact matches. Use hybrid search for production.
331
- 5. **No Token Limits:** Without `max_tokens`, models can generate 4000+ token responses. Set limits.
332
- 6. **Single Model for Everything:** Use tiered models — cheap for simple tasks, expensive for reasoning.
333
- 7. **No Eval Suite:** Deploying AI without evaluations is deploying untested code. Build evals.
334
- 8. **Prompt Injection Blindness:** User input can override system instructions. Always sanitize and delimit.
335
- 9. **Infinite Tool Loops:** Tool-calling agents can loop forever. Set max iterations (3-5).
336
- 10. **No Rate Limiting:** API calls without rate limiting = surprise $10,000 bill. Set spend limits.
337
-
338
- ---
339
-
340
- ## 🏛️ Tribunal Integration
341
-
342
- **Slash command: `/review-ai`**
343
-
344
- ### ✅ Pre-Flight Self-Audit
345
-
346
- ```
347
- ✅ Am I validating all LLM responses with a schema?
348
- ✅ Are there no secrets in system prompts?
349
- ✅ Is user input delimited from system instructions?
350
- ✅ Did I set max_tokens on all completions?
351
- ✅ Is there rate limiting and cost monitoring?
352
- ✅ Am I using the cheapest model that works?
353
- ✅ Is chunking semantic (not fixed-character)?
354
- ✅ Is search hybrid (vector + keyword)?
355
- ✅ Do tool-calling loops have a max iteration limit?
356
- ✅ Did I build evaluation tests for AI quality?
357
- ```
1
+ ---
2
+ name: llm-engineering
3
+ description: LLM engineering mastery for production AI systems. Prompt engineering, RAG pipeline design, vector store selection, embedding strategies, chunking, reranking, structured output, function calling, streaming, evals, guard-rails, cost optimization, and LLMOps. Use when building AI features, chat interfaces, semantic search, or any system calling an LLM API.
4
+ allowed-tools: Read, Write, Edit, Glob, Grep
5
+ version: 3.2.0
6
+ last-updated: 2026-04-07
7
+ applies-to-model: gemini-3-1-pro, claude-3-7-sonnet
8
+ ---
9
+
10
+ # LLM Engineering — Production AI Systems Mastery
11
+
12
+ ---
13
+
14
+ ## Model Selection
15
+
16
+ ```
17
+ Model │ Use Case │ Cost Tier
18
+ ─────────────────────────┼───────────────────────────────────────┼──────────
19
+ GPT-4o │ Complex reasoning, vision, code │ $$$
20
+ GPT-4o-mini │ Classification, summaries, chat │ $
21
+ o3-mini │ Deep reasoning, math, code review │ $$
22
+ Claude 3.7 Sonnet │ Long documents, analysis, code │ $$$
23
+ Claude 3.5 Haiku │ Fast responses, simple tasks │ $
24
+ Gemini 3.1 Pro (High) │ Large context, multimodal, code │ $$$
25
+ Gemini 3.0 Flash │ High throughput, cost-efficient │ $
26
+ Llama 3.3 70B (open) │ Self-hosted, data privacy │ Free*
27
+ Mistral Large 2 │ European data residency, code │ $$
28
+
29
+ * = compute costs only
30
+
31
+ Selection rules:
32
+ 1. Start with the cheapest model that passes your evals
33
+ 2. Upgrade only when eval scores require it
34
+ 3. Use large models for complex reasoning, small for classification/routing
35
+ 4. Fine-tune ONLY after prompt engineering and RAG are exhausted
36
+ 5. HALLUCINATION TRAP: Model names change frequently — always verify current names
37
+ from provider docs before hardcoding (e.g. "gpt-4o" vs "gpt-4o-2024-11-20")
38
+ ```
39
+
40
+ ---
41
+
42
+ ## Prompt Engineering
43
+
44
+ ### System Prompt Design
45
+
46
+ ```typescript
47
+ const SYSTEM_PROMPT = `You are a customer support agent for Acme Corp.
48
+
49
+ ## Rules
50
+ 1. Answer ONLY questions about Acme products and services.
51
+ 2. If you don't know the answer, say "I'll connect you with a specialist."
52
+ 3. Never discuss competitors.
53
+ 4. Never make up product features or pricing.
54
+ 5. Keep responses under 200 words.
55
+
56
+ ## Response Format
57
+ - Use bullet points for lists
58
+ - Include product links when relevant
59
+ - End with a follow-up question
60
+
61
+ ## Context
62
+ Current date: ${new Date().toISOString().split("T")[0]}
63
+ User plan: {{user_plan}}
64
+ `;
65
+
66
+ // ❌ HALLUCINATION TRAP: System prompts are NOT secrets
67
+ // Users can extract system prompts with jailbreak techniques
68
+ // Never put API keys, internal URLs, or secrets in system prompts
69
+ ```
70
+
71
+ ### Structured Output (JSON Mode)
72
+
73
+ ```typescript
74
+ import { z } from "zod";
75
+ import OpenAI from "openai";
76
+
77
+ const SentimentSchema = z.object({
78
+ sentiment: z.enum(["positive", "negative", "neutral"]),
79
+ confidence: z.number().min(0).max(1),
80
+ reasoning: z.string(),
81
+ topics: z.array(z.string()),
82
+ });
83
+
84
+ // OpenAI — json_schema mode (strict = true enforces schema exactly)
85
+ async function analyzeSentiment(text: string) {
86
+ const response = await openai.chat.completions.create({
87
+ model: "gpt-4o-mini",
88
+ response_format: {
89
+ type: "json_schema",
90
+ json_schema: {
91
+ name: "sentiment_analysis",
92
+ strict: true,
93
+ schema: {
94
+ type: "object",
95
+ properties: {
96
+ sentiment: { type: "string", enum: ["positive", "negative", "neutral"] },
97
+ confidence: { type: "number" },
98
+ reasoning: { type: "string" },
99
+ topics: { type: "array", items: { type: "string" } },
100
+ },
101
+ required: ["sentiment", "confidence", "reasoning", "topics"],
102
+ additionalProperties: false, // required for strict mode
103
+ },
104
+ },
105
+ },
106
+ messages: [{ role: "system", content: "Analyze sentiment." }, { role: "user", content: text }],
107
+ });
108
+ const raw = JSON.parse(response.choices[0].message.content ?? "{}");
109
+ return SentimentSchema.parse(raw); // always validate with Zod even in strict mode
110
+ }
111
+
112
+ // Gemini — response_mime_type + response_schema
113
+ import { GoogleGenerativeAI, SchemaType } from "@google/generative-ai";
114
+ const genAI = new GoogleGenerativeAI(process.env.GEMINI_API_KEY!);
115
+ const model = genAI.getGenerativeModel({
116
+ model: "gemini-2.0-flash",
117
+ generationConfig: {
118
+ responseMimeType: "application/json",
119
+ responseSchema: {
120
+ type: SchemaType.OBJECT,
121
+ properties: {
122
+ sentiment: { type: SchemaType.STRING, enum: ["positive", "negative", "neutral"] },
123
+ confidence: { type: SchemaType.NUMBER },
124
+ topics: { type: SchemaType.ARRAY, items: { type: SchemaType.STRING } },
125
+ },
126
+ required: ["sentiment", "confidence", "topics"],
127
+ },
128
+ },
129
+ });
130
+
131
+ // ❌ HALLUCINATION TRAP: Always validate LLM JSON output with Zod/schema
132
+ // LLMs can produce malformed JSON, wrong types, or missing fields; even with strict mode, expect refusals, truncated output, and out-of-range values
133
+ // ❌ const result = JSON.parse(response); // trust blindly
134
+ // ✅ const result = Schema.parse(JSON.parse(response)); // validate always
135
+ ```
136
+
137
+ ### Function Calling / Tool Use
138
+
139
+ ```typescript
140
+ const tools: OpenAI.ChatCompletionTool[] = [
141
+ {
142
+ type: "function",
143
+ function: {
144
+ name: "search_products",
145
+ description: "Search products by name, category, or price range",
146
+ parameters: {
147
+ type: "object",
148
+ properties: {
149
+ query: { type: "string", description: "Search query" },
150
+ category: { type: "string", enum: ["electronics", "clothing", "home"] },
151
+ max_price: { type: "number", description: "Maximum price in USD" },
152
+ },
153
+ required: ["query"],
154
+ },
155
+ },
156
+ },
157
+ {
158
+ type: "function",
159
+ function: {
160
+ name: "get_order_status",
161
+ description: "Get the status of an order by order ID",
162
+ parameters: {
163
+ type: "object",
164
+ properties: {
165
+ order_id: { type: "string", description: "The order ID (e.g., ORD-12345)" },
166
+ },
167
+ required: ["order_id"],
168
+ },
169
+ },
170
+ },
171
+ ];
172
+
173
+ // Tool execution loop
174
+ async function chatWithTools(userMessage: string) {
175
+ const messages: OpenAI.ChatCompletionMessageParam[] = [
176
+ { role: "system", content: SYSTEM_PROMPT },
177
+ { role: "user", content: userMessage },
178
+ ];
179
+
180
+ let response = await openai.chat.completions.create({
181
+ model: "gpt-4o-mini",
182
+ messages,
183
+ tools,
184
+ });
185
+
186
+ // Process tool calls
187
+ while (response.choices[0].finish_reason === "tool_calls") {
188
+ const toolCalls = response.choices[0].message.tool_calls ?? [];
189
+ messages.push(response.choices[0].message);
190
+
191
+ for (const call of toolCalls) {
192
+ const args = JSON.parse(call.function.arguments);
193
+ const result = await executeFunction(call.function.name, args);
194
+ messages.push({
195
+ role: "tool",
196
+ tool_call_id: call.id,
197
+ content: JSON.stringify(result),
198
+ });
199
+ }
200
+
201
+ response = await openai.chat.completions.create({
202
+ model: "gpt-4o-mini",
203
+ messages,
204
+ tools,
205
+ });
206
+ }
207
+
208
+ return response.choices[0].message.content;
209
+ }
210
+ ```
211
+
212
+ ---
213
+
214
+ ## RAG (Retrieval-Augmented Generation)
215
+
216
+ ### Pipeline
217
+
218
+ ```
219
+ User Query
220
+
221
+ [1] Embed query → vector
222
+
223
+ [2] Search vector DB → top K chunks
224
+
225
+ [3] (Optional) Rerank results → top N
226
+
227
+ [4] Build prompt: system + context chunks + query
228
+
229
+ [5] LLM generates answer with citations
230
+
231
+ [6] Validate response (hallucination check)
232
+ ```
233
+
234
+ ### Chunking Strategy
235
+
236
+ ```typescript
237
+ // ❌ BAD: Arbitrary character splitting
238
+ const chunks = text.match(/.{1,1000}/g); // breaks mid-sentence, mid-word
239
+
240
+ // ✅ GOOD: Semantic chunking with overlap
241
+ function chunkDocument(text: string, options: ChunkOptions = {}): Chunk[] {
242
+ const {
243
+ maxTokens = 512, // chunk size
244
+ overlapTokens = 50, // overlap between chunks
245
+ separator = "\n\n", // split on paragraph boundaries first
246
+ } = options;
247
+
248
+ const paragraphs = text.split(separator);
249
+ const chunks: Chunk[] = [];
250
+ let current = "";
251
+
252
+ for (const para of paragraphs) {
253
+ if (tokenCount(current + para) > maxTokens && current) {
254
+ chunks.push({ text: current.trim(), tokens: tokenCount(current) });
255
+ // Keep overlap from previous chunk
256
+ const words = current.split(" ");
257
+ current = words.slice(-overlapTokens).join(" ") + separator + para;
258
+ } else {
259
+ current += separator + para;
260
+ }
261
+ }
262
+ if (current.trim()) chunks.push({ text: current.trim(), tokens: tokenCount(current) });
263
+
264
+ return chunks;
265
+ }
266
+
267
+ // Chunk size guidelines:
268
+ // 256-512 tokens → precise retrieval (Q&A, support)
269
+ // 512-1024 tokens → balanced (general RAG)
270
+ // 1024-2048 tokens → broad context (summarization)
271
+ ```
272
+
273
+ ### Vector Store Selection
274
+
275
+ ```
276
+ pgvector (PostgreSQL) → Already using Postgres, <10M vectors, simple
277
+ Pinecone → Managed, serverless, easy scaling
278
+ Weaviate → Hybrid search (vector + keyword), multi-model
279
+ Qdrant → High performance, Rust-based, self-hostable
280
+ Chroma → Local development, prototyping
281
+ Milvus → Enterprise scale, GPU acceleration
282
+
283
+ // ❌ HALLUCINATION TRAP: Vector search is NOT keyword search
284
+ // "Apple CEO" might not find "Tim Cook runs Apple Inc."
285
+ // Use HYBRID search (vector + BM25 keyword) for production — exact terms (IDs, SKUs, error codes, rare names) need keyword matching
286
+ ```
287
+
288
+ ---
289
+
290
+ ## Streaming
291
+
292
+ ```typescript
293
+ // Server-Sent Events for AI token streaming
294
+ app.get("/api/chat", async (req, res) => {
295
+ res.setHeader("Content-Type", "text/event-stream");
296
+ res.setHeader("Cache-Control", "no-cache");
297
+ res.setHeader("Connection", "keep-alive");
298
+
299
+ const stream = await openai.chat.completions.create({
300
+ model: "gpt-4o-mini",
301
+ messages: [{ role: "user", content: req.query.message as string }],
302
+ stream: true,
303
+ });
304
+
305
+ for await (const chunk of stream) {
306
+ const content = chunk.choices[0]?.delta?.content;
307
+ if (content) {
308
+ res.write(`data: ${JSON.stringify({ content })}\n\n`);
309
+ }
310
+ }
311
+
312
+ res.write("data: [DONE]\n\n");
313
+ res.end();
314
+ });
315
+
316
+ // Client-side consumption
317
+ const eventSource = new EventSource(`/api/chat?message=${encodeURIComponent(msg)}`);
318
+ eventSource.onmessage = (event) => {
319
+ if (event.data === "[DONE]") { eventSource.close(); return; }
320
+ const { content } = JSON.parse(event.data);
321
+ appendToChat(content);
322
+ };
323
+ ```
324
+
325
+ ---
326
+
327
+ ## Cost Optimization
328
+
329
+ ```
330
+ 1. Prompt caching → Cache system prompts (OpenAI, Anthropic support this)
331
+ 2. Output token limiting → Set max_tokens to prevent runaway responses
332
+ 3. Tiered models → Use cheap models for classification, expensive for reasoning
333
+ 4. Batch processing → Use batch APIs for offline processing (50% discount)
334
+ 5. Chunked context → Send only relevant chunks, not entire documents
335
+ 6. Response streaming → Stream to reduce TTFT (time to first token); improves perceived latency, not token cost
336
+ 7. Structured output → Shorter JSON responses vs verbose prose
337
+
338
+ // Cost estimation:
339
+ // GPT-4o: ~$2.50/1M input, ~$10/1M output
340
+ // GPT-4o-mini: ~$0.15/1M input, ~$0.60/1M output
341
+ // 1M tokens ≈ 750,000 words ≈ 3,000 pages
342
+ ```
343
+
344
+ ---