tribunal-kit 3.0.0 → 4.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agent/ARCHITECTURE.md +99 -99
- package/.agent/GEMINI.md +52 -52
- package/.agent/agents/accessibility-reviewer.md +187 -220
- package/.agent/agents/ai-code-reviewer.md +199 -233
- package/.agent/agents/backend-specialist.md +215 -238
- package/.agent/agents/code-archaeologist.md +161 -181
- package/.agent/agents/database-architect.md +184 -207
- package/.agent/agents/debugger.md +191 -218
- package/.agent/agents/dependency-reviewer.md +103 -136
- package/.agent/agents/devops-engineer.md +218 -238
- package/.agent/agents/documentation-writer.md +201 -221
- package/.agent/agents/explorer-agent.md +160 -180
- package/.agent/agents/frontend-reviewer.md +160 -194
- package/.agent/agents/frontend-specialist.md +248 -237
- package/.agent/agents/game-developer.md +48 -52
- package/.agent/agents/logic-reviewer.md +116 -149
- package/.agent/agents/mobile-developer.md +200 -223
- package/.agent/agents/mobile-reviewer.md +162 -195
- package/.agent/agents/orchestrator.md +181 -211
- package/.agent/agents/penetration-tester.md +157 -174
- package/.agent/agents/performance-optimizer.md +183 -203
- package/.agent/agents/performance-reviewer.md +178 -211
- package/.agent/agents/precedence-reviewer.md +213 -0
- package/.agent/agents/product-manager.md +142 -162
- package/.agent/agents/product-owner.md +6 -25
- package/.agent/agents/project-planner.md +142 -162
- package/.agent/agents/qa-automation-engineer.md +225 -242
- package/.agent/agents/security-auditor.md +174 -194
- package/.agent/agents/seo-specialist.md +193 -213
- package/.agent/agents/sql-reviewer.md +161 -194
- package/.agent/agents/supervisor-agent.md +184 -203
- package/.agent/agents/swarm-worker-contracts.md +17 -17
- package/.agent/agents/swarm-worker-registry.md +46 -46
- package/.agent/agents/test-coverage-reviewer.md +160 -193
- package/.agent/agents/test-engineer.md +0 -21
- package/.agent/agents/type-safety-reviewer.md +175 -208
- package/.agent/patterns/generator.md +9 -9
- package/.agent/patterns/inversion.md +12 -12
- package/.agent/patterns/pipeline.md +9 -9
- package/.agent/patterns/reviewer.md +13 -13
- package/.agent/patterns/tool-wrapper.md +9 -9
- package/.agent/rules/GEMINI.md +63 -63
- package/.agent/scripts/append_flow.js +72 -0
- package/.agent/scripts/case_law_manager.py +525 -0
- package/.agent/scripts/compress_skills.py +167 -0
- package/.agent/scripts/consolidate_skills.py +173 -0
- package/.agent/scripts/deep_compress.py +202 -0
- package/.agent/scripts/minify_context.py +80 -0
- package/.agent/scripts/security_scan.py +1 -1
- package/.agent/scripts/skill_evolution.py +563 -0
- package/.agent/scripts/strip_tribunal.py +41 -0
- package/.agent/skills/agent-organizer/SKILL.md +100 -126
- package/.agent/skills/agentic-patterns/SKILL.md +0 -70
- package/.agent/skills/ai-prompt-injection-defense/SKILL.md +134 -160
- package/.agent/skills/api-patterns/SKILL.md +123 -215
- package/.agent/skills/api-security-auditor/SKILL.md +143 -177
- package/.agent/skills/app-builder/SKILL.md +334 -50
- package/.agent/skills/app-builder/templates/SKILL.md +13 -15
- package/.agent/skills/app-builder/templates/astro-static/TEMPLATE.md +16 -16
- package/.agent/skills/app-builder/templates/chrome-extension/TEMPLATE.md +22 -22
- package/.agent/skills/app-builder/templates/cli-tool/TEMPLATE.md +18 -18
- package/.agent/skills/app-builder/templates/electron-desktop/TEMPLATE.md +20 -20
- package/.agent/skills/app-builder/templates/express-api/TEMPLATE.md +17 -17
- package/.agent/skills/app-builder/templates/flutter-app/TEMPLATE.md +18 -18
- package/.agent/skills/app-builder/templates/monorepo-turborepo/TEMPLATE.md +21 -21
- package/.agent/skills/app-builder/templates/nextjs-fullstack/TEMPLATE.md +19 -19
- package/.agent/skills/app-builder/templates/nextjs-saas/TEMPLATE.md +26 -26
- package/.agent/skills/app-builder/templates/nextjs-static/TEMPLATE.md +26 -26
- package/.agent/skills/app-builder/templates/nuxt-app/TEMPLATE.md +19 -19
- package/.agent/skills/app-builder/templates/python-fastapi/TEMPLATE.md +18 -18
- package/.agent/skills/app-builder/templates/react-native-app/TEMPLATE.md +20 -20
- package/.agent/skills/appflow-wireframe/SKILL.md +95 -121
- package/.agent/skills/architecture/SKILL.md +169 -331
- package/.agent/skills/authentication-best-practices/SKILL.md +139 -173
- package/.agent/skills/bash-linux/SKILL.md +129 -154
- package/.agent/skills/behavioral-modes/SKILL.md +8 -69
- package/.agent/skills/brainstorming/SKILL.md +436 -104
- package/.agent/skills/building-native-ui/SKILL.md +152 -174
- package/.agent/skills/clean-code/SKILL.md +331 -360
- package/.agent/skills/code-review-checklist/SKILL.md +0 -62
- package/.agent/skills/config-validator/SKILL.md +115 -141
- package/.agent/skills/csharp-developer/SKILL.md +468 -528
- package/.agent/skills/database-design/SKILL.md +104 -369
- package/.agent/skills/deployment-procedures/SKILL.md +119 -145
- package/.agent/skills/devops-engineer/SKILL.md +295 -332
- package/.agent/skills/devops-incident-responder/SKILL.md +87 -113
- package/.agent/skills/doc.md +5 -5
- package/.agent/skills/documentation-templates/SKILL.md +27 -63
- package/.agent/skills/edge-computing/SKILL.md +131 -157
- package/.agent/skills/extract-design-system/SKILL.md +108 -134
- package/.agent/skills/framer-motion-expert/SKILL.md +111 -855
- package/.agent/skills/frontend-design/SKILL.md +151 -499
- package/.agent/skills/game-design-expert/SKILL.md +79 -105
- package/.agent/skills/game-engineering-expert/SKILL.md +96 -122
- package/.agent/skills/geo-fundamentals/SKILL.md +97 -124
- package/.agent/skills/github-operations/SKILL.md +279 -314
- package/.agent/skills/gsap-expert/SKILL.md +119 -826
- package/.agent/skills/i18n-localization/SKILL.md +113 -138
- package/.agent/skills/intelligent-routing/SKILL.md +167 -127
- package/.agent/skills/lint-and-validate/SKILL.md +16 -52
- package/.agent/skills/llm-engineering/SKILL.md +344 -357
- package/.agent/skills/local-first/SKILL.md +128 -154
- package/.agent/skills/mcp-builder/SKILL.md +92 -118
- package/.agent/skills/mobile-design/SKILL.md +213 -219
- package/.agent/skills/motion-engineering/SKILL.md +184 -0
- package/.agent/skills/nextjs-react-expert/SKILL.md +99 -698
- package/.agent/skills/nodejs-best-practices/SKILL.md +498 -559
- package/.agent/skills/observability/SKILL.md +293 -330
- package/.agent/skills/parallel-agents/SKILL.md +96 -122
- package/.agent/skills/performance-profiling/SKILL.md +217 -254
- package/.agent/skills/plan-writing/SKILL.md +92 -118
- package/.agent/skills/platform-engineer/SKILL.md +97 -123
- package/.agent/skills/playwright-best-practices/SKILL.md +137 -162
- package/.agent/skills/powershell-windows/SKILL.md +112 -146
- package/.agent/skills/project-idioms/SKILL.md +87 -0
- package/.agent/skills/python-patterns/SKILL.md +15 -35
- package/.agent/skills/python-pro/SKILL.md +148 -754
- package/.agent/skills/react-specialist/SKILL.md +123 -827
- package/.agent/skills/readme-builder/SKILL.md +23 -85
- package/.agent/skills/realtime-patterns/SKILL.md +269 -304
- package/.agent/skills/red-team-tactics/SKILL.md +18 -51
- package/.agent/skills/rust-pro/SKILL.md +623 -701
- package/.agent/skills/seo-fundamentals/SKILL.md +129 -154
- package/.agent/skills/server-management/SKILL.md +164 -190
- package/.agent/skills/shadcn-ui-expert/SKILL.md +181 -206
- package/.agent/skills/skill-creator/SKILL.md +24 -56
- package/.agent/skills/sql-pro/SKILL.md +579 -633
- package/.agent/skills/supabase-postgres-best-practices/SKILL.md +35 -66
- package/.agent/skills/swiftui-expert/SKILL.md +151 -176
- package/.agent/skills/systematic-debugging/SKILL.md +92 -118
- package/.agent/skills/tailwind-patterns/SKILL.md +516 -576
- package/.agent/skills/tdd-workflow/SKILL.md +111 -137
- package/.agent/skills/test-result-analyzer/SKILL.md +33 -73
- package/.agent/skills/testing-patterns/SKILL.md +512 -573
- package/.agent/skills/trend-researcher/SKILL.md +30 -71
- package/.agent/skills/ui-ux-pro-max/SKILL.md +8 -41
- package/.agent/skills/ui-ux-researcher/SKILL.md +51 -91
- package/.agent/skills/vue-expert/SKILL.md +127 -866
- package/.agent/skills/vulnerability-scanner/SKILL.md +354 -269
- package/.agent/skills/web-accessibility-auditor/SKILL.md +168 -193
- package/.agent/skills/web-design-guidelines/SKILL.md +25 -61
- package/.agent/skills/webapp-testing/SKILL.md +119 -145
- package/.agent/skills/whimsy-injector/SKILL.md +58 -132
- package/.agent/skills/workflow-optimizer/SKILL.md +28 -68
- package/.agent/workflows/api-tester.md +151 -151
- package/.agent/workflows/audit.md +127 -138
- package/.agent/workflows/brainstorm.md +110 -110
- package/.agent/workflows/changelog.md +112 -112
- package/.agent/workflows/create.md +124 -124
- package/.agent/workflows/debug.md +165 -189
- package/.agent/workflows/deploy.md +180 -189
- package/.agent/workflows/enhance.md +128 -151
- package/.agent/workflows/fix.md +114 -135
- package/.agent/workflows/generate.md +13 -4
- package/.agent/workflows/migrate.md +160 -160
- package/.agent/workflows/orchestrate.md +168 -168
- package/.agent/workflows/performance-benchmarker.md +114 -123
- package/.agent/workflows/plan.md +173 -173
- package/.agent/workflows/preview.md +80 -80
- package/.agent/workflows/refactor.md +161 -183
- package/.agent/workflows/review-ai.md +101 -129
- package/.agent/workflows/review.md +116 -116
- package/.agent/workflows/session.md +94 -94
- package/.agent/workflows/status.md +79 -79
- package/.agent/workflows/strengthen-skills.md +138 -139
- package/.agent/workflows/swarm.md +179 -179
- package/.agent/workflows/test.md +189 -211
- package/.agent/workflows/tribunal-backend.md +94 -113
- package/.agent/workflows/tribunal-database.md +95 -115
- package/.agent/workflows/tribunal-frontend.md +96 -118
- package/.agent/workflows/tribunal-full.md +93 -133
- package/.agent/workflows/tribunal-mobile.md +95 -119
- package/.agent/workflows/tribunal-performance.md +110 -133
- package/.agent/workflows/ui-ux-pro-max.md +122 -143
- package/README.md +30 -1
- package/bin/tribunal-kit.js +175 -12
- package/package.json +25 -4
- package/.agent/skills/api-patterns/api-style.md +0 -42
- package/.agent/skills/api-patterns/auth.md +0 -24
- package/.agent/skills/api-patterns/documentation.md +0 -26
- package/.agent/skills/api-patterns/graphql.md +0 -41
- package/.agent/skills/api-patterns/rate-limiting.md +0 -31
- package/.agent/skills/api-patterns/response.md +0 -37
- package/.agent/skills/api-patterns/rest.md +0 -40
- package/.agent/skills/api-patterns/security-testing.md +0 -122
- package/.agent/skills/api-patterns/trpc.md +0 -41
- package/.agent/skills/api-patterns/versioning.md +0 -22
- package/.agent/skills/app-builder/agent-coordination.md +0 -71
- package/.agent/skills/app-builder/feature-building.md +0 -53
- package/.agent/skills/app-builder/project-detection.md +0 -34
- package/.agent/skills/app-builder/scaffolding.md +0 -118
- package/.agent/skills/app-builder/tech-stack.md +0 -40
- package/.agent/skills/architecture/context-discovery.md +0 -43
- package/.agent/skills/architecture/examples.md +0 -94
- package/.agent/skills/architecture/pattern-selection.md +0 -68
- package/.agent/skills/architecture/patterns-reference.md +0 -50
- package/.agent/skills/architecture/trade-off-analysis.md +0 -77
- package/.agent/skills/brainstorming/dynamic-questioning.md +0 -360
- package/.agent/skills/database-design/database-selection.md +0 -43
- package/.agent/skills/database-design/indexing.md +0 -39
- package/.agent/skills/database-design/migrations.md +0 -48
- package/.agent/skills/database-design/optimization.md +0 -36
- package/.agent/skills/database-design/orm-selection.md +0 -30
- package/.agent/skills/database-design/schema-design.md +0 -56
- package/.agent/skills/frontend-design/animation-guide.md +0 -331
- package/.agent/skills/frontend-design/color-system.md +0 -329
- package/.agent/skills/frontend-design/decision-trees.md +0 -418
- package/.agent/skills/frontend-design/motion-graphics.md +0 -306
- package/.agent/skills/frontend-design/typography-system.md +0 -363
- package/.agent/skills/frontend-design/ux-psychology.md +0 -1116
- package/.agent/skills/frontend-design/visual-effects.md +0 -383
- package/.agent/skills/intelligent-routing/router-manifest.md +0 -65
- package/.agent/skills/mobile-design/decision-trees.md +0 -516
- package/.agent/skills/mobile-design/mobile-backend.md +0 -491
- package/.agent/skills/mobile-design/mobile-color-system.md +0 -420
- package/.agent/skills/mobile-design/mobile-debugging.md +0 -122
- package/.agent/skills/mobile-design/mobile-design-thinking.md +0 -357
- package/.agent/skills/mobile-design/mobile-navigation.md +0 -458
- package/.agent/skills/mobile-design/mobile-performance.md +0 -767
- package/.agent/skills/mobile-design/mobile-testing.md +0 -356
- package/.agent/skills/mobile-design/mobile-typography.md +0 -433
- package/.agent/skills/mobile-design/platform-android.md +0 -666
- package/.agent/skills/mobile-design/platform-ios.md +0 -561
- package/.agent/skills/mobile-design/touch-psychology.md +0 -537
- package/.agent/skills/nextjs-react-expert/1-async-eliminating-waterfalls.md +0 -312
- package/.agent/skills/nextjs-react-expert/2-bundle-bundle-size-optimization.md +0 -240
- package/.agent/skills/nextjs-react-expert/3-server-server-side-performance.md +0 -490
- package/.agent/skills/nextjs-react-expert/4-client-client-side-data-fetching.md +0 -264
- package/.agent/skills/nextjs-react-expert/5-rerender-re-render-optimization.md +0 -581
- package/.agent/skills/nextjs-react-expert/6-rendering-rendering-performance.md +0 -432
- package/.agent/skills/nextjs-react-expert/7-js-javascript-performance.md +0 -684
- package/.agent/skills/nextjs-react-expert/8-advanced-advanced-patterns.md +0 -150
- package/.agent/skills/vulnerability-scanner/checklists.md +0 -121
|
@@ -1,357 +1,344 @@
|
|
|
1
|
-
---
|
|
2
|
-
name: llm-engineering
|
|
3
|
-
description: LLM engineering mastery for production AI systems. Prompt engineering, RAG pipeline design, vector store selection, embedding strategies, chunking, reranking, structured output, function calling, streaming, evals, guard-rails, cost optimization, and LLMOps. Use when building AI features, chat interfaces, semantic search, or any system calling an LLM API.
|
|
4
|
-
allowed-tools: Read, Write, Edit, Glob, Grep
|
|
5
|
-
version: 2.0
|
|
6
|
-
last-updated: 2026-04-
|
|
7
|
-
applies-to-model: gemini-
|
|
8
|
-
---
|
|
9
|
-
|
|
10
|
-
# LLM Engineering — Production AI Systems Mastery
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
```
|
|
39
|
-
|
|
40
|
-
---
|
|
41
|
-
|
|
42
|
-
## Prompt Engineering
|
|
43
|
-
|
|
44
|
-
### System Prompt Design
|
|
45
|
-
|
|
46
|
-
```typescript
|
|
47
|
-
const SYSTEM_PROMPT = `You are a customer support agent for Acme Corp.
|
|
48
|
-
|
|
49
|
-
## Rules
|
|
50
|
-
1. Answer ONLY questions about Acme products and services.
|
|
51
|
-
2. If you don't know the answer, say "I'll connect you with a specialist."
|
|
52
|
-
3. Never discuss competitors.
|
|
53
|
-
4. Never make up product features or pricing.
|
|
54
|
-
5. Keep responses under 200 words.
|
|
55
|
-
|
|
56
|
-
## Response Format
|
|
57
|
-
- Use bullet points for lists
|
|
58
|
-
- Include product links when relevant
|
|
59
|
-
- End with a follow-up question
|
|
60
|
-
|
|
61
|
-
## Context
|
|
62
|
-
Current date: ${new Date().toISOString().split("T")[0]}
|
|
63
|
-
User plan: {{user_plan}}
|
|
64
|
-
`;
|
|
65
|
-
|
|
66
|
-
// ❌ HALLUCINATION TRAP: System prompts are NOT secrets
|
|
67
|
-
// Users can extract system prompts with jailbreak techniques
|
|
68
|
-
// Never put API keys, internal URLs, or secrets in system prompts
|
|
69
|
-
```
|
|
70
|
-
|
|
71
|
-
### Structured Output (JSON Mode)
|
|
72
|
-
|
|
73
|
-
```typescript
|
|
74
|
-
import { z } from "zod";
|
|
75
|
-
import OpenAI from "openai";
|
|
76
|
-
|
|
77
|
-
const SentimentSchema = z.object({
|
|
78
|
-
sentiment: z.enum(["positive", "negative", "neutral"]),
|
|
79
|
-
confidence: z.number().min(0).max(1),
|
|
80
|
-
reasoning: z.string(),
|
|
81
|
-
topics: z.array(z.string()),
|
|
82
|
-
});
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
{
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
}
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
}
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
//
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
]
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
[
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
```
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
const
|
|
300
|
-
|
|
301
|
-
}
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
```
|
|
347
|
-
✅ Am I validating all LLM responses with a schema?
|
|
348
|
-
✅ Are there no secrets in system prompts?
|
|
349
|
-
✅ Is user input delimited from system instructions?
|
|
350
|
-
✅ Did I set max_tokens on all completions?
|
|
351
|
-
✅ Is there rate limiting and cost monitoring?
|
|
352
|
-
✅ Am I using the cheapest model that works?
|
|
353
|
-
✅ Is chunking semantic (not fixed-character)?
|
|
354
|
-
✅ Is search hybrid (vector + keyword)?
|
|
355
|
-
✅ Do tool-calling loops have a max iteration limit?
|
|
356
|
-
✅ Did I build evaluation tests for AI quality?
|
|
357
|
-
```
|
|
1
|
+
---
|
|
2
|
+
name: llm-engineering
|
|
3
|
+
description: LLM engineering mastery for production AI systems. Prompt engineering, RAG pipeline design, vector store selection, embedding strategies, chunking, reranking, structured output, function calling, streaming, evals, guard-rails, cost optimization, and LLMOps. Use when building AI features, chat interfaces, semantic search, or any system calling an LLM API.
|
|
4
|
+
allowed-tools: Read, Write, Edit, Glob, Grep
|
|
5
|
+
version: 3.2.0
|
|
6
|
+
last-updated: 2026-04-07
|
|
7
|
+
applies-to-model: gemini-3-1-pro, claude-3-7-sonnet
|
|
8
|
+
---
|
|
9
|
+
|
|
10
|
+
# LLM Engineering — Production AI Systems Mastery
|
|
11
|
+
|
|
12
|
+
---
|
|
13
|
+
|
|
14
|
+
## Model Selection
|
|
15
|
+
|
|
16
|
+
```
|
|
17
|
+
Model │ Use Case │ Cost Tier
|
|
18
|
+
─────────────────────────┼───────────────────────────────────────┼──────────
|
|
19
|
+
GPT-4o │ Complex reasoning, vision, code │ $$$
|
|
20
|
+
GPT-4o-mini │ Classification, summaries, chat │ $
|
|
21
|
+
o3-mini │ Deep reasoning, math, code review │ $$
|
|
22
|
+
Claude 3.7 Sonnet │ Long documents, analysis, code │ $$$
|
|
23
|
+
Claude 3.5 Haiku │ Fast responses, simple tasks │ $
|
|
24
|
+
Gemini 3.1 Pro (High) │ Large context, multimodal, code │ $$$
|
|
25
|
+
Gemini 3.0 Flash │ High throughput, cost-efficient │ $
|
|
26
|
+
Llama 3.3 70B (open) │ Self-hosted, data privacy │ Free*
|
|
27
|
+
Mistral Large 2 │ European data residency, code │ $$
|
|
28
|
+
|
|
29
|
+
* = compute costs only
|
|
30
|
+
|
|
31
|
+
Selection rules:
|
|
32
|
+
1. Start with the cheapest model that passes your evals
|
|
33
|
+
2. Upgrade only when eval scores require it
|
|
34
|
+
3. Use large models for complex reasoning, small for classification/routing
|
|
35
|
+
4. Fine-tune ONLY after prompt engineering and RAG are exhausted
|
|
36
|
+
5. ❌ HALLUCINATION TRAP: Model names change frequently — always verify current names
|
|
37
|
+
from provider docs before hardcoding (e.g. "gpt-4o" vs "gpt-4o-2024-11-20")
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
---
|
|
41
|
+
|
|
42
|
+
## Prompt Engineering
|
|
43
|
+
|
|
44
|
+
### System Prompt Design
|
|
45
|
+
|
|
46
|
+
```typescript
|
|
47
|
+
/**
 * Renders the Acme support system prompt for one conversation.
 *
 * Rendering per request keeps the "Current date" line fresh in long-running
 * processes and actually substitutes the user's plan, which a module-level
 * template literal cannot do.
 *
 * @param userPlan - Text placed after "User plan:".
 * @param now - Clock used for the "Current date" line; defaults to the current time.
 * @returns The complete system prompt text.
 */
function buildSystemPrompt(userPlan: string, now: Date = new Date()): string {
  // toISOString() is always UTC, so this is the UTC calendar date (YYYY-MM-DD).
  const currentDate = now.toISOString().split("T")[0];

  return `You are a customer support agent for Acme Corp.

## Rules
1. Answer ONLY questions about Acme products and services.
2. If you don't know the answer, say "I'll connect you with a specialist."
3. Never discuss competitors.
4. Never make up product features or pricing.
5. Keep responses under 200 words.

## Response Format
- Use bullet points for lists
- Include product links when relevant
- End with a follow-up question

## Context
Current date: ${currentDate}
User plan: ${userPlan}
`;
}

/**
 * Backward-compatible constant form of the prompt.
 *
 * The date is frozen when the module loads and the literal "{{user_plan}}"
 * placeholder is left in place for the caller to substitute. Prefer calling
 * buildSystemPrompt(plan) per request.
 */
const SYSTEM_PROMPT = buildSystemPrompt("{{user_plan}}");
|
|
65
|
+
|
|
66
|
+
// ❌ HALLUCINATION TRAP: System prompts are NOT secrets
|
|
67
|
+
// Users can extract system prompts with jailbreak techniques
|
|
68
|
+
// Never put API keys, internal URLs, or secrets in system prompts
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
### Structured Output (JSON Mode)
|
|
72
|
+
|
|
73
|
+
```typescript
|
|
74
|
+
import { z } from "zod";
|
|
75
|
+
import OpenAI from "openai";
|
|
76
|
+
|
|
77
|
+
// Allowed sentiment labels, listed once so the schema below reads as a plain field list.
const SENTIMENT_LABELS = ["positive", "negative", "neutral"] as const;

/**
 * Runtime contract for a sentiment-analysis result:
 * a label, a confidence in [0, 1], a free-text rationale, and a list of topics.
 */
const SentimentSchema = z.object({
  sentiment: z.enum(SENTIMENT_LABELS),
  confidence: z.number().min(0).max(1),
  reasoning: z.string(),
  topics: z.array(z.string()),
});
|
|
83
|
+
|
|
84
|
+
// OpenAI — json_schema mode (strict = true enforces schema exactly)
|
|
85
|
+
/**
 * Classifies the sentiment of `text` using OpenAI structured outputs.
 *
 * The request pins the response to a strict JSON schema, and the parsed
 * payload is then validated again with `SentimentSchema`, because the JSON
 * schema sent to the API does not express the 0..1 range on `confidence`.
 *
 * @param text - The user-supplied text to analyze.
 * @returns The validated sentiment result.
 * @throws Error when the completion has no choice, is refused, is truncated,
 *   or carries no content; a Zod validation error when the payload does not
 *   match `SentimentSchema`.
 */
async function analyzeSentiment(text: string) {
  const response = await openai.chat.completions.create({
    model: "gpt-4o-mini",
    response_format: {
      type: "json_schema",
      json_schema: {
        name: "sentiment_analysis",
        strict: true,
        schema: {
          type: "object",
          properties: {
            sentiment: { type: "string", enum: ["positive", "negative", "neutral"] },
            confidence: { type: "number" },
            reasoning: { type: "string" },
            topics: { type: "array", items: { type: "string" } },
          },
          required: ["sentiment", "confidence", "reasoning", "topics"],
          additionalProperties: false, // required for strict mode
        },
      },
    },
    messages: [{ role: "system", content: "Analyze sentiment." }, { role: "user", content: text }],
  });

  const choice = response.choices[0];
  if (!choice) {
    throw new Error("analyzeSentiment: completion returned no choices");
  }
  // Fail with the real cause instead of parsing "{}" and reporting a
  // confusing "missing field" validation error.
  if (choice.message.refusal) {
    throw new Error(`analyzeSentiment: model refused the request: ${choice.message.refusal}`);
  }
  if (choice.finish_reason === "length") {
    throw new Error("analyzeSentiment: response was truncated before the JSON was complete");
  }
  const content = choice.message.content;
  if (!content) {
    throw new Error("analyzeSentiment: completion contained no content");
  }

  const raw: unknown = JSON.parse(content);
  return SentimentSchema.parse(raw); // always validate with Zod even in strict mode
}
|
|
111
|
+
|
|
112
|
+
// Gemini — response_mime_type + response_schema
|
|
113
|
+
import { GoogleGenerativeAI, SchemaType } from "@google/generative-ai";
|
|
114
|
+
const genAI = new GoogleGenerativeAI(process.env.GEMINI_API_KEY!);
|
|
115
|
+
// Gemini model configured to return JSON matching the sentiment result shape.
// The response schema lists the same four fields that SentimentSchema requires
// (including `reasoning`), so output validated with SentimentSchema is not
// rejected for a field the model was never asked to produce.
const model = genAI.getGenerativeModel({
  model: "gemini-2.0-flash",
  generationConfig: {
    responseMimeType: "application/json",
    responseSchema: {
      type: SchemaType.OBJECT,
      properties: {
        sentiment: { type: SchemaType.STRING, enum: ["positive", "negative", "neutral"] },
        confidence: { type: SchemaType.NUMBER },
        reasoning: { type: SchemaType.STRING },
        topics: { type: SchemaType.ARRAY, items: { type: SchemaType.STRING } },
      },
      required: ["sentiment", "confidence", "reasoning", "topics"],
    },
  },
});
|
|
130
|
+
|
|
131
|
+
// ❌ HALLUCINATION TRAP: Always validate LLM JSON output with Zod/schema
|
|
132
|
+
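// Even with strict mode, output can be truncated (token limit), refused, or break rules the JSON schema cannot express (e.g. numeric ranges); without strict mode, expect malformed JSON, wrong types, and missing fields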
|
|
133
|
+
// ❌ const result = JSON.parse(response); // trust blindly
|
|
134
|
+
// ✅ const result = Schema.parse(JSON.parse(response)); // validate always
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
### Function Calling / Tool Use
|
|
138
|
+
|
|
139
|
+
```typescript
|
|
140
|
+
// Tool: free-text product search, optionally narrowed by category and price ceiling.
const searchProductsTool: OpenAI.ChatCompletionTool = {
  type: "function",
  function: {
    name: "search_products",
    description: "Search products by name, category, or price range",
    parameters: {
      type: "object",
      properties: {
        query: { type: "string", description: "Search query" },
        category: { type: "string", enum: ["electronics", "clothing", "home"] },
        max_price: { type: "number", description: "Maximum price in USD" },
      },
      required: ["query"],
    },
  },
};

// Tool: look up a single order by its identifier.
const getOrderStatusTool: OpenAI.ChatCompletionTool = {
  type: "function",
  function: {
    name: "get_order_status",
    description: "Get the status of an order by order ID",
    parameters: {
      type: "object",
      properties: {
        order_id: { type: "string", description: "The order ID (e.g., ORD-12345)" },
      },
      required: ["order_id"],
    },
  },
};

// Every tool the model may call, in the order it is advertised to the API.
const tools: OpenAI.ChatCompletionTool[] = [searchProductsTool, getOrderStatusTool];
|
|
172
|
+
|
|
173
|
+
// Tool execution loop
|
|
174
|
+
/**
 * Runs one user turn through the model, executing any tools it requests
 * until it produces a final answer.
 *
 * @param userMessage - The end user's message for this turn.
 * @param maxToolRounds - Upper bound on model → tool → model round trips.
 *   Without a bound, a model that keeps requesting tools loops (and bills)
 *   forever.
 * @returns The assistant's final message content (may be null).
 * @throws Error when the model is still requesting tools after
 *   `maxToolRounds` rounds. Errors thrown by `executeFunction` propagate.
 */
async function chatWithTools(userMessage: string, maxToolRounds = 5) {
  const messages: OpenAI.ChatCompletionMessageParam[] = [
    { role: "system", content: SYSTEM_PROMPT },
    { role: "user", content: userMessage },
  ];

  // Single place that issues the request so both call sites stay in sync.
  const requestCompletion = () =>
    openai.chat.completions.create({
      model: "gpt-4o-mini",
      messages,
      tools,
    });

  let response = await requestCompletion();
  let rounds = 0;

  // Process tool calls
  while (response.choices[0].finish_reason === "tool_calls") {
    if (rounds >= maxToolRounds) {
      throw new Error(`chatWithTools: exceeded ${maxToolRounds} tool-call rounds`);
    }
    rounds += 1;

    const toolCalls = response.choices[0].message.tool_calls ?? [];
    messages.push(response.choices[0].message);

    for (const call of toolCalls) {
      let args;
      try {
        args = JSON.parse(call.function.arguments);
      } catch {
        // Arguments are model-generated text and may not be valid JSON.
        // Every tool call still needs a reply, so report the problem back
        // to the model instead of crashing the whole conversation.
        messages.push({
          role: "tool",
          tool_call_id: call.id,
          content: JSON.stringify({ error: "Tool arguments were not valid JSON" }),
        });
        continue;
      }

      const result = await executeFunction(call.function.name, args);
      messages.push({
        role: "tool",
        tool_call_id: call.id,
        content: JSON.stringify(result),
      });
    }

    response = await requestCompletion();
  }

  return response.choices[0].message.content;
}
|
|
210
|
+
```
|
|
211
|
+
|
|
212
|
+
---
|
|
213
|
+
|
|
214
|
+
## RAG (Retrieval-Augmented Generation)
|
|
215
|
+
|
|
216
|
+
### Pipeline
|
|
217
|
+
|
|
218
|
+
```
|
|
219
|
+
User Query
|
|
220
|
+
↓
|
|
221
|
+
[1] Embed query → vector
|
|
222
|
+
↓
|
|
223
|
+
[2] Search vector DB → top K chunks
|
|
224
|
+
↓
|
|
225
|
+
[3] (Optional) Rerank results → top N
|
|
226
|
+
↓
|
|
227
|
+
[4] Build prompt: system + context chunks + query
|
|
228
|
+
↓
|
|
229
|
+
[5] LLM generates answer with citations
|
|
230
|
+
↓
|
|
231
|
+
[6] Validate response (hallucination check)
|
|
232
|
+
```
|
|
233
|
+
|
|
234
|
+
### Chunking Strategy
|
|
235
|
+
|
|
236
|
+
```typescript
|
|
237
|
+
// ❌ BAD: Arbitrary character splitting
|
|
238
|
+
const chunks = text.match(/.{1,1000}/g); // breaks mid-sentence, mid-word
|
|
239
|
+
|
|
240
|
+
// ✅ GOOD: Semantic chunking with overlap
|
|
241
|
+
/**
 * Splits `text` into chunks along `separator` boundaries, carrying a short
 * tail of each chunk into the next one as overlap.
 *
 * Paragraphs are never split: a single paragraph larger than `maxTokens`
 * becomes an oversized chunk of its own. Overlap is measured in
 * space-separated words as an approximation of tokens.
 *
 * @param text - The document to split.
 * @param options - `maxTokens` (default 512), `overlapTokens` (default 50),
 *   `separator` (default "\n\n").
 * @returns Chunks in document order; `tokens` is the token count of the
 *   trimmed chunk text. Empty or whitespace-only input yields an empty array.
 */
function chunkDocument(text: string, options: ChunkOptions = {}): Chunk[] {
  const {
    maxTokens = 512,     // chunk size
    overlapTokens = 50,  // overlap between chunks
    separator = "\n\n",  // split on paragraph boundaries first
  } = options;

  const chunks: Chunk[] = [];

  // Records a chunk, counting tokens on exactly the text that is stored.
  const pushChunk = (raw: string): void => {
    const body = raw.trim();
    if (body) chunks.push({ text: body, tokens: tokenCount(body) });
  };

  let current = "";

  for (const para of text.split(separator)) {
    if (!para.trim()) continue; // consecutive separators produce empty paragraphs

    // Join with the separator only when there is something to join to, so
    // the first chunk does not start with a stray separator.
    const candidate = current ? current + separator + para : para;

    if (current && tokenCount(candidate) > maxTokens) {
      pushChunk(current);
      // Keep overlap from previous chunk. slice(-0) would return every word,
      // so zero overlap has to be handled explicitly.
      const overlap = overlapTokens > 0 ? current.split(" ").slice(-overlapTokens).join(" ") : "";
      current = overlap ? overlap + separator + para : para;
    } else {
      current = candidate;
    }
  }
  pushChunk(current);

  return chunks;
}
|
|
266
|
+
|
|
267
|
+
// Chunk size guidelines:
|
|
268
|
+
// 256-512 tokens → precise retrieval (Q&A, support)
|
|
269
|
+
// 512-1024 tokens → balanced (general RAG)
|
|
270
|
+
// 1024-2048 tokens → broad context (summarization)
|
|
271
|
+
```
|
|
272
|
+
|
|
273
|
+
### Vector Store Selection
|
|
274
|
+
|
|
275
|
+
```
|
|
276
|
+
pgvector (PostgreSQL) → Already using Postgres, <10M vectors, simple
|
|
277
|
+
Pinecone → Managed, serverless, easy scaling
|
|
278
|
+
Weaviate → Hybrid search (vector + keyword), multi-model
|
|
279
|
+
Qdrant → High performance, Rust-based, self-hostable
|
|
280
|
+
Chroma → Local development, prototyping
|
|
281
|
+
Milvus → Enterprise scale, GPU acceleration
|
|
282
|
+
|
|
283
|
+
// ❌ HALLUCINATION TRAP: Vector search is NOT keyword search
|
|
284
|
+
// "Apple CEO" might not find "Tim Cook runs Apple Inc."; exact identifiers (SKUs, order IDs, error codes) are missed even more often by pure vector search
|
|
285
|
+
// Use HYBRID search (vector + BM25 keyword) for production
|
|
286
|
+
```
|
|
287
|
+
|
|
288
|
+
---
|
|
289
|
+
|
|
290
|
+
## Streaming
|
|
291
|
+
|
|
292
|
+
```typescript
|
|
293
|
+
// Server-Sent Events for AI token streaming
|
|
294
|
+
/**
 * GET /api/chat?message=... — streams the model's reply as Server-Sent Events.
 *
 * Each token batch is sent as `data: {"content": "..."}`; the stream always
 * ends with `data: [DONE]` so clients know to stop listening. If generation
 * fails after streaming has started, a `stream_error` event is sent before
 * `[DONE]`.
 */
app.get("/api/chat", async (req, res) => {
  // Query values can be missing, arrays, or nested objects; accept only a
  // non-empty string before any SSE headers are sent.
  const message = req.query.message;
  if (typeof message !== "string" || message.trim() === "") {
    res.status(400).json({ error: "Query parameter 'message' must be a non-empty string" });
    return;
  }

  res.setHeader("Content-Type", "text/event-stream");
  res.setHeader("Cache-Control", "no-cache");
  res.setHeader("Connection", "keep-alive");

  // Stop generating (and paying for) tokens once the client has gone away.
  const abort = new AbortController();
  res.on("close", () => abort.abort());

  try {
    const stream = await openai.chat.completions.create(
      {
        model: "gpt-4o-mini",
        messages: [{ role: "user", content: message }],
        stream: true,
      },
      { signal: abort.signal },
    );

    for await (const chunk of stream) {
      const content = chunk.choices[0]?.delta?.content;
      if (content) {
        res.write(`data: ${JSON.stringify({ content })}\n\n`);
      }
    }
  } catch (err: unknown) {
    // An abort caused by the client disconnecting is expected; anything else
    // is reported, since the headers are already sent and the status code
    // can no longer change.
    if (!abort.signal.aborted) {
      console.error("chat stream failed", err);
      res.write(`event: stream_error\ndata: ${JSON.stringify({ error: "stream_failed" })}\n\n`);
    }
  } finally {
    if (!res.writableEnded) {
      if (!abort.signal.aborted) {
        res.write("data: [DONE]\n\n");
      }
      res.end();
    }
  }
});
|
|
315
|
+
|
|
316
|
+
// Client-side consumption
|
|
317
|
+
// Opens the SSE stream for one prompt and appends tokens to the chat as they arrive.
const eventSource = new EventSource(`/api/chat?message=${encodeURIComponent(msg)}`);

eventSource.onmessage = (event) => {
  // The server marks the end of the reply with a literal [DONE] sentinel.
  if (event.data === "[DONE]") { eventSource.close(); return; }
  const { content } = JSON.parse(event.data);
  appendToChat(content);
};

// EventSource reconnects automatically after a dropped connection, which would
// re-send the same prompt and start (and bill) a second generation. Close
// instead of retrying.
eventSource.onerror = () => {
  eventSource.close();
};
|
|
323
|
+
```
|
|
324
|
+
|
|
325
|
+
---
|
|
326
|
+
|
|
327
|
+
## Cost Optimization
|
|
328
|
+
|
|
329
|
+
```
|
|
330
|
+
1. Prompt caching → Cache system prompts (OpenAI, Anthropic support this)
|
|
331
|
+
2. Output token limiting → Set max_tokens to prevent runaway responses
|
|
332
|
+
3. Tiered models → Use cheap models for classification, expensive for reasoning
|
|
333
|
+
4. Batch processing → Use batch APIs for offline processing (50% discount)
|
|
334
|
+
5. Chunked context → Send only relevant chunks, not entire documents
|
|
335
|
+
6. Response streaming    → Stream to cut perceived latency (TTFT, time to first token); token cost is unchanged
|
|
336
|
+
7. Structured output → Shorter JSON responses vs verbose prose
|
|
337
|
+
|
|
338
|
+
// Cost estimation:
|
|
339
|
+
// GPT-4o: ~$2.50/1M input, ~$10/1M output
|
|
340
|
+
// GPT-4o-mini: ~$0.15/1M input, ~$0.60/1M output
|
|
341
|
+
// 1M tokens ≈ 750,000 words ≈ 3,000 pages
|
|
342
|
+
```
|
|
343
|
+
|
|
344
|
+
---
|