tribunal-kit 2.4.6 → 3.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agent/ARCHITECTURE.md +99 -99
- package/.agent/GEMINI.md +52 -52
- package/.agent/agents/accessibility-reviewer.md +139 -86
- package/.agent/agents/ai-code-reviewer.md +160 -90
- package/.agent/agents/backend-specialist.md +164 -127
- package/.agent/agents/code-archaeologist.md +115 -73
- package/.agent/agents/database-architect.md +130 -110
- package/.agent/agents/debugger.md +137 -97
- package/.agent/agents/dependency-reviewer.md +78 -30
- package/.agent/agents/devops-engineer.md +161 -118
- package/.agent/agents/documentation-writer.md +151 -87
- package/.agent/agents/explorer-agent.md +117 -99
- package/.agent/agents/frontend-reviewer.md +127 -47
- package/.agent/agents/frontend-specialist.md +169 -109
- package/.agent/agents/game-developer.md +28 -164
- package/.agent/agents/logic-reviewer.md +87 -49
- package/.agent/agents/mobile-developer.md +151 -103
- package/.agent/agents/mobile-reviewer.md +133 -50
- package/.agent/agents/orchestrator.md +121 -110
- package/.agent/agents/penetration-tester.md +103 -77
- package/.agent/agents/performance-optimizer.md +136 -92
- package/.agent/agents/performance-reviewer.md +139 -69
- package/.agent/agents/product-manager.md +104 -70
- package/.agent/agents/product-owner.md +6 -25
- package/.agent/agents/project-planner.md +95 -95
- package/.agent/agents/qa-automation-engineer.md +174 -87
- package/.agent/agents/security-auditor.md +133 -129
- package/.agent/agents/seo-specialist.md +160 -99
- package/.agent/agents/sql-reviewer.md +132 -44
- package/.agent/agents/supervisor-agent.md +137 -109
- package/.agent/agents/swarm-worker-contracts.md +17 -17
- package/.agent/agents/swarm-worker-registry.md +46 -46
- package/.agent/agents/test-coverage-reviewer.md +132 -53
- package/.agent/agents/test-engineer.md +0 -21
- package/.agent/agents/type-safety-reviewer.md +143 -33
- package/.agent/patterns/generator.md +9 -9
- package/.agent/patterns/inversion.md +12 -12
- package/.agent/patterns/pipeline.md +9 -9
- package/.agent/patterns/reviewer.md +13 -13
- package/.agent/patterns/tool-wrapper.md +9 -9
- package/.agent/rules/GEMINI.md +63 -63
- package/.agent/scripts/__pycache__/auto_preview.cpython-311.pyc +0 -0
- package/.agent/scripts/__pycache__/bundle_analyzer.cpython-311.pyc +0 -0
- package/.agent/scripts/__pycache__/checklist.cpython-311.pyc +0 -0
- package/.agent/scripts/__pycache__/dependency_analyzer.cpython-311.pyc +0 -0
- package/.agent/scripts/__pycache__/security_scan.cpython-311.pyc +0 -0
- package/.agent/scripts/__pycache__/session_manager.cpython-311.pyc +0 -0
- package/.agent/scripts/__pycache__/skill_integrator.cpython-311.pyc +0 -0
- package/.agent/scripts/__pycache__/swarm_dispatcher.cpython-311.pyc +0 -0
- package/.agent/scripts/__pycache__/test_runner.cpython-311.pyc +0 -0
- package/.agent/scripts/__pycache__/verify_all.cpython-311.pyc +0 -0
- package/.agent/scripts/compress_skills.py +167 -0
- package/.agent/scripts/consolidate_skills.py +173 -0
- package/.agent/scripts/deep_compress.py +202 -0
- package/.agent/scripts/minify_context.py +80 -0
- package/.agent/scripts/security_scan.py +1 -1
- package/.agent/scripts/strip_tribunal.py +41 -0
- package/.agent/skills/agent-organizer/SKILL.md +60 -100
- package/.agent/skills/agentic-patterns/SKILL.md +0 -70
- package/.agent/skills/ai-prompt-injection-defense/SKILL.md +108 -53
- package/.agent/skills/api-patterns/SKILL.md +197 -257
- package/.agent/skills/api-security-auditor/SKILL.md +125 -57
- package/.agent/skills/app-builder/SKILL.md +326 -50
- package/.agent/skills/app-builder/templates/SKILL.md +13 -15
- package/.agent/skills/app-builder/templates/astro-static/TEMPLATE.md +16 -16
- package/.agent/skills/app-builder/templates/chrome-extension/TEMPLATE.md +22 -22
- package/.agent/skills/app-builder/templates/cli-tool/TEMPLATE.md +18 -18
- package/.agent/skills/app-builder/templates/electron-desktop/TEMPLATE.md +20 -20
- package/.agent/skills/app-builder/templates/express-api/TEMPLATE.md +17 -17
- package/.agent/skills/app-builder/templates/flutter-app/TEMPLATE.md +18 -18
- package/.agent/skills/app-builder/templates/monorepo-turborepo/TEMPLATE.md +21 -21
- package/.agent/skills/app-builder/templates/nextjs-fullstack/TEMPLATE.md +19 -19
- package/.agent/skills/app-builder/templates/nextjs-saas/TEMPLATE.md +26 -26
- package/.agent/skills/app-builder/templates/nextjs-static/TEMPLATE.md +26 -26
- package/.agent/skills/app-builder/templates/nuxt-app/TEMPLATE.md +19 -19
- package/.agent/skills/app-builder/templates/python-fastapi/TEMPLATE.md +18 -18
- package/.agent/skills/app-builder/templates/react-native-app/TEMPLATE.md +20 -20
- package/.agent/skills/appflow-wireframe/SKILL.md +71 -98
- package/.agent/skills/architecture/SKILL.md +161 -200
- package/.agent/skills/authentication-best-practices/SKILL.md +121 -54
- package/.agent/skills/bash-linux/SKILL.md +71 -166
- package/.agent/skills/behavioral-modes/SKILL.md +8 -69
- package/.agent/skills/brainstorming/SKILL.md +345 -127
- package/.agent/skills/building-native-ui/SKILL.md +125 -57
- package/.agent/skills/clean-code/SKILL.md +266 -149
- package/.agent/skills/code-review-checklist/SKILL.md +0 -62
- package/.agent/skills/config-validator/SKILL.md +73 -131
- package/.agent/skills/csharp-developer/SKILL.md +434 -73
- package/.agent/skills/database-design/SKILL.md +190 -275
- package/.agent/skills/deployment-procedures/SKILL.md +81 -158
- package/.agent/skills/devops-engineer/SKILL.md +255 -94
- package/.agent/skills/devops-incident-responder/SKILL.md +50 -69
- package/.agent/skills/doc.md +5 -5
- package/.agent/skills/documentation-templates/SKILL.md +19 -63
- package/.agent/skills/edge-computing/SKILL.md +75 -165
- package/.agent/skills/extract-design-system/SKILL.md +84 -58
- package/.agent/skills/framer-motion-expert/SKILL.md +195 -0
- package/.agent/skills/frontend-design/SKILL.md +151 -499
- package/.agent/skills/game-design-expert/SKILL.md +71 -0
- package/.agent/skills/game-engineering-expert/SKILL.md +88 -0
- package/.agent/skills/geo-fundamentals/SKILL.md +52 -178
- package/.agent/skills/github-operations/SKILL.md +197 -272
- package/.agent/skills/gsap-expert/SKILL.md +194 -0
- package/.agent/skills/i18n-localization/SKILL.md +60 -172
- package/.agent/skills/intelligent-routing/SKILL.md +123 -103
- package/.agent/skills/lint-and-validate/SKILL.md +8 -52
- package/.agent/skills/llm-engineering/SKILL.md +281 -195
- package/.agent/skills/local-first/SKILL.md +76 -159
- package/.agent/skills/mcp-builder/SKILL.md +48 -188
- package/.agent/skills/mobile-design/SKILL.md +213 -219
- package/.agent/skills/motion-engineering/SKILL.md +184 -0
- package/.agent/skills/nextjs-react-expert/SKILL.md +184 -203
- package/.agent/skills/nodejs-best-practices/SKILL.md +403 -185
- package/.agent/skills/observability/SKILL.md +211 -203
- package/.agent/skills/parallel-agents/SKILL.md +53 -146
- package/.agent/skills/performance-profiling/SKILL.md +171 -151
- package/.agent/skills/plan-writing/SKILL.md +49 -153
- package/.agent/skills/platform-engineer/SKILL.md +57 -103
- package/.agent/skills/playwright-best-practices/SKILL.md +110 -63
- package/.agent/skills/powershell-windows/SKILL.md +61 -179
- package/.agent/skills/python-patterns/SKILL.md +7 -35
- package/.agent/skills/python-pro/SKILL.md +273 -114
- package/.agent/skills/react-specialist/SKILL.md +227 -108
- package/.agent/skills/readme-builder/SKILL.md +15 -85
- package/.agent/skills/realtime-patterns/SKILL.md +216 -243
- package/.agent/skills/red-team-tactics/SKILL.md +10 -51
- package/.agent/skills/rust-pro/SKILL.md +525 -142
- package/.agent/skills/seo-fundamentals/SKILL.md +92 -153
- package/.agent/skills/server-management/SKILL.md +110 -166
- package/.agent/skills/shadcn-ui-expert/SKILL.md +154 -55
- package/.agent/skills/skill-creator/SKILL.md +18 -58
- package/.agent/skills/sql-pro/SKILL.md +543 -68
- package/.agent/skills/supabase-postgres-best-practices/SKILL.md +28 -68
- package/.agent/skills/swiftui-expert/SKILL.md +124 -57
- package/.agent/skills/systematic-debugging/SKILL.md +49 -151
- package/.agent/skills/tailwind-patterns/SKILL.md +433 -149
- package/.agent/skills/tdd-workflow/SKILL.md +63 -169
- package/.agent/skills/test-result-analyzer/SKILL.md +33 -73
- package/.agent/skills/testing-patterns/SKILL.md +437 -130
- package/.agent/skills/trend-researcher/SKILL.md +30 -71
- package/.agent/skills/ui-ux-pro-max/SKILL.md +0 -41
- package/.agent/skills/ui-ux-researcher/SKILL.md +51 -91
- package/.agent/skills/vue-expert/SKILL.md +225 -119
- package/.agent/skills/vulnerability-scanner/SKILL.md +264 -226
- package/.agent/skills/web-accessibility-auditor/SKILL.md +141 -58
- package/.agent/skills/web-design-guidelines/SKILL.md +17 -61
- package/.agent/skills/webapp-testing/SKILL.md +71 -196
- package/.agent/skills/whimsy-injector/SKILL.md +58 -132
- package/.agent/skills/workflow-optimizer/SKILL.md +28 -68
- package/.agent/workflows/api-tester.md +96 -224
- package/.agent/workflows/audit.md +81 -122
- package/.agent/workflows/brainstorm.md +69 -105
- package/.agent/workflows/changelog.md +65 -97
- package/.agent/workflows/create.md +73 -88
- package/.agent/workflows/debug.md +80 -111
- package/.agent/workflows/deploy.md +119 -92
- package/.agent/workflows/enhance.md +80 -91
- package/.agent/workflows/fix.md +68 -97
- package/.agent/workflows/generate.md +165 -164
- package/.agent/workflows/migrate.md +106 -109
- package/.agent/workflows/orchestrate.md +103 -86
- package/.agent/workflows/performance-benchmarker.md +77 -268
- package/.agent/workflows/plan.md +120 -98
- package/.agent/workflows/preview.md +39 -96
- package/.agent/workflows/refactor.md +105 -97
- package/.agent/workflows/review-ai.md +63 -102
- package/.agent/workflows/review.md +71 -110
- package/.agent/workflows/session.md +53 -113
- package/.agent/workflows/status.md +42 -88
- package/.agent/workflows/strengthen-skills.md +90 -51
- package/.agent/workflows/swarm.md +114 -129
- package/.agent/workflows/test.md +125 -102
- package/.agent/workflows/tribunal-backend.md +60 -78
- package/.agent/workflows/tribunal-database.md +62 -100
- package/.agent/workflows/tribunal-frontend.md +62 -82
- package/.agent/workflows/tribunal-full.md +56 -100
- package/.agent/workflows/tribunal-mobile.md +65 -94
- package/.agent/workflows/tribunal-performance.md +62 -105
- package/.agent/workflows/ui-ux-pro-max.md +72 -121
- package/README.md +11 -15
- package/package.json +1 -1
- package/.agent/skills/api-patterns/api-style.md +0 -42
- package/.agent/skills/api-patterns/auth.md +0 -24
- package/.agent/skills/api-patterns/documentation.md +0 -26
- package/.agent/skills/api-patterns/graphql.md +0 -41
- package/.agent/skills/api-patterns/rate-limiting.md +0 -31
- package/.agent/skills/api-patterns/response.md +0 -37
- package/.agent/skills/api-patterns/rest.md +0 -40
- package/.agent/skills/api-patterns/security-testing.md +0 -122
- package/.agent/skills/api-patterns/trpc.md +0 -41
- package/.agent/skills/api-patterns/versioning.md +0 -22
- package/.agent/skills/app-builder/agent-coordination.md +0 -71
- package/.agent/skills/app-builder/feature-building.md +0 -53
- package/.agent/skills/app-builder/project-detection.md +0 -34
- package/.agent/skills/app-builder/scaffolding.md +0 -118
- package/.agent/skills/app-builder/tech-stack.md +0 -40
- package/.agent/skills/architecture/context-discovery.md +0 -43
- package/.agent/skills/architecture/examples.md +0 -94
- package/.agent/skills/architecture/pattern-selection.md +0 -68
- package/.agent/skills/architecture/patterns-reference.md +0 -50
- package/.agent/skills/architecture/trade-off-analysis.md +0 -77
- package/.agent/skills/brainstorming/dynamic-questioning.md +0 -360
- package/.agent/skills/database-design/database-selection.md +0 -43
- package/.agent/skills/database-design/indexing.md +0 -39
- package/.agent/skills/database-design/migrations.md +0 -48
- package/.agent/skills/database-design/optimization.md +0 -36
- package/.agent/skills/database-design/orm-selection.md +0 -30
- package/.agent/skills/database-design/schema-design.md +0 -56
- package/.agent/skills/dotnet-core-expert/SKILL.md +0 -103
- package/.agent/skills/framer-motion-animations/SKILL.md +0 -74
- package/.agent/skills/frontend-design/animation-guide.md +0 -331
- package/.agent/skills/frontend-design/color-system.md +0 -329
- package/.agent/skills/frontend-design/decision-trees.md +0 -418
- package/.agent/skills/frontend-design/motion-graphics.md +0 -306
- package/.agent/skills/frontend-design/typography-system.md +0 -363
- package/.agent/skills/frontend-design/ux-psychology.md +0 -1116
- package/.agent/skills/frontend-design/visual-effects.md +0 -383
- package/.agent/skills/game-development/2d-games/SKILL.md +0 -119
- package/.agent/skills/game-development/3d-games/SKILL.md +0 -135
- package/.agent/skills/game-development/SKILL.md +0 -236
- package/.agent/skills/game-development/game-art/SKILL.md +0 -185
- package/.agent/skills/game-development/game-audio/SKILL.md +0 -190
- package/.agent/skills/game-development/game-design/SKILL.md +0 -129
- package/.agent/skills/game-development/mobile-games/SKILL.md +0 -108
- package/.agent/skills/game-development/multiplayer/SKILL.md +0 -132
- package/.agent/skills/game-development/pc-games/SKILL.md +0 -144
- package/.agent/skills/game-development/vr-ar/SKILL.md +0 -123
- package/.agent/skills/game-development/web-games/SKILL.md +0 -150
- package/.agent/skills/intelligent-routing/router-manifest.md +0 -65
- package/.agent/skills/mobile-design/decision-trees.md +0 -516
- package/.agent/skills/mobile-design/mobile-backend.md +0 -491
- package/.agent/skills/mobile-design/mobile-color-system.md +0 -420
- package/.agent/skills/mobile-design/mobile-debugging.md +0 -122
- package/.agent/skills/mobile-design/mobile-design-thinking.md +0 -357
- package/.agent/skills/mobile-design/mobile-navigation.md +0 -458
- package/.agent/skills/mobile-design/mobile-performance.md +0 -767
- package/.agent/skills/mobile-design/mobile-testing.md +0 -356
- package/.agent/skills/mobile-design/mobile-typography.md +0 -433
- package/.agent/skills/mobile-design/platform-android.md +0 -666
- package/.agent/skills/mobile-design/platform-ios.md +0 -561
- package/.agent/skills/mobile-design/touch-psychology.md +0 -537
- package/.agent/skills/nextjs-react-expert/1-async-eliminating-waterfalls.md +0 -312
- package/.agent/skills/nextjs-react-expert/2-bundle-bundle-size-optimization.md +0 -240
- package/.agent/skills/nextjs-react-expert/3-server-server-side-performance.md +0 -490
- package/.agent/skills/nextjs-react-expert/4-client-client-side-data-fetching.md +0 -264
- package/.agent/skills/nextjs-react-expert/5-rerender-re-render-optimization.md +0 -581
- package/.agent/skills/nextjs-react-expert/6-rendering-rendering-performance.md +0 -432
- package/.agent/skills/nextjs-react-expert/7-js-javascript-performance.md +0 -684
- package/.agent/skills/nextjs-react-expert/8-advanced-advanced-patterns.md +0 -150
- package/.agent/skills/vulnerability-scanner/checklists.md +0 -121
|
@@ -1,258 +1,344 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: llm-engineering
|
|
3
|
-
description: LLM engineering
|
|
3
|
+
description: LLM engineering mastery for production AI systems. Prompt engineering, RAG pipeline design, vector store selection, embedding strategies, chunking, reranking, structured output, function calling, streaming, evals, guard-rails, cost optimization, and LLMOps. Use when building AI features, chat interfaces, semantic search, or any system calling an LLM API.
|
|
4
4
|
allowed-tools: Read, Write, Edit, Glob, Grep
|
|
5
|
-
version:
|
|
6
|
-
last-updated: 2026-
|
|
7
|
-
applies-to-model: gemini-
|
|
5
|
+
version: 3.2.0
|
|
6
|
+
last-updated: 2026-04-07
|
|
7
|
+
applies-to-model: gemini-3-1-pro, claude-3-7-sonnet
|
|
8
8
|
---
|
|
9
9
|
|
|
10
|
-
# LLM Engineering
|
|
11
|
-
|
|
12
|
-
> An LLM is a probabilistic function, not a deterministic API.
|
|
13
|
-
> Design your system to be correct despite that — not because you got lucky.
|
|
10
|
+
# LLM Engineering — Production AI Systems Mastery
|
|
14
11
|
|
|
15
12
|
---
|
|
16
13
|
|
|
17
|
-
##
|
|
14
|
+
## Model Selection
|
|
18
15
|
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
-
|
|
23
|
-
-
|
|
16
|
+
```
|
|
17
|
+
Model │ Use Case │ Cost Tier
|
|
18
|
+
─────────────────────────┼───────────────────────────────────────┼──────────
|
|
19
|
+
GPT-4o │ Complex reasoning, vision, code │ $$$
|
|
20
|
+
GPT-4o-mini │ Classification, summaries, chat │ $
|
|
21
|
+
o3-mini │ Deep reasoning, math, code review │ $$
|
|
22
|
+
Claude 3.7 Sonnet │ Long documents, analysis, code │ $$$
|
|
23
|
+
Claude 3.5 Haiku │ Fast responses, simple tasks │ $
|
|
24
|
+
Gemini 3.1 Pro (High) │ Large context, multimodal, code │ $$$
|
|
25
|
+
Gemini 3.0 Flash │ High throughput, cost-efficient │ $
|
|
26
|
+
Llama 3.3 70B (open) │ Self-hosted, data privacy │ Free*
|
|
27
|
+
Mistral Large 2 │ European data residency, code │ $$
|
|
28
|
+
|
|
29
|
+
* = compute costs only
|
|
30
|
+
|
|
31
|
+
Selection rules:
|
|
32
|
+
1. Start with the cheapest model that passes your evals
|
|
33
|
+
2. Upgrade only when eval scores require it
|
|
34
|
+
3. Use large models for complex reasoning, small for classification/routing
|
|
35
|
+
4. Fine-tune ONLY after prompt engineering and RAG are exhausted
|
|
36
|
+
5. ❌ HALLUCINATION TRAP: Model names change frequently — always verify current names
|
|
37
|
+
from provider docs before hardcoding (e.g. "gpt-4o" vs "gpt-4o-2024-11-20")
|
|
38
|
+
```
|
|
24
39
|
|
|
25
40
|
---
|
|
26
41
|
|
|
27
|
-
##
|
|
42
|
+
## Prompt Engineering
|
|
28
43
|
|
|
29
|
-
|
|
30
|
-
|---|---|---|
|
|
31
|
-
| **Simple prompt** | Single-turn, no user docs | Needs accuracy on user data |
|
|
32
|
-
| **RAG** | Answers must cite user/company docs | Data changes every second |
|
|
33
|
-
| **Fine-tuning** | Consistent tone/style at scale | You have < 1000 examples |
|
|
34
|
-
| **Agent loop** | Multi-step tasks, tool use | Single-answer questions |
|
|
35
|
-
| **Hybrid** | RAG + agent (most production apps) | Over-engineering simple use case |
|
|
44
|
+
### System Prompt Design
|
|
36
45
|
|
|
37
|
-
|
|
46
|
+
```typescript
|
|
47
|
+
const SYSTEM_PROMPT = `You are a customer support agent for Acme Corp.
|
|
38
48
|
|
|
39
|
-
##
|
|
49
|
+
## Rules
|
|
50
|
+
1. Answer ONLY questions about Acme products and services.
|
|
51
|
+
2. If you don't know the answer, say "I'll connect you with a specialist."
|
|
52
|
+
3. Never discuss competitors.
|
|
53
|
+
4. Never make up product features or pricing.
|
|
54
|
+
5. Keep responses under 200 words.
|
|
40
55
|
|
|
41
|
-
|
|
56
|
+
## Response Format
|
|
57
|
+
- Use bullet points for lists
|
|
58
|
+
- Include product links when relevant
|
|
59
|
+
- End with a follow-up question
|
|
42
60
|
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
▼ ▼ ▼
|
|
52
|
-
Embed chunks ANN search in Build prompt:
|
|
53
|
-
│ vector store [system] + [chunks] + [query]
|
|
54
|
-
▼ │ │
|
|
55
|
-
Store in vector DB Top-K results Call LLM → stream response
|
|
61
|
+
## Context
|
|
62
|
+
Current date: ${new Date().toISOString().split("T")[0]}
|
|
63
|
+
User plan: {{user_plan}}
|
|
64
|
+
`;
|
|
65
|
+
|
|
66
|
+
// ❌ HALLUCINATION TRAP: System prompts are NOT secrets
|
|
67
|
+
// Users can extract system prompts with jailbreak techniques
|
|
68
|
+
// Never put API keys, internal URLs, or secrets in system prompts
|
|
56
69
|
```
|
|
57
70
|
|
|
58
|
-
###
|
|
71
|
+
### Structured Output (JSON Mode)
|
|
59
72
|
|
|
60
|
-
```
|
|
61
|
-
|
|
62
|
-
|
|
73
|
+
```typescript
|
|
74
|
+
import { z } from "zod";
|
|
75
|
+
import OpenAI from "openai";
|
|
63
76
|
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
77
|
+
const SentimentSchema = z.object({
|
|
78
|
+
sentiment: z.enum(["positive", "negative", "neutral"]),
|
|
79
|
+
confidence: z.number().min(0).max(1),
|
|
80
|
+
reasoning: z.string(),
|
|
81
|
+
topics: z.array(z.string()),
|
|
69
82
|
});
|
|
70
|
-
```
|
|
71
|
-
|
|
72
|
-
### Embedding Model Selection
|
|
73
83
|
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
84
|
+
// OpenAI — json_schema mode (strict = true enforces schema exactly)
|
|
85
|
+
async function analyzeSentiment(text: string) {
|
|
86
|
+
const response = await openai.chat.completions.create({
|
|
87
|
+
model: "gpt-4o-mini",
|
|
88
|
+
response_format: {
|
|
89
|
+
type: "json_schema",
|
|
90
|
+
json_schema: {
|
|
91
|
+
name: "sentiment_analysis",
|
|
92
|
+
strict: true,
|
|
93
|
+
schema: {
|
|
94
|
+
type: "object",
|
|
95
|
+
properties: {
|
|
96
|
+
sentiment: { type: "string", enum: ["positive", "negative", "neutral"] },
|
|
97
|
+
confidence: { type: "number" },
|
|
98
|
+
reasoning: { type: "string" },
|
|
99
|
+
topics: { type: "array", items: { type: "string" } },
|
|
100
|
+
},
|
|
101
|
+
required: ["sentiment", "confidence", "reasoning", "topics"],
|
|
102
|
+
additionalProperties: false, // required for strict mode
|
|
103
|
+
},
|
|
104
|
+
},
|
|
105
|
+
},
|
|
106
|
+
messages: [{ role: "system", content: "Analyze sentiment." }, { role: "user", content: text }],
|
|
107
|
+
});
|
|
108
|
+
const raw = JSON.parse(response.choices[0].message.content ?? "{}");
|
|
109
|
+
return SentimentSchema.parse(raw); // always validate with Zod even in strict mode
|
|
110
|
+
}
|
|
80
111
|
|
|
81
|
-
|
|
112
|
+
// Gemini — response_mime_type + response_schema
|
|
113
|
+
import { GoogleGenerativeAI, SchemaType } from "@google/generative-ai";
|
|
114
|
+
const genAI = new GoogleGenerativeAI(process.env.GEMINI_API_KEY!);
|
|
115
|
+
const model = genAI.getGenerativeModel({
|
|
116
|
+
model: "gemini-2.0-flash",
|
|
117
|
+
generationConfig: {
|
|
118
|
+
responseMimeType: "application/json",
|
|
119
|
+
responseSchema: {
|
|
120
|
+
type: SchemaType.OBJECT,
|
|
121
|
+
properties: {
|
|
122
|
+
sentiment: { type: SchemaType.STRING, enum: ["positive", "negative", "neutral"] },
|
|
123
|
+
confidence: { type: SchemaType.NUMBER },
|
|
124
|
+
topics: { type: SchemaType.ARRAY, items: { type: SchemaType.STRING } },
|
|
125
|
+
},
|
|
126
|
+
required: ["sentiment", "confidence", "topics"],
|
|
127
|
+
},
|
|
128
|
+
},
|
|
129
|
+
});
|
|
82
130
|
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
| Already on PostgreSQL | `pgvector` | Zero infra, SQL joins with metadata |
|
|
88
|
-
| Managed, billion-scale | Pinecone | Hosted ANN, hybrid search built-in |
|
|
89
|
-
| Open source, self-hosted | Qdrant | Rust-native, fast, rich filtering |
|
|
90
|
-
| Already on Weaviate | Weaviate | GraphQL API, multimodal support |
|
|
91
|
-
| Embedded/local | ChromaDB | Zero infra, great for prototyping |
|
|
92
|
-
|
|
93
|
-
```ts
|
|
94
|
-
// pgvector — stays inside your existing PostgreSQL
|
|
95
|
-
import { pgvector } from '@pgvector/pg';
|
|
96
|
-
|
|
97
|
-
// Store
|
|
98
|
-
await db.query(
|
|
99
|
-
'INSERT INTO documents (content, embedding) VALUES ($1, $2)',
|
|
100
|
-
[text, JSON.stringify(embedding)] // embedding is float[]
|
|
101
|
-
);
|
|
102
|
-
|
|
103
|
-
// Query — cosine similarity
|
|
104
|
-
await db.query(
|
|
105
|
-
'SELECT content FROM documents ORDER BY embedding <=> $1 LIMIT 5',
|
|
106
|
-
[JSON.stringify(queryEmbedding)]
|
|
107
|
-
);
|
|
131
|
+
// ❌ HALLUCINATION TRAP: Always validate LLM JSON output with Zod/schema
|
|
132
|
+
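// LLMs can produce malformed JSON, wrong types, or missing fields; even in strict mode, output may be truncated, be a refusal, or violate constraints not encoded in the JSON Schema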
|
|
133
|
+
// ❌ const result = JSON.parse(response); // trust blindly
|
|
134
|
+
// ✅ const result = Schema.parse(JSON.parse(response)); // validate always
|
|
108
135
|
```
|
|
109
136
|
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
## Prompt Engineering Principles
|
|
137
|
+
### Function Calling / Tool Use
|
|
113
138
|
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
```ts
|
|
117
|
-
const messages = [
|
|
139
|
+
```typescript
|
|
140
|
+
const tools: OpenAI.ChatCompletionTool[] = [
|
|
118
141
|
{
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
142
|
+
type: "function",
|
|
143
|
+
function: {
|
|
144
|
+
name: "search_products",
|
|
145
|
+
description: "Search products by name, category, or price range",
|
|
146
|
+
parameters: {
|
|
147
|
+
type: "object",
|
|
148
|
+
properties: {
|
|
149
|
+
query: { type: "string", description: "Search query" },
|
|
150
|
+
category: { type: "string", enum: ["electronics", "clothing", "home"] },
|
|
151
|
+
max_price: { type: "number", description: "Maximum price in USD" },
|
|
152
|
+
},
|
|
153
|
+
required: ["query"],
|
|
154
|
+
},
|
|
155
|
+
},
|
|
124
156
|
},
|
|
125
157
|
{
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
158
|
+
type: "function",
|
|
159
|
+
function: {
|
|
160
|
+
name: "get_order_status",
|
|
161
|
+
description: "Get the status of an order by order ID",
|
|
162
|
+
parameters: {
|
|
163
|
+
type: "object",
|
|
164
|
+
properties: {
|
|
165
|
+
order_id: { type: "string", description: "The order ID (e.g., ORD-12345)" },
|
|
166
|
+
},
|
|
167
|
+
required: ["order_id"],
|
|
168
|
+
},
|
|
169
|
+
},
|
|
129
170
|
},
|
|
130
171
|
];
|
|
131
|
-
```
|
|
132
|
-
|
|
133
|
-
### Few-Shot Examples
|
|
134
172
|
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
173
|
+
// Tool execution loop
|
|
174
|
+
async function chatWithTools(userMessage: string) {
|
|
175
|
+
const messages: OpenAI.ChatCompletionMessageParam[] = [
|
|
176
|
+
{ role: "system", content: SYSTEM_PROMPT },
|
|
177
|
+
{ role: "user", content: userMessage },
|
|
178
|
+
];
|
|
179
|
+
|
|
180
|
+
let response = await openai.chat.completions.create({
|
|
181
|
+
model: "gpt-4o-mini",
|
|
182
|
+
messages,
|
|
183
|
+
tools,
|
|
184
|
+
});
|
|
185
|
+
|
|
186
|
+
// Process tool calls
|
|
187
|
+
while (response.choices[0].finish_reason === "tool_calls") {
|
|
188
|
+
const toolCalls = response.choices[0].message.tool_calls ?? [];
|
|
189
|
+
messages.push(response.choices[0].message);
|
|
190
|
+
|
|
191
|
+
for (const call of toolCalls) {
|
|
192
|
+
const args = JSON.parse(call.function.arguments);
|
|
193
|
+
const result = await executeFunction(call.function.name, args);
|
|
194
|
+
messages.push({
|
|
195
|
+
role: "tool",
|
|
196
|
+
tool_call_id: call.id,
|
|
197
|
+
content: JSON.stringify(result),
|
|
198
|
+
});
|
|
199
|
+
}
|
|
141
200
|
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
201
|
+
response = await openai.chat.completions.create({
|
|
202
|
+
model: "gpt-4o-mini",
|
|
203
|
+
messages,
|
|
204
|
+
tools,
|
|
205
|
+
});
|
|
206
|
+
}
|
|
145
207
|
|
|
146
|
-
|
|
147
|
-
|
|
208
|
+
return response.choices[0].message.content;
|
|
209
|
+
}
|
|
148
210
|
```
|
|
149
211
|
|
|
150
212
|
---
|
|
151
213
|
|
|
152
|
-
##
|
|
214
|
+
## RAG (Retrieval-Augmented Generation)
|
|
215
|
+
|
|
216
|
+
### Pipeline
|
|
153
217
|
|
|
154
218
|
```
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
219
|
+
User Query
|
|
220
|
+
↓
|
|
221
|
+
[1] Embed query → vector
|
|
222
|
+
↓
|
|
223
|
+
[2] Search vector DB → top K chunks
|
|
224
|
+
↓
|
|
225
|
+
[3] (Optional) Rerank results → top N
|
|
226
|
+
↓
|
|
227
|
+
[4] Build prompt: system + context chunks + query
|
|
228
|
+
↓
|
|
229
|
+
[5] LLM generates answer with citations
|
|
230
|
+
↓
|
|
231
|
+
[6] Validate response (hallucination check)
|
|
159
232
|
```
|
|
160
233
|
|
|
161
|
-
###
|
|
162
|
-
|
|
163
|
-
| Category | What It Measures | Tooling |
|
|
164
|
-
|---|---|---|
|
|
165
|
-
| **Faithfulness** | Does answer match sources? | Ragas, ARES |
|
|
166
|
-
| **Relevance** | Does answer address the question? | LLM-as-judge |
|
|
167
|
-
| **Completeness** | Missing important info? | Human + LLM |
|
|
168
|
-
| **Groundedness** | Hallucination rate | Ragas |
|
|
169
|
-
| **Latency** | p50/p95 response time | OpenTelemetry |
|
|
170
|
-
|
|
171
|
-
---
|
|
172
|
-
|
|
173
|
-
## LLMOps: Production Concerns
|
|
174
|
-
|
|
175
|
-
### Cost Control
|
|
176
|
-
|
|
177
|
-
```ts
|
|
178
|
-
// Track tokens per request
|
|
179
|
-
const response = await openai.chat.completions.create({ ... });
|
|
180
|
-
const { prompt_tokens, completion_tokens } = response.usage;
|
|
181
|
-
logger.info({ prompt_tokens, completion_tokens, model: 'gpt-4o', cost_usd: calcCost() });
|
|
182
|
-
|
|
183
|
-
// Cache identical prompts — LLMs are deterministic at temp=0
|
|
184
|
-
const cacheKey = hash(systemPrompt + userQuery);
|
|
185
|
-
const cached = await cache.get(cacheKey);
|
|
186
|
-
if (cached) return cached;
|
|
187
|
-
```
|
|
234
|
+
### Chunking Strategy
|
|
188
235
|
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
236
|
+
```typescript
|
|
237
|
+
// ❌ BAD: Arbitrary character splitting
|
|
238
|
+
const chunks = text.match(/.{1,1000}/g); // breaks mid-sentence, mid-word
|
|
239
|
+
|
|
240
|
+
// ✅ GOOD: Semantic chunking with overlap
|
|
241
|
+
function chunkDocument(text: string, options: ChunkOptions = {}): Chunk[] {
|
|
242
|
+
const {
|
|
243
|
+
maxTokens = 512, // chunk size
|
|
244
|
+
overlapTokens = 50, // overlap between chunks
|
|
245
|
+
separator = "\n\n", // split on paragraph boundaries first
|
|
246
|
+
} = options;
|
|
247
|
+
|
|
248
|
+
const paragraphs = text.split(separator);
|
|
249
|
+
const chunks: Chunk[] = [];
|
|
250
|
+
let current = "";
|
|
251
|
+
|
|
252
|
+
for (const para of paragraphs) {
|
|
253
|
+
if (tokenCount(current + para) > maxTokens && current) {
|
|
254
|
+
chunks.push({ text: current.trim(), tokens: tokenCount(current) });
|
|
255
|
+
// Keep overlap from previous chunk
|
|
256
|
+
const words = current.split(" ");
|
|
257
|
+
current = words.slice(-overlapTokens).join(" ") + separator + para;
|
|
258
|
+
} else {
|
|
259
|
+
current += separator + para;
|
|
203
260
|
}
|
|
204
261
|
}
|
|
205
|
-
|
|
206
|
-
}
|
|
207
|
-
```
|
|
262
|
+
if (current.trim()) chunks.push({ text: current.trim(), tokens: tokenCount(current) });
|
|
208
263
|
|
|
209
|
-
|
|
264
|
+
return chunks;
|
|
265
|
+
}
|
|
210
266
|
|
|
211
|
-
|
|
267
|
+
// Chunk size guidelines:
|
|
268
|
+
// 256-512 tokens → precise retrieval (Q&A, support)
|
|
269
|
+
// 512-1024 tokens → balanced (general RAG)
|
|
270
|
+
// 1024-2048 tokens → broad context (summarization)
|
|
271
|
+
```
|
|
212
272
|
|
|
213
|
-
|
|
273
|
+
### Vector Store Selection
|
|
214
274
|
|
|
215
275
|
```
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
❌
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
Evidence: [test output / lint pass / compile success]
|
|
276
|
+
pgvector (PostgreSQL) → Already using Postgres, <10M vectors, simple
|
|
277
|
+
Pinecone → Managed, serverless, easy scaling
|
|
278
|
+
Weaviate → Hybrid search (vector + keyword), multimodal
|
|
279
|
+
Qdrant → High performance, Rust-based, self-hostable
|
|
280
|
+
Chroma → Local development, prototyping
|
|
281
|
+
Milvus → Enterprise scale, GPU acceleration
|
|
282
|
+
|
|
283
|
+
// ❌ HALLUCINATION TRAP: Vector search is NOT keyword search
|
|
284
|
+
// "Apple CEO" might not find "Tim Cook runs Apple Inc."
|
|
285
|
+
// Use HYBRID search (vector + BM25 keyword) for production
|
|
227
286
|
```
|
|
228
287
|
|
|
229
|
-
**VBC (Verification-Before-Completion) is mandatory.**
|
|
230
|
-
Do not mark status as VERIFIED until concrete terminal evidence is provided.
|
|
231
|
-
|
|
232
|
-
|
|
233
288
|
---
|
|
234
289
|
|
|
235
|
-
##
|
|
290
|
+
## Streaming
|
|
291
|
+
|
|
292
|
+
```typescript
|
|
293
|
+
// Server-Sent Events for AI token streaming
|
|
294
|
+
app.get("/api/chat", async (req, res) => {
|
|
295
|
+
res.setHeader("Content-Type", "text/event-stream");
|
|
296
|
+
res.setHeader("Cache-Control", "no-cache");
|
|
297
|
+
res.setHeader("Connection", "keep-alive");
|
|
298
|
+
|
|
299
|
+
const stream = await openai.chat.completions.create({
|
|
300
|
+
model: "gpt-4o-mini",
|
|
301
|
+
messages: [{ role: "user", content: req.query.message as string }],
|
|
302
|
+
stream: true,
|
|
303
|
+
});
|
|
304
|
+
|
|
305
|
+
for await (const chunk of stream) {
|
|
306
|
+
const content = chunk.choices[0]?.delta?.content;
|
|
307
|
+
if (content) {
|
|
308
|
+
res.write(`data: ${JSON.stringify({ content })}\n\n`);
|
|
309
|
+
}
|
|
310
|
+
}
|
|
236
311
|
|
|
237
|
-
|
|
238
|
-
|
|
312
|
+
res.write("data: [DONE]\n\n");
|
|
313
|
+
res.end();
|
|
314
|
+
});
|
|
239
315
|
|
|
240
|
-
|
|
316
|
+
// Client-side consumption
|
|
317
|
+
const eventSource = new EventSource(`/api/chat?message=${encodeURIComponent(msg)}`);
|
|
318
|
+
eventSource.onmessage = (event) => {
|
|
319
|
+
if (event.data === "[DONE]") { eventSource.close(); return; }
|
|
320
|
+
const { content } = JSON.parse(event.data);
|
|
321
|
+
appendToChat(content);
|
|
322
|
+
};
|
|
323
|
+
```
|
|
241
324
|
|
|
242
|
-
|
|
243
|
-
2. **Prompt injection via concatenation** — never `systemPrompt + userInput`. Use separate message roles.
|
|
244
|
-
3. **No eval strategy** — shipping LLM features with zero eval coverage is shipping blind.
|
|
245
|
-
4. **Ignoring token limits** — context exceeding `max_tokens` silently fails or truncates unpredictably.
|
|
246
|
-
5. **No cost tracking** — LLM costs compound at scale — always instrument from day one.
|
|
247
|
-
6. **Synchronous LLM calls** — all LLM API calls are async. Never block the event loop waiting for them.
|
|
325
|
+
---
|
|
248
326
|
|
|
249
|
-
|
|
327
|
+
## Cost Optimization
|
|
250
328
|
|
|
251
329
|
```
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
330
|
+
1. Prompt caching → Cache system prompts (OpenAI, Anthropic support this)
|
|
331
|
+
2. Output token limiting → Set max_tokens to prevent runaway responses
|
|
332
|
+
3. Tiered models → Use cheap models for classification, expensive for reasoning
|
|
333
|
+
4. Batch processing → Use batch APIs for offline processing (50% discount)
|
|
334
|
+
5. Chunked context → Send only relevant chunks, not entire documents
|
|
335
|
+
6. Response streaming → Stream to reduce TTFT (time to first token); improves perceived latency, not token cost
|
|
336
|
+
7. Structured output → Shorter JSON responses vs verbose prose
|
|
337
|
+
|
|
338
|
+
// Cost estimation:
|
|
339
|
+
// GPT-4o: ~$2.50/1M input, ~$10/1M output
|
|
340
|
+
// GPT-4o-mini: ~$0.15/1M input, ~$0.60/1M output
|
|
341
|
+
// 1M tokens ≈ 750,000 words ≈ 3,000 pages
|
|
258
342
|
```
|
|
343
|
+
|
|
344
|
+
---
|