@neyugn/agent-kits 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (158) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +514 -0
  3. package/README.vi.md +410 -0
  4. package/README.zh.md +410 -0
  5. package/dist/cli.d.ts +1 -0
  6. package/dist/cli.js +422 -0
  7. package/kits/coder/ARCHITECTURE.md +289 -0
  8. package/kits/coder/agents/ai-engineer.md +344 -0
  9. package/kits/coder/agents/backend-specialist.md +270 -0
  10. package/kits/coder/agents/cloud-architect.md +363 -0
  11. package/kits/coder/agents/code-reviewer.md +284 -0
  12. package/kits/coder/agents/data-engineer.md +401 -0
  13. package/kits/coder/agents/database-specialist.md +251 -0
  14. package/kits/coder/agents/debugger.md +209 -0
  15. package/kits/coder/agents/devops-engineer.md +281 -0
  16. package/kits/coder/agents/documentation-writer.md +296 -0
  17. package/kits/coder/agents/frontend-specialist.md +298 -0
  18. package/kits/coder/agents/i18n-specialist.md +348 -0
  19. package/kits/coder/agents/integration-specialist.md +314 -0
  20. package/kits/coder/agents/mobile-developer.md +271 -0
  21. package/kits/coder/agents/multi-tenant-architect.md +281 -0
  22. package/kits/coder/agents/orchestrator.md +263 -0
  23. package/kits/coder/agents/performance-analyst.md +327 -0
  24. package/kits/coder/agents/project-planner.md +277 -0
  25. package/kits/coder/agents/queue-specialist.md +282 -0
  26. package/kits/coder/agents/realtime-specialist.md +267 -0
  27. package/kits/coder/agents/security-auditor.md +253 -0
  28. package/kits/coder/agents/test-engineer.md +315 -0
  29. package/kits/coder/agents/ux-researcher.md +388 -0
  30. package/kits/coder/rules/.cursorrules +287 -0
  31. package/kits/coder/rules/CLAUDE.md +287 -0
  32. package/kits/coder/rules/CODEX.md +287 -0
  33. package/kits/coder/rules/GEMINI.md +287 -0
  34. package/kits/coder/scripts/checklist.py +318 -0
  35. package/kits/coder/scripts/kit_status.py +292 -0
  36. package/kits/coder/scripts/skills_manager.py +243 -0
  37. package/kits/coder/scripts/verify_all.py +391 -0
  38. package/kits/coder/skills/accessibility-patterns/SKILL.md +372 -0
  39. package/kits/coder/skills/accessibility-patterns/scripts/a11y_checker.py +211 -0
  40. package/kits/coder/skills/ai-rag-patterns/SKILL.md +444 -0
  41. package/kits/coder/skills/api-patterns/SKILL.md +316 -0
  42. package/kits/coder/skills/api-patterns/assets/.gitkeep +1 -0
  43. package/kits/coder/skills/api-patterns/references/deep-dive.md +21 -0
  44. package/kits/coder/skills/api-patterns/scripts/api_validator.py +253 -0
  45. package/kits/coder/skills/api-patterns/scripts/validate.py +56 -0
  46. package/kits/coder/skills/auth-patterns/SKILL.md +267 -0
  47. package/kits/coder/skills/aws-patterns/SKILL.md +576 -0
  48. package/kits/coder/skills/brainstorming/SKILL.md +370 -0
  49. package/kits/coder/skills/brainstorming/assets/.gitkeep +1 -0
  50. package/kits/coder/skills/brainstorming/references/deep-dive.md +21 -0
  51. package/kits/coder/skills/brainstorming/scripts/validate.py +56 -0
  52. package/kits/coder/skills/clean-code/SKILL.md +240 -0
  53. package/kits/coder/skills/clean-code/assets/.gitkeep +1 -0
  54. package/kits/coder/skills/clean-code/references/deep-dive.md +21 -0
  55. package/kits/coder/skills/clean-code/scripts/lint_runner.py +186 -0
  56. package/kits/coder/skills/clean-code/scripts/validate.py +56 -0
  57. package/kits/coder/skills/database-design/SKILL.md +255 -0
  58. package/kits/coder/skills/database-design/assets/.gitkeep +1 -0
  59. package/kits/coder/skills/database-design/references/deep-dive.md +21 -0
  60. package/kits/coder/skills/database-design/scripts/schema_validator.py +272 -0
  61. package/kits/coder/skills/database-design/scripts/validate.py +56 -0
  62. package/kits/coder/skills/docker-patterns/SKILL.md +240 -0
  63. package/kits/coder/skills/documentation-templates/SKILL.md +441 -0
  64. package/kits/coder/skills/e2e-testing/SKILL.md +457 -0
  65. package/kits/coder/skills/flutter-patterns/SKILL.md +330 -0
  66. package/kits/coder/skills/frontend-design/SKILL.md +127 -0
  67. package/kits/coder/skills/github-actions/SKILL.md +349 -0
  68. package/kits/coder/skills/gitlab-ci-patterns/SKILL.md +466 -0
  69. package/kits/coder/skills/graphql-patterns/SKILL.md +558 -0
  70. package/kits/coder/skills/i18n-localization/SKILL.md +345 -0
  71. package/kits/coder/skills/i18n-localization/scripts/i18n_checker.py +267 -0
  72. package/kits/coder/skills/kubernetes-patterns/SKILL.md +357 -0
  73. package/kits/coder/skills/mermaid-diagrams/SKILL.md +351 -0
  74. package/kits/coder/skills/mobile-design/SKILL.md +305 -0
  75. package/kits/coder/skills/monitoring-observability/SKILL.md +458 -0
  76. package/kits/coder/skills/multi-tenancy/SKILL.md +317 -0
  77. package/kits/coder/skills/multi-tenancy/assets/.gitkeep +1 -0
  78. package/kits/coder/skills/multi-tenancy/references/deep-dive.md +21 -0
  79. package/kits/coder/skills/multi-tenancy/scripts/validate.py +56 -0
  80. package/kits/coder/skills/nodejs-best-practices/SKILL.md +220 -0
  81. package/kits/coder/skills/performance-profiling/SKILL.md +333 -0
  82. package/kits/coder/skills/performance-profiling/assets/.gitkeep +1 -0
  83. package/kits/coder/skills/performance-profiling/references/deep-dive.md +21 -0
  84. package/kits/coder/skills/performance-profiling/scripts/validate.py +56 -0
  85. package/kits/coder/skills/plan-writing/SKILL.md +360 -0
  86. package/kits/coder/skills/plan-writing/assets/.gitkeep +1 -0
  87. package/kits/coder/skills/plan-writing/references/deep-dive.md +21 -0
  88. package/kits/coder/skills/plan-writing/scripts/validate.py +56 -0
  89. package/kits/coder/skills/postgres-patterns/SKILL.md +361 -0
  90. package/kits/coder/skills/prompt-engineering/SKILL.md +277 -0
  91. package/kits/coder/skills/queue-patterns/SKILL.md +359 -0
  92. package/kits/coder/skills/queue-patterns/assets/.gitkeep +1 -0
  93. package/kits/coder/skills/queue-patterns/references/deep-dive.md +21 -0
  94. package/kits/coder/skills/queue-patterns/scripts/validate.py +56 -0
  95. package/kits/coder/skills/react-native-patterns/SKILL.md +393 -0
  96. package/kits/coder/skills/react-patterns/SKILL.md +319 -0
  97. package/kits/coder/skills/realtime-patterns/SKILL.md +506 -0
  98. package/kits/coder/skills/realtime-patterns/assets/.gitkeep +1 -0
  99. package/kits/coder/skills/realtime-patterns/references/deep-dive.md +21 -0
  100. package/kits/coder/skills/realtime-patterns/scripts/validate.py +56 -0
  101. package/kits/coder/skills/redis-patterns/SKILL.md +484 -0
  102. package/kits/coder/skills/security-fundamentals/SKILL.md +363 -0
  103. package/kits/coder/skills/security-fundamentals/assets/.gitkeep +1 -0
  104. package/kits/coder/skills/security-fundamentals/references/deep-dive.md +21 -0
  105. package/kits/coder/skills/security-fundamentals/scripts/security_scan.py +326 -0
  106. package/kits/coder/skills/security-fundamentals/scripts/validate.py +56 -0
  107. package/kits/coder/skills/seo-patterns/SKILL.md +262 -0
  108. package/kits/coder/skills/seo-patterns/scripts/seo_checker.py +211 -0
  109. package/kits/coder/skills/systematic-debugging/SKILL.md +478 -0
  110. package/kits/coder/skills/systematic-debugging/assets/.gitkeep +1 -0
  111. package/kits/coder/skills/systematic-debugging/references/deep-dive.md +21 -0
  112. package/kits/coder/skills/systematic-debugging/scripts/validate.py +56 -0
  113. package/kits/coder/skills/tailwind-patterns/SKILL.md +395 -0
  114. package/kits/coder/skills/terraform-patterns/SKILL.md +470 -0
  115. package/kits/coder/skills/testing-patterns/SKILL.md +285 -0
  116. package/kits/coder/skills/testing-patterns/assets/.gitkeep +1 -0
  117. package/kits/coder/skills/testing-patterns/references/deep-dive.md +21 -0
  118. package/kits/coder/skills/testing-patterns/scripts/test_runner.py +219 -0
  119. package/kits/coder/skills/testing-patterns/scripts/validate.py +56 -0
  120. package/kits/coder/skills/typescript-patterns/SKILL.md +417 -0
  121. package/kits/coder/skills/ui-ux-pro-max/SKILL.md +364 -0
  122. package/kits/coder/skills/ui-ux-pro-max/data/charts.csv +26 -0
  123. package/kits/coder/skills/ui-ux-pro-max/data/colors.csv +97 -0
  124. package/kits/coder/skills/ui-ux-pro-max/data/icons.csv +101 -0
  125. package/kits/coder/skills/ui-ux-pro-max/data/landing.csv +31 -0
  126. package/kits/coder/skills/ui-ux-pro-max/data/products.csv +97 -0
  127. package/kits/coder/skills/ui-ux-pro-max/data/prompts.csv +24 -0
  128. package/kits/coder/skills/ui-ux-pro-max/data/react-performance.csv +45 -0
  129. package/kits/coder/skills/ui-ux-pro-max/data/stacks/flutter.csv +53 -0
  130. package/kits/coder/skills/ui-ux-pro-max/data/stacks/html-tailwind.csv +56 -0
  131. package/kits/coder/skills/ui-ux-pro-max/data/stacks/nextjs.csv +53 -0
  132. package/kits/coder/skills/ui-ux-pro-max/data/stacks/nuxt-ui.csv +51 -0
  133. package/kits/coder/skills/ui-ux-pro-max/data/stacks/nuxtjs.csv +59 -0
  134. package/kits/coder/skills/ui-ux-pro-max/data/stacks/react-native.csv +52 -0
  135. package/kits/coder/skills/ui-ux-pro-max/data/stacks/react.csv +54 -0
  136. package/kits/coder/skills/ui-ux-pro-max/data/stacks/shadcn.csv +61 -0
  137. package/kits/coder/skills/ui-ux-pro-max/data/stacks/svelte.csv +54 -0
  138. package/kits/coder/skills/ui-ux-pro-max/data/stacks/swiftui.csv +51 -0
  139. package/kits/coder/skills/ui-ux-pro-max/data/stacks/vue.csv +50 -0
  140. package/kits/coder/skills/ui-ux-pro-max/data/styles.csv +59 -0
  141. package/kits/coder/skills/ui-ux-pro-max/data/typography.csv +58 -0
  142. package/kits/coder/skills/ui-ux-pro-max/data/ui-reasoning.csv +101 -0
  143. package/kits/coder/skills/ui-ux-pro-max/data/ux-guidelines.csv +100 -0
  144. package/kits/coder/skills/ui-ux-pro-max/data/web-interface.csv +31 -0
  145. package/kits/coder/skills/ui-ux-pro-max/scripts/__pycache__/core.cpython-314.pyc +0 -0
  146. package/kits/coder/skills/ui-ux-pro-max/scripts/__pycache__/design_system.cpython-314.pyc +0 -0
  147. package/kits/coder/skills/ui-ux-pro-max/scripts/core.py +257 -0
  148. package/kits/coder/skills/ui-ux-pro-max/scripts/design_system.py +488 -0
  149. package/kits/coder/skills/ui-ux-pro-max/scripts/search.py +76 -0
  150. package/kits/coder/workflows/.gitkeep +20 -0
  151. package/kits/coder/workflows/create.md +152 -0
  152. package/kits/coder/workflows/debug.md +223 -0
  153. package/kits/coder/workflows/deploy.md +283 -0
  154. package/kits/coder/workflows/orchestrate.md +243 -0
  155. package/kits/coder/workflows/plan.md +134 -0
  156. package/kits/coder/workflows/test.md +237 -0
  157. package/kits/coder/workflows/ui-ux-pro-max.md +109 -0
  158. package/package.json +49 -0
@@ -0,0 +1,444 @@
1
+ ---
2
+ name: ai-rag-patterns
3
+ description: Retrieval-Augmented Generation (RAG) patterns for LLM applications. Use when building RAG systems, vector search, embeddings, semantic search, or document retrieval pipelines.
4
+ allowed-tools: Read, Write, Edit, Glob, Grep
5
+ version: 2.0
6
+ ---
7
+
8
+ # AI RAG Patterns - Retrieval-Augmented Generation
9
+
10
+ > **Philosophy:** Retrieval quality determines generation quality. Garbage in, garbage out.
11
+
12
+ ---
13
+
14
+ ## When to Use This Skill
15
+
16
+ | ✅ Use | ❌ Don't Use |
17
+ | -------------------------------- | ----------------------------- |
18
+ | Building Q&A over documents | Pure generative tasks |
19
+ | Semantic search implementation | Dataset too small (<100 docs) |
20
+ | Reducing LLM hallucinations | Data privacy restrictions |
21
+ | Domain-specific knowledge access | Simple keyword search |
22
+ | Document processing pipelines | Real-time streaming data |
23
+
24
+ ---
25
+
26
+ ## Core Rules (Non-Negotiable)
27
+
28
+ 1. **Semantic chunking first** - Chunk by meaning, not token counts
29
+ 2. **Batch embeddings always** - Generate embeddings in batches, never one API call per chunk
30
+ 3. **Hybrid search default** - Combine dense + sparse retrieval
31
+ 4. **Reranking required** - Never trust first-stage retrieval alone
32
+ 5. **Evaluation mandatory** - Measure retrieval quality separately
33
+
34
+ ---
35
+
36
+ ## RAG Architecture Overview
37
+
38
+ ```
39
+ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐
40
+ │ Documents │───▷│ Chunking │───▷│ Embedding │
41
+ └─────────────┘ └─────────────┘ └─────────────┘
42
+
43
+
44
+ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐
45
+ │ Response │◁───│ LLM │◁───│ Retrieval │
46
+ └─────────────┘ └─────────────┘ └─────────────┘
47
+
48
+
49
+ ┌─────────────┐
50
+ │ Reranking │
51
+ └─────────────┘
52
+ ```
53
+
54
+ ---
55
+
56
+ ## Vector Database Selection
57
+
58
+ | Database | Type | Best For | Pricing |
59
+ | ------------ | ----------- | ---------------------------- | ----------- |
60
+ | **Pinecone** | Managed | Production, scalable | Pay-per-use |
61
+ | **Weaviate** | Open-source | Hybrid search, self-hosted | Free (OSS) |
62
+ | **Chroma** | Embedded | Prototyping, local dev | Free |
63
+ | **Qdrant** | Open-source | Fast filtering, on-premise | Free (OSS) |
64
+ | **pgvector** | Extension | PostgreSQL integration | Free |
65
+ | **Milvus** | Open-source | High performance, enterprise | Free (OSS) |
66
+
67
+ ### Decision Tree
68
+
69
+ ```
70
+ What's your scale?
71
+
72
+ ├─ Prototyping / Small scale?
73
+ │ └─ → Chroma (embedded, zero setup)
74
+
75
+ ├─ Already using PostgreSQL?
76
+ │ └─ → pgvector (integrated, no new infra)
77
+
78
+ ├─ Production, managed service?
79
+ │ └─ → Pinecone (scalable, low ops)
80
+
81
+ └─ Self-hosted, enterprise?
82
+ └─ → Qdrant or Milvus (full control)
83
+ ```
84
+
85
+ ---
86
+
87
+ ## Embedding Model Selection
88
+
89
+ | Model | Dimensions | Speed | Quality | Cost |
90
+ | ------------------------ | ---------- | ------- | ------- | ------------ |
91
+ | `text-embedding-3-small` | 1536 | Fast | Good | $0.02/1M |
92
+ | `text-embedding-3-large` | 3072 | Medium | Best | $0.13/1M |
93
+ | `bge-large-en-v1.5` | 1024 | Fast | Best | Free (local) |
94
+ | `all-MiniLM-L6-v2` | 384 | Fastest | Good | Free (local) |
95
+ | `e5-large-v2` | 1024 | Medium | Best | Free (local) |
96
+
97
+ **Recommendation:** Start with `text-embedding-3-small`, evaluate, upgrade if needed.
98
+
99
+ ---
100
+
101
+ ## Chunking Strategies
102
+
103
+ ### 1. Recursive Character Splitting (Default)
104
+
105
+ ```python
106
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
107
+
108
+ splitter = RecursiveCharacterTextSplitter(
109
+ chunk_size=1000,
110
+ chunk_overlap=200,
111
+ separators=["\n\n", "\n", " ", ""]
112
+ )
113
+ chunks = splitter.split_documents(documents)
114
+ ```
115
+
116
+ ### 2. Semantic Chunking (Recommended)
117
+
118
+ ```python
119
+ from langchain_experimental.text_splitter import SemanticChunker
120
+ from langchain.embeddings import OpenAIEmbeddings
121
+
122
+ splitter = SemanticChunker(
123
+ embeddings=OpenAIEmbeddings(),
124
+ breakpoint_threshold_type="percentile"
125
+ )
126
+ chunks = splitter.split_documents(documents)
127
+ ```
128
+
129
+ ### 3. Markdown/Code Aware
130
+
131
+ ```python
132
+ from langchain.text_splitter import MarkdownHeaderTextSplitter
133
+
134
+ headers = [
135
+ ("#", "h1"),
136
+ ("##", "h2"),
137
+ ("###", "h3"),
138
+ ]
139
+ splitter = MarkdownHeaderTextSplitter(headers_to_split_on=headers)
140
+ ```
141
+
142
+ ### Chunking Best Practices
143
+
144
+ | Parameter | Recommended | Why |
145
+ | ---------- | -------------- | ------------------------------ |
146
+ | Chunk size | 500-1000 chars | Balance context vs specificity |
147
+ | Overlap | 10-20% | Preserve context at boundaries |
148
+ | Separators | Semantic | Respect document structure |
149
+
150
+ ---
151
+
152
+ ## Retrieval Strategies
153
+
154
+ ### 1. Dense Retrieval (Vector Similarity)
155
+
156
+ ```python
157
+ # Basic vector search
158
+ results = vectorstore.similarity_search(query, k=5)
159
+
160
+ # With score threshold
161
+ results = vectorstore.similarity_search_with_relevance_scores(
162
+ query,
163
+ k=10,
164
+ score_threshold=0.7
165
+ )
166
+ ```
167
+
168
+ ### 2. Sparse Retrieval (BM25/Keyword)
169
+
170
+ ```python
171
+ from langchain.retrievers import BM25Retriever
172
+
173
+ bm25 = BM25Retriever.from_documents(documents)
174
+ bm25.k = 5
175
+ results = bm25.get_relevant_documents(query)
176
+ ```
177
+
178
+ ### 3. Hybrid Search (Recommended)
179
+
180
+ ```python
181
+ from langchain.retrievers import EnsembleRetriever
182
+
183
+ ensemble = EnsembleRetriever(
184
+ retrievers=[bm25_retriever, vector_retriever],
185
+ weights=[0.3, 0.7] # Favor semantic
186
+ )
187
+ results = ensemble.get_relevant_documents(query)
188
+ ```
189
+
190
+ ### 4. Multi-Query Retrieval
191
+
192
+ ```python
193
+ from langchain.retrievers.multi_query import MultiQueryRetriever
194
+
195
+ retriever = MultiQueryRetriever.from_llm(
196
+ retriever=vectorstore.as_retriever(),
197
+ llm=llm
198
+ )
199
+ # Generates multiple query variations automatically
200
+ ```
201
+
202
+ ---
203
+
204
+ ## Reranking Patterns
205
+
206
+ ### Cross-Encoder Reranking
207
+
208
+ ```python
209
+ from sentence_transformers import CrossEncoder
210
+
211
+ reranker = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-6-v2')
212
+
213
+ # Get initial candidates
214
+ candidates = vectorstore.similarity_search(query, k=20)
215
+
216
+ # Rerank
217
+ pairs = [[query, doc.page_content] for doc in candidates]
218
+ scores = reranker.predict(pairs)
219
+
220
+ # Sort by score
221
+ reranked = sorted(zip(candidates, scores), key=lambda x: x[1], reverse=True)[:5]
222
+ ```
223
+
224
+ ### Maximal Marginal Relevance (MMR)
225
+
226
+ ```python
227
+ # Balance relevance + diversity
228
+ results = vectorstore.max_marginal_relevance_search(
229
+ query,
230
+ k=5,
231
+ fetch_k=20,
232
+ lambda_mult=0.5 # 0=diversity, 1=relevance
233
+ )
234
+ ```
235
+
236
+ ---
237
+
238
+ ## Advanced RAG Patterns
239
+
240
+ ### Parent Document Retriever
241
+
242
+ ```python
243
+ from langchain.retrievers import ParentDocumentRetriever
244
+
245
+ # Small chunks for retrieval, large for context
246
+ child_splitter = RecursiveCharacterTextSplitter(chunk_size=400)
247
+ parent_splitter = RecursiveCharacterTextSplitter(chunk_size=2000)
248
+
249
+ retriever = ParentDocumentRetriever(
250
+ vectorstore=vectorstore,
251
+ docstore=store,
252
+ child_splitter=child_splitter,
253
+ parent_splitter=parent_splitter
254
+ )
255
+ ```
256
+
257
+ ### Contextual Compression
258
+
259
+ ```python
260
+ from langchain.retrievers import ContextualCompressionRetriever
261
+ from langchain.retrievers.document_compressors import LLMChainExtractor
262
+
263
+ compressor = LLMChainExtractor.from_llm(llm)
264
+
265
+ retriever = ContextualCompressionRetriever(
266
+ base_compressor=compressor,
267
+ base_retriever=vectorstore.as_retriever()
268
+ )
269
+ # Returns only relevant parts of documents
270
+ ```
271
+
272
+ ### Hypothetical Document Embedding (HyDE)
273
+
274
+ ```python
275
+ from langchain.chains import HypotheticalDocumentEmbedder
276
+
277
+ hyde = HypotheticalDocumentEmbedder.from_llm(
278
+ llm=llm,
279
+ base_embeddings=embeddings,
280
+ custom_prompt=hyde_prompt
281
+ )
282
+ # Generate hypothetical answer, then search for similar
283
+ ```
284
+
285
+ ---
286
+
287
+ ## RAG Prompt Patterns
288
+
289
+ ### Basic with Citations
290
+
291
+ ```python
292
+ prompt = """Answer based on the context below. Include citations [1], [2], etc.
293
+
294
+ Context:
295
+ {context}
296
+
297
+ Question: {question}
298
+
299
+ Answer (with citations):"""
300
+ ```
301
+
302
+ ### Grounded with Confidence
303
+
304
+ ```python
305
+ prompt = """Use ONLY the provided context. If you cannot answer, say "I don't know."
306
+
307
+ Context:
308
+ {context}
309
+
310
+ Question: {question}
311
+
312
+ Answer:
313
+ Confidence (0-100%):
314
+ Sources used:"""
315
+ ```
316
+
317
+ ### Chain-of-Thought RAG
318
+
319
+ ```python
320
+ prompt = """Given the context, reason step by step to answer.
321
+
322
+ Context:
323
+ {context}
324
+
325
+ Question: {question}
326
+
327
+ Let me think step by step:
328
+ 1. First, I'll identify relevant information...
329
+ 2. Then, I'll synthesize...
330
+ 3. Finally, I'll conclude...
331
+
332
+ Answer:"""
333
+ ```
334
+
335
+ ---
336
+
337
+ ## Evaluation Metrics
338
+
339
+ ```python
340
+ def evaluate_rag(qa_chain, test_cases):
341
+ metrics = {
342
+ 'retrieval_precision': [], # Relevant in top-k
343
+ 'retrieval_recall': [], # Found all relevant
344
+ 'answer_relevance': [], # Answer matches question
345
+ 'groundedness': [], # Answer from context only
346
+ 'faithfulness': [], # No hallucination
347
+ }
348
+
349
+ for test in test_cases:
350
+ result = qa_chain({"query": test['question']})
351
+
352
+ # Retrieval metrics
353
+ retrieved_ids = [d.id for d in result['source_documents']]
354
+ precision = len(set(retrieved_ids) & set(test['relevant_ids'])) / len(retrieved_ids) if retrieved_ids else 0.0
354
+ recall = len(set(retrieved_ids) & set(test['relevant_ids'])) / len(test['relevant_ids']) if test['relevant_ids'] else 0.0
356
+
357
+ metrics['retrieval_precision'].append(precision)
358
+ metrics['retrieval_recall'].append(recall)
359
+
360
+ # Use LLM-as-judge for semantic metrics
361
+ # ...
362
+
363
+ return {k: (sum(v) / len(v) if v else None) for k, v in metrics.items()}
364
+ ```
365
+
366
+ ---
367
+
368
+ ## Production Considerations
369
+
370
+ ### Metadata for Filtering
371
+
372
+ ```python
373
+ # Add metadata during indexing
374
+ for doc in documents:
375
+ doc.metadata = {
376
+ "source": doc.metadata.get("source"),
377
+ "date": doc.metadata.get("date"),
378
+ "category": classify(doc.page_content),
379
+ "author": extract_author(doc),
380
+ }
381
+
382
+ # Filter during retrieval
383
+ results = vectorstore.similarity_search(
384
+ query,
385
+ filter={"category": "technical", "date": {"$gte": "2024-01-01"}},
386
+ k=5
387
+ )
388
+ ```
389
+
390
+ ### Caching Strategy
391
+
392
+ ```python
393
+ import langchain
394
+ from langchain.cache import RedisSemanticCache
395
+ langchain.llm_cache = RedisSemanticCache(
396
+ redis_url="redis://localhost:6379",
397
+ embedding=embeddings,
398
+ score_threshold=0.05 # vector distance threshold: lower = stricter match
399
+ )
400
+ ```
401
+
402
+ ---
403
+
404
+ ## Anti-Patterns
405
+
406
+ | ❌ Don't | ✅ Do |
407
+ | ------------------------------ | ----------------------------------- |
408
+ | Fixed-size chunking only | Semantic chunking + structure-aware |
409
+ | Pure vector search | Hybrid search (dense + sparse) |
410
+ | Use first retrieval results | Rerank before generation |
411
+ | Same embedding for all content | Evaluate per content type |
412
+ | Cram max context into prompt | Use relevance thresholds |
413
+ | Measure only final answer | Evaluate retrieval separately |
414
+ | Ignore metadata | Add rich metadata for filtering |
415
+
416
+ ---
417
+
418
+ ## Production Checklist
419
+
420
+ Before deployment:
421
+
422
+ - [ ] Semantic chunking implemented?
423
+ - [ ] Hybrid search configured?
424
+ - [ ] Reranking step added?
425
+ - [ ] Metadata extraction automated?
426
+ - [ ] Retrieval quality measured?
427
+ - [ ] Embedding refresh strategy?
428
+ - [ ] Access control for documents?
429
+ - [ ] Caching for repeated queries?
430
+
431
+ ---
432
+
433
+ ## Related Skills
434
+
435
+ | Need | Skill |
436
+ | -------------------- | -------------------- |
437
+ | LLM prompt design | `prompt-engineering` |
438
+ | Vector DB (Postgres) | `postgres-patterns` |
439
+ | Redis caching | `redis-patterns` |
440
+ | API design | `api-patterns` |
441
+
442
+ ---
443
+
444
+ > **Remember:** RAG is only as good as your retrieval. Invest 80% of effort in retrieval quality, 20% in generation.