@neyugn/agent-kits 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +514 -0
- package/README.vi.md +410 -0
- package/README.zh.md +410 -0
- package/dist/cli.d.ts +1 -0
- package/dist/cli.js +422 -0
- package/kits/coder/ARCHITECTURE.md +289 -0
- package/kits/coder/agents/ai-engineer.md +344 -0
- package/kits/coder/agents/backend-specialist.md +270 -0
- package/kits/coder/agents/cloud-architect.md +363 -0
- package/kits/coder/agents/code-reviewer.md +284 -0
- package/kits/coder/agents/data-engineer.md +401 -0
- package/kits/coder/agents/database-specialist.md +251 -0
- package/kits/coder/agents/debugger.md +209 -0
- package/kits/coder/agents/devops-engineer.md +281 -0
- package/kits/coder/agents/documentation-writer.md +296 -0
- package/kits/coder/agents/frontend-specialist.md +298 -0
- package/kits/coder/agents/i18n-specialist.md +348 -0
- package/kits/coder/agents/integration-specialist.md +314 -0
- package/kits/coder/agents/mobile-developer.md +271 -0
- package/kits/coder/agents/multi-tenant-architect.md +281 -0
- package/kits/coder/agents/orchestrator.md +263 -0
- package/kits/coder/agents/performance-analyst.md +327 -0
- package/kits/coder/agents/project-planner.md +277 -0
- package/kits/coder/agents/queue-specialist.md +282 -0
- package/kits/coder/agents/realtime-specialist.md +267 -0
- package/kits/coder/agents/security-auditor.md +253 -0
- package/kits/coder/agents/test-engineer.md +315 -0
- package/kits/coder/agents/ux-researcher.md +388 -0
- package/kits/coder/rules/.cursorrules +287 -0
- package/kits/coder/rules/CLAUDE.md +287 -0
- package/kits/coder/rules/CODEX.md +287 -0
- package/kits/coder/rules/GEMINI.md +287 -0
- package/kits/coder/scripts/checklist.py +318 -0
- package/kits/coder/scripts/kit_status.py +292 -0
- package/kits/coder/scripts/skills_manager.py +243 -0
- package/kits/coder/scripts/verify_all.py +391 -0
- package/kits/coder/skills/accessibility-patterns/SKILL.md +372 -0
- package/kits/coder/skills/accessibility-patterns/scripts/a11y_checker.py +211 -0
- package/kits/coder/skills/ai-rag-patterns/SKILL.md +444 -0
- package/kits/coder/skills/api-patterns/SKILL.md +316 -0
- package/kits/coder/skills/api-patterns/assets/.gitkeep +1 -0
- package/kits/coder/skills/api-patterns/references/deep-dive.md +21 -0
- package/kits/coder/skills/api-patterns/scripts/api_validator.py +253 -0
- package/kits/coder/skills/api-patterns/scripts/validate.py +56 -0
- package/kits/coder/skills/auth-patterns/SKILL.md +267 -0
- package/kits/coder/skills/aws-patterns/SKILL.md +576 -0
- package/kits/coder/skills/brainstorming/SKILL.md +370 -0
- package/kits/coder/skills/brainstorming/assets/.gitkeep +1 -0
- package/kits/coder/skills/brainstorming/references/deep-dive.md +21 -0
- package/kits/coder/skills/brainstorming/scripts/validate.py +56 -0
- package/kits/coder/skills/clean-code/SKILL.md +240 -0
- package/kits/coder/skills/clean-code/assets/.gitkeep +1 -0
- package/kits/coder/skills/clean-code/references/deep-dive.md +21 -0
- package/kits/coder/skills/clean-code/scripts/lint_runner.py +186 -0
- package/kits/coder/skills/clean-code/scripts/validate.py +56 -0
- package/kits/coder/skills/database-design/SKILL.md +255 -0
- package/kits/coder/skills/database-design/assets/.gitkeep +1 -0
- package/kits/coder/skills/database-design/references/deep-dive.md +21 -0
- package/kits/coder/skills/database-design/scripts/schema_validator.py +272 -0
- package/kits/coder/skills/database-design/scripts/validate.py +56 -0
- package/kits/coder/skills/docker-patterns/SKILL.md +240 -0
- package/kits/coder/skills/documentation-templates/SKILL.md +441 -0
- package/kits/coder/skills/e2e-testing/SKILL.md +457 -0
- package/kits/coder/skills/flutter-patterns/SKILL.md +330 -0
- package/kits/coder/skills/frontend-design/SKILL.md +127 -0
- package/kits/coder/skills/github-actions/SKILL.md +349 -0
- package/kits/coder/skills/gitlab-ci-patterns/SKILL.md +466 -0
- package/kits/coder/skills/graphql-patterns/SKILL.md +558 -0
- package/kits/coder/skills/i18n-localization/SKILL.md +345 -0
- package/kits/coder/skills/i18n-localization/scripts/i18n_checker.py +267 -0
- package/kits/coder/skills/kubernetes-patterns/SKILL.md +357 -0
- package/kits/coder/skills/mermaid-diagrams/SKILL.md +351 -0
- package/kits/coder/skills/mobile-design/SKILL.md +305 -0
- package/kits/coder/skills/monitoring-observability/SKILL.md +458 -0
- package/kits/coder/skills/multi-tenancy/SKILL.md +317 -0
- package/kits/coder/skills/multi-tenancy/assets/.gitkeep +1 -0
- package/kits/coder/skills/multi-tenancy/references/deep-dive.md +21 -0
- package/kits/coder/skills/multi-tenancy/scripts/validate.py +56 -0
- package/kits/coder/skills/nodejs-best-practices/SKILL.md +220 -0
- package/kits/coder/skills/performance-profiling/SKILL.md +333 -0
- package/kits/coder/skills/performance-profiling/assets/.gitkeep +1 -0
- package/kits/coder/skills/performance-profiling/references/deep-dive.md +21 -0
- package/kits/coder/skills/performance-profiling/scripts/validate.py +56 -0
- package/kits/coder/skills/plan-writing/SKILL.md +360 -0
- package/kits/coder/skills/plan-writing/assets/.gitkeep +1 -0
- package/kits/coder/skills/plan-writing/references/deep-dive.md +21 -0
- package/kits/coder/skills/plan-writing/scripts/validate.py +56 -0
- package/kits/coder/skills/postgres-patterns/SKILL.md +361 -0
- package/kits/coder/skills/prompt-engineering/SKILL.md +277 -0
- package/kits/coder/skills/queue-patterns/SKILL.md +359 -0
- package/kits/coder/skills/queue-patterns/assets/.gitkeep +1 -0
- package/kits/coder/skills/queue-patterns/references/deep-dive.md +21 -0
- package/kits/coder/skills/queue-patterns/scripts/validate.py +56 -0
- package/kits/coder/skills/react-native-patterns/SKILL.md +393 -0
- package/kits/coder/skills/react-patterns/SKILL.md +319 -0
- package/kits/coder/skills/realtime-patterns/SKILL.md +506 -0
- package/kits/coder/skills/realtime-patterns/assets/.gitkeep +1 -0
- package/kits/coder/skills/realtime-patterns/references/deep-dive.md +21 -0
- package/kits/coder/skills/realtime-patterns/scripts/validate.py +56 -0
- package/kits/coder/skills/redis-patterns/SKILL.md +484 -0
- package/kits/coder/skills/security-fundamentals/SKILL.md +363 -0
- package/kits/coder/skills/security-fundamentals/assets/.gitkeep +1 -0
- package/kits/coder/skills/security-fundamentals/references/deep-dive.md +21 -0
- package/kits/coder/skills/security-fundamentals/scripts/security_scan.py +326 -0
- package/kits/coder/skills/security-fundamentals/scripts/validate.py +56 -0
- package/kits/coder/skills/seo-patterns/SKILL.md +262 -0
- package/kits/coder/skills/seo-patterns/scripts/seo_checker.py +211 -0
- package/kits/coder/skills/systematic-debugging/SKILL.md +478 -0
- package/kits/coder/skills/systematic-debugging/assets/.gitkeep +1 -0
- package/kits/coder/skills/systematic-debugging/references/deep-dive.md +21 -0
- package/kits/coder/skills/systematic-debugging/scripts/validate.py +56 -0
- package/kits/coder/skills/tailwind-patterns/SKILL.md +395 -0
- package/kits/coder/skills/terraform-patterns/SKILL.md +470 -0
- package/kits/coder/skills/testing-patterns/SKILL.md +285 -0
- package/kits/coder/skills/testing-patterns/assets/.gitkeep +1 -0
- package/kits/coder/skills/testing-patterns/references/deep-dive.md +21 -0
- package/kits/coder/skills/testing-patterns/scripts/test_runner.py +219 -0
- package/kits/coder/skills/testing-patterns/scripts/validate.py +56 -0
- package/kits/coder/skills/typescript-patterns/SKILL.md +417 -0
- package/kits/coder/skills/ui-ux-pro-max/SKILL.md +364 -0
- package/kits/coder/skills/ui-ux-pro-max/data/charts.csv +26 -0
- package/kits/coder/skills/ui-ux-pro-max/data/colors.csv +97 -0
- package/kits/coder/skills/ui-ux-pro-max/data/icons.csv +101 -0
- package/kits/coder/skills/ui-ux-pro-max/data/landing.csv +31 -0
- package/kits/coder/skills/ui-ux-pro-max/data/products.csv +97 -0
- package/kits/coder/skills/ui-ux-pro-max/data/prompts.csv +24 -0
- package/kits/coder/skills/ui-ux-pro-max/data/react-performance.csv +45 -0
- package/kits/coder/skills/ui-ux-pro-max/data/stacks/flutter.csv +53 -0
- package/kits/coder/skills/ui-ux-pro-max/data/stacks/html-tailwind.csv +56 -0
- package/kits/coder/skills/ui-ux-pro-max/data/stacks/nextjs.csv +53 -0
- package/kits/coder/skills/ui-ux-pro-max/data/stacks/nuxt-ui.csv +51 -0
- package/kits/coder/skills/ui-ux-pro-max/data/stacks/nuxtjs.csv +59 -0
- package/kits/coder/skills/ui-ux-pro-max/data/stacks/react-native.csv +52 -0
- package/kits/coder/skills/ui-ux-pro-max/data/stacks/react.csv +54 -0
- package/kits/coder/skills/ui-ux-pro-max/data/stacks/shadcn.csv +61 -0
- package/kits/coder/skills/ui-ux-pro-max/data/stacks/svelte.csv +54 -0
- package/kits/coder/skills/ui-ux-pro-max/data/stacks/swiftui.csv +51 -0
- package/kits/coder/skills/ui-ux-pro-max/data/stacks/vue.csv +50 -0
- package/kits/coder/skills/ui-ux-pro-max/data/styles.csv +59 -0
- package/kits/coder/skills/ui-ux-pro-max/data/typography.csv +58 -0
- package/kits/coder/skills/ui-ux-pro-max/data/ui-reasoning.csv +101 -0
- package/kits/coder/skills/ui-ux-pro-max/data/ux-guidelines.csv +100 -0
- package/kits/coder/skills/ui-ux-pro-max/data/web-interface.csv +31 -0
- package/kits/coder/skills/ui-ux-pro-max/scripts/__pycache__/core.cpython-314.pyc +0 -0
- package/kits/coder/skills/ui-ux-pro-max/scripts/__pycache__/design_system.cpython-314.pyc +0 -0
- package/kits/coder/skills/ui-ux-pro-max/scripts/core.py +257 -0
- package/kits/coder/skills/ui-ux-pro-max/scripts/design_system.py +488 -0
- package/kits/coder/skills/ui-ux-pro-max/scripts/search.py +76 -0
- package/kits/coder/workflows/.gitkeep +20 -0
- package/kits/coder/workflows/create.md +152 -0
- package/kits/coder/workflows/debug.md +223 -0
- package/kits/coder/workflows/deploy.md +283 -0
- package/kits/coder/workflows/orchestrate.md +243 -0
- package/kits/coder/workflows/plan.md +134 -0
- package/kits/coder/workflows/test.md +237 -0
- package/kits/coder/workflows/ui-ux-pro-max.md +109 -0
- package/package.json +49 -0
|
@@ -0,0 +1,444 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: ai-rag-patterns
|
|
3
|
+
description: Retrieval-Augmented Generation (RAG) patterns for LLM applications. Use when building RAG systems, vector search, embeddings, semantic search, or document retrieval pipelines.
|
|
4
|
+
allowed-tools: Read, Write, Edit, Glob, Grep
|
|
5
|
+
version: 2.0
|
|
6
|
+
---
|
|
7
|
+
|
|
8
|
+
# AI RAG Patterns - Retrieval-Augmented Generation
|
|
9
|
+
|
|
10
|
+
> **Philosophy:** Retrieval quality determines generation quality. Garbage in, garbage out.
|
|
11
|
+
|
|
12
|
+
---
|
|
13
|
+
|
|
14
|
+
## When to Use This Skill
|
|
15
|
+
|
|
16
|
+
| ✅ Use | ❌ Don't Use |
|
|
17
|
+
| -------------------------------- | ----------------------------- |
|
|
18
|
+
| Building Q&A over documents | Pure generative tasks |
|
|
19
|
+
| Semantic search implementation | Dataset too small (<100 docs) |
|
|
20
|
+
| Reducing LLM hallucinations | Data privacy restrictions |
|
|
21
|
+
| Domain-specific knowledge access | Simple keyword search |
|
|
22
|
+
| Document processing pipelines | Real-time streaming data |
|
|
23
|
+
|
|
24
|
+
---
|
|
25
|
+
|
|
26
|
+
## Core Rules (Non-Negotiable)
|
|
27
|
+
|
|
28
|
+
1. **Semantic chunking first** - Chunk by meaning, not token counts
|
|
29
|
+
2. **Batch embeddings always** - Generate embeddings in batches, never one request per chunk
|
|
30
|
+
3. **Hybrid search default** - Combine dense + sparse retrieval
|
|
31
|
+
4. **Reranking required** - Never trust first-stage retrieval alone
|
|
32
|
+
5. **Evaluation mandatory** - Measure retrieval quality separately
|
|
33
|
+
|
|
34
|
+
---
|
|
35
|
+
|
|
36
|
+
## RAG Architecture Overview
|
|
37
|
+
|
|
38
|
+
```
|
|
39
|
+
┌─────────────┐ ┌─────────────┐ ┌─────────────┐
|
|
40
|
+
│ Documents │───▷│ Chunking │───▷│ Embedding │
|
|
41
|
+
└─────────────┘ └─────────────┘ └─────────────┘
|
|
42
|
+
│
|
|
43
|
+
▼
|
|
44
|
+
┌─────────────┐ ┌─────────────┐ ┌─────────────┐
|
|
45
|
+
│ Response │◁───│ LLM │◁───│ Retrieval │
|
|
46
|
+
└─────────────┘ └─────────────┘ └─────────────┘
|
|
47
|
+
▲
|
|
48
|
+
│
|
|
49
|
+
┌─────────────┐
|
|
50
|
+
│ Reranking │
|
|
51
|
+
└─────────────┘
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
---
|
|
55
|
+
|
|
56
|
+
## Vector Database Selection
|
|
57
|
+
|
|
58
|
+
| Database | Type | Best For | Pricing |
|
|
59
|
+
| ------------ | ----------- | ---------------------------- | ----------- |
|
|
60
|
+
| **Pinecone** | Managed | Production, scalable | Pay-per-use |
|
|
61
|
+
| **Weaviate** | Open-source | Hybrid search, self-hosted | Free (OSS) |
|
|
62
|
+
| **Chroma** | Embedded | Prototyping, local dev | Free |
|
|
63
|
+
| **Qdrant** | Open-source | Fast filtering, on-premise | Free (OSS) |
|
|
64
|
+
| **pgvector** | Extension | PostgreSQL integration | Free |
|
|
65
|
+
| **Milvus** | Open-source | High performance, enterprise | Free (OSS) |
|
|
66
|
+
|
|
67
|
+
### Decision Tree
|
|
68
|
+
|
|
69
|
+
```
|
|
70
|
+
What's your scale?
|
|
71
|
+
│
|
|
72
|
+
├─ Prototyping / Small scale?
|
|
73
|
+
│ └─ → Chroma (embedded, zero setup)
|
|
74
|
+
│
|
|
75
|
+
├─ Already using PostgreSQL?
|
|
76
|
+
│ └─ → pgvector (integrated, no new infra)
|
|
77
|
+
│
|
|
78
|
+
├─ Production, managed service?
|
|
79
|
+
│ └─ → Pinecone (scalable, low ops)
|
|
80
|
+
│
|
|
81
|
+
└─ Self-hosted, enterprise?
|
|
82
|
+
└─ → Qdrant or Milvus (full control)
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
---
|
|
86
|
+
|
|
87
|
+
## Embedding Model Selection
|
|
88
|
+
|
|
89
|
+
| Model | Dimensions | Speed | Quality | Cost |
|
|
90
|
+
| ------------------------ | ---------- | ------- | ------- | ------------ |
|
|
91
|
+
| `text-embedding-3-small` | 1536 | Fast | Good | $0.02/1M |
|
|
92
|
+
| `text-embedding-3-large` | 3072 | Medium | Best | $0.13/1M |
|
|
93
|
+
| `bge-large-en-v1.5` | 1024 | Fast | Best | Free (local) |
|
|
94
|
+
| `all-MiniLM-L6-v2` | 384 | Fastest | Good | Free (local) |
|
|
95
|
+
| `e5-large-v2` | 1024 | Medium | Best | Free (local) |
|
|
96
|
+
|
|
97
|
+
**Recommendation:** Start with `text-embedding-3-small`, evaluate, upgrade if needed.
|
|
98
|
+
|
|
99
|
+
---
|
|
100
|
+
|
|
101
|
+
## Chunking Strategies
|
|
102
|
+
|
|
103
|
+
### 1. Recursive Character Splitting (Default)
|
|
104
|
+
|
|
105
|
+
```python
|
|
106
|
+
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
|
107
|
+
|
|
108
|
+
splitter = RecursiveCharacterTextSplitter(
|
|
109
|
+
chunk_size=1000,
|
|
110
|
+
chunk_overlap=200,
|
|
111
|
+
separators=["\n\n", "\n", " ", ""]
|
|
112
|
+
)
|
|
113
|
+
chunks = splitter.split_documents(documents)
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
### 2. Semantic Chunking (Recommended)
|
|
117
|
+
|
|
118
|
+
```python
|
|
119
|
+
from langchain_experimental.text_splitter import SemanticChunker
|
|
120
|
+
from langchain.embeddings import OpenAIEmbeddings
|
|
121
|
+
|
|
122
|
+
splitter = SemanticChunker(
|
|
123
|
+
embeddings=OpenAIEmbeddings(),
|
|
124
|
+
breakpoint_threshold_type="percentile"
|
|
125
|
+
)
|
|
126
|
+
chunks = splitter.split_documents(documents)
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
### 3. Markdown/Code Aware
|
|
130
|
+
|
|
131
|
+
```python
|
|
132
|
+
from langchain.text_splitter import MarkdownHeaderTextSplitter
|
|
133
|
+
|
|
134
|
+
headers = [
|
|
135
|
+
("#", "h1"),
|
|
136
|
+
("##", "h2"),
|
|
137
|
+
("###", "h3"),
|
|
138
|
+
]
|
|
139
|
+
splitter = MarkdownHeaderTextSplitter(headers_to_split_on=headers)
|
|
140
|
+
```
|
|
141
|
+
|
|
142
|
+
### Chunking Best Practices
|
|
143
|
+
|
|
144
|
+
| Parameter | Recommended | Why |
|
|
145
|
+
| ---------- | -------------- | ------------------------------ |
|
|
146
|
+
| Chunk size | 500-1000 chars | Balance context vs specificity |
|
|
147
|
+
| Overlap | 10-20% | Preserve context at boundaries |
|
|
148
|
+
| Separators | Semantic | Respect document structure |
|
|
149
|
+
|
|
150
|
+
---
|
|
151
|
+
|
|
152
|
+
## Retrieval Strategies
|
|
153
|
+
|
|
154
|
+
### 1. Dense Retrieval (Vector Similarity)
|
|
155
|
+
|
|
156
|
+
```python
|
|
157
|
+
# Basic vector search
|
|
158
|
+
results = vectorstore.similarity_search(query, k=5)
|
|
159
|
+
|
|
160
|
+
# With score threshold
|
|
161
|
+
results = vectorstore.similarity_search_with_relevance_scores(
|
|
162
|
+
query,
|
|
163
|
+
k=10,
|
|
164
|
+
score_threshold=0.7
|
|
165
|
+
)
|
|
166
|
+
```
|
|
167
|
+
|
|
168
|
+
### 2. Sparse Retrieval (BM25/Keyword)
|
|
169
|
+
|
|
170
|
+
```python
|
|
171
|
+
from langchain.retrievers import BM25Retriever
|
|
172
|
+
|
|
173
|
+
bm25 = BM25Retriever.from_documents(documents)
|
|
174
|
+
bm25.k = 5
|
|
175
|
+
results = bm25.get_relevant_documents(query)
|
|
176
|
+
```
|
|
177
|
+
|
|
178
|
+
### 3. Hybrid Search (Recommended)
|
|
179
|
+
|
|
180
|
+
```python
|
|
181
|
+
from langchain.retrievers import EnsembleRetriever
|
|
182
|
+
|
|
183
|
+
ensemble = EnsembleRetriever(
|
|
184
|
+
retrievers=[bm25_retriever, vector_retriever],
|
|
185
|
+
weights=[0.3, 0.7] # Favor semantic
|
|
186
|
+
)
|
|
187
|
+
results = ensemble.get_relevant_documents(query)
|
|
188
|
+
```
|
|
189
|
+
|
|
190
|
+
### 4. Multi-Query Retrieval
|
|
191
|
+
|
|
192
|
+
```python
|
|
193
|
+
from langchain.retrievers.multi_query import MultiQueryRetriever
|
|
194
|
+
|
|
195
|
+
retriever = MultiQueryRetriever.from_llm(
|
|
196
|
+
retriever=vectorstore.as_retriever(),
|
|
197
|
+
llm=llm
|
|
198
|
+
)
|
|
199
|
+
# Generates multiple query variations automatically
|
|
200
|
+
```
|
|
201
|
+
|
|
202
|
+
---
|
|
203
|
+
|
|
204
|
+
## Reranking Patterns
|
|
205
|
+
|
|
206
|
+
### Cross-Encoder Reranking
|
|
207
|
+
|
|
208
|
+
```python
|
|
209
|
+
from sentence_transformers import CrossEncoder
|
|
210
|
+
|
|
211
|
+
reranker = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-6-v2')
|
|
212
|
+
|
|
213
|
+
# Get initial candidates
|
|
214
|
+
candidates = vectorstore.similarity_search(query, k=20)
|
|
215
|
+
|
|
216
|
+
# Rerank
|
|
217
|
+
pairs = [[query, doc.page_content] for doc in candidates]
|
|
218
|
+
scores = reranker.predict(pairs)
|
|
219
|
+
|
|
220
|
+
# Sort by score
|
|
221
|
+
reranked = sorted(zip(candidates, scores), key=lambda x: x[1], reverse=True)[:5]
|
|
222
|
+
```
|
|
223
|
+
|
|
224
|
+
### Maximal Marginal Relevance (MMR)
|
|
225
|
+
|
|
226
|
+
```python
|
|
227
|
+
# Balance relevance + diversity
|
|
228
|
+
results = vectorstore.max_marginal_relevance_search(
|
|
229
|
+
query,
|
|
230
|
+
k=5,
|
|
231
|
+
fetch_k=20,
|
|
232
|
+
lambda_mult=0.5 # 0=diversity, 1=relevance
|
|
233
|
+
)
|
|
234
|
+
```
|
|
235
|
+
|
|
236
|
+
---
|
|
237
|
+
|
|
238
|
+
## Advanced RAG Patterns
|
|
239
|
+
|
|
240
|
+
### Parent Document Retriever
|
|
241
|
+
|
|
242
|
+
```python
|
|
243
|
+
from langchain.retrievers import ParentDocumentRetriever
|
|
244
|
+
|
|
245
|
+
# Small chunks for retrieval, large for context
|
|
246
|
+
child_splitter = RecursiveCharacterTextSplitter(chunk_size=400)
|
|
247
|
+
parent_splitter = RecursiveCharacterTextSplitter(chunk_size=2000)
|
|
248
|
+
|
|
249
|
+
retriever = ParentDocumentRetriever(
|
|
250
|
+
vectorstore=vectorstore,
|
|
251
|
+
docstore=store,
|
|
252
|
+
child_splitter=child_splitter,
|
|
253
|
+
parent_splitter=parent_splitter
|
|
254
|
+
)
|
|
255
|
+
```
|
|
256
|
+
|
|
257
|
+
### Contextual Compression
|
|
258
|
+
|
|
259
|
+
```python
|
|
260
|
+
from langchain.retrievers import ContextualCompressionRetriever
|
|
261
|
+
from langchain.retrievers.document_compressors import LLMChainExtractor
|
|
262
|
+
|
|
263
|
+
compressor = LLMChainExtractor.from_llm(llm)
|
|
264
|
+
|
|
265
|
+
retriever = ContextualCompressionRetriever(
|
|
266
|
+
base_compressor=compressor,
|
|
267
|
+
base_retriever=vectorstore.as_retriever()
|
|
268
|
+
)
|
|
269
|
+
# Returns only relevant parts of documents
|
|
270
|
+
```
|
|
271
|
+
|
|
272
|
+
### Hypothetical Document Embedding (HyDE)
|
|
273
|
+
|
|
274
|
+
```python
|
|
275
|
+
from langchain.chains import HypotheticalDocumentEmbedder
|
|
276
|
+
|
|
277
|
+
hyde = HypotheticalDocumentEmbedder.from_llm(
|
|
278
|
+
llm=llm,
|
|
279
|
+
embeddings=embeddings,
|
|
280
|
+
prompt=hyde_prompt
|
|
281
|
+
)
|
|
282
|
+
# Generate hypothetical answer, then search for similar
|
|
283
|
+
```
|
|
284
|
+
|
|
285
|
+
---
|
|
286
|
+
|
|
287
|
+
## RAG Prompt Patterns
|
|
288
|
+
|
|
289
|
+
### Basic with Citations
|
|
290
|
+
|
|
291
|
+
```python
|
|
292
|
+
prompt = """Answer based on the context below. Include citations [1], [2], etc.
|
|
293
|
+
|
|
294
|
+
Context:
|
|
295
|
+
{context}
|
|
296
|
+
|
|
297
|
+
Question: {question}
|
|
298
|
+
|
|
299
|
+
Answer (with citations):"""
|
|
300
|
+
```
|
|
301
|
+
|
|
302
|
+
### Grounded with Confidence
|
|
303
|
+
|
|
304
|
+
```python
|
|
305
|
+
prompt = """Use ONLY the provided context. If you cannot answer, say "I don't know."
|
|
306
|
+
|
|
307
|
+
Context:
|
|
308
|
+
{context}
|
|
309
|
+
|
|
310
|
+
Question: {question}
|
|
311
|
+
|
|
312
|
+
Answer:
|
|
313
|
+
Confidence (0-100%):
|
|
314
|
+
Sources used:"""
|
|
315
|
+
```
|
|
316
|
+
|
|
317
|
+
### Chain-of-Thought RAG
|
|
318
|
+
|
|
319
|
+
```python
|
|
320
|
+
prompt = """Given the context, reason step by step to answer.
|
|
321
|
+
|
|
322
|
+
Context:
|
|
323
|
+
{context}
|
|
324
|
+
|
|
325
|
+
Question: {question}
|
|
326
|
+
|
|
327
|
+
Let me think step by step:
|
|
328
|
+
1. First, I'll identify relevant information...
|
|
329
|
+
2. Then, I'll synthesize...
|
|
330
|
+
3. Finally, I'll conclude...
|
|
331
|
+
|
|
332
|
+
Answer:"""
|
|
333
|
+
```
|
|
334
|
+
|
|
335
|
+
---
|
|
336
|
+
|
|
337
|
+
## Evaluation Metrics
|
|
338
|
+
|
|
339
|
+
```python
|
|
340
|
+
def evaluate_rag(qa_chain, test_cases):
|
|
341
|
+
metrics = {
|
|
342
|
+
'retrieval_precision': [], # Relevant in top-k
|
|
343
|
+
'retrieval_recall': [], # Found all relevant
|
|
344
|
+
'answer_relevance': [], # Answer matches question
|
|
345
|
+
'groundedness': [], # Answer from context only
|
|
346
|
+
'faithfulness': [], # No hallucination
|
|
347
|
+
}
|
|
348
|
+
|
|
349
|
+
for test in test_cases:
|
|
350
|
+
result = qa_chain({"query": test['question']})
|
|
351
|
+
|
|
352
|
+
# Retrieval metrics
|
|
353
|
+
retrieved_ids = [d.id for d in result['source_documents']]
|
|
354
|
+
precision = len(set(retrieved_ids) & set(test['relevant_ids'])) / len(retrieved_ids)
|
|
355
|
+
recall = len(set(retrieved_ids) & set(test['relevant_ids'])) / len(test['relevant_ids'])
|
|
356
|
+
|
|
357
|
+
metrics['retrieval_precision'].append(precision)
|
|
358
|
+
metrics['retrieval_recall'].append(recall)
|
|
359
|
+
|
|
360
|
+
# Use LLM-as-judge for semantic metrics
|
|
361
|
+
# ...
|
|
362
|
+
|
|
363
|
+
return {k: (sum(v) / len(v) if v else None) for k, v in metrics.items()}  # None when a metric was not collected
|
|
364
|
+
```
|
|
365
|
+
|
|
366
|
+
---
|
|
367
|
+
|
|
368
|
+
## Production Considerations
|
|
369
|
+
|
|
370
|
+
### Metadata for Filtering
|
|
371
|
+
|
|
372
|
+
```python
|
|
373
|
+
# Add metadata during indexing
|
|
374
|
+
for doc in documents:
|
|
375
|
+
doc.metadata = {
|
|
376
|
+
"source": doc.metadata.get("source"),
|
|
377
|
+
"date": doc.metadata.get("date"),
|
|
378
|
+
"category": classify(doc.page_content),
|
|
379
|
+
"author": extract_author(doc),
|
|
380
|
+
}
|
|
381
|
+
|
|
382
|
+
# Filter during retrieval
|
|
383
|
+
results = vectorstore.similarity_search(
|
|
384
|
+
query,
|
|
385
|
+
filter={"category": "technical", "date": {"$gte": "2024-01-01"}},
|
|
386
|
+
k=5
|
|
387
|
+
)
|
|
388
|
+
```
|
|
389
|
+
|
|
390
|
+
### Caching Strategy
|
|
391
|
+
|
|
392
|
+
```python
|
|
393
|
+
import langchain
from langchain.cache import RedisSemanticCache
|
|
394
|
+
|
|
395
|
+
langchain.llm_cache = RedisSemanticCache(
|
|
396
|
+
redis_url="redis://localhost:6379",
|
|
397
|
+
embedding=embeddings,
|
|
398
|
+
score_threshold=0.95
|
|
399
|
+
)
|
|
400
|
+
```
|
|
401
|
+
|
|
402
|
+
---
|
|
403
|
+
|
|
404
|
+
## Anti-Patterns
|
|
405
|
+
|
|
406
|
+
| ❌ Don't | ✅ Do |
|
|
407
|
+
| ------------------------------ | ----------------------------------- |
|
|
408
|
+
| Fixed-size chunking only | Semantic chunking + structure-aware |
|
|
409
|
+
| Pure vector search | Hybrid search (dense + sparse) |
|
|
410
|
+
| Use first retrieval results | Rerank before generation |
|
|
411
|
+
| Same embedding for all content | Evaluate per content type |
|
|
412
|
+
| Cram max context into prompt | Use relevance thresholds |
|
|
413
|
+
| Measure only final answer | Evaluate retrieval separately |
|
|
414
|
+
| Ignore metadata | Add rich metadata for filtering |
|
|
415
|
+
|
|
416
|
+
---
|
|
417
|
+
|
|
418
|
+
## Production Checklist
|
|
419
|
+
|
|
420
|
+
Before deployment:
|
|
421
|
+
|
|
422
|
+
- [ ] Semantic chunking implemented?
|
|
423
|
+
- [ ] Hybrid search configured?
|
|
424
|
+
- [ ] Reranking step added?
|
|
425
|
+
- [ ] Metadata extraction automated?
|
|
426
|
+
- [ ] Retrieval quality measured?
|
|
427
|
+
- [ ] Embedding refresh strategy?
|
|
428
|
+
- [ ] Access control for documents?
|
|
429
|
+
- [ ] Caching for repeated queries?
|
|
430
|
+
|
|
431
|
+
---
|
|
432
|
+
|
|
433
|
+
## Related Skills
|
|
434
|
+
|
|
435
|
+
| Need | Skill |
|
|
436
|
+
| -------------------- | -------------------- |
|
|
437
|
+
| LLM prompt design | `prompt-engineering` |
|
|
438
|
+
| Vector DB (Postgres) | `postgres-patterns` |
|
|
439
|
+
| Redis caching | `redis-patterns` |
|
|
440
|
+
| API design | `api-patterns` |
|
|
441
|
+
|
|
442
|
+
---
|
|
443
|
+
|
|
444
|
+
> **Remember:** RAG is only as good as your retrieval. Invest 80% of effort in retrieval quality, 20% in generation.
|