maestro-bundle 1.3.1 → 1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +19 -6
- package/package.json +1 -1
- package/src/cli.mjs +12 -1
- package/templates/bundle-ai-agents/AGENTS.md +6 -0
- package/templates/bundle-ai-agents/PRD_TEMPLATE.md +161 -0
- package/templates/bundle-ai-agents/skills/agent-orchestration/SKILL.md +107 -41
- package/templates/bundle-ai-agents/skills/agent-orchestration/references/graph-patterns.md +50 -0
- package/templates/bundle-ai-agents/skills/agent-orchestration/references/routing-strategies.md +47 -0
- package/templates/bundle-ai-agents/skills/api-design/SKILL.md +125 -16
- package/templates/bundle-ai-agents/skills/api-design/references/pydantic-patterns.md +72 -0
- package/templates/bundle-ai-agents/skills/api-design/references/rest-conventions.md +51 -0
- package/templates/bundle-ai-agents/skills/clean-architecture/SKILL.md +113 -21
- package/templates/bundle-ai-agents/skills/clean-architecture/references/dependency-injection.md +60 -0
- package/templates/bundle-ai-agents/skills/clean-architecture/references/layer-rules.md +56 -0
- package/templates/bundle-ai-agents/skills/context-engineering/SKILL.md +104 -36
- package/templates/bundle-ai-agents/skills/context-engineering/references/compression-techniques.md +76 -0
- package/templates/bundle-ai-agents/skills/context-engineering/references/context-budget-calculator.md +45 -0
- package/templates/bundle-ai-agents/skills/database-modeling/SKILL.md +146 -19
- package/templates/bundle-ai-agents/skills/database-modeling/references/index-strategies.md +48 -0
- package/templates/bundle-ai-agents/skills/database-modeling/references/naming-conventions.md +27 -0
- package/templates/bundle-ai-agents/skills/docker-containerization/SKILL.md +124 -15
- package/templates/bundle-ai-agents/skills/docker-containerization/references/compose-patterns.md +97 -0
- package/templates/bundle-ai-agents/skills/docker-containerization/references/dockerfile-checklist.md +37 -0
- package/templates/bundle-ai-agents/skills/eval-testing/SKILL.md +113 -25
- package/templates/bundle-ai-agents/skills/eval-testing/references/eval-types.md +52 -0
- package/templates/bundle-ai-agents/skills/eval-testing/references/golden-dataset-template.md +59 -0
- package/templates/bundle-ai-agents/skills/memory-management/SKILL.md +112 -28
- package/templates/bundle-ai-agents/skills/memory-management/references/memory-tiers.md +41 -0
- package/templates/bundle-ai-agents/skills/memory-management/references/namespace-conventions.md +41 -0
- package/templates/bundle-ai-agents/skills/prompt-engineering/SKILL.md +139 -47
- package/templates/bundle-ai-agents/skills/prompt-engineering/references/anti-patterns.md +59 -0
- package/templates/bundle-ai-agents/skills/prompt-engineering/references/prompt-templates.md +75 -0
- package/templates/bundle-ai-agents/skills/rag-pipeline/SKILL.md +104 -27
- package/templates/bundle-ai-agents/skills/rag-pipeline/references/chunking-strategies.md +27 -0
- package/templates/bundle-ai-agents/skills/rag-pipeline/references/embedding-models.md +31 -0
- package/templates/bundle-ai-agents/skills/rag-pipeline/references/rag-evaluation.md +39 -0
- package/templates/bundle-ai-agents/skills/testing-strategy/SKILL.md +127 -18
- package/templates/bundle-ai-agents/skills/testing-strategy/references/fixture-patterns.md +81 -0
- package/templates/bundle-ai-agents/skills/testing-strategy/references/naming-conventions.md +69 -0
- package/templates/bundle-base/skills/branch-strategy/SKILL.md +134 -21
- package/templates/bundle-base/skills/branch-strategy/references/branch-rules.md +40 -0
- package/templates/bundle-base/skills/code-review/SKILL.md +123 -38
- package/templates/bundle-base/skills/code-review/references/review-checklist.md +45 -0
- package/templates/bundle-base/skills/commit-pattern/SKILL.md +98 -39
- package/templates/bundle-base/skills/commit-pattern/references/conventional-commits.md +40 -0
- package/templates/bundle-data-pipeline/AGENTS.md +6 -0
- package/templates/bundle-data-pipeline/PRD_TEMPLATE.md +161 -0
- package/templates/bundle-data-pipeline/skills/data-preprocessing/SKILL.md +110 -19
- package/templates/bundle-data-pipeline/skills/data-preprocessing/references/pandas-cheatsheet.md +63 -0
- package/templates/bundle-data-pipeline/skills/data-preprocessing/references/pandera-schemas.md +44 -0
- package/templates/bundle-data-pipeline/skills/docker-containerization/SKILL.md +132 -16
- package/templates/bundle-data-pipeline/skills/docker-containerization/references/compose-patterns.md +82 -0
- package/templates/bundle-data-pipeline/skills/docker-containerization/references/dockerfile-best-practices.md +57 -0
- package/templates/bundle-data-pipeline/skills/feature-engineering/SKILL.md +143 -45
- package/templates/bundle-data-pipeline/skills/feature-engineering/references/encoding-guide.md +41 -0
- package/templates/bundle-data-pipeline/skills/feature-engineering/references/scaling-guide.md +38 -0
- package/templates/bundle-data-pipeline/skills/mlops-pipeline/SKILL.md +156 -37
- package/templates/bundle-data-pipeline/skills/mlops-pipeline/references/mlflow-commands.md +69 -0
- package/templates/bundle-data-pipeline/skills/model-training/SKILL.md +152 -33
- package/templates/bundle-data-pipeline/skills/model-training/references/evaluation-metrics.md +52 -0
- package/templates/bundle-data-pipeline/skills/model-training/references/model-selection-guide.md +41 -0
- package/templates/bundle-data-pipeline/skills/rag-pipeline/SKILL.md +127 -39
- package/templates/bundle-data-pipeline/skills/rag-pipeline/references/chunking-strategies.md +51 -0
- package/templates/bundle-data-pipeline/skills/rag-pipeline/references/embedding-models.md +49 -0
- package/templates/bundle-frontend-spa/AGENTS.md +6 -0
- package/templates/bundle-frontend-spa/PRD_TEMPLATE.md +161 -0
- package/templates/bundle-frontend-spa/skills/authentication/SKILL.md +196 -13
- package/templates/bundle-frontend-spa/skills/authentication/references/jwt-security.md +41 -0
- package/templates/bundle-frontend-spa/skills/component-design/SKILL.md +191 -41
- package/templates/bundle-frontend-spa/skills/component-design/references/accessibility-checklist.md +41 -0
- package/templates/bundle-frontend-spa/skills/component-design/references/tailwind-patterns.md +65 -0
- package/templates/bundle-frontend-spa/skills/e2e-testing/SKILL.md +241 -79
- package/templates/bundle-frontend-spa/skills/e2e-testing/references/playwright-selectors.md +66 -0
- package/templates/bundle-frontend-spa/skills/e2e-testing/references/test-patterns.md +82 -0
- package/templates/bundle-frontend-spa/skills/integration-api/SKILL.md +221 -31
- package/templates/bundle-frontend-spa/skills/integration-api/references/api-patterns.md +81 -0
- package/templates/bundle-frontend-spa/skills/react-patterns/SKILL.md +195 -70
- package/templates/bundle-frontend-spa/skills/react-patterns/references/component-checklist.md +22 -0
- package/templates/bundle-frontend-spa/skills/react-patterns/references/hook-patterns.md +63 -0
- package/templates/bundle-frontend-spa/skills/responsive-layout/SKILL.md +162 -22
- package/templates/bundle-frontend-spa/skills/responsive-layout/references/breakpoint-guide.md +63 -0
- package/templates/bundle-frontend-spa/skills/state-management/SKILL.md +158 -30
- package/templates/bundle-frontend-spa/skills/state-management/references/react-query-config.md +64 -0
- package/templates/bundle-frontend-spa/skills/state-management/references/state-patterns.md +78 -0
- package/templates/bundle-jhipster-microservices/AGENTS.md +6 -0
- package/templates/bundle-jhipster-microservices/PRD_TEMPLATE.md +161 -0
- package/templates/bundle-jhipster-microservices/skills/ci-cd-pipeline/SKILL.md +135 -45
- package/templates/bundle-jhipster-microservices/skills/ci-cd-pipeline/references/gitlab-ci-templates.md +93 -0
- package/templates/bundle-jhipster-microservices/skills/clean-architecture/SKILL.md +87 -21
- package/templates/bundle-jhipster-microservices/skills/clean-architecture/references/layer-rules.md +78 -0
- package/templates/bundle-jhipster-microservices/skills/ddd-tactical/SKILL.md +94 -25
- package/templates/bundle-jhipster-microservices/skills/ddd-tactical/references/ddd-patterns.md +48 -0
- package/templates/bundle-jhipster-microservices/skills/jhipster-angular/SKILL.md +63 -21
- package/templates/bundle-jhipster-microservices/skills/jhipster-angular/references/angular-microservices.md +40 -0
- package/templates/bundle-jhipster-microservices/skills/jhipster-angular/references/angular-structure.md +59 -0
- package/templates/bundle-jhipster-microservices/skills/jhipster-docker-k8s/SKILL.md +125 -91
- package/templates/bundle-jhipster-microservices/skills/jhipster-docker-k8s/references/docker-k8s-commands.md +68 -0
- package/templates/bundle-jhipster-microservices/skills/jhipster-entities/SKILL.md +72 -20
- package/templates/bundle-jhipster-microservices/skills/jhipster-entities/references/cross-service-entities.md +36 -0
- package/templates/bundle-jhipster-microservices/skills/jhipster-entities/references/jdl-types.md +56 -0
- package/templates/bundle-jhipster-microservices/skills/jhipster-gateway/SKILL.md +80 -8
- package/templates/bundle-jhipster-microservices/skills/jhipster-gateway/references/gateway-config.md +43 -0
- package/templates/bundle-jhipster-microservices/skills/jhipster-kafka/SKILL.md +115 -22
- package/templates/bundle-jhipster-microservices/skills/jhipster-kafka/references/kafka-events.md +39 -0
- package/templates/bundle-jhipster-microservices/skills/jhipster-registry/SKILL.md +92 -23
- package/templates/bundle-jhipster-microservices/skills/jhipster-registry/references/consul-config.md +61 -0
- package/templates/bundle-jhipster-microservices/skills/jhipster-service/SKILL.md +81 -18
- package/templates/bundle-jhipster-microservices/skills/jhipster-service/references/service-patterns.md +40 -0
- package/templates/bundle-jhipster-microservices/skills/testing-strategy/SKILL.md +101 -20
- package/templates/bundle-jhipster-microservices/skills/testing-strategy/references/test-naming.md +55 -0
- package/templates/bundle-jhipster-monorepo/AGENTS.md +6 -0
- package/templates/bundle-jhipster-monorepo/PRD_TEMPLATE.md +161 -0
- package/templates/bundle-jhipster-monorepo/skills/clean-architecture/SKILL.md +87 -21
- package/templates/bundle-jhipster-monorepo/skills/clean-architecture/references/layer-rules.md +78 -0
- package/templates/bundle-jhipster-monorepo/skills/ddd-tactical/SKILL.md +94 -25
- package/templates/bundle-jhipster-monorepo/skills/ddd-tactical/references/ddd-patterns.md +48 -0
- package/templates/bundle-jhipster-monorepo/skills/jhipster-angular/SKILL.md +99 -52
- package/templates/bundle-jhipster-monorepo/skills/jhipster-angular/references/angular-structure.md +59 -0
- package/templates/bundle-jhipster-monorepo/skills/jhipster-entities/SKILL.md +89 -36
- package/templates/bundle-jhipster-monorepo/skills/jhipster-entities/references/jdl-types.md +56 -0
- package/templates/bundle-jhipster-monorepo/skills/jhipster-liquibase/SKILL.md +123 -23
- package/templates/bundle-jhipster-monorepo/skills/jhipster-liquibase/references/liquibase-operations.md +95 -0
- package/templates/bundle-jhipster-monorepo/skills/jhipster-security/SKILL.md +106 -19
- package/templates/bundle-jhipster-monorepo/skills/jhipster-security/references/security-checklist.md +47 -0
- package/templates/bundle-jhipster-monorepo/skills/jhipster-spring/SKILL.md +84 -16
- package/templates/bundle-jhipster-monorepo/skills/jhipster-spring/references/spring-layers.md +41 -0
- package/templates/bundle-jhipster-monorepo/skills/testing-strategy/SKILL.md +101 -20
- package/templates/bundle-jhipster-monorepo/skills/testing-strategy/references/test-naming.md +55 -0
|
@@ -1,23 +1,54 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: rag-pipeline
|
|
3
|
-
description:
|
|
3
|
+
description: Build complete RAG pipelines with ingestion, chunking, embedding, indexing, and retrieval using LangChain + pgvector. Use when implementing semantic search, answering questions over documents, or creating retrieval systems.
|
|
4
|
+
version: 1.0.0
|
|
5
|
+
author: Maestro
|
|
4
6
|
---
|
|
5
7
|
|
|
6
8
|
# RAG Pipeline
|
|
7
9
|
|
|
8
|
-
|
|
10
|
+
Build production-ready Retrieval-Augmented Generation pipelines with hybrid search, re-ranking, and quality evaluation.
|
|
9
11
|
|
|
12
|
+
## When to Use
|
|
13
|
+
- Building a semantic search system over documents
|
|
14
|
+
- Answering questions from a knowledge base (PDFs, Markdown, code)
|
|
15
|
+
- Creating a retrieval layer for an AI agent
|
|
16
|
+
- Indexing project documentation, skills, or bundles into a vector store
|
|
17
|
+
- Improving an existing RAG pipeline's accuracy or performance
|
|
18
|
+
|
|
19
|
+
## Available Operations
|
|
20
|
+
1. Ingest documents (load, split, enrich with metadata)
|
|
21
|
+
2. Generate embeddings and index into pgvector
|
|
22
|
+
3. Configure hybrid retrieval (semantic + keyword BM25)
|
|
23
|
+
4. Add re-ranking for precision
|
|
24
|
+
5. Build a query chain with LLM
|
|
25
|
+
6. Evaluate retrieval quality with golden datasets
|
|
26
|
+
|
|
27
|
+
## Multi-Step Workflow
|
|
28
|
+
|
|
29
|
+
### Step 1: Set Up Environment
|
|
30
|
+
|
|
31
|
+
Install required dependencies and verify database connectivity.
|
|
32
|
+
|
|
33
|
+
```bash
|
|
34
|
+
pip install langchain langchain-openai langchain-postgres langchain-community langchain-cohere pgvector rank-bm25
|
|
10
35
|
```
|
|
11
|
-
|
|
36
|
+
|
|
37
|
+
Verify pgvector is available:
|
|
38
|
+
|
|
39
|
+
```bash
|
|
40
|
+
psql $DATABASE_URL -c "CREATE EXTENSION IF NOT EXISTS vector;"
|
|
12
41
|
```
|
|
13
42
|
|
|
14
|
-
|
|
43
|
+
### Step 2: Ingest Documents
|
|
44
|
+
|
|
45
|
+
Load documents from the target directory and split into chunks with appropriate overlap.
|
|
15
46
|
|
|
16
47
|
```python
|
|
17
48
|
from langchain_community.document_loaders import DirectoryLoader, UnstructuredMarkdownLoader
|
|
18
49
|
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
|
19
50
|
|
|
20
|
-
#
|
|
51
|
+
# Load documents by type
|
|
21
52
|
loader = DirectoryLoader(
|
|
22
53
|
"./documents/",
|
|
23
54
|
glob="**/*.md",
|
|
@@ -25,7 +56,7 @@ loader = DirectoryLoader(
|
|
|
25
56
|
)
|
|
26
57
|
docs = loader.load()
|
|
27
58
|
|
|
28
|
-
#
|
|
59
|
+
# Split with Markdown-aware separators
|
|
29
60
|
splitter = RecursiveCharacterTextSplitter(
|
|
30
61
|
chunk_size=1000,
|
|
31
62
|
chunk_overlap=200,
|
|
@@ -34,10 +65,13 @@ splitter = RecursiveCharacterTextSplitter(
|
|
|
34
65
|
chunks = splitter.split_documents(docs)
|
|
35
66
|
```
|
|
36
67
|
|
|
37
|
-
|
|
68
|
+
### Step 3: Enrich Chunks with Metadata
|
|
69
|
+
|
|
70
|
+
Every chunk must carry metadata for filtering and traceability.
|
|
38
71
|
|
|
39
|
-
Cada chunk deve ter:
|
|
40
72
|
```python
|
|
73
|
+
from datetime import datetime
|
|
74
|
+
|
|
41
75
|
for chunk in chunks:
|
|
42
76
|
chunk.metadata.update({
|
|
43
77
|
"source": chunk.metadata.get("source", "unknown"),
|
|
@@ -47,7 +81,7 @@ for chunk in chunks:
|
|
|
47
81
|
})
|
|
48
82
|
```
|
|
49
83
|
|
|
50
|
-
|
|
84
|
+
### Step 4: Embed and Index into pgvector
|
|
51
85
|
|
|
52
86
|
```python
|
|
53
87
|
from langchain_openai import OpenAIEmbeddings
|
|
@@ -63,26 +97,26 @@ vectorstore = PGVector(
|
|
|
63
97
|
vectorstore.add_documents(chunks)
|
|
64
98
|
```
|
|
65
99
|
|
|
66
|
-
|
|
100
|
+
### Step 5: Configure Hybrid Retrieval
|
|
101
|
+
|
|
102
|
+
Combine semantic search with keyword-based BM25 using Reciprocal Rank Fusion.
|
|
67
103
|
|
|
68
104
|
```python
|
|
69
105
|
from langchain.retrievers import EnsembleRetriever
|
|
70
106
|
from langchain_community.retrievers import BM25Retriever
|
|
71
107
|
|
|
72
|
-
# Semântico
|
|
73
108
|
semantic_retriever = vectorstore.as_retriever(search_kwargs={"k": 20})
|
|
74
|
-
|
|
75
|
-
# Keyword
|
|
76
109
|
bm25_retriever = BM25Retriever.from_documents(chunks, k=20)
|
|
77
110
|
|
|
78
|
-
# Ensemble com RRF
|
|
79
111
|
hybrid_retriever = EnsembleRetriever(
|
|
80
112
|
retrievers=[semantic_retriever, bm25_retriever],
|
|
81
113
|
weights=[0.6, 0.4]
|
|
82
114
|
)
|
|
83
115
|
```
|
|
84
116
|
|
|
85
|
-
|
|
117
|
+
### Step 6: Add Re-Ranking
|
|
118
|
+
|
|
119
|
+
Use Cohere re-ranker to refine top-k results for higher precision.
|
|
86
120
|
|
|
87
121
|
```python
|
|
88
122
|
from langchain.retrievers import ContextualCompressionRetriever
|
|
@@ -95,18 +129,19 @@ final_retriever = ContextualCompressionRetriever(
|
|
|
95
129
|
)
|
|
96
130
|
```
|
|
97
131
|
|
|
98
|
-
|
|
132
|
+
### Step 7: Build Query Chain
|
|
99
133
|
|
|
100
134
|
```python
|
|
101
135
|
from langchain_core.prompts import ChatPromptTemplate
|
|
102
136
|
from langchain_core.output_parsers import StrOutputParser
|
|
137
|
+
from langchain_core.runnables import RunnablePassthrough
|
|
103
138
|
|
|
104
139
|
prompt = ChatPromptTemplate.from_template("""
|
|
105
|
-
|
|
106
|
-
|
|
140
|
+
Answer the question based only on the provided context.
|
|
141
|
+
If the answer is not in the context, say "I could not find that information."
|
|
107
142
|
|
|
108
|
-
|
|
109
|
-
|
|
143
|
+
Context: {context}
|
|
144
|
+
Question: {question}
|
|
110
145
|
""")
|
|
111
146
|
|
|
112
147
|
chain = (
|
|
@@ -116,13 +151,55 @@ chain = (
|
|
|
116
151
|
| StrOutputParser()
|
|
117
152
|
)
|
|
118
153
|
|
|
119
|
-
result = chain.invoke("
|
|
154
|
+
result = chain.invoke("Which skill should I use to create React components?")
|
|
120
155
|
```
|
|
121
156
|
|
|
122
|
-
|
|
157
|
+
### Step 8: Evaluate Retrieval Quality
|
|
158
|
+
|
|
159
|
+
Run evaluation against a golden dataset to measure retrieval accuracy.
|
|
160
|
+
|
|
161
|
+
```bash
|
|
162
|
+
python -m evals.run_rag_eval --dataset evals/rag_golden_dataset.json --min-score 0.8
|
|
163
|
+
```
|
|
123
164
|
|
|
124
|
-
|
|
125
|
-
-
|
|
126
|
-
-
|
|
127
|
-
-
|
|
128
|
-
|
|
165
|
+
## Resources
|
|
166
|
+
- `references/chunking-strategies.md` - Guidance on chunk sizes and overlap for different document types
|
|
167
|
+
- `references/embedding-models.md` - Comparison of embedding models and their trade-offs
|
|
168
|
+
- `references/rag-evaluation.md` - How to build golden datasets and measure RAG quality
|
|
169
|
+
|
|
170
|
+
## Examples
|
|
171
|
+
|
|
172
|
+
### Example 1: Index Project Documentation
|
|
173
|
+
User asks: "Set up RAG for our project docs so the agent can answer questions about our architecture."
|
|
174
|
+
Response approach:
|
|
175
|
+
1. Scan the `docs/` directory for Markdown files
|
|
176
|
+
2. Split with Markdown-aware separators (chunk_size=1000, overlap=200)
|
|
177
|
+
3. Enrich metadata with doc_type and source path
|
|
178
|
+
4. Embed with text-embedding-3-large and index into pgvector
|
|
179
|
+
5. Configure hybrid retrieval that fuses semantic search with BM25 keyword search
|
|
180
|
+
6. Wire up a query chain and test with sample questions
|
|
181
|
+
|
|
182
|
+
### Example 2: Improve Retrieval Accuracy
|
|
183
|
+
User asks: "Our RAG is returning irrelevant results for technical queries."
|
|
184
|
+
Response approach:
|
|
185
|
+
1. Check current chunk sizes -- may be too large or too small
|
|
186
|
+
2. Verify metadata filtering is applied for doc_type
|
|
187
|
+
3. Add Cohere re-ranking to refine top-k
|
|
188
|
+
4. Adjust ensemble weights (increase semantic weight for technical content)
|
|
189
|
+
5. Build a golden dataset of 10-20 question/answer pairs and run evals
|
|
190
|
+
|
|
191
|
+
### Example 3: Add a New Document Source
|
|
192
|
+
User asks: "Add our API specs (OpenAPI YAML) to the RAG pipeline."
|
|
193
|
+
Response approach:
|
|
194
|
+
1. Add a YAML loader to the ingestion pipeline
|
|
195
|
+
2. Configure appropriate splitter for structured YAML content
|
|
196
|
+
3. Set doc_type metadata to "api_spec"
|
|
197
|
+
4. Re-index and test retrieval with API-related queries
|
|
198
|
+
|
|
199
|
+
## Notes
|
|
200
|
+
- Always test chunks with real questions before deploying
|
|
201
|
+
- Keep metadata complete on all chunks for filtering and traceability
|
|
202
|
+
- Measure retrieval quality with a golden dataset (minimum 10 question/answer pairs)
|
|
203
|
+
- Re-ranking is critical for precision -- always enable it in production
|
|
204
|
+
- Implement a fallback response when retrieval returns no relevant results
|
|
205
|
+
- Monitor token costs: embedding large document sets can be expensive
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
# Chunking Strategies Reference
|
|
2
|
+
|
|
3
|
+
## Recommended Chunk Sizes by Document Type
|
|
4
|
+
|
|
5
|
+
| Document Type | Chunk Size | Overlap | Separators |
|
|
6
|
+
|---|---|---|---|
|
|
7
|
+
| Markdown docs | 1000 | 200 | `\n## `, `\n### `, `\n\n`, `\n` |
|
|
8
|
+
| Source code | 1500 | 300 | `\nclass `, `\ndef `, `\n\n`, `\n` |
|
|
9
|
+
| API specs (YAML/JSON) | 800 | 100 | `\n- `, `\n `, `\n\n` |
|
|
10
|
+
| PDF documents | 1200 | 250 | `\n\n`, `\n`, `. ` |
|
|
11
|
+
| Plain text | 1000 | 200 | `\n\n`, `\n`, `. `, ` ` |
|
|
12
|
+
|
|
13
|
+
## Guidelines
|
|
14
|
+
|
|
15
|
+
- **Too small** (< 500 characters): Loses context, retrieval finds fragments without meaning.
|
|
16
|
+
- **Too large** (> 2000 characters): Dilutes relevance, wastes context window space.
|
|
17
|
+
- **Overlap**: 15-25% of chunk_size prevents information loss at boundaries.
|
|
18
|
+
- **Separators**: Order matters -- RecursiveCharacterTextSplitter splits on the first separator in the list and only falls back to the next one for pieces that are still larger than chunk_size.
|
|
19
|
+
|
|
20
|
+
## Metadata to Attach
|
|
21
|
+
|
|
22
|
+
Every chunk should carry:
|
|
23
|
+
- `source` -- file path or URL
|
|
24
|
+
- `doc_type` -- classification (skill, prd, code, api_spec, etc.)
|
|
25
|
+
- `language` -- content language for multilingual pipelines
|
|
26
|
+
- `created_at` -- timestamp for freshness filtering
|
|
27
|
+
- `section_title` -- nearest heading for context
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
# Embedding Models Reference
|
|
2
|
+
|
|
3
|
+
## Model Comparison
|
|
4
|
+
|
|
5
|
+
| Model | Dimensions | Max Tokens | Cost | Quality |
|
|
6
|
+
|---|---|---|---|---|
|
|
7
|
+
| text-embedding-3-large | 3072 (or 1536) | 8191 | $0.13/1M tokens | Best |
|
|
8
|
+
| text-embedding-3-small | 1536 | 8191 | $0.02/1M tokens | Good |
|
|
9
|
+
| text-embedding-ada-002 | 1536 | 8191 | $0.10/1M tokens | Legacy |
|
|
10
|
+
|
|
11
|
+
## Recommendations
|
|
12
|
+
|
|
13
|
+
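- **Production**: Use `text-embedding-3-large` with `dimensions=1536` for the best quality/storage balance; the API price per token is the same, but smaller vectors cut storage and stay under pgvector's 2,000-dimension limit for HNSW/IVFFlat indexes on the `vector` type.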
|
|
14
|
+
- **Development/Prototyping**: Use `text-embedding-3-small` to reduce costs.
|
|
15
|
+
- **Consistency**: Never mix embedding models in the same collection -- re-embed everything if you switch.
|
|
16
|
+
|
|
17
|
+
## pgvector Index Types
|
|
18
|
+
|
|
19
|
+
| Index | Build Speed | Query Speed | Recall | Use Case |
|
|
20
|
+
|---|---|---|---|---|
|
|
21
|
+
| HNSW | Slow | Fast | High | Production (< 10M vectors) |
|
|
22
|
+
| IVFFlat | Fast | Medium | Medium | Large datasets, quick setup |
|
|
23
|
+
| None (brute force) | N/A | Slow | Perfect | Small datasets (< 50k) |
|
|
24
|
+
|
|
25
|
+
### Recommended HNSW Settings
|
|
26
|
+
|
|
27
|
+
```sql
|
|
28
|
+
CREATE INDEX idx_embeddings ON documents
|
|
29
|
+
USING hnsw (embedding vector_cosine_ops)
|
|
30
|
+
WITH (m = 16, ef_construction = 200);
|
|
31
|
+
```
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
# RAG Evaluation Reference
|
|
2
|
+
|
|
3
|
+
## Golden Dataset Format
|
|
4
|
+
|
|
5
|
+
```json
|
|
6
|
+
{
|
|
7
|
+
"evals": [
|
|
8
|
+
{
|
|
9
|
+
"id": "rag-eval-001",
|
|
10
|
+
"question": "Which skill handles React component creation?",
|
|
11
|
+
"expected_answer": "The react-patterns skill covers component creation.",
|
|
12
|
+
"expected_sources": ["skills/react-patterns/SKILL.md"],
|
|
13
|
+
"relevance_threshold": 0.8
|
|
14
|
+
}
|
|
15
|
+
]
|
|
16
|
+
}
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
## Key Metrics
|
|
20
|
+
|
|
21
|
+
| Metric | What It Measures | Target |
|
|
22
|
+
|---|---|---|
|
|
23
|
+
| Retrieval Precision | % of retrieved docs that are relevant | > 0.7 |
|
|
24
|
+
| Retrieval Recall | % of relevant docs that are retrieved | > 0.8 |
|
|
25
|
+
| Faithfulness | Answer grounded in retrieved context | > 0.9 |
|
|
26
|
+
| Answer Relevancy | Answer addresses the question | > 0.85 |
|
|
27
|
+
|
|
28
|
+
## Evaluation Workflow
|
|
29
|
+
|
|
30
|
+
1. Build golden dataset with 20+ question/answer/source triples.
|
|
31
|
+
2. Run retrieval for each question and compare against expected sources.
|
|
32
|
+
3. Generate answers and evaluate faithfulness with LLM-as-judge.
|
|
33
|
+
4. Track metrics over time -- regression means something changed in ingestion or indexing.
|
|
34
|
+
|
|
35
|
+
## Common Failure Modes
|
|
36
|
+
|
|
37
|
+
- **Low precision**: Chunks too large, no re-ranking, or poor metadata filtering.
|
|
38
|
+
- **Low recall**: Chunks too small, missing document sources, or embedding model mismatch.
|
|
39
|
+
- **Low faithfulness**: LLM hallucinating beyond context -- tighten the system prompt.
|
|
@@ -1,27 +1,54 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: testing-strategy
|
|
3
|
-
description:
|
|
3
|
+
description: Implement testing strategies with unit, integration, and e2e tests using Pytest or JUnit. Use when writing tests, defining test strategy, or improving test coverage.
|
|
4
|
+
version: 1.0.0
|
|
5
|
+
author: Maestro
|
|
4
6
|
---
|
|
5
7
|
|
|
6
|
-
#
|
|
8
|
+
# Testing Strategy
|
|
7
9
|
|
|
8
|
-
|
|
10
|
+
Implement the testing pyramid with domain unit tests, integration tests for repositories, and e2e tests for API endpoints, following consistent naming and fixture patterns.
|
|
11
|
+
|
|
12
|
+
## When to Use
|
|
13
|
+
- Writing tests for new features or bug fixes
|
|
14
|
+
- Setting up test infrastructure for a new project
|
|
15
|
+
- Improving test coverage on existing code
|
|
16
|
+
- Reviewing test quality and naming conventions
|
|
17
|
+
- Configuring CI/CD test pipelines
|
|
18
|
+
|
|
19
|
+
## Available Operations
|
|
20
|
+
1. Write unit tests for domain entities and value objects
|
|
21
|
+
2. Write integration tests for repositories and external services
|
|
22
|
+
3. Write e2e tests for API endpoints
|
|
23
|
+
4. Configure test fixtures and factories
|
|
24
|
+
5. Run tests and measure coverage
|
|
25
|
+
6. Set up test pipelines in CI/CD
|
|
26
|
+
|
|
27
|
+
## Multi-Step Workflow
|
|
28
|
+
|
|
29
|
+
### Step 1: Understand the Testing Pyramid
|
|
9
30
|
|
|
10
31
|
```
|
|
11
|
-
/ E2E \
|
|
12
|
-
/
|
|
13
|
-
/
|
|
32
|
+
/ E2E \ Few, slow, expensive
|
|
33
|
+
/ Integr. \ Moderate count
|
|
34
|
+
/ Unit Tests \ Many, fast, cheap
|
|
14
35
|
```
|
|
15
36
|
|
|
16
|
-
|
|
37
|
+
- **Unit tests**: Test domain logic in isolation. No database, no HTTP, no external services.
|
|
38
|
+
- **Integration tests**: Test adapters (repositories, clients) against real infrastructure.
|
|
39
|
+
- **E2E tests**: Test full API request/response cycles.
|
|
17
40
|
|
|
18
|
-
|
|
41
|
+
### Step 2: Write Unit Tests for Domain Entities
|
|
42
|
+
|
|
43
|
+
Test business rules without any infrastructure dependency.
|
|
19
44
|
|
|
20
45
|
```python
|
|
21
46
|
# tests/domain/test_demand.py
|
|
47
|
+
import pytest
|
|
48
|
+
|
|
22
49
|
class TestDemand:
|
|
23
50
|
def test_should_decompose_new_demand(self):
|
|
24
|
-
demand = Demand(id=DemandId.generate(), description="
|
|
51
|
+
demand = Demand(id=DemandId.generate(), description="Create CRUD")
|
|
25
52
|
planner = FakePlanner(tasks=[Task(...), Task(...)])
|
|
26
53
|
|
|
27
54
|
tasks = demand.decompose(planner)
|
|
@@ -30,14 +57,14 @@ class TestDemand:
|
|
|
30
57
|
assert demand.status == DemandStatus.PLANNED
|
|
31
58
|
|
|
32
59
|
def test_should_reject_decompose_if_already_planned(self):
|
|
33
|
-
demand = Demand(id=DemandId.generate(), description="
|
|
60
|
+
demand = Demand(id=DemandId.generate(), description="Create CRUD")
|
|
34
61
|
demand.decompose(FakePlanner(tasks=[Task(...)]))
|
|
35
62
|
|
|
36
63
|
with pytest.raises(DemandAlreadyDecomposedException):
|
|
37
64
|
demand.decompose(FakePlanner(tasks=[]))
|
|
38
65
|
|
|
39
66
|
def test_should_not_allow_more_than_20_tasks(self):
|
|
40
|
-
demand = Demand(id=DemandId.generate(), description="Mega
|
|
67
|
+
demand = Demand(id=DemandId.generate(), description="Mega project")
|
|
41
68
|
for i in range(20):
|
|
42
69
|
demand.add_task(Task(...))
|
|
43
70
|
|
|
@@ -45,9 +72,10 @@ class TestDemand:
|
|
|
45
72
|
demand.add_task(Task(...))
|
|
46
73
|
```
|
|
47
74
|
|
|
48
|
-
|
|
75
|
+
### Step 3: Write Unit Tests for Value Objects
|
|
49
76
|
|
|
50
77
|
```python
|
|
78
|
+
# tests/domain/test_compliance_score.py
|
|
51
79
|
class TestComplianceScore:
|
|
52
80
|
def test_passing_score(self):
|
|
53
81
|
score = ComplianceScore(85.0)
|
|
@@ -62,10 +90,16 @@ class TestComplianceScore:
|
|
|
62
90
|
ComplianceScore(150.0)
|
|
63
91
|
```
|
|
64
92
|
|
|
65
|
-
|
|
93
|
+
### Step 4: Write Integration Tests for Repositories
|
|
94
|
+
|
|
95
|
+
Test against a real database using fixtures with rollback.
|
|
66
96
|
|
|
67
97
|
```python
|
|
68
98
|
# tests/infrastructure/test_pg_demand_repository.py
|
|
99
|
+
import pytest
|
|
100
|
+
from sqlalchemy import create_engine
|
|
101
|
+
from sqlalchemy.orm import Session
|
|
102
|
+
|
|
69
103
|
@pytest.fixture
|
|
70
104
|
def db_session():
|
|
71
105
|
engine = create_engine(TEST_DATABASE_URL)
|
|
@@ -84,12 +118,87 @@ class TestPgDemandRepository:
|
|
|
84
118
|
assert found.description == "Test"
|
|
85
119
|
```
|
|
86
120
|
|
|
87
|
-
|
|
121
|
+
### Step 5: Run Tests and Measure Coverage
|
|
122
|
+
|
|
123
|
+
```bash
|
|
124
|
+
# Run all tests
|
|
125
|
+
pytest tests/ -v
|
|
126
|
+
|
|
127
|
+
# Run only unit tests
|
|
128
|
+
pytest tests/domain/ -v
|
|
129
|
+
|
|
130
|
+
# Run only integration tests
|
|
131
|
+
pytest tests/infrastructure/ -v --timeout=30
|
|
88
132
|
|
|
133
|
+
# Run with coverage report
|
|
134
|
+
pytest tests/ --cov=src --cov-report=term-missing --cov-fail-under=80
|
|
135
|
+
|
|
136
|
+
# Run specific test file
|
|
137
|
+
pytest tests/domain/test_demand.py -v
|
|
138
|
+
|
|
139
|
+
# Run tests matching a pattern
|
|
140
|
+
pytest tests/ -k "test_should_decompose" -v
|
|
89
141
|
```
|
|
90
|
-
test_should_<resultado>_when_<condição>
|
|
91
142
|
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
143
|
+
### Step 6: Set Up CI/CD Test Pipeline
|
|
144
|
+
|
|
145
|
+
```yaml
|
|
146
|
+
test:
|
|
147
|
+
stage: test
|
|
148
|
+
script:
|
|
149
|
+
- pip install -r requirements-test.txt
|
|
150
|
+
- pytest tests/domain/ -v --junitxml=reports/unit.xml
|
|
151
|
+
- pytest tests/infrastructure/ -v --junitxml=reports/integration.xml --timeout=60
|
|
152
|
+
- pytest tests/ --cov=src --cov-report=xml --cov-fail-under=80
|
|
153
|
+
artifacts:
|
|
154
|
+
reports:
|
|
155
|
+
junit:
|
|
156
|
+
- reports/*.xml
|
|
157
|
+
coverage_report:
|
|
158
|
+
coverage_format: cobertura
|
|
159
|
+
path: coverage.xml
|
|
95
160
|
```
|
|
161
|
+
|
|
162
|
+
## Resources
|
|
163
|
+
- `references/naming-conventions.md` - Test naming patterns and organization guidelines
|
|
164
|
+
- `references/fixture-patterns.md` - Common pytest fixture patterns and factories
|
|
165
|
+
|
|
166
|
+
## Examples
|
|
167
|
+
|
|
168
|
+
### Example 1: Write Tests for a New Feature
|
|
169
|
+
User asks: "Write tests for the new user registration feature."
|
|
170
|
+
Response approach:
|
|
171
|
+
1. Unit test the `User` entity: valid creation, email validation, password rules
|
|
172
|
+
2. Unit test the `RegisterUser` use case: happy path, duplicate email, invalid data
|
|
173
|
+
3. Integration test the `PgUserRepository`: save and find
|
|
174
|
+
4. E2e test the `POST /api/v1/users` endpoint: 201 on success, 422 on invalid data
|
|
175
|
+
5. Run: `pytest tests/ -v --cov=src`
|
|
176
|
+
|
|
177
|
+
### Example 2: Improve Test Coverage
|
|
178
|
+
User asks: "Our coverage is at 45%. Get it to 80%."
|
|
179
|
+
Response approach:
|
|
180
|
+
1. Run `pytest --cov=src --cov-report=term-missing` to find uncovered lines
|
|
181
|
+
2. Prioritize domain layer (business rules are the most valuable to test)
|
|
182
|
+
3. Add edge case tests for existing entities (error paths, boundary values)
|
|
183
|
+
4. Add integration tests for untested repositories
|
|
184
|
+
5. Skip e2e tests for now (lowest ROI per coverage point)
|
|
185
|
+
6. Target: domain 95%, application 85%, infrastructure 70%
|
|
186
|
+
|
|
187
|
+
### Example 3: Set Up Testing for a New Project
|
|
188
|
+
User asks: "Set up the test infrastructure for our new Python project."
|
|
189
|
+
Response approach:
|
|
190
|
+
1. Install dependencies: `pip install pytest pytest-cov pytest-asyncio pytest-timeout` (`pytest-timeout` provides the `--timeout` flag used in the test commands)
|
|
191
|
+
2. Create `conftest.py` with database session fixtures
|
|
192
|
+
3. Create directory structure: `tests/domain/`, `tests/application/`, `tests/infrastructure/`, `tests/api/`
|
|
193
|
+
4. Add `pytest.ini` or `pyproject.toml` with test configuration
|
|
194
|
+
5. Create a sample test to verify the setup
|
|
195
|
+
6. Add test commands to Makefile or scripts
|
|
196
|
+
|
|
197
|
+
## Notes
|
|
198
|
+
- Follow the naming convention: `test_should_<result>_when_<condition>`
|
|
199
|
+
- Unit tests must run without any external dependencies (no DB, no HTTP)
|
|
200
|
+
- Use fakes/stubs for dependencies in unit tests, not mocks (fakes are more maintainable)
|
|
201
|
+
- Integration tests should roll back database changes after each test
|
|
202
|
+
- Coverage threshold: 80% minimum, but focus on testing business rules, not getters/setters
|
|
203
|
+
- Run unit tests first in CI (fast feedback), integration tests second, e2e last
|
|
204
|
+
- Each test should test one behavior -- avoid testing multiple things in a single test
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
# Fixture Patterns Reference
|
|
2
|
+
|
|
3
|
+
## Database Session Fixture (with rollback)
|
|
4
|
+
|
|
5
|
+
```python
|
|
6
|
+
@pytest.fixture
|
|
7
|
+
def db_session():
|
|
8
|
+
engine = create_engine(TEST_DATABASE_URL)
|
|
9
|
+
connection = engine.connect()
|
|
10
|
+
transaction = connection.begin()
|
|
11
|
+
session = Session(bind=connection)
|
|
12
|
+
|
|
13
|
+
yield session
|
|
14
|
+
|
|
15
|
+
session.close()
|
|
16
|
+
transaction.rollback()
|
|
17
|
+
connection.close()
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
## Factory Fixtures
|
|
21
|
+
|
|
22
|
+
```python
|
|
23
|
+
@pytest.fixture
|
|
24
|
+
def make_demand():
|
|
25
|
+
def _make(
|
|
26
|
+
description: str = "Test demand",
|
|
27
|
+
status: DemandStatus = DemandStatus.CREATED,
|
|
28
|
+
) -> Demand:
|
|
29
|
+
return Demand(
|
|
30
|
+
id=DemandId.generate(),
|
|
31
|
+
description=description,
|
|
32
|
+
status=status,
|
|
33
|
+
)
|
|
34
|
+
return _make
|
|
35
|
+
|
|
36
|
+
# Usage in test
|
|
37
|
+
def test_something(make_demand):
|
|
38
|
+
demand = make_demand(description="Custom demand")
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
## Async Fixtures
|
|
42
|
+
|
|
43
|
+
```python
|
|
44
|
+
@pytest_asyncio.fixture
|
|
45
|
+
async def async_client(app):
|
|
46
|
+
async with AsyncClient(transport=ASGITransport(app=app), base_url="http://test") as client:
|
|
47
|
+
yield client
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
## Fake Implementations
|
|
51
|
+
|
|
52
|
+
```python
|
|
53
|
+
class FakeDemandRepository(DemandRepository):
|
|
54
|
+
def __init__(self):
|
|
55
|
+
self._demands: dict[str, Demand] = {}
|
|
56
|
+
|
|
57
|
+
def find_by_id(self, id: DemandId) -> Demand:
|
|
58
|
+
demand = self._demands.get(str(id))
|
|
59
|
+
if demand is None:
|
|
60
|
+
raise DemandNotFoundException(id)
|
|
61
|
+
return demand
|
|
62
|
+
|
|
63
|
+
def save(self, demand: Demand) -> None:
|
|
64
|
+
self._demands[str(demand.id)] = demand
|
|
65
|
+
|
|
66
|
+
class FakeEventBus(EventBus):
|
|
67
|
+
def __init__(self):
|
|
68
|
+
self.published_events: list[DomainEvent] = []
|
|
69
|
+
|
|
70
|
+
def publish(self, event: DomainEvent) -> None:
|
|
71
|
+
self.published_events.append(event)
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
## Fixture Scoping
|
|
75
|
+
|
|
76
|
+
| Scope | Lifecycle | Use For |
|
|
77
|
+
|---|---|---|
|
|
78
|
+
| `function` (default) | Each test | Most fixtures |
|
|
79
|
+
| `class` | Each test class | Shared setup within a class |
|
|
80
|
+
| `module` | Each test file | Expensive setup (DB schema) |
|
|
81
|
+
| `session` | Entire test run | One-time setup (create DB) |
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
# Test Naming Conventions Reference
|
|
2
|
+
|
|
3
|
+
## Test Method Naming
|
|
4
|
+
|
|
5
|
+
```
|
|
6
|
+
test_should_<expected_result>_when_<condition>
|
|
7
|
+
```
|
|
8
|
+
|
|
9
|
+
### Examples
|
|
10
|
+
|
|
11
|
+
```python
|
|
12
|
+
# Good names
|
|
13
|
+
test_should_return_error_when_email_is_invalid
|
|
14
|
+
test_should_decompose_demand_when_status_is_created
|
|
15
|
+
test_should_reject_merge_when_conflicts_exist
|
|
16
|
+
test_should_create_user_when_data_is_valid
|
|
17
|
+
test_should_raise_not_found_when_id_does_not_exist
|
|
18
|
+
|
|
19
|
+
# Bad names
|
|
20
|
+
test_demand() # What about demand?
|
|
21
|
+
test_error() # What error? When?
|
|
22
|
+
test_create() # Create what? Under what condition?
|
|
23
|
+
test_1() # Meaningless
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
## Test Class Naming
|
|
27
|
+
|
|
28
|
+
```python
|
|
29
|
+
class TestDemand: # Domain entity tests
|
|
30
|
+
class TestDecomposeDemand: # Use case tests
|
|
31
|
+
class TestPgDemandRepository: # Repository integration tests
|
|
32
|
+
class TestDemandEndpoints: # API e2e tests
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
## File Organization
|
|
36
|
+
|
|
37
|
+
```
|
|
38
|
+
tests/
|
|
39
|
+
domain/
|
|
40
|
+
test_demand.py
|
|
41
|
+
test_task.py
|
|
42
|
+
test_compliance_score.py
|
|
43
|
+
application/
|
|
44
|
+
test_decompose_demand.py
|
|
45
|
+
test_create_demand.py
|
|
46
|
+
infrastructure/
|
|
47
|
+
test_pg_demand_repository.py
|
|
48
|
+
test_pg_task_repository.py
|
|
49
|
+
api/
|
|
50
|
+
test_demand_endpoints.py
|
|
51
|
+
test_task_endpoints.py
|
|
52
|
+
conftest.py # Shared fixtures
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
## Test Structure (AAA Pattern)
|
|
56
|
+
|
|
57
|
+
```python
|
|
58
|
+
def test_should_decompose_demand_when_status_is_created(self):
|
|
59
|
+
# Arrange
|
|
60
|
+
demand = Demand(id=DemandId.generate(), description="Create CRUD")
|
|
61
|
+
planner = FakePlanner(tasks=[Task(...)])
|
|
62
|
+
|
|
63
|
+
# Act
|
|
64
|
+
tasks = demand.decompose(planner)
|
|
65
|
+
|
|
66
|
+
# Assert
|
|
67
|
+
assert len(tasks) == 1
|
|
68
|
+
assert demand.status == DemandStatus.PLANNED
|
|
69
|
+
```
|