ragmint 0.2.3__tar.gz → 0.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ragmint-0.2.3/src/ragmint.egg-info → ragmint-0.3.0}/PKG-INFO +84 -24
- {ragmint-0.2.3 → ragmint-0.3.0}/README.md +80 -22
- {ragmint-0.2.3 → ragmint-0.3.0}/pyproject.toml +5 -3
- ragmint-0.3.0/src/ragmint/core/embeddings.py +55 -0
- {ragmint-0.2.3 → ragmint-0.3.0}/src/ragmint/core/pipeline.py +8 -9
- ragmint-0.3.0/src/ragmint/core/retriever.py +148 -0
- {ragmint-0.2.3 → ragmint-0.3.0}/src/ragmint/explainer.py +5 -3
- ragmint-0.3.0/src/ragmint/tests/test_embeddings.py +46 -0
- {ragmint-0.2.3 → ragmint-0.3.0}/src/ragmint/tests/test_explainer_integration.py +1 -1
- {ragmint-0.2.3 → ragmint-0.3.0}/src/ragmint/tests/test_pipeline.py +3 -2
- {ragmint-0.2.3 → ragmint-0.3.0}/src/ragmint/tests/test_retriever.py +3 -2
- {ragmint-0.2.3 → ragmint-0.3.0}/src/ragmint/tests/test_tuner.py +1 -1
- {ragmint-0.2.3 → ragmint-0.3.0}/src/ragmint/tuner.py +62 -20
- {ragmint-0.2.3 → ragmint-0.3.0/src/ragmint.egg-info}/PKG-INFO +84 -24
- {ragmint-0.2.3 → ragmint-0.3.0}/src/ragmint.egg-info/SOURCES.txt +1 -0
- {ragmint-0.2.3 → ragmint-0.3.0}/src/ragmint.egg-info/requires.txt +3 -1
- ragmint-0.2.3/src/ragmint/core/embeddings.py +0 -19
- ragmint-0.2.3/src/ragmint/core/retriever.py +0 -33
- {ragmint-0.2.3 → ragmint-0.3.0}/LICENSE +0 -0
- {ragmint-0.2.3 → ragmint-0.3.0}/MANIFEST.in +0 -0
- {ragmint-0.2.3 → ragmint-0.3.0}/setup.cfg +0 -0
- {ragmint-0.2.3 → ragmint-0.3.0}/src/ragmint/__init__.py +0 -0
- {ragmint-0.2.3 → ragmint-0.3.0}/src/ragmint/__main__.py +0 -0
- {ragmint-0.2.3 → ragmint-0.3.0}/src/ragmint/autotuner.py +0 -0
- {ragmint-0.2.3 → ragmint-0.3.0}/src/ragmint/core/__init__.py +0 -0
- {ragmint-0.2.3 → ragmint-0.3.0}/src/ragmint/core/chunking.py +0 -0
- {ragmint-0.2.3 → ragmint-0.3.0}/src/ragmint/core/evaluation.py +0 -0
- {ragmint-0.2.3 → ragmint-0.3.0}/src/ragmint/core/reranker.py +0 -0
- {ragmint-0.2.3 → ragmint-0.3.0}/src/ragmint/experiments/__init__.py +0 -0
- {ragmint-0.2.3 → ragmint-0.3.0}/src/ragmint/experiments/validation_qa.json +0 -0
- {ragmint-0.2.3 → ragmint-0.3.0}/src/ragmint/leaderboard.py +0 -0
- {ragmint-0.2.3 → ragmint-0.3.0}/src/ragmint/optimization/__init__.py +0 -0
- {ragmint-0.2.3 → ragmint-0.3.0}/src/ragmint/optimization/search.py +0 -0
- {ragmint-0.2.3 → ragmint-0.3.0}/src/ragmint/tests/__init__.py +0 -0
- {ragmint-0.2.3 → ragmint-0.3.0}/src/ragmint/tests/conftest.py +0 -0
- {ragmint-0.2.3 → ragmint-0.3.0}/src/ragmint/tests/test_autotuner.py +0 -0
- {ragmint-0.2.3 → ragmint-0.3.0}/src/ragmint/tests/test_explainer.py +0 -0
- {ragmint-0.2.3 → ragmint-0.3.0}/src/ragmint/tests/test_integration_autotuner_ragmint.py +0 -0
- {ragmint-0.2.3 → ragmint-0.3.0}/src/ragmint/tests/test_leaderboard.py +0 -0
- {ragmint-0.2.3 → ragmint-0.3.0}/src/ragmint/tests/test_search.py +0 -0
- {ragmint-0.2.3 → ragmint-0.3.0}/src/ragmint/utils/__init__.py +0 -0
- {ragmint-0.2.3 → ragmint-0.3.0}/src/ragmint/utils/caching.py +0 -0
- {ragmint-0.2.3 → ragmint-0.3.0}/src/ragmint/utils/data_loader.py +0 -0
- {ragmint-0.2.3 → ragmint-0.3.0}/src/ragmint/utils/logger.py +0 -0
- {ragmint-0.2.3 → ragmint-0.3.0}/src/ragmint/utils/metrics.py +0 -0
- {ragmint-0.2.3 → ragmint-0.3.0}/src/ragmint.egg-info/dependency_links.txt +0 -0
- {ragmint-0.2.3 → ragmint-0.3.0}/src/ragmint.egg-info/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: ragmint
|
|
3
|
-
Version: 0.2.3
|
|
3
|
+
Version: 0.3.0
|
|
4
4
|
Summary: A modular framework for evaluating and optimizing RAG pipelines.
|
|
5
5
|
Author-email: Andre Oliveira <oandreoliveira@outlook.com>
|
|
6
6
|
License: Apache License 2.0
|
|
@@ -11,7 +11,7 @@ Keywords: RAG,LLM,retrieval,optimization,AI,evaluation
|
|
|
11
11
|
Requires-Python: >=3.9
|
|
12
12
|
Description-Content-Type: text/markdown
|
|
13
13
|
License-File: LICENSE
|
|
14
|
-
Requires-Dist: numpy
|
|
14
|
+
Requires-Dist: numpy<2.0.0
|
|
15
15
|
Requires-Dist: pandas>=2.0
|
|
16
16
|
Requires-Dist: scikit-learn>=1.3
|
|
17
17
|
Requires-Dist: openai>=1.0
|
|
@@ -24,6 +24,8 @@ Requires-Dist: pytest
|
|
|
24
24
|
Requires-Dist: colorama
|
|
25
25
|
Requires-Dist: google-generativeai>=0.8.0
|
|
26
26
|
Requires-Dist: supabase>=2.4.0
|
|
27
|
+
Requires-Dist: python-dotenv
|
|
28
|
+
Requires-Dist: sentence-transformers
|
|
27
29
|
Dynamic: license-file
|
|
28
30
|
|
|
29
31
|
# Ragmint
|
|
@@ -49,8 +51,8 @@ It provides a complete toolkit for **retriever selection**, **embedding model tu
|
|
|
49
51
|
- 🧠 **Explainability Layer** — interprets RAG performance via Gemini or Claude APIs
|
|
50
52
|
- 🏆 **Leaderboard Tracking** — stores and ranks experiment runs via JSON or external DB
|
|
51
53
|
- 🔍 **Built-in RAG evaluation metrics** — faithfulness, recall, BLEU, ROUGE, latency
|
|
52
|
-
- ⚙️ **Retrievers** — FAISS, Chroma,
|
|
53
|
-
- 🧩 **Embeddings** —
|
|
54
|
+
- ⚙️ **Retrievers** — FAISS, Chroma, scikit-learn
|
|
55
|
+
- 🧩 **Embeddings** — Hugging Face
|
|
54
56
|
- 💾 **Caching, experiment tracking, and reproducibility** out of the box
|
|
55
57
|
- 🧰 **Clean modular structure** for easy integration in research and production setups
|
|
56
58
|
|
|
@@ -94,15 +96,69 @@ optimization:
|
|
|
94
96
|
### 3️⃣ Manual Pipeline Usage
|
|
95
97
|
|
|
96
98
|
```python
|
|
97
|
-
from ragmint.
|
|
99
|
+
from ragmint.tuner import RAGMint
|
|
100
|
+
|
|
101
|
+
# Initialize RAGMint with available components
|
|
102
|
+
rag = RAGMint(
|
|
103
|
+
docs_path="data/docs/",
|
|
104
|
+
retrievers=["faiss", "chroma", "sklearn"],
|
|
105
|
+
embeddings=["all-MiniLM-L6-v2", "sentence-transformers/all-MiniLM-L12-v2"],
|
|
106
|
+
rerankers=["mmr"]
|
|
107
|
+
)
|
|
108
|
+
|
|
109
|
+
# Run optimization over 3 trials using the default validation set
|
|
110
|
+
best, results = rag.optimize(
|
|
111
|
+
validation_set=None,
|
|
112
|
+
metric="faithfulness",
|
|
113
|
+
trials=3
|
|
114
|
+
)
|
|
115
|
+
|
|
116
|
+
print("Best configuration:", best)
|
|
117
|
+
```
|
|
118
|
+
---
|
|
119
|
+
# 🧩 Embeddings and Retrievers
|
|
120
|
+
|
|
121
|
+
**Ragmint** supports a flexible set of embeddings and retrievers, allowing you to adapt easily to various **RAG architectures**.
|
|
122
|
+
|
|
123
|
+
---
|
|
124
|
+
|
|
125
|
+
## 🔤 Available Embeddings (Hugging Face / OpenAI)
|
|
126
|
+
|
|
127
|
+
You can select from the following models:
|
|
128
|
+
|
|
129
|
+
* `sentence-transformers/all-MiniLM-L6-v2` — **lightweight**, general-purpose
|
|
130
|
+
* `sentence-transformers/all-mpnet-base-v2` — **higher accuracy**, slower
|
|
131
|
+
* `BAAI/bge-base-en-v1.5` — **English**, dense embeddings
|
|
132
|
+
* `intfloat/multilingual-e5-base` — ideal for **multilingual corpora**
|
|
98
133
|
|
|
99
|
-
pipeline = RAGPipeline({
|
|
100
|
-
"embedding_model": "text-embedding-3-small",
|
|
101
|
-
"retriever": "faiss",
|
|
102
|
-
})
|
|
103
134
|
|
|
104
|
-
|
|
105
|
-
|
|
135
|
+
|
|
136
|
+
### Configuration Example
|
|
137
|
+
|
|
138
|
+
Use the following format in your config file to specify the embedding model:
|
|
139
|
+
|
|
140
|
+
```yaml
|
|
141
|
+
embedding_model: sentence-transformers/all-MiniLM-L6-v2
|
|
142
|
+
```
|
|
143
|
+
---
|
|
144
|
+
|
|
145
|
+
## 🔍 Available Retrievers
|
|
146
|
+
|
|
147
|
+
**Ragmint** integrates multiple **retrieval backends** to suit different needs:
|
|
148
|
+
|
|
149
|
+
| Retriever | Description |
|
|
150
|
+
| :--- | :--- |
|
|
151
|
+
| **FAISS** | Fast vector similarity search; efficient for dense embeddings |
|
|
152
|
+
| **Chroma** | Persistent vector DB; works well for incremental updates |
|
|
153
|
+
| **scikit-learn (BallTree)** | Lightweight, zero-dependency local retriever |
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
### Configuration Example
|
|
157
|
+
|
|
158
|
+
To specify the retriever in your configuration file, use the following format:
|
|
159
|
+
|
|
160
|
+
```yaml
|
|
161
|
+
retriever: faiss
|
|
106
162
|
```
|
|
107
163
|
|
|
108
164
|
---
|
|
@@ -174,8 +230,7 @@ lb.show_top(3)
|
|
|
174
230
|
|
|
175
231
|
## 🧠 Explainability with Gemini / Claude
|
|
176
232
|
|
|
177
|
-
Compare two RAG configurations and receive natural language insights
|
|
178
|
-
on **why** one performs better.
|
|
233
|
+
Compare two RAG configurations and receive **natural language insights** on why one performs better.
|
|
179
234
|
|
|
180
235
|
```python
|
|
181
236
|
from ragmint.explainer import explain_results
|
|
@@ -189,7 +244,7 @@ print(explanation)
|
|
|
189
244
|
|
|
190
245
|
> Set your API keys in a `.env` file or via environment variables:
|
|
191
246
|
> ```
|
|
192
|
-
> export
|
|
247
|
+
> export GEMINI_API_KEY="your_gemini_key"
|
|
193
248
|
> export ANTHROPIC_API_KEY="your_claude_key"
|
|
194
249
|
> ```
|
|
195
250
|
|
|
@@ -240,16 +295,21 @@ Your `pyproject.toml` includes all required dependencies:
|
|
|
240
295
|
name = "ragmint"
|
|
241
296
|
version = "0.1.0"
|
|
242
297
|
dependencies = [
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
298
|
+
"numpy<2.0.0",
|
|
299
|
+
"pandas>=2.0",
|
|
300
|
+
"scikit-learn>=1.3",
|
|
301
|
+
"openai>=1.0",
|
|
302
|
+
"tqdm",
|
|
303
|
+
"pyyaml",
|
|
304
|
+
"chromadb>=0.4",
|
|
305
|
+
"faiss-cpu; sys_platform != 'darwin'",
|
|
306
|
+
"optuna>=3.0",
|
|
307
|
+
"pytest",
|
|
308
|
+
"colorama",
|
|
309
|
+
"google-generativeai>=0.8.0",
|
|
310
|
+
"supabase>=2.4.0",
|
|
311
|
+
"python-dotenv",
|
|
312
|
+
"sentence-transformers"
|
|
253
313
|
]
|
|
254
314
|
```
|
|
255
315
|
|
|
@@ -21,8 +21,8 @@ It provides a complete toolkit for **retriever selection**, **embedding model tu
|
|
|
21
21
|
- 🧠 **Explainability Layer** — interprets RAG performance via Gemini or Claude APIs
|
|
22
22
|
- 🏆 **Leaderboard Tracking** — stores and ranks experiment runs via JSON or external DB
|
|
23
23
|
- 🔍 **Built-in RAG evaluation metrics** — faithfulness, recall, BLEU, ROUGE, latency
|
|
24
|
-
- ⚙️ **Retrievers** — FAISS, Chroma,
|
|
25
|
-
- 🧩 **Embeddings** —
|
|
24
|
+
- ⚙️ **Retrievers** — FAISS, Chroma, scikit-learn
|
|
25
|
+
- 🧩 **Embeddings** — Hugging Face
|
|
26
26
|
- 💾 **Caching, experiment tracking, and reproducibility** out of the box
|
|
27
27
|
- 🧰 **Clean modular structure** for easy integration in research and production setups
|
|
28
28
|
|
|
@@ -66,15 +66,69 @@ optimization:
|
|
|
66
66
|
### 3️⃣ Manual Pipeline Usage
|
|
67
67
|
|
|
68
68
|
```python
|
|
69
|
-
from ragmint.
|
|
69
|
+
from ragmint.tuner import RAGMint
|
|
70
|
+
|
|
71
|
+
# Initialize RAGMint with available components
|
|
72
|
+
rag = RAGMint(
|
|
73
|
+
docs_path="data/docs/",
|
|
74
|
+
retrievers=["faiss", "chroma", "sklearn"],
|
|
75
|
+
embeddings=["all-MiniLM-L6-v2", "sentence-transformers/all-MiniLM-L12-v2"],
|
|
76
|
+
rerankers=["mmr"]
|
|
77
|
+
)
|
|
78
|
+
|
|
79
|
+
# Run optimization over 3 trials using the default validation set
|
|
80
|
+
best, results = rag.optimize(
|
|
81
|
+
validation_set=None,
|
|
82
|
+
metric="faithfulness",
|
|
83
|
+
trials=3
|
|
84
|
+
)
|
|
85
|
+
|
|
86
|
+
print("Best configuration:", best)
|
|
87
|
+
```
|
|
88
|
+
---
|
|
89
|
+
# 🧩 Embeddings and Retrievers
|
|
90
|
+
|
|
91
|
+
**Ragmint** supports a flexible set of embeddings and retrievers, allowing you to adapt easily to various **RAG architectures**.
|
|
92
|
+
|
|
93
|
+
---
|
|
94
|
+
|
|
95
|
+
## 🔤 Available Embeddings (Hugging Face / OpenAI)
|
|
96
|
+
|
|
97
|
+
You can select from the following models:
|
|
98
|
+
|
|
99
|
+
* `sentence-transformers/all-MiniLM-L6-v2` — **lightweight**, general-purpose
|
|
100
|
+
* `sentence-transformers/all-mpnet-base-v2` — **higher accuracy**, slower
|
|
101
|
+
* `BAAI/bge-base-en-v1.5` — **English**, dense embeddings
|
|
102
|
+
* `intfloat/multilingual-e5-base` — ideal for **multilingual corpora**
|
|
70
103
|
|
|
71
|
-
pipeline = RAGPipeline({
|
|
72
|
-
"embedding_model": "text-embedding-3-small",
|
|
73
|
-
"retriever": "faiss",
|
|
74
|
-
})
|
|
75
104
|
|
|
76
|
-
|
|
77
|
-
|
|
105
|
+
|
|
106
|
+
### Configuration Example
|
|
107
|
+
|
|
108
|
+
Use the following format in your config file to specify the embedding model:
|
|
109
|
+
|
|
110
|
+
```yaml
|
|
111
|
+
embedding_model: sentence-transformers/all-MiniLM-L6-v2
|
|
112
|
+
```
|
|
113
|
+
---
|
|
114
|
+
|
|
115
|
+
## 🔍 Available Retrievers
|
|
116
|
+
|
|
117
|
+
**Ragmint** integrates multiple **retrieval backends** to suit different needs:
|
|
118
|
+
|
|
119
|
+
| Retriever | Description |
|
|
120
|
+
| :--- | :--- |
|
|
121
|
+
| **FAISS** | Fast vector similarity search; efficient for dense embeddings |
|
|
122
|
+
| **Chroma** | Persistent vector DB; works well for incremental updates |
|
|
123
|
+
| **scikit-learn (BallTree)** | Lightweight, zero-dependency local retriever |
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
### Configuration Example
|
|
127
|
+
|
|
128
|
+
To specify the retriever in your configuration file, use the following format:
|
|
129
|
+
|
|
130
|
+
```yaml
|
|
131
|
+
retriever: faiss
|
|
78
132
|
```
|
|
79
133
|
|
|
80
134
|
---
|
|
@@ -146,8 +200,7 @@ lb.show_top(3)
|
|
|
146
200
|
|
|
147
201
|
## 🧠 Explainability with Gemini / Claude
|
|
148
202
|
|
|
149
|
-
Compare two RAG configurations and receive natural language insights
|
|
150
|
-
on **why** one performs better.
|
|
203
|
+
Compare two RAG configurations and receive **natural language insights** on why one performs better.
|
|
151
204
|
|
|
152
205
|
```python
|
|
153
206
|
from ragmint.explainer import explain_results
|
|
@@ -161,7 +214,7 @@ print(explanation)
|
|
|
161
214
|
|
|
162
215
|
> Set your API keys in a `.env` file or via environment variables:
|
|
163
216
|
> ```
|
|
164
|
-
> export
|
|
217
|
+
> export GEMINI_API_KEY="your_gemini_key"
|
|
165
218
|
> export ANTHROPIC_API_KEY="your_claude_key"
|
|
166
219
|
> ```
|
|
167
220
|
|
|
@@ -212,16 +265,21 @@ Your `pyproject.toml` includes all required dependencies:
|
|
|
212
265
|
name = "ragmint"
|
|
213
266
|
version = "0.1.0"
|
|
214
267
|
dependencies = [
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
268
|
+
"numpy<2.0.0",
|
|
269
|
+
"pandas>=2.0",
|
|
270
|
+
"scikit-learn>=1.3",
|
|
271
|
+
"openai>=1.0",
|
|
272
|
+
"tqdm",
|
|
273
|
+
"pyyaml",
|
|
274
|
+
"chromadb>=0.4",
|
|
275
|
+
"faiss-cpu; sys_platform != 'darwin'",
|
|
276
|
+
"optuna>=3.0",
|
|
277
|
+
"pytest",
|
|
278
|
+
"colorama",
|
|
279
|
+
"google-generativeai>=0.8.0",
|
|
280
|
+
"supabase>=2.4.0",
|
|
281
|
+
"python-dotenv",
|
|
282
|
+
"sentence-transformers"
|
|
225
283
|
]
|
|
226
284
|
```
|
|
227
285
|
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "ragmint"
|
|
7
|
-
version = "0.2.3"
|
|
7
|
+
version = "0.3.0"
|
|
8
8
|
description = "A modular framework for evaluating and optimizing RAG pipelines."
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
license = { text = "Apache License 2.0" }
|
|
@@ -14,7 +14,7 @@ authors = [
|
|
|
14
14
|
keywords = ["RAG", "LLM", "retrieval", "optimization", "AI", "evaluation"]
|
|
15
15
|
requires-python = ">=3.9"
|
|
16
16
|
dependencies = [
|
|
17
|
-
"numpy
|
|
17
|
+
"numpy<2.0.0",
|
|
18
18
|
"pandas>=2.0",
|
|
19
19
|
"scikit-learn>=1.3",
|
|
20
20
|
"openai>=1.0",
|
|
@@ -26,7 +26,9 @@ dependencies = [
|
|
|
26
26
|
"pytest",
|
|
27
27
|
"colorama",
|
|
28
28
|
"google-generativeai>=0.8.0",
|
|
29
|
-
"supabase>=2.4.0"
|
|
29
|
+
"supabase>=2.4.0",
|
|
30
|
+
"python-dotenv",
|
|
31
|
+
"sentence-transformers"
|
|
30
32
|
]
|
|
31
33
|
|
|
32
34
|
[project.urls]
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
from dotenv import load_dotenv
|
|
3
|
+
|
|
4
|
+
try:
|
|
5
|
+
from sentence_transformers import SentenceTransformer
|
|
6
|
+
except ImportError:
|
|
7
|
+
SentenceTransformer = None
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class Embeddings:
|
|
11
|
+
"""
|
|
12
|
+
Wrapper for embedding backends: HuggingFace (SentenceTransformers) or Dummy.
|
|
13
|
+
|
|
14
|
+
Example:
|
|
15
|
+
model = Embeddings("huggingface", model_name="all-MiniLM-L6-v2")
|
|
16
|
+
embeddings = model.encode(["example text"])
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
def __init__(self, backend: str = "huggingface", model_name: str = None):
|
|
20
|
+
load_dotenv()
|
|
21
|
+
self.backend = backend.lower()
|
|
22
|
+
self.model_name = model_name or "all-MiniLM-L6-v2"
|
|
23
|
+
|
|
24
|
+
if self.backend == "huggingface":
|
|
25
|
+
if SentenceTransformer is None:
|
|
26
|
+
raise ImportError("Please install `sentence-transformers` to use HuggingFace embeddings.")
|
|
27
|
+
self.model = SentenceTransformer(self.model_name)
|
|
28
|
+
self.dim = self.model.get_sentence_embedding_dimension()
|
|
29
|
+
|
|
30
|
+
elif self.backend == "dummy":
|
|
31
|
+
self.model = None
|
|
32
|
+
self.dim = 768 # Default embedding dimension for dummy backend
|
|
33
|
+
|
|
34
|
+
else:
|
|
35
|
+
raise ValueError(f"Unsupported embedding backend: {backend}")
|
|
36
|
+
|
|
37
|
+
def encode(self, texts):
|
|
38
|
+
if isinstance(texts, str):
|
|
39
|
+
texts = [texts]
|
|
40
|
+
|
|
41
|
+
if self.backend == "huggingface":
|
|
42
|
+
embeddings = self.model.encode(texts, convert_to_numpy=True, show_progress_bar=False)
|
|
43
|
+
|
|
44
|
+
elif self.backend == "dummy":
|
|
45
|
+
# Return a NumPy array of shape (len(texts), dim)
|
|
46
|
+
embeddings = np.random.rand(len(texts), self.dim).astype(np.float32)
|
|
47
|
+
|
|
48
|
+
else:
|
|
49
|
+
raise ValueError(f"Unknown embedding backend: {self.backend}")
|
|
50
|
+
|
|
51
|
+
# ✅ Always ensure NumPy array output
|
|
52
|
+
if not isinstance(embeddings, np.ndarray):
|
|
53
|
+
embeddings = np.array(embeddings, dtype=np.float32)
|
|
54
|
+
|
|
55
|
+
return embeddings
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from typing import Any, Dict
|
|
1
|
+
from typing import Any, Dict
|
|
2
2
|
from .retriever import Retriever
|
|
3
3
|
from .reranker import Reranker
|
|
4
4
|
from .evaluation import Evaluator
|
|
@@ -7,7 +7,7 @@ from .evaluation import Evaluator
|
|
|
7
7
|
class RAGPipeline:
|
|
8
8
|
"""
|
|
9
9
|
Core Retrieval-Augmented Generation pipeline.
|
|
10
|
-
|
|
10
|
+
Retrieves, reranks, and evaluates a query given the configured backends.
|
|
11
11
|
"""
|
|
12
12
|
|
|
13
13
|
def __init__(self, retriever: Retriever, reranker: Reranker, evaluator: Evaluator):
|
|
@@ -16,18 +16,17 @@ class RAGPipeline:
|
|
|
16
16
|
self.evaluator = evaluator
|
|
17
17
|
|
|
18
18
|
def run(self, query: str, top_k: int = 5) -> Dict[str, Any]:
|
|
19
|
-
# Retrieve
|
|
19
|
+
# Retrieve
|
|
20
20
|
retrieved_docs = self.retriever.retrieve(query, top_k=top_k)
|
|
21
|
+
|
|
21
22
|
# Rerank
|
|
22
23
|
reranked_docs = self.reranker.rerank(query, retrieved_docs)
|
|
23
24
|
|
|
24
|
-
#
|
|
25
|
-
if reranked_docs
|
|
26
|
-
answer = reranked_docs[0]["text"]
|
|
27
|
-
else:
|
|
28
|
-
answer = ""
|
|
29
|
-
|
|
25
|
+
# Construct pseudo-answer from top doc
|
|
26
|
+
answer = reranked_docs[0]["text"] if reranked_docs else ""
|
|
30
27
|
context = "\n".join([d["text"] for d in reranked_docs])
|
|
28
|
+
|
|
29
|
+
# Evaluate
|
|
31
30
|
metrics = self.evaluator.evaluate(query, answer, context)
|
|
32
31
|
|
|
33
32
|
return {
|
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
from typing import List, Dict, Any, Optional
|
|
2
|
+
import numpy as np
|
|
3
|
+
from .embeddings import Embeddings
|
|
4
|
+
|
|
5
|
+
# Optional imports
|
|
6
|
+
try:
|
|
7
|
+
import faiss
|
|
8
|
+
except ImportError:
|
|
9
|
+
faiss = None
|
|
10
|
+
|
|
11
|
+
try:
|
|
12
|
+
import chromadb
|
|
13
|
+
except ImportError:
|
|
14
|
+
chromadb = None
|
|
15
|
+
|
|
16
|
+
try:
|
|
17
|
+
from sklearn.neighbors import BallTree
|
|
18
|
+
except ImportError:
|
|
19
|
+
BallTree = None
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class Retriever:
|
|
23
|
+
"""
|
|
24
|
+
Multi-backend retriever supporting NumPy, FAISS, Chroma, and Scikit-learn BallTree.
|
|
25
|
+
|
|
26
|
+
Backends:
|
|
27
|
+
- "numpy" : basic cosine similarity using NumPy (default)
|
|
28
|
+
- "faiss" : fast dense vector search (in-memory)
|
|
29
|
+
- "chroma" : persistent local vector database
|
|
30
|
+
- "sklearn": BallTree using Euclidean distance over L2-normalized vectors
|
|
31
|
+
|
|
32
|
+
Example:
|
|
33
|
+
retriever = Retriever(embedder, documents=["A", "B", "C"], backend="faiss")
|
|
34
|
+
retriever.retrieve("example query", top_k=3)
|
|
35
|
+
"""
|
|
36
|
+
|
|
37
|
+
def __init__(
|
|
38
|
+
self,
|
|
39
|
+
embedder: Embeddings,
|
|
40
|
+
documents: Optional[List[str]] = None,
|
|
41
|
+
embeddings: Optional[np.ndarray] = None,
|
|
42
|
+
backend: str = "numpy",
|
|
43
|
+
):
|
|
44
|
+
self.embedder = embedder
|
|
45
|
+
self.documents = documents or []
|
|
46
|
+
self.backend = backend.lower()
|
|
47
|
+
self.embeddings = None
|
|
48
|
+
self.index = None
|
|
49
|
+
self.client = None
|
|
50
|
+
|
|
51
|
+
# Initialize embeddings
|
|
52
|
+
if embeddings is not None:
|
|
53
|
+
self.embeddings = np.array(embeddings)
|
|
54
|
+
elif self.documents:
|
|
55
|
+
self.embeddings = self.embedder.encode(self.documents)
|
|
56
|
+
else:
|
|
57
|
+
self.embeddings = np.zeros((0, self.embedder.dim))
|
|
58
|
+
|
|
59
|
+
# Normalize for cosine
|
|
60
|
+
if self.embeddings.size > 0:
|
|
61
|
+
self.embeddings = self._normalize(self.embeddings)
|
|
62
|
+
|
|
63
|
+
# Initialize backend
|
|
64
|
+
self._init_backend()
|
|
65
|
+
|
|
66
|
+
# ------------------------
|
|
67
|
+
# Backend Initialization
|
|
68
|
+
# ------------------------
|
|
69
|
+
def _init_backend(self):
|
|
70
|
+
if self.backend == "faiss":
|
|
71
|
+
if faiss is None:
|
|
72
|
+
raise ImportError("faiss not installed. Run `pip install faiss-cpu`.")
|
|
73
|
+
self.index = faiss.IndexFlatIP(self.embedder.dim)
|
|
74
|
+
self.index.add(self.embeddings.astype("float32"))
|
|
75
|
+
|
|
76
|
+
elif self.backend == "chroma":
|
|
77
|
+
if chromadb is None:
|
|
78
|
+
raise ImportError("chromadb not installed. Run `pip install chromadb`.")
|
|
79
|
+
self.client = chromadb.Client()
|
|
80
|
+
self.collection = self.client.create_collection(name="ragmint_retriever")
|
|
81
|
+
for i, doc in enumerate(self.documents):
|
|
82
|
+
self.collection.add(
|
|
83
|
+
ids=[str(i)],
|
|
84
|
+
documents=[doc],
|
|
85
|
+
embeddings=[self.embeddings[i].tolist()],
|
|
86
|
+
)
|
|
87
|
+
|
|
88
|
+
elif self.backend == "sklearn":
|
|
89
|
+
if BallTree is None:
|
|
90
|
+
raise ImportError("scikit-learn not installed. Run `pip install scikit-learn`.")
|
|
91
|
+
self.index = BallTree(self.embeddings)
|
|
92
|
+
|
|
93
|
+
elif self.backend != "numpy":
|
|
94
|
+
raise ValueError(f"Unsupported retriever backend: {self.backend}")
|
|
95
|
+
|
|
96
|
+
# ------------------------
|
|
97
|
+
# Retrieval
|
|
98
|
+
# ------------------------
|
|
99
|
+
def retrieve(self, query: str, top_k: int = 5) -> List[Dict[str, Any]]:
|
|
100
|
+
if len(self.documents) == 0 or self.embeddings.size == 0:
|
|
101
|
+
return [{"text": "", "score": 0.0}]
|
|
102
|
+
|
|
103
|
+
query_vec = self.embedder.encode([query])[0]
|
|
104
|
+
query_vec = self._normalize(query_vec)
|
|
105
|
+
|
|
106
|
+
if self.backend == "numpy":
|
|
107
|
+
scores = np.dot(self.embeddings, query_vec)
|
|
108
|
+
top_indices = np.argsort(scores)[::-1][:top_k]
|
|
109
|
+
return [
|
|
110
|
+
{"text": self.documents[i], "score": float(scores[i])}
|
|
111
|
+
for i in top_indices
|
|
112
|
+
]
|
|
113
|
+
|
|
114
|
+
elif self.backend == "faiss":
|
|
115
|
+
query_vec = np.expand_dims(query_vec.astype("float32"), axis=0)
|
|
116
|
+
scores, indices = self.index.search(query_vec, top_k)
|
|
117
|
+
return [
|
|
118
|
+
{"text": self.documents[int(i)], "score": float(scores[0][j])}
|
|
119
|
+
for j, i in enumerate(indices[0])
|
|
120
|
+
]
|
|
121
|
+
|
|
122
|
+
elif self.backend == "chroma":
|
|
123
|
+
results = self.collection.query(query_texts=[query], n_results=top_k)
|
|
124
|
+
docs = results["documents"][0]
|
|
125
|
+
scores = results["distances"][0]
|
|
126
|
+
return [{"text": d, "score": 1 - s} for d, s in zip(docs, scores)]
|
|
127
|
+
|
|
128
|
+
elif self.backend == "sklearn":
|
|
129
|
+
distances, indices = self.index.query([query_vec], k=top_k)
|
|
130
|
+
scores = 1 - distances[0]
|
|
131
|
+
return [
|
|
132
|
+
{"text": self.documents[int(i)], "score": float(scores[j])}
|
|
133
|
+
for j, i in enumerate(indices[0])
|
|
134
|
+
]
|
|
135
|
+
|
|
136
|
+
else:
|
|
137
|
+
raise ValueError(f"Unknown backend: {self.backend}")
|
|
138
|
+
|
|
139
|
+
# ------------------------
|
|
140
|
+
# Utils
|
|
141
|
+
# ------------------------
|
|
142
|
+
@staticmethod
|
|
143
|
+
def _normalize(vectors: np.ndarray) -> np.ndarray:
|
|
144
|
+
if vectors.ndim == 1:
|
|
145
|
+
norm = np.linalg.norm(vectors)
|
|
146
|
+
return vectors / norm if norm > 0 else vectors
|
|
147
|
+
norms = np.linalg.norm(vectors, axis=1, keepdims=True)
|
|
148
|
+
return np.divide(vectors, norms, out=np.zeros_like(vectors), where=norms != 0)
|
|
@@ -7,9 +7,12 @@ outperforms another. Falls back gracefully if no API key is provided.
|
|
|
7
7
|
|
|
8
8
|
import os
|
|
9
9
|
import json
|
|
10
|
+
from dotenv import load_dotenv
|
|
10
11
|
|
|
12
|
+
# Load environment variables from .env file if available
|
|
13
|
+
load_dotenv()
|
|
11
14
|
|
|
12
|
-
def explain_results(results_a: dict, results_b: dict, model: str = "gemini-
|
|
15
|
+
def explain_results(results_a: dict, results_b: dict, model: str = "gemini-2.5-flash-lite") -> str:
|
|
13
16
|
"""
|
|
14
17
|
Generate a natural-language explanation comparing two RAG experiment results.
|
|
15
18
|
Priority:
|
|
@@ -26,8 +29,7 @@ def explain_results(results_a: dict, results_b: dict, model: str = "gemini-1.5-p
|
|
|
26
29
|
"""
|
|
27
30
|
|
|
28
31
|
anthropic_key = os.getenv("ANTHROPIC_API_KEY")
|
|
29
|
-
google_key = os.getenv("
|
|
30
|
-
|
|
32
|
+
google_key = os.getenv("GOOGLE_API_KEY") # fixed var name
|
|
31
33
|
|
|
32
34
|
# 1️⃣ Try Anthropic Claude first
|
|
33
35
|
if anthropic_key:
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
import pytest
|
|
3
|
+
from ragmint.core.embeddings import Embeddings
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def test_dummy_backend_output_shape():
|
|
7
|
+
model = Embeddings(backend="dummy")
|
|
8
|
+
texts = ["hello", "world"]
|
|
9
|
+
embeddings = model.encode(texts)
|
|
10
|
+
|
|
11
|
+
# Expect 2x768 array
|
|
12
|
+
assert isinstance(embeddings, np.ndarray)
|
|
13
|
+
assert embeddings.shape == (2, 768)
|
|
14
|
+
assert embeddings.dtype == np.float32
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def test_dummy_backend_single_string():
|
|
18
|
+
model = Embeddings(backend="dummy")
|
|
19
|
+
text = "test"
|
|
20
|
+
embeddings = model.encode(text)
|
|
21
|
+
|
|
22
|
+
assert embeddings.shape == (1, 768)
|
|
23
|
+
assert isinstance(embeddings, np.ndarray)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
'''@pytest.mark.skipif(
|
|
27
|
+
not hasattr(__import__('importlib').util.find_spec("sentence_transformers"), "loader"),
|
|
28
|
+
reason="sentence-transformers not installed"
|
|
29
|
+
)
|
|
30
|
+
def test_huggingface_backend_output_shape():
|
|
31
|
+
model = Embeddings(backend="huggingface", model_name="all-MiniLM-L6-v2")
|
|
32
|
+
texts = ["This is a test.", "Another sentence."]
|
|
33
|
+
embeddings = model.encode(texts)
|
|
34
|
+
|
|
35
|
+
# Expect 2x384 for MiniLM-L6-v2
|
|
36
|
+
assert isinstance(embeddings, np.ndarray)
|
|
37
|
+
assert embeddings.ndim == 2
|
|
38
|
+
assert embeddings.shape[0] == len(texts)
|
|
39
|
+
assert embeddings.dtype == np.float32
|
|
40
|
+
'''
|
|
41
|
+
|
|
42
|
+
def test_invalid_backend():
|
|
43
|
+
try:
|
|
44
|
+
Embeddings(backend="unknown")
|
|
45
|
+
except ValueError as e:
|
|
46
|
+
assert "Unsupported embedding backend" in str(e)
|
|
@@ -7,7 +7,7 @@ from ragmint.explainer import explain_results
|
|
|
7
7
|
def test_real_gemini_explanation():
|
|
8
8
|
"""Run real Gemini call if GEMINI_API_KEY is set."""
|
|
9
9
|
if not os.getenv("GEMINI_API_KEY"):
|
|
10
|
-
pytest.skip("
|
|
10
|
+
pytest.skip("GEMINI_API_KEY not set")
|
|
11
11
|
|
|
12
12
|
config_a = {"retriever": "FAISS", "embedding_model": "OpenAI"}
|
|
13
13
|
config_b = {"retriever": "Chroma", "embedding_model": "SentenceTransformers"}
|
|
@@ -1,14 +1,15 @@
|
|
|
1
1
|
import numpy as np
|
|
2
2
|
from ragmint.core.pipeline import RAGPipeline
|
|
3
3
|
from ragmint.core.retriever import Retriever
|
|
4
|
+
from ragmint.core.embeddings import Embeddings
|
|
4
5
|
from ragmint.core.reranker import Reranker
|
|
5
6
|
from ragmint.core.evaluation import Evaluator
|
|
6
7
|
|
|
7
8
|
|
|
8
9
|
def test_pipeline_run():
|
|
9
10
|
docs = ["doc1 text", "doc2 text"]
|
|
10
|
-
|
|
11
|
-
retriever = Retriever(
|
|
11
|
+
embedder = Embeddings(backend="dummy")
|
|
12
|
+
retriever = Retriever(embedder=embedder, documents=docs)
|
|
12
13
|
reranker = Reranker("mmr")
|
|
13
14
|
evaluator = Evaluator()
|
|
14
15
|
pipeline = RAGPipeline(retriever, reranker, evaluator)
|
|
@@ -1,11 +1,12 @@
|
|
|
1
1
|
import numpy as np
|
|
2
2
|
from ragmint.core.retriever import Retriever
|
|
3
|
+
from ragmint.core.embeddings import Embeddings
|
|
3
4
|
|
|
4
5
|
|
|
5
6
|
def test_retrieve_basic():
|
|
6
|
-
embeddings = [np.random.rand(5) for _ in range(3)]
|
|
7
7
|
docs = ["doc A", "doc B", "doc C"]
|
|
8
|
-
|
|
8
|
+
embedder = Embeddings(backend="dummy")
|
|
9
|
+
retriever = Retriever(embedder=embedder, documents=docs)
|
|
9
10
|
|
|
10
11
|
results = retriever.retrieve("sample query", top_k=2)
|
|
11
12
|
assert isinstance(results, list)
|
|
@@ -1,16 +1,15 @@
|
|
|
1
1
|
import os
|
|
2
2
|
import json
|
|
3
3
|
import logging
|
|
4
|
-
from typing import Any, Dict, List, Tuple
|
|
4
|
+
from typing import Any, Dict, List, Tuple
|
|
5
5
|
from time import perf_counter
|
|
6
6
|
|
|
7
7
|
from .core.pipeline import RAGPipeline
|
|
8
|
-
from .core.embeddings import
|
|
8
|
+
from .core.embeddings import Embeddings
|
|
9
9
|
from .core.retriever import Retriever
|
|
10
10
|
from .core.reranker import Reranker
|
|
11
11
|
from .core.evaluation import Evaluator
|
|
12
12
|
from .optimization.search import GridSearch, RandomSearch, BayesianSearch
|
|
13
|
-
|
|
14
13
|
from .utils.data_loader import load_validation_set
|
|
15
14
|
|
|
16
15
|
logging.basicConfig(level=logging.INFO, format="[%(levelname)s] %(message)s")
|
|
@@ -19,6 +18,7 @@ logging.basicConfig(level=logging.INFO, format="[%(levelname)s] %(message)s")
|
|
|
19
18
|
class RAGMint:
|
|
20
19
|
"""
|
|
21
20
|
Main RAG pipeline optimizer and evaluator.
|
|
21
|
+
Runs combinations of retrievers, embeddings, and rerankers to find the best setup.
|
|
22
22
|
"""
|
|
23
23
|
|
|
24
24
|
def __init__(
|
|
@@ -36,53 +36,91 @@ class RAGMint:
|
|
|
36
36
|
self.documents: List[str] = self._load_docs()
|
|
37
37
|
self.embeddings_cache: Dict[str, Any] = {}
|
|
38
38
|
|
|
39
|
+
# -------------------------
|
|
40
|
+
# Document Loading
|
|
41
|
+
# -------------------------
|
|
39
42
|
def _load_docs(self) -> List[str]:
|
|
40
43
|
if not os.path.exists(self.docs_path):
|
|
41
44
|
logging.warning(f"Corpus path not found: {self.docs_path}")
|
|
42
45
|
return []
|
|
46
|
+
|
|
43
47
|
docs = []
|
|
44
48
|
for file in os.listdir(self.docs_path):
|
|
45
|
-
if file.endswith(".txt"
|
|
49
|
+
if file.endswith((".txt", ".md", ".rst")):
|
|
46
50
|
with open(os.path.join(self.docs_path, file), "r", encoding="utf-8") as f:
|
|
47
51
|
docs.append(f.read())
|
|
48
|
-
|
|
52
|
+
|
|
53
|
+
logging.info(f"📚 Loaded {len(docs)} documents from {self.docs_path}")
|
|
49
54
|
return docs
|
|
50
55
|
|
|
51
|
-
|
|
56
|
+
# -------------------------
|
|
57
|
+
# Embedding Cache
|
|
58
|
+
# -------------------------
|
|
59
|
+
def _embed_docs(self, model_name: str) -> Any:
|
|
60
|
+
"""Compute and cache document embeddings."""
|
|
52
61
|
if model_name in self.embeddings_cache:
|
|
53
62
|
return self.embeddings_cache[model_name]
|
|
54
63
|
|
|
55
|
-
model =
|
|
64
|
+
model = Embeddings(backend="huggingface", model_name=model_name)
|
|
56
65
|
embeddings = model.encode(self.documents)
|
|
57
66
|
self.embeddings_cache[model_name] = embeddings
|
|
58
67
|
return embeddings
|
|
59
68
|
|
|
69
|
+
# -------------------------
|
|
70
|
+
# Build Pipeline
|
|
71
|
+
# -------------------------
|
|
60
72
|
def _build_pipeline(self, config: Dict[str, str]) -> RAGPipeline:
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
73
|
+
"""Builds a pipeline from one configuration."""
|
|
74
|
+
retriever_backend = config["retriever"]
|
|
75
|
+
model_name = config["embedding_model"]
|
|
76
|
+
reranker_name = config["reranker"]
|
|
77
|
+
|
|
78
|
+
# Load embeddings (cached)
|
|
79
|
+
embeddings = self._embed_docs(model_name)
|
|
80
|
+
embedder = Embeddings(backend="huggingface", model_name=model_name)
|
|
81
|
+
|
|
82
|
+
# Initialize retriever with backend
|
|
83
|
+
logging.info(f"⚙️ Initializing retriever backend: {retriever_backend}")
|
|
84
|
+
retriever = Retriever(
|
|
85
|
+
embedder=embedder,
|
|
86
|
+
documents=self.documents,
|
|
87
|
+
embeddings=embeddings,
|
|
88
|
+
backend=retriever_backend,
|
|
89
|
+
)
|
|
90
|
+
|
|
91
|
+
reranker = Reranker(reranker_name)
|
|
65
92
|
evaluator = Evaluator()
|
|
93
|
+
|
|
66
94
|
return RAGPipeline(retriever, reranker, evaluator)
|
|
67
95
|
|
|
96
|
+
# -------------------------
|
|
97
|
+
# Evaluate Configuration
|
|
98
|
+
# -------------------------
|
|
68
99
|
def _evaluate_config(
|
|
69
100
|
self, config: Dict[str, Any], validation: List[Dict[str, str]], metric: str
|
|
70
101
|
) -> Dict[str, float]:
|
|
102
|
+
"""Evaluates a single configuration."""
|
|
71
103
|
pipeline = self._build_pipeline(config)
|
|
72
|
-
|
|
73
104
|
scores = []
|
|
74
105
|
start = perf_counter()
|
|
106
|
+
|
|
75
107
|
for sample in validation:
|
|
76
|
-
query = sample.get("question") or sample.get("query")
|
|
77
|
-
reference = sample.get("answer")
|
|
108
|
+
query = sample.get("question") or sample.get("query") or ""
|
|
78
109
|
result = pipeline.run(query)
|
|
79
110
|
score = result["metrics"].get(metric, 0.0)
|
|
80
111
|
scores.append(score)
|
|
81
|
-
elapsed = perf_counter() - start
|
|
82
112
|
|
|
113
|
+
elapsed = perf_counter() - start
|
|
83
114
|
avg_score = sum(scores) / len(scores) if scores else 0.0
|
|
84
|
-
return {metric: avg_score, "latency": elapsed / max(1, len(validation))}
|
|
85
115
|
|
|
116
|
+
return {
|
|
117
|
+
metric: avg_score,
|
|
118
|
+
"latency": elapsed / max(1, len(validation)),
|
|
119
|
+
}
|
|
120
|
+
|
|
121
|
+
# -------------------------
|
|
122
|
+
# Optimize
|
|
123
|
+
# -------------------------
|
|
86
124
|
def optimize(
|
|
87
125
|
self,
|
|
88
126
|
validation_set: str,
|
|
@@ -90,6 +128,7 @@ class RAGMint:
|
|
|
90
128
|
search_type: str = "random",
|
|
91
129
|
trials: int = 10,
|
|
92
130
|
) -> Tuple[Dict[str, Any], List[Dict[str, Any]]]:
|
|
131
|
+
"""Run optimization search over retrievers/embeddings/rerankers."""
|
|
93
132
|
validation = load_validation_set(validation_set or "default")
|
|
94
133
|
|
|
95
134
|
search_space = {
|
|
@@ -98,8 +137,9 @@ class RAGMint:
|
|
|
98
137
|
"reranker": self.rerankers,
|
|
99
138
|
}
|
|
100
139
|
|
|
101
|
-
logging.info(f"Starting {search_type} optimization with {trials} trials")
|
|
140
|
+
logging.info(f"🚀 Starting {search_type} optimization with {trials} trials")
|
|
102
141
|
|
|
142
|
+
# Select search strategy
|
|
103
143
|
try:
|
|
104
144
|
if search_type == "grid":
|
|
105
145
|
searcher = GridSearch(search_space)
|
|
@@ -108,16 +148,18 @@ class RAGMint:
|
|
|
108
148
|
else:
|
|
109
149
|
searcher = RandomSearch(search_space, n_trials=trials)
|
|
110
150
|
except Exception as e:
|
|
111
|
-
logging.warning(f"
|
|
151
|
+
logging.warning(f"⚠️ Fallback to RandomSearch due to missing deps: {e}")
|
|
112
152
|
searcher = RandomSearch(search_space, n_trials=trials)
|
|
113
153
|
|
|
154
|
+
# Run trials
|
|
114
155
|
results = []
|
|
115
156
|
for config in searcher:
|
|
116
157
|
metrics = self._evaluate_config(config, validation, metric)
|
|
117
158
|
result = {**config, **metrics}
|
|
118
159
|
results.append(result)
|
|
119
|
-
logging.info(f"Tested config: {config} -> {metrics}")
|
|
160
|
+
logging.info(f"🔹 Tested config: {config} -> {metrics}")
|
|
120
161
|
|
|
121
162
|
best = max(results, key=lambda r: r.get(metric, 0.0)) if results else {}
|
|
122
|
-
logging.info(f"
|
|
163
|
+
logging.info(f"🏆 Best configuration: {best}")
|
|
164
|
+
|
|
123
165
|
return best, results
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: ragmint
|
|
3
|
-
Version: 0.2.3
|
|
3
|
+
Version: 0.3.0
|
|
4
4
|
Summary: A modular framework for evaluating and optimizing RAG pipelines.
|
|
5
5
|
Author-email: Andre Oliveira <oandreoliveira@outlook.com>
|
|
6
6
|
License: Apache License 2.0
|
|
@@ -11,7 +11,7 @@ Keywords: RAG,LLM,retrieval,optimization,AI,evaluation
|
|
|
11
11
|
Requires-Python: >=3.9
|
|
12
12
|
Description-Content-Type: text/markdown
|
|
13
13
|
License-File: LICENSE
|
|
14
|
-
Requires-Dist: numpy
|
|
14
|
+
Requires-Dist: numpy<2.0.0
|
|
15
15
|
Requires-Dist: pandas>=2.0
|
|
16
16
|
Requires-Dist: scikit-learn>=1.3
|
|
17
17
|
Requires-Dist: openai>=1.0
|
|
@@ -24,6 +24,8 @@ Requires-Dist: pytest
|
|
|
24
24
|
Requires-Dist: colorama
|
|
25
25
|
Requires-Dist: google-generativeai>=0.8.0
|
|
26
26
|
Requires-Dist: supabase>=2.4.0
|
|
27
|
+
Requires-Dist: python-dotenv
|
|
28
|
+
Requires-Dist: sentence-transformers
|
|
27
29
|
Dynamic: license-file
|
|
28
30
|
|
|
29
31
|
# Ragmint
|
|
@@ -49,8 +51,8 @@ It provides a complete toolkit for **retriever selection**, **embedding model tu
|
|
|
49
51
|
- 🧠 **Explainability Layer** — interprets RAG performance via Gemini or Claude APIs
|
|
50
52
|
- 🏆 **Leaderboard Tracking** — stores and ranks experiment runs via JSON or external DB
|
|
51
53
|
- 🔍 **Built-in RAG evaluation metrics** — faithfulness, recall, BLEU, ROUGE, latency
|
|
52
|
-
- ⚙️ **Retrievers** — FAISS, Chroma,
|
|
53
|
-
- 🧩 **Embeddings** —
|
|
54
|
+
- ⚙️ **Retrievers** — FAISS, Chroma, scikit-learn
|
|
55
|
+
- 🧩 **Embeddings** — Hugging Face
|
|
54
56
|
- 💾 **Caching, experiment tracking, and reproducibility** out of the box
|
|
55
57
|
- 🧰 **Clean modular structure** for easy integration in research and production setups
|
|
56
58
|
|
|
@@ -94,15 +96,69 @@ optimization:
|
|
|
94
96
|
### 3️⃣ Manual Pipeline Usage
|
|
95
97
|
|
|
96
98
|
```python
|
|
97
|
-
from ragmint.
|
|
99
|
+
from ragmint.tuner import RAGMint
|
|
100
|
+
|
|
101
|
+
# Initialize RAGMint with available components
|
|
102
|
+
rag = RAGMint(
|
|
103
|
+
docs_path="data/docs/",
|
|
104
|
+
retrievers=["faiss", "chroma", "sklearn"],
|
|
105
|
+
embeddings=["all-MiniLM-L6-v2", "sentence-transformers/all-MiniLM-L12-v2"],
|
|
106
|
+
rerankers=["mmr"]
|
|
107
|
+
)
|
|
108
|
+
|
|
109
|
+
# Run optimization over 3 trials using the default validation set
|
|
110
|
+
best, results = rag.optimize(
|
|
111
|
+
validation_set=None,
|
|
112
|
+
metric="faithfulness",
|
|
113
|
+
trials=3
|
|
114
|
+
)
|
|
115
|
+
|
|
116
|
+
print("Best configuration:", best)
|
|
117
|
+
```
|
|
118
|
+
---
|
|
119
|
+
# 🧩 Embeddings and Retrievers
|
|
120
|
+
|
|
121
|
+
**Ragmint** supports a flexible set of embeddings and retrievers, allowing you to adapt easily to various **RAG architectures**.
|
|
122
|
+
|
|
123
|
+
---
|
|
124
|
+
|
|
125
|
+
## 🔤 Available Embeddings (Hugging Face / OpenAI)
|
|
126
|
+
|
|
127
|
+
You can select from the following models:
|
|
128
|
+
|
|
129
|
+
* `sentence-transformers/all-MiniLM-L6-v2` — **lightweight**, general-purpose
|
|
130
|
+
* `sentence-transformers/all-mpnet-base-v2` — **higher accuracy**, slower
|
|
131
|
+
* `BAAI/bge-base-en-v1.5` — **English-only**, dense embeddings
|
|
132
|
+
* `intfloat/multilingual-e5-base` — ideal for **multilingual corpora**
|
|
98
133
|
|
|
99
|
-
pipeline = RAGPipeline({
|
|
100
|
-
"embedding_model": "text-embedding-3-small",
|
|
101
|
-
"retriever": "faiss",
|
|
102
|
-
})
|
|
103
134
|
|
|
104
|
-
|
|
105
|
-
|
|
135
|
+
|
|
136
|
+
### Configuration Example
|
|
137
|
+
|
|
138
|
+
Use the following format in your config file to specify the embedding model:
|
|
139
|
+
|
|
140
|
+
```yaml
|
|
141
|
+
embedding_model: sentence-transformers/all-MiniLM-L6-v2
|
|
142
|
+
```
|
|
143
|
+
---
|
|
144
|
+
|
|
145
|
+
## 🔍 Available Retrievers
|
|
146
|
+
|
|
147
|
+
**Ragmint** integrates multiple **retrieval backends** to suit different needs:
|
|
148
|
+
|
|
149
|
+
| Retriever | Description |
|
|
150
|
+
| :--- | :--- |
|
|
151
|
+
| **FAISS** | Fast vector similarity search; efficient for dense embeddings |
|
|
152
|
+
| **Chroma** | Persistent vector DB; works well for incremental updates |
|
|
153
|
+
| **scikit-learn (NearestNeighbors)** | Lightweight, zero-dependency local retriever |
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
### Configuration Example
|
|
157
|
+
|
|
158
|
+
To specify the retriever in your configuration file, use the following format:
|
|
159
|
+
|
|
160
|
+
```yaml
|
|
161
|
+
retriever: faiss
|
|
106
162
|
```
|
|
107
163
|
|
|
108
164
|
---
|
|
@@ -174,8 +230,7 @@ lb.show_top(3)
|
|
|
174
230
|
|
|
175
231
|
## 🧠 Explainability with Gemini / Claude
|
|
176
232
|
|
|
177
|
-
Compare two RAG configurations and receive natural language insights
|
|
178
|
-
on **why** one performs better.
|
|
233
|
+
Compare two RAG configurations and receive **natural language insights** on why one performs better.
|
|
179
234
|
|
|
180
235
|
```python
|
|
181
236
|
from ragmint.explainer import explain_results
|
|
@@ -189,7 +244,7 @@ print(explanation)
|
|
|
189
244
|
|
|
190
245
|
> Set your API keys in a `.env` file or via environment variables:
|
|
191
246
|
> ```
|
|
192
|
-
> export
|
|
247
|
+
> export GEMINI_API_KEY="your_gemini_key"
|
|
193
248
|
> export ANTHROPIC_API_KEY="your_claude_key"
|
|
194
249
|
> ```
|
|
195
250
|
|
|
@@ -240,16 +295,21 @@ Your `pyproject.toml` includes all required dependencies:
|
|
|
240
295
|
name = "ragmint"
|
|
241
296
|
version = "0.1.0"
|
|
242
297
|
dependencies = [
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
298
|
+
"numpy<2.0.0",
|
|
299
|
+
"pandas>=2.0",
|
|
300
|
+
"scikit-learn>=1.3",
|
|
301
|
+
"openai>=1.0",
|
|
302
|
+
"tqdm",
|
|
303
|
+
"pyyaml",
|
|
304
|
+
"chromadb>=0.4",
|
|
305
|
+
"faiss-cpu; sys_platform != 'darwin'",
|
|
306
|
+
"optuna>=3.0",
|
|
307
|
+
"pytest",
|
|
308
|
+
"colorama",
|
|
309
|
+
"google-generativeai>=0.8.0",
|
|
310
|
+
"supabase>=2.4.0",
|
|
311
|
+
"python-dotenv",
|
|
312
|
+
"sentence-transformers"
|
|
253
313
|
]
|
|
254
314
|
```
|
|
255
315
|
|
|
@@ -27,6 +27,7 @@ src/ragmint/optimization/search.py
|
|
|
27
27
|
src/ragmint/tests/__init__.py
|
|
28
28
|
src/ragmint/tests/conftest.py
|
|
29
29
|
src/ragmint/tests/test_autotuner.py
|
|
30
|
+
src/ragmint/tests/test_embeddings.py
|
|
30
31
|
src/ragmint/tests/test_explainer.py
|
|
31
32
|
src/ragmint/tests/test_explainer_integration.py
|
|
32
33
|
src/ragmint/tests/test_integration_autotuner_ragmint.py
|
|
@@ -1,19 +0,0 @@
|
|
|
1
|
-
import numpy as np
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
class EmbeddingModel:
|
|
5
|
-
"""
|
|
6
|
-
Wrapper for embedding backends (OpenAI, HuggingFace, etc.)
|
|
7
|
-
"""
|
|
8
|
-
|
|
9
|
-
def __init__(self, backend: str = "dummy"):
|
|
10
|
-
self.backend = backend
|
|
11
|
-
|
|
12
|
-
def encode(self, texts):
|
|
13
|
-
if self.backend == "openai":
|
|
14
|
-
# Example placeholder — integrate with actual OpenAI API
|
|
15
|
-
return [np.random.rand(768) for _ in texts]
|
|
16
|
-
elif self.backend == "huggingface":
|
|
17
|
-
return [np.random.rand(768) for _ in texts]
|
|
18
|
-
else:
|
|
19
|
-
return [np.random.rand(768) for _ in texts]
|
|
@@ -1,33 +0,0 @@
|
|
|
1
|
-
from typing import List, Dict, Any
|
|
2
|
-
import numpy as np
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
class Retriever:
|
|
6
|
-
"""
|
|
7
|
-
Simple vector retriever using cosine similarity.
|
|
8
|
-
"""
|
|
9
|
-
|
|
10
|
-
def __init__(self, embeddings: List[np.ndarray], documents: List[str]):
|
|
11
|
-
if len(embeddings) == 0:
|
|
12
|
-
self.embeddings = np.zeros((1, 768))
|
|
13
|
-
else:
|
|
14
|
-
self.embeddings = np.array(embeddings)
|
|
15
|
-
self.documents = documents or [""]
|
|
16
|
-
|
|
17
|
-
def retrieve(self, query: str, top_k: int = 5) -> List[Dict[str, Any]]:
|
|
18
|
-
if self.embeddings.size == 0 or len(self.documents) == 0:
|
|
19
|
-
return [{"text": "", "score": 0.0}]
|
|
20
|
-
|
|
21
|
-
query_vec = self._embed(query)
|
|
22
|
-
scores = self._cosine_similarity(query_vec, self.embeddings)
|
|
23
|
-
top_indices = np.argsort(scores)[::-1][:min(top_k, len(scores))]
|
|
24
|
-
return [{"text": self.documents[i], "score": float(scores[i])} for i in top_indices]
|
|
25
|
-
|
|
26
|
-
def _embed(self, query: str) -> np.ndarray:
|
|
27
|
-
dim = self.embeddings.shape[1] if len(self.embeddings.shape) > 1 else 768
|
|
28
|
-
return np.random.rand(dim)
|
|
29
|
-
|
|
30
|
-
def _cosine_similarity(self, a: np.ndarray, b: np.ndarray) -> np.ndarray:
|
|
31
|
-
a_norm = a / np.linalg.norm(a)
|
|
32
|
-
b_norm = b / np.linalg.norm(b, axis=1, keepdims=True)
|
|
33
|
-
return np.dot(b_norm, a_norm)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|