ragmint 0.1.0__tar.gz → 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. {ragmint-0.1.0/src/ragmint.egg-info → ragmint-0.2.0}/PKG-INFO +124 -30
  2. ragmint-0.2.0/README.md +284 -0
  3. {ragmint-0.1.0 → ragmint-0.2.0}/pyproject.toml +4 -2
  4. ragmint-0.2.0/src/ragmint/autotuner.py +33 -0
  5. {ragmint-0.1.0 → ragmint-0.2.0}/src/ragmint/core/evaluation.py +11 -0
  6. ragmint-0.2.0/src/ragmint/explainer.py +61 -0
  7. ragmint-0.2.0/src/ragmint/leaderboard.py +45 -0
  8. ragmint-0.2.0/src/ragmint/tests/conftest.py +16 -0
  9. ragmint-0.2.0/src/ragmint/tests/test_autotuner.py +42 -0
  10. ragmint-0.2.0/src/ragmint/tests/test_explainer.py +20 -0
  11. ragmint-0.2.0/src/ragmint/tests/test_explainer_integration.py +18 -0
  12. ragmint-0.2.0/src/ragmint/tests/test_integration_autotuner_ragmint.py +60 -0
  13. ragmint-0.2.0/src/ragmint/tests/test_leaderboard.py +39 -0
  14. ragmint-0.2.0/src/ragmint/tests/test_tuner.py +71 -0
  15. {ragmint-0.1.0 → ragmint-0.2.0}/src/ragmint/tuner.py +1 -1
  16. ragmint-0.2.0/src/ragmint/utils/data_loader.py +65 -0
  17. {ragmint-0.1.0 → ragmint-0.2.0/src/ragmint.egg-info}/PKG-INFO +124 -30
  18. {ragmint-0.1.0 → ragmint-0.2.0}/src/ragmint.egg-info/SOURCES.txt +9 -0
  19. {ragmint-0.1.0 → ragmint-0.2.0}/src/ragmint.egg-info/requires.txt +2 -0
  20. ragmint-0.1.0/README.md +0 -192
  21. ragmint-0.1.0/src/ragmint/tests/test_tuner.py +0 -38
  22. ragmint-0.1.0/src/ragmint/utils/data_loader.py +0 -35
  23. {ragmint-0.1.0 → ragmint-0.2.0}/LICENSE +0 -0
  24. {ragmint-0.1.0 → ragmint-0.2.0}/setup.cfg +0 -0
  25. {ragmint-0.1.0 → ragmint-0.2.0}/src/ragmint/__init__.py +0 -0
  26. {ragmint-0.1.0 → ragmint-0.2.0}/src/ragmint/__main__.py +0 -0
  27. {ragmint-0.1.0 → ragmint-0.2.0}/src/ragmint/core/__init__.py +0 -0
  28. {ragmint-0.1.0 → ragmint-0.2.0}/src/ragmint/core/chunking.py +0 -0
  29. {ragmint-0.1.0 → ragmint-0.2.0}/src/ragmint/core/embeddings.py +0 -0
  30. {ragmint-0.1.0 → ragmint-0.2.0}/src/ragmint/core/pipeline.py +0 -0
  31. {ragmint-0.1.0 → ragmint-0.2.0}/src/ragmint/core/reranker.py +0 -0
  32. {ragmint-0.1.0 → ragmint-0.2.0}/src/ragmint/core/retriever.py +0 -0
  33. {ragmint-0.1.0 → ragmint-0.2.0}/src/ragmint/experiments/__init__.py +0 -0
  34. {ragmint-0.1.0 → ragmint-0.2.0}/src/ragmint/optimization/__init__.py +0 -0
  35. {ragmint-0.1.0 → ragmint-0.2.0}/src/ragmint/optimization/search.py +0 -0
  36. {ragmint-0.1.0 → ragmint-0.2.0}/src/ragmint/tests/__init__.py +0 -0
  37. {ragmint-0.1.0 → ragmint-0.2.0}/src/ragmint/tests/test_pipeline.py +0 -0
  38. {ragmint-0.1.0 → ragmint-0.2.0}/src/ragmint/tests/test_retriever.py +0 -0
  39. {ragmint-0.1.0 → ragmint-0.2.0}/src/ragmint/tests/test_search.py +0 -0
  40. {ragmint-0.1.0 → ragmint-0.2.0}/src/ragmint/utils/__init__.py +0 -0
  41. {ragmint-0.1.0 → ragmint-0.2.0}/src/ragmint/utils/caching.py +0 -0
  42. {ragmint-0.1.0 → ragmint-0.2.0}/src/ragmint/utils/logger.py +0 -0
  43. {ragmint-0.1.0 → ragmint-0.2.0}/src/ragmint/utils/metrics.py +0 -0
  44. {ragmint-0.1.0 → ragmint-0.2.0}/src/ragmint.egg-info/dependency_links.txt +0 -0
  45. {ragmint-0.1.0 → ragmint-0.2.0}/src/ragmint.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ragmint
3
- Version: 0.1.0
3
+ Version: 0.2.0
4
4
  Summary: A modular framework for evaluating and optimizing RAG pipelines.
5
5
  Author-email: Andre Oliveira <oandreoliveira@outlook.com>
6
6
  License: Apache License 2.0
@@ -22,6 +22,8 @@ Requires-Dist: faiss-cpu; sys_platform != "darwin"
22
22
  Requires-Dist: optuna>=3.0
23
23
  Requires-Dist: pytest
24
24
  Requires-Dist: colorama
25
+ Requires-Dist: google-generativeai>=0.8.0
26
+ Requires-Dist: supabase>=2.4.0
25
27
  Dynamic: license-file
26
28
 
27
29
  # Ragmint
@@ -36,17 +38,19 @@ Dynamic: license-file
36
38
 
37
39
  **Ragmint** (Retrieval-Augmented Generation Model Inspection & Tuning) is a modular, developer-friendly Python library for **evaluating, optimizing, and tuning RAG (Retrieval-Augmented Generation) pipelines**.
38
40
 
39
- It provides a complete toolkit for **retriever selection**, **embedding model tuning**, and **automated RAG evaluation** with support for **Optuna-based Bayesian optimization**.
41
+ It provides a complete toolkit for **retriever selection**, **embedding model tuning**, and **automated RAG evaluation** with support for **Optuna-based Bayesian optimization**, **Auto-RAG tuning**, and **explainability** through Gemini or Claude.
40
42
 
41
43
  ---
42
44
 
43
45
  ## ✨ Features
44
46
 
45
47
  - ✅ **Automated hyperparameter optimization** (Grid, Random, Bayesian via Optuna)
48
+ - 🤖 **Auto-RAG Tuner** — dynamically recommends retriever–embedding pairs based on corpus size
49
+ - 🧠 **Explainability Layer** — interprets RAG performance via Gemini or Claude APIs
50
+ - 🏆 **Leaderboard Tracking** — stores and ranks experiment runs via JSON or external DB
46
51
  - 🔍 **Built-in RAG evaluation metrics** — faithfulness, recall, BLEU, ROUGE, latency
47
52
  - ⚙️ **Retrievers** — FAISS, Chroma, ElasticSearch
48
53
  - 🧩 **Embeddings** — OpenAI, HuggingFace
49
- - 🧠 **Rerankers** — MMR, CrossEncoder (extensible via plugin interface)
50
54
  - 💾 **Caching, experiment tracking, and reproducibility** out of the box
51
55
  - 🧰 **Clean modular structure** for easy integration in research and production setups
52
56
 
@@ -103,47 +107,133 @@ print(result)
103
107
 
104
108
  ---
105
109
 
110
+ ## 🧪 Dataset Options
111
+
112
+ Ragmint can automatically load evaluation datasets for your RAG pipeline:
113
+
114
+ | Mode | Example | Description |
115
+ |------|----------|-------------|
116
+ | 🧱 **Default** | `validation_set=None` | Uses built-in `experiments/validation_qa.json` |
117
+ | 📁 **Custom File** | `validation_set="data/my_eval.json"` | Load your own QA dataset (JSON or CSV) |
118
+ | 🌐 **Hugging Face Dataset** | `validation_set="squad"` | Automatically downloads benchmark datasets (requires `pip install datasets`) |
119
+
120
+ ### Example
121
+
122
+ ```python
123
+ from ragmint.tuner import RAGMint
124
+
125
+ ragmint = RAGMint(
126
+ docs_path="data/docs/",
127
+ retrievers=["faiss", "chroma"],
128
+ embeddings=["text-embedding-3-small"],
129
+ rerankers=["mmr"],
130
+ )
131
+
132
+ # Use built-in default
133
+ ragmint.optimize(validation_set=None)
134
+
135
+ # Use Hugging Face benchmark
136
+ ragmint.optimize(validation_set="squad")
137
+
138
+ # Use your own dataset
139
+ ragmint.optimize(validation_set="data/custom_qa.json")
140
+ ```
141
+
142
+ ---
143
+
144
+ ## 🧠 Auto-RAG Tuner
145
+
146
+ The **AutoRAGTuner** automatically recommends retriever–embedding combinations
147
+ based on corpus size and average document length.
148
+
149
+ ```python
150
+ from ragmint.autotuner import AutoRAGTuner
151
+
152
+ corpus_stats = {"size": 5000, "avg_len": 250}
153
+ tuner = AutoRAGTuner(corpus_stats)
154
+ recommendation = tuner.recommend()
155
+ print(recommendation)
156
+ # Example output: {"retriever": "Chroma", "embedding_model": "SentenceTransformers"}
157
+ ```
158
+
159
+ ---
160
+
161
+ ## 🏆 Leaderboard Tracking
162
+
163
+ Track and visualize your best experiments across runs.
164
+
165
+ ```python
166
+ from ragmint.leaderboard import Leaderboard
167
+
168
+ lb = Leaderboard("experiments/leaderboard.json")
169
+ lb.add_entry({"trial": 1, "faithfulness": 0.87, "latency": 0.12})
170
+ lb.show_top(3)
171
+ ```
172
+
173
+ ---
174
+
175
+ ## 🧠 Explainability with Gemini / Claude
176
+
177
+ Compare two RAG configurations and receive natural language insights
178
+ on **why** one performs better.
179
+
180
+ ```python
181
+ from ragmint.explainer import explain_results
182
+
183
+ config_a = {"retriever": "FAISS", "embedding_model": "OpenAI"}
184
+ config_b = {"retriever": "Chroma", "embedding_model": "SentenceTransformers"}
185
+
186
+ explanation = explain_results(config_a, config_b, model="gemini-1.5-pro")
187
+ print(explanation)
188
+ ```
189
+
190
+ > Set your API keys in a `.env` file or via environment variables:
191
+ > ```
192
+ > export GOOGLE_API_KEY="your_gemini_key"
193
+ > export ANTHROPIC_API_KEY="your_claude_key"
194
+ > ```
195
+
196
+ ---
197
+
106
198
  ## 🧩 Folder Structure
107
199
 
108
200
  ```
109
201
  ragmint/
110
202
  ├── core/
111
- │ ├── pipeline.py # RAGPipeline implementation
112
- │ ├── retriever.py # Retriever logic (FAISS, Chroma)
113
- │ ├── reranker.py # MMR + CrossEncoder rerankers
114
- └── embedding.py # Embedding backends
115
- ├── tuner.py # Grid, Random, Bayesian optimization (Optuna)
116
- ├── utils/ # Metrics, logging, caching helpers
117
- ├── configs/ # Default experiment configs
118
- ├── experiments/ # Saved experiment results
119
- ├── tests/ # Unit tests for all components
120
- ├── main.py # CLI entrypoint for tuning
121
- └── pyproject.toml # Project dependencies & build metadata
203
+ │ ├── pipeline.py
204
+ │ ├── retriever.py
205
+ │ ├── reranker.py
206
+ │ ├── embedding.py
207
+ │ └── evaluation.py
208
+ ├── autotuner.py
209
+ ├── explainer.py
210
+ ├── leaderboard.py
211
+ ├── tuner.py
212
+ ├── utils/
213
+ ├── configs/
214
+ ├── experiments/
215
+ ├── tests/
216
+ └── main.py
122
217
  ```
123
218
 
124
219
  ---
125
220
 
126
221
  ## 🧪 Running Tests
127
222
 
128
- To verify your setup:
129
-
130
223
  ```bash
131
224
  pytest -v
132
225
  ```
133
226
 
134
- Or to test a specific component (e.g., reranker):
135
-
227
+ To include integration tests with Gemini or Claude APIs:
136
228
  ```bash
137
- pytest tests/test_reranker.py -v
229
+ pytest -m integration
138
230
  ```
139
231
 
140
- All tests are designed for **Pytest** and run with lightweight mock data.
141
-
142
232
  ---
143
233
 
144
234
  ## ⚙️ Configuration via `pyproject.toml`
145
235
 
146
- Your `pyproject.toml` automatically includes:
236
+ Your `pyproject.toml` includes all required dependencies:
147
237
 
148
238
  ```toml
149
239
  [project]
@@ -158,6 +248,8 @@ dependencies = [
158
248
  "pytest",
159
249
  "openai",
160
250
  "tqdm",
251
+ "google-generativeai",
252
+ "google-genai",
161
253
  ]
162
254
  ```
163
255
 
@@ -165,10 +257,10 @@ dependencies = [
165
257
 
166
258
  ## 📊 Example Experiment Workflow
167
259
 
168
- 1. Define your retriever and reranker configuration in YAML
169
- 2. Launch an optimization search (Grid, Random, or Bayesian)
170
- 3. Ragmint evaluates combinations automatically and reports top results
171
- 4. Export best parameters for production pipelines
260
+ 1. Define your retriever, embedding, and reranker setup
261
+ 2. Launch optimization (Grid, Random, Bayesian) or AutoTune
262
+ 3. Compare performance with explainability
263
+ 4. Persist results to leaderboard for later inspection
172
264
 
173
265
  ---
174
266
 
@@ -181,7 +273,7 @@ flowchart TD
181
273
  C --> D[Reranker]
182
274
  D --> E[Generator]
183
275
  E --> F[Evaluation]
184
- F --> G[Optuna Tuner]
276
+ F --> G[Optuna / AutoRAGTuner]
185
277
  G -->|Best Params| B
186
278
  ```
187
279
 
@@ -191,8 +283,9 @@ flowchart TD
191
283
 
192
284
  ```
193
285
  [INFO] Starting Bayesian optimization with Optuna
194
- [INFO] Trial 7 finished: recall=0.83, latency=0.42s
286
+ [INFO] Trial 7 finished: faithfulness=0.83, latency=0.42s
195
287
  [INFO] Best parameters: {'lambda_param': 0.6, 'retriever': 'faiss'}
288
+ [INFO] AutoRAGTuner: Suggested retriever=Chroma for medium corpus
196
289
  ```
197
290
 
198
291
  ---
@@ -200,8 +293,9 @@ flowchart TD
200
293
  ## 🧠 Why Ragmint?
201
294
 
202
295
  - Built for **RAG researchers**, **AI engineers**, and **LLM ops**
203
- - Works with **LangChain**, **LlamaIndex**, or standalone RAG setups
204
- - Designed for **extensibility** — plug in your own models, retrievers, or metrics
296
+ - Works with **LangChain**, **LlamaIndex**, or standalone setups
297
+ - Designed for **extensibility** — plug in your own retrievers, models, or metrics
298
+ - Integrated **explainability and leaderboard** modules for research and production
205
299
 
206
300
  ---
207
301
 
@@ -0,0 +1,284 @@
1
+ # Ragmint
2
+
3
+ ![Python](https://img.shields.io/badge/python-3.9%2B-blue)
4
+ ![License](https://img.shields.io/badge/license-Apache%202.0-green)
5
+ ![Tests](https://github.com/andyolivers/ragmint/actions/workflows/tests.yml/badge.svg)
6
+ ![Optuna](https://img.shields.io/badge/Optuna-Integrated-orange)
7
+ ![Status](https://img.shields.io/badge/Status-Active-success)
8
+
9
+ ![](/assets/images/ragmint-banner.png)
10
+
11
+ **Ragmint** (Retrieval-Augmented Generation Model Inspection & Tuning) is a modular, developer-friendly Python library for **evaluating, optimizing, and tuning RAG (Retrieval-Augmented Generation) pipelines**.
12
+
13
+ It provides a complete toolkit for **retriever selection**, **embedding model tuning**, and **automated RAG evaluation** with support for **Optuna-based Bayesian optimization**, **Auto-RAG tuning**, and **explainability** through Gemini or Claude.
14
+
15
+ ---
16
+
17
+ ## ✨ Features
18
+
19
+ - ✅ **Automated hyperparameter optimization** (Grid, Random, Bayesian via Optuna)
20
+ - 🤖 **Auto-RAG Tuner** — dynamically recommends retriever–embedding pairs based on corpus size
21
+ - 🧠 **Explainability Layer** — interprets RAG performance via Gemini or Claude APIs
22
+ - 🏆 **Leaderboard Tracking** — stores and ranks experiment runs via JSON or external DB
23
+ - 🔍 **Built-in RAG evaluation metrics** — faithfulness, recall, BLEU, ROUGE, latency
24
+ - ⚙️ **Retrievers** — FAISS, Chroma, ElasticSearch
25
+ - 🧩 **Embeddings** — OpenAI, HuggingFace
26
+ - 💾 **Caching, experiment tracking, and reproducibility** out of the box
27
+ - 🧰 **Clean modular structure** for easy integration in research and production setups
28
+
29
+ ---
30
+
31
+ ## 🚀 Quick Start
32
+
33
+ ### 1️⃣ Installation
34
+
35
+ ```bash
36
+ git clone https://github.com/andyolivers/ragmint.git
37
+ cd ragmint
38
+ pip install -e .
39
+ ```
40
+
41
+ > The `-e` flag installs Ragmint in editable (development) mode.
42
+ > Requires **Python ≥ 3.9**.
43
+
44
+ ---
45
+
46
+ ### 2️⃣ Run a RAG Optimization Experiment
47
+
48
+ ```bash
49
+ python ragmint/main.py --config configs/default.yaml --search bayesian
50
+ ```
51
+
52
+ Example `configs/default.yaml`:
53
+ ```yaml
54
+ retriever: faiss
55
+ embedding_model: text-embedding-3-small
56
+ reranker:
57
+ mode: mmr
58
+ lambda_param: 0.5
59
+ optimization:
60
+ search_method: bayesian
61
+ n_trials: 20
62
+ ```
63
+
64
+ ---
65
+
66
+ ### 3️⃣ Manual Pipeline Usage
67
+
68
+ ```python
69
+ from ragmint.core.pipeline import RAGPipeline
70
+
71
+ pipeline = RAGPipeline({
72
+ "embedding_model": "text-embedding-3-small",
73
+ "retriever": "faiss",
74
+ })
75
+
76
+ result = pipeline.run("What is retrieval-augmented generation?")
77
+ print(result)
78
+ ```
79
+
80
+ ---
81
+
82
+ ## 🧪 Dataset Options
83
+
84
+ Ragmint can automatically load evaluation datasets for your RAG pipeline:
85
+
86
+ | Mode | Example | Description |
87
+ |------|----------|-------------|
88
+ | 🧱 **Default** | `validation_set=None` | Uses built-in `experiments/validation_qa.json` |
89
+ | 📁 **Custom File** | `validation_set="data/my_eval.json"` | Load your own QA dataset (JSON or CSV) |
90
+ | 🌐 **Hugging Face Dataset** | `validation_set="squad"` | Automatically downloads benchmark datasets (requires `pip install datasets`) |
91
+
92
+ ### Example
93
+
94
+ ```python
95
+ from ragmint.tuner import RAGMint
96
+
97
+ ragmint = RAGMint(
98
+ docs_path="data/docs/",
99
+ retrievers=["faiss", "chroma"],
100
+ embeddings=["text-embedding-3-small"],
101
+ rerankers=["mmr"],
102
+ )
103
+
104
+ # Use built-in default
105
+ ragmint.optimize(validation_set=None)
106
+
107
+ # Use Hugging Face benchmark
108
+ ragmint.optimize(validation_set="squad")
109
+
110
+ # Use your own dataset
111
+ ragmint.optimize(validation_set="data/custom_qa.json")
112
+ ```
113
+
114
+ ---
115
+
116
+ ## 🧠 Auto-RAG Tuner
117
+
118
+ The **AutoRAGTuner** automatically recommends retriever–embedding combinations
119
+ based on corpus size and average document length.
120
+
121
+ ```python
122
+ from ragmint.autotuner import AutoRAGTuner
123
+
124
+ corpus_stats = {"size": 5000, "avg_len": 250}
125
+ tuner = AutoRAGTuner(corpus_stats)
126
+ recommendation = tuner.recommend()
127
+ print(recommendation)
128
+ # Example output: {"retriever": "Chroma", "embedding_model": "SentenceTransformers"}
129
+ ```
130
+
131
+ ---
132
+
133
+ ## 🏆 Leaderboard Tracking
134
+
135
+ Track and visualize your best experiments across runs.
136
+
137
+ ```python
138
+ from ragmint.leaderboard import Leaderboard
139
+
140
+ lb = Leaderboard("experiments/leaderboard.json")
141
+ lb.add_entry({"trial": 1, "faithfulness": 0.87, "latency": 0.12})
142
+ lb.show_top(3)
143
+ ```
144
+
145
+ ---
146
+
147
+ ## 🧠 Explainability with Gemini / Claude
148
+
149
+ Compare two RAG configurations and receive natural language insights
150
+ on **why** one performs better.
151
+
152
+ ```python
153
+ from ragmint.explainer import explain_results
154
+
155
+ config_a = {"retriever": "FAISS", "embedding_model": "OpenAI"}
156
+ config_b = {"retriever": "Chroma", "embedding_model": "SentenceTransformers"}
157
+
158
+ explanation = explain_results(config_a, config_b, model="gemini-1.5-pro")
159
+ print(explanation)
160
+ ```
161
+
162
+ > Set your API keys in a `.env` file or via environment variables:
163
+ > ```
164
+ > export GOOGLE_API_KEY="your_gemini_key"
165
+ > export ANTHROPIC_API_KEY="your_claude_key"
166
+ > ```
167
+
168
+ ---
169
+
170
+ ## 🧩 Folder Structure
171
+
172
+ ```
173
+ ragmint/
174
+ ├── core/
175
+ │ ├── pipeline.py
176
+ │ ├── retriever.py
177
+ │ ├── reranker.py
178
+ │ ├── embedding.py
179
+ │ └── evaluation.py
180
+ ├── autotuner.py
181
+ ├── explainer.py
182
+ ├── leaderboard.py
183
+ ├── tuner.py
184
+ ├── utils/
185
+ ├── configs/
186
+ ├── experiments/
187
+ ├── tests/
188
+ └── main.py
189
+ ```
190
+
191
+ ---
192
+
193
+ ## 🧪 Running Tests
194
+
195
+ ```bash
196
+ pytest -v
197
+ ```
198
+
199
+ To include integration tests with Gemini or Claude APIs:
200
+ ```bash
201
+ pytest -m integration
202
+ ```
203
+
204
+ ---
205
+
206
+ ## ⚙️ Configuration via `pyproject.toml`
207
+
208
+ Your `pyproject.toml` includes all required dependencies:
209
+
210
+ ```toml
211
+ [project]
212
+ name = "ragmint"
213
+ version = "0.2.0"
214
+ dependencies = [
215
+ "numpy",
216
+ "optuna",
217
+ "scikit-learn",
218
+ "faiss-cpu",
219
+ "chromadb",
220
+ "pytest",
221
+ "openai",
222
+ "tqdm",
223
+ "google-generativeai",
224
+ "google-genai",
225
+ ]
226
+ ```
227
+
228
+ ---
229
+
230
+ ## 📊 Example Experiment Workflow
231
+
232
+ 1. Define your retriever, embedding, and reranker setup
233
+ 2. Launch optimization (Grid, Random, Bayesian) or AutoTune
234
+ 3. Compare performance with explainability
235
+ 4. Persist results to leaderboard for later inspection
236
+
237
+ ---
238
+
239
+ ## 🧬 Architecture Overview
240
+
241
+ ```mermaid
242
+ flowchart TD
243
+ A[Query] --> B[Embedder]
244
+ B --> C[Retriever]
245
+ C --> D[Reranker]
246
+ D --> E[Generator]
247
+ E --> F[Evaluation]
248
+ F --> G[Optuna / AutoRAGTuner]
249
+ G -->|Best Params| B
250
+ ```
251
+
252
+ ---
253
+
254
+ ## 📘 Example Output
255
+
256
+ ```
257
+ [INFO] Starting Bayesian optimization with Optuna
258
+ [INFO] Trial 7 finished: faithfulness=0.83, latency=0.42s
259
+ [INFO] Best parameters: {'lambda_param': 0.6, 'retriever': 'faiss'}
260
+ [INFO] AutoRAGTuner: Suggested retriever=Chroma for medium corpus
261
+ ```
262
+
263
+ ---
264
+
265
+ ## 🧠 Why Ragmint?
266
+
267
+ - Built for **RAG researchers**, **AI engineers**, and **LLM ops**
268
+ - Works with **LangChain**, **LlamaIndex**, or standalone setups
269
+ - Designed for **extensibility** — plug in your own retrievers, models, or metrics
270
+ - Integrated **explainability and leaderboard** modules for research and production
271
+
272
+ ---
273
+
274
+ ## ⚖️ License
275
+
276
+ Licensed under the **Apache License 2.0** — free for personal, research, and commercial use.
277
+
278
+ ---
279
+
280
+ ## 👤 Author
281
+
282
+ **André Oliveira**
283
+ [andyolivers.com](https://andyolivers.com)
284
+ Data Scientist | AI Engineer
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "ragmint"
7
- version = "0.1.0"
7
+ version = "0.2.0"
8
8
  description = "A modular framework for evaluating and optimizing RAG pipelines."
9
9
  readme = "README.md"
10
10
  license = { text = "Apache License 2.0" }
@@ -24,7 +24,9 @@ dependencies = [
24
24
  "faiss-cpu; sys_platform != 'darwin'",
25
25
  "optuna>=3.0",
26
26
  "pytest",
27
- "colorama"
27
+ "colorama",
28
+ "google-generativeai>=0.8.0",
29
+ "supabase>=2.4.0"
28
30
  ]
29
31
 
30
32
  [project.urls]
@@ -0,0 +1,33 @@
1
+ """
2
+ Auto-RAG Tuner
3
+ --------------
4
+ Recommends retriever–embedding pairs dynamically based on corpus size
5
+ and dataset characteristics. Integrates seamlessly with RAGMint evaluator.
6
+ """
7
+
8
+ from .core.evaluation import evaluate_config
9
+
10
+
11
+ class AutoRAGTuner:
12
+ def __init__(self, corpus_stats: dict):
13
+ """
14
+ corpus_stats: dict
15
+ Example: {'size': 12000, 'avg_len': 240}
16
+ """
17
+ self.corpus_stats = corpus_stats
18
+
19
+ def recommend(self):
20
+ size = self.corpus_stats.get("size", 0)
21
+ avg_len = self.corpus_stats.get("avg_len", 0)
22
+
23
+ if size < 1000:
24
+ return {"retriever": "BM25", "embedding_model": "OpenAI"}
25
+ elif size < 10000:
26
+ return {"retriever": "Chroma", "embedding_model": "SentenceTransformers"}
27
+ else:
28
+ return {"retriever": "FAISS", "embedding_model": "InstructorXL"}
29
+
30
+ def auto_tune(self, validation_data):
31
+ config = self.recommend()
32
+ results = evaluate_config(config, validation_data)
33
+ return {"recommended": config, "results": results}
@@ -25,3 +25,14 @@ class Evaluator:
25
25
 
26
26
  def _similarity(self, a: str, b: str) -> float:
27
27
  return SequenceMatcher(None, a, b).ratio()
28
+
29
def evaluate_config(config, validation_data):
    """Score every validation sample with a fresh ``Evaluator``.

    ``config`` is accepted but is not used by this function. Each sample is
    read with ``.get`` for the keys ``"query"``, ``"answer"`` and
    ``"context"``, each defaulting to an empty string when absent.

    Returns the list of ``Evaluator.evaluate`` results, one per sample, in
    input order.
    """
    scorer = Evaluator()
    return [
        scorer.evaluate(
            item.get("query", ""),
            item.get("answer", ""),
            item.get("context", ""),
        )
        for item in validation_data
    ]
38
+
@@ -0,0 +1,61 @@
1
+ """
2
+ Interpretability Layer
3
+ ----------------------
4
+ Uses Gemini or Anthropic Claude to explain why one RAG configuration
5
+ outperforms another. Falls back gracefully if no API key is provided.
6
+ """
7
+
8
+ import os
9
+ import json
10
+
11
+
12
def explain_results(results_a: dict, results_b: dict, model: str = "gemini-1.5-pro") -> str:
    """
    Generate a natural-language explanation comparing two RAG experiment results.

    Priority:
    1. Anthropic Claude (if ANTHROPIC_API_KEY is set)
    2. Google Gemini (if GOOGLE_API_KEY, or the legacy GEMINI_API_KEY, is set)
    3. Fallback text message

    Args:
        results_a: JSON-serializable metrics/config of the first experiment.
        results_b: JSON-serializable metrics/config of the second experiment.
        model: Model name passed to the Gemini client; not used on the
            Claude path, which uses a fixed model.

    Returns:
        The provider's explanation text, or a bracket-prefixed diagnostic
        string when the selected provider fails or no API key is configured.
        Provider errors are reported in the returned string, not raised.
    """
    prompt = f"""
    You are an AI evaluation expert.
    Compare these two RAG experiment results and explain why one performs better.
    Metrics A: {json.dumps(results_a, indent=2)}
    Metrics B: {json.dumps(results_b, indent=2)}
    Provide a concise, human-friendly explanation and practical improvement tips.
    """

    anthropic_key = os.getenv("ANTHROPIC_API_KEY")
    # GOOGLE_API_KEY is the name the fallback message below tells users to
    # set; GEMINI_API_KEY is still honoured so existing setups keep working.
    google_key = os.getenv("GOOGLE_API_KEY") or os.getenv("GEMINI_API_KEY")

    # 1. Try Anthropic Claude first. A Claude failure is reported as-is;
    #    Gemini is only used when no Anthropic key is set.
    if anthropic_key:
        try:
            # Imported lazily so the SDK is only needed when this path runs.
            from anthropic import Anthropic
            client = Anthropic(api_key=anthropic_key)
            response = client.messages.create(
                model="claude-3-opus-20240229",
                max_tokens=300,
                messages=[{"role": "user", "content": prompt}],
            )
            return response.content[0].text
        except Exception as e:
            return f"[Claude unavailable] {e}"

    # 2. Fall back to Google Gemini.
    if google_key:
        try:
            # Imported lazily so the SDK is only needed when this path runs.
            import google.generativeai as genai
            genai.configure(api_key=google_key)
            response = genai.GenerativeModel(model).generate_content(prompt)
            return response.text
        except Exception as e:
            return f"[Gemini unavailable] {e}"

    # 3. Neither key is available.
    return (
        "[No LLM available] Please set ANTHROPIC_API_KEY or GOOGLE_API_KEY "
        "to enable interpretability via Claude or Gemini."
    )