ragmint 0.2.1__py3-none-any.whl → 0.4.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36) hide show
  1. ragmint/app.py +512 -0
  2. ragmint/autotuner.py +201 -17
  3. ragmint/core/chunking.py +68 -4
  4. ragmint/core/embeddings.py +46 -10
  5. ragmint/core/evaluation.py +33 -14
  6. ragmint/core/pipeline.py +34 -10
  7. ragmint/core/retriever.py +152 -20
  8. ragmint/experiments/validation_qa.json +1 -14
  9. ragmint/explainer.py +47 -20
  10. ragmint/integrations/__init__.py +0 -0
  11. ragmint/integrations/config_adapter.py +96 -0
  12. ragmint/integrations/langchain_prebuilder.py +99 -0
  13. ragmint/leaderboard.py +41 -35
  14. ragmint/qa_generator.py +190 -0
  15. ragmint/tests/test_autotuner.py +52 -30
  16. ragmint/tests/test_config_adapter.py +39 -0
  17. ragmint/tests/test_embeddings.py +46 -0
  18. ragmint/tests/test_explainer.py +28 -12
  19. ragmint/tests/test_integration_autotuner_ragmint.py +39 -52
  20. ragmint/tests/test_langchain_prebuilder.py +82 -0
  21. ragmint/tests/test_leaderboard.py +78 -25
  22. ragmint/tests/test_pipeline.py +3 -2
  23. ragmint/tests/test_qa_generator.py +66 -0
  24. ragmint/tests/test_retriever.py +3 -2
  25. ragmint/tests/test_tuner.py +1 -1
  26. ragmint/tuner.py +109 -22
  27. ragmint-0.4.6.data/data/README.md +485 -0
  28. ragmint-0.4.6.dist-info/METADATA +530 -0
  29. ragmint-0.4.6.dist-info/RECORD +48 -0
  30. ragmint-0.4.6.dist-info/licenses/LICENSE +19 -0
  31. ragmint/tests/test_explainer_integration.py +0 -18
  32. ragmint-0.2.1.dist-info/METADATA +0 -27
  33. ragmint-0.2.1.dist-info/RECORD +0 -38
  34. {ragmint-0.2.1.dist-info/licenses → ragmint-0.4.6.data/data}/LICENSE +0 -0
  35. {ragmint-0.2.1.dist-info → ragmint-0.4.6.dist-info}/WHEEL +0 -0
  36. {ragmint-0.2.1.dist-info → ragmint-0.4.6.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,530 @@
1
+ Metadata-Version: 2.4
2
+ Name: ragmint
3
+ Version: 0.4.6
4
+ Summary: A modular framework for evaluating and optimizing RAG pipelines.
5
+ Author-email: Andre Oliveira <oandreoliveira@outlook.com>
6
+ License: Apache License 2.0
7
+ Project-URL: Homepage, https://github.com/andyolivers/ragmint
8
+ Project-URL: Documentation, https://andyolivers.com
9
+ Project-URL: Issues, https://github.com/andyolivers/ragmint/issues
10
+ Keywords: RAG,LLM,retrieval,optimization,AI,evaluation,chunking,autotuning
11
+ Requires-Python: >=3.9
12
+ Description-Content-Type: text/markdown
13
+ License-File: LICENSE
14
+ Requires-Dist: numpy<2.0.0
15
+ Requires-Dist: pandas>=2.0
16
+ Requires-Dist: scikit-learn>=1.3
17
+ Requires-Dist: sentence-transformers>=2.2.2
18
+ Requires-Dist: chromadb>=0.3.1
19
+ Requires-Dist: faiss-cpu; sys_platform != "darwin"
20
+ Requires-Dist: faiss-cpu==1.7.4; sys_platform == "darwin"
21
+ Requires-Dist: rank-bm25>=0.2.2
22
+ Requires-Dist: optuna>=3.0
23
+ Requires-Dist: tqdm
24
+ Requires-Dist: colorama
25
+ Requires-Dist: pyyaml
26
+ Requires-Dist: python-dotenv
27
+ Requires-Dist: openai>=1.0.0
28
+ Requires-Dist: google-generativeai>=0.8.0
29
+ Requires-Dist: anthropic>=0.25.0
30
+ Requires-Dist: pytest
31
+ Requires-Dist: langchain>=0.2.5
32
+ Requires-Dist: langchain-community>=0.2.5
33
+ Requires-Dist: langchain-text-splitters>=0.2.1
34
+ Requires-Dist: gradio>=4.38.0
35
+ Requires-Dist: matplotlib>=3.8.0
36
+ Provides-Extra: dev
37
+ Requires-Dist: black; extra == "dev"
38
+ Requires-Dist: flake8; extra == "dev"
39
+ Requires-Dist: isort; extra == "dev"
40
+ Requires-Dist: pytest-cov; extra == "dev"
41
+ Provides-Extra: docs
42
+ Requires-Dist: mkdocs; extra == "docs"
43
+ Requires-Dist: mkdocs-material; extra == "docs"
44
+ Dynamic: license-file
45
+
46
+ # Ragmint
47
+
48
+ <p align="center">
49
+ <img src="src/ragmint/assets/img/ragmint-banner.png" width="auto" height="70px" alt="Ragmint Banner">
50
+ </p>
51
+
52
+ ![Python](https://img.shields.io/badge/python-3.9%2B-blue)
53
+ ![License](https://img.shields.io/badge/license-Apache%202.0-green)
54
+ ![Tests](https://github.com/andyolivers/ragmint/actions/workflows/tests.yml/badge.svg)
55
+ ![Optuna](https://img.shields.io/badge/Optuna-Integrated-orange)
56
+ ![Status](https://img.shields.io/badge/Status-Active-success)
57
+ ![PyPI](https://img.shields.io/pypi/v/ragmint?color=blue)
58
+ ![Docs](https://img.shields.io/badge/docs-latest-blueviolet)
59
+
60
+
61
+ **Ragmint** (Retrieval-Augmented Generation Model Inspection & Tuning) is a modular, developer-friendly Python library for **evaluating, optimizing, and tuning RAG (Retrieval-Augmented Generation) pipelines**.
62
+
63
+ It provides a complete toolkit for **retriever selection**, **embedding model tuning**, **automated RAG evaluation**, and **config-driven prebuilding** of pipelines with support for **Optuna-based Bayesian optimization**, **Auto-RAG tuning**, **chunking**, and **explainability** through Gemini or Claude.
64
+
65
+ ---
66
+
67
+ ## ✨ Features
68
+
69
+ - ✅ **Automated hyperparameter optimization** (Grid, Random, Bayesian via Optuna).
70
+ - 🤖 **Auto-RAG Tuner** — dynamically recommends retriever–embedding pairs based on corpus size and document statistics, **suggests multiple chunk sizes with overlaps**, and can **test configurations to identify the best-performing RAG setup**.
71
+ - 🧮 **Validation QA Generator** — automatically creates QA datasets from a corpus for evaluating and tuning RAG pipelines when no labeled data is available.
72
+ - 🧠 **Explainability Layer** — interprets RAG performance via Gemini or Claude APIs.
73
+ - 🏆 **Leaderboard Tracking** — stores and ranks experiment runs via JSON or external DB.
74
+ - 🔍 **Built-in RAG evaluation metrics** — faithfulness, recall, BLEU, ROUGE, latency.
75
+ - 📦 **Chunking system** — automatic or configurable `chunk_size` and `overlap` for documents with multiple suggested pairs.
76
+ - ⚙️ **Retrievers** — FAISS, Chroma, scikit-learn.
77
+ - 🧩 **Embeddings** — Hugging Face.
78
+ - 💾 **Caching, experiment tracking, and reproducibility** out of the box.
79
+ - 🧰 **Clean modular structure** for easy integration in research and production setups.
80
+ - 🏗️ **Langchain Prebuilder** — prepares pipelines, applies chunking, embeddings, and vector store creation automatically.
81
+ - ⚙️ **Config Adapter (LangchainConfigAdapter)** — normalizes configuration, fills defaults, validates retrievers.
82
+
83
+ ---
84
+
85
+ ## 🚀 Quick Start
86
+
87
+ ### Installation
88
+
89
+ ```bash
90
+ git clone https://github.com/andyolivers/ragmint.git
91
+ cd ragmint
92
+ pip install -e .
93
+ python -m ragmint.app
94
+ ```
95
+
96
+ > The `-e` flag installs Ragmint in editable (development) mode.
97
+ > Requires **Python ≥ 3.9**.
98
+
99
+ ### Installation via PyPI
100
+
101
+ ```bash
102
+ pip install ragmint
103
+ ```
104
+
105
+ ---
106
+
107
+ ### 2️⃣ Run a RAG Optimization Experiment
108
+
109
+ ```bash
110
+ python ragmint/main.py --config configs/default.yaml --search bayesian
111
+ ```
112
+
113
+ Example `configs/default.yaml`:
114
+ ```yaml
115
+ retriever: faiss
116
+ embedding_model: text-embedding-3-small
117
+ chunk_size: 500
118
+ overlap: 100
119
+ reranker:
120
+ mode: mmr
121
+ lambda_param: 0.5
122
+ optimization:
123
+ search_method: bayesian
124
+ n_trials: 20
125
+ ```
126
+
127
+ ---
128
+
129
+ ### 3️⃣ Manual Pipeline Usage
130
+
131
+ ```python
132
+ from ragmint.prebuilder import PreBuilder
133
+ from ragmint.tuner import RAGMint
134
+
135
+ # Prebuild pipeline (chunking, embeddings, vector store)
136
+ prebuilder = PreBuilder(
137
+ docs_path="data/docs/",
138
+ config_path="configs/default.yaml"
139
+ )
140
+ pipeline = prebuilder.build_pipeline()
141
+
142
+ # Initialize RAGMint with prebuilt components
143
+ rag = RAGMint(pipeline=pipeline)
144
+
145
+ # Run optimization
146
+ best, results = rag.optimize(validation_set=None, metric="faithfulness", trials=3)
147
+ print("Best configuration:", best)
148
+
149
+ ```
150
+ ---
151
+ # 🧩 Embeddings and Retrievers
152
+
153
+ **Ragmint** supports a flexible set of embeddings and retrievers, allowing you to adapt easily to various **RAG architectures**.
154
+
155
+ ---
156
+ ## 🧩 Chunking System
157
+
158
+ * **Automatically splits documents** into chunks with `chunk_size` and `overlap` parameters.
159
+ * **Supports default values** if not provided in configuration.
160
+ * **Optimized** for downstream **retrieval and embeddings**.
161
+ * **Enables adaptive chunking strategies** in future releases.
162
+ ---
163
+ ## 🧮 Validation QA Generator
164
+
165
+ The **QA Generator** module automatically creates **question–answer (QA) validation datasets** from any corpus of `.txt` documents.
166
+ This dataset can be used to **evaluate and tune RAG pipelines** inside Ragmint when no labeled data is available.
167
+
168
+ ### ✨ Key Capabilities
169
+
170
+ - 🔁 **Batch processing** — splits large corpora into batches to prevent token overflows and API timeouts.
171
+
172
+ - 🧠 **Topic-aware question estimation** — dynamically determines how many questions to generate per document based on:
173
+ - Document length (logarithmic scaling)
174
+ - Topic diversity (via `SentenceTransformer` + `KMeans` clustering)
175
+
176
+ - 🤖 **LLM-powered QA synthesis** — generates factual QA pairs using **Gemini** or **Claude** models.
177
+
178
+ - 💾 **Automatic JSON export** — saves the generated dataset to `experiments/validation_qa.json` (configurable).
179
+
180
+ ### ⚙️ Usage
181
+
182
+ You can run the generator directly from the command line:
183
+
184
+ ```bash
185
+ python -m ragmint.qa_generator --density 0.005
186
+ ```
187
+
188
+ ### 💡 Example: Using in Python
189
+
190
+ ```python
191
+ from ragmint.qa_generator import generate_validation_qa
192
+
193
+ generate_validation_qa(
194
+ docs_path="data/docs", # Folder with .txt documents
195
+ output_path="experiments/validation_qa.json", # Output JSON file
196
+ llm_model="gemini-2.5-flash-lite", # or "claude-3-opus-20240229"
197
+ batch_size=5, # Number of docs per LLM call
198
+ sleep_between_batches=2, # Wait time between calls (seconds)
199
+ min_q=3, # Minimum questions per doc
200
+ max_q=25 # Maximum questions per doc
201
+ )
202
+ ```
203
+ ✅ The generator supports both Gemini and Claude models.
204
+ Set your API key in a `.env` file or via environment variables:
205
+ ```
206
+ export GOOGLE_API_KEY="your_gemini_key"
207
+ export ANTHROPIC_API_KEY="your_claude_key"
208
+ ```
209
+
210
+ ---
211
+ ## 🧩 Langchain Config Adapter
212
+
213
+ * **Ensures consistent configuration** across pipeline components.
214
+ * **Normalizes retriever and embedding names** (e.g., `faiss`, `sentence-transformers/...`).
215
+ * **Adds default chunk parameters** when missing.
216
+ * **Validates retriever backends** and **raises clear errors** for unsupported options.
217
+
218
+ ---
219
+ ## 🧩 Langchain Prebuilder
220
+
221
+ **Automates pipeline preparation:**
222
+ 1. Reads documents
223
+ 2. Applies chunking
224
+ 3. Creates embeddings
225
+ 4. Initializes retriever / vector store
226
+ 5. Returns a **ready-to-use pipeline** for RAGMint or custom usage.
227
+
228
+ ---
229
+
230
+ ## 🔤 Available Embeddings (Hugging Face)
231
+
232
+ You can select from the following models:
233
+
234
+ * `sentence-transformers/all-MiniLM-L6-v2` — **lightweight**, general-purpose
235
+ * `sentence-transformers/all-mpnet-base-v2` — **higher accuracy**, slower
236
+ * `BAAI/bge-base-en-v1.5` — **English**, dense embeddings
237
+ * `intfloat/multilingual-e5-base` — ideal for **multilingual corpora**
238
+
239
+
240
+
241
+ ### Configuration Example
242
+
243
+ Use the following format in your config file to specify the embedding model:
244
+
245
+ ```yaml
246
+ embedding_model: sentence-transformers/all-MiniLM-L6-v2
247
+ ```
248
+ ---
249
+
250
+ ## 🔍 Available Retrievers
251
+
252
+ **Ragmint** integrates multiple **retrieval backends** to suit different needs:
253
+
254
+ | Retriever | Description |
255
+ | :--- | :--- |
256
+ | **FAISS** | Fast vector similarity search; efficient for dense embeddings |
257
+ | **Chroma** | Persistent vector DB; works well for incremental updates |
258
+ | **scikit-learn (NearestNeighbors)** | Lightweight, zero-dependency local retriever |
259
+
260
+
261
+ ### Configuration Example
262
+
263
+ To specify the retriever in your configuration file, use the following format:
264
+
265
+ ```yaml
266
+ retriever: faiss
267
+ ```
268
+
269
+ ---
270
+
271
+ ## 🧪 Dataset Options
272
+
273
+ Ragmint can automatically load evaluation datasets for your RAG pipeline:
274
+
275
+ | Mode | Example | Description |
276
+ |------|----------|-------------|
277
+ | 🧱 **Default** | `validation_set=None` | Uses built-in `experiments/validation_qa.json` |
278
+ | 📁 **Custom File** | `validation_set="data/my_eval.json"` | Load your own QA dataset (JSON or CSV) |
279
+ | 🌐 **Hugging Face Dataset** | `validation_set="squad"` | Automatically downloads benchmark datasets (requires `pip install datasets`) |
280
+
281
+ ### Example
282
+
283
+ ```python
284
+ from ragmint.tuner import RAGMint
285
+
286
+ ragmint = RAGMint(
287
+ docs_path="data/docs/",
288
+ retrievers=["faiss", "chroma"],
289
+ embeddings=["text-embedding-3-small"],
290
+ rerankers=["mmr"],
291
+ )
292
+
293
+ # Use built-in default
294
+ ragmint.optimize(validation_set=None)
295
+
296
+ # Use Hugging Face benchmark
297
+ ragmint.optimize(validation_set="squad")
298
+
299
+ # Use your own dataset
300
+ ragmint.optimize(validation_set="data/custom_qa.json")
301
+ ```
302
+
303
+ ---
304
+
305
+ ## 🧠 Auto-RAG Tuner
306
+
307
+ The **AutoRAGTuner** automatically analyzes your corpus and recommends retriever–embedding combinations based on corpus statistics (size and average document length). It also **suggests multiple chunk sizes with overlaps** to improve retrieval performance.
308
+
309
+ Beyond recommendations, it can **run full end-to-end testing** of the suggested configurations and **identify the best-performing RAG setup** for your dataset.
310
+
311
+
312
+ ```python
313
+ from ragmint.autotuner import AutoRAGTuner
314
+
315
+ # Initialize with your documents
316
+ tuner = AutoRAGTuner(docs_path="data/docs/")
317
+
318
+ # Recommend configurations and suggest chunk sizes
319
+ recommendation = tuner.recommend(num_chunk_pairs=5)
320
+ print("Initial recommendation:", recommendation)
321
+
322
+ # Run full auto-tuning on validation set
323
+ best_config, results = tuner.auto_tune(validation_set="data/validation.json", trials=5)
324
+ print("Best configuration after testing:", best_config)
325
+ print("All trial results:", results)
326
+ ```
327
+ ---
328
+ ## 🧠 Live Dashboard (Gradio)
329
+ Ragmint includes a visual dashboard to AutoTune and analyze RAG pipelines.
330
+
331
+ <p align="center">
332
+ <img src="/assets/images/dashboard-preview.png" width="80%" alt="Ragmint Gradio App Preview">
333
+ </p>
334
+
335
+ ---
336
+
337
+ ## 🏆 Leaderboard Tracking
338
+
339
+ Track and visualize your best experiments across runs.
340
+
341
+ ```python
342
+ from ragmint.leaderboard import Leaderboard
343
+
344
+ # Initialize local leaderboard
345
+ leaderboard = Leaderboard(storage_path="leaderboard.jsonl")
346
+
347
+ # Retrieve top 5 runs
348
+ print("\n🏅 Top 5 Experiments:")
349
+ for result in leaderboard.top_results(limit=5):
350
+ print(f"{result['run_id']} | Score: {result['best_score']:.2f} | Model: {result['model']}")
351
+ ```
352
+
353
+ ---
354
+
355
+ ## 🧠 Explainability with Gemini / Claude
356
+
357
+ Compare RAG configurations and receive **natural language insights** on why one performs better.
358
+
359
+ ```python
360
+ from ragmint.autotuner import AutoRAGTuner
361
+ from ragmint.explainer import explain_results
362
+
363
+ tuner = AutoRAGTuner(docs_path="data/docs/")
364
+ best, results = tuner.auto_tune(
365
+ validation_set='data/docs/validation_qa.json',
366
+ metric="faithfulness",
367
+ trials=5,
368
+ search_type='bayesian'
369
+ )
370
+
371
+ analysis = explain_results(best, results, corpus_stats=tuner.corpus_stats)
372
+ print(analysis)
373
+ ```
374
+
375
+ > Set your API keys in a `.env` file or via environment variables:
376
+ > ```
377
+ > export GEMINI_API_KEY="your_gemini_key"
378
+ > export ANTHROPIC_API_KEY="your_claude_key"
379
+ > ```
380
+
381
+ ---
382
+
383
+ ## 🧩 Folder Structure
384
+
385
+ ```
386
+ ragmint/
387
+ ├── core/
388
+ │ ├── pipeline.py
389
+ │ ├── retriever.py
390
+ │ ├── reranker.py
391
+ │ ├── embeddings.py
392
+ │ ├── chunking.py
393
+ │ └── evaluation.py
394
+ ├── integrations/
395
+ │ ├── config_adapter.py
396
+ │ └── langchain_prebuilder.py
397
+ ├── autotuner.py
398
+ ├── explainer.py
399
+ ├── leaderboard.py
400
+ ├── tuner.py
401
+ ├── utils/
402
+ ├── configs/
403
+ ├── experiments/
404
+ ├── tests/
405
+ └── main.py
406
+ ```
407
+
408
+ ---
409
+
410
+ ## 🧪 Running Tests
411
+
412
+ ```bash
413
+ pytest -v
414
+ ```
415
+
416
+ To include integration tests with Gemini or Claude APIs:
417
+ ```bash
418
+ pytest -m integration
419
+ ```
420
+
421
+ ---
422
+
423
+ ## ⚙️ Configuration via `pyproject.toml`
424
+
425
+ Your `pyproject.toml` includes all required dependencies:
426
+
427
+ ```toml
428
+ [project]
429
+ name = "ragmint"
430
+ version = "0.1.0"
431
+ dependencies = [
432
+ # Core ML + Embeddings
433
+ "numpy<2.0.0",
434
+ "pandas>=2.0",
435
+ "scikit-learn>=1.3",
436
+ "sentence-transformers>=2.2.2",
437
+
438
+ # Retrieval backends
439
+ "chromadb>=0.4",
440
+ "faiss-cpu; sys_platform != 'darwin'", # For Linux/Windows
441
+ "faiss-cpu==1.7.4; sys_platform == 'darwin'", # Optional fix for macOS MPS
442
+ "rank-bm25>=0.2.2", # For BM25 retriever
443
+
444
+ # Optimization & evaluation
445
+ "optuna>=3.0",
446
+ "tqdm",
447
+ "colorama",
448
+
449
+ # RAG evaluation and data utils
450
+ "pyyaml",
451
+ "python-dotenv",
452
+
453
+ # Explainability and LLM APIs
454
+ "openai>=1.0.0",
455
+ "google-generativeai>=0.8.0",
456
+ "anthropic>=0.25.0",
457
+
458
+ # Integration / storage
459
+ "supabase>=2.4.0",
460
+
461
+ # Testing
462
+ "pytest",
463
+
464
+ # LangChain integration layer
465
+ "langchain>=0.2.5",
466
+ "langchain-community>=0.2.5",
467
+ "langchain-text-splitters>=0.2.1"
468
+ ]
469
+ ```
470
+
471
+ ---
472
+
473
+ ## 📊 Example Experiment Workflow
474
+
475
+ 1. Define your retriever, embedding, and reranker setup
476
+ 2. Launch optimization (Grid, Random, Bayesian) or AutoTune
477
+ 3. Compare performance with explainability
478
+ 4. Persist results to leaderboard for later inspection
479
+
480
+ ---
481
+
482
+ ## 🧬 Architecture Overview
483
+
484
+ ```mermaid
485
+ flowchart TD
486
+ A[Query] --> B[Chunking / Preprocessing]
487
+ B --> C[Embedder]
488
+ C --> D[Retriever]
489
+ D --> E[Reranker]
490
+ E --> F[Generator]
491
+ F --> G[Evaluation]
492
+ G --> H[AutoRAGTuner / Optuna]
493
+ H --> I[Suggested Configs & Chunk Sizes]
494
+ I --> J[Best Configuration]
495
+ J -->|Update Params| C
496
+
497
+ ```
498
+
499
+ ---
500
+
501
+ ## 📘 Example Output
502
+
503
+ ```
504
+ [INFO] Starting Auto-RAG Tuning
505
+ [INFO] Suggested retriever=Chroma, embedding_model=sentence-transformers/all-MiniLM-L6-v2
506
+ [INFO] Suggested chunk-size candidates: [(380, 80), (420, 100), (350, 70), (400, 90), (360, 75)]
507
+ [INFO] Running full evaluation on validation set with 5 trials
508
+ [INFO] Trial 1 finished: faithfulness=0.82, latency=0.40s
509
+ [INFO] Trial 2 finished: faithfulness=0.85, latency=0.44s
510
+ ...
511
+ [INFO] Best configuration after testing: {'retriever': 'Chroma', 'embedding_model': 'sentence-transformers/all-MiniLM-L6-v2', 'chunk_size': 400, 'overlap': 90, 'strategy': 'sentence'}
512
+ ```
513
+ ---
514
+ ## 🧾 Citation
515
+ If you use **Ragmint** in your research, please cite:
516
+ ```bibtex
517
+ @software{oliveira2025ragmint,
518
+ author = {André Oliveira},
519
+ title = {Ragmint: Retrieval-Augmented Generation Model Inspection & Tuning},
520
+ year = {2025},
521
+ url = {https://github.com/andyolivers/ragmint},
522
+ license = {Apache-2.0}
523
+ }
524
+ ```
525
+
526
+ ---
527
+
528
+ <p align="center">
529
+ <sub>Built with ❤️ by <a href="https://andyolivers.com">André Oliveira</a> | Apache 2.0 License</sub>
530
+ </p>
@@ -0,0 +1,48 @@
1
+ ragmint/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
+ ragmint/__main__.py,sha256=q7hBn56Z1xAckbs03i8ynsuOzJVUXmod2qHddX7gkpc,729
3
+ ragmint/app.py,sha256=BGOI8h5JR9sb_6-8iO0iia1eDArTQ3OPw1E7DAPKJnU,18540
4
+ ragmint/autotuner.py,sha256=GMwhc9gwSj1NLxBAT8wSp-yJl0MHoNDx_0eIv6vARys,7681
5
+ ragmint/explainer.py,sha256=lU8EJcmk2x5_XFpKFwDT9q3yqauG-MTNLdFfOfRa-Ms,3097
6
+ ragmint/leaderboard.py,sha256=VvJvJUReHtwPV4xeS1o_sqpI2YYKe01aW4j3VpcrWOQ,1778
7
+ ragmint/qa_generator.py,sha256=0u7o0YVsFh7ASkwI8n_P0piZtyd1BVqCCNA6h-ZvDmE,6301
8
+ ragmint/tuner.py,sha256=DWEdh0JBKTRdU1p5HDAGNRJeUrjAprwkcWZvb8Ifx_4,7261
9
+ ragmint/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
10
+ ragmint/core/chunking.py,sha256=cwxRUTs1u8Pe4xQso0G5doA_9AFCMyxB5Hxe8GJUoE4,2711
11
+ ragmint/core/embeddings.py,sha256=WOd58_Z-xir66Q7tLAldfDO_x-wAieMKKXsMfhw7OF4,1875
12
+ ragmint/core/evaluation.py,sha256=aDjEPcbeRWChMOLy6tSXflvry4swxhPOpHYZRed8iCA,1882
13
+ ragmint/core/pipeline.py,sha256=xqLE-NiMDoC5NYCP0O984-eoyQOD4nnCsu6nq4DQEdI,2058
14
+ ragmint/core/reranker.py,sha256=B2-NDExqpd9jdXHkEHOXC0B_6-FMJm5vdi-_ZbxC3Os,2303
15
+ ragmint/core/retriever.py,sha256=bKaJr9vfGPEMAjLBfRhCpAH1bzlJgjDKx0r6ZRWfyuY,6010
16
+ ragmint/experiments/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
17
+ ragmint/experiments/validation_qa.json,sha256=T1PNoYwrqgwDVLtfmj7L5e0Sq02OEbqHPC8RFhICuUU,2
18
+ ragmint/integrations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
19
+ ragmint/integrations/config_adapter.py,sha256=gE6M4abT1cREDd648XaKR3qS11Nz-ULdtNgx9uCrjl4,3405
20
+ ragmint/integrations/langchain_prebuilder.py,sha256=B0_ht1f3cWZ5NStRLypIGs0G2eUDs1tEZPAXtwBvGVM,3495
21
+ ragmint/optimization/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
22
+ ragmint/optimization/search.py,sha256=uiLJeoO_jaLCQEw99L6uI1rnqHHx_rTY81WxfMmlALs,1623
23
+ ragmint/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
24
+ ragmint/tests/conftest.py,sha256=QhYPp5nrQ_DbZlsVH3nLjDgjPATAnLwzJkwl-Y-xrmM,488
25
+ ragmint/tests/test_autotuner.py,sha256=ECfBzQaRrdmvJrCogORGd9rFaUK7hFLcCaTX7RXueXU,2340
26
+ ragmint/tests/test_config_adapter.py,sha256=GlLlvVLQb2jMKTrLQgqzPiTsYvTGZZ8XOxCM9fdZhJI,1358
27
+ ragmint/tests/test_embeddings.py,sha256=KSqxdMy8e8Ekjh_JID5-Eb9gVDXDjOwZUIh36DoWBTw,1354
28
+ ragmint/tests/test_explainer.py,sha256=eI9YOHdC1wRcm04ksBJ0tTJA3wd7JI3SvfJSvCitLHI,1289
29
+ ragmint/tests/test_integration_autotuner_ragmint.py,sha256=_WQRl7rvd_4GXSiqp0Virz-3f5YqS8X550u0613RMlM,1678
30
+ ragmint/tests/test_langchain_prebuilder.py,sha256=5jjitN3muReqbdaD01sNl1h5GH8tfNjpj1UEq3XD5fo,3240
31
+ ragmint/tests/test_leaderboard.py,sha256=MiEJnu4eDC_qsDxPaR8bF2OuAMILCPS8ji7VvpzzTWg,2656
32
+ ragmint/tests/test_pipeline.py,sha256=wj7dEuqz6vnoMc-V05j0DifWsXfmO5xGzD5i24V8aQI,667
33
+ ragmint/tests/test_qa_generator.py,sha256=6Dx85g51CoSp8InJTd73PKNGOz_VInlEWmohyQLuT88,2278
34
+ ragmint/tests/test_retriever.py,sha256=eMXtnH7T5Sgf439iCF0zg_DSNxly9cGJuLFzjaiuGIA,473
35
+ ragmint/tests/test_search.py,sha256=FcC-DEnw9veAEyMnFoRw9DAwzqJC9F6-r63Nqo2nO58,598
36
+ ragmint/tests/test_tuner.py,sha256=CylYlE7yC-wCCZo5P_tqFDtjfIiAz-LOW4ZmWDYT0Z8,2316
37
+ ragmint/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
38
+ ragmint/utils/caching.py,sha256=LPE2JorOQ90BgVf6NUiS0-bdt-FGpNxDy7FnuwEHzy0,1060
39
+ ragmint/utils/data_loader.py,sha256=GXU9Nc3o0UWxtBeRwiskD1aCjSiNNuRoAokIUODn7q8,2024
40
+ ragmint/utils/logger.py,sha256=X7hTNb3st3fUeQIzSghuoV5B8FWXzm_O3DRkSfJvhmI,1033
41
+ ragmint/utils/metrics.py,sha256=DR8mrdumHtQerK0VrugwYKIG1oNptEcsFqodXq3i2kY,717
42
+ ragmint-0.4.6.data/data/LICENSE,sha256=ahkhYfFLI8tGrdxdO2_GaT6OJW2eNwyFT3kYi85QQhc,692
43
+ ragmint-0.4.6.data/data/README.md,sha256=LRfAA3TN-U_uDDko9rp7_OTeRsxoumhsS0yA2mRFQ9I,14543
44
+ ragmint-0.4.6.dist-info/licenses/LICENSE,sha256=ahkhYfFLI8tGrdxdO2_GaT6OJW2eNwyFT3kYi85QQhc,692
45
+ ragmint-0.4.6.dist-info/METADATA,sha256=AAvKBKQXDvSfBbwVD6LP_Ub3hN8ipJx2mZlp7mD02x0,16135
46
+ ragmint-0.4.6.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
47
+ ragmint-0.4.6.dist-info/top_level.txt,sha256=K2ulzMHuvFm6xayvvJdGABeRJAvKDBn6M3EI-3SbYLw,8
48
+ ragmint-0.4.6.dist-info/RECORD,,
@@ -0,0 +1,19 @@
1
+ Apache License
2
+ Version 2.0, January 2004
3
+ http://www.apache.org/licenses/
4
+
5
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6
+
7
+ Copyright 2025 André Oliveira
8
+
9
+ Licensed under the Apache License, Version 2.0 (the "License");
10
+ you may not use this file except in compliance with the License.
11
+ You may obtain a copy of the License at
12
+
13
+ http://www.apache.org/licenses/LICENSE-2.0
14
+
15
+ Unless required by applicable law or agreed to in writing, software
16
+ distributed under the License is distributed on an "AS IS" BASIS,
17
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18
+ See the License for the specific language governing permissions and
19
+ limitations under the License.
@@ -1,18 +0,0 @@
1
- import os
2
- import pytest
3
- from ragmint.explainer import explain_results
4
-
5
-
6
- @pytest.mark.integration
7
- def test_real_gemini_explanation():
8
- """Run real Gemini call if GOOGLE_API_KEY is set."""
9
- if not os.getenv("GEMINI_API_KEY"):
10
- pytest.skip("GOOGLE_API_KEY not set")
11
-
12
- config_a = {"retriever": "FAISS", "embedding_model": "OpenAI"}
13
- config_b = {"retriever": "Chroma", "embedding_model": "SentenceTransformers"}
14
-
15
- result = explain_results(config_a, config_b, model="gemini-1.5-pro")
16
- assert isinstance(result, str)
17
- assert len(result) > 0
18
- print("\n[Gemini explanation]:", result[:200], "...")
@@ -1,27 +0,0 @@
1
- Metadata-Version: 2.4
2
- Name: ragmint
3
- Version: 0.2.1
4
- Summary: A modular framework for evaluating and optimizing RAG pipelines.
5
- Author-email: Andre Oliveira <oandreoliveira@outlook.com>
6
- License: Apache License 2.0
7
- Project-URL: Homepage, https://github.com/andyolivers/ragmint
8
- Project-URL: Documentation, https://andyolivers.com
9
- Project-URL: Issues, https://github.com/andyolivers/ragmint/issues
10
- Keywords: RAG,LLM,retrieval,optimization,AI,evaluation
11
- Requires-Python: >=3.9
12
- Description-Content-Type: text/markdown
13
- License-File: LICENSE
14
- Requires-Dist: numpy>=1.23
15
- Requires-Dist: pandas>=2.0
16
- Requires-Dist: scikit-learn>=1.3
17
- Requires-Dist: openai>=1.0
18
- Requires-Dist: tqdm
19
- Requires-Dist: pyyaml
20
- Requires-Dist: chromadb>=0.4
21
- Requires-Dist: faiss-cpu; sys_platform != "darwin"
22
- Requires-Dist: optuna>=3.0
23
- Requires-Dist: pytest
24
- Requires-Dist: colorama
25
- Requires-Dist: google-generativeai>=0.8.0
26
- Requires-Dist: supabase>=2.4.0
27
- Dynamic: license-file
@@ -1,38 +0,0 @@
1
- ragmint/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
- ragmint/__main__.py,sha256=q7hBn56Z1xAckbs03i8ynsuOzJVUXmod2qHddX7gkpc,729
3
- ragmint/autotuner.py,sha256=eXEH4e_3Os9FPX9y_0N7GnIQsmoHxmFbtjj7xanu17g,1064
4
- ragmint/explainer.py,sha256=1glGNdC4GlwR6Qs8Bj1oOol7f5_db7Ksnh07HAp-A2c,2077
5
- ragmint/leaderboard.py,sha256=nILQ5QR63RpZtCrZ__RFfwHXy4bkUIMUcSfH92OQ93Y,1628
6
- ragmint/tuner.py,sha256=BLPZ66sVk3dh3Wj-GVUYRVmVtgXYTzv3oTQtKJeDlgE,4442
7
- ragmint/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
- ragmint/core/chunking.py,sha256=Dy9RYyapGSS6ik6Vg9lqbUPCFqSraU1JKpHbYUTkaFo,576
9
- ragmint/core/embeddings.py,sha256=6wJjfZ5ukr8G5bJJ1evjIqj0_FMbs_gq4xC-sBBqNlA,566
10
- ragmint/core/evaluation.py,sha256=3OFcZU2zZyaP53d9S2zdpknV0CYfTq0KoRB3a_dtjM4,1022
11
- ragmint/core/pipeline.py,sha256=2qwGKuG0Du7gtIpieLFn71h_RcwBpjcV-h9PQz2ZOsc,1169
12
- ragmint/core/reranker.py,sha256=B2-NDExqpd9jdXHkEHOXC0B_6-FMJm5vdi-_ZbxC3Os,2303
13
- ragmint/core/retriever.py,sha256=jbpKy_fGdDq736y0es_utQuLqY9eiWNd71Q8JbU0Sko,1259
14
- ragmint/experiments/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
15
- ragmint/experiments/validation_qa.json,sha256=mQyGeMyvyAqN5yGjpjqW42JJ7FfhFYNki__paKVhQss,520
16
- ragmint/optimization/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
17
- ragmint/optimization/search.py,sha256=uiLJeoO_jaLCQEw99L6uI1rnqHHx_rTY81WxfMmlALs,1623
18
- ragmint/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
19
- ragmint/tests/conftest.py,sha256=QhYPp5nrQ_DbZlsVH3nLjDgjPATAnLwzJkwl-Y-xrmM,488
20
- ragmint/tests/test_autotuner.py,sha256=k5nsIH6MYB5zaocR_Wn1wTX-QDYfhH6ugx2chZu9Q8U,1500
21
- ragmint/tests/test_explainer.py,sha256=K_DRnGGl34WcTA2yaQGmfzWkVi1uEkzjpsTPeZxXeIg,802
22
- ragmint/tests/test_explainer_integration.py,sha256=tYT62fYqk616bjQ1VxHADVRfJ9vdF_CiF3cz4A9BdbE,620
23
- ragmint/tests/test_integration_autotuner_ragmint.py,sha256=YCGge0_KOijAdB7VNDGHl2VRJjiOyl_-sJNRLjAXGLw,2182
24
- ragmint/tests/test_leaderboard.py,sha256=ay81YK6KxAUU6mcG6n1_xV8GPYkBgjzJj9iAIyAzIzA,1163
25
- ragmint/tests/test_pipeline.py,sha256=MIMkEKelh-POlbXzbCc4ClMk8XCGzfuj569xXltziic,615
26
- ragmint/tests/test_retriever.py,sha256=Ag0uGW8-iMzKA4nJNnsjuzlQHa79sN-T-K1g1cdin-A,421
27
- ragmint/tests/test_search.py,sha256=FcC-DEnw9veAEyMnFoRw9DAwzqJC9F6-r63Nqo2nO58,598
28
- ragmint/tests/test_tuner.py,sha256=LOvtIxAbUsoRHQudZ23UVr60FYAU0a1SBNvAN0mLpfU,2322
29
- ragmint/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
30
- ragmint/utils/caching.py,sha256=LPE2JorOQ90BgVf6NUiS0-bdt-FGpNxDy7FnuwEHzy0,1060
31
- ragmint/utils/data_loader.py,sha256=GXU9Nc3o0UWxtBeRwiskD1aCjSiNNuRoAokIUODn7q8,2024
32
- ragmint/utils/logger.py,sha256=X7hTNb3st3fUeQIzSghuoV5B8FWXzm_O3DRkSfJvhmI,1033
33
- ragmint/utils/metrics.py,sha256=DR8mrdumHtQerK0VrugwYKIG1oNptEcsFqodXq3i2kY,717
34
- ragmint-0.2.1.dist-info/licenses/LICENSE,sha256=ahkhYfFLI8tGrdxdO2_GaT6OJW2eNwyFT3kYi85QQhc,692
35
- ragmint-0.2.1.dist-info/METADATA,sha256=sR31_9qGCaarWLNqTc2uNjO4aklKq9nxaGEjtcy5ipU,936
36
- ragmint-0.2.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
37
- ragmint-0.2.1.dist-info/top_level.txt,sha256=K2ulzMHuvFm6xayvvJdGABeRJAvKDBn6M3EI-3SbYLw,8
38
- ragmint-0.2.1.dist-info/RECORD,,