ragmint-0.1.0-py3-none-any.whl → ragmint-0.1.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ragmint might be problematic. Click here for more details.
- ragmint/tests/test_tuner.py +38 -5
- ragmint/tuner.py +1 -1
- ragmint/utils/data_loader.py +33 -3
- {ragmint-0.1.0.dist-info → ragmint-0.1.1.dist-info}/METADATA +34 -1
- {ragmint-0.1.0.dist-info → ragmint-0.1.1.dist-info}/RECORD +8 -8
- {ragmint-0.1.0.dist-info → ragmint-0.1.1.dist-info}/WHEEL +0 -0
- {ragmint-0.1.0.dist-info → ragmint-0.1.1.dist-info}/licenses/LICENSE +0 -0
- {ragmint-0.1.0.dist-info → ragmint-0.1.1.dist-info}/top_level.txt +0 -0
ragmint/tests/test_tuner.py
CHANGED
|
@@ -1,9 +1,11 @@
|
|
|
1
1
|
import os
|
|
2
2
|
import json
|
|
3
|
+
import pytest
|
|
3
4
|
from ragmint.tuner import RAGMint
|
|
4
5
|
|
|
5
6
|
|
|
6
7
|
def setup_validation_file(tmp_path):
|
|
8
|
+
"""Create a temporary validation QA dataset."""
|
|
7
9
|
data = [
|
|
8
10
|
{"question": "What is AI?", "answer": "Artificial Intelligence"},
|
|
9
11
|
{"question": "Define ML", "answer": "Machine Learning"}
|
|
@@ -15,6 +17,7 @@ def setup_validation_file(tmp_path):
|
|
|
15
17
|
|
|
16
18
|
|
|
17
19
|
def setup_docs(tmp_path):
|
|
20
|
+
"""Create a small document corpus for testing."""
|
|
18
21
|
corpus = tmp_path / "corpus"
|
|
19
22
|
corpus.mkdir()
|
|
20
23
|
(corpus / "doc1.txt").write_text("This is about Artificial Intelligence.")
|
|
@@ -22,17 +25,47 @@ def setup_docs(tmp_path):
|
|
|
22
25
|
return str(corpus)
|
|
23
26
|
|
|
24
27
|
|
|
25
|
-
|
|
28
|
+
@pytest.mark.parametrize("validation_mode", [
|
|
29
|
+
None, # Built-in dataset
|
|
30
|
+
"data/custom_eval.json", # Custom dataset path (mocked below)
|
|
31
|
+
])
|
|
32
|
+
def test_optimize_ragmint(tmp_path, validation_mode, monkeypatch):
|
|
33
|
+
"""Test RAGMint.optimize() with different dataset modes."""
|
|
26
34
|
docs_path = setup_docs(tmp_path)
|
|
27
35
|
val_file = setup_validation_file(tmp_path)
|
|
28
36
|
|
|
37
|
+
# If using custom dataset, mock the path
|
|
38
|
+
if validation_mode and "custom_eval" in validation_mode:
|
|
39
|
+
custom_path = tmp_path / "custom_eval.json"
|
|
40
|
+
os.rename(val_file, custom_path)
|
|
41
|
+
validation_mode = str(custom_path)
|
|
42
|
+
|
|
43
|
+
metric = "faithfulness"
|
|
44
|
+
|
|
45
|
+
# Initialize RAGMint
|
|
29
46
|
rag = RAGMint(
|
|
30
47
|
docs_path=docs_path,
|
|
31
48
|
retrievers=["faiss"],
|
|
32
|
-
embeddings=["
|
|
49
|
+
embeddings=["text-embedding-3-small"],
|
|
33
50
|
rerankers=["mmr"]
|
|
34
51
|
)
|
|
35
52
|
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
53
|
+
# Run optimization
|
|
54
|
+
best, results = rag.optimize(
|
|
55
|
+
validation_set=validation_mode,
|
|
56
|
+
metric=metric,
|
|
57
|
+
trials=2
|
|
58
|
+
)
|
|
59
|
+
|
|
60
|
+
# Validate results
|
|
61
|
+
assert isinstance(best, dict), "Best config should be a dict"
|
|
62
|
+
assert isinstance(results, list), "Results should be a list of trials"
|
|
63
|
+
assert len(results) > 0, "Optimization should produce results"
|
|
64
|
+
|
|
65
|
+
# The best result can expose either 'score' or the metric name (e.g. 'faithfulness')
|
|
66
|
+
assert any(k in best for k in ("score", metric)), \
|
|
67
|
+
f"Best config should include either 'score' or '{metric}'"
|
|
68
|
+
|
|
69
|
+
# Ensure the metric value is valid
|
|
70
|
+
assert best.get(metric, best.get("score")) >= 0, \
|
|
71
|
+
f"{metric} score should be non-negative"
|
ragmint/tuner.py
CHANGED
|
@@ -90,7 +90,7 @@ class RAGMint:
|
|
|
90
90
|
search_type: str = "random",
|
|
91
91
|
trials: int = 10,
|
|
92
92
|
) -> Tuple[Dict[str, Any], List[Dict[str, Any]]]:
|
|
93
|
-
validation = load_validation_set(validation_set)
|
|
93
|
+
validation = load_validation_set(validation_set or "default")
|
|
94
94
|
|
|
95
95
|
search_space = {
|
|
96
96
|
"retriever": self.retrievers,
|
ragmint/utils/data_loader.py
CHANGED
|
@@ -2,6 +2,14 @@ import json
|
|
|
2
2
|
import csv
|
|
3
3
|
from typing import List, Dict
|
|
4
4
|
from pathlib import Path
|
|
5
|
+
import os
|
|
6
|
+
|
|
7
|
+
try:
|
|
8
|
+
from datasets import load_dataset
|
|
9
|
+
except ImportError:
|
|
10
|
+
load_dataset = None # optional dependency
|
|
11
|
+
|
|
12
|
+
DEFAULT_VALIDATION_PATH = Path(__file__).parent.parent / "experiments" / "validation_qa.json"
|
|
5
13
|
|
|
6
14
|
|
|
7
15
|
def load_json(path: str) -> List[Dict]:
|
|
@@ -19,10 +27,32 @@ def save_json(path: str, data: Dict):
|
|
|
19
27
|
with open(path, "w", encoding="utf-8") as f:
|
|
20
28
|
json.dump(data, f, ensure_ascii=False, indent=2)
|
|
21
29
|
|
|
22
|
-
def load_validation_set(path: str) -> List[Dict]:
|
|
30
|
+
def load_validation_set(path: str | None = None) -> List[Dict]:
|
|
23
31
|
"""
|
|
24
|
-
Loads a validation dataset (QA pairs) from
|
|
32
|
+
Loads a validation dataset (QA pairs) from:
|
|
33
|
+
- Built-in default JSON file
|
|
34
|
+
- User-provided JSON or CSV
|
|
35
|
+
- Hugging Face dataset by name
|
|
25
36
|
"""
|
|
37
|
+
# Default behavior
|
|
38
|
+
if path is None or path == "default":
|
|
39
|
+
if not DEFAULT_VALIDATION_PATH.exists():
|
|
40
|
+
raise FileNotFoundError(f"Default validation set not found at {DEFAULT_VALIDATION_PATH}")
|
|
41
|
+
return load_json(DEFAULT_VALIDATION_PATH)
|
|
42
|
+
|
|
43
|
+
# Hugging Face dataset
|
|
44
|
+
if not os.path.exists(path) and load_dataset:
|
|
45
|
+
try:
|
|
46
|
+
dataset = load_dataset(path, split="validation")
|
|
47
|
+
data = [
|
|
48
|
+
{"question": q, "answer": a}
|
|
49
|
+
for q, a in zip(dataset["question"], dataset["answers"])
|
|
50
|
+
]
|
|
51
|
+
return data
|
|
52
|
+
except Exception:
|
|
53
|
+
pass # fall through to file loading
|
|
54
|
+
|
|
55
|
+
# Local file
|
|
26
56
|
p = Path(path)
|
|
27
57
|
if not p.exists():
|
|
28
58
|
raise FileNotFoundError(f"Validation file not found: {path}")
|
|
@@ -32,4 +62,4 @@ def load_validation_set(path: str) -> List[Dict]:
|
|
|
32
62
|
elif p.suffix.lower() in [".csv", ".tsv"]:
|
|
33
63
|
return load_csv(path)
|
|
34
64
|
else:
|
|
35
|
-
raise ValueError("Unsupported validation set format. Use JSON or
|
|
65
|
+
raise ValueError("Unsupported validation set format. Use JSON, CSV, or a Hugging Face dataset name.")
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: ragmint
|
|
3
|
-
Version: 0.1.0
|
|
3
|
+
Version: 0.1.1
|
|
4
4
|
Summary: A modular framework for evaluating and optimizing RAG pipelines.
|
|
5
5
|
Author-email: Andre Oliveira <oandreoliveira@outlook.com>
|
|
6
6
|
License: Apache License 2.0
|
|
@@ -101,6 +101,39 @@ result = pipeline.run("What is retrieval-augmented generation?")
|
|
|
101
101
|
print(result)
|
|
102
102
|
```
|
|
103
103
|
|
|
104
|
+
---
|
|
105
|
+
## 🧪 Dataset Options
|
|
106
|
+
|
|
107
|
+
Ragmint can automatically load evaluation datasets for your RAG pipeline:
|
|
108
|
+
|
|
109
|
+
| Mode | Example | Description |
|
|
110
|
+
|------|----------|-------------|
|
|
111
|
+
| 🧱 **Default** | `validation_set=None` | Uses built-in `experiments/validation_qa.json` |
|
|
112
|
+
| 📁 **Custom File** | `validation_set="data/my_eval.json"` | Load your own QA dataset (JSON or CSV) |
|
|
113
|
+
| 🌐 **Hugging Face Dataset** | `validation_set="squad"` | Automatically downloads benchmark datasets (requires `pip install datasets`) |
|
|
114
|
+
|
|
115
|
+
### Example
|
|
116
|
+
|
|
117
|
+
```python
|
|
118
|
+
from ragmint.tuner import RAGMint
|
|
119
|
+
|
|
120
|
+
ragmint = RAGMint(
|
|
121
|
+
docs_path="data/docs/",
|
|
122
|
+
retrievers=["faiss", "chroma"],
|
|
123
|
+
embeddings=["text-embedding-3-small"],
|
|
124
|
+
rerankers=["mmr"],
|
|
125
|
+
)
|
|
126
|
+
|
|
127
|
+
# Use built-in default
|
|
128
|
+
ragmint.optimize(validation_set=None)
|
|
129
|
+
|
|
130
|
+
# Use Hugging Face benchmark
|
|
131
|
+
ragmint.optimize(validation_set="squad")
|
|
132
|
+
|
|
133
|
+
# Use your own dataset
|
|
134
|
+
ragmint.optimize(validation_set="data/custom_qa.json")
|
|
135
|
+
```
|
|
136
|
+
|
|
104
137
|
---
|
|
105
138
|
|
|
106
139
|
## 🧩 Folder Structure
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
ragmint/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
2
|
ragmint/__main__.py,sha256=q7hBn56Z1xAckbs03i8ynsuOzJVUXmod2qHddX7gkpc,729
|
|
3
|
-
ragmint/tuner.py,sha256=
|
|
3
|
+
ragmint/tuner.py,sha256=BLPZ66sVk3dh3Wj-GVUYRVmVtgXYTzv3oTQtKJeDlgE,4442
|
|
4
4
|
ragmint/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
5
5
|
ragmint/core/chunking.py,sha256=Dy9RYyapGSS6ik6Vg9lqbUPCFqSraU1JKpHbYUTkaFo,576
|
|
6
6
|
ragmint/core/embeddings.py,sha256=6wJjfZ5ukr8G5bJJ1evjIqj0_FMbs_gq4xC-sBBqNlA,566
|
|
@@ -15,14 +15,14 @@ ragmint/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
|
15
15
|
ragmint/tests/test_pipeline.py,sha256=MIMkEKelh-POlbXzbCc4ClMk8XCGzfuj569xXltziic,615
|
|
16
16
|
ragmint/tests/test_retriever.py,sha256=Ag0uGW8-iMzKA4nJNnsjuzlQHa79sN-T-K1g1cdin-A,421
|
|
17
17
|
ragmint/tests/test_search.py,sha256=FcC-DEnw9veAEyMnFoRw9DAwzqJC9F6-r63Nqo2nO58,598
|
|
18
|
-
ragmint/tests/test_tuner.py,sha256=
|
|
18
|
+
ragmint/tests/test_tuner.py,sha256=LOvtIxAbUsoRHQudZ23UVr60FYAU0a1SBNvAN0mLpfU,2322
|
|
19
19
|
ragmint/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
20
20
|
ragmint/utils/caching.py,sha256=LPE2JorOQ90BgVf6NUiS0-bdt-FGpNxDy7FnuwEHzy0,1060
|
|
21
|
-
ragmint/utils/data_loader.py,sha256=
|
|
21
|
+
ragmint/utils/data_loader.py,sha256=GXU9Nc3o0UWxtBeRwiskD1aCjSiNNuRoAokIUODn7q8,2024
|
|
22
22
|
ragmint/utils/logger.py,sha256=X7hTNb3st3fUeQIzSghuoV5B8FWXzm_O3DRkSfJvhmI,1033
|
|
23
23
|
ragmint/utils/metrics.py,sha256=DR8mrdumHtQerK0VrugwYKIG1oNptEcsFqodXq3i2kY,717
|
|
24
|
-
ragmint-0.1.
|
|
25
|
-
ragmint-0.1.
|
|
26
|
-
ragmint-0.1.
|
|
27
|
-
ragmint-0.1.
|
|
28
|
-
ragmint-0.1.
|
|
24
|
+
ragmint-0.1.1.dist-info/licenses/LICENSE,sha256=ahkhYfFLI8tGrdxdO2_GaT6OJW2eNwyFT3kYi85QQhc,692
|
|
25
|
+
ragmint-0.1.1.dist-info/METADATA,sha256=qv4dd0BpS4z9Hx67AYZe2MYA2bYvQdOKYfBPovSLb88,6580
|
|
26
|
+
ragmint-0.1.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
27
|
+
ragmint-0.1.1.dist-info/top_level.txt,sha256=K2ulzMHuvFm6xayvvJdGABeRJAvKDBn6M3EI-3SbYLw,8
|
|
28
|
+
ragmint-0.1.1.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|