ragmint 0.1.0__py3-none-any.whl → 0.1.1__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ragmint might be problematic. Click here for more details.

@@ -1,9 +1,11 @@
1
1
  import os
2
2
  import json
3
+ import pytest
3
4
  from ragmint.tuner import RAGMint
4
5
 
5
6
 
6
7
  def setup_validation_file(tmp_path):
8
+ """Create a temporary validation QA dataset."""
7
9
  data = [
8
10
  {"question": "What is AI?", "answer": "Artificial Intelligence"},
9
11
  {"question": "Define ML", "answer": "Machine Learning"}
@@ -15,6 +17,7 @@ def setup_validation_file(tmp_path):
15
17
 
16
18
 
17
19
  def setup_docs(tmp_path):
20
+ """Create a small document corpus for testing."""
18
21
  corpus = tmp_path / "corpus"
19
22
  corpus.mkdir()
20
23
  (corpus / "doc1.txt").write_text("This is about Artificial Intelligence.")
@@ -22,17 +25,47 @@ def setup_docs(tmp_path):
22
25
  return str(corpus)
23
26
 
24
27
 
25
- def test_optimize_random(tmp_path):
28
+ @pytest.mark.parametrize("validation_mode", [
29
+ None, # Built-in dataset
30
+ "data/custom_eval.json", # Custom dataset path (mocked below)
31
+ ])
32
+ def test_optimize_ragmint(tmp_path, validation_mode, monkeypatch):
33
+ """Test RAGMint.optimize() with different dataset modes."""
26
34
  docs_path = setup_docs(tmp_path)
27
35
  val_file = setup_validation_file(tmp_path)
28
36
 
37
+ # If using custom dataset, mock the path
38
+ if validation_mode and "custom_eval" in validation_mode:
39
+ custom_path = tmp_path / "custom_eval.json"
40
+ os.rename(val_file, custom_path)
41
+ validation_mode = str(custom_path)
42
+
43
+ metric = "faithfulness"
44
+
45
+ # Initialize RAGMint
29
46
  rag = RAGMint(
30
47
  docs_path=docs_path,
31
48
  retrievers=["faiss"],
32
- embeddings=["openai/text-embedding-3-small"],
49
+ embeddings=["text-embedding-3-small"],
33
50
  rerankers=["mmr"]
34
51
  )
35
52
 
36
- best, results = rag.optimize(validation_set=val_file, metric="faithfulness", trials=2)
37
- assert isinstance(best, dict)
38
- assert isinstance(results, list)
53
+ # Run optimization
54
+ best, results = rag.optimize(
55
+ validation_set=validation_mode,
56
+ metric=metric,
57
+ trials=2
58
+ )
59
+
60
+ # Validate results
61
+ assert isinstance(best, dict), "Best config should be a dict"
62
+ assert isinstance(results, list), "Results should be a list of trials"
63
+ assert len(results) > 0, "Optimization should produce results"
64
+
65
+ # The best result can expose either 'score' or the metric name (e.g. 'faithfulness')
66
+ assert any(k in best for k in ("score", metric)), \
67
+ f"Best config should include either 'score' or '{metric}'"
68
+
69
+ # Ensure the metric value is valid
70
+ assert best.get(metric, best.get("score")) >= 0, \
71
+ f"{metric} score should be non-negative"
ragmint/tuner.py CHANGED
@@ -90,7 +90,7 @@ class RAGMint:
90
90
  search_type: str = "random",
91
91
  trials: int = 10,
92
92
  ) -> Tuple[Dict[str, Any], List[Dict[str, Any]]]:
93
- validation = load_validation_set(validation_set)
93
+ validation = load_validation_set(validation_set or "default")
94
94
 
95
95
  search_space = {
96
96
  "retriever": self.retrievers,
@@ -2,6 +2,14 @@ import json
2
2
  import csv
3
3
  from typing import List, Dict
4
4
  from pathlib import Path
5
+ import os
6
+
7
+ try:
8
+ from datasets import load_dataset
9
+ except ImportError:
10
+ load_dataset = None # optional dependency
11
+
12
+ DEFAULT_VALIDATION_PATH = Path(__file__).parent.parent / "experiments" / "validation_qa.json"
5
13
 
6
14
 
7
15
  def load_json(path: str) -> List[Dict]:
@@ -19,10 +27,32 @@ def save_json(path: str, data: Dict):
19
27
  with open(path, "w", encoding="utf-8") as f:
20
28
  json.dump(data, f, ensure_ascii=False, indent=2)
21
29
 
22
- def load_validation_set(path: str) -> List[Dict]:
30
+ def load_validation_set(path: str | None = None) -> List[Dict]:
23
31
  """
24
- Loads a validation dataset (QA pairs) from JSON or CSV.
32
+ Loads a validation dataset (QA pairs) from:
33
+ - Built-in default JSON file
34
+ - User-provided JSON or CSV
35
+ - Hugging Face dataset by name
25
36
  """
37
+ # Default behavior
38
+ if path is None or path == "default":
39
+ if not DEFAULT_VALIDATION_PATH.exists():
40
+ raise FileNotFoundError(f"Default validation set not found at {DEFAULT_VALIDATION_PATH}")
41
+ return load_json(DEFAULT_VALIDATION_PATH)
42
+
43
+ # Hugging Face dataset
44
+ if not os.path.exists(path) and load_dataset:
45
+ try:
46
+ dataset = load_dataset(path, split="validation")
47
+ data = [
48
+ {"question": q, "answer": a}
49
+ for q, a in zip(dataset["question"], dataset["answers"])
50
+ ]
51
+ return data
52
+ except Exception:
53
+ pass # fall through to file loading
54
+
55
+ # Local file
26
56
  p = Path(path)
27
57
  if not p.exists():
28
58
  raise FileNotFoundError(f"Validation file not found: {path}")
@@ -32,4 +62,4 @@ def load_validation_set(path: str) -> List[Dict]:
32
62
  elif p.suffix.lower() in [".csv", ".tsv"]:
33
63
  return load_csv(path)
34
64
  else:
35
- raise ValueError("Unsupported validation set format. Use JSON or CSV.")
65
+ raise ValueError("Unsupported validation set format. Use JSON, CSV, or a Hugging Face dataset name.")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ragmint
3
- Version: 0.1.0
3
+ Version: 0.1.1
4
4
  Summary: A modular framework for evaluating and optimizing RAG pipelines.
5
5
  Author-email: Andre Oliveira <oandreoliveira@outlook.com>
6
6
  License: Apache License 2.0
@@ -101,6 +101,39 @@ result = pipeline.run("What is retrieval-augmented generation?")
101
101
  print(result)
102
102
  ```
103
103
 
104
+ ---
105
+ ## 🧪 Dataset Options
106
+
107
+ Ragmint can automatically load evaluation datasets for your RAG pipeline:
108
+
109
+ | Mode | Example | Description |
110
+ |------|----------|-------------|
111
+ | 🧱 **Default** | `validation_set=None` | Uses the built-in `experiments/validation_qa.json` |
112
+ | 📁 **Custom File** | `validation_set="data/my_eval.json"` | Loads your own QA dataset (JSON or CSV) |
113
+ | 🌐 **Hugging Face Dataset** | `validation_set="squad"` | Automatically downloads benchmark datasets (requires `pip install datasets`) |
114
+
115
+ ### Example
116
+
117
+ ```python
118
+ from ragmint.tuner import RAGMint
119
+
120
+ ragmint = RAGMint(
121
+ docs_path="data/docs/",
122
+ retrievers=["faiss", "chroma"],
123
+ embeddings=["text-embedding-3-small"],
124
+ rerankers=["mmr"],
125
+ )
126
+
127
+ # Use built-in default
128
+ ragmint.optimize(validation_set=None)
129
+
130
+ # Use Hugging Face benchmark
131
+ ragmint.optimize(validation_set="squad")
132
+
133
+ # Use your own dataset
134
+ ragmint.optimize(validation_set="data/custom_qa.json")
135
+ ```
136
+
104
137
  ---
105
138
 
106
139
  ## 🧩 Folder Structure
@@ -1,6 +1,6 @@
1
1
  ragmint/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
2
  ragmint/__main__.py,sha256=q7hBn56Z1xAckbs03i8ynsuOzJVUXmod2qHddX7gkpc,729
3
- ragmint/tuner.py,sha256=sCUb-qGqk-lz4nUJboomwXFt3us7mYf3oJhwWV9Kzo4,4429
3
+ ragmint/tuner.py,sha256=BLPZ66sVk3dh3Wj-GVUYRVmVtgXYTzv3oTQtKJeDlgE,4442
4
4
  ragmint/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
5
  ragmint/core/chunking.py,sha256=Dy9RYyapGSS6ik6Vg9lqbUPCFqSraU1JKpHbYUTkaFo,576
6
6
  ragmint/core/embeddings.py,sha256=6wJjfZ5ukr8G5bJJ1evjIqj0_FMbs_gq4xC-sBBqNlA,566
@@ -15,14 +15,14 @@ ragmint/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
15
15
  ragmint/tests/test_pipeline.py,sha256=MIMkEKelh-POlbXzbCc4ClMk8XCGzfuj569xXltziic,615
16
16
  ragmint/tests/test_retriever.py,sha256=Ag0uGW8-iMzKA4nJNnsjuzlQHa79sN-T-K1g1cdin-A,421
17
17
  ragmint/tests/test_search.py,sha256=FcC-DEnw9veAEyMnFoRw9DAwzqJC9F6-r63Nqo2nO58,598
18
- ragmint/tests/test_tuner.py,sha256=VFZ23og0dOypBpr3TxkRmSngilkNgyboZc6u9qB0pME,1101
18
+ ragmint/tests/test_tuner.py,sha256=LOvtIxAbUsoRHQudZ23UVr60FYAU0a1SBNvAN0mLpfU,2322
19
19
  ragmint/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
20
20
  ragmint/utils/caching.py,sha256=LPE2JorOQ90BgVf6NUiS0-bdt-FGpNxDy7FnuwEHzy0,1060
21
- ragmint/utils/data_loader.py,sha256=Q3pBO77XZ1rl4fuMn3TK7x3mSM2eLdV_OJTyy_eL3Ys,988
21
+ ragmint/utils/data_loader.py,sha256=GXU9Nc3o0UWxtBeRwiskD1aCjSiNNuRoAokIUODn7q8,2024
22
22
  ragmint/utils/logger.py,sha256=X7hTNb3st3fUeQIzSghuoV5B8FWXzm_O3DRkSfJvhmI,1033
23
23
  ragmint/utils/metrics.py,sha256=DR8mrdumHtQerK0VrugwYKIG1oNptEcsFqodXq3i2kY,717
24
- ragmint-0.1.0.dist-info/licenses/LICENSE,sha256=ahkhYfFLI8tGrdxdO2_GaT6OJW2eNwyFT3kYi85QQhc,692
25
- ragmint-0.1.0.dist-info/METADATA,sha256=BgMj5BxH2C2_5GweYpClkopepUBCVen5tWAFcOby8o8,5643
26
- ragmint-0.1.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
27
- ragmint-0.1.0.dist-info/top_level.txt,sha256=K2ulzMHuvFm6xayvvJdGABeRJAvKDBn6M3EI-3SbYLw,8
28
- ragmint-0.1.0.dist-info/RECORD,,
24
+ ragmint-0.1.1.dist-info/licenses/LICENSE,sha256=ahkhYfFLI8tGrdxdO2_GaT6OJW2eNwyFT3kYi85QQhc,692
25
+ ragmint-0.1.1.dist-info/METADATA,sha256=qv4dd0BpS4z9Hx67AYZe2MYA2bYvQdOKYfBPovSLb88,6580
26
+ ragmint-0.1.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
27
+ ragmint-0.1.1.dist-info/top_level.txt,sha256=K2ulzMHuvFm6xayvvJdGABeRJAvKDBn6M3EI-3SbYLw,8
28
+ ragmint-0.1.1.dist-info/RECORD,,