PyPI - ragmint - Versions diffs - 0.1.0__py3-none-any.whl → 0.1.1__py3-none-any.whl - Mend

ragmint 0.1.0__py3-none-any.whl → 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of ragmint might be problematic. Click here for more details.

Files changed (8)

ragmint/tests/test_tuner.py CHANGED Viewed

@@ -1,9 +1,11 @@
 import os
 import json
+import pytest
 from ragmint.tuner import RAGMint
 def setup_validation_file(tmp_path):
+    """Create a temporary validation QA dataset."""
     data = [
         {"question": "What is AI?", "answer": "Artificial Intelligence"},
         {"question": "Define ML", "answer": "Machine Learning"}
@@ -15,6 +17,7 @@ def setup_validation_file(tmp_path):
 def setup_docs(tmp_path):
+    """Create a small document corpus for testing."""
     corpus = tmp_path / "corpus"
     corpus.mkdir()
     (corpus / "doc1.txt").write_text("This is about Artificial Intelligence.")
@@ -22,17 +25,47 @@ def setup_docs(tmp_path):
     return str(corpus)
-def test_optimize_random(tmp_path):
+@pytest.mark.parametrize("validation_mode", [
+    None,  # Built-in dataset
+    "data/custom_eval.json",  # Custom dataset path (mocked below)
+])
+def test_optimize_ragmint(tmp_path, validation_mode, monkeypatch):
+    """Test RAGMint.optimize() with different dataset modes."""
     docs_path = setup_docs(tmp_path)
     val_file = setup_validation_file(tmp_path)
+    # If using custom dataset, mock the path
+    if validation_mode and "custom_eval" in validation_mode:
+        custom_path = tmp_path / "custom_eval.json"
+        os.rename(val_file, custom_path)
+        validation_mode = str(custom_path)
+    metric = "faithfulness"
+    # Initialize RAGMint
     rag = RAGMint(
         docs_path=docs_path,
         retrievers=["faiss"],
-        embeddings=["openai/text-embedding-3-small"],
+        embeddings=["text-embedding-3-small"],
         rerankers=["mmr"]
     )
-    best, results = rag.optimize(validation_set=val_file, metric="faithfulness", trials=2)
-    assert isinstance(best, dict)
-    assert isinstance(results, list)
+    # Run optimization
+    best, results = rag.optimize(
+        validation_set=validation_mode,
+        metric=metric,
+        trials=2
+    )
+    # Validate results
+    assert isinstance(best, dict), "Best config should be a dict"
+    assert isinstance(results, list), "Results should be a list of trials"
+    assert len(results) > 0, "Optimization should produce results"
+    # The best result can expose either 'score' or the metric name (e.g. 'faithfulness')
+    assert any(k in best for k in ("score", metric)), \
+        f"Best config should include either 'score' or '{metric}'"
+    # Ensure the metric value is valid
+    assert best.get(metric, best.get("score")) >= 0, \
+        f"{metric} score should be non-negative"

ragmint/tuner.py CHANGED Viewed

@@ -90,7 +90,7 @@ class RAGMint:
         search_type: str = "random",
         trials: int = 10,
     ) -> Tuple[Dict[str, Any], List[Dict[str, Any]]]:
-        validation = load_validation_set(validation_set)
+        validation = load_validation_set(validation_set or "default")
         search_space = {
             "retriever": self.retrievers,

ragmint/utils/data_loader.py CHANGED Viewed

@@ -2,6 +2,14 @@ import json
 import csv
 from typing import List, Dict
 from pathlib import Path
+import os
+try:
+    from datasets import load_dataset
+except ImportError:
+    load_dataset = None  # optional dependency
+DEFAULT_VALIDATION_PATH = Path(__file__).parent.parent / "experiments" / "validation_qa.json"
 def load_json(path: str) -> List[Dict]:
@@ -19,10 +27,32 @@ def save_json(path: str, data: Dict):
     with open(path, "w", encoding="utf-8") as f:
         json.dump(data, f, ensure_ascii=False, indent=2)
-def load_validation_set(path: str) -> List[Dict]:
+def load_validation_set(path: str | None = None) -> List[Dict]:
     """
-    Loads a validation dataset (QA pairs) from JSON or CSV.
+    Loads a validation dataset (QA pairs) from:
+    - Built-in default JSON file
+    - User-provided JSON or CSV
+    - Hugging Face dataset by name
     """
+    # Default behavior
+    if path is None or path == "default":
+        if not DEFAULT_VALIDATION_PATH.exists():
+            raise FileNotFoundError(f"Default validation set not found at {DEFAULT_VALIDATION_PATH}")
+        return load_json(DEFAULT_VALIDATION_PATH)
+    # Hugging Face dataset
+    if not os.path.exists(path) and load_dataset:
+        try:
+            dataset = load_dataset(path, split="validation")
+            data = [
+                {"question": q, "answer": a}
+                for q, a in zip(dataset["question"], dataset["answers"])
+            ]
+            return data
+        except Exception:
+            pass  # fall through to file loading
+    # Local file
     p = Path(path)
     if not p.exists():
         raise FileNotFoundError(f"Validation file not found: {path}")
@@ -32,4 +62,4 @@ def load_validation_set(path: str) -> List[Dict]:
     elif p.suffix.lower() in [".csv", ".tsv"]:
         return load_csv(path)
     else:
-        raise ValueError("Unsupported validation set format. Use JSON or CSV.")
+        raise ValueError("Unsupported validation set format. Use JSON, CSV, or a Hugging Face dataset name.")

{ragmint-0.1.0.dist-info → ragmint-0.1.1.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ragmint
-Version: 0.1.0
+Version: 0.1.1
 Summary: A modular framework for evaluating and optimizing RAG pipelines.
 Author-email: Andre Oliveira <oandreoliveira@outlook.com>
 License: Apache License 2.0
@@ -101,6 +101,39 @@ result = pipeline.run("What is retrieval-augmented generation?")
 print(result)
 ```
+---
+## 🧪 Dataset Options
+Ragmint can automatically load evaluation datasets for your RAG pipeline:
+| Mode | Example | Description |
+|------|----------|-------------|
+| 🧱 **Default** | `validation_set=None` | Uses built-in `experiments/validation_qa.json` |
+| 📁 **Custom File** | `validation_set="data/my_eval.json"` | Load your own QA dataset (JSON or CSV) |
+| 🌐 **Hugging Face Dataset** | `validation_set="squad"` | Automatically downloads benchmark datasets (requires `pip install datasets`) |
+### Example
+```python
+from ragmint.tuner import RAGMint
+ragmint = RAGMint(
+    docs_path="data/docs/",
+    retrievers=["faiss", "chroma"],
+    embeddings=["text-embedding-3-small"],
+    rerankers=["mmr"],
+)
+# Use built-in default
+ragmint.optimize(validation_set=None)
+# Use Hugging Face benchmark
+ragmint.optimize(validation_set="squad")
+# Use your own dataset
+ragmint.optimize(validation_set="data/custom_qa.json")
+```
 ---
 ## 🧩 Folder Structure

{ragmint-0.1.0.dist-info → ragmint-0.1.1.dist-info}/RECORD RENAMED Viewed

@@ -1,6 +1,6 @@
 ragmint/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 ragmint/__main__.py,sha256=q7hBn56Z1xAckbs03i8ynsuOzJVUXmod2qHddX7gkpc,729
-ragmint/tuner.py,sha256=sCUb-qGqk-lz4nUJboomwXFt3us7mYf3oJhwWV9Kzo4,4429
+ragmint/tuner.py,sha256=BLPZ66sVk3dh3Wj-GVUYRVmVtgXYTzv3oTQtKJeDlgE,4442
 ragmint/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 ragmint/core/chunking.py,sha256=Dy9RYyapGSS6ik6Vg9lqbUPCFqSraU1JKpHbYUTkaFo,576
 ragmint/core/embeddings.py,sha256=6wJjfZ5ukr8G5bJJ1evjIqj0_FMbs_gq4xC-sBBqNlA,566
@@ -15,14 +15,14 @@ ragmint/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 ragmint/tests/test_pipeline.py,sha256=MIMkEKelh-POlbXzbCc4ClMk8XCGzfuj569xXltziic,615
 ragmint/tests/test_retriever.py,sha256=Ag0uGW8-iMzKA4nJNnsjuzlQHa79sN-T-K1g1cdin-A,421
 ragmint/tests/test_search.py,sha256=FcC-DEnw9veAEyMnFoRw9DAwzqJC9F6-r63Nqo2nO58,598
-ragmint/tests/test_tuner.py,sha256=VFZ23og0dOypBpr3TxkRmSngilkNgyboZc6u9qB0pME,1101
+ragmint/tests/test_tuner.py,sha256=LOvtIxAbUsoRHQudZ23UVr60FYAU0a1SBNvAN0mLpfU,2322
 ragmint/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 ragmint/utils/caching.py,sha256=LPE2JorOQ90BgVf6NUiS0-bdt-FGpNxDy7FnuwEHzy0,1060
-ragmint/utils/data_loader.py,sha256=Q3pBO77XZ1rl4fuMn3TK7x3mSM2eLdV_OJTyy_eL3Ys,988
+ragmint/utils/data_loader.py,sha256=GXU9Nc3o0UWxtBeRwiskD1aCjSiNNuRoAokIUODn7q8,2024
 ragmint/utils/logger.py,sha256=X7hTNb3st3fUeQIzSghuoV5B8FWXzm_O3DRkSfJvhmI,1033
 ragmint/utils/metrics.py,sha256=DR8mrdumHtQerK0VrugwYKIG1oNptEcsFqodXq3i2kY,717
-ragmint-0.1.0.dist-info/licenses/LICENSE,sha256=ahkhYfFLI8tGrdxdO2_GaT6OJW2eNwyFT3kYi85QQhc,692
-ragmint-0.1.0.dist-info/METADATA,sha256=BgMj5BxH2C2_5GweYpClkopepUBCVen5tWAFcOby8o8,5643
-ragmint-0.1.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-ragmint-0.1.0.dist-info/top_level.txt,sha256=K2ulzMHuvFm6xayvvJdGABeRJAvKDBn6M3EI-3SbYLw,8
-ragmint-0.1.0.dist-info/RECORD,,
+ragmint-0.1.1.dist-info/licenses/LICENSE,sha256=ahkhYfFLI8tGrdxdO2_GaT6OJW2eNwyFT3kYi85QQhc,692
+ragmint-0.1.1.dist-info/METADATA,sha256=qv4dd0BpS4z9Hx67AYZe2MYA2bYvQdOKYfBPovSLb88,6580
+ragmint-0.1.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ragmint-0.1.1.dist-info/top_level.txt,sha256=K2ulzMHuvFm6xayvvJdGABeRJAvKDBn6M3EI-3SbYLw,8
+ragmint-0.1.1.dist-info/RECORD,,

{ragmint-0.1.0.dist-info → ragmint-0.1.1.dist-info}/WHEEL RENAMED Viewed

File without changes

{ragmint-0.1.0.dist-info → ragmint-0.1.1.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{ragmint-0.1.0.dist-info → ragmint-0.1.1.dist-info}/top_level.txt RENAMED Viewed

File without changes

ragmint 0.1.0__py3-none-any.whl → 0.1.1__py3-none-any.whl

Potentially problematic release.

ragmint 0.1.0__py3-none-any.whl → 0.1.1__py3-none-any.whl