ragmint 0.1.0__tar.gz → 0.1.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ragmint might be problematic; see the registry's advisory page for more details.

Files changed (35)
  1. {ragmint-0.1.0/src/ragmint.egg-info → ragmint-0.1.1}/PKG-INFO +34 -1
  2. {ragmint-0.1.0 → ragmint-0.1.1}/README.md +33 -0
  3. {ragmint-0.1.0 → ragmint-0.1.1}/pyproject.toml +1 -1
  4. ragmint-0.1.1/src/ragmint/tests/test_tuner.py +71 -0
  5. {ragmint-0.1.0 → ragmint-0.1.1}/src/ragmint/tuner.py +1 -1
  6. ragmint-0.1.1/src/ragmint/utils/data_loader.py +65 -0
  7. {ragmint-0.1.0 → ragmint-0.1.1/src/ragmint.egg-info}/PKG-INFO +34 -1
  8. ragmint-0.1.0/src/ragmint/tests/test_tuner.py +0 -38
  9. ragmint-0.1.0/src/ragmint/utils/data_loader.py +0 -35
  10. {ragmint-0.1.0 → ragmint-0.1.1}/LICENSE +0 -0
  11. {ragmint-0.1.0 → ragmint-0.1.1}/setup.cfg +0 -0
  12. {ragmint-0.1.0 → ragmint-0.1.1}/src/ragmint/__init__.py +0 -0
  13. {ragmint-0.1.0 → ragmint-0.1.1}/src/ragmint/__main__.py +0 -0
  14. {ragmint-0.1.0 → ragmint-0.1.1}/src/ragmint/core/__init__.py +0 -0
  15. {ragmint-0.1.0 → ragmint-0.1.1}/src/ragmint/core/chunking.py +0 -0
  16. {ragmint-0.1.0 → ragmint-0.1.1}/src/ragmint/core/embeddings.py +0 -0
  17. {ragmint-0.1.0 → ragmint-0.1.1}/src/ragmint/core/evaluation.py +0 -0
  18. {ragmint-0.1.0 → ragmint-0.1.1}/src/ragmint/core/pipeline.py +0 -0
  19. {ragmint-0.1.0 → ragmint-0.1.1}/src/ragmint/core/reranker.py +0 -0
  20. {ragmint-0.1.0 → ragmint-0.1.1}/src/ragmint/core/retriever.py +0 -0
  21. {ragmint-0.1.0 → ragmint-0.1.1}/src/ragmint/experiments/__init__.py +0 -0
  22. {ragmint-0.1.0 → ragmint-0.1.1}/src/ragmint/optimization/__init__.py +0 -0
  23. {ragmint-0.1.0 → ragmint-0.1.1}/src/ragmint/optimization/search.py +0 -0
  24. {ragmint-0.1.0 → ragmint-0.1.1}/src/ragmint/tests/__init__.py +0 -0
  25. {ragmint-0.1.0 → ragmint-0.1.1}/src/ragmint/tests/test_pipeline.py +0 -0
  26. {ragmint-0.1.0 → ragmint-0.1.1}/src/ragmint/tests/test_retriever.py +0 -0
  27. {ragmint-0.1.0 → ragmint-0.1.1}/src/ragmint/tests/test_search.py +0 -0
  28. {ragmint-0.1.0 → ragmint-0.1.1}/src/ragmint/utils/__init__.py +0 -0
  29. {ragmint-0.1.0 → ragmint-0.1.1}/src/ragmint/utils/caching.py +0 -0
  30. {ragmint-0.1.0 → ragmint-0.1.1}/src/ragmint/utils/logger.py +0 -0
  31. {ragmint-0.1.0 → ragmint-0.1.1}/src/ragmint/utils/metrics.py +0 -0
  32. {ragmint-0.1.0 → ragmint-0.1.1}/src/ragmint.egg-info/SOURCES.txt +0 -0
  33. {ragmint-0.1.0 → ragmint-0.1.1}/src/ragmint.egg-info/dependency_links.txt +0 -0
  34. {ragmint-0.1.0 → ragmint-0.1.1}/src/ragmint.egg-info/requires.txt +0 -0
  35. {ragmint-0.1.0 → ragmint-0.1.1}/src/ragmint.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ragmint
3
- Version: 0.1.0
3
+ Version: 0.1.1
4
4
  Summary: A modular framework for evaluating and optimizing RAG pipelines.
5
5
  Author-email: Andre Oliveira <oandreoliveira@outlook.com>
6
6
  License: Apache License 2.0
@@ -101,6 +101,39 @@ result = pipeline.run("What is retrieval-augmented generation?")
101
101
  print(result)
102
102
  ```
103
103
 
104
+ ---
105
+ ## 🧪 Dataset Options
106
+
107
+ Ragmint can automatically load evaluation datasets for your RAG pipeline:
108
+
109
+ | Mode | Example | Description |
110
+ |------|----------|-------------|
111
+ | 🧱 **Default** | `validation_set=None` | Uses built-in `experiments/validation_qa.json` |
112
+ | 📁 **Custom File** | `validation_set="data/my_eval.json"` | Load your own QA dataset (JSON or CSV) |
113
+ | 🌐 **Hugging Face Dataset** | `validation_set="squad"` | Automatically downloads benchmark datasets (requires `pip install datasets`) |
114
+
115
+ ### Example
116
+
117
+ ```python
118
+ from ragmint.tuner import RAGMint
119
+
120
+ ragmint = RAGMint(
121
+ docs_path="data/docs/",
122
+ retrievers=["faiss", "chroma"],
123
+ embeddings=["text-embedding-3-small"],
124
+ rerankers=["mmr"],
125
+ )
126
+
127
+ # Use built-in default
128
+ ragmint.optimize(validation_set=None)
129
+
130
+ # Use Hugging Face benchmark
131
+ ragmint.optimize(validation_set="squad")
132
+
133
+ # Use your own dataset
134
+ ragmint.optimize(validation_set="data/custom_qa.json")
135
+ ```
136
+
104
137
  ---
105
138
 
106
139
  ## 🧩 Folder Structure
@@ -75,6 +75,39 @@ result = pipeline.run("What is retrieval-augmented generation?")
75
75
  print(result)
76
76
  ```
77
77
 
78
+ ---
79
+ ## 🧪 Dataset Options
80
+
81
+ Ragmint can automatically load evaluation datasets for your RAG pipeline:
82
+
83
+ | Mode | Example | Description |
84
+ |------|----------|-------------|
85
+ | 🧱 **Default** | `validation_set=None` | Uses built-in `experiments/validation_qa.json` |
86
+ | 📁 **Custom File** | `validation_set="data/my_eval.json"` | Load your own QA dataset (JSON or CSV) |
87
+ | 🌐 **Hugging Face Dataset** | `validation_set="squad"` | Automatically downloads benchmark datasets (requires `pip install datasets`) |
88
+
89
+ ### Example
90
+
91
+ ```python
92
+ from ragmint.tuner import RAGMint
93
+
94
+ ragmint = RAGMint(
95
+ docs_path="data/docs/",
96
+ retrievers=["faiss", "chroma"],
97
+ embeddings=["text-embedding-3-small"],
98
+ rerankers=["mmr"],
99
+ )
100
+
101
+ # Use built-in default
102
+ ragmint.optimize(validation_set=None)
103
+
104
+ # Use Hugging Face benchmark
105
+ ragmint.optimize(validation_set="squad")
106
+
107
+ # Use your own dataset
108
+ ragmint.optimize(validation_set="data/custom_qa.json")
109
+ ```
110
+
78
111
  ---
79
112
 
80
113
  ## 🧩 Folder Structure
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "ragmint"
7
- version = "0.1.0"
7
+ version = "0.1.1"
8
8
  description = "A modular framework for evaluating and optimizing RAG pipelines."
9
9
  readme = "README.md"
10
10
  license = { text = "Apache License 2.0" }
@@ -0,0 +1,71 @@
1
+ import os
2
+ import json
3
+ import pytest
4
+ from ragmint.tuner import RAGMint
5
+
6
+
7
+ def setup_validation_file(tmp_path):
8
+ """Create a temporary validation QA dataset."""
9
+ data = [
10
+ {"question": "What is AI?", "answer": "Artificial Intelligence"},
11
+ {"question": "Define ML", "answer": "Machine Learning"}
12
+ ]
13
+ file = tmp_path / "validation_qa.json"
14
+ with open(file, "w", encoding="utf-8") as f:
15
+ json.dump(data, f)
16
+ return str(file)
17
+
18
+
19
+ def setup_docs(tmp_path):
20
+ """Create a small document corpus for testing."""
21
+ corpus = tmp_path / "corpus"
22
+ corpus.mkdir()
23
+ (corpus / "doc1.txt").write_text("This is about Artificial Intelligence.")
24
+ (corpus / "doc2.txt").write_text("This text explains Machine Learning.")
25
+ return str(corpus)
26
+
27
+
28
+ @pytest.mark.parametrize("validation_mode", [
29
+ None, # Built-in dataset
30
+ "data/custom_eval.json", # Custom dataset path (mocked below)
31
+ ])
32
+ def test_optimize_ragmint(tmp_path, validation_mode, monkeypatch):
33
+ """Test RAGMint.optimize() with different dataset modes."""
34
+ docs_path = setup_docs(tmp_path)
35
+ val_file = setup_validation_file(tmp_path)
36
+
37
+ # If using custom dataset, mock the path
38
+ if validation_mode and "custom_eval" in validation_mode:
39
+ custom_path = tmp_path / "custom_eval.json"
40
+ os.rename(val_file, custom_path)
41
+ validation_mode = str(custom_path)
42
+
43
+ metric = "faithfulness"
44
+
45
+ # Initialize RAGMint
46
+ rag = RAGMint(
47
+ docs_path=docs_path,
48
+ retrievers=["faiss"],
49
+ embeddings=["text-embedding-3-small"],
50
+ rerankers=["mmr"]
51
+ )
52
+
53
+ # Run optimization
54
+ best, results = rag.optimize(
55
+ validation_set=validation_mode,
56
+ metric=metric,
57
+ trials=2
58
+ )
59
+
60
+ # Validate results
61
+ assert isinstance(best, dict), "Best config should be a dict"
62
+ assert isinstance(results, list), "Results should be a list of trials"
63
+ assert len(results) > 0, "Optimization should produce results"
64
+
65
+ # The best result can expose either 'score' or the metric name (e.g. 'faithfulness')
66
+ assert any(k in best for k in ("score", metric)), \
67
+ f"Best config should include either 'score' or '{metric}'"
68
+
69
+ # Ensure the metric value is valid
70
+ assert best.get(metric, best.get("score")) >= 0, \
71
+ f"{metric} score should be non-negative"
@@ -90,7 +90,7 @@ class RAGMint:
90
90
  search_type: str = "random",
91
91
  trials: int = 10,
92
92
  ) -> Tuple[Dict[str, Any], List[Dict[str, Any]]]:
93
- validation = load_validation_set(validation_set)
93
+ validation = load_validation_set(validation_set or "default")
94
94
 
95
95
  search_space = {
96
96
  "retriever": self.retrievers,
@@ -0,0 +1,65 @@
1
+ import json
2
+ import csv
3
+ from typing import List, Dict
4
+ from pathlib import Path
5
+ import os
6
+
7
+ try:
8
+ from datasets import load_dataset
9
+ except ImportError:
10
+ load_dataset = None # optional dependency
11
+
12
+ DEFAULT_VALIDATION_PATH = Path(__file__).parent.parent / "experiments" / "validation_qa.json"
13
+
14
+
15
+ def load_json(path: str) -> List[Dict]:
16
+ with open(path, "r", encoding="utf-8") as f:
17
+ return json.load(f)
18
+
19
+
20
+ def load_csv(path: str) -> List[Dict]:
21
+ with open(path, newline="", encoding="utf-8") as csvfile:
22
+ reader = csv.DictReader(csvfile)
23
+ return list(reader)
24
+
25
+
26
+ def save_json(path: str, data: Dict):
27
+ with open(path, "w", encoding="utf-8") as f:
28
+ json.dump(data, f, ensure_ascii=False, indent=2)
29
+
30
+ def load_validation_set(path: str | None = None) -> List[Dict]:
31
+ """
32
+ Loads a validation dataset (QA pairs) from:
33
+ - Built-in default JSON file
34
+ - User-provided JSON or CSV
35
+ - Hugging Face dataset by name
36
+ """
37
+ # Default behavior
38
+ if path is None or path == "default":
39
+ if not DEFAULT_VALIDATION_PATH.exists():
40
+ raise FileNotFoundError(f"Default validation set not found at {DEFAULT_VALIDATION_PATH}")
41
+ return load_json(DEFAULT_VALIDATION_PATH)
42
+
43
+ # Hugging Face dataset
44
+ if not os.path.exists(path) and load_dataset:
45
+ try:
46
+ dataset = load_dataset(path, split="validation")
47
+ data = [
48
+ {"question": q, "answer": a}
49
+ for q, a in zip(dataset["question"], dataset["answers"])
50
+ ]
51
+ return data
52
+ except Exception:
53
+ pass # fall through to file loading
54
+
55
+ # Local file
56
+ p = Path(path)
57
+ if not p.exists():
58
+ raise FileNotFoundError(f"Validation file not found: {path}")
59
+
60
+ if p.suffix.lower() == ".json":
61
+ return load_json(path)
62
+ elif p.suffix.lower() in [".csv", ".tsv"]:
63
+ return load_csv(path)
64
+ else:
65
+ raise ValueError("Unsupported validation set format. Use JSON, CSV, or a Hugging Face dataset name.")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ragmint
3
- Version: 0.1.0
3
+ Version: 0.1.1
4
4
  Summary: A modular framework for evaluating and optimizing RAG pipelines.
5
5
  Author-email: Andre Oliveira <oandreoliveira@outlook.com>
6
6
  License: Apache License 2.0
@@ -101,6 +101,39 @@ result = pipeline.run("What is retrieval-augmented generation?")
101
101
  print(result)
102
102
  ```
103
103
 
104
+ ---
105
+ ## 🧪 Dataset Options
106
+
107
+ Ragmint can automatically load evaluation datasets for your RAG pipeline:
108
+
109
+ | Mode | Example | Description |
110
+ |------|----------|-------------|
111
+ | 🧱 **Default** | `validation_set=None` | Uses built-in `experiments/validation_qa.json` |
112
+ | 📁 **Custom File** | `validation_set="data/my_eval.json"` | Load your own QA dataset (JSON or CSV) |
113
+ | 🌐 **Hugging Face Dataset** | `validation_set="squad"` | Automatically downloads benchmark datasets (requires `pip install datasets`) |
114
+
115
+ ### Example
116
+
117
+ ```python
118
+ from ragmint.tuner import RAGMint
119
+
120
+ ragmint = RAGMint(
121
+ docs_path="data/docs/",
122
+ retrievers=["faiss", "chroma"],
123
+ embeddings=["text-embedding-3-small"],
124
+ rerankers=["mmr"],
125
+ )
126
+
127
+ # Use built-in default
128
+ ragmint.optimize(validation_set=None)
129
+
130
+ # Use Hugging Face benchmark
131
+ ragmint.optimize(validation_set="squad")
132
+
133
+ # Use your own dataset
134
+ ragmint.optimize(validation_set="data/custom_qa.json")
135
+ ```
136
+
104
137
  ---
105
138
 
106
139
  ## 🧩 Folder Structure
@@ -1,38 +0,0 @@
1
- import os
2
- import json
3
- from ragmint.tuner import RAGMint
4
-
5
-
6
- def setup_validation_file(tmp_path):
7
- data = [
8
- {"question": "What is AI?", "answer": "Artificial Intelligence"},
9
- {"question": "Define ML", "answer": "Machine Learning"}
10
- ]
11
- file = tmp_path / "validation_qa.json"
12
- with open(file, "w", encoding="utf-8") as f:
13
- json.dump(data, f)
14
- return str(file)
15
-
16
-
17
- def setup_docs(tmp_path):
18
- corpus = tmp_path / "corpus"
19
- corpus.mkdir()
20
- (corpus / "doc1.txt").write_text("This is about Artificial Intelligence.")
21
- (corpus / "doc2.txt").write_text("This text explains Machine Learning.")
22
- return str(corpus)
23
-
24
-
25
- def test_optimize_random(tmp_path):
26
- docs_path = setup_docs(tmp_path)
27
- val_file = setup_validation_file(tmp_path)
28
-
29
- rag = RAGMint(
30
- docs_path=docs_path,
31
- retrievers=["faiss"],
32
- embeddings=["openai/text-embedding-3-small"],
33
- rerankers=["mmr"]
34
- )
35
-
36
- best, results = rag.optimize(validation_set=val_file, metric="faithfulness", trials=2)
37
- assert isinstance(best, dict)
38
- assert isinstance(results, list)
@@ -1,35 +0,0 @@
1
- import json
2
- import csv
3
- from typing import List, Dict
4
- from pathlib import Path
5
-
6
-
7
- def load_json(path: str) -> List[Dict]:
8
- with open(path, "r", encoding="utf-8") as f:
9
- return json.load(f)
10
-
11
-
12
- def load_csv(path: str) -> List[Dict]:
13
- with open(path, newline="", encoding="utf-8") as csvfile:
14
- reader = csv.DictReader(csvfile)
15
- return list(reader)
16
-
17
-
18
- def save_json(path: str, data: Dict):
19
- with open(path, "w", encoding="utf-8") as f:
20
- json.dump(data, f, ensure_ascii=False, indent=2)
21
-
22
- def load_validation_set(path: str) -> List[Dict]:
23
- """
24
- Loads a validation dataset (QA pairs) from JSON or CSV.
25
- """
26
- p = Path(path)
27
- if not p.exists():
28
- raise FileNotFoundError(f"Validation file not found: {path}")
29
-
30
- if p.suffix.lower() == ".json":
31
- return load_json(path)
32
- elif p.suffix.lower() in [".csv", ".tsv"]:
33
- return load_csv(path)
34
- else:
35
- raise ValueError("Unsupported validation set format. Use JSON or CSV.")
File without changes
File without changes
File without changes
File without changes