ragmint 0.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release. This version of ragmint might be problematic.
- ragmint/__init__.py +0 -0
- ragmint/__main__.py +28 -0
- ragmint/autotuner.py +138 -0
- ragmint/core/__init__.py +0 -0
- ragmint/core/chunking.py +86 -0
- ragmint/core/embeddings.py +55 -0
- ragmint/core/evaluation.py +38 -0
- ragmint/core/pipeline.py +62 -0
- ragmint/core/reranker.py +62 -0
- ragmint/core/retriever.py +165 -0
- ragmint/experiments/__init__.py +0 -0
- ragmint/experiments/validation_qa.json +14 -0
- ragmint/explainer.py +63 -0
- ragmint/integrations/__init__.py +0 -0
- ragmint/integrations/config_adapter.py +96 -0
- ragmint/integrations/langchain_prebuilder.py +99 -0
- ragmint/leaderboard.py +45 -0
- ragmint/optimization/__init__.py +0 -0
- ragmint/optimization/search.py +48 -0
- ragmint/tests/__init__.py +0 -0
- ragmint/tests/conftest.py +16 -0
- ragmint/tests/test_autotuner.py +51 -0
- ragmint/tests/test_config_adapter.py +39 -0
- ragmint/tests/test_embeddings.py +46 -0
- ragmint/tests/test_explainer.py +20 -0
- ragmint/tests/test_explainer_integration.py +18 -0
- ragmint/tests/test_integration_autotuner_ragmint.py +47 -0
- ragmint/tests/test_langchain_prebuilder.py +82 -0
- ragmint/tests/test_leaderboard.py +39 -0
- ragmint/tests/test_pipeline.py +20 -0
- ragmint/tests/test_retriever.py +15 -0
- ragmint/tests/test_search.py +17 -0
- ragmint/tests/test_tuner.py +71 -0
- ragmint/tuner.py +189 -0
- ragmint/utils/__init__.py +0 -0
- ragmint/utils/caching.py +37 -0
- ragmint/utils/data_loader.py +65 -0
- ragmint/utils/logger.py +36 -0
- ragmint/utils/metrics.py +27 -0
- ragmint-0.3.1.data/data/LICENSE +19 -0
- ragmint-0.3.1.data/data/README.md +397 -0
- ragmint-0.3.1.dist-info/METADATA +441 -0
- ragmint-0.3.1.dist-info/RECORD +46 -0
- ragmint-0.3.1.dist-info/WHEEL +5 -0
- ragmint-0.3.1.dist-info/licenses/LICENSE +19 -0
- ragmint-0.3.1.dist-info/top_level.txt +1 -0
ragmint/explainer.py
ADDED
@@ -0,0 +1,63 @@
+"""
+Interpretability Layer
+----------------------
+Uses Gemini or Anthropic Claude to explain why one RAG configuration
+outperforms another. Falls back gracefully if no API key is provided.
+"""
+
+import os
+import json
+from dotenv import load_dotenv
+
+# Load environment variables from .env file if available
+load_dotenv()
+
+def explain_results(results_a: dict, results_b: dict, model: str = "gemini-2.5-flash-lite") -> str:
+    """
+    Generate a natural-language explanation comparing two RAG experiment results.
+    Priority:
+    1. Anthropic Claude (if ANTHROPIC_API_KEY is set)
+    2. Google Gemini (if GOOGLE_API_KEY is set)
+    3. Fallback text message
+    """
+    prompt = f"""
+    You are an AI evaluation expert.
+    Compare these two RAG experiment results and explain why one performs better.
+    Metrics A: {json.dumps(results_a, indent=2)}
+    Metrics B: {json.dumps(results_b, indent=2)}
+    Provide a concise, human-friendly explanation and practical improvement tips.
+    """
+
+    anthropic_key = os.getenv("ANTHROPIC_API_KEY")
+    google_key = os.getenv("GOOGLE_API_KEY")  # fixed var name
+
+    # 1️⃣ Try Anthropic Claude first
+    if anthropic_key:
+        try:
+            from anthropic import Anthropic
+            client = Anthropic(api_key=anthropic_key)
+            response = client.messages.create(
+                model="claude-3-opus-20240229",
+                max_tokens=300,
+                messages=[{"role": "user", "content": prompt}],
+            )
+            return response.content[0].text
+        except Exception as e:
+            return f"[Claude unavailable] {e}"
+
+    # 2️⃣ Fallback to Google Gemini
+    elif google_key:
+        try:
+            import google.generativeai as genai
+            genai.configure(api_key=google_key)
+            response = genai.GenerativeModel(model).generate_content(prompt)
+            return response.text
+        except Exception as e:
+            return f"[Gemini unavailable] {e}"
+
+    # 3️⃣ Fallback if neither key is available
+    else:
+        return (
+            "[No LLM available] Please set ANTHROPIC_API_KEY or GOOGLE_API_KEY "
+            "to enable interpretability via Claude or Gemini."
+        )
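A minimal usage sketch for the module above (the metric dicts are made-up illustrations, not real experiment output; without ANTHROPIC_API_KEY or GOOGLE_API_KEY the call returns the fallback message):

from ragmint.explainer import explain_results

results_a = {"retriever": "faiss", "faithfulness": 0.81, "latency_s": 0.42}
results_b = {"retriever": "bm25", "faithfulness": 0.74, "latency_s": 0.11}

explanation = explain_results(results_a, results_b)
print(explanation)  # "[No LLM available] ..." when neither key is set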
ragmint/integrations/config_adapter.py
ADDED
@@ -0,0 +1,96 @@
+"""
+RAGMint → LangChain Config Adapter
+----------------------------------
+Takes RAGMint or AutoRAGTuner recommendations and converts them into
+a normalized, pickle-safe configuration that can be used to build
+a LangChain RAG pipeline later.
+"""
+
+import json
+import pickle
+from pathlib import Path
+from typing import Dict, Any
+
+
+class LangchainConfigAdapter:
+    """
+    Converts RAGMint recommendations into LangChain-compatible configs.
+
+    Example:
+        adapter = LangchainConfigAdapter()
+        cfg = adapter.prepare(recommendation)
+        adapter.save(cfg, "best_config.pkl")
+    """
+
+    DEFAULT_EMBEDDINGS = {
+        "OpenAI": "sentence-transformers/all-MiniLM-L6-v2",
+        "SentenceTransformers": "sentence-transformers/all-MiniLM-L6-v2",
+        "all-MiniLM-L6-v2": "sentence-transformers/all-MiniLM-L6-v2",
+        "InstructorXL": "hkunlp/instructor-xl"
+    }
+
+    SUPPORTED_RETRIEVERS = {"faiss", "chroma", "bm25", "numpy", "sklearn"}
+
+    def __init__(self, recommendation: Dict[str, Any] | None = None):
+        self.recommendation = recommendation
+
+    def prepare(self, recommendation: Dict[str, Any] | None = None) -> Dict[str, Any]:
+        """
+        Normalize and validate configuration for LangChain use.
+
+        Returns:
+            dict with clean retriever, embedding, and chunking settings.
+        """
+        recommendation = recommendation or self.recommendation or {}
+        retriever = recommendation.get("retriever", "faiss").lower()
+        embedding_model = recommendation.get("embedding_model", "sentence-transformers/all-MiniLM-L6-v2")
+        chunk_size = recommendation.get("chunk_size", 400)
+        overlap = recommendation.get("overlap", 100)
+
+        # Normalize embedding model names
+        embedding_model = self.DEFAULT_EMBEDDINGS.get(embedding_model, embedding_model)
+
+        # Validate retriever backend
+        if retriever not in self.SUPPORTED_RETRIEVERS:
+            raise ValueError(f"Unsupported retriever backend: {retriever}")
+
+        config = {
+            "retriever": retriever,
+            "embedding_model": embedding_model,
+            "chunk_size": int(chunk_size),
+            "overlap": int(overlap),
+        }
+
+        return config
+
+    def save(self, config: Dict[str, Any], path: str):
+        """
+        Save configuration to a pickle file.
+        """
+        Path(path).parent.mkdir(parents=True, exist_ok=True)
+        with open(path, "wb") as f:
+            pickle.dump(config, f)
+        print(f"💾 Saved LangChain config → {path}")
+
+    def load(self, path: str) -> Dict[str, Any]:
+        """
+        Load configuration from a pickle file.
+        """
+        with open(path, "rb") as f:
+            cfg = pickle.load(f)
+        print(f"✅ Loaded LangChain config ← {path}")
+        return cfg
+
+    def to_json(self, config: Dict[str, Any], path: str):
+        """
+        Save configuration as JSON (for human readability).
+        """
+        Path(path).parent.mkdir(parents=True, exist_ok=True)
+        with open(path, "w", encoding="utf-8") as f:
+            json.dump(config, f, indent=2)
+        print(f"📝 Exported LangChain config → {path}")
+
+    # Alias for backward compatibility
+    def to_standard_config(self, recommendation: Dict[str, Any] | None = None) -> Dict[str, Any]:
+        """Alias for backward compatibility with older test suites."""
+        return self.prepare(recommendation)
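A round-trip sketch for the adapter above (the recommendation dict and file paths are hypothetical):

from ragmint.integrations.config_adapter import LangchainConfigAdapter

recommendation = {
    "retriever": "FAISS",                    # lowercased to "faiss"
    "embedding_model": "all-MiniLM-L6-v2",   # mapped to the full HF name
    "chunk_size": 500,
    "overlap": 100,
}

adapter = LangchainConfigAdapter(recommendation)
cfg = adapter.prepare()                             # validate and normalize
adapter.save(cfg, "artifacts/best_config.pkl")      # pickle for later rebuilds
adapter.to_json(cfg, "artifacts/best_config.json")  # human-readable copy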
ragmint/integrations/langchain_prebuilder.py
ADDED
@@ -0,0 +1,99 @@
+"""
+LangChain Pre-Build Integration
+-------------------------------
+This module bridges RAGMint's auto-tuning system with LangChain,
+returning retriever and embedding components that can plug directly
+into any LangChain RAG pipeline.
+
+Example:
+    from ragmint.integrations.langchain_prebuilder import LangchainPrebuilder
+    from langchain.chains import RetrievalQA
+    from langchain_openai import ChatOpenAI
+
+    prebuilder = LangchainPrebuilder(best_cfg)
+    retriever, embeddings = prebuilder.prepare(documents)
+
+    llm = ChatOpenAI(model="gpt-4o-mini")
+    qa = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)
+"""
+
+from typing import List, Tuple, Dict, Any
+
+
+try:
+    from langchain_text_splitters import RecursiveCharacterTextSplitter
+except ImportError:
+    # Older LangChain releases ship the splitter in the main package
+    from langchain.text_splitter import RecursiveCharacterTextSplitter
+
+from langchain_community.embeddings import HuggingFaceEmbeddings
+from langchain_community.vectorstores import FAISS, Chroma
+from langchain_community.retrievers import BM25Retriever
+
+
+class LangchainPrebuilder:
+    """
+    Dynamically builds LangChain retriever and embedding objects
+    based on a RAGMint configuration dictionary.
+    """
+
+    def __init__(self, cfg: Dict[str, Any]):
+        """
+        Args:
+            cfg (dict): RAGMint configuration with keys:
+                - retriever: "faiss" | "chroma" | "bm25"
+                - embedding_model: HuggingFace model name
+                - chunk_size: int (default=500)
+                - overlap: int (default=100)
+        """
+        self.cfg = cfg
+        self.retriever_backend = cfg.get("retriever", "faiss").lower()
+        self.embedding_model = cfg.get("embedding_model", "sentence-transformers/all-MiniLM-L6-v2")
+        self.chunk_size = int(cfg.get("chunk_size", 500))
+        self.overlap = int(cfg.get("overlap", 100))
+
+    def prepare(self, documents: List[str]) -> Tuple[Any, Any]:
+        """
+        Prepares LangChain-compatible retriever and embeddings.
+
+        Args:
+            documents (list[str]): Corpus texts
+
+        Returns:
+            (retriever, embeddings): Tuple of initialized LangChain retriever and embedding model
+        """
+        # 1️⃣ Split into chunks
+        splitter = RecursiveCharacterTextSplitter(
+            chunk_size=self.chunk_size,
+            chunk_overlap=self.overlap
+        )
+        docs = splitter.create_documents(documents)
+
+        # 2️⃣ Create embeddings
+        embeddings = HuggingFaceEmbeddings(model_name=self.embedding_model)
+
+        # 3️⃣ Build retriever
+        retriever = self._build_retriever(docs, embeddings)
+        return retriever, embeddings
+
+    def _build_retriever(self, docs, embeddings):
+        """Internal helper for building retriever backend."""
+        backend = self.retriever_backend
+
+        if backend == "faiss":
+            db = FAISS.from_documents(docs, embeddings)
+            return db.as_retriever(search_kwargs={"k": 5})
+
+        elif backend == "chroma":
+            db = Chroma.from_documents(docs, embeddings, collection_name="ragmint_docs")
+            return db.as_retriever(search_kwargs={"k": 5})
+
+        elif backend == "bm25":
+            # Support both Document objects and raw text strings
+            texts = [getattr(d, "page_content", d) for d in docs]
+            retriever = BM25Retriever.from_texts(texts)
+            retriever.k = 5
+            return retriever
+
+        else:
+            raise ValueError(f"Unsupported retriever backend: {backend}")
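The two integration modules compose; a sketch assuming langchain-community and sentence-transformers are installed (the corpus and query are toy examples):

from ragmint.integrations.config_adapter import LangchainConfigAdapter
from ragmint.integrations.langchain_prebuilder import LangchainPrebuilder

documents = [
    "RAG combines retrieval with generation.",
    "FAISS performs vector similarity search.",
]

cfg = LangchainConfigAdapter({"retriever": "bm25"}).prepare()
retriever, embeddings = LangchainPrebuilder(cfg).prepare(documents)

# Standard LangChain retriever call; older releases use get_relevant_documents
hits = retriever.invoke("What does FAISS do?")

Note that every backend pins top-k to 5 (search_kwargs={"k": 5} or retriever.k = 5), so k is not part of the tuned configuration in this release.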
ragmint/leaderboard.py
ADDED
@@ -0,0 +1,45 @@
+import os
+import json
+from datetime import datetime
+from typing import Dict, Any, Optional
+from supabase import create_client
+
+class Leaderboard:
+    def __init__(self, storage_path: Optional[str] = None):
+        self.storage_path = storage_path
+        url = os.getenv("SUPABASE_URL")
+        key = os.getenv("SUPABASE_KEY")
+        self.client = None
+        if url and key:
+            self.client = create_client(url, key)
+        elif not storage_path:
+            raise EnvironmentError("Set SUPABASE_URL/SUPABASE_KEY or pass storage_path")
+
+    def upload(self, run_id: str, config: Dict[str, Any], score: float):
+        data = {
+            "run_id": run_id,
+            "config": config,
+            "score": score,
+            "timestamp": datetime.utcnow().isoformat(),
+        }
+        if self.client:
+            return self.client.table("experiments").insert(data).execute()
+        else:
+            # dirname is empty for bare filenames, so fall back to "."
+            os.makedirs(os.path.dirname(self.storage_path) or ".", exist_ok=True)
+            with open(self.storage_path, "a", encoding="utf-8") as f:
+                f.write(json.dumps(data) + "\n")
+            return data
+
+    def top_results(self, limit: int = 10):
+        if self.client:
+            return (
+                self.client.table("experiments")
+                .select("*")
+                .order("score", desc=True)
+                .limit(limit)
+                .execute()
+            )
+        else:
+            with open(self.storage_path, "r", encoding="utf-8") as f:
+                lines = [json.loads(line) for line in f]
+            return sorted(lines, key=lambda x: x["score"], reverse=True)[:limit]
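A local-only usage sketch (assuming SUPABASE_URL/SUPABASE_KEY are unset; note the unconditional `from supabase import create_client` means the supabase package must still be importable, and the JSONL path is illustrative):

from ragmint.leaderboard import Leaderboard

board = Leaderboard(storage_path="runs/leaderboard.jsonl")
board.upload("run-001", {"retriever": "faiss", "chunk_size": 400}, score=0.82)
board.upload("run-002", {"retriever": "bm25", "chunk_size": 600}, score=0.77)

for entry in board.top_results(limit=5):
    print(entry["run_id"], entry["score"])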
ragmint/optimization/search.py
ADDED
@@ -0,0 +1,48 @@
+import itertools
+import random
+import logging
+from typing import Dict, List, Iterator, Any
+
+logging.basicConfig(level=logging.INFO, format="[%(levelname)s] %(message)s")
+
+
+class GridSearch:
+    def __init__(self, search_space: Dict[str, List[Any]]):
+        keys = list(search_space.keys())
+        values = list(search_space.values())
+        self.combinations = [dict(zip(keys, v)) for v in itertools.product(*values)]
+
+    def __iter__(self) -> Iterator[Dict[str, Any]]:
+        for combo in self.combinations:
+            yield combo
+
+
+class RandomSearch:
+    def __init__(self, search_space: Dict[str, List[Any]], n_trials: int = 10):
+        self.search_space = search_space
+        self.n_trials = n_trials
+
+    def __iter__(self) -> Iterator[Dict[str, Any]]:
+        keys = list(self.search_space.keys())
+        for _ in range(self.n_trials):
+            yield {k: random.choice(self.search_space[k]) for k in keys}
+
+
+class BayesianSearch:
+    def __init__(self, search_space: Dict[str, List[Any]]):
+        try:
+            import optuna
+            self.optuna = optuna
+        except ImportError:
+            raise RuntimeError("Optuna not installed; use GridSearch or RandomSearch instead.")
+        self.search_space = search_space
+
+    def __iter__(self) -> Iterator[Dict[str, Any]]:
+        keys = list(self.search_space.keys())
+
+        # Optuna objective is defined but never invoked; sampling stays random
+        def objective(trial):
+            return {k: trial.suggest_categorical(k, self.search_space[k]) for k in keys}
+
+        # Example static 5-trial yield for compatibility
+        for _ in range(5):
+            yield {k: random.choice(self.search_space[k]) for k in keys}
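All three samplers yield plain config dicts, so a tuning loop can swap them freely; a sketch with a made-up scoring function standing in for a real evaluation run:

from ragmint.optimization.search import GridSearch, RandomSearch

space = {
    "chunk_size": [256, 512, 1024],
    "overlap": [50, 100],
    "retriever": ["faiss", "bm25"],
}

def score(cfg):
    # Hypothetical objective; a real loop would build and evaluate a pipeline
    return -abs(cfg["chunk_size"] - 400)

best_grid = max(GridSearch(space), key=score)                # exhaustive: 12 combos
best_rand = max(RandomSearch(space, n_trials=5), key=score)  # sampled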
ragmint/tests/conftest.py
ADDED
@@ -0,0 +1,16 @@
+# src/ragmint/tests/conftest.py
+import os
+from dotenv import load_dotenv
+import pytest
+
+# Load .env from project root
+load_dotenv(dotenv_path=os.path.join(os.path.dirname(__file__), "../../../.env"))
+
+def pytest_configure(config):
+    """Print which keys are loaded (debug)."""
+    google = os.getenv("GEMINI_API_KEY")
+    anthropic = os.getenv("ANTHROPIC_API_KEY")
+    if google:
+        print("✅ GEMINI_API_KEY loaded")
+    if anthropic:
+        print("✅ ANTHROPIC_API_KEY loaded")
ragmint/tests/test_autotuner.py
ADDED
@@ -0,0 +1,51 @@
+import os
+import json
+import pytest
+from ragmint.autotuner import AutoRAGTuner
+
+
+def setup_docs(tmp_path):
+    """Create a temporary corpus with multiple text files for testing."""
+    corpus = tmp_path / "corpus"
+    corpus.mkdir()
+    (corpus / "short_doc.txt").write_text("AI is changing the world.")
+    (corpus / "long_doc.txt").write_text("Machine learning enables RAG pipelines to optimize retrievals. " * 50)
+    return str(corpus)
+
+
+def test_analyze_corpus(tmp_path):
+    """Ensure AutoRAGTuner analyzes corpus correctly."""
+    docs_path = setup_docs(tmp_path)
+    tuner = AutoRAGTuner(docs_path)
+    stats = tuner.corpus_stats
+
+    assert stats["num_docs"] == 2, "Should detect all documents"
+    assert stats["size"] > 0, "Corpus size should be positive"
+    assert stats["avg_len"] > 0, "Average document length should be computed"
+
+
+@pytest.mark.parametrize("size,expected_retriever", [
+    (10_000, "Chroma"),
+    (500_000, "FAISS"),
+    (1_000, "BM25"),
+])
+def test_recommendation_logic(tmp_path, monkeypatch, size, expected_retriever):
+    """Validate retriever recommendation based on corpus size."""
+    docs_path = setup_docs(tmp_path)
+    tuner = AutoRAGTuner(docs_path)
+
+    # Mock corpus stats manually
+    tuner.corpus_stats = {"size": size, "avg_len": 300, "num_docs": 10}
+
+    rec = tuner.recommend()
+    assert "retriever" in rec and "embedding_model" in rec
+    assert rec["retriever"] == expected_retriever, f"Expected {expected_retriever}"
+    assert rec["chunk_size"] > 0 and rec["overlap"] >= 0
+
+
+def test_invalid_corpus_path(tmp_path):
+    """Should handle missing directories gracefully."""
+    missing_path = tmp_path / "nonexistent"
+    tuner = AutoRAGTuner(str(missing_path))
+    assert tuner.corpus_stats["size"] == 0
+    assert tuner.corpus_stats["num_docs"] == 0
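The tests above pin down the AutoRAGTuner surface; a direct usage sketch inferred from them (the corpus path is illustrative):

from ragmint.autotuner import AutoRAGTuner

tuner = AutoRAGTuner("path/to/corpus")  # directory of .txt files
print(tuner.corpus_stats)               # {"size": ..., "avg_len": ..., "num_docs": ...}
rec = tuner.recommend()                 # retriever, embedding_model, chunk_size, overlap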
ragmint/tests/test_config_adapter.py
ADDED
@@ -0,0 +1,39 @@
+import pytest
+from ragmint.integrations.config_adapter import LangchainConfigAdapter
+
+def test_default_conversion():
+    """Test that default config values are applied correctly."""
+    cfg = {
+        "retriever": "FAISS",
+        "embedding_model": "all-MiniLM-L6-v2",
+        "chunk_size": 500,
+        "overlap": 100
+    }
+
+    adapter = LangchainConfigAdapter(cfg)
+    result = adapter.to_standard_config()
+
+    assert result["retriever"].lower() == "faiss"
+    assert result["embedding_model"] == "sentence-transformers/all-MiniLM-L6-v2"
+    assert result["chunk_size"] == 500
+    assert result["overlap"] == 100
+
+
+def test_missing_fields_are_defaulted():
+    """Ensure missing optional fields (e.g. chunk params) are filled in."""
+    cfg = {"retriever": "BM25", "embedding_model": "all-MiniLM-L6-v2"}
+    adapter = LangchainConfigAdapter(cfg)
+    result = adapter.to_standard_config()
+
+    assert "chunk_size" in result
+    assert "overlap" in result
+    assert result["chunk_size"] > 0
+    assert result["overlap"] >= 0
+
+
+def test_validation_of_invalid_retriever():
+    """Ensure invalid retriever names raise an informative error."""
+    cfg = {"retriever": "InvalidBackend", "embedding_model": "all-MiniLM-L6-v2"}
+
+    with pytest.raises(ValueError, match="Unsupported retriever backend"):
+        LangchainConfigAdapter(cfg).to_standard_config()
ragmint/tests/test_embeddings.py
ADDED
@@ -0,0 +1,46 @@
+import numpy as np
+import pytest
+from ragmint.core.embeddings import Embeddings
+
+
+def test_dummy_backend_output_shape():
+    model = Embeddings(backend="dummy")
+    texts = ["hello", "world"]
+    embeddings = model.encode(texts)
+
+    # Expect 2x768 array
+    assert isinstance(embeddings, np.ndarray)
+    assert embeddings.shape == (2, 768)
+    assert embeddings.dtype == np.float32
+
+
+def test_dummy_backend_single_string():
+    model = Embeddings(backend="dummy")
+    text = "test"
+    embeddings = model.encode(text)
+
+    assert embeddings.shape == (1, 768)
+    assert isinstance(embeddings, np.ndarray)
+
+
+'''@pytest.mark.skipif(
+    not hasattr(__import__('importlib').util.find_spec("sentence_transformers"), "loader"),
+    reason="sentence-transformers not installed"
+)
+def test_huggingface_backend_output_shape():
+    model = Embeddings(backend="huggingface", model_name="all-MiniLM-L6-v2")
+    texts = ["This is a test.", "Another sentence."]
+    embeddings = model.encode(texts)
+
+    # Expect 2x384 for MiniLM-L6-v2
+    assert isinstance(embeddings, np.ndarray)
+    assert embeddings.ndim == 2
+    assert embeddings.shape[0] == len(texts)
+    assert embeddings.dtype == np.float32
+'''
+
+def test_invalid_backend():
+    # pytest.raises ensures the test fails if no error is raised
+    with pytest.raises(ValueError, match="Unsupported embedding backend"):
+        Embeddings(backend="unknown")
ragmint/tests/test_explainer.py
ADDED
@@ -0,0 +1,20 @@
+import pytest
+from ragmint.explainer import explain_results
+
+
+def test_explain_results_gemini():
+    """Gemini explanation should contain model-specific phrasing."""
+    config_a = {"retriever": "FAISS", "embedding_model": "OpenAI"}
+    config_b = {"retriever": "Chroma", "embedding_model": "SentenceTransformers"}
+    result = explain_results(config_a, config_b, model="gemini")
+    assert isinstance(result, str)
+    assert "Gemini" in result or "gemini" in result
+
+
+def test_explain_results_claude():
+    """Claude explanation should contain model-specific phrasing."""
+    config_a = {"retriever": "FAISS"}
+    config_b = {"retriever": "Chroma"}
+    result = explain_results(config_a, config_b, model="claude")
+    assert isinstance(result, str)
+    assert "Claude" in result or "claude" in result
ragmint/tests/test_explainer_integration.py
ADDED
@@ -0,0 +1,18 @@
+import os
+import pytest
+from ragmint.explainer import explain_results
+
+
+@pytest.mark.integration
+def test_real_gemini_explanation():
+    """Run real Gemini call if GEMINI_API_KEY is set."""
+    if not os.getenv("GEMINI_API_KEY"):
+        pytest.skip("GEMINI_API_KEY not set")
+
+    config_a = {"retriever": "FAISS", "embedding_model": "OpenAI"}
+    config_b = {"retriever": "Chroma", "embedding_model": "SentenceTransformers"}
+
+    result = explain_results(config_a, config_b, model="gemini-1.5-pro")
+    assert isinstance(result, str)
+    assert len(result) > 0
+    print("\n[Gemini explanation]:", result[:200], "...")
ragmint/tests/test_integration_autotuner_ragmint.py
ADDED
@@ -0,0 +1,47 @@
+import os
+import json
+import pytest
+from ragmint.autotuner import AutoRAGTuner
+from ragmint.tuner import RAGMint
+
+
+def setup_docs(tmp_path):
+    """Create a temporary corpus for integration testing."""
+    corpus = tmp_path / "docs"
+    corpus.mkdir()
+    (corpus / "doc1.txt").write_text("This document discusses Artificial Intelligence and Machine Learning.")
+    (corpus / "doc2.txt").write_text("Retrieval-Augmented Generation combines retrievers and LLMs effectively.")
+    return str(corpus)
+
+
+def setup_validation_file(tmp_path):
+    """Create a temporary validation QA dataset."""
+    data = [
+        {"question": "What is AI?", "answer": "Artificial Intelligence"},
+        {"question": "Define RAG", "answer": "Retrieval-Augmented Generation"},
+    ]
+    file = tmp_path / "validation_qa.json"
+    with open(file, "w", encoding="utf-8") as f:
+        json.dump(data, f)
+    return str(file)
+
+
+def test_autotune_integration(tmp_path):
+    """Test that AutoRAGTuner can fully run a RAGMint optimization."""
+    docs_path = setup_docs(tmp_path)
+    val_file = setup_validation_file(tmp_path)
+
+    tuner = AutoRAGTuner(docs_path)
+    best, results = tuner.auto_tune(
+        validation_set=val_file,
+        metric="faithfulness",
+        trials=2,
+        search_type="random",
+    )
+
+    # Assertions on the results
+    assert isinstance(best, dict), "Best configuration should be a dict"
+    assert isinstance(results, list), "Results should be a list"
+    assert len(results) > 0, "Optimization should produce results"
+    assert "retriever" in best and "embedding_model" in best
+    assert best.get("faithfulness", 0.0) >= 0.0, "Metric value should be non-negative"