ragmint 0.2.3__py3-none-any.whl → 0.4.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. ragmint/app.py +512 -0
  2. ragmint/autotuner.py +201 -17
  3. ragmint/core/chunking.py +68 -4
  4. ragmint/core/embeddings.py +46 -10
  5. ragmint/core/evaluation.py +33 -14
  6. ragmint/core/pipeline.py +34 -10
  7. ragmint/core/retriever.py +152 -20
  8. ragmint/experiments/validation_qa.json +1 -14
  9. ragmint/explainer.py +47 -20
  10. ragmint/integrations/__init__.py +0 -0
  11. ragmint/integrations/config_adapter.py +96 -0
  12. ragmint/integrations/langchain_prebuilder.py +99 -0
  13. ragmint/leaderboard.py +41 -35
  14. ragmint/qa_generator.py +190 -0
  15. ragmint/tests/test_autotuner.py +52 -30
  16. ragmint/tests/test_config_adapter.py +39 -0
  17. ragmint/tests/test_embeddings.py +46 -0
  18. ragmint/tests/test_explainer.py +28 -12
  19. ragmint/tests/test_integration_autotuner_ragmint.py +39 -52
  20. ragmint/tests/test_langchain_prebuilder.py +82 -0
  21. ragmint/tests/test_leaderboard.py +78 -25
  22. ragmint/tests/test_pipeline.py +3 -2
  23. ragmint/tests/test_qa_generator.py +66 -0
  24. ragmint/tests/test_retriever.py +3 -2
  25. ragmint/tests/test_tuner.py +1 -1
  26. ragmint/tuner.py +109 -22
  27. ragmint-0.4.6.data/data/README.md +485 -0
  28. ragmint-0.4.6.dist-info/METADATA +530 -0
  29. ragmint-0.4.6.dist-info/RECORD +48 -0
  30. ragmint/tests/test_explainer_integration.py +0 -18
  31. ragmint-0.2.3.data/data/README.md +0 -284
  32. ragmint-0.2.3.dist-info/METADATA +0 -312
  33. ragmint-0.2.3.dist-info/RECORD +0 -40
  34. {ragmint-0.2.3.data → ragmint-0.4.6.data}/data/LICENSE +0 -0
  35. {ragmint-0.2.3.dist-info → ragmint-0.4.6.dist-info}/WHEEL +0 -0
  36. {ragmint-0.2.3.dist-info → ragmint-0.4.6.dist-info}/licenses/LICENSE +0 -0
  37. {ragmint-0.2.3.dist-info → ragmint-0.4.6.dist-info}/top_level.txt +0 -0
ragmint/tests/test_leaderboard.py CHANGED
@@ -1,39 +1,92 @@
+ import os
  import json
  import tempfile
- from pathlib import Path
+ import pytest
+ from datetime import datetime
  from ragmint.leaderboard import Leaderboard


- def test_leaderboard_add_and_top(tmp_path):
-     """Ensure local leaderboard persistence works without Supabase."""
-     file_path = tmp_path / "leaderboard.jsonl"
-     lb = Leaderboard(storage_path=str(file_path))
+ @pytest.fixture
+ def temp_leaderboard():
+     """Create a temporary leaderboard file for testing."""
+     with tempfile.TemporaryDirectory() as tmpdir:
+         path = os.path.join(tmpdir, "leaderboard.jsonl")
+         lb = Leaderboard(storage_path=path)
+         yield lb, path

-     # Add two runs
-     lb.upload("run1", {"retriever": "FAISS"}, 0.91)
-     lb.upload("run2", {"retriever": "Chroma"}, 0.85)

-     # Verify file content
-     assert file_path.exists()
-     with open(file_path, "r", encoding="utf-8") as f:
-         lines = [json.loads(line) for line in f]
-     assert len(lines) == 2
+ def test_upload_and_persistence(temp_leaderboard):
+     lb, path = temp_leaderboard
+
+     # --- Mock experiment data ---
+     run_id = "run_001"
+     best_config = {"retriever": "FAISS", "embedding_model": "all-MiniLM"}
+     best_score = 0.92
+     all_results = [
+         {"retriever": "FAISS", "score": 0.92},
+         {"retriever": "BM25", "score": 0.85},
+     ]
+     documents = ["docs/a.txt", "docs/b.txt"]
+     model = "gemini"
+     corpus_stats = {"size": 20000, "avg_len": 400, "num_docs": 10}
+
+     # --- Upload ---
+     record = lb.upload(
+         run_id=run_id,
+         best_config=best_config,
+         best_score=best_score,
+         all_results=all_results,
+         documents=documents,
+         model=model,
+         corpus_stats=corpus_stats,
+     )

-     # Get top results
-     top = lb.top_results(limit=1)
-     assert isinstance(top, list)
-     assert len(top) == 1
-     assert "score" in top[0]
+     # --- Validate returned record ---
+     assert record["run_id"] == run_id
+     assert record["model"] == "gemini"
+     assert "timestamp" in record
+     assert record["best_score"] == 0.92
+     assert all(doc in record["documents"] for doc in ["a.txt", "b.txt"])

+     # --- File should contain JSON line ---
+     with open(path, "r", encoding="utf-8") as f:
+         lines = f.readlines()
+     assert len(lines) == 1
+     parsed = json.loads(lines[0])
+     assert parsed["run_id"] == run_id

- def test_leaderboard_append_existing(tmp_path):
-     """Ensure multiple uploads append properly."""
-     file_path = tmp_path / "leaderboard.jsonl"
-     lb = Leaderboard(storage_path=str(file_path))

-     for i in range(3):
-         lb.upload(f"run{i}", {"retriever": "BM25"}, 0.8 + i * 0.05)
+ def test_top_results_ordering(temp_leaderboard):
+     lb, _ = temp_leaderboard

+     # Upload multiple runs with varying scores
+     for i, score in enumerate([0.8, 0.95, 0.7]):
+         lb.upload(
+             run_id=f"run_{i}",
+             best_config={"retriever": "FAISS"},
+             best_score=score,
+             all_results=[],
+             documents=["file.txt"],
+             model="claude",
+         )
+
+     # --- Get top results ---
      top = lb.top_results(limit=2)
      assert len(top) == 2
-     assert top[0]["score"] >= top[1]["score"]
+
+     # --- Ensure ordering descending by score ---
+     assert top[0]["best_score"] >= top[1]["best_score"]
+     assert top[0]["best_score"] == 0.95
+
+
+ def test_all_results_reads_all_entries(temp_leaderboard):
+     lb, _ = temp_leaderboard
+
+     # Add two runs
+     lb.upload("run_a", {}, 0.5, [], ["doc1.txt"], "gemini")
+     lb.upload("run_b", {}, 0.7, [], ["doc2.txt"], "claude")
+
+     results = lb.all_results()
+     assert len(results) == 2
+     run_ids = {r["run_id"] for r in results}
+     assert {"run_a", "run_b"} <= run_ids
ragmint/tests/test_pipeline.py CHANGED
@@ -1,14 +1,15 @@
  import numpy as np
  from ragmint.core.pipeline import RAGPipeline
  from ragmint.core.retriever import Retriever
+ from ragmint.core.embeddings import Embeddings
  from ragmint.core.reranker import Reranker
  from ragmint.core.evaluation import Evaluator


  def test_pipeline_run():
      docs = ["doc1 text", "doc2 text"]
-     embeddings = [np.random.rand(4) for _ in range(2)]
-     retriever = Retriever(embeddings, docs)
+     embedder = Embeddings(backend="dummy")
+     retriever = Retriever(embedder=embedder, documents=docs)
      reranker = Reranker("mmr")
      evaluator = Evaluator()
      pipeline = RAGPipeline(retriever, reranker, evaluator)
ragmint/tests/test_qa_generator.py ADDED
@@ -0,0 +1,66 @@
+ import os
+ import json
+ import tempfile
+ from pathlib import Path
+ import pytest
+
+ from ragmint.qa_generator import generate_validation_qa
+
+
+ class DummyLLM:
+     """Mock LLM that returns predictable JSON output."""
+     def generate_content(self, prompt):
+         class DummyResponse:
+             text = '[{"query": "What is X?", "expected_answer": "Y"}]'
+         return DummyResponse()
+
+
+ @pytest.fixture
+ def dummy_docs(tmp_path):
+     docs_dir = tmp_path / "docs"
+     docs_dir.mkdir()
+     for i in range(3):
+         (docs_dir / f"doc_{i}.txt").write_text(f"This is test document number {i}. It contains some content.")
+     return docs_dir
+
+
+ @pytest.fixture
+ def output_path(tmp_path):
+     return tmp_path / "validation_qa.json"
+
+
+ def test_generate_validation_qa(monkeypatch, dummy_docs, output_path):
+     """Ensure QA generator runs end-to-end with mocked LLM."""
+     # --- Mock LLM setup ---
+     from sentence_transformers import SentenceTransformer
+     monkeypatch.setattr("ragmint.qa_generator.setup_llm", lambda *_: (DummyLLM(), "gemini"))
+     monkeypatch.setattr(SentenceTransformer, "encode", lambda self, x, normalize_embeddings=True: [[0.1] * 3] * len(x))
+
+     # --- Run function ---
+     generate_validation_qa(
+         docs_path=dummy_docs,
+         output_path=output_path,
+         llm_model="gemini-2.5-flash-lite",
+         batch_size=2,
+         sleep_between_batches=0,
+     )
+
+     # --- Validate output ---
+     assert output_path.exists(), "Output JSON file should be created"
+     data = json.loads(output_path.read_text())
+     assert isinstance(data, list), "Output must be a list"
+     assert all("query" in d and "expected_answer" in d for d in data), "Each entry must have query and answer"
+     assert len(data) > 0, "At least one QA pair should be generated"
+
+
+ def test_handles_empty_folder(monkeypatch, tmp_path):
+     """Ensure no crash when docs folder is empty."""
+     empty_dir = tmp_path / "empty"
+     empty_dir.mkdir()
+     output_file = tmp_path / "qa.json"
+
+     monkeypatch.setattr("ragmint.qa_generator.setup_llm", lambda *_: (DummyLLM(), "gemini"))
+
+     generate_validation_qa(docs_path=empty_dir, output_path=output_file, sleep_between_batches=0)
+     data = json.loads(output_file.read_text())
+     assert data == [], "Empty folder should produce empty QA list"
ragmint/tests/test_retriever.py CHANGED
@@ -1,11 +1,12 @@
  import numpy as np
  from ragmint.core.retriever import Retriever
+ from ragmint.core.embeddings import Embeddings


  def test_retrieve_basic():
-     embeddings = [np.random.rand(5) for _ in range(3)]
      docs = ["doc A", "doc B", "doc C"]
-     retriever = Retriever(embeddings, docs)
+     embedder = Embeddings(backend="dummy")
+     retriever = Retriever(embedder=embedder, documents=docs)

      results = retriever.retrieve("sample query", top_k=2)
      assert isinstance(results, list)
ragmint/tests/test_integration_autotuner_ragmint.py CHANGED
@@ -46,7 +46,7 @@ def test_optimize_ragmint(tmp_path, validation_mode, monkeypatch):
      rag = RAGMint(
          docs_path=docs_path,
          retrievers=["faiss"],
-         embeddings=["text-embedding-3-small"],
+         embeddings=["all-MiniLM-L6-v2"],
          rerankers=["mmr"]
      )

ragmint/tuner.py CHANGED
@@ -1,17 +1,17 @@
  import os
- import json
  import logging
- from typing import Any, Dict, List, Tuple, Optional
+ from typing import Any, Dict, List, Tuple
  from time import perf_counter

  from .core.pipeline import RAGPipeline
- from .core.embeddings import EmbeddingModel
+ from .core.embeddings import Embeddings
  from .core.retriever import Retriever
  from .core.reranker import Reranker
  from .core.evaluation import Evaluator
  from .optimization.search import GridSearch, RandomSearch, BayesianSearch
-
  from .utils.data_loader import load_validation_set
+ from .leaderboard import Leaderboard
+ from uuid import uuid4

  logging.basicConfig(level=logging.INFO, format="[%(levelname)s] %(message)s")

@@ -19,6 +19,8 @@ logging.basicConfig(level=logging.INFO, format="[%(levelname)s] %(message)s")
  class RAGMint:
      """
      Main RAG pipeline optimizer and evaluator.
+     Runs combinations of retrievers, embeddings, rerankers, and chunking parameters
+     to find the best performing RAG configuration.
      """

      def __init__(
@@ -27,62 +29,119 @@ class RAGMint:
          retrievers: List[str],
          embeddings: List[str],
          rerankers: List[str],
+         chunk_sizes: List[int] = [400, 600],
+         overlaps: List[int] = [50, 100],
+         strategies: List[str] = ["fixed"],
      ):
          self.docs_path = docs_path
          self.retrievers = retrievers
          self.embeddings = embeddings
          self.rerankers = rerankers
+         self.chunk_sizes = chunk_sizes
+         self.overlaps = overlaps
+         self.strategies = strategies

          self.documents: List[str] = self._load_docs()
          self.embeddings_cache: Dict[str, Any] = {}

+     # -------------------------
+     # Document Loading
+     # -------------------------
      def _load_docs(self) -> List[str]:
          if not os.path.exists(self.docs_path):
              logging.warning(f"Corpus path not found: {self.docs_path}")
              return []
+
          docs = []
          for file in os.listdir(self.docs_path):
-             if file.endswith(".txt") or file.endswith(".md") or file.endswith(".rst"):
+             if file.endswith((".txt", ".md", ".rst")):
                  with open(os.path.join(self.docs_path, file), "r", encoding="utf-8") as f:
                      docs.append(f.read())
-         logging.info(f"Loaded {len(docs)} documents from {self.docs_path}")
+
+         logging.info(f"📚 Loaded {len(docs)} documents from {self.docs_path}")
          return docs

-     def _embed_docs(self, model_name: str):
+     # -------------------------
+     # Embedding Cache
+     # -------------------------
+     def _embed_docs(self, model_name: str) -> Any:
+         """Compute and cache document embeddings."""
          if model_name in self.embeddings_cache:
              return self.embeddings_cache[model_name]

-         model = EmbeddingModel(model_name)
+         model = Embeddings(backend="huggingface", model_name=model_name)
          embeddings = model.encode(self.documents)
          self.embeddings_cache[model_name] = embeddings
          return embeddings

+     # -------------------------
+     # Build Pipeline
+     # -------------------------
      def _build_pipeline(self, config: Dict[str, str]) -> RAGPipeline:
-         emb_model = EmbeddingModel(config["embedding_model"])
-         embeddings = self._embed_docs(config["embedding_model"])
-         retriever = Retriever(embeddings, self.documents)
-         reranker = Reranker(config["reranker"])
+         """Builds a pipeline from one configuration."""
+         retriever_backend = config["retriever"]
+         model_name = config["embedding_model"]
+         reranker_name = config["reranker"]
+
+         # Chunking params (use defaults if missing)
+         chunk_size = int(config.get("chunk_size", 500))
+         overlap = int(config.get("overlap", 100))
+         strategy = config.get("strategy", "fixed")
+
+         # Load embeddings (cached)
+         embeddings = self._embed_docs(model_name)
+         embedder = Embeddings(backend="huggingface", model_name=model_name)
+
+         # Initialize retriever with backend
+         logging.info(f"⚙️ Initializing retriever backend: {retriever_backend}")
+         retriever = Retriever(
+             embedder=embedder,
+             documents=self.documents,
+             embeddings=embeddings,
+             backend=retriever_backend,
+         )
+
+         reranker = Reranker(reranker_name)
          evaluator = Evaluator()
-         return RAGPipeline(retriever, reranker, evaluator)

+         # ✅ Pass chunking parameters into RAGPipeline
+         return RAGPipeline(
+             retriever,
+             reranker,
+             evaluator,
+             chunk_size=chunk_size,
+             overlap=overlap,
+             chunking_strategy=strategy,
+         )
+
+     # -------------------------
+     # Evaluate Configuration
+     # -------------------------
      def _evaluate_config(
          self, config: Dict[str, Any], validation: List[Dict[str, str]], metric: str
      ) -> Dict[str, float]:
+         """Evaluates a single configuration."""
          pipeline = self._build_pipeline(config)
-
          scores = []
          start = perf_counter()
+
          for sample in validation:
-             query = sample.get("question") or sample.get("query")
-             reference = sample.get("answer")
+             query = sample.get("question") or sample.get("query") or ""
              result = pipeline.run(query)
              score = result["metrics"].get(metric, 0.0)
              scores.append(score)
-         elapsed = perf_counter() - start

+         elapsed = perf_counter() - start
          avg_score = sum(scores) / len(scores) if scores else 0.0
-         return {metric: avg_score, "latency": elapsed / max(1, len(validation))}

+         return {
+             metric: avg_score,
+             "latency": elapsed / max(1, len(validation)),
+         }
+
+     # -------------------------
+     # Optimize
+     # -------------------------
      def optimize(
          self,
          validation_set: str,
@@ -90,16 +149,22 @@
          search_type: str = "random",
          trials: int = 10,
      ) -> Tuple[Dict[str, Any], List[Dict[str, Any]]]:
+         """Run optimization search over retrievers, embeddings, rerankers, and chunking."""
          validation = load_validation_set(validation_set or "default")

+         # search space
          search_space = {
              "retriever": self.retrievers,
              "embedding_model": self.embeddings,
              "reranker": self.rerankers,
+             "chunk_size": self.chunk_sizes,
+             "overlap": self.overlaps,
+             "strategy": self.strategies,
          }

-         logging.info(f"Starting {search_type} optimization with {trials} trials")
+         logging.info(f"🚀 Starting {search_type} optimization with {trials} trials")

+         # Select search strategy
          try:
              if search_type == "grid":
                  searcher = GridSearch(search_space)
@@ -108,16 +173,38 @@
              else:
                  searcher = RandomSearch(search_space, n_trials=trials)
          except Exception as e:
-             logging.warning(f"Falling back to RandomSearch due to missing deps: {e}")
+             logging.warning(f"⚠️ Fallback to RandomSearch due to missing deps: {e}")
              searcher = RandomSearch(search_space, n_trials=trials)

+         # Run trials
          results = []
          for config in searcher:
              metrics = self._evaluate_config(config, validation, metric)
              result = {**config, **metrics}
              results.append(result)
-             logging.info(f"Tested config: {config} -> {metrics}")
+             logging.info(f"🔹 Tested config: {config} -> {metrics}")

          best = max(results, key=lambda r: r.get(metric, 0.0)) if results else {}
-         logging.info(f" Best configuration found: {best}")
+         logging.info(f"🏆 Best configuration: {best}")
+
+         # Save to leaderboard
+         run_id = f"run_{uuid4().hex[:8]}"
+         leaderboard = Leaderboard()
+
+         corpus_stats = {
+             "num_docs": len(self.documents),
+             "avg_len": sum(len(d.split()) for d in self.documents) / max(1, len(self.documents)),
+             "corpus_size": sum(len(d) for d in self.documents),
+         }
+
+         leaderboard.upload(
+             run_id=run_id,
+             best_config=best,
+             best_score=best.get(metric, 0.0),
+             all_results=results,
+             documents=os.listdir(self.docs_path),
+             model=best.get("embedding_model", "unknown"),
+             corpus_stats=corpus_stats,
+         )
+
          return best, results
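
Taken together, the new constructor parameters and the leaderboard hook change how a tuning run is driven. A minimal usage sketch follows, assuming only the parameter names visible in this diff; the paths, model names, and trial counts are illustrative, and optimize() may take further arguments (for example the evaluation metric) that these hunks do not show.

from ragmint.tuner import RAGMint

rag = RAGMint(
    docs_path="docs/",                    # illustrative folder of .txt/.md/.rst files
    retrievers=["faiss"],
    embeddings=["all-MiniLM-L6-v2"],
    rerankers=["mmr"],
    chunk_sizes=[400, 600],               # new in 0.4.6
    overlaps=[50, 100],                   # new in 0.4.6
    strategies=["fixed"],                 # new in 0.4.6
)

# optimize() searches the (retriever, embedding, reranker, chunking) space and,
# per the diff above, also appends the run to the local leaderboard.
best, results = rag.optimize(
    validation_set="validation_qa.json",  # illustrative path
    search_type="random",
    trials=10,
)
print(best)
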