ragmint 0.2.1__py3-none-any.whl → 0.4.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36)
  1. ragmint/app.py +512 -0
  2. ragmint/autotuner.py +201 -17
  3. ragmint/core/chunking.py +68 -4
  4. ragmint/core/embeddings.py +46 -10
  5. ragmint/core/evaluation.py +33 -14
  6. ragmint/core/pipeline.py +34 -10
  7. ragmint/core/retriever.py +152 -20
  8. ragmint/experiments/validation_qa.json +1 -14
  9. ragmint/explainer.py +47 -20
  10. ragmint/integrations/__init__.py +0 -0
  11. ragmint/integrations/config_adapter.py +96 -0
  12. ragmint/integrations/langchain_prebuilder.py +99 -0
  13. ragmint/leaderboard.py +41 -35
  14. ragmint/qa_generator.py +190 -0
  15. ragmint/tests/test_autotuner.py +52 -30
  16. ragmint/tests/test_config_adapter.py +39 -0
  17. ragmint/tests/test_embeddings.py +46 -0
  18. ragmint/tests/test_explainer.py +28 -12
  19. ragmint/tests/test_integration_autotuner_ragmint.py +39 -52
  20. ragmint/tests/test_langchain_prebuilder.py +82 -0
  21. ragmint/tests/test_leaderboard.py +78 -25
  22. ragmint/tests/test_pipeline.py +3 -2
  23. ragmint/tests/test_qa_generator.py +66 -0
  24. ragmint/tests/test_retriever.py +3 -2
  25. ragmint/tests/test_tuner.py +1 -1
  26. ragmint/tuner.py +109 -22
  27. ragmint-0.4.6.data/data/README.md +485 -0
  28. ragmint-0.4.6.dist-info/METADATA +530 -0
  29. ragmint-0.4.6.dist-info/RECORD +48 -0
  30. ragmint-0.4.6.dist-info/licenses/LICENSE +19 -0
  31. ragmint/tests/test_explainer_integration.py +0 -18
  32. ragmint-0.2.1.dist-info/METADATA +0 -27
  33. ragmint-0.2.1.dist-info/RECORD +0 -38
  34. {ragmint-0.2.1.dist-info/licenses → ragmint-0.4.6.data/data}/LICENSE +0 -0
  35. {ragmint-0.2.1.dist-info → ragmint-0.4.6.dist-info}/WHEEL +0 -0
  36. {ragmint-0.2.1.dist-info → ragmint-0.4.6.dist-info}/top_level.txt +0 -0
ragmint/core/retriever.py CHANGED
@@ -1,33 +1,165 @@
- from typing import List, Dict, Any
+ from typing import List, Dict, Any, Optional
  import numpy as np
+ from .embeddings import Embeddings
+
+ # Optional imports
+ try:
+     import faiss
+ except ImportError:
+     faiss = None
+
+ try:
+     import chromadb
+ except ImportError:
+     chromadb = None
+
+ try:
+     from sklearn.neighbors import BallTree
+ except ImportError:
+     BallTree = None
+
+ try:
+     from rank_bm25 import BM25Okapi
+ except ImportError:
+     BM25Okapi = None


  class Retriever:
      """
-     Simple vector retriever using cosine similarity.
+     Multi-backend retriever supporting:
+       - "numpy"  : basic cosine similarity (dense)
+       - "faiss"  : high-performance dense retriever
+       - "chroma" : persistent vector DB
+       - "sklearn": BallTree (cosine or Euclidean)
+       - "bm25"   : lexical retriever using Rank-BM25
+
+     Example:
+         retriever = Retriever(embedder, documents=["A", "B", "C"], backend="bm25")
+         results = retriever.retrieve("example query", top_k=3)
      """

-     def __init__(self, embeddings: List[np.ndarray], documents: List[str]):
-         if len(embeddings) == 0:
-             self.embeddings = np.zeros((1, 768))
-         else:
-             self.embeddings = np.array(embeddings)
-         self.documents = documents or [""]
+     def __init__(
+         self,
+         embedder: Optional[Embeddings] = None,
+         documents: Optional[List[str]] = None,
+         embeddings: Optional[np.ndarray] = None,
+         backend: str = "numpy",
+     ):
+         self.embedder = embedder
+         self.documents = documents or []
+         self.backend = backend.lower()
+         self.embeddings = None
+         self.index = None
+         self.client = None
+         self.bm25 = None
+
+         # Initialize embeddings for dense backends
+         if self.backend not in ["bm25"]:
+             if embeddings is not None:
+                 self.embeddings = np.array(embeddings)
+             elif self.documents and self.embedder:
+                 self.embeddings = self.embedder.encode(self.documents)
+             else:
+                 self.embeddings = np.zeros((0, getattr(self.embedder, "dim", 768)))
+
+             # Normalize for cosine
+             if self.embeddings.size > 0:
+                 self.embeddings = self._normalize(self.embeddings)
+
+         # Initialize backend
+         self._init_backend()
+
+     # ------------------------
+     # Backend Initialization
+     # ------------------------
+     def _init_backend(self):
+         if self.backend == "faiss":
+             if faiss is None:
+                 raise ImportError("faiss not installed. Run `pip install faiss-cpu`.")
+             self.index = faiss.IndexFlatIP(self.embedder.dim)
+             self.index.add(self.embeddings.astype("float32"))
+
+         elif self.backend == "chroma":
+             if chromadb is None:
+                 raise ImportError("chromadb not installed. Run `pip install chromadb`.")
+             self.client = chromadb.Client()
+             self.collection = self.client.create_collection(name="ragmint_retriever")
+             for i, doc in enumerate(self.documents):
+                 self.collection.add(
+                     ids=[str(i)],
+                     documents=[doc],
+                     embeddings=[self.embeddings[i].tolist()],
+                 )

+         elif self.backend == "sklearn":
+             if BallTree is None:
+                 raise ImportError("scikit-learn not installed. Run `pip install scikit-learn`.")
+             self.index = BallTree(self.embeddings)
+
+         elif self.backend == "bm25":
+             if BM25Okapi is None:
+                 raise ImportError("rank-bm25 not installed. Run `pip install rank-bm25`.")
+             tokenized_corpus = [doc.lower().split() for doc in self.documents]
+             self.bm25 = BM25Okapi(tokenized_corpus)
+
+         elif self.backend != "numpy":
+             raise ValueError(f"Unsupported retriever backend: {self.backend}")
+
+     # ------------------------
+     # Retrieval
+     # ------------------------
      def retrieve(self, query: str, top_k: int = 5) -> List[Dict[str, Any]]:
-         if self.embeddings.size == 0 or len(self.documents) == 0:
+         if len(self.documents) == 0:
+             return [{"text": "", "score": 0.0}]
+
+         # BM25 retrieval (lexical)
+         if self.backend == "bm25":
+             tokenized_query = query.lower().split()
+             scores = self.bm25.get_scores(tokenized_query)
+             top_indices = np.argsort(scores)[::-1][:top_k]
+             return [
+                 {"text": self.documents[i], "score": float(scores[i])}
+                 for i in top_indices
+             ]
+
+         # Dense retrieval (others)
+         if self.embeddings is None or self.embeddings.size == 0:
              return [{"text": "", "score": 0.0}]

-         query_vec = self._embed(query)
-         scores = self._cosine_similarity(query_vec, self.embeddings)
-         top_indices = np.argsort(scores)[::-1][:min(top_k, len(scores))]
-         return [{"text": self.documents[i], "score": float(scores[i])} for i in top_indices]
+         query_vec = self.embedder.encode([query])[0]
+         query_vec = self._normalize(query_vec)

-     def _embed(self, query: str) -> np.ndarray:
-         dim = self.embeddings.shape[1] if len(self.embeddings.shape) > 1 else 768
-         return np.random.rand(dim)
+         if self.backend == "numpy":
+             scores = np.dot(self.embeddings, query_vec)
+             top_indices = np.argsort(scores)[::-1][:top_k]
+             return [{"text": self.documents[i], "score": float(scores[i])} for i in top_indices]
+
+         elif self.backend == "faiss":
+             query_vec = np.expand_dims(query_vec.astype("float32"), axis=0)
+             scores, indices = self.index.search(query_vec, top_k)
+             return [{"text": self.documents[int(i)], "score": float(scores[0][j])} for j, i in enumerate(indices[0])]
+
+         elif self.backend == "chroma":
+             results = self.collection.query(query_texts=[query], n_results=top_k)
+             docs = results["documents"][0]
+             scores = results["distances"][0]
+             return [{"text": d, "score": 1 - s} for d, s in zip(docs, scores)]
+
+         elif self.backend == "sklearn":
+             distances, indices = self.index.query([query_vec], k=top_k)
+             scores = 1 - distances[0]
+             return [{"text": self.documents[int(i)], "score": float(scores[j])} for j, i in enumerate(indices[0])]
+
+         else:
+             raise ValueError(f"Unknown backend: {self.backend}")

-     def _cosine_similarity(self, a: np.ndarray, b: np.ndarray) -> np.ndarray:
-         a_norm = a / np.linalg.norm(a)
-         b_norm = b / np.linalg.norm(b, axis=1, keepdims=True)
-         return np.dot(b_norm, a_norm)
+     # ------------------------
+     # Utils
+     # ------------------------
+     @staticmethod
+     def _normalize(vectors: np.ndarray) -> np.ndarray:
+         if vectors.ndim == 1:
+             norm = np.linalg.norm(vectors)
+             return vectors / norm if norm > 0 else vectors
+         norms = np.linalg.norm(vectors, axis=1, keepdims=True)
+         return np.divide(vectors, norms, out=np.zeros_like(vectors), where=norms != 0)
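For reference, the reworked constructor can be exercised end to end with the default "numpy" backend and no optional dependencies. A minimal sketch, assuming only the retriever above; DummyEmbedder is a hypothetical stand-in for ragmint's Embeddings wrapper (the dense code paths only touch .encode() and .dim):

import numpy as np
from ragmint.core.retriever import Retriever

class DummyEmbedder:
    # Hypothetical toy embedder; deterministic so the sketch is reproducible
    dim = 8
    def encode(self, texts):
        rng = np.random.default_rng(0)
        return rng.random((len(texts), self.dim))

docs = ["RAG combines retrieval with generation.",
        "Embeddings map text to numerical vectors."]
retriever = Retriever(embedder=DummyEmbedder(), documents=docs, backend="numpy")
print(retriever.retrieve("what is RAG?", top_k=1))
# -> [{"text": ..., "score": ...}] — cosine score against the normalized document matrix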
ragmint/experiments/validation_qa.json CHANGED
@@ -1,14 +1 @@
- [
-   {
-     "query": "What is Retrieval-Augmented Generation?",
-     "expected_answer": "A technique that combines information retrieval with language generation to improve factual accuracy."
-   },
-   {
-     "query": "What is the role of embeddings in a RAG system?",
-     "expected_answer": "They represent text as numerical vectors for similarity-based retrieval."
-   },
-   {
-     "query": "What is Maximal Marginal Relevance used for?",
-     "expected_answer": "To select diverse and relevant documents during reranking."
-   }
- ]
+ []
ragmint/explainer.py CHANGED
@@ -1,49 +1,76 @@
  """
  Interpretability Layer
  ----------------------
- Uses Gemini or Anthropic Claude to explain why one RAG configuration
- outperforms another. Falls back gracefully if no API key is provided.
+ Uses Gemini or Anthropic Claude to explain why a particular RAG configuration
+ performed best, considering both optimizer results and corpus characteristics.
  """

  import os
  import json
+ from dotenv import load_dotenv

+ # Load .env if available
+ load_dotenv()

- def explain_results(results_a: dict, results_b: dict, model: str = "gemini-1.5-pro") -> str:
+ def explain_results(best_result: dict, all_results: list, corpus_stats: dict = None,
+                     model: str = "gemini-2.5-flash-lite") -> str:
      """
-     Generate a natural-language explanation comparing two RAG experiment results.
-     Priority:
-     1. Anthropic Claude (if ANTHROPIC_API_KEY is set)
-     2. Google Gemini (if GOOGLE_API_KEY is set)
-     3. Fallback text message
-     """
-     prompt = f"""
-     You are an AI evaluation expert.
-     Compare these two RAG experiment results and explain why one performs better.
-     Metrics A: {json.dumps(results_a, indent=2)}
-     Metrics B: {json.dumps(results_b, indent=2)}
-     Provide a concise, human-friendly explanation and practical improvement tips.
+     Generate a detailed natural-language explanation for RAG optimization results.
+
+     Parameters:
+         - best_result: dict containing the best configuration and metrics.
+         - all_results: list of all trial results with metrics and configs.
+         - corpus_stats: optional dict with corpus info (size, avg_len, num_docs).
+         - model: LLM model name (Gemini or Claude).
+
+     Returns:
+         A natural-language explanation string.
      """

      anthropic_key = os.getenv("ANTHROPIC_API_KEY")
-     google_key = os.getenv("GEMINI_API_KEY")
+     google_key = os.getenv("GOOGLE_API_KEY")
+
+     # Build dynamic context
+     corpus_info = json.dumps(corpus_stats or {}, indent=2)
+     best_json = json.dumps(best_result, indent=2)
+     all_json = json.dumps(list(all_results)[:10], indent=2)  # cap for safety

+     prompt = f"""
+     You are an expert AI researcher specializing in Retrieval-Augmented Generation (RAG) optimization.
+
+     A RAG auto-tuner was run on a corpus with these characteristics:
+     {corpus_info}
+
+     The tuner evaluated multiple configurations and metrics. Below are:
+     - The BEST configuration:
+     {best_json}
+
+     - A sample of ALL evaluated configurations:
+     {all_json}
+
+     Please:
+     1. Explain WHY this best configuration likely performs better than others.
+     2. Highlight trade-offs between accuracy, latency, and resource usage.
+     3. Suggest potential improvements (different chunking, embedding, retriever, etc.).
+     4. Provide a concise summary of which setup you recommend for this corpus.
+     Keep it structured, under 300 words, and easy to read.
+     """

-     # 1️⃣ Try Anthropic Claude first
+     # --- 1️⃣ Anthropic Claude first ---
      if anthropic_key:
          try:
              from anthropic import Anthropic
              client = Anthropic(api_key=anthropic_key)
              response = client.messages.create(
                  model="claude-3-opus-20240229",
-                 max_tokens=300,
+                 max_tokens=500,
                  messages=[{"role": "user", "content": prompt}],
              )
              return response.content[0].text
          except Exception as e:
              return f"[Claude unavailable] {e}"

-     # 2️⃣ Fallback to Google Gemini
+     # --- 2️⃣ Gemini fallback ---
      elif google_key:
          try:
              import google.generativeai as genai
@@ -53,7 +80,7 @@ def explain_results(results_a: dict, results_b: dict, model: str = "gemini-1.5-p
          except Exception as e:
              return f"[Gemini unavailable] {e}"

-     # 3️⃣ Fallback if neither key is available
+     # --- 3️⃣ Fallback message ---
      else:
          return (
              "[No LLM available] Please set ANTHROPIC_API_KEY or GOOGLE_API_KEY "
ragmint/integrations/__init__.py ADDED
File without changes
ragmint/integrations/config_adapter.py ADDED
@@ -0,0 +1,96 @@
+ """
+ RAGMint → LangChain Config Adapter
+ ----------------------------------
+ Takes RAGMint or AutoRAGTuner recommendations and converts them into
+ a normalized, pickle-safe configuration that can be used to build
+ a LangChain RAG pipeline later.
+ """
+
+ import json
+ import pickle
+ from pathlib import Path
+ from typing import Dict, Any
+
+
+ class LangchainConfigAdapter:
+     """
+     Converts RAGMint recommendations into LangChain-compatible configs.
+
+     Example:
+         adapter = LangchainConfigAdapter()
+         cfg = adapter.prepare(recommendation)
+         adapter.save(cfg, "best_config.pkl")
+     """
+
+     DEFAULT_EMBEDDINGS = {
+         "OpenAI": "sentence-transformers/all-MiniLM-L6-v2",
+         "SentenceTransformers": "sentence-transformers/all-MiniLM-L6-v2",
+         "all-MiniLM-L6-v2": "sentence-transformers/all-MiniLM-L6-v2",
+         "InstructorXL": "hkunlp/instructor-xl"
+     }
+
+     SUPPORTED_RETRIEVERS = {"faiss", "chroma", "bm25", "numpy", "sklearn"}
+
+     def __init__(self, recommendation: Dict[str, Any] | None = None):
+         self.recommendation = recommendation
+
+     def prepare(self, recommendation: Dict[str, Any] | None = None) -> Dict[str, Any]:
+         """
+         Normalize and validate configuration for LangChain use.
+
+         Returns:
+             dict with clean retriever, embedding, and chunking settings.
+         """
+         recommendation = recommendation or self.recommendation or {}
+         retriever = recommendation.get("retriever", "faiss").lower()
+         embedding_model = recommendation.get("embedding_model", "sentence-transformers/all-MiniLM-L6-v2")
+         chunk_size = recommendation.get("chunk_size", 400)
+         overlap = recommendation.get("overlap", 100)
+
+         # Normalize embedding model names
+         embedding_model = self.DEFAULT_EMBEDDINGS.get(embedding_model, embedding_model)
+
+         # Validate retriever backend
+         if retriever not in self.SUPPORTED_RETRIEVERS:
+             raise ValueError(f"Unsupported retriever backend: {retriever}")
+
+         config = {
+             "retriever": retriever,
+             "embedding_model": embedding_model,
+             "chunk_size": int(chunk_size),
+             "overlap": int(overlap),
+         }
+
+         return config
+
+     def save(self, config: Dict[str, Any], path: str):
+         """
+         Save configuration to a pickle file.
+         """
+         Path(path).parent.mkdir(parents=True, exist_ok=True)
+         with open(path, "wb") as f:
+             pickle.dump(config, f)
+         print(f"💾 Saved LangChain config → {path}")
+
+     def load(self, path: str) -> Dict[str, Any]:
+         """
+         Load configuration from a pickle file.
+         """
+         with open(path, "rb") as f:
+             cfg = pickle.load(f)
+         print(f"✅ Loaded LangChain config ← {path}")
+         return cfg
+
+     def to_json(self, config: Dict[str, Any], path: str):
+         """
+         Save configuration as JSON (for human readability).
+         """
+         Path(path).parent.mkdir(parents=True, exist_ok=True)
+         with open(path, "w", encoding="utf-8") as f:
+             json.dump(config, f, indent=2)
+         print(f"📝 Exported LangChain config → {path}")
+
+     # Alias for backward compatibility
+     def to_standard_config(self, recommendation: Dict[str, Any] | None = None) -> Dict[str, Any]:
+         """Alias for backward compatibility with older test suites."""
+         return self.prepare(recommendation)
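Putting the adapter together, a minimal round trip might look like the sketch below (the recommendation values are illustrative; note that prepare() lower-cases the retriever name and maps known embedding aliases through DEFAULT_EMBEDDINGS):

from ragmint.integrations.config_adapter import LangchainConfigAdapter

recommendation = {"retriever": "FAISS", "embedding_model": "OpenAI",
                  "chunk_size": 512, "overlap": 64}

adapter = LangchainConfigAdapter(recommendation)
cfg = adapter.prepare()
# {"retriever": "faiss", "embedding_model": "sentence-transformers/all-MiniLM-L6-v2",
#  "chunk_size": 512, "overlap": 64}

adapter.to_json(cfg, "artifacts/best_config.json")  # human-readable export
adapter.save(cfg, "artifacts/best_config.pkl")      # pickle for later prebuilding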
ragmint/integrations/langchain_prebuilder.py ADDED
@@ -0,0 +1,99 @@
+ """
+ LangChain Pre-Build Integration
+ -------------------------------
+ This module bridges RAGMint's auto-tuning system with LangChain,
+ returning retriever and embedding components that can plug directly
+ into any LangChain RAG pipeline.
+
+ Example:
+     from ragmint.integrations.langchain_prebuilder import LangchainPrebuilder
+     from langchain.chains import RetrievalQA
+     from langchain_openai import ChatOpenAI
+
+     prebuilder = LangchainPrebuilder(best_cfg)
+     retriever, embeddings = prebuilder.prepare(documents)
+
+     llm = ChatOpenAI(model="gpt-4o-mini")
+     qa = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)
+ """
+
+ from typing import List, Tuple, Dict, Any
+
+ try:
+     from langchain_text_splitters import RecursiveCharacterTextSplitter
+ except ImportError:
+     from langchain.text_splitter import RecursiveCharacterTextSplitter
+
+ from langchain_community.embeddings import HuggingFaceEmbeddings
+ from langchain_community.vectorstores import FAISS, Chroma
+ from langchain_community.retrievers import BM25Retriever
+
+
+ class LangchainPrebuilder:
+     """
+     Dynamically builds LangChain retriever and embedding objects
+     based on a RAGMint configuration dictionary.
+     """
+
+     def __init__(self, cfg: Dict[str, Any]):
+         """
+         Args:
+             cfg (dict): RAGMint configuration with keys:
+                 - retriever: "faiss" | "chroma" | "bm25"
+                 - embedding_model: HuggingFace model name
+                 - chunk_size: int (default=500)
+                 - overlap: int (default=100)
+         """
+         self.cfg = cfg
+         self.retriever_backend = cfg.get("retriever", "faiss").lower()
+         self.embedding_model = cfg.get("embedding_model", "sentence-transformers/all-MiniLM-L6-v2")
+         self.chunk_size = int(cfg.get("chunk_size", 500))
+         self.overlap = int(cfg.get("overlap", 100))
+
+     def prepare(self, documents: List[str]) -> Tuple[Any, Any]:
+         """
+         Prepares LangChain-compatible retriever and embeddings.
+
+         Args:
+             documents (list[str]): Corpus texts
+
+         Returns:
+             (retriever, embeddings): Tuple of initialized LangChain retriever and embedding model
+         """
+         # 1️⃣ Split into chunks
+         splitter = RecursiveCharacterTextSplitter(
+             chunk_size=self.chunk_size,
+             chunk_overlap=self.overlap
+         )
+         docs = splitter.create_documents(documents)
+
+         # 2️⃣ Create embeddings
+         embeddings = HuggingFaceEmbeddings(model_name=self.embedding_model)
+
+         # 3️⃣ Build retriever
+         retriever = self._build_retriever(docs, embeddings)
+         return retriever, embeddings
+
+     def _build_retriever(self, docs, embeddings):
+         """Internal helper for building retriever backend."""
+         backend = self.retriever_backend
+
+         if backend == "faiss":
+             db = FAISS.from_documents(docs, embeddings)
+             return db.as_retriever(search_kwargs={"k": 5})
+
+         elif backend == "chroma":
+             db = Chroma.from_documents(docs, embeddings, collection_name="ragmint_docs")
+             return db.as_retriever(search_kwargs={"k": 5})
+
+         elif backend == "bm25":
+             # Support both Document objects and raw text strings
+             texts = [getattr(d, "page_content", d) for d in docs]
+             retriever = BM25Retriever.from_texts(texts)
+             retriever.k = 5
+             return retriever
+
+         else:
+             raise ValueError(f"Unsupported retriever backend: {backend}")
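A minimal sketch of the bm25 path, assuming langchain-community and rank-bm25 are installed. Note that prepare() still instantiates HuggingFaceEmbeddings even for bm25, so the embedding model is downloaded regardless; retriever.invoke() is the current LangChain retriever entry point:

from ragmint.integrations.langchain_prebuilder import LangchainPrebuilder

cfg = {"retriever": "bm25", "chunk_size": 300, "overlap": 50}
prebuilder = LangchainPrebuilder(cfg)
retriever, embeddings = prebuilder.prepare([
    "RAG systems retrieve supporting passages before generating an answer.",
    "BM25 is a lexical ranking function that needs no embeddings.",
])

docs = retriever.invoke("what does BM25 do?")
print(docs[0].page_content)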
ragmint/leaderboard.py CHANGED
@@ -1,45 +1,51 @@
  import os
  import json
  from datetime import datetime
- from typing import Dict, Any, Optional
- from supabase import create_client
+ from typing import Dict, Any, List, Optional
+

  class Leaderboard:
-     def __init__(self, storage_path: Optional[str] = None):
+     def __init__(self, storage_path: Optional[str] = "leaderboard.jsonl"):
          self.storage_path = storage_path
-         url = os.getenv("SUPABASE_URL")
-         key = os.getenv("SUPABASE_KEY")
-         self.client = None
-         if url and key:
-             self.client = create_client(url, key)
-         elif not storage_path:
-             raise EnvironmentError("Set SUPABASE_URL/SUPABASE_KEY or pass storage_path")
-
-     def upload(self, run_id: str, config: Dict[str, Any], score: float):
+         os.makedirs(os.path.dirname(self.storage_path) or ".", exist_ok=True)
+
+         if not os.path.exists(self.storage_path):
+             open(self.storage_path, "w", encoding="utf-8").close()
+
+     def upload(
+         self,
+         run_id: str,
+         best_config: Dict[str, Any],
+         best_score: float,
+         all_results: List[Dict[str, Any]],
+         documents: List[str],
+         model: str,
+         corpus_stats: Optional[Dict[str, Any]] = None,
+     ):
+         """Persist a full experiment run to local leaderboard."""
          data = {
              "run_id": run_id,
-             "config": config,
-             "score": score,
              "timestamp": datetime.utcnow().isoformat(),
+             "best_config": best_config,
+             "best_score": best_score,
+             "all_results": all_results,
+             "documents": [os.path.basename(d) for d in documents],
+             "model": model,
+             "corpus_stats": corpus_stats or {},
          }
-         if self.client:
-             return self.client.table("experiments").insert(data).execute()
-         else:
-             os.makedirs(os.path.dirname(self.storage_path), exist_ok=True)
-             with open(self.storage_path, "a", encoding="utf-8") as f:
-                 f.write(json.dumps(data) + "\n")
-             return data
-
-     def top_results(self, limit: int = 10):
-         if self.client:
-             return (
-                 self.client.table("experiments")
-                 .select("*")
-                 .order("score", desc=True)
-                 .limit(limit)
-                 .execute()
-             )
-         else:
-             with open(self.storage_path, "r", encoding="utf-8") as f:
-                 lines = [json.loads(line) for line in f]
-             return sorted(lines, key=lambda x: x["score"], reverse=True)[:limit]
+
+         with open(self.storage_path, "a", encoding="utf-8") as f:
+             f.write(json.dumps(data) + "\n")
+
+         return data
+
+     def all_results(self) -> List[Dict[str, Any]]:
+         if not os.path.exists(self.storage_path):
+             return []
+         with open(self.storage_path, "r", encoding="utf-8") as f:
+             return [json.loads(line) for line in f if line.strip()]
+
+     def top_results(self, limit: int = 10) -> List[Dict[str, Any]]:
+         """Return top experiments by score."""
+         results = self.all_results()
+         return sorted(results, key=lambda x: x.get("best_score", 0.0), reverse=True)[:limit]
+ return sorted(results, key=lambda x: x.get("best_score", 0.0), reverse=True)[:limit]