ragmint 0.2.1__py3-none-any.whl → 0.4.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ragmint/app.py +512 -0
- ragmint/autotuner.py +201 -17
- ragmint/core/chunking.py +68 -4
- ragmint/core/embeddings.py +46 -10
- ragmint/core/evaluation.py +33 -14
- ragmint/core/pipeline.py +34 -10
- ragmint/core/retriever.py +152 -20
- ragmint/experiments/validation_qa.json +1 -14
- ragmint/explainer.py +47 -20
- ragmint/integrations/__init__.py +0 -0
- ragmint/integrations/config_adapter.py +96 -0
- ragmint/integrations/langchain_prebuilder.py +99 -0
- ragmint/leaderboard.py +41 -35
- ragmint/qa_generator.py +190 -0
- ragmint/tests/test_autotuner.py +52 -30
- ragmint/tests/test_config_adapter.py +39 -0
- ragmint/tests/test_embeddings.py +46 -0
- ragmint/tests/test_explainer.py +28 -12
- ragmint/tests/test_integration_autotuner_ragmint.py +39 -52
- ragmint/tests/test_langchain_prebuilder.py +82 -0
- ragmint/tests/test_leaderboard.py +78 -25
- ragmint/tests/test_pipeline.py +3 -2
- ragmint/tests/test_qa_generator.py +66 -0
- ragmint/tests/test_retriever.py +3 -2
- ragmint/tests/test_tuner.py +1 -1
- ragmint/tuner.py +109 -22
- ragmint-0.4.6.data/data/README.md +485 -0
- ragmint-0.4.6.dist-info/METADATA +530 -0
- ragmint-0.4.6.dist-info/RECORD +48 -0
- ragmint-0.4.6.dist-info/licenses/LICENSE +19 -0
- ragmint/tests/test_explainer_integration.py +0 -18
- ragmint-0.2.1.dist-info/METADATA +0 -27
- ragmint-0.2.1.dist-info/RECORD +0 -38
- {ragmint-0.2.1.dist-info/licenses → ragmint-0.4.6.data/data}/LICENSE +0 -0
- {ragmint-0.2.1.dist-info → ragmint-0.4.6.dist-info}/WHEEL +0 -0
- {ragmint-0.2.1.dist-info → ragmint-0.4.6.dist-info}/top_level.txt +0 -0
ragmint/tests/test_leaderboard.py
CHANGED
@@ -1,39 +1,92 @@
+import os
 import json
 import tempfile
-
+import pytest
+from datetime import datetime
 from ragmint.leaderboard import Leaderboard
 
 
-
-
-
-
+@pytest.fixture
+def temp_leaderboard():
+    """Create a temporary leaderboard file for testing."""
+    with tempfile.TemporaryDirectory() as tmpdir:
+        path = os.path.join(tmpdir, "leaderboard.jsonl")
+        lb = Leaderboard(storage_path=path)
+        yield lb, path
 
-    # Add two runs
-    lb.upload("run1", {"retriever": "FAISS"}, 0.91)
-    lb.upload("run2", {"retriever": "Chroma"}, 0.85)
 
-
-
-
-
-
+def test_upload_and_persistence(temp_leaderboard):
+    lb, path = temp_leaderboard
+
+    # --- Mock experiment data ---
+    run_id = "run_001"
+    best_config = {"retriever": "FAISS", "embedding_model": "all-MiniLM"}
+    best_score = 0.92
+    all_results = [
+        {"retriever": "FAISS", "score": 0.92},
+        {"retriever": "BM25", "score": 0.85},
+    ]
+    documents = ["docs/a.txt", "docs/b.txt"]
+    model = "gemini"
+    corpus_stats = {"size": 20000, "avg_len": 400, "num_docs": 10}
+
+    # --- Upload ---
+    record = lb.upload(
+        run_id=run_id,
+        best_config=best_config,
+        best_score=best_score,
+        all_results=all_results,
+        documents=documents,
+        model=model,
+        corpus_stats=corpus_stats,
+    )
 
-    #
-
-    assert
-    assert
-    assert "
+    # --- Validate returned record ---
+    assert record["run_id"] == run_id
+    assert record["model"] == "gemini"
+    assert "timestamp" in record
+    assert record["best_score"] == 0.92
+    assert all(doc in record["documents"] for doc in ["a.txt", "b.txt"])
 
+    # --- File should contain JSON line ---
+    with open(path, "r", encoding="utf-8") as f:
+        lines = f.readlines()
+    assert len(lines) == 1
+    parsed = json.loads(lines[0])
+    assert parsed["run_id"] == run_id
 
-def test_leaderboard_append_existing(tmp_path):
-    """Ensure multiple uploads append properly."""
-    file_path = tmp_path / "leaderboard.jsonl"
-    lb = Leaderboard(storage_path=str(file_path))
 
-
-
+def test_top_results_ordering(temp_leaderboard):
+    lb, _ = temp_leaderboard
 
+    # Upload multiple runs with varying scores
+    for i, score in enumerate([0.8, 0.95, 0.7]):
+        lb.upload(
+            run_id=f"run_{i}",
+            best_config={"retriever": "FAISS"},
+            best_score=score,
+            all_results=[],
+            documents=["file.txt"],
+            model="claude",
+        )
+
+    # --- Get top results ---
     top = lb.top_results(limit=2)
     assert len(top) == 2
-
+
+    # --- Ensure ordering descending by score ---
+    assert top[0]["best_score"] >= top[1]["best_score"]
+    assert top[0]["best_score"] == 0.95
+
+
+def test_all_results_reads_all_entries(temp_leaderboard):
+    lb, _ = temp_leaderboard
+
+    # Add two runs
+    lb.upload("run_a", {}, 0.5, [], ["doc1.txt"], "gemini")
+    lb.upload("run_b", {}, 0.7, [], ["doc2.txt"], "claude")
+
+    results = lb.all_results()
+    assert len(results) == 2
+    run_ids = {r["run_id"] for r in results}
+    assert {"run_a", "run_b"} <= run_ids
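The tests above exercise the reworked Leaderboard API introduced in 0.4.x: keyword-style upload, top_results, all_results, and JSONL persistence. A minimal usage sketch following the same call shapes; the storage path and all values below are illustrative placeholders, not taken from the package:

from ragmint.leaderboard import Leaderboard

lb = Leaderboard(storage_path="runs/leaderboard.jsonl")  # one JSON record per line

record = lb.upload(
    run_id="run_demo",
    best_config={"retriever": "FAISS", "embedding_model": "all-MiniLM"},
    best_score=0.92,
    all_results=[{"retriever": "FAISS", "score": 0.92}],
    documents=["docs/a.txt"],
    model="gemini",
    corpus_stats={"num_docs": 1},  # omitted in the positional calls above, so presumably optional
)

top = lb.top_results(limit=5)   # sorted by best_score, descending
everything = lb.all_results()   # every stored record
print(record["run_id"], record["timestamp"])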
ragmint/tests/test_pipeline.py
CHANGED
@@ -1,14 +1,15 @@
 import numpy as np
 from ragmint.core.pipeline import RAGPipeline
 from ragmint.core.retriever import Retriever
+from ragmint.core.embeddings import Embeddings
 from ragmint.core.reranker import Reranker
 from ragmint.core.evaluation import Evaluator
 
 
 def test_pipeline_run():
     docs = ["doc1 text", "doc2 text"]
-
-    retriever = Retriever(
+    embedder = Embeddings(backend="dummy")
+    retriever = Retriever(embedder=embedder, documents=docs)
     reranker = Reranker("mmr")
     evaluator = Evaluator()
     pipeline = RAGPipeline(retriever, reranker, evaluator)
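The updated test shows the new construction pattern: a Retriever is now built from an Embeddings object plus raw documents rather than precomputed vectors. A minimal sketch under those assumptions; the query string is illustrative, and the "metrics" key on the result is taken from its use in tuner.py further down:

from ragmint.core.embeddings import Embeddings
from ragmint.core.retriever import Retriever
from ragmint.core.reranker import Reranker
from ragmint.core.evaluation import Evaluator
from ragmint.core.pipeline import RAGPipeline

docs = ["doc1 text", "doc2 text"]
embedder = Embeddings(backend="dummy")                    # offline backend used by the tests
retriever = Retriever(embedder=embedder, documents=docs)
pipeline = RAGPipeline(retriever, Reranker("mmr"), Evaluator())

result = pipeline.run("sample query")                     # dict with a "metrics" entry
print(result["metrics"])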
ragmint/tests/test_qa_generator.py
ADDED
@@ -0,0 +1,66 @@
+import os
+import json
+import tempfile
+from pathlib import Path
+import pytest
+
+from ragmint.qa_generator import generate_validation_qa
+
+
+class DummyLLM:
+    """Mock LLM that returns predictable JSON output."""
+    def generate_content(self, prompt):
+        class DummyResponse:
+            text = '[{"query": "What is X?", "expected_answer": "Y"}]'
+        return DummyResponse()
+
+
+@pytest.fixture
+def dummy_docs(tmp_path):
+    docs_dir = tmp_path / "docs"
+    docs_dir.mkdir()
+    for i in range(3):
+        (docs_dir / f"doc_{i}.txt").write_text(f"This is test document number {i}. It contains some content.")
+    return docs_dir
+
+
+@pytest.fixture
+def output_path(tmp_path):
+    return tmp_path / "validation_qa.json"
+
+
+def test_generate_validation_qa(monkeypatch, dummy_docs, output_path):
+    """Ensure QA generator runs end-to-end with mocked LLM."""
+    # --- Mock LLM setup ---
+    from sentence_transformers import SentenceTransformer
+    monkeypatch.setattr("ragmint.qa_generator.setup_llm", lambda *_: (DummyLLM(), "gemini"))
+    monkeypatch.setattr(SentenceTransformer, "encode", lambda self, x, normalize_embeddings=True: [[0.1] * 3] * len(x))
+
+    # --- Run function ---
+    generate_validation_qa(
+        docs_path=dummy_docs,
+        output_path=output_path,
+        llm_model="gemini-2.5-flash-lite",
+        batch_size=2,
+        sleep_between_batches=0,
+    )
+
+    # --- Validate output ---
+    assert output_path.exists(), "Output JSON file should be created"
+    data = json.loads(output_path.read_text())
+    assert isinstance(data, list), "Output must be a list"
+    assert all("query" in d and "expected_answer" in d for d in data), "Each entry must have query and answer"
+    assert len(data) > 0, "At least one QA pair should be generated"
+
+
+def test_handles_empty_folder(monkeypatch, tmp_path):
+    """Ensure no crash when docs folder is empty."""
+    empty_dir = tmp_path / "empty"
+    empty_dir.mkdir()
+    output_file = tmp_path / "qa.json"
+
+    monkeypatch.setattr("ragmint.qa_generator.setup_llm", lambda *_: (DummyLLM(), "gemini"))
+
+    generate_validation_qa(docs_path=empty_dir, output_path=output_file, sleep_between_batches=0)
+    data = json.loads(output_file.read_text())
+    assert data == [], "Empty folder should produce empty QA list"
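Outside the test's mocks, the new generator can presumably be driven the same way. A sketch assuming real LLM credentials are configured and that docs_path points at a folder of plain-text documents; the paths below are placeholders:

import json
from pathlib import Path
from ragmint.qa_generator import generate_validation_qa

generate_validation_qa(
    docs_path=Path("data/docs"),              # folder of source documents
    output_path=Path("validation_qa.json"),
    llm_model="gemini-2.5-flash-lite",
    batch_size=2,
    sleep_between_batches=0,
)

qa_pairs = json.loads(Path("validation_qa.json").read_text())
# each entry looks like {"query": "...", "expected_answer": "..."}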
ragmint/tests/test_retriever.py
CHANGED
@@ -1,11 +1,12 @@
 import numpy as np
 from ragmint.core.retriever import Retriever
+from ragmint.core.embeddings import Embeddings
 
 
 def test_retrieve_basic():
-    embeddings = [np.random.rand(5) for _ in range(3)]
     docs = ["doc A", "doc B", "doc C"]
-
+    embedder = Embeddings(backend="dummy")
+    retriever = Retriever(embedder=embedder, documents=docs)
 
     results = retriever.retrieve("sample query", top_k=2)
     assert isinstance(results, list)
ragmint/tests/test_tuner.py
CHANGED
ragmint/tuner.py
CHANGED
@@ -1,17 +1,17 @@
 import os
-import json
 import logging
-from typing import Any, Dict, List, Tuple
+from typing import Any, Dict, List, Tuple
 from time import perf_counter
 
 from .core.pipeline import RAGPipeline
-from .core.embeddings import
+from .core.embeddings import Embeddings
 from .core.retriever import Retriever
 from .core.reranker import Reranker
 from .core.evaluation import Evaluator
 from .optimization.search import GridSearch, RandomSearch, BayesianSearch
-
 from .utils.data_loader import load_validation_set
+from .leaderboard import Leaderboard
+from uuid import uuid4
 
 logging.basicConfig(level=logging.INFO, format="[%(levelname)s] %(message)s")
 
@@ -19,6 +19,8 @@ logging.basicConfig(level=logging.INFO, format="[%(levelname)s] %(message)s")
 class RAGMint:
     """
     Main RAG pipeline optimizer and evaluator.
+    Runs combinations of retrievers, embeddings, rerankers, and chunking parameters
+    to find the best performing RAG configuration.
     """
 
     def __init__(
@@ -27,62 +29,119 @@
         retrievers: List[str],
         embeddings: List[str],
         rerankers: List[str],
+        chunk_sizes: List[int] = [400, 600],
+        overlaps: List[int] = [50, 100],
+        strategies: List[str] = ["fixed"],
     ):
         self.docs_path = docs_path
         self.retrievers = retrievers
         self.embeddings = embeddings
         self.rerankers = rerankers
+        self.chunk_sizes = chunk_sizes
+        self.overlaps = overlaps
+        self.strategies = strategies
 
         self.documents: List[str] = self._load_docs()
         self.embeddings_cache: Dict[str, Any] = {}
 
+    # -------------------------
+    # Document Loading
+    # -------------------------
     def _load_docs(self) -> List[str]:
         if not os.path.exists(self.docs_path):
             logging.warning(f"Corpus path not found: {self.docs_path}")
             return []
+
         docs = []
         for file in os.listdir(self.docs_path):
-            if file.endswith(".txt"
+            if file.endswith((".txt", ".md", ".rst")):
                 with open(os.path.join(self.docs_path, file), "r", encoding="utf-8") as f:
                     docs.append(f.read())
-
+
+        logging.info(f"📚 Loaded {len(docs)} documents from {self.docs_path}")
         return docs
 
-
+    # -------------------------
+    # Embedding Cache
+    # -------------------------
+    def _embed_docs(self, model_name: str) -> Any:
+        """Compute and cache document embeddings."""
         if model_name in self.embeddings_cache:
             return self.embeddings_cache[model_name]
 
-        model =
+        model = Embeddings(backend="huggingface", model_name=model_name)
         embeddings = model.encode(self.documents)
         self.embeddings_cache[model_name] = embeddings
         return embeddings
 
+    # -------------------------
+    # Build Pipeline
+    # -------------------------
     def _build_pipeline(self, config: Dict[str, str]) -> RAGPipeline:
-
-
-
-
+        """Builds a pipeline from one configuration."""
+        retriever_backend = config["retriever"]
+        model_name = config["embedding_model"]
+        reranker_name = config["reranker"]
+
+        # Chunking params (use defaults if missing)
+        chunk_size = int(config.get("chunk_size", 500))
+        overlap = int(config.get("overlap", 100))
+        strategy = config.get("strategy", "fixed")
+
+        # Load embeddings (cached)
+        embeddings = self._embed_docs(model_name)
+        embedder = Embeddings(backend="huggingface", model_name=model_name)
+
+        # Initialize retriever with backend
+        logging.info(f"⚙️ Initializing retriever backend: {retriever_backend}")
+        retriever = Retriever(
+            embedder=embedder,
+            documents=self.documents,
+            embeddings=embeddings,
+            backend=retriever_backend,
+        )
+
+        reranker = Reranker(reranker_name)
         evaluator = Evaluator()
-        return RAGPipeline(retriever, reranker, evaluator)
 
+        # ✅ Pass chunking parameters into RAGPipeline
+        return RAGPipeline(
+            retriever,
+            reranker,
+            evaluator,
+            chunk_size=chunk_size,
+            overlap=overlap,
+            chunking_strategy=strategy,
+        )
+
+    # -------------------------
+    # Evaluate Configuration
+    # -------------------------
     def _evaluate_config(
         self, config: Dict[str, Any], validation: List[Dict[str, str]], metric: str
     ) -> Dict[str, float]:
+        """Evaluates a single configuration."""
         pipeline = self._build_pipeline(config)
-
         scores = []
         start = perf_counter()
+
         for sample in validation:
-            query = sample.get("question") or sample.get("query")
-            reference = sample.get("answer")
+            query = sample.get("question") or sample.get("query") or ""
             result = pipeline.run(query)
             score = result["metrics"].get(metric, 0.0)
             scores.append(score)
-        elapsed = perf_counter() - start
 
+        elapsed = perf_counter() - start
         avg_score = sum(scores) / len(scores) if scores else 0.0
-        return {metric: avg_score, "latency": elapsed / max(1, len(validation))}
 
+        return {
+            metric: avg_score,
+            "latency": elapsed / max(1, len(validation)),
+        }
+
+    # -------------------------
+    # Optimize
+    # -------------------------
     def optimize(
         self,
         validation_set: str,
@@ -90,16 +149,22 @@
         search_type: str = "random",
         trials: int = 10,
     ) -> Tuple[Dict[str, Any], List[Dict[str, Any]]]:
+        """Run optimization search over retrievers, embeddings, rerankers, and chunking."""
         validation = load_validation_set(validation_set or "default")
 
+        # search space
         search_space = {
            "retriever": self.retrievers,
            "embedding_model": self.embeddings,
            "reranker": self.rerankers,
+            "chunk_size": self.chunk_sizes,
+            "overlap": self.overlaps,
+            "strategy": self.strategies,
         }
 
-        logging.info(f"Starting {search_type} optimization with {trials} trials")
+        logging.info(f"🚀 Starting {search_type} optimization with {trials} trials")
 
+        # Select search strategy
         try:
             if search_type == "grid":
                 searcher = GridSearch(search_space)
@@ -108,16 +173,38 @@
             else:
                 searcher = RandomSearch(search_space, n_trials=trials)
         except Exception as e:
-            logging.warning(f"
+            logging.warning(f"⚠️ Fallback to RandomSearch due to missing deps: {e}")
             searcher = RandomSearch(search_space, n_trials=trials)
 
+        # Run trials
         results = []
         for config in searcher:
             metrics = self._evaluate_config(config, validation, metric)
             result = {**config, **metrics}
             results.append(result)
-            logging.info(f"Tested config: {config} -> {metrics}")
+            logging.info(f"🔹 Tested config: {config} -> {metrics}")
 
         best = max(results, key=lambda r: r.get(metric, 0.0)) if results else {}
-        logging.info(f"
+        logging.info(f"🏆 Best configuration: {best}")
+
+        # Save to leaderboard
+        run_id = f"run_{uuid4().hex[:8]}"
+        leaderboard = Leaderboard()
+
+        corpus_stats = {
+            "num_docs": len(self.documents),
+            "avg_len": sum(len(d.split()) for d in self.documents) / max(1, len(self.documents)),
+            "corpus_size": sum(len(d) for d in self.documents),
+        }
+
+        leaderboard.upload(
+            run_id=run_id,
+            best_config=best,
+            best_score=best.get(metric, 0.0),
+            all_results=results,
+            documents=os.listdir(self.docs_path),
+            model=best.get("embedding_model", "unknown"),
+            corpus_stats=corpus_stats,
+        )
+
         return best, results
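Putting the tuner.py changes together: chunk sizes, overlaps, and strategies now join the search space, and every optimize() run is uploaded to the leaderboard. A rough end-to-end sketch; the metric argument and the retriever/embedding identifiers below are assumptions, not values confirmed by this diff:

from ragmint.tuner import RAGMint

rag = RAGMint(
    docs_path="data/docs",
    retrievers=["faiss", "bm25"],             # assumed backend names
    embeddings=["all-MiniLM-L6-v2"],          # assumed embedding model id
    rerankers=["mmr"],
    chunk_sizes=[400, 600],                   # new in 0.4.x
    overlaps=[50, 100],
    strategies=["fixed"],
)

best, results = rag.optimize(
    validation_set="validation_qa.json",
    metric="faithfulness",                    # assumed metric name
    search_type="random",
    trials=10,
)
print(best)                                   # the best config is also written to the leaderboard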