ragit-0.8-py3-none-any.whl → ragit-0.8.2-py3-none-any.whl

This diff shows the contents of publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only and reflects the changes between those versions.
ragit/config.py ADDED
@@ -0,0 +1,60 @@
+ #
+ # Copyright RODMENA LIMITED 2025
+ # SPDX-License-Identifier: Apache-2.0
+ #
+ """
+ Ragit configuration management.
+
+ Loads configuration from environment variables and .env files.
+
+ Note: As of v0.8.0, ragit no longer has default LLM or embedding models.
+ Users must explicitly configure providers.
+ """
+
+ import os
+ from pathlib import Path
+
+ from dotenv import load_dotenv
+
+ # Load .env file from current working directory or project root
+ _env_path = Path.cwd() / ".env"
+ if _env_path.exists():
+     load_dotenv(_env_path)
+ else:
+     # Try to find .env in parent directories
+     for parent in Path.cwd().parents:
+         _env_path = parent / ".env"
+         if _env_path.exists():
+             load_dotenv(_env_path)
+             break
+
+
+ class Config:
+     """Ragit configuration loaded from environment variables.
+
+     Note: As of v0.8.0, DEFAULT_LLM_MODEL and DEFAULT_EMBEDDING_MODEL are
+     no longer used as defaults. They are only read from environment variables
+     for backwards compatibility with user configurations.
+     """
+
+     # Ollama LLM API Configuration (used when explicitly using OllamaProvider)
+     OLLAMA_BASE_URL: str = os.getenv("OLLAMA_BASE_URL", "http://localhost:11434")
+     OLLAMA_API_KEY: str | None = os.getenv("OLLAMA_API_KEY")
+     OLLAMA_TIMEOUT: int = int(os.getenv("OLLAMA_TIMEOUT", "120"))
+
+     # Ollama Embedding API Configuration
+     OLLAMA_EMBEDDING_URL: str = os.getenv(
+         "OLLAMA_EMBEDDING_URL", os.getenv("OLLAMA_BASE_URL", "http://localhost:11434")
+     )
+
+     # Model settings (only used if explicitly requested, no defaults)
+     # These can still be set via environment variables for convenience
+     DEFAULT_LLM_MODEL: str | None = os.getenv("RAGIT_DEFAULT_LLM_MODEL")
+     DEFAULT_EMBEDDING_MODEL: str | None = os.getenv("RAGIT_DEFAULT_EMBEDDING_MODEL")
+
+     # Logging
+     LOG_LEVEL: str = os.getenv("RAGIT_LOG_LEVEL", "INFO")
+
+
+ # Singleton instance
+ config = Config()
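Because config.py resolves the .env file and environment variables at import time and exposes a module-level `config` singleton, downstream code simply imports and reads it. A minimal illustrative sketch (the logging wiring is an assumption for the example, not part of the package):

    import logging

    from ragit.config import config

    # Attributes carry the environment values, or the defaults shown above.
    logging.basicConfig(level=config.LOG_LEVEL)   # "INFO" unless RAGIT_LOG_LEVEL is set
    print(config.OLLAMA_BASE_URL)                 # "http://localhost:11434" by default
    print(config.DEFAULT_LLM_MODEL)               # None unless RAGIT_DEFAULT_LLM_MODEL is set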
ragit/core/__init__.py ADDED
@@ -0,0 +1,5 @@
+ #
+ # Copyright RODMENA LIMITED 2025
+ # SPDX-License-Identifier: Apache-2.0
+ #
+ """Ragit core module."""
ragit/core/experiment/__init__.py ADDED
@@ -0,0 +1,22 @@
+ #
+ # Copyright RODMENA LIMITED 2025
+ # SPDX-License-Identifier: Apache-2.0
+ #
+ """Ragit experiment module."""
+
+ from ragit.core.experiment.experiment import (
+     BenchmarkQuestion,
+     Document,
+     RAGConfig,
+     RagitExperiment,
+ )
+ from ragit.core.experiment.results import EvaluationResult, ExperimentResults
+
+ __all__ = [
+     "RagitExperiment",
+     "Document",
+     "BenchmarkQuestion",
+     "RAGConfig",
+     "EvaluationResult",
+     "ExperimentResults",
+ ]
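The re-exports above make the public types importable from ragit.core.experiment directly. A small illustrative sketch of building the experiment inputs (the sample document and question are invented for the example):

    from ragit.core.experiment import BenchmarkQuestion, Document

    # Document and BenchmarkQuestion are plain dataclasses (defined in experiment.py below).
    docs = [Document(id="doc1", content="Ragit optimizes RAG hyperparameters.")]
    benchmark = [
        BenchmarkQuestion(
            question="What does ragit optimize?",
            ground_truth="RAG hyperparameters.",
            relevant_doc_ids=["doc1"],
        )
    ]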
ragit/core/experiment/experiment.py ADDED
@@ -0,0 +1,571 @@
+ #
+ # Copyright RODMENA LIMITED 2025
+ # SPDX-License-Identifier: Apache-2.0
+ #
+ """
+ Ragit Experiment - Core RAG optimization engine.
+
+ This module provides the main experiment class for optimizing RAG hyperparameters.
+ """
+
+ import time
+ from collections.abc import Callable
+ from dataclasses import dataclass, field
+ from itertools import product
+ from typing import Any
+
+ import numpy as np
+ from tqdm import tqdm
+
+ from ragit.core.experiment.results import EvaluationResult
+ from ragit.providers.base import BaseEmbeddingProvider, BaseLLMProvider
+ from ragit.providers.function_adapter import FunctionProvider
+
+
+ @dataclass
+ class RAGConfig:
+     """Configuration for a RAG pattern."""
+
+     name: str
+     chunk_size: int
+     chunk_overlap: int
+     num_chunks: int # Number of chunks to retrieve
+     embedding_model: str
+     llm_model: str
+
+
+ @dataclass
+ class Document:
+     """A document in the knowledge base."""
+
+     id: str
+     content: str
+     metadata: dict[str, Any] = field(default_factory=dict)
+
+
+ @dataclass
+ class Chunk:
+     """A document chunk."""
+
+     content: str
+     doc_id: str
+     chunk_index: int
+     embedding: tuple[float, ...] | list[float] | None = None
+     metadata: dict[str, Any] = field(default_factory=dict)
+
+
+ @dataclass
+ class BenchmarkQuestion:
+     """A benchmark question for evaluation."""
+
+     question: str
+     ground_truth: str
+     relevant_doc_ids: list[str] = field(default_factory=list)
+
+
+ @dataclass
+ class EvaluationScores:
+     """Scores from evaluating a RAG response."""
+
+     answer_correctness: float
+     context_relevance: float
+     faithfulness: float
+
+     @property
+     def combined_score(self) -> float:
+         """Combined score (weighted average)."""
+         return 0.4 * self.answer_correctness + 0.3 * self.context_relevance + 0.3 * self.faithfulness
+
+
+ class SimpleVectorStore:
+     """Simple in-memory vector store with pre-normalized embeddings for fast search.
+
+     Note: This class is NOT thread-safe.
+     """
+
+     def __init__(self) -> None:
+         self.chunks: list[Chunk] = []
+         self._embedding_matrix: np.ndarray[Any, np.dtype[np.float64]] | None = None # Pre-normalized
+
+     def add(self, chunks: list[Chunk]) -> None:
+         """Add chunks to the store and rebuild pre-normalized embedding matrix."""
+         self.chunks.extend(chunks)
+         self._rebuild_matrix()
+
+     def _rebuild_matrix(self) -> None:
+         """Rebuild and pre-normalize the embedding matrix from chunks."""
+         embeddings = [c.embedding for c in self.chunks if c.embedding is not None]
+         if embeddings:
+             matrix = np.array(embeddings, dtype=np.float64)
+             # Pre-normalize for fast cosine similarity
+             norms = np.linalg.norm(matrix, axis=1, keepdims=True)
+             norms[norms == 0] = 1 # Avoid division by zero
+             self._embedding_matrix = matrix / norms
+         else:
+             self._embedding_matrix = None
+
+     def clear(self) -> None:
+         """Clear all chunks."""
+         self.chunks = []
+         self._embedding_matrix = None
+
+     def search(self, query_embedding: tuple[float, ...] | list[float], top_k: int = 5) -> list[tuple[Chunk, float]]:
+         """Search for similar chunks using pre-normalized cosine similarity."""
+         if not self.chunks or self._embedding_matrix is None:
+             return []
+
+         # Normalize query vector
+         query_vec = np.array(query_embedding, dtype=np.float64)
+         query_norm = np.linalg.norm(query_vec)
+         if query_norm == 0:
+             return []
+         query_normalized = query_vec / query_norm
+
+         # Fast cosine similarity: matrix is pre-normalized, just dot product
+         similarities = self._embedding_matrix @ query_normalized
+
+         # Get top_k indices efficiently
+         if len(similarities) <= top_k:
+             top_indices = np.argsort(similarities)[::-1]
+         else:
+             top_indices = np.argpartition(similarities, -top_k)[-top_k:]
+             top_indices = top_indices[np.argsort(similarities[top_indices])[::-1]]
+
+         return [(self.chunks[i], float(similarities[i])) for i in top_indices]
+
+
+ class RagitExperiment:
+     """
+     Ragit Experiment - Automatic RAG Hyperparameter Optimization.
+
+     This class orchestrates the optimization of RAG pipeline hyperparameters
+     by systematically evaluating different configurations.
+
+     Parameters
+     ----------
+     documents : list[Document]
+         Documents to use as the knowledge base.
+     benchmark : list[BenchmarkQuestion]
+         Benchmark questions for evaluation.
+     embed_fn : Callable[[str], list[float]], optional
+         Function that takes text and returns an embedding vector.
+     generate_fn : Callable, optional
+         Function for text generation.
+     provider : BaseEmbeddingProvider, optional
+         Provider for embeddings and LLM. If embed_fn is provided, this is
+         ignored for embeddings but can be used for LLM.
+
+     Raises
+     ------
+     ValueError
+         If neither embed_fn nor provider is provided.
+
+     Examples
+     --------
+     >>> # With custom functions
+     >>> experiment = RagitExperiment(docs, benchmark, embed_fn=my_embed, generate_fn=my_llm)
+     >>>
+     >>> # With explicit provider
+     >>> from ragit.providers import OllamaProvider
+     >>> experiment = RagitExperiment(docs, benchmark, provider=OllamaProvider())
+     >>>
+     >>> results = experiment.run()
+     >>> print(results[0].config) # Best configuration
+     """
+
+     def __init__(
+         self,
+         documents: list[Document],
+         benchmark: list[BenchmarkQuestion],
+         embed_fn: Callable[[str], list[float]] | None = None,
+         generate_fn: Callable[..., str] | None = None,
+         provider: BaseEmbeddingProvider | BaseLLMProvider | None = None,
+     ):
+         self.documents = documents
+         self.benchmark = benchmark
+         self.vector_store = SimpleVectorStore()
+         self.results: list[EvaluationResult] = []
+
+         # Resolve provider from functions or explicit provider
+         self._embedding_provider: BaseEmbeddingProvider
+         self._llm_provider: BaseLLMProvider | None = None
+
+         if embed_fn is not None:
+             # Create FunctionProvider from provided functions
+             function_provider = FunctionProvider(
+                 embed_fn=embed_fn,
+                 generate_fn=generate_fn,
+             )
+             self._embedding_provider = function_provider
+             if generate_fn is not None:
+                 self._llm_provider = function_provider
+             elif provider is not None and isinstance(provider, BaseLLMProvider):
+                 self._llm_provider = provider
+         elif provider is not None:
+             if not isinstance(provider, BaseEmbeddingProvider):
+                 raise ValueError(
+                     "Provider must implement BaseEmbeddingProvider for embeddings. Alternatively, provide embed_fn."
+                 )
+             self._embedding_provider = provider
+             if isinstance(provider, BaseLLMProvider):
+                 self._llm_provider = provider
+         else:
+             raise ValueError(
+                 "Must provide embed_fn or provider for embeddings. "
+                 "Examples:\n"
+                 " RagitExperiment(docs, benchmark, embed_fn=my_embed, generate_fn=my_llm)\n"
+                 " RagitExperiment(docs, benchmark, provider=OllamaProvider())"
+             )
+
+         # LLM is required for evaluation
+         if self._llm_provider is None:
+             raise ValueError(
+                 "RagitExperiment requires LLM for evaluation. Provide generate_fn or a provider with LLM support."
+             )
+
+     @property
+     def provider(self) -> BaseEmbeddingProvider:
+         """Return the embedding provider (for backwards compatibility)."""
+         return self._embedding_provider
+
+     def define_search_space(
+         self,
+         chunk_sizes: list[int] | None = None,
+         chunk_overlaps: list[int] | None = None,
+         num_chunks_options: list[int] | None = None,
+         embedding_models: list[str] | None = None,
+         llm_models: list[str] | None = None,
+     ) -> list[RAGConfig]:
+         """
+         Define the hyperparameter search space.
+
+         Parameters
+         ----------
+         chunk_sizes : list[int], optional
+             Chunk sizes to test. Default: [256, 512]
+         chunk_overlaps : list[int], optional
+             Chunk overlaps to test. Default: [50, 100]
+         num_chunks_options : list[int], optional
+             Number of chunks to retrieve. Default: [2, 3]
+         embedding_models : list[str], optional
+             Embedding models to test. Default: ["default"]
+         llm_models : list[str], optional
+             LLM models to test. Default: ["default"]
+
+         Returns
+         -------
+         list[RAGConfig]
+             List of configurations to evaluate.
+         """
+         chunk_sizes = chunk_sizes or [256, 512]
+         chunk_overlaps = chunk_overlaps or [50, 100]
+         num_chunks_options = num_chunks_options or [2, 3]
+         embedding_models = embedding_models or ["default"]
+         llm_models = llm_models or ["default"]
+
+         configs = []
+         pattern_num = 1
+
+         for cs, co, nc, em, lm in product(
+             chunk_sizes, chunk_overlaps, num_chunks_options, embedding_models, llm_models
+         ):
+             # Ensure overlap is less than chunk size
+             if co >= cs:
+                 continue
+
+             configs.append(
+                 RAGConfig(
+                     name=f"Pattern_{pattern_num}",
+                     chunk_size=cs,
+                     chunk_overlap=co,
+                     num_chunks=nc,
+                     embedding_model=em,
+                     llm_model=lm,
+                 )
+             )
+             pattern_num += 1
+
+         return configs
+
+     def _chunk_document(self, doc: Document, chunk_size: int, overlap: int) -> list[Chunk]:
+         """Split document into overlapping chunks."""
+         chunks = []
+         text = doc.content
+         start = 0
+         chunk_idx = 0
+
+         while start < len(text):
+             end = start + chunk_size
+             chunk_text = text[start:end].strip()
+
+             if chunk_text:
+                 chunks.append(
+                     Chunk(
+                         content=chunk_text,
+                         doc_id=doc.id,
+                         chunk_index=chunk_idx,
+                     )
+                 )
+                 chunk_idx += 1
+
+             start = end - overlap
+             if start >= len(text) - overlap:
+                 break
+
+         return chunks
+
+     def _build_index(self, config: RAGConfig) -> None:
+         """Build vector index with given configuration using batch embedding."""
+         self.vector_store.clear()
+         all_chunks: list[Chunk] = []
+
+         # Chunk all documents
+         for doc in self.documents:
+             chunks = self._chunk_document(doc, config.chunk_size, config.chunk_overlap)
+             all_chunks.extend(chunks)
+
+         if not all_chunks:
+             return
+
+         # Batch embed all chunks at once (single API call)
+         texts = [chunk.content for chunk in all_chunks]
+         responses = self._embedding_provider.embed_batch(texts, config.embedding_model)
+
+         for chunk, response in zip(all_chunks, responses, strict=True):
+             chunk.embedding = response.embedding
+
+         self.vector_store.add(all_chunks)
+
+     def _retrieve(self, query: str, config: RAGConfig) -> list[Chunk]:
+         """Retrieve relevant chunks for a query."""
+         query_response = self._embedding_provider.embed(query, config.embedding_model)
+         results = self.vector_store.search(query_response.embedding, top_k=config.num_chunks)
+         return [chunk for chunk, _ in results]
+
+     def _generate(self, question: str, context: str, config: RAGConfig) -> str:
+         """Generate answer using RAG."""
+         if self._llm_provider is None:
+             raise ValueError("LLM provider is required for generation")
+
+         system_prompt = """You are a helpful assistant. Answer questions based ONLY on the provided context.
+ If the context doesn't contain enough information, say so. Be concise and accurate."""
+
+         prompt = f"""Context:
+ {context}
+
+ Question: {question}
+
+ Answer:"""
+
+         response = self._llm_provider.generate(
+             prompt=prompt,
+             model=config.llm_model,
+             system_prompt=system_prompt,
+             temperature=0.7,
+         )
+         return response.text
+
+     def _evaluate_response(
+         self,
+         question: str,
+         generated: str,
+         ground_truth: str,
+         context: str,
+         config: RAGConfig,
+     ) -> EvaluationScores:
+         """Evaluate a RAG response using LLM-as-judge."""
+         if self._llm_provider is None:
+             raise ValueError("LLM provider is required for evaluation")
+
+         def extract_score(response: str) -> float:
+             """Extract numeric score from LLM response."""
+             try:
+                 # Find first number in response
+                 nums = "".join(c for c in response if c.isdigit() or c == ".")
+                 if nums:
+                     score = float(nums.split(".")[0]) # Take integer part
+                     return min(100, max(0, score)) / 100
+             except (ValueError, IndexError):
+                 pass
+             return 0.5
+
+         # Evaluate answer correctness
+         correctness_prompt = f"""Rate how correct this answer is compared to ground truth (0-100):
+
+ Question: {question}
+ Ground Truth: {ground_truth}
+ Generated Answer: {generated}
+
+ Respond with ONLY a number 0-100."""
+
+         resp = self._llm_provider.generate(correctness_prompt, config.llm_model)
+         correctness = extract_score(resp.text)
+
+         # Evaluate context relevance
+         relevance_prompt = f"""Rate how relevant this context is for answering the question (0-100):
+
+ Question: {question}
+ Context: {context[:1000]}
+
+ Respond with ONLY a number 0-100."""
+
+         resp = self._llm_provider.generate(relevance_prompt, config.llm_model)
+         relevance = extract_score(resp.text)
+
+         # Evaluate faithfulness
+         faithfulness_prompt = f"""Rate if this answer is grounded in the context (0-100):
+
+ Context: {context[:1000]}
+ Answer: {generated}
+
+ Respond with ONLY a number 0-100."""
+
+         resp = self._llm_provider.generate(faithfulness_prompt, config.llm_model)
+         faithfulness = extract_score(resp.text)
+
+         return EvaluationScores(
+             answer_correctness=correctness,
+             context_relevance=relevance,
+             faithfulness=faithfulness,
+         )
+
+     def evaluate_config(self, config: RAGConfig, verbose: bool = False) -> EvaluationResult:
+         """
+         Evaluate a single RAG configuration.
+
+         Parameters
+         ----------
+         config : RAGConfig
+             Configuration to evaluate.
+         verbose : bool
+             Print progress information.
+
+         Returns
+         -------
+         EvaluationResult
+             Evaluation results for this configuration.
+         """
+         if verbose:
+             print(f"\nEvaluating {config.name}:")
+             print(f" chunk_size={config.chunk_size}, overlap={config.chunk_overlap}, num_chunks={config.num_chunks}")
+
+         start_time = time.time()
+
+         # Build index
+         self._build_index(config)
+
+         # Evaluate on benchmark
+         all_scores = []
+
+         for qa in self.benchmark:
+             # Retrieve
+             chunks = self._retrieve(qa.question, config)
+             context = "\n\n".join(f"[{c.doc_id}]: {c.content}" for c in chunks)
+
+             # Generate
+             answer = self._generate(qa.question, context, config)
+
+             # Evaluate
+             scores = self._evaluate_response(qa.question, answer, qa.ground_truth, context, config)
+             all_scores.append(scores)
+
+         # Aggregate mean scores across the benchmark questions
+         avg_correctness = np.mean([s.answer_correctness for s in all_scores])
+         avg_relevance = np.mean([s.context_relevance for s in all_scores])
+         avg_faithfulness = np.mean([s.faithfulness for s in all_scores])
+         combined = float(np.mean([s.combined_score for s in all_scores]))
+
+         execution_time = time.time() - start_time
+
+         if verbose:
+             print(
+                 f" Scores: correctness={avg_correctness:.2f}, "
+                 f"relevance={avg_relevance:.2f}, faithfulness={avg_faithfulness:.2f}"
+             )
+             print(f" Combined: {combined:.3f} | Time: {execution_time:.1f}s")
+
+         return EvaluationResult(
+             pattern_name=config.name,
+             indexing_params={
+                 "chunk_size": config.chunk_size,
+                 "chunk_overlap": config.chunk_overlap,
+                 "embedding_model": config.embedding_model,
+             },
+             inference_params={
+                 "num_chunks": config.num_chunks,
+                 "llm_model": config.llm_model,
+             },
+             scores={
+                 "answer_correctness": {"mean": float(avg_correctness)},
+                 "context_relevance": {"mean": float(avg_relevance)},
+                 "faithfulness": {"mean": float(avg_faithfulness)},
+             },
+             execution_time=execution_time,
+             final_score=float(combined),
+         )
+
+     def run(
+         self,
+         configs: list[RAGConfig] | None = None,
+         max_configs: int | None = None,
+         verbose: bool = True,
+     ) -> list[EvaluationResult]:
+         """
+         Run the RAG optimization experiment.
+
+         Parameters
+         ----------
+         configs : list[RAGConfig], optional
+             Configurations to evaluate. If None, uses default search space.
+         max_configs : int, optional
+             Maximum number of configurations to evaluate.
+         verbose : bool
+             Print progress information.
+
+         Returns
+         -------
+         list[EvaluationResult]
+             Results sorted by combined score (best first).
+         """
+         if configs is None:
+             configs = self.define_search_space()
+
+         if max_configs:
+             configs = configs[:max_configs]
+
+         if verbose:
+             print("=" * 60)
+             print("RAGIT: RAG Optimization Experiment")
+             print("=" * 60)
+             print(f"Configurations to test: {len(configs)}")
+             print(f"Documents: {len(self.documents)}")
+             print(f"Benchmark questions: {len(self.benchmark)}")
+             print()
+
+         self.results = []
+
+         for cfg in tqdm(configs, desc="Evaluating configs", disable=not verbose):
+             result = self.evaluate_config(cfg, verbose=verbose)
+             self.results.append(result)
+
+         # Sort by combined score (best first)
+         self.results.sort(key=lambda x: x.final_score, reverse=True)
+
+         if verbose:
+             print("\n" + "=" * 60)
+             print("RESULTS (sorted by score)")
+             print("=" * 60)
+             for i, result in enumerate(self.results[:5], 1):
+                 print(f"{i}. {result.pattern_name}: {result.final_score:.3f}")
+                 print(
+                     f" chunk_size={result.indexing_params['chunk_size']}, "
+                     f"num_chunks={result.inference_params['num_chunks']}"
+                 )
+
+         return self.results
+
+     def get_best_config(self) -> EvaluationResult | None:
+         """Get the best configuration from results."""
+         if not self.results:
+             return None
+         return self.results[0]
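Taken together, the pieces above form the whole optimization loop: chunk, embed, retrieve, generate, score with an LLM-as-judge, then rank configurations. A minimal end-to-end sketch using the function-based path; the toy embed_fn and generate_fn below are placeholders for real models, and it assumes FunctionProvider (defined in ragit/providers/function_adapter.py, not shown in this diff) forwards extra keyword arguments such as model and system_prompt to generate_fn:

    from ragit.core.experiment import BenchmarkQuestion, Document, RagitExperiment

    def embed_fn(text: str) -> list[float]:
        # Toy embedding: 26-dim letter-frequency vector; a real setup would call an embedding model.
        vec = [0.0] * 26
        for ch in text.lower():
            if "a" <= ch <= "z":
                vec[ord(ch) - ord("a")] += 1.0
        return vec

    def generate_fn(prompt: str, *args, **kwargs) -> str:
        # Toy generator so the pipeline runs end to end; a real setup would call an LLM.
        return "80"

    docs = [Document(id="doc1", content="Ragit searches over chunk size, overlap and retrieval depth.")]
    benchmark = [
        BenchmarkQuestion(
            question="What does ragit search over?",
            ground_truth="Chunk size, overlap and retrieval depth.",
            relevant_doc_ids=["doc1"],
        )
    ]

    experiment = RagitExperiment(docs, benchmark, embed_fn=embed_fn, generate_fn=generate_fn)
    configs = experiment.define_search_space(chunk_sizes=[128, 256], chunk_overlaps=[32])
    results = experiment.run(configs)  # sorted best-first by final_score
    best = experiment.get_best_config()
    if best is not None:
        print(best.pattern_name, best.final_score)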