ragbits-evaluate 0.15.0__py3-none-any.whl → 0.17.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ragbits/evaluate/cli.py CHANGED
@@ -8,12 +8,13 @@ from pydantic import BaseModel
 
 from ragbits.cli._utils import get_instance_or_exit
 from ragbits.cli.state import print_output
-from ragbits.core.utils.config_handling import WithConstructionConfig, import_by_path
+from ragbits.core.utils.config_handling import WithConstructionConfig
 from ragbits.evaluate.config import eval_config
-from ragbits.evaluate.dataloaders import DataLoader, get_dataloader_instance
+from ragbits.evaluate.dataloaders import DataLoader
 from ragbits.evaluate.evaluator import Evaluator
 from ragbits.evaluate.metrics.base import MetricSet
 from ragbits.evaluate.pipelines import get_evaluation_pipeline_for_target
+from ragbits.evaluate.pipelines.base import EvaluationPipeline
 
 eval_app = typer.Typer(no_args_is_help=True)
 
@@ -30,9 +31,9 @@ def register(app: typer.Typer) -> None:
 
 @dataclass
 class _CLIState:
-    evaluation_target: WithConstructionConfig | None = None
-    metrics: MetricSet | None = None
     dataloader: DataLoader | None = None
+    pipeline: EvaluationPipeline | None = None
+    metrics: MetricSet | None = None
 
 
 class EvaluationResult(BaseModel):
@@ -46,26 +47,18 @@ state: _CLIState = _CLIState()
 
 @eval_app.callback()
 def common_args(
-    target_cls: Annotated[
-        str,
-        typer.Option(
-            help="A path to target class to be evaluated in a format python.path:ModuleName",
-            exists=True,
-            resolve_path=True,
-        ),
-    ],
-    dataloader_args: Annotated[
-        str,
+    dataloader_factory_path: Annotated[
+        str | None,
         typer.Option(
-            help="Comma separated arguments of dataloader",
+            help="A path to evaluation data loader factory in format python.path:function_name",
             exists=True,
             resolve_path=True,
         ),
-    ],
-    dataloader_cls: Annotated[
-        str | None,
+    ] = None,
+    dataloader_yaml_path: Annotated[
+        Path | None,
         typer.Option(
-            help="Dataloader class path in a format python.path:ModuleName to override the default",
+            help="A path to evaluation data loader configuration",
             exists=True,
             resolve_path=True,
         ),
@@ -73,7 +66,7 @@ def common_args(
     target_factory_path: Annotated[
         str | None,
         typer.Option(
-            help="A path to a factory of the target class in format: python.path:function_name",
+            help="A path to a factory of the evaluation target class in format: python.path:function_name",
             exists=True,
             resolve_path=True,
         ),
@@ -81,7 +74,7 @@ def common_args(
     target_yaml_path: Annotated[
         Path | None,
         typer.Option(
-            help="A path to a YAML configuration file of the target class",
+            help="A path to a YAML configuration file of the evaluation target class",
             exists=True,
             resolve_path=True,
         ),
@@ -106,40 +99,48 @@ def common_args(
     """
    Common arguments for the evaluate commands.
    """
-    state.evaluation_target = get_instance_or_exit(
-        import_by_path(target_cls),
+    evaluation_target = get_instance_or_exit(
+        cls=WithConstructionConfig,
         factory_path=target_factory_path,
         yaml_path=target_yaml_path,
+        config_override=eval_config,
     )
-    # TODO validate if given metric set is suitable for evaluation target
-    state.metrics = get_instance_or_exit(
-        MetricSet, factory_path=metrics_factory_path, yaml_path=metrics_yaml_path, config_override=eval_config
+    state.pipeline = get_evaluation_pipeline_for_target(evaluation_target)
+    # TODO: validate if given dataloader is suitable for evaluation pipeline
+    state.dataloader = get_instance_or_exit(
+        cls=DataLoader,
+        factory_path=dataloader_factory_path,
+        yaml_path=dataloader_yaml_path,
+        config_override=eval_config,
     )
-    # TODO validate if given dataloader is suitable for evaluation target
-    state.dataloader = get_dataloader_instance(
-        config=eval_config, dataloader_args=dataloader_args, dataloader_cls_override=dataloader_cls
+    # TODO: validate if given metric set is suitable for evaluation pipeline
+    state.metrics = get_instance_or_exit(
+        cls=MetricSet,
+        factory_path=metrics_factory_path,
+        yaml_path=metrics_yaml_path,
+        config_override=eval_config,
     )
 
 
 @eval_app.command()
-def run_evaluation() -> None:
+def run() -> None:
     """
-    Evaluate the set-up pipeline.
+    Evaluate the pipeline.
     """
 
     async def run() -> None:
-        if state.evaluation_target is None:
-            raise ValueError("Evaluation target not initialized")
+        if state.dataloader is None:
+            raise ValueError("Evaluation dataloader not initialized")
+        if state.pipeline is None:
+            raise ValueError("Evaluation pipeline not initialized")
         if state.metrics is None:
             raise ValueError("Evaluation metrics not initialized")
-        if state.dataloader is None:
-            raise ValueError("Dataloader not initialized")
-        evaluation_pipeline = get_evaluation_pipeline_for_target(evaluation_target=state.evaluation_target)
+
         evaluator = Evaluator()
         metric_results = await evaluator.compute(
-            pipeline=evaluation_pipeline,
-            metrics=state.metrics,
+            pipeline=state.pipeline,
             dataloader=state.dataloader,
+            metrics=state.metrics,
         )
         evaluation_results = EvaluationResult(
             metrics={"metrics": metric_results["metrics"], "time_perf": metric_results["time_perf"]}

ragbits/evaluate/config.py CHANGED
@@ -3,11 +3,9 @@ from ragbits.core.utils._pyproject import get_config_instance
 
 
 class EvaluateConfig(CoreConfig):
-    """Configuration of ragbits evaluate module"""
-
-    component_preference_factories: dict[str, str] = {"metrics": "ragbits.evaluate.factories:precision_recall_f1"}
-
-    dataloader_default_class: str = "ragbits.evaluate.dataloaders.hf:HFDataLoader"
+    """
+    Configuration for the ragbits-evaluate package, loaded from downstream projects' pyproject.toml files.
+    """
 
 
 eval_config = get_config_instance(EvaluateConfig, subproject="evaluate")

ragbits/evaluate/dataloaders/__init__.py CHANGED
@@ -1,21 +1,3 @@
-from ragbits.core.utils.config_handling import import_by_path
-from ragbits.evaluate.config import EvaluateConfig
 from ragbits.evaluate.dataloaders.base import DataLoader
 
 __all__ = ["DataLoader"]
-
-
-def get_dataloader_instance(
-    config: EvaluateConfig, dataloader_args: str, dataloader_cls_override: str | None = None
-) -> DataLoader:
-    """
-    A function for instantiation of dataloader
-    Args:
-        config: configuration of ragbits.evaluate module
-        dataloader_args: comma separated arguments of dataloader
-        dataloader_cls_override: optional path to override of default dataloader class
-    Returns:
-        DataLoader
-    """
-    dataloader_cls = dataloader_cls_override or config.dataloader_default_class
-    return import_by_path(dataloader_cls)(*dataloader_args.split(","))

ragbits/evaluate/dataloaders/base.py CHANGED
@@ -1,18 +1,59 @@
 from abc import ABC, abstractmethod
-from typing import Generic, TypeVar
+from collections.abc import Iterable
+from types import ModuleType
+from typing import ClassVar, Generic
 
-from ragbits.core.utils.config_handling import WithConstructionConfig
+from pydantic import BaseModel
+from typing_extensions import Self
 
-DataT = TypeVar("DataT")
+from ragbits.core.sources.base import Source
+from ragbits.core.utils.config_handling import ObjectConstructionConfig, WithConstructionConfig
+from ragbits.evaluate import dataloaders
+from ragbits.evaluate.pipelines.base import EvaluationDataT
 
 
-class DataLoader(WithConstructionConfig, Generic[DataT], ABC):
+class DataLoaderConfig(BaseModel):
     """
-    Data loader.
+    Schema for the data loader config.
     """
 
+    source: ObjectConstructionConfig
+
+
+class DataLoader(WithConstructionConfig, Generic[EvaluationDataT], ABC):
+    """
+    Evaluation data loader.
+    """
+
+    default_module: ClassVar[ModuleType | None] = dataloaders
+    configuration_key: ClassVar[str] = "dataloader"
+
+    def __init__(self, source: Source) -> None:
+        """
+        Initialize the data loader.
+
+        Args:
+            source: The source to load the evaluation data from.
+        """
+        self.source = source
+
+    @classmethod
+    def from_config(cls, config: dict) -> Self:
+        """
+        Create an instance of `DataLoader` from a configuration dictionary.
+
+        Args:
+            config: A dictionary containing configuration settings for the data loader.
+
+        Returns:
+            An instance of the data loader class initialized with the provided configuration.
+        """
+        dataloader_config = DataLoaderConfig.model_validate(config)
+        config["source"] = Source.subclass_from_config(dataloader_config.source)
+        return super().from_config(config)
+
     @abstractmethod
-    async def load(self) -> DataT:
+    async def load(self) -> Iterable[EvaluationDataT]:
         """
         Load the data.
 
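Because from_config now splits off the nested source entry and instantiates it via Source.subclass_from_config, a loader can be declared purely as configuration. A minimal sketch, assuming the usual type/config layout of ObjectConstructionConfig:

from ragbits.evaluate.dataloaders.document_search import DocumentSearchDataLoader

# The "source" entry is validated against DataLoaderConfig and replaced with a
# constructed Source instance before the loader's own __init__ runs.
loader = DocumentSearchDataLoader.from_config(
    {
        "source": {
            "type": "ragbits.core.sources.hf:HuggingFaceSource",
            "config": {"path": "deepsense-ai/synthetic-rag-dataset_v1.0"},
        }
    }
)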

ragbits/evaluate/dataloaders/document_search.py ADDED
@@ -0,0 +1,45 @@
+from collections.abc import Iterable
+
+from datasets import load_dataset
+
+from ragbits.evaluate.dataloaders.base import DataLoader
+from ragbits.evaluate.dataloaders.exceptions import DataLoaderIncorrectFormatDataError
+from ragbits.evaluate.pipelines.document_search import DocumentSearchData
+
+
+class DocumentSearchDataLoader(DataLoader[DocumentSearchData]):
+    """
+    Document search evaluation data loader.
+
+    The source used for this data loader should point to a file that can be loaded by [Hugging Face](https://huggingface.co/docs/datasets/loading#local-and-remote-files)
+    and contain the following features: "question, "passages".
+    """
+
+    async def load(self) -> Iterable[DocumentSearchData]:
+        """
+        Load the data from source and format them.
+
+        Returns:
+            The document search evaluation data.
+
+        Raises:
+            DataLoaderIncorrectFormatDataError: If evaluation dataset is incorrectly formatted.
+        """
+        data_path = await self.source.fetch()
+        dataset = load_dataset(
+            path=str(data_path.parent),
+            split=data_path.stem,
+        )
+        if "question" not in dataset.features or "passages" not in dataset.features:
+            raise DataLoaderIncorrectFormatDataError(
+                required_features=["question", "passages"],
+                data_path=data_path,
+            )
+
+        return [
+            DocumentSearchData(
+                question=data["question"],
+                reference_passages=data["passages"],
+            )
+            for data in dataset
+        ]
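
Direct usage mirrors the synthetic_rag_dataset factory added later in this diff; a short sketch:

import asyncio

from ragbits.core.sources.hf import HuggingFaceSource
from ragbits.evaluate.dataloaders.document_search import DocumentSearchDataLoader

loader = DocumentSearchDataLoader(source=HuggingFaceSource(path="deepsense-ai/synthetic-rag-dataset_v1.0"))
rows = asyncio.run(loader.load())  # Iterable[DocumentSearchData] with question/reference_passages fields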

ragbits/evaluate/dataloaders/exceptions.py ADDED
@@ -0,0 +1,25 @@
+from pathlib import Path
+
+
+class DataLoaderError(Exception):
+    """
+    Class for all exceptions raised by the data loader.
+    """
+
+    def __init__(self, message: str, data_path: Path) -> None:
+        super().__init__(message)
+        self.message = message
+        self.data_path = data_path
+
+
+class DataLoaderIncorrectFormatDataError(DataLoaderError):
+    """
+    Raised when the data are incorrectly formatted.
+    """
+
+    def __init__(self, required_features: list[str], data_path: Path) -> None:
+        super().__init__(
+            message=f"Dataset {data_path} is incorrectly formatted. Required features: {required_features}",
+            data_path=data_path,
+        )
+        self.required_features = required_features

ragbits/evaluate/evaluator.py CHANGED
@@ -9,30 +9,46 @@ from tqdm.asyncio import tqdm
 from ragbits.core.utils.config_handling import ObjectConstructionConfig, WithConstructionConfig
 from ragbits.evaluate.dataloaders.base import DataLoader
 from ragbits.evaluate.metrics.base import MetricSet
-from ragbits.evaluate.pipelines.base import EvaluationPipeline, EvaluationResult
+from ragbits.evaluate.pipelines.base import EvaluationDataT, EvaluationPipeline, EvaluationResultT, EvaluationTargetT
 
 
-class EvaluatorConfig(BaseModel):
+class EvaluationConfig(BaseModel):
     """
-    Schema for for the dict taken by `Evaluator.run_from_config` method.
+    Schema for the evaluation run config.
     """
 
-    dataloader: ObjectConstructionConfig
     pipeline: ObjectConstructionConfig
+    dataloader: ObjectConstructionConfig
     metrics: dict[str, ObjectConstructionConfig]
 
 
+class EvaluatorConfig(BaseModel):
+    """
+    Schema for the evaluator config.
+    """
+
+    evaluation: EvaluationConfig
+    evaluator: dict | None = None
+
+
 class Evaluator(WithConstructionConfig):
     """
     Evaluator class.
     """
 
-    CONCURRENCY: int = 10
+    def __init__(self, batch_size: int = 10) -> None:
+        """
+        Initialize the evaluator.
+
+        Args:
+            batch_size: batch size for the evaluation pipeline inference.
+        """
+        self.batch_size = batch_size
 
     @classmethod
     async def run_from_config(cls, config: dict) -> dict:
         """
-        Runs the evaluation based on configuration.
+        Run the evaluation based on configuration.
 
         Args:
             config: Evaluation config.
@@ -40,12 +56,14 @@ class Evaluator(WithConstructionConfig):
         Returns:
             The evaluation results.
         """
-        model = EvaluatorConfig.model_validate(config)
-        dataloader: DataLoader = DataLoader.subclass_from_config(model.dataloader)
-        pipeline: EvaluationPipeline = EvaluationPipeline.subclass_from_config(model.pipeline)
-        metrics: MetricSet = MetricSet.from_config(model.metrics)
-
-        return await cls().compute(
+        evaluator_config = EvaluatorConfig.model_validate(config)
+        evaluation_config = EvaluationConfig.model_validate(evaluator_config.evaluation)
+        pipeline: EvaluationPipeline = EvaluationPipeline.subclass_from_config(evaluation_config.pipeline)
+        dataloader: DataLoader = DataLoader.subclass_from_config(evaluation_config.dataloader)
+        metrics: MetricSet = MetricSet.from_config(evaluation_config.metrics)
+
+        evaluator = cls.from_config(evaluator_config.evaluator or {})
+        return await evaluator.compute(
             pipeline=pipeline,
             dataloader=dataloader,
             metrics=metrics,
@@ -53,9 +71,9 @@
 
     async def compute(
         self,
-        pipeline: EvaluationPipeline,
-        dataloader: DataLoader,
-        metrics: MetricSet,
+        pipeline: EvaluationPipeline[EvaluationTargetT, EvaluationDataT, EvaluationResultT],
+        dataloader: DataLoader[EvaluationDataT],
+        metrics: MetricSet[EvaluationResultT],
     ) -> dict:
         """
         Compute the evaluation results for the given pipeline and data.
@@ -83,9 +101,9 @@
 
     async def _call_pipeline(
         self,
-        pipeline: EvaluationPipeline,
-        dataset: Iterable[dict],
-    ) -> tuple[list[EvaluationResult], dict]:
+        pipeline: EvaluationPipeline[EvaluationTargetT, EvaluationDataT, EvaluationResultT],
+        dataset: Iterable[EvaluationDataT],
+    ) -> tuple[list[EvaluationResultT], dict]:
         """
         Call the pipeline with the given data.
 
@@ -96,9 +114,9 @@
         Returns:
             The evaluation results and performance metrics.
         """
-        semaphore = asyncio.Semaphore(self.CONCURRENCY)
+        semaphore = asyncio.Semaphore(self.batch_size)
 
-        async def _call_pipeline_with_semaphore(data: dict) -> EvaluationResult:
+        async def _call_pipeline_with_semaphore(data: EvaluationDataT) -> EvaluationResultT:
             async with semaphore:
                 return await pipeline(data)
 
@@ -109,7 +127,7 @@
         return pipe_outputs, self._compute_time_perf(start_time, end_time, len(pipe_outputs))
 
     @staticmethod
-    def _results_processor(results: list[EvaluationResult]) -> dict:
+    def _results_processor(results: list[EvaluationResultT]) -> dict:
         """
         Process the results.
 
@@ -122,7 +140,7 @@
         return {"results": [asdict(result) for result in results]}
 
     @staticmethod
-    def _compute_metrics(metrics: MetricSet, results: list[EvaluationResult]) -> dict:
+    def _compute_metrics(metrics: MetricSet[EvaluationResultT], results: list[EvaluationResultT]) -> dict:
         """
         Compute a metric using the given inputs.
 
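run_from_config therefore now expects the pipeline/dataloader/metrics triple nested under an evaluation key, with optional evaluator settings beside it. A structural sketch (the type paths name classes from this diff; the empty config dicts are placeholders a real project would fill in):

import asyncio

from ragbits.evaluate.evaluator import Evaluator

config = {
    "evaluation": {
        "pipeline": {
            "type": "ragbits.evaluate.pipelines.document_search:DocumentSearchPipeline",
            "config": {},  # placeholder: DocumentSearch construction config
        },
        "dataloader": {
            "type": "ragbits.evaluate.dataloaders.document_search:DocumentSearchDataLoader",
            "config": {
                "source": {
                    "type": "ragbits.core.sources.hf:HuggingFaceSource",
                    "config": {"path": "deepsense-ai/synthetic-rag-dataset_v1.0"},
                }
            },
        },
        "metrics": {},  # placeholder: name -> ObjectConstructionConfig entries
    },
    "evaluator": {"batch_size": 5},  # optional; omit to use the default batch_size=10
}

results = asyncio.run(Evaluator.run_from_config(config))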

ragbits/evaluate/factories/__init__.py CHANGED
@@ -3,10 +3,12 @@ import asyncio
 from datasets import load_dataset
 
 from ragbits.core.embeddings.dense import LiteLLMEmbedder
+from ragbits.core.sources.hf import HuggingFaceSource
 from ragbits.core.utils.config_handling import ObjectConstructionConfig
 from ragbits.core.vector_stores.in_memory import InMemoryVectorStore
 from ragbits.document_search import DocumentSearch
 from ragbits.document_search.documents.document import DocumentMeta
+from ragbits.evaluate.dataloaders.document_search import DocumentSearchDataLoader
 from ragbits.evaluate.metrics import MetricSet
 
 DS_PRECISION_RECALL_F1 = {
@@ -27,7 +29,9 @@ DS_PRECISION_RECALL_F1 = {
 
 
 def precision_recall_f1() -> MetricSet:
-    """A factory of precision recall f1 metric set for retrival evaluation"""
+    """
+    Factory of precision recall f1 metric set for retrival evaluation.
+    """
     return MetricSet.from_config(config=DS_PRECISION_RECALL_F1)
 
 
@@ -38,7 +42,16 @@ async def _add_example_documents(document_search: DocumentSearch) -> None:
 
 
 def basic_document_search_factory() -> DocumentSearch:
-    """A factory for basic example document search instance"""
+    """
+    Factory for basic example document search instance.
+    """
    document_search = DocumentSearch(vector_store=InMemoryVectorStore(embedder=LiteLLMEmbedder()))
    asyncio.run(_add_example_documents(document_search))
    return document_search
+
+
+def synthetic_rag_dataset() -> DocumentSearchDataLoader:
+    """
+    Factory for synthetic RAG dataset.
+    """
+    return DocumentSearchDataLoader(source=HuggingFaceSource(path="deepsense-ai/synthetic-rag-dataset_v1.0"))
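
The new synthetic_rag_dataset factory is exactly the kind of zero-argument callable the reworked CLI resolves; a short sketch (the option spelling is inferred from the dataloader_factory_path parameter in cli.py above):

from ragbits.evaluate.factories import synthetic_rag_dataset

loader = synthetic_rag_dataset()
# Roughly equivalent to passing
#   --dataloader-factory-path ragbits.evaluate.factories:synthetic_rag_dataset
# on the ragbits evaluate command line.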

ragbits/evaluate/metrics/base.py CHANGED
@@ -1,19 +1,22 @@
 from abc import ABC, abstractmethod
-from typing import Generic, TypeVar
+from types import ModuleType
+from typing import ClassVar, Generic
 
 from typing_extensions import Self
 
 from ragbits.core.utils.config_handling import WithConstructionConfig
-from ragbits.evaluate.pipelines.base import EvaluationResult
+from ragbits.evaluate import metrics
+from ragbits.evaluate.pipelines.base import EvaluationResultT
 
-ResultT = TypeVar("ResultT", bound=EvaluationResult)
 
-
-class Metric(WithConstructionConfig, Generic[ResultT], ABC):
+class Metric(WithConstructionConfig, Generic[EvaluationResultT], ABC):
     """
     Base class for metrics.
     """
 
+    default_module: ClassVar[ModuleType | None] = metrics
+    configuration_key: ClassVar[str] = "metric"
+
     def __init__(self, weight: float = 1.0) -> None:
         """
         Initializes the metric.
@@ -25,7 +28,7 @@ class Metric(WithConstructionConfig, Generic[ResultT], ABC):
         self.weight = weight
 
     @abstractmethod
-    def compute(self, results: list[ResultT]) -> dict:
+    def compute(self, results: list[EvaluationResultT]) -> dict:
         """
         Compute the metric.
 
@@ -37,16 +40,17 @@ class Metric(WithConstructionConfig, Generic[ResultT], ABC):
     """
 
 
-class MetricSet(WithConstructionConfig, Generic[ResultT]):
+class MetricSet(WithConstructionConfig, Generic[EvaluationResultT]):
     """
     Represents a set of metrics.
     """
 
-    configuration_key = "metrics"
+    configuration_key: ClassVar[str] = "metrics"
+    default_module: ClassVar[ModuleType | None] = metrics
 
-    def __init__(self, *metrics: Metric[ResultT]) -> None:
+    def __init__(self, *metrics: Metric[EvaluationResultT]) -> None:
         """
-        Initializes the metric set.
+        Initialize the metric set.
 
         Args:
             metrics: The metrics.
@@ -66,7 +70,7 @@ class MetricSet(WithConstructionConfig, Generic[ResultT]):
         """
         return cls(*[Metric.subclass_from_config(metric_config) for metric_config in config.values()])
 
-    def compute(self, results: list[ResultT]) -> dict:
+    def compute(self, results: list[EvaluationResultT]) -> dict:
         """
         Compute the metrics.
 
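With the shared EvaluationResultT bound, a custom metric parameterizes directly over the pipeline's result type. A hypothetical example (the class and metric name are illustrative):

from ragbits.evaluate.metrics.base import Metric
from ragbits.evaluate.pipelines.document_search import DocumentSearchResult


class MeanPredictedPassages(Metric[DocumentSearchResult]):
    """Illustrative metric: average number of passages retrieved per question."""

    def compute(self, results: list[DocumentSearchResult]) -> dict:
        if not results:
            return {"mean_predicted_passages": 0.0}
        return {"mean_predicted_passages": sum(len(r.predicted_passages) for r in results) / len(results)}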

ragbits/evaluate/metrics/document_search.py CHANGED
@@ -19,7 +19,7 @@ class DocumentSearchMetric(Metric[DocumentSearchResult], ABC):
 
     def __init__(self, matching_strategy: MatchingStrategy, weight: float = 1.0) -> None:
         """
-        Initializes the document search metric.
+        Initialize the document search metric.
 
         Args:
             matching_strategy: Matching strategys that determine relevance.

ragbits/evaluate/optimizer.py CHANGED
@@ -17,10 +17,10 @@ from ragbits.evaluate.utils import setup_optuna_neptune_callback
 
 class OptimizerConfig(BaseModel):
     """
-    Schema for the dict taken by `Optimizer.run_from_config` method.
+    Schema for the optimizer config.
     """
 
-    experiment: EvaluatorConfig
+    evaluator: EvaluatorConfig
     optimizer: dict | None = None
     neptune_callback: bool = False
 
@@ -32,7 +32,7 @@ class Optimizer(WithConstructionConfig):
 
     def __init__(self, direction: str = "maximize", n_trials: int = 10, max_retries_for_trial: int = 1) -> None:
         """
-        Initializes the pipeline optimizer.
+        Initialize the pipeline optimizer.
 
         Args:
             direction: Direction of optimization.
@@ -49,7 +49,7 @@
     @classmethod
     def run_from_config(cls, config: dict) -> list[tuple[dict, float, dict[str, float]]]:
         """
-        Runs the optimization process configured with a config object.
+        Run the optimization process configured with a config object.
 
         Args:
             config: Optimizer config.
@@ -58,16 +58,16 @@
             List of tested configs with associated scores and metrics.
         """
         optimizer_config = OptimizerConfig.model_validate(config)
-        evaluator_config = EvaluatorConfig.model_validate(optimizer_config.experiment)
+        evaluator_config = EvaluatorConfig.model_validate(optimizer_config.evaluator)
 
-        dataloader: DataLoader = DataLoader.subclass_from_config(evaluator_config.dataloader)
-        metrics: MetricSet = MetricSet.from_config(evaluator_config.metrics)
+        dataloader: DataLoader = DataLoader.subclass_from_config(evaluator_config.evaluation.dataloader)
+        metrics: MetricSet = MetricSet.from_config(evaluator_config.evaluation.metrics)
 
-        pipeline_class = import_by_path(evaluator_config.pipeline.type)
-        pipeline_config = dict(optimizer_config.experiment.pipeline.config)
+        pipeline_class = import_by_path(evaluator_config.evaluation.pipeline.type)
+        pipeline_config = dict(evaluator_config.evaluation.pipeline.config)
         callbacks = [setup_optuna_neptune_callback()] if optimizer_config.neptune_callback else []
 
-        optimizer = cls.from_config(config.get("optimizer", {}))
+        optimizer = cls.from_config(optimizer_config.optimizer or {})
         return optimizer.optimize(
             pipeline_class=pipeline_class,
             pipeline_config=pipeline_config,
@@ -85,7 +85,7 @@
         callbacks: list[Callable] | None = None,
     ) -> list[tuple[dict, float, dict[str, float]]]:
         """
-        Runs the optimization process for given parameters.
+        Run the optimization process for given parameters.
 
         Args:
             pipeline_class: Pipeline to be optimized.
@@ -134,7 +134,7 @@
         metrics: MetricSet,
     ) -> float:
         """
-        Runs a single experiment.
+        Run a single experiment.
         """
         evaluator = Evaluator()
         event_loop = asyncio.get_event_loop()
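
The optimizer config mirrors the evaluator's new nesting: the former experiment key becomes evaluator, which itself nests pipeline/dataloader/metrics under evaluation. A structural sketch with placeholder inner configs:

optimizer_config = {
    "evaluator": {
        "evaluation": {
            "pipeline": {"type": "...", "config": {}},    # placeholders, as in the
            "dataloader": {"type": "...", "config": {}},  # evaluator sketch above
            "metrics": {},
        },
    },
    "optimizer": {"direction": "maximize", "n_trials": 10, "max_retries_for_trial": 1},
    "neptune_callback": False,
}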

ragbits/evaluate/pipelines/__init__.py CHANGED
@@ -1,14 +1,14 @@
 from ragbits.core.utils.config_handling import WithConstructionConfig
 from ragbits.document_search import DocumentSearch
-from ragbits.evaluate.pipelines.base import EvaluationPipeline, EvaluationResult
+from ragbits.evaluate.pipelines.base import EvaluationData, EvaluationPipeline, EvaluationResult
 from ragbits.evaluate.pipelines.document_search import DocumentSearchPipeline
 
+__all__ = ["DocumentSearchPipeline", "EvaluationData", "EvaluationPipeline", "EvaluationResult"]
+
 _target_to_evaluation_pipeline: dict[type[WithConstructionConfig], type[EvaluationPipeline]] = {
-    DocumentSearch: DocumentSearchPipeline
+    DocumentSearch: DocumentSearchPipeline,
 }
 
-__all__ = ["DocumentSearchPipeline", "EvaluationPipeline", "EvaluationResult"]
-
 
 def get_evaluation_pipeline_for_target(evaluation_target: WithConstructionConfig) -> EvaluationPipeline:
     """

ragbits/evaluate/pipelines/base.py CHANGED
@@ -1,12 +1,24 @@
 from abc import ABC, abstractmethod
 from dataclasses import dataclass
-from typing import Generic, TypeVar
+from types import ModuleType
+from typing import ClassVar, Generic, TypeVar
+
+from pydantic import BaseModel
 
 from ragbits.core.utils.config_handling import WithConstructionConfig
+from ragbits.evaluate import pipelines
 
+EvaluationDataT = TypeVar("EvaluationDataT", bound="EvaluationData")
+EvaluationResultT = TypeVar("EvaluationResultT", bound="EvaluationResult")
 EvaluationTargetT = TypeVar("EvaluationTargetT", bound=WithConstructionConfig)
 
 
+class EvaluationData(BaseModel, ABC):
+    """
+    Represents the data for a single evaluation.
+    """
+
+
 @dataclass
 class EvaluationResult(ABC):
     """
@@ -14,18 +26,34 @@ class EvaluationResult(ABC):
     """
 
 
-class EvaluationPipeline(Generic[EvaluationTargetT], WithConstructionConfig, ABC):
+class EvaluationPipeline(WithConstructionConfig, Generic[EvaluationTargetT, EvaluationDataT, EvaluationResultT], ABC):
     """
-    Collection evaluation pipeline.
+    Evaluation pipeline.
     """
 
-    def __init__(self, evaluation_target: EvaluationTargetT):
+    default_module: ClassVar[ModuleType | None] = pipelines
+    configuration_key: ClassVar[str] = "pipeline"
+
+    def __init__(self, evaluation_target: EvaluationTargetT) -> None:
+        """
+        Initialize the evaluation pipeline.
+
+        Args:
+            evaluation_target: Evaluation target instance.
+        """
+        super().__init__()
         self.evaluation_target = evaluation_target
 
+    async def prepare(self) -> None:
+        """
+        Prepare pipeline for evaluation. Optional step.
+        """
+        pass
+
     @abstractmethod
-    async def __call__(self, data: dict) -> EvaluationResult:
+    async def __call__(self, data: EvaluationDataT) -> EvaluationResultT:
         """
-        Runs the evaluation pipeline.
+        Run the evaluation pipeline.
 
         Args:
             data: The evaluation data.
@@ -33,9 +61,3 @@ class EvaluationPipeline(Generic[EvaluationTargetT], WithConstructionConfig, ABC
         Returns:
             The evaluation result.
         """
-
-    async def prepare(self) -> None:
-        """
-        Prepares pipeline for evaluation.
-        """
-        pass

ragbits/evaluate/pipelines/document_search.py CHANGED
@@ -5,7 +5,16 @@ from typing_extensions import Self
 
 from ragbits.core.sources.hf import HuggingFaceSource
 from ragbits.document_search import DocumentSearch
-from ragbits.evaluate.pipelines.base import EvaluationPipeline, EvaluationResult
+from ragbits.evaluate.pipelines.base import EvaluationData, EvaluationPipeline, EvaluationResult
+
+
+class DocumentSearchData(EvaluationData):
+    """
+    Represents the evaluation data for document search.
+    """
+
+    question: str
+    reference_passages: list[str]
 
 
 @dataclass
@@ -19,14 +28,14 @@ class DocumentSearchResult(EvaluationResult):
     predicted_passages: list[str]
 
 
-class DocumentSearchPipeline(EvaluationPipeline[DocumentSearch]):
+class DocumentSearchPipeline(EvaluationPipeline[DocumentSearch, DocumentSearchData, DocumentSearchResult]):
     """
     Document search evaluation pipeline.
     """
 
     def __init__(self, evaluation_target: DocumentSearch, source: dict | None = None) -> None:
         """
-        Initializes the document search pipeline.
+        Initialize the document search evaluation pipeline.
 
         Args:
             evaluation_target: Document Search instance.
@@ -51,12 +60,12 @@ class DocumentSearchPipeline(EvaluationPipeline[DocumentSearch]):
         # TODO: optimize this for cases with duplicated document search configs between runs
         if config.get("source"):
             config["vector_store"]["config"]["index_name"] = str(uuid4())
-        document_search = DocumentSearch.from_config(config)
-        return cls(evaluation_target=document_search, source=config.get("source"))
+        evaluation_target = DocumentSearch.from_config(config)
+        return cls(evaluation_target=evaluation_target, source=config.get("source"))
 
     async def prepare(self) -> None:
         """
-        Ingests corpus data for evaluation.
+        Ingest corpus data for evaluation.
         """
         if self.source:
             # For now we only support HF sources for pre-evaluation ingest
@@ -67,9 +76,9 @@
         )
         await self.evaluation_target.ingest(sources)
 
-    async def __call__(self, data: dict) -> DocumentSearchResult:
+    async def __call__(self, data: DocumentSearchData) -> DocumentSearchResult:
         """
-        Runs the document search evaluation pipeline.
+        Run the document search evaluation pipeline.
 
         Args:
             data: The evaluation data.
@@ -77,10 +86,11 @@
         Returns:
             The evaluation result.
         """
-        elements = await self.evaluation_target.search(data["question"])
+        elements = await self.evaluation_target.search(data.question)
         predicted_passages = [element.text_representation for element in elements if element.text_representation]
+
         return DocumentSearchResult(
-            question=data["question"],
-            reference_passages=data["passage"],
+            question=data.question,
+            reference_passages=data.reference_passages,
             predicted_passages=predicted_passages,
         )
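
Evaluation rows are now validated pydantic models rather than raw dicts, so malformed rows fail at construction instead of deep inside the pipeline; for example:

from ragbits.evaluate.pipelines.document_search import DocumentSearchData

row = DocumentSearchData(question="What is RAG?", reference_passages=["Retrieval-augmented generation ..."])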

ragbits_evaluate-0.15.0.dist-info/METADATA → ragbits_evaluate-0.17.0.dist-info/METADATA RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ragbits-evaluate
-Version: 0.15.0
+Version: 0.17.0
 Summary: Evaluation module for Ragbits components
 Project-URL: Homepage, https://github.com/deepsense-ai/ragbits
 Project-URL: Bug Reports, https://github.com/deepsense-ai/ragbits/issues
@@ -22,11 +22,12 @@ Classifier: Programming Language :: Python :: 3.13
 Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
 Classifier: Topic :: Software Development :: Libraries :: Python Modules
 Requires-Python: >=3.10
-Requires-Dist: distilabel==1.4.1
-Requires-Dist: hydra-core~=1.3.2
-Requires-Dist: neptune[optuna]~=1.12.0
-Requires-Dist: optuna==4.0.0
-Requires-Dist: ragbits-core==0.15.0
+Requires-Dist: datasets<4.0.0,>=3.0.1
+Requires-Dist: distilabel<2.0.0,>=1.4.1
+Requires-Dist: hydra-core<2.0.0,>=1.3.2
+Requires-Dist: neptune[optuna]<2.0.0,>=1.12.0
+Requires-Dist: optuna<5.0.0,>=4.0.0
+Requires-Dist: ragbits-core==0.17.0
 Provides-Extra: relari
 Requires-Dist: continuous-eval<1.0.0,>=0.3.12; extra == 'relari'
 Description-Content-Type: text/markdown

ragbits_evaluate-0.15.0.dist-info/RECORD → ragbits_evaluate-0.17.0.dist-info/RECORD RENAMED
@@ -1,14 +1,14 @@
 ragbits/evaluate/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-ragbits/evaluate/cli.py,sha256=zOPnd9LhPVNN-SRIt0R3y_qmDDqSVnbaLPMiFUJo1Jo,4617
-ragbits/evaluate/config.py,sha256=FfeC04f95hygTans3GduiSFuzhInUGWhmxLK4ylL95c,467
-ragbits/evaluate/evaluator.py,sha256=iMyxMXJhKNKOWMs9xM4uIxpe_bYNXcerfldZ2z32ghA,4777
-ragbits/evaluate/optimizer.py,sha256=rxIK8UMlSyBpP5HL7_TvYPceNoyqmsi7Gvs7SpPnWhA,8471
+ragbits/evaluate/cli.py,sha256=MEDo8ubk81TCNx-fq-liF0P5hjn2-kPpIfq54fReKIY,4509
+ragbits/evaluate/config.py,sha256=2WSmbVxyQi893L2FSjRFQoXkWZp1GetcNmR2GCDe0tA,339
+ragbits/evaluate/evaluator.py,sha256=Cif-QX2n5awOGm-AfFy2nRXkb_m4vGY_JZ_o4K4PhZI,5552
+ragbits/evaluate/optimizer.py,sha256=egcU54aADqKrN31NPqj7cNIQO4UISfG7VtkOAQyQUOY,8471
 ragbits/evaluate/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 ragbits/evaluate/utils.py,sha256=rTTmrP4nv3D7174cMEfohxrDN5thPScH0BsXaptMHqQ,3757
-ragbits/evaluate/dataloaders/__init__.py,sha256=ezGvdCOGBUrOh8M_ceJnOO9hR-jWTdjKr0voHg4Vsvc,802
-ragbits/evaluate/dataloaders/base.py,sha256=yzsGpCfQ3u1z3XpqTqG8VnKdv3o5CTpToDw8hh3wvv0,417
-ragbits/evaluate/dataloaders/hf.py,sha256=FwAGeeqZ0ZxieJyeBcdWo__adjoG3Db34tgcT1RRMng,707
-ragbits/evaluate/dataloaders/local.py,sha256=FrDjeAguKfimM8l35LLthr-kH7slxJG9zRnu1YZuo0A,1155
+ragbits/evaluate/dataloaders/__init__.py,sha256=UFJFjmvi3GUQFsx6A5sYD01HH2f7TXcHRW2VNM1pmIA,83
+ragbits/evaluate/dataloaders/base.py,sha256=ovL38_tH12q9wd3yeflIlovGuSD8S1X9HUUtwv17QrM,1774
+ragbits/evaluate/dataloaders/document_search.py,sha256=sqNPQf1ZYAqM_xMjuwh63ET00zEmKtAzqXX04cazuB8,1579
+ragbits/evaluate/dataloaders/exceptions.py,sha256=xUOBLj1JuCkcqzRVnu0A0I_i1THxbDt2MEDVdDGjDyY,735
 ragbits/evaluate/dataset_generator/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 ragbits/evaluate/dataset_generator/pipeline.py,sha256=dgnV-Qm0Z7S1Y6ga9-9RscXxxr3krOKsIj7E9WS4ANk,4940
 ragbits/evaluate/dataset_generator/utils.py,sha256=zD-ksXlX62kkIgzBefE4ILsP7He9bHimnZ63LLsMKCA,1325
@@ -23,13 +23,13 @@ ragbits/evaluate/dataset_generator/tasks/filter/dont_know.py,sha256=ydMHyI0JrWZf
 ragbits/evaluate/dataset_generator/tasks/text_generation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 ragbits/evaluate/dataset_generator/tasks/text_generation/base.py,sha256=2h-Y14H3fRHKbTNvXWKRus8t0hdTITd9LMoIFVwfKfA,2138
 ragbits/evaluate/dataset_generator/tasks/text_generation/qa.py,sha256=QAClPbTVNCe4QzVOGuepRnsmkt9ZF6bXBAuJI2elRuE,3851
-ragbits/evaluate/factories/__init__.py,sha256=FZulUBra8TD37YMk6vDZXc9uq15o-eq9knZ42w_NGTg,1696
+ragbits/evaluate/factories/__init__.py,sha256=De2ZgQ4YXgvpMOvm81fSDPSMvKpIBjS-aqeE0dxEU1s,2074
 ragbits/evaluate/metrics/__init__.py,sha256=Mr83ytGyvdXtBlr7Bbo0-5auE0530xsd3wffKSIf8cE,95
-ragbits/evaluate/metrics/base.py,sha256=iJ_ise9XRS6IL6zwJZmN18rLNwI-drLsb2BJOzqkEGI,2099
-ragbits/evaluate/metrics/document_search.py,sha256=UrIIPhDdgXkozHFVac87jBURjVvgLS5gaaN8uYJ1jJY,2852
-ragbits/evaluate/pipelines/__init__.py,sha256=2ESwj6TybDxxCHrndTANpbek3TmpYSIeH1L4jFCKa48,1259
-ragbits/evaluate/pipelines/base.py,sha256=Qo3a3il_6edAuk_1vGMp9O_FeNQDJFqJn1WQOKEK_fs,985
-ragbits/evaluate/pipelines/document_search.py,sha256=B0mNFxgSCfhCSjpmQhZRqc1InKKjP1kWDAekQQG9Fj4,3010
-ragbits_evaluate-0.15.0.dist-info/METADATA,sha256=9tSF7mRlwp-4IJ3S2ws8Dai19BomH2ZLp2_ul_hGFVo,2234
-ragbits_evaluate-0.15.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-ragbits_evaluate-0.15.0.dist-info/RECORD,,
+ragbits/evaluate/metrics/base.py,sha256=axkGuKJU5u94SnRjpWsdG4jFWjy8rmkSHVRcgz1JLTo,2342
+ragbits/evaluate/metrics/document_search.py,sha256=WeC0xuLYci_Vbdw-E4OjawTqmLkcFKjDWSJGITC9-AQ,2851
+ragbits/evaluate/pipelines/__init__.py,sha256=Bqp_L7aRq12Ua19ELZDsdYvra6-GlLrQ9cIG2IWArko,1294
+ragbits/evaluate/pipelines/base.py,sha256=1GPu3MV-2o0PdUuFM4IcLeg1baYv9acqCcGrQykmRSs,1682
+ragbits/evaluate/pipelines/document_search.py,sha256=xMcSnahy7fifk2bJoolX9OWCXz4FjSJQfBDHIB1d2mQ,3266
+ragbits_evaluate-0.17.0.dist-info/METADATA,sha256=fdHH9MszU2DO5pp18ikVVnOEPkTnQ_TQwddvcvEwWj4,2300
+ragbits_evaluate-0.17.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+ragbits_evaluate-0.17.0.dist-info/RECORD,,

ragbits/evaluate/dataloaders/hf.py DELETED
@@ -1,29 +0,0 @@
-from typing import TypeAlias
-
-from datasets import Dataset, DatasetDict, IterableDataset, IterableDatasetDict, load_dataset
-
-from ragbits.evaluate.dataloaders.base import DataLoader
-
-HFData: TypeAlias = DatasetDict | Dataset | IterableDatasetDict | IterableDataset
-
-
-class HFDataLoader(DataLoader[HFData]):
-    """
-    Hugging Face data loader.
-    """
-
-    def __init__(self, path: str, split: str) -> None:
-        self.path = path
-        self.split = split
-
-    async def load(self) -> HFData:
-        """
-        Load the data from Hugging Face.
-
-        Returns:
-            The loaded data.
-        """
-        return load_dataset(
-            path=self.path,
-            split=self.split,
-        )

ragbits/evaluate/dataloaders/local.py DELETED
@@ -1,45 +0,0 @@
-from typing import TypeAlias
-
-from datasets import Dataset, DatasetDict, IterableDataset, IterableDatasetDict, load_dataset
-
-from .base import DataLoader
-
-HFData: TypeAlias = DatasetDict | Dataset | IterableDatasetDict | IterableDataset
-
-
-class LocalDataLoader(DataLoader[DatasetDict]):
-    """
-    Local data loader.
-    """
-
-    AVAILABLE_BUILDERS = {
-        "json",
-        "csv",
-        "parquet",
-        "arrow",
-        "text",
-        "xml",
-        "webdataset",
-        "imagefolder",
-        "audiofolder",
-        "videofolder",
-    }
-
-    def __init__(self, path: str, split: str, builder: str) -> None:
-        self.path = path
-        self.split = split
-        self.builder = builder
-
-        if self.builder not in self.AVAILABLE_BUILDERS:
-            raise ValueError(
-                f"Unsupported builder '{self.builder}'. Available builders: {', '.join(self.AVAILABLE_BUILDERS)}"
-            )
-
-    async def load(self) -> DatasetDict:
-        """
-        Load the data from the local file.
-
-        Returns:
-            The loaded data.
-        """
-        return load_dataset(self.builder, data_files=self.path, split=self.split)