ragbits-evaluate 0.16.0__py3-none-any.whl → 0.17.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ragbits/evaluate/cli.py +39 -38
- ragbits/evaluate/config.py +3 -5
- ragbits/evaluate/dataloaders/__init__.py +0 -18
- ragbits/evaluate/dataloaders/base.py +47 -6
- ragbits/evaluate/dataloaders/document_search.py +45 -0
- ragbits/evaluate/dataloaders/exceptions.py +25 -0
- ragbits/evaluate/evaluator.py +40 -22
- ragbits/evaluate/factories/__init__.py +15 -2
- ragbits/evaluate/metrics/base.py +15 -11
- ragbits/evaluate/metrics/document_search.py +1 -1
- ragbits/evaluate/optimizer.py +12 -12
- ragbits/evaluate/pipelines/__init__.py +4 -4
- ragbits/evaluate/pipelines/base.py +34 -12
- ragbits/evaluate/pipelines/document_search.py +21 -11
- {ragbits_evaluate-0.16.0.dist-info → ragbits_evaluate-0.17.0.dist-info}/METADATA +7 -6
- {ragbits_evaluate-0.16.0.dist-info → ragbits_evaluate-0.17.0.dist-info}/RECORD +17 -17
- ragbits/evaluate/dataloaders/hf.py +0 -29
- ragbits/evaluate/dataloaders/local.py +0 -45
- {ragbits_evaluate-0.16.0.dist-info → ragbits_evaluate-0.17.0.dist-info}/WHEEL +0 -0
ragbits/evaluate/cli.py
CHANGED
|
@@ -8,12 +8,13 @@ from pydantic import BaseModel
|
|
|
8
8
|
|
|
9
9
|
from ragbits.cli._utils import get_instance_or_exit
|
|
10
10
|
from ragbits.cli.state import print_output
|
|
11
|
-
from ragbits.core.utils.config_handling import WithConstructionConfig
|
|
11
|
+
from ragbits.core.utils.config_handling import WithConstructionConfig
|
|
12
12
|
from ragbits.evaluate.config import eval_config
|
|
13
|
-
from ragbits.evaluate.dataloaders import DataLoader
|
|
13
|
+
from ragbits.evaluate.dataloaders import DataLoader
|
|
14
14
|
from ragbits.evaluate.evaluator import Evaluator
|
|
15
15
|
from ragbits.evaluate.metrics.base import MetricSet
|
|
16
16
|
from ragbits.evaluate.pipelines import get_evaluation_pipeline_for_target
|
|
17
|
+
from ragbits.evaluate.pipelines.base import EvaluationPipeline
|
|
17
18
|
|
|
18
19
|
eval_app = typer.Typer(no_args_is_help=True)
|
|
19
20
|
|
|
@@ -30,9 +31,9 @@ def register(app: typer.Typer) -> None:
|
|
|
30
31
|
|
|
31
32
|
@dataclass
|
|
32
33
|
class _CLIState:
|
|
33
|
-
evaluation_target: WithConstructionConfig | None = None
|
|
34
|
-
metrics: MetricSet | None = None
|
|
35
34
|
dataloader: DataLoader | None = None
|
|
35
|
+
pipeline: EvaluationPipeline | None = None
|
|
36
|
+
metrics: MetricSet | None = None
|
|
36
37
|
|
|
37
38
|
|
|
38
39
|
class EvaluationResult(BaseModel):
|
|
@@ -46,26 +47,18 @@ state: _CLIState = _CLIState()
|
|
|
46
47
|
|
|
47
48
|
@eval_app.callback()
|
|
48
49
|
def common_args(
|
|
49
|
-
|
|
50
|
-
str,
|
|
51
|
-
typer.Option(
|
|
52
|
-
help="A path to target class to be evaluated in a format python.path:ModuleName",
|
|
53
|
-
exists=True,
|
|
54
|
-
resolve_path=True,
|
|
55
|
-
),
|
|
56
|
-
],
|
|
57
|
-
dataloader_args: Annotated[
|
|
58
|
-
str,
|
|
50
|
+
dataloader_factory_path: Annotated[
|
|
51
|
+
str | None,
|
|
59
52
|
typer.Option(
|
|
60
|
-
help="
|
|
53
|
+
help="A path to evaluation data loader factory in format python.path:function_name",
|
|
61
54
|
exists=True,
|
|
62
55
|
resolve_path=True,
|
|
63
56
|
),
|
|
64
|
-
],
|
|
65
|
-
|
|
66
|
-
|
|
57
|
+
] = None,
|
|
58
|
+
dataloader_yaml_path: Annotated[
|
|
59
|
+
Path | None,
|
|
67
60
|
typer.Option(
|
|
68
|
-
help="
|
|
61
|
+
help="A path to evaluation data loader configuration",
|
|
69
62
|
exists=True,
|
|
70
63
|
resolve_path=True,
|
|
71
64
|
),
|
|
@@ -73,7 +66,7 @@ def common_args(
|
|
|
73
66
|
target_factory_path: Annotated[
|
|
74
67
|
str | None,
|
|
75
68
|
typer.Option(
|
|
76
|
-
help="A path to a factory of the target class in format: python.path:function_name",
|
|
69
|
+
help="A path to a factory of the evaluation target class in format: python.path:function_name",
|
|
77
70
|
exists=True,
|
|
78
71
|
resolve_path=True,
|
|
79
72
|
),
|
|
@@ -81,7 +74,7 @@ def common_args(
|
|
|
81
74
|
target_yaml_path: Annotated[
|
|
82
75
|
Path | None,
|
|
83
76
|
typer.Option(
|
|
84
|
-
help="A path to a YAML configuration file of the target class",
|
|
77
|
+
help="A path to a YAML configuration file of the evaluation target class",
|
|
85
78
|
exists=True,
|
|
86
79
|
resolve_path=True,
|
|
87
80
|
),
|
|
@@ -106,40 +99,48 @@ def common_args(
|
|
|
106
99
|
"""
|
|
107
100
|
Common arguments for the evaluate commands.
|
|
108
101
|
"""
|
|
109
|
-
|
|
110
|
-
|
|
102
|
+
evaluation_target = get_instance_or_exit(
|
|
103
|
+
cls=WithConstructionConfig,
|
|
111
104
|
factory_path=target_factory_path,
|
|
112
105
|
yaml_path=target_yaml_path,
|
|
106
|
+
config_override=eval_config,
|
|
113
107
|
)
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
108
|
+
state.pipeline = get_evaluation_pipeline_for_target(evaluation_target)
|
|
109
|
+
# TODO: validate if given dataloader is suitable for evaluation pipeline
|
|
110
|
+
state.dataloader = get_instance_or_exit(
|
|
111
|
+
cls=DataLoader,
|
|
112
|
+
factory_path=dataloader_factory_path,
|
|
113
|
+
yaml_path=dataloader_yaml_path,
|
|
114
|
+
config_override=eval_config,
|
|
117
115
|
)
|
|
118
|
-
# TODO validate if given
|
|
119
|
-
state.
|
|
120
|
-
|
|
116
|
+
# TODO: validate if given metric set is suitable for evaluation pipeline
|
|
117
|
+
state.metrics = get_instance_or_exit(
|
|
118
|
+
cls=MetricSet,
|
|
119
|
+
factory_path=metrics_factory_path,
|
|
120
|
+
yaml_path=metrics_yaml_path,
|
|
121
|
+
config_override=eval_config,
|
|
121
122
|
)
|
|
122
123
|
|
|
123
124
|
|
|
124
125
|
@eval_app.command()
|
|
125
|
-
def
|
|
126
|
+
def run() -> None:
|
|
126
127
|
"""
|
|
127
|
-
Evaluate the
|
|
128
|
+
Evaluate the pipeline.
|
|
128
129
|
"""
|
|
129
130
|
|
|
130
131
|
async def run() -> None:
|
|
131
|
-
if state.
|
|
132
|
-
raise ValueError("Evaluation
|
|
132
|
+
if state.dataloader is None:
|
|
133
|
+
raise ValueError("Evaluation dataloader not initialized")
|
|
134
|
+
if state.pipeline is None:
|
|
135
|
+
raise ValueError("Evaluation pipeline not initialized")
|
|
133
136
|
if state.metrics is None:
|
|
134
137
|
raise ValueError("Evaluation metrics not initialized")
|
|
135
|
-
|
|
136
|
-
raise ValueError("Dataloader not initialized")
|
|
137
|
-
evaluation_pipeline = get_evaluation_pipeline_for_target(evaluation_target=state.evaluation_target)
|
|
138
|
+
|
|
138
139
|
evaluator = Evaluator()
|
|
139
140
|
metric_results = await evaluator.compute(
|
|
140
|
-
pipeline=
|
|
141
|
-
metrics=state.metrics,
|
|
141
|
+
pipeline=state.pipeline,
|
|
142
142
|
dataloader=state.dataloader,
|
|
143
|
+
metrics=state.metrics,
|
|
143
144
|
)
|
|
144
145
|
evaluation_results = EvaluationResult(
|
|
145
146
|
metrics={"metrics": metric_results["metrics"], "time_perf": metric_results["time_perf"]}
|
ragbits/evaluate/config.py
CHANGED
|
@@ -3,11 +3,9 @@ from ragbits.core.utils._pyproject import get_config_instance
|
|
|
3
3
|
|
|
4
4
|
|
|
5
5
|
class EvaluateConfig(CoreConfig):
|
|
6
|
-
"""
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
dataloader_default_class: str = "ragbits.evaluate.dataloaders.hf:HFDataLoader"
|
|
6
|
+
"""
|
|
7
|
+
Configuration for the ragbits-evaluate package, loaded from downstream projects' pyproject.toml files.
|
|
8
|
+
"""
|
|
11
9
|
|
|
12
10
|
|
|
13
11
|
eval_config = get_config_instance(EvaluateConfig, subproject="evaluate")
|
|
@@ -1,21 +1,3 @@
|
|
|
1
|
-
from ragbits.core.utils.config_handling import import_by_path
|
|
2
|
-
from ragbits.evaluate.config import EvaluateConfig
|
|
3
1
|
from ragbits.evaluate.dataloaders.base import DataLoader
|
|
4
2
|
|
|
5
3
|
__all__ = ["DataLoader"]
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
def get_dataloader_instance(
|
|
9
|
-
config: EvaluateConfig, dataloader_args: str, dataloader_cls_override: str | None = None
|
|
10
|
-
) -> DataLoader:
|
|
11
|
-
"""
|
|
12
|
-
A function for instantiation of dataloader
|
|
13
|
-
Args:
|
|
14
|
-
config: configuration of ragbits.evaluate module
|
|
15
|
-
dataloader_args: comma separated arguments of dataloader
|
|
16
|
-
dataloader_cls_override: optional path to override of default dataloader class
|
|
17
|
-
Returns:
|
|
18
|
-
DataLoader
|
|
19
|
-
"""
|
|
20
|
-
dataloader_cls = dataloader_cls_override or config.dataloader_default_class
|
|
21
|
-
return import_by_path(dataloader_cls)(*dataloader_args.split(","))
|
|
@@ -1,18 +1,59 @@
|
|
|
1
1
|
from abc import ABC, abstractmethod
|
|
2
|
-
from
|
|
2
|
+
from collections.abc import Iterable
|
|
3
|
+
from types import ModuleType
|
|
4
|
+
from typing import ClassVar, Generic
|
|
3
5
|
|
|
4
|
-
from
|
|
6
|
+
from pydantic import BaseModel
|
|
7
|
+
from typing_extensions import Self
|
|
5
8
|
|
|
6
|
-
|
|
9
|
+
from ragbits.core.sources.base import Source
|
|
10
|
+
from ragbits.core.utils.config_handling import ObjectConstructionConfig, WithConstructionConfig
|
|
11
|
+
from ragbits.evaluate import dataloaders
|
|
12
|
+
from ragbits.evaluate.pipelines.base import EvaluationDataT
|
|
7
13
|
|
|
8
14
|
|
|
9
|
-
class
|
|
15
|
+
class DataLoaderConfig(BaseModel):
|
|
10
16
|
"""
|
|
11
|
-
|
|
17
|
+
Schema for the data loader config.
|
|
12
18
|
"""
|
|
13
19
|
|
|
20
|
+
source: ObjectConstructionConfig
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class DataLoader(WithConstructionConfig, Generic[EvaluationDataT], ABC):
|
|
24
|
+
"""
|
|
25
|
+
Evaluation data loader.
|
|
26
|
+
"""
|
|
27
|
+
|
|
28
|
+
default_module: ClassVar[ModuleType | None] = dataloaders
|
|
29
|
+
configuration_key: ClassVar[str] = "dataloader"
|
|
30
|
+
|
|
31
|
+
def __init__(self, source: Source) -> None:
|
|
32
|
+
"""
|
|
33
|
+
Initialize the data loader.
|
|
34
|
+
|
|
35
|
+
Args:
|
|
36
|
+
source: The source to load the evaluation data from.
|
|
37
|
+
"""
|
|
38
|
+
self.source = source
|
|
39
|
+
|
|
40
|
+
@classmethod
|
|
41
|
+
def from_config(cls, config: dict) -> Self:
|
|
42
|
+
"""
|
|
43
|
+
Create an instance of `DataLoader` from a configuration dictionary.
|
|
44
|
+
|
|
45
|
+
Args:
|
|
46
|
+
config: A dictionary containing configuration settings for the data loader.
|
|
47
|
+
|
|
48
|
+
Returns:
|
|
49
|
+
An instance of the data loader class initialized with the provided configuration.
|
|
50
|
+
"""
|
|
51
|
+
dataloader_config = DataLoaderConfig.model_validate(config)
|
|
52
|
+
config["source"] = Source.subclass_from_config(dataloader_config.source)
|
|
53
|
+
return super().from_config(config)
|
|
54
|
+
|
|
14
55
|
@abstractmethod
|
|
15
|
-
async def load(self) ->
|
|
56
|
+
async def load(self) -> Iterable[EvaluationDataT]:
|
|
16
57
|
"""
|
|
17
58
|
Load the data.
|
|
18
59
|
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
from collections.abc import Iterable
|
|
2
|
+
|
|
3
|
+
from datasets import load_dataset
|
|
4
|
+
|
|
5
|
+
from ragbits.evaluate.dataloaders.base import DataLoader
|
|
6
|
+
from ragbits.evaluate.dataloaders.exceptions import DataLoaderIncorrectFormatDataError
|
|
7
|
+
from ragbits.evaluate.pipelines.document_search import DocumentSearchData
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class DocumentSearchDataLoader(DataLoader[DocumentSearchData]):
|
|
11
|
+
"""
|
|
12
|
+
Document search evaluation data loader.
|
|
13
|
+
|
|
14
|
+
The source used for this data loader should point to a file that can be loaded by [Hugging Face](https://huggingface.co/docs/datasets/loading#local-and-remote-files)
|
|
15
|
+
and contain the following features: "question", "passages".
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
async def load(self) -> Iterable[DocumentSearchData]:
|
|
19
|
+
"""
|
|
20
|
+
Load the data from source and format them.
|
|
21
|
+
|
|
22
|
+
Returns:
|
|
23
|
+
The document search evaluation data.
|
|
24
|
+
|
|
25
|
+
Raises:
|
|
26
|
+
DataLoaderIncorrectFormatDataError: If evaluation dataset is incorrectly formatted.
|
|
27
|
+
"""
|
|
28
|
+
data_path = await self.source.fetch()
|
|
29
|
+
dataset = load_dataset(
|
|
30
|
+
path=str(data_path.parent),
|
|
31
|
+
split=data_path.stem,
|
|
32
|
+
)
|
|
33
|
+
if "question" not in dataset.features or "passages" not in dataset.features:
|
|
34
|
+
raise DataLoaderIncorrectFormatDataError(
|
|
35
|
+
required_features=["question", "passages"],
|
|
36
|
+
data_path=data_path,
|
|
37
|
+
)
|
|
38
|
+
|
|
39
|
+
return [
|
|
40
|
+
DocumentSearchData(
|
|
41
|
+
question=data["question"],
|
|
42
|
+
reference_passages=data["passages"],
|
|
43
|
+
)
|
|
44
|
+
for data in dataset
|
|
45
|
+
]
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class DataLoaderError(Exception):
|
|
5
|
+
"""
|
|
6
|
+
Class for all exceptions raised by the data loader.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
def __init__(self, message: str, data_path: Path) -> None:
|
|
10
|
+
super().__init__(message)
|
|
11
|
+
self.message = message
|
|
12
|
+
self.data_path = data_path
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class DataLoaderIncorrectFormatDataError(DataLoaderError):
|
|
16
|
+
"""
|
|
17
|
+
Raised when the data are incorrectly formatted.
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
def __init__(self, required_features: list[str], data_path: Path) -> None:
|
|
21
|
+
super().__init__(
|
|
22
|
+
message=f"Dataset {data_path} is incorrectly formatted. Required features: {required_features}",
|
|
23
|
+
data_path=data_path,
|
|
24
|
+
)
|
|
25
|
+
self.required_features = required_features
|
ragbits/evaluate/evaluator.py
CHANGED
|
@@ -9,30 +9,46 @@ from tqdm.asyncio import tqdm
|
|
|
9
9
|
from ragbits.core.utils.config_handling import ObjectConstructionConfig, WithConstructionConfig
|
|
10
10
|
from ragbits.evaluate.dataloaders.base import DataLoader
|
|
11
11
|
from ragbits.evaluate.metrics.base import MetricSet
|
|
12
|
-
from ragbits.evaluate.pipelines.base import EvaluationPipeline,
|
|
12
|
+
from ragbits.evaluate.pipelines.base import EvaluationDataT, EvaluationPipeline, EvaluationResultT, EvaluationTargetT
|
|
13
13
|
|
|
14
14
|
|
|
15
|
-
class
|
|
15
|
+
class EvaluationConfig(BaseModel):
|
|
16
16
|
"""
|
|
17
|
-
Schema for
|
|
17
|
+
Schema for the evaluation run config.
|
|
18
18
|
"""
|
|
19
19
|
|
|
20
|
-
dataloader: ObjectConstructionConfig
|
|
21
20
|
pipeline: ObjectConstructionConfig
|
|
21
|
+
dataloader: ObjectConstructionConfig
|
|
22
22
|
metrics: dict[str, ObjectConstructionConfig]
|
|
23
23
|
|
|
24
24
|
|
|
25
|
+
class EvaluatorConfig(BaseModel):
|
|
26
|
+
"""
|
|
27
|
+
Schema for the evaluator config.
|
|
28
|
+
"""
|
|
29
|
+
|
|
30
|
+
evaluation: EvaluationConfig
|
|
31
|
+
evaluator: dict | None = None
|
|
32
|
+
|
|
33
|
+
|
|
25
34
|
class Evaluator(WithConstructionConfig):
|
|
26
35
|
"""
|
|
27
36
|
Evaluator class.
|
|
28
37
|
"""
|
|
29
38
|
|
|
30
|
-
|
|
39
|
+
def __init__(self, batch_size: int = 10) -> None:
|
|
40
|
+
"""
|
|
41
|
+
Initialize the evaluator.
|
|
42
|
+
|
|
43
|
+
Args:
|
|
44
|
+
batch_size: batch size for the evaluation pipeline inference.
|
|
45
|
+
"""
|
|
46
|
+
self.batch_size = batch_size
|
|
31
47
|
|
|
32
48
|
@classmethod
|
|
33
49
|
async def run_from_config(cls, config: dict) -> dict:
|
|
34
50
|
"""
|
|
35
|
-
|
|
51
|
+
Run the evaluation based on configuration.
|
|
36
52
|
|
|
37
53
|
Args:
|
|
38
54
|
config: Evaluation config.
|
|
@@ -40,12 +56,14 @@ class Evaluator(WithConstructionConfig):
|
|
|
40
56
|
Returns:
|
|
41
57
|
The evaluation results.
|
|
42
58
|
"""
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
pipeline: EvaluationPipeline = EvaluationPipeline.subclass_from_config(
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
59
|
+
evaluator_config = EvaluatorConfig.model_validate(config)
|
|
60
|
+
evaluation_config = EvaluationConfig.model_validate(evaluator_config.evaluation)
|
|
61
|
+
pipeline: EvaluationPipeline = EvaluationPipeline.subclass_from_config(evaluation_config.pipeline)
|
|
62
|
+
dataloader: DataLoader = DataLoader.subclass_from_config(evaluation_config.dataloader)
|
|
63
|
+
metrics: MetricSet = MetricSet.from_config(evaluation_config.metrics)
|
|
64
|
+
|
|
65
|
+
evaluator = cls.from_config(evaluator_config.evaluator or {})
|
|
66
|
+
return await evaluator.compute(
|
|
49
67
|
pipeline=pipeline,
|
|
50
68
|
dataloader=dataloader,
|
|
51
69
|
metrics=metrics,
|
|
@@ -53,9 +71,9 @@ class Evaluator(WithConstructionConfig):
|
|
|
53
71
|
|
|
54
72
|
async def compute(
|
|
55
73
|
self,
|
|
56
|
-
pipeline: EvaluationPipeline,
|
|
57
|
-
dataloader: DataLoader,
|
|
58
|
-
metrics: MetricSet,
|
|
74
|
+
pipeline: EvaluationPipeline[EvaluationTargetT, EvaluationDataT, EvaluationResultT],
|
|
75
|
+
dataloader: DataLoader[EvaluationDataT],
|
|
76
|
+
metrics: MetricSet[EvaluationResultT],
|
|
59
77
|
) -> dict:
|
|
60
78
|
"""
|
|
61
79
|
Compute the evaluation results for the given pipeline and data.
|
|
@@ -83,9 +101,9 @@ class Evaluator(WithConstructionConfig):
|
|
|
83
101
|
|
|
84
102
|
async def _call_pipeline(
|
|
85
103
|
self,
|
|
86
|
-
pipeline: EvaluationPipeline,
|
|
87
|
-
dataset: Iterable[
|
|
88
|
-
) -> tuple[list[
|
|
104
|
+
pipeline: EvaluationPipeline[EvaluationTargetT, EvaluationDataT, EvaluationResultT],
|
|
105
|
+
dataset: Iterable[EvaluationDataT],
|
|
106
|
+
) -> tuple[list[EvaluationResultT], dict]:
|
|
89
107
|
"""
|
|
90
108
|
Call the pipeline with the given data.
|
|
91
109
|
|
|
@@ -96,9 +114,9 @@ class Evaluator(WithConstructionConfig):
|
|
|
96
114
|
Returns:
|
|
97
115
|
The evaluation results and performance metrics.
|
|
98
116
|
"""
|
|
99
|
-
semaphore = asyncio.Semaphore(self.
|
|
117
|
+
semaphore = asyncio.Semaphore(self.batch_size)
|
|
100
118
|
|
|
101
|
-
async def _call_pipeline_with_semaphore(data:
|
|
119
|
+
async def _call_pipeline_with_semaphore(data: EvaluationDataT) -> EvaluationResultT:
|
|
102
120
|
async with semaphore:
|
|
103
121
|
return await pipeline(data)
|
|
104
122
|
|
|
@@ -109,7 +127,7 @@ class Evaluator(WithConstructionConfig):
|
|
|
109
127
|
return pipe_outputs, self._compute_time_perf(start_time, end_time, len(pipe_outputs))
|
|
110
128
|
|
|
111
129
|
@staticmethod
|
|
112
|
-
def _results_processor(results: list[
|
|
130
|
+
def _results_processor(results: list[EvaluationResultT]) -> dict:
|
|
113
131
|
"""
|
|
114
132
|
Process the results.
|
|
115
133
|
|
|
@@ -122,7 +140,7 @@ class Evaluator(WithConstructionConfig):
|
|
|
122
140
|
return {"results": [asdict(result) for result in results]}
|
|
123
141
|
|
|
124
142
|
@staticmethod
|
|
125
|
-
def _compute_metrics(metrics: MetricSet, results: list[
|
|
143
|
+
def _compute_metrics(metrics: MetricSet[EvaluationResultT], results: list[EvaluationResultT]) -> dict:
|
|
126
144
|
"""
|
|
127
145
|
Compute a metric using the given inputs.
|
|
128
146
|
|
|
@@ -3,10 +3,12 @@ import asyncio
|
|
|
3
3
|
from datasets import load_dataset
|
|
4
4
|
|
|
5
5
|
from ragbits.core.embeddings.dense import LiteLLMEmbedder
|
|
6
|
+
from ragbits.core.sources.hf import HuggingFaceSource
|
|
6
7
|
from ragbits.core.utils.config_handling import ObjectConstructionConfig
|
|
7
8
|
from ragbits.core.vector_stores.in_memory import InMemoryVectorStore
|
|
8
9
|
from ragbits.document_search import DocumentSearch
|
|
9
10
|
from ragbits.document_search.documents.document import DocumentMeta
|
|
11
|
+
from ragbits.evaluate.dataloaders.document_search import DocumentSearchDataLoader
|
|
10
12
|
from ragbits.evaluate.metrics import MetricSet
|
|
11
13
|
|
|
12
14
|
DS_PRECISION_RECALL_F1 = {
|
|
@@ -27,7 +29,9 @@ DS_PRECISION_RECALL_F1 = {
|
|
|
27
29
|
|
|
28
30
|
|
|
29
31
|
def precision_recall_f1() -> MetricSet:
|
|
30
|
-
"""
|
|
32
|
+
"""
|
|
33
|
+
Factory of precision recall f1 metric set for retrieval evaluation.
|
|
34
|
+
"""
|
|
31
35
|
return MetricSet.from_config(config=DS_PRECISION_RECALL_F1)
|
|
32
36
|
|
|
33
37
|
|
|
@@ -38,7 +42,16 @@ async def _add_example_documents(document_search: DocumentSearch) -> None:
|
|
|
38
42
|
|
|
39
43
|
|
|
40
44
|
def basic_document_search_factory() -> DocumentSearch:
|
|
41
|
-
"""
|
|
45
|
+
"""
|
|
46
|
+
Factory for basic example document search instance.
|
|
47
|
+
"""
|
|
42
48
|
document_search = DocumentSearch(vector_store=InMemoryVectorStore(embedder=LiteLLMEmbedder()))
|
|
43
49
|
asyncio.run(_add_example_documents(document_search))
|
|
44
50
|
return document_search
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def synthetic_rag_dataset() -> DocumentSearchDataLoader:
|
|
54
|
+
"""
|
|
55
|
+
Factory for synthetic RAG dataset.
|
|
56
|
+
"""
|
|
57
|
+
return DocumentSearchDataLoader(source=HuggingFaceSource(path="deepsense-ai/synthetic-rag-dataset_v1.0"))
|
ragbits/evaluate/metrics/base.py
CHANGED
|
@@ -1,19 +1,22 @@
|
|
|
1
1
|
from abc import ABC, abstractmethod
|
|
2
|
-
from
|
|
2
|
+
from types import ModuleType
|
|
3
|
+
from typing import ClassVar, Generic
|
|
3
4
|
|
|
4
5
|
from typing_extensions import Self
|
|
5
6
|
|
|
6
7
|
from ragbits.core.utils.config_handling import WithConstructionConfig
|
|
7
|
-
from ragbits.evaluate
|
|
8
|
+
from ragbits.evaluate import metrics
|
|
9
|
+
from ragbits.evaluate.pipelines.base import EvaluationResultT
|
|
8
10
|
|
|
9
|
-
ResultT = TypeVar("ResultT", bound=EvaluationResult)
|
|
10
11
|
|
|
11
|
-
|
|
12
|
-
class Metric(WithConstructionConfig, Generic[ResultT], ABC):
|
|
12
|
+
class Metric(WithConstructionConfig, Generic[EvaluationResultT], ABC):
|
|
13
13
|
"""
|
|
14
14
|
Base class for metrics.
|
|
15
15
|
"""
|
|
16
16
|
|
|
17
|
+
default_module: ClassVar[ModuleType | None] = metrics
|
|
18
|
+
configuration_key: ClassVar[str] = "metric"
|
|
19
|
+
|
|
17
20
|
def __init__(self, weight: float = 1.0) -> None:
|
|
18
21
|
"""
|
|
19
22
|
Initializes the metric.
|
|
@@ -25,7 +28,7 @@ class Metric(WithConstructionConfig, Generic[ResultT], ABC):
|
|
|
25
28
|
self.weight = weight
|
|
26
29
|
|
|
27
30
|
@abstractmethod
|
|
28
|
-
def compute(self, results: list[
|
|
31
|
+
def compute(self, results: list[EvaluationResultT]) -> dict:
|
|
29
32
|
"""
|
|
30
33
|
Compute the metric.
|
|
31
34
|
|
|
@@ -37,16 +40,17 @@ class Metric(WithConstructionConfig, Generic[ResultT], ABC):
|
|
|
37
40
|
"""
|
|
38
41
|
|
|
39
42
|
|
|
40
|
-
class MetricSet(WithConstructionConfig, Generic[
|
|
43
|
+
class MetricSet(WithConstructionConfig, Generic[EvaluationResultT]):
|
|
41
44
|
"""
|
|
42
45
|
Represents a set of metrics.
|
|
43
46
|
"""
|
|
44
47
|
|
|
45
|
-
configuration_key = "metrics"
|
|
48
|
+
configuration_key: ClassVar[str] = "metrics"
|
|
49
|
+
default_module: ClassVar[ModuleType | None] = metrics
|
|
46
50
|
|
|
47
|
-
def __init__(self, *metrics: Metric[
|
|
51
|
+
def __init__(self, *metrics: Metric[EvaluationResultT]) -> None:
|
|
48
52
|
"""
|
|
49
|
-
|
|
53
|
+
Initialize the metric set.
|
|
50
54
|
|
|
51
55
|
Args:
|
|
52
56
|
metrics: The metrics.
|
|
@@ -66,7 +70,7 @@ class MetricSet(WithConstructionConfig, Generic[ResultT]):
|
|
|
66
70
|
"""
|
|
67
71
|
return cls(*[Metric.subclass_from_config(metric_config) for metric_config in config.values()])
|
|
68
72
|
|
|
69
|
-
def compute(self, results: list[
|
|
73
|
+
def compute(self, results: list[EvaluationResultT]) -> dict:
|
|
70
74
|
"""
|
|
71
75
|
Compute the metrics.
|
|
72
76
|
|
|
@@ -19,7 +19,7 @@ class DocumentSearchMetric(Metric[DocumentSearchResult], ABC):
|
|
|
19
19
|
|
|
20
20
|
def __init__(self, matching_strategy: MatchingStrategy, weight: float = 1.0) -> None:
|
|
21
21
|
"""
|
|
22
|
-
|
|
22
|
+
Initialize the document search metric.
|
|
23
23
|
|
|
24
24
|
Args:
|
|
25
25
|
matching_strategy: Matching strategy that determines relevance.
|
ragbits/evaluate/optimizer.py
CHANGED
|
@@ -17,10 +17,10 @@ from ragbits.evaluate.utils import setup_optuna_neptune_callback
|
|
|
17
17
|
|
|
18
18
|
class OptimizerConfig(BaseModel):
|
|
19
19
|
"""
|
|
20
|
-
Schema for the
|
|
20
|
+
Schema for the optimizer config.
|
|
21
21
|
"""
|
|
22
22
|
|
|
23
|
-
|
|
23
|
+
evaluator: EvaluatorConfig
|
|
24
24
|
optimizer: dict | None = None
|
|
25
25
|
neptune_callback: bool = False
|
|
26
26
|
|
|
@@ -32,7 +32,7 @@ class Optimizer(WithConstructionConfig):
|
|
|
32
32
|
|
|
33
33
|
def __init__(self, direction: str = "maximize", n_trials: int = 10, max_retries_for_trial: int = 1) -> None:
|
|
34
34
|
"""
|
|
35
|
-
|
|
35
|
+
Initialize the pipeline optimizer.
|
|
36
36
|
|
|
37
37
|
Args:
|
|
38
38
|
direction: Direction of optimization.
|
|
@@ -49,7 +49,7 @@ class Optimizer(WithConstructionConfig):
|
|
|
49
49
|
@classmethod
|
|
50
50
|
def run_from_config(cls, config: dict) -> list[tuple[dict, float, dict[str, float]]]:
|
|
51
51
|
"""
|
|
52
|
-
|
|
52
|
+
Run the optimization process configured with a config object.
|
|
53
53
|
|
|
54
54
|
Args:
|
|
55
55
|
config: Optimizer config.
|
|
@@ -58,16 +58,16 @@ class Optimizer(WithConstructionConfig):
|
|
|
58
58
|
List of tested configs with associated scores and metrics.
|
|
59
59
|
"""
|
|
60
60
|
optimizer_config = OptimizerConfig.model_validate(config)
|
|
61
|
-
evaluator_config = EvaluatorConfig.model_validate(optimizer_config.
|
|
61
|
+
evaluator_config = EvaluatorConfig.model_validate(optimizer_config.evaluator)
|
|
62
62
|
|
|
63
|
-
dataloader: DataLoader = DataLoader.subclass_from_config(evaluator_config.dataloader)
|
|
64
|
-
metrics: MetricSet = MetricSet.from_config(evaluator_config.metrics)
|
|
63
|
+
dataloader: DataLoader = DataLoader.subclass_from_config(evaluator_config.evaluation.dataloader)
|
|
64
|
+
metrics: MetricSet = MetricSet.from_config(evaluator_config.evaluation.metrics)
|
|
65
65
|
|
|
66
|
-
pipeline_class = import_by_path(evaluator_config.pipeline.type)
|
|
67
|
-
pipeline_config = dict(
|
|
66
|
+
pipeline_class = import_by_path(evaluator_config.evaluation.pipeline.type)
|
|
67
|
+
pipeline_config = dict(evaluator_config.evaluation.pipeline.config)
|
|
68
68
|
callbacks = [setup_optuna_neptune_callback()] if optimizer_config.neptune_callback else []
|
|
69
69
|
|
|
70
|
-
optimizer = cls.from_config(
|
|
70
|
+
optimizer = cls.from_config(optimizer_config.optimizer or {})
|
|
71
71
|
return optimizer.optimize(
|
|
72
72
|
pipeline_class=pipeline_class,
|
|
73
73
|
pipeline_config=pipeline_config,
|
|
@@ -85,7 +85,7 @@ class Optimizer(WithConstructionConfig):
|
|
|
85
85
|
callbacks: list[Callable] | None = None,
|
|
86
86
|
) -> list[tuple[dict, float, dict[str, float]]]:
|
|
87
87
|
"""
|
|
88
|
-
|
|
88
|
+
Run the optimization process for given parameters.
|
|
89
89
|
|
|
90
90
|
Args:
|
|
91
91
|
pipeline_class: Pipeline to be optimized.
|
|
@@ -134,7 +134,7 @@ class Optimizer(WithConstructionConfig):
|
|
|
134
134
|
metrics: MetricSet,
|
|
135
135
|
) -> float:
|
|
136
136
|
"""
|
|
137
|
-
|
|
137
|
+
Run a single experiment.
|
|
138
138
|
"""
|
|
139
139
|
evaluator = Evaluator()
|
|
140
140
|
event_loop = asyncio.get_event_loop()
|
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
from ragbits.core.utils.config_handling import WithConstructionConfig
|
|
2
2
|
from ragbits.document_search import DocumentSearch
|
|
3
|
-
from ragbits.evaluate.pipelines.base import EvaluationPipeline, EvaluationResult
|
|
3
|
+
from ragbits.evaluate.pipelines.base import EvaluationData, EvaluationPipeline, EvaluationResult
|
|
4
4
|
from ragbits.evaluate.pipelines.document_search import DocumentSearchPipeline
|
|
5
5
|
|
|
6
|
+
__all__ = ["DocumentSearchPipeline", "EvaluationData", "EvaluationPipeline", "EvaluationResult"]
|
|
7
|
+
|
|
6
8
|
_target_to_evaluation_pipeline: dict[type[WithConstructionConfig], type[EvaluationPipeline]] = {
|
|
7
|
-
DocumentSearch: DocumentSearchPipeline
|
|
9
|
+
DocumentSearch: DocumentSearchPipeline,
|
|
8
10
|
}
|
|
9
11
|
|
|
10
|
-
__all__ = ["DocumentSearchPipeline", "EvaluationPipeline", "EvaluationResult"]
|
|
11
|
-
|
|
12
12
|
|
|
13
13
|
def get_evaluation_pipeline_for_target(evaluation_target: WithConstructionConfig) -> EvaluationPipeline:
|
|
14
14
|
"""
|
|
@@ -1,12 +1,24 @@
|
|
|
1
1
|
from abc import ABC, abstractmethod
|
|
2
2
|
from dataclasses import dataclass
|
|
3
|
-
from
|
|
3
|
+
from types import ModuleType
|
|
4
|
+
from typing import ClassVar, Generic, TypeVar
|
|
5
|
+
|
|
6
|
+
from pydantic import BaseModel
|
|
4
7
|
|
|
5
8
|
from ragbits.core.utils.config_handling import WithConstructionConfig
|
|
9
|
+
from ragbits.evaluate import pipelines
|
|
6
10
|
|
|
11
|
+
EvaluationDataT = TypeVar("EvaluationDataT", bound="EvaluationData")
|
|
12
|
+
EvaluationResultT = TypeVar("EvaluationResultT", bound="EvaluationResult")
|
|
7
13
|
EvaluationTargetT = TypeVar("EvaluationTargetT", bound=WithConstructionConfig)
|
|
8
14
|
|
|
9
15
|
|
|
16
|
+
class EvaluationData(BaseModel, ABC):
|
|
17
|
+
"""
|
|
18
|
+
Represents the data for a single evaluation.
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
|
|
10
22
|
@dataclass
|
|
11
23
|
class EvaluationResult(ABC):
|
|
12
24
|
"""
|
|
@@ -14,18 +26,34 @@ class EvaluationResult(ABC):
|
|
|
14
26
|
"""
|
|
15
27
|
|
|
16
28
|
|
|
17
|
-
class EvaluationPipeline(Generic[EvaluationTargetT
|
|
29
|
+
class EvaluationPipeline(WithConstructionConfig, Generic[EvaluationTargetT, EvaluationDataT, EvaluationResultT], ABC):
|
|
18
30
|
"""
|
|
19
|
-
|
|
31
|
+
Evaluation pipeline.
|
|
20
32
|
"""
|
|
21
33
|
|
|
22
|
-
|
|
34
|
+
default_module: ClassVar[ModuleType | None] = pipelines
|
|
35
|
+
configuration_key: ClassVar[str] = "pipeline"
|
|
36
|
+
|
|
37
|
+
def __init__(self, evaluation_target: EvaluationTargetT) -> None:
|
|
38
|
+
"""
|
|
39
|
+
Initialize the evaluation pipeline.
|
|
40
|
+
|
|
41
|
+
Args:
|
|
42
|
+
evaluation_target: Evaluation target instance.
|
|
43
|
+
"""
|
|
44
|
+
super().__init__()
|
|
23
45
|
self.evaluation_target = evaluation_target
|
|
24
46
|
|
|
47
|
+
async def prepare(self) -> None:
|
|
48
|
+
"""
|
|
49
|
+
Prepare pipeline for evaluation. Optional step.
|
|
50
|
+
"""
|
|
51
|
+
pass
|
|
52
|
+
|
|
25
53
|
@abstractmethod
|
|
26
|
-
async def __call__(self, data:
|
|
54
|
+
async def __call__(self, data: EvaluationDataT) -> EvaluationResultT:
|
|
27
55
|
"""
|
|
28
|
-
|
|
56
|
+
Run the evaluation pipeline.
|
|
29
57
|
|
|
30
58
|
Args:
|
|
31
59
|
data: The evaluation data.
|
|
@@ -33,9 +61,3 @@ class EvaluationPipeline(Generic[EvaluationTargetT], WithConstructionConfig, ABC
|
|
|
33
61
|
Returns:
|
|
34
62
|
The evaluation result.
|
|
35
63
|
"""
|
|
36
|
-
|
|
37
|
-
async def prepare(self) -> None:
|
|
38
|
-
"""
|
|
39
|
-
Prepares pipeline for evaluation.
|
|
40
|
-
"""
|
|
41
|
-
pass
|
|
@@ -5,7 +5,16 @@ from typing_extensions import Self
|
|
|
5
5
|
|
|
6
6
|
from ragbits.core.sources.hf import HuggingFaceSource
|
|
7
7
|
from ragbits.document_search import DocumentSearch
|
|
8
|
-
from ragbits.evaluate.pipelines.base import EvaluationPipeline, EvaluationResult
|
|
8
|
+
from ragbits.evaluate.pipelines.base import EvaluationData, EvaluationPipeline, EvaluationResult
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class DocumentSearchData(EvaluationData):
|
|
12
|
+
"""
|
|
13
|
+
Represents the evaluation data for document search.
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
question: str
|
|
17
|
+
reference_passages: list[str]
|
|
9
18
|
|
|
10
19
|
|
|
11
20
|
@dataclass
|
|
@@ -19,14 +28,14 @@ class DocumentSearchResult(EvaluationResult):
|
|
|
19
28
|
predicted_passages: list[str]
|
|
20
29
|
|
|
21
30
|
|
|
22
|
-
class DocumentSearchPipeline(EvaluationPipeline[DocumentSearch]):
|
|
31
|
+
class DocumentSearchPipeline(EvaluationPipeline[DocumentSearch, DocumentSearchData, DocumentSearchResult]):
|
|
23
32
|
"""
|
|
24
33
|
Document search evaluation pipeline.
|
|
25
34
|
"""
|
|
26
35
|
|
|
27
36
|
def __init__(self, evaluation_target: DocumentSearch, source: dict | None = None) -> None:
|
|
28
37
|
"""
|
|
29
|
-
|
|
38
|
+
Initialize the document search evaluation pipeline.
|
|
30
39
|
|
|
31
40
|
Args:
|
|
32
41
|
evaluation_target: Document Search instance.
|
|
@@ -51,12 +60,12 @@ class DocumentSearchPipeline(EvaluationPipeline[DocumentSearch]):
|
|
|
51
60
|
# TODO: optimize this for cases with duplicated document search configs between runs
|
|
52
61
|
if config.get("source"):
|
|
53
62
|
config["vector_store"]["config"]["index_name"] = str(uuid4())
|
|
54
|
-
|
|
55
|
-
return cls(evaluation_target=
|
|
63
|
+
evaluation_target = DocumentSearch.from_config(config)
|
|
64
|
+
return cls(evaluation_target=evaluation_target, source=config.get("source"))
|
|
56
65
|
|
|
57
66
|
async def prepare(self) -> None:
|
|
58
67
|
"""
|
|
59
|
-
|
|
68
|
+
Ingest corpus data for evaluation.
|
|
60
69
|
"""
|
|
61
70
|
if self.source:
|
|
62
71
|
# For now we only support HF sources for pre-evaluation ingest
|
|
@@ -67,9 +76,9 @@ class DocumentSearchPipeline(EvaluationPipeline[DocumentSearch]):
|
|
|
67
76
|
)
|
|
68
77
|
await self.evaluation_target.ingest(sources)
|
|
69
78
|
|
|
70
|
-
async def __call__(self, data:
|
|
79
|
+
async def __call__(self, data: DocumentSearchData) -> DocumentSearchResult:
|
|
71
80
|
"""
|
|
72
|
-
|
|
81
|
+
Run the document search evaluation pipeline.
|
|
73
82
|
|
|
74
83
|
Args:
|
|
75
84
|
data: The evaluation data.
|
|
@@ -77,10 +86,11 @@ class DocumentSearchPipeline(EvaluationPipeline[DocumentSearch]):
|
|
|
77
86
|
Returns:
|
|
78
87
|
The evaluation result.
|
|
79
88
|
"""
|
|
80
|
-
elements = await self.evaluation_target.search(data
|
|
89
|
+
elements = await self.evaluation_target.search(data.question)
|
|
81
90
|
predicted_passages = [element.text_representation for element in elements if element.text_representation]
|
|
91
|
+
|
|
82
92
|
return DocumentSearchResult(
|
|
83
|
-
question=data
|
|
84
|
-
reference_passages=data
|
|
93
|
+
question=data.question,
|
|
94
|
+
reference_passages=data.reference_passages,
|
|
85
95
|
predicted_passages=predicted_passages,
|
|
86
96
|
)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: ragbits-evaluate
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.17.0
|
|
4
4
|
Summary: Evaluation module for Ragbits components
|
|
5
5
|
Project-URL: Homepage, https://github.com/deepsense-ai/ragbits
|
|
6
6
|
Project-URL: Bug Reports, https://github.com/deepsense-ai/ragbits/issues
|
|
@@ -22,11 +22,12 @@ Classifier: Programming Language :: Python :: 3.13
|
|
|
22
22
|
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
23
23
|
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
24
24
|
Requires-Python: >=3.10
|
|
25
|
-
Requires-Dist:
|
|
26
|
-
Requires-Dist:
|
|
27
|
-
Requires-Dist:
|
|
28
|
-
Requires-Dist: optuna
|
|
29
|
-
Requires-Dist:
|
|
25
|
+
Requires-Dist: datasets<4.0.0,>=3.0.1
|
|
26
|
+
Requires-Dist: distilabel<2.0.0,>=1.4.1
|
|
27
|
+
Requires-Dist: hydra-core<2.0.0,>=1.3.2
|
|
28
|
+
Requires-Dist: neptune[optuna]<2.0.0,>=1.12.0
|
|
29
|
+
Requires-Dist: optuna<5.0.0,>=4.0.0
|
|
30
|
+
Requires-Dist: ragbits-core==0.17.0
|
|
30
31
|
Provides-Extra: relari
|
|
31
32
|
Requires-Dist: continuous-eval<1.0.0,>=0.3.12; extra == 'relari'
|
|
32
33
|
Description-Content-Type: text/markdown
|
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
ragbits/evaluate/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
-
ragbits/evaluate/cli.py,sha256=
|
|
3
|
-
ragbits/evaluate/config.py,sha256=
|
|
4
|
-
ragbits/evaluate/evaluator.py,sha256=
|
|
5
|
-
ragbits/evaluate/optimizer.py,sha256=
|
|
2
|
+
ragbits/evaluate/cli.py,sha256=MEDo8ubk81TCNx-fq-liF0P5hjn2-kPpIfq54fReKIY,4509
|
|
3
|
+
ragbits/evaluate/config.py,sha256=2WSmbVxyQi893L2FSjRFQoXkWZp1GetcNmR2GCDe0tA,339
|
|
4
|
+
ragbits/evaluate/evaluator.py,sha256=Cif-QX2n5awOGm-AfFy2nRXkb_m4vGY_JZ_o4K4PhZI,5552
|
|
5
|
+
ragbits/evaluate/optimizer.py,sha256=egcU54aADqKrN31NPqj7cNIQO4UISfG7VtkOAQyQUOY,8471
|
|
6
6
|
ragbits/evaluate/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
7
7
|
ragbits/evaluate/utils.py,sha256=rTTmrP4nv3D7174cMEfohxrDN5thPScH0BsXaptMHqQ,3757
|
|
8
|
-
ragbits/evaluate/dataloaders/__init__.py,sha256=
|
|
9
|
-
ragbits/evaluate/dataloaders/base.py,sha256=
|
|
10
|
-
ragbits/evaluate/dataloaders/
|
|
11
|
-
ragbits/evaluate/dataloaders/
|
|
8
|
+
ragbits/evaluate/dataloaders/__init__.py,sha256=UFJFjmvi3GUQFsx6A5sYD01HH2f7TXcHRW2VNM1pmIA,83
|
|
9
|
+
ragbits/evaluate/dataloaders/base.py,sha256=ovL38_tH12q9wd3yeflIlovGuSD8S1X9HUUtwv17QrM,1774
|
|
10
|
+
ragbits/evaluate/dataloaders/document_search.py,sha256=sqNPQf1ZYAqM_xMjuwh63ET00zEmKtAzqXX04cazuB8,1579
|
|
11
|
+
ragbits/evaluate/dataloaders/exceptions.py,sha256=xUOBLj1JuCkcqzRVnu0A0I_i1THxbDt2MEDVdDGjDyY,735
|
|
12
12
|
ragbits/evaluate/dataset_generator/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
13
13
|
ragbits/evaluate/dataset_generator/pipeline.py,sha256=dgnV-Qm0Z7S1Y6ga9-9RscXxxr3krOKsIj7E9WS4ANk,4940
|
|
14
14
|
ragbits/evaluate/dataset_generator/utils.py,sha256=zD-ksXlX62kkIgzBefE4ILsP7He9bHimnZ63LLsMKCA,1325
|
|
@@ -23,13 +23,13 @@ ragbits/evaluate/dataset_generator/tasks/filter/dont_know.py,sha256=ydMHyI0JrWZf
|
|
|
23
23
|
ragbits/evaluate/dataset_generator/tasks/text_generation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
24
24
|
ragbits/evaluate/dataset_generator/tasks/text_generation/base.py,sha256=2h-Y14H3fRHKbTNvXWKRus8t0hdTITd9LMoIFVwfKfA,2138
|
|
25
25
|
ragbits/evaluate/dataset_generator/tasks/text_generation/qa.py,sha256=QAClPbTVNCe4QzVOGuepRnsmkt9ZF6bXBAuJI2elRuE,3851
|
|
26
|
-
ragbits/evaluate/factories/__init__.py,sha256=
|
|
26
|
+
ragbits/evaluate/factories/__init__.py,sha256=De2ZgQ4YXgvpMOvm81fSDPSMvKpIBjS-aqeE0dxEU1s,2074
|
|
27
27
|
ragbits/evaluate/metrics/__init__.py,sha256=Mr83ytGyvdXtBlr7Bbo0-5auE0530xsd3wffKSIf8cE,95
|
|
28
|
-
ragbits/evaluate/metrics/base.py,sha256=
|
|
29
|
-
ragbits/evaluate/metrics/document_search.py,sha256=
|
|
30
|
-
ragbits/evaluate/pipelines/__init__.py,sha256=
|
|
31
|
-
ragbits/evaluate/pipelines/base.py,sha256=
|
|
32
|
-
ragbits/evaluate/pipelines/document_search.py,sha256=
|
|
33
|
-
ragbits_evaluate-0.
|
|
34
|
-
ragbits_evaluate-0.
|
|
35
|
-
ragbits_evaluate-0.
|
|
28
|
+
ragbits/evaluate/metrics/base.py,sha256=axkGuKJU5u94SnRjpWsdG4jFWjy8rmkSHVRcgz1JLTo,2342
|
|
29
|
+
ragbits/evaluate/metrics/document_search.py,sha256=WeC0xuLYci_Vbdw-E4OjawTqmLkcFKjDWSJGITC9-AQ,2851
|
|
30
|
+
ragbits/evaluate/pipelines/__init__.py,sha256=Bqp_L7aRq12Ua19ELZDsdYvra6-GlLrQ9cIG2IWArko,1294
|
|
31
|
+
ragbits/evaluate/pipelines/base.py,sha256=1GPu3MV-2o0PdUuFM4IcLeg1baYv9acqCcGrQykmRSs,1682
|
|
32
|
+
ragbits/evaluate/pipelines/document_search.py,sha256=xMcSnahy7fifk2bJoolX9OWCXz4FjSJQfBDHIB1d2mQ,3266
|
|
33
|
+
ragbits_evaluate-0.17.0.dist-info/METADATA,sha256=fdHH9MszU2DO5pp18ikVVnOEPkTnQ_TQwddvcvEwWj4,2300
|
|
34
|
+
ragbits_evaluate-0.17.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
35
|
+
ragbits_evaluate-0.17.0.dist-info/RECORD,,
|
|
@@ -1,29 +0,0 @@
|
|
|
1
|
-
from typing import TypeAlias
|
|
2
|
-
|
|
3
|
-
from datasets import Dataset, DatasetDict, IterableDataset, IterableDatasetDict, load_dataset
|
|
4
|
-
|
|
5
|
-
from ragbits.evaluate.dataloaders.base import DataLoader
|
|
6
|
-
|
|
7
|
-
HFData: TypeAlias = DatasetDict | Dataset | IterableDatasetDict | IterableDataset
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
class HFDataLoader(DataLoader[HFData]):
|
|
11
|
-
"""
|
|
12
|
-
Hugging Face data loader.
|
|
13
|
-
"""
|
|
14
|
-
|
|
15
|
-
def __init__(self, path: str, split: str) -> None:
|
|
16
|
-
self.path = path
|
|
17
|
-
self.split = split
|
|
18
|
-
|
|
19
|
-
async def load(self) -> HFData:
|
|
20
|
-
"""
|
|
21
|
-
Load the data from Hugging Face.
|
|
22
|
-
|
|
23
|
-
Returns:
|
|
24
|
-
The loaded data.
|
|
25
|
-
"""
|
|
26
|
-
return load_dataset(
|
|
27
|
-
path=self.path,
|
|
28
|
-
split=self.split,
|
|
29
|
-
)
|
|
@@ -1,45 +0,0 @@
|
|
|
1
|
-
from typing import TypeAlias
|
|
2
|
-
|
|
3
|
-
from datasets import Dataset, DatasetDict, IterableDataset, IterableDatasetDict, load_dataset
|
|
4
|
-
|
|
5
|
-
from .base import DataLoader
|
|
6
|
-
|
|
7
|
-
HFData: TypeAlias = DatasetDict | Dataset | IterableDatasetDict | IterableDataset
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
class LocalDataLoader(DataLoader[DatasetDict]):
|
|
11
|
-
"""
|
|
12
|
-
Local data loader.
|
|
13
|
-
"""
|
|
14
|
-
|
|
15
|
-
AVAILABLE_BUILDERS = {
|
|
16
|
-
"json",
|
|
17
|
-
"csv",
|
|
18
|
-
"parquet",
|
|
19
|
-
"arrow",
|
|
20
|
-
"text",
|
|
21
|
-
"xml",
|
|
22
|
-
"webdataset",
|
|
23
|
-
"imagefolder",
|
|
24
|
-
"audiofolder",
|
|
25
|
-
"videofolder",
|
|
26
|
-
}
|
|
27
|
-
|
|
28
|
-
def __init__(self, path: str, split: str, builder: str) -> None:
|
|
29
|
-
self.path = path
|
|
30
|
-
self.split = split
|
|
31
|
-
self.builder = builder
|
|
32
|
-
|
|
33
|
-
if self.builder not in self.AVAILABLE_BUILDERS:
|
|
34
|
-
raise ValueError(
|
|
35
|
-
f"Unsupported builder '{self.builder}'. Available builders: {', '.join(self.AVAILABLE_BUILDERS)}"
|
|
36
|
-
)
|
|
37
|
-
|
|
38
|
-
async def load(self) -> DatasetDict:
|
|
39
|
-
"""
|
|
40
|
-
Load the data from the local file.
|
|
41
|
-
|
|
42
|
-
Returns:
|
|
43
|
-
The loaded data.
|
|
44
|
-
"""
|
|
45
|
-
return load_dataset(self.builder, data_files=self.path, split=self.split)
|
|
File without changes
|