ragbits-evaluate 1.2.1__py3-none-any.whl → 1.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ragbits-evaluate might be problematic. Click here for more details.
ragbits/evaluate/evaluator.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import asyncio
|
|
2
2
|
import random
|
|
3
3
|
import time
|
|
4
|
-
from collections.abc import Awaitable, Callable, Iterable
|
|
4
|
+
from collections.abc import Awaitable, Callable, Iterable, Sized
|
|
5
5
|
from dataclasses import dataclass
|
|
6
6
|
from typing import Generic, ParamSpec, TypeVar
|
|
7
7
|
|
|
@@ -71,6 +71,7 @@ class Evaluator(WithConstructionConfig):
|
|
|
71
71
|
num_retries: int = 3,
|
|
72
72
|
backoff_multiplier: int = 1,
|
|
73
73
|
backoff_max: int = 60,
|
|
74
|
+
parallelize_batches: bool = False,
|
|
74
75
|
) -> None:
|
|
75
76
|
"""
|
|
76
77
|
Initialize the Evaluator instance.
|
|
@@ -80,11 +81,13 @@ class Evaluator(WithConstructionConfig):
|
|
|
80
81
|
num_retries: The number of retries per evaluation pipeline inference error.
|
|
81
82
|
backoff_multiplier: The base delay multiplier for exponential backoff (in seconds).
|
|
82
83
|
backoff_max: The maximum allowed delay (in seconds) between retries.
|
|
84
|
+
parallelize_batches: Whether to process samples within each batch in parallel (asyncio.gather).
|
|
83
85
|
"""
|
|
84
86
|
self.batch_size = batch_size
|
|
85
87
|
self.num_retries = num_retries
|
|
86
88
|
self.backoff_multiplier = backoff_multiplier
|
|
87
89
|
self.backoff_max = backoff_max
|
|
90
|
+
self.parallelize_batches = parallelize_batches
|
|
88
91
|
|
|
89
92
|
@classmethod
|
|
90
93
|
async def run_from_config(cls, config: dict) -> EvaluatorResult:
|
|
@@ -156,16 +159,33 @@ class Evaluator(WithConstructionConfig):
|
|
|
156
159
|
The evaluation results and performance metrics.
|
|
157
160
|
"""
|
|
158
161
|
start_time = time.perf_counter()
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
]
|
|
162
|
+
|
|
163
|
+
total_samples = len(dataset) if isinstance(dataset, Sized) else None
|
|
164
|
+
batches = batched(dataset, self.batch_size)
|
|
165
|
+
outputs: list[Iterable[EvaluationResultT] | Exception] = []
|
|
166
|
+
|
|
167
|
+
with tqdm(total=total_samples, desc="Evaluation", unit="sample") as progress_bar:
|
|
168
|
+
for batch in batches:
|
|
169
|
+
batch_list = list(batch)
|
|
170
|
+
|
|
171
|
+
if self.parallelize_batches:
|
|
172
|
+
tasks = [self._call_with_error_handling(pipeline, [sample]) for sample in batch_list]
|
|
173
|
+
batch_results = await asyncio.gather(*tasks)
|
|
174
|
+
|
|
175
|
+
for result in batch_results:
|
|
176
|
+
outputs.append(result)
|
|
177
|
+
progress_bar.update(1)
|
|
178
|
+
else:
|
|
179
|
+
result = await self._call_with_error_handling(pipeline, batch_list)
|
|
180
|
+
outputs.append(result)
|
|
181
|
+
progress_bar.update(len(batch_list))
|
|
182
|
+
|
|
163
183
|
end_time = time.perf_counter()
|
|
164
184
|
|
|
165
185
|
errors = [output for output in outputs if isinstance(output, Exception)]
|
|
166
186
|
results = [item for output in outputs if not isinstance(output, Exception) for item in output]
|
|
167
187
|
|
|
168
|
-
return results, errors, self._compute_time_perf(start_time, end_time, len(
|
|
188
|
+
return results, errors, self._compute_time_perf(start_time, end_time, len(results))
|
|
169
189
|
|
|
170
190
|
async def _call_with_error_handling(
|
|
171
191
|
self,
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: ragbits-evaluate
|
|
3
|
-
Version: 1.2.1
|
|
3
|
+
Version: 1.3.0
|
|
4
4
|
Summary: Evaluation module for Ragbits components
|
|
5
5
|
Project-URL: Homepage, https://github.com/deepsense-ai/ragbits
|
|
6
6
|
Project-URL: Bug Reports, https://github.com/deepsense-ai/ragbits/issues
|
|
@@ -27,7 +27,7 @@ Requires-Dist: distilabel<2.0.0,>=1.5.0
|
|
|
27
27
|
Requires-Dist: hydra-core<2.0.0,>=1.3.2
|
|
28
28
|
Requires-Dist: neptune[optuna]<2.0.0,>=1.12.0
|
|
29
29
|
Requires-Dist: optuna<5.0.0,>=4.0.0
|
|
30
|
-
Requires-Dist: ragbits-core==1.2.1
|
|
30
|
+
Requires-Dist: ragbits-core==1.3.0
|
|
31
31
|
Provides-Extra: relari
|
|
32
32
|
Requires-Dist: continuous-eval<1.0.0,>=0.3.12; extra == 'relari'
|
|
33
33
|
Description-Content-Type: text/markdown
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
ragbits/evaluate/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
2
|
ragbits/evaluate/cli.py,sha256=vP8l2DyNXpR6jQP83wXKP_RRnGjEXjKnTVBg9RPbDKo,4505
|
|
3
3
|
ragbits/evaluate/config.py,sha256=2WSmbVxyQi893L2FSjRFQoXkWZp1GetcNmR2GCDe0tA,339
|
|
4
|
-
ragbits/evaluate/evaluator.py,sha256
|
|
4
|
+
ragbits/evaluate/evaluator.py,sha256=-VcO61r340lt6KWTjkl8DdHmU78WygBP7wfYLT2hu9k,8319
|
|
5
5
|
ragbits/evaluate/optimizer.py,sha256=RqYgoiCIEhjXO0HEN6uwldblHyoPuT3qUdncuHPZgCg,8485
|
|
6
6
|
ragbits/evaluate/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
7
7
|
ragbits/evaluate/utils.py,sha256=w-hbvKRHI9tEva9wKDTVla0Wm2eCHT2MxVkof27Sqfw,4831
|
|
@@ -33,6 +33,6 @@ ragbits/evaluate/pipelines/__init__.py,sha256=Bqp_L7aRq12Ua19ELZDsdYvra6-GlLrQ9c
|
|
|
33
33
|
ragbits/evaluate/pipelines/base.py,sha256=QV3fjPnbJjeCgcbt8yV1Ho3BamEUc3wSca3MAzaBlV0,1739
|
|
34
34
|
ragbits/evaluate/pipelines/document_search.py,sha256=tgk-I21eshdBbWVsuNa1zWK_fWuDNXhhMCn1_Fdu_Ko,3840
|
|
35
35
|
ragbits/evaluate/pipelines/question_answer.py,sha256=3CYVHDLnOy4z7kgYPMluiJ8POulHo-w3PEiqvqsF4Dc,2797
|
|
36
|
-
ragbits_evaluate-1.
|
|
37
|
-
ragbits_evaluate-1.
|
|
38
|
-
ragbits_evaluate-1.
|
|
36
|
+
ragbits_evaluate-1.3.0.dist-info/METADATA,sha256=LiLW0olCl5ZPJhxIe0Z7tjkCM2uWv6AVxys9sAJ7tc4,2298
|
|
37
|
+
ragbits_evaluate-1.3.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
38
|
+
ragbits_evaluate-1.3.0.dist-info/RECORD,,
|
|
File without changes
|