ragbits-evaluate 0.20.1__py3-none-any.whl → 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Note: this version of ragbits-evaluate has been flagged as potentially problematic.

ragbits/evaluate/dataloaders/question_answer.py

@@ -27,7 +27,6 @@ class QuestionAnswerDataLoader(DataLoader[QuestionAnswerData]):
         Args:
             source: The source to load the data from.
             split: The split to load the data from.
-            required_keys: The required keys to load the data from.
             question_key: The dataset column name that contains the question.
             answer_key: The dataset column name that contains the answer.
             context_key: The dataset column name that contains the context. Context is optional.
ragbits/evaluate/dataset_generator/tasks/text_generation/base.py

@@ -2,7 +2,7 @@ import sys
 from abc import ABC, abstractmethod
 from typing import Any

-from distilabel.llms.base import LLM
+from distilabel.models import LLM
 from distilabel.steps.tasks import TextGeneration

 from ragbits.core.prompt import ChatFormat, Prompt
ragbits/evaluate/dataset_generator/tasks/text_generation/qa.py

@@ -1,6 +1,6 @@
 from typing import Any

-from distilabel.llms.base import LLM
+from distilabel.models import LLM

 from ragbits.evaluate.dataset_generator.tasks.text_generation.base import BaseDistilabelTask
 from ragbits.evaluate.dataset_generator.utils import get_closest_substring, get_passages_list
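The two hunks above are the same one-line fix in both task modules: distilabel moved its `LLM` base class out of `distilabel.llms.base` and into `distilabel.models`, which is why the `distilabel>=1.5.0` floor appears in METADATA below. A minimal compatibility sketch for code that must import `LLM` under either layout (illustrative only; ragbits itself targets just the new path):

    # Hypothetical shim: ragbits pins distilabel>=1.5.0 and imports
    # from the new location unconditionally.
    try:
        from distilabel.models import LLM  # distilabel >= 1.5.0
    except ImportError:
        from distilabel.llms.base import LLM  # older distilabel releases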
ragbits/evaluate/metrics/question_answer.py

@@ -1,5 +1,6 @@
 import asyncio
 from abc import ABC, abstractmethod
+from asyncio import AbstractEventLoop
 from itertools import chain
 from typing import Generic, TypeVar

@@ -27,8 +28,9 @@ class _MetricLMM(LLMInterface):
     Implementation of required interface of Relari generative metrics based on LiteLMM.
     """

-    def __init__(self, llm: LLM) -> None:
+    def __init__(self, llm: LLM, loop: AbstractEventLoop) -> None:
         self._llm = llm
+        self._loop = loop

     def run(self, prompt: dict[str, str], temperature: float = 0, max_tokens: int = 1024) -> str:
         formatted_prompt = [
@@ -39,7 +41,10 @@ class _MetricLMM(LLMInterface):
             temperature=temperature,
             max_tokens=max_tokens,
         )
-        return asyncio.run(self._llm.generate(formatted_prompt, options=options))
+        return asyncio.run_coroutine_threadsafe(
+            self._llm.generate(formatted_prompt, options=options),
+            self._loop,
+        ).result()


 class QuestionAnswerMetric(Generic[MetricT], Metric[QuestionAnswerResult], ABC):
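This is the core behavioral change in `metrics/question_answer.py`. Relari's continuous-eval metrics call `_MetricLMM.run` synchronously from worker threads, and the old `asyncio.run(...)` tried to start a brand-new event loop on every call, which fails when the caller already sits inside a running loop. The new code captures the outer loop at construction time and submits the coroutine back to it with `asyncio.run_coroutine_threadsafe`, blocking only the worker thread on `.result()`. A minimal, self-contained sketch of the pattern, with hypothetical names:

    import asyncio

    async def generate(value: int) -> int:
        await asyncio.sleep(0.01)  # stands in for the async LLM call
        return value * 2

    def sync_run(loop: asyncio.AbstractEventLoop, value: int) -> int:
        # Runs in a worker thread, like _MetricLMM.run: schedule the
        # coroutine on the main loop and block this thread on the result.
        future = asyncio.run_coroutine_threadsafe(generate(value), loop)
        return future.result()

    async def main() -> None:
        loop = asyncio.get_running_loop()  # captured once, as in __init__
        results = await asyncio.gather(
            *[asyncio.to_thread(sync_run, loop, v) for v in range(3)]
        )
        print(results)  # [0, 2, 4]

    asyncio.run(main())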
@@ -60,7 +65,7 @@ class QuestionAnswerMetric(Generic[MetricT], Metric[QuestionAnswerResult], ABC):
             weight: Metric value weight in the final score, used during optimization.
         """
         super().__init__(weight=weight)
-        self.metric = self.metric_cls(_MetricLMM(llm))
+        self.llm = llm
         self.batch_size = batch_size

     @classmethod
@@ -89,16 +94,18 @@ class QuestionAnswerMetric(Generic[MetricT], Metric[QuestionAnswerResult], ABC):
         Returns:
             The computed metric.
         """
+        metric = self.metric_cls(_MetricLMM(self.llm, loop=asyncio.get_running_loop()))
         metric_results = chain.from_iterable(
             [
-                await asyncio.gather(*[asyncio.to_thread(self._call_metric, result) for result in batch])
+                await asyncio.gather(*[asyncio.to_thread(self._call_metric, metric, result) for result in batch])
                 for batch in batched(results, self.batch_size)
             ]
         )
-        return self.metric.aggregate(list(metric_results))
+        return metric.aggregate(list(metric_results))

+    @staticmethod
     @abstractmethod
-    def _call_metric(self, result: QuestionAnswerResult[QuestionAnswerPromptOutputT]) -> dict:
+    def _call_metric(metric: MetricT, result: QuestionAnswerResult[QuestionAnswerPromptOutputT]) -> dict:
         """
         Call the metric with the proper arguments.
         """
@@ -112,8 +119,12 @@ class QuestionAnswerAnswerCorrectness(QuestionAnswerMetric[LLMBasedAnswerCorrectness]):

     metric_cls: type[LLMBasedAnswerCorrectness] = LLMBasedAnswerCorrectness

-    def _call_metric(self, result: QuestionAnswerResult[QuestionAnswerPromptOutputT]) -> dict:
-        return self.metric(
+    @staticmethod
+    def _call_metric(
+        metric: LLMBasedAnswerCorrectness,
+        result: QuestionAnswerResult[QuestionAnswerPromptOutputT],
+    ) -> dict:
+        return metric(
             question=result.question,
             answer=(
                 result.predicted_result.content
@@ -132,8 +143,12 @@ class QuestionAnswerAnswerFaithfulness(QuestionAnswerMetric[LLMBasedFaithfulness]):

     metric_cls: type[LLMBasedFaithfulness] = LLMBasedFaithfulness

-    def _call_metric(self, result: QuestionAnswerResult[QuestionAnswerPromptOutputT]) -> dict:
-        return self.metric(
+    @staticmethod
+    def _call_metric(
+        metric: LLMBasedFaithfulness,
+        result: QuestionAnswerResult[QuestionAnswerPromptOutputT],
+    ) -> dict:
+        return metric(
             question=result.question,
             answer=(
                 result.predicted_result.content
@@ -152,8 +167,12 @@ class QuestionAnswerAnswerRelevance(QuestionAnswerMetric[LLMBasedAnswerRelevance]):

     metric_cls: type[LLMBasedAnswerRelevance] = LLMBasedAnswerRelevance

-    def _call_metric(self, result: QuestionAnswerResult[QuestionAnswerPromptOutputT]) -> dict:
-        return self.metric(
+    @staticmethod
+    def _call_metric(
+        metric: LLMBasedAnswerRelevance,
+        result: QuestionAnswerResult[QuestionAnswerPromptOutputT],
+    ) -> dict:
+        return metric(
             question=result.question,
             answer=(
                 result.predicted_result.content
@@ -171,8 +190,12 @@ class QuestionAnswerAnswerConsistency(QuestionAnswerMetric[LLMBasedStyleConsistency]):

     metric_cls: type[LLMBasedStyleConsistency] = LLMBasedStyleConsistency

-    def _call_metric(self, result: QuestionAnswerResult[QuestionAnswerPromptOutputT]) -> dict:
-        return self.metric(
+    @staticmethod
+    def _call_metric(
+        metric: LLMBasedStyleConsistency,
+        result: QuestionAnswerResult[QuestionAnswerPromptOutputT],
+    ) -> dict:
+        return metric(
             answer=(
                 result.predicted_result.content
                 if isinstance(result.predicted_result.content, str)
ragbits_evaluate-0.20.1.dist-info/METADATA → ragbits_evaluate-1.1.0.dist-info/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ragbits-evaluate
-Version: 0.20.1
+Version: 1.1.0
 Summary: Evaluation module for Ragbits components
 Project-URL: Homepage, https://github.com/deepsense-ai/ragbits
 Project-URL: Bug Reports, https://github.com/deepsense-ai/ragbits/issues
@@ -23,11 +23,11 @@ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
 Classifier: Topic :: Software Development :: Libraries :: Python Modules
 Requires-Python: >=3.10
 Requires-Dist: datasets<4.0.0,>=3.0.1
-Requires-Dist: distilabel<2.0.0,>=1.4.1
+Requires-Dist: distilabel<2.0.0,>=1.5.0
 Requires-Dist: hydra-core<2.0.0,>=1.3.2
 Requires-Dist: neptune[optuna]<2.0.0,>=1.12.0
 Requires-Dist: optuna<5.0.0,>=4.0.0
-Requires-Dist: ragbits-core==0.20.1
+Requires-Dist: ragbits-core==1.1.0
 Provides-Extra: relari
 Requires-Dist: continuous-eval<1.0.0,>=0.3.12; extra == 'relari'
 Description-Content-Type: text/markdown
ragbits_evaluate-0.20.1.dist-info/RECORD → ragbits_evaluate-1.1.0.dist-info/RECORD

@@ -9,7 +9,7 @@ ragbits/evaluate/dataloaders/__init__.py,sha256=UFJFjmvi3GUQFsx6A5sYD01HH2f7TXcH
 ragbits/evaluate/dataloaders/base.py,sha256=x8rEl5utNOziF_9urL0grkqoXwMgaDWYSM5akw3Kt9Y,3213
 ragbits/evaluate/dataloaders/document_search.py,sha256=c9Bc4ZtFEKAiG9B70JFiBZlZDkBSGNWFRKabF7PMTU0,2495
 ragbits/evaluate/dataloaders/exceptions.py,sha256=xUOBLj1JuCkcqzRVnu0A0I_i1THxbDt2MEDVdDGjDyY,735
-ragbits/evaluate/dataloaders/question_answer.py,sha256=naXFDtla0otOTWSyHVvWvgDYEq-Wry4irnAJR2tHMNg,2032
+ragbits/evaluate/dataloaders/question_answer.py,sha256=PvG2n9zSy5bH4NJKgSxgxqHjNozLHPJijuBvryiCq_o,1964
 ragbits/evaluate/dataset_generator/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 ragbits/evaluate/dataset_generator/pipeline.py,sha256=dgnV-Qm0Z7S1Y6ga9-9RscXxxr3krOKsIj7E9WS4ANk,4940
 ragbits/evaluate/dataset_generator/utils.py,sha256=zD-ksXlX62kkIgzBefE4ILsP7He9bHimnZ63LLsMKCA,1325
@@ -22,17 +22,17 @@ ragbits/evaluate/dataset_generator/tasks/filter/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 ragbits/evaluate/dataset_generator/tasks/filter/base.py,sha256=vKSBOaVC5hLzZe2NMS0LrGZwhN07x0M2WcrijXoh5iY,1146
 ragbits/evaluate/dataset_generator/tasks/filter/dont_know.py,sha256=ydMHyI0JrWZfZZqY1EFAZ38SsdYCiXyvrvcDsxJPOBg,988
 ragbits/evaluate/dataset_generator/tasks/text_generation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-ragbits/evaluate/dataset_generator/tasks/text_generation/base.py,sha256=2h-Y14H3fRHKbTNvXWKRus8t0hdTITd9LMoIFVwfKfA,2138
-ragbits/evaluate/dataset_generator/tasks/text_generation/qa.py,sha256=QAClPbTVNCe4QzVOGuepRnsmkt9ZF6bXBAuJI2elRuE,3851
+ragbits/evaluate/dataset_generator/tasks/text_generation/base.py,sha256=bSNsswe2AMskmlctslTo0cJSz_cTc5EW1WW7kshIJPQ,2135
+ragbits/evaluate/dataset_generator/tasks/text_generation/qa.py,sha256=vVk2U3KgnutkqtJOKGoQKxMFD9YLbx4IR7jBIm2a2RE,3848
 ragbits/evaluate/factories/__init__.py,sha256=7nh0J80EfqMWRGtHx4hkfHNMztfC6FMhH8gHumwcH9w,1727
 ragbits/evaluate/metrics/__init__.py,sha256=Mr83ytGyvdXtBlr7Bbo0-5auE0530xsd3wffKSIf8cE,95
 ragbits/evaluate/metrics/base.py,sha256=bOscQ_nJXLGWmP2ls9jncrUoeghNBnKDJsab71pFEjo,2519
 ragbits/evaluate/metrics/document_search.py,sha256=MfvMwEPenqiJdKYuW6WLvmtMch9ZVYb0T6ibpOF3vGI,3189
-ragbits/evaluate/metrics/question_answer.py,sha256=_XMFjkJcG-xdOO2fCfoKIhJb5VVM_GK_yKhFGXO8FRM,6566
+ragbits/evaluate/metrics/question_answer.py,sha256=369lOoY76KY-wUxBKl0lSQlJSF0JhmPpehNQYeiWNHg,7072
 ragbits/evaluate/pipelines/__init__.py,sha256=Bqp_L7aRq12Ua19ELZDsdYvra6-GlLrQ9cIG2IWArko,1294
 ragbits/evaluate/pipelines/base.py,sha256=QV3fjPnbJjeCgcbt8yV1Ho3BamEUc3wSca3MAzaBlV0,1739
 ragbits/evaluate/pipelines/document_search.py,sha256=tgk-I21eshdBbWVsuNa1zWK_fWuDNXhhMCn1_Fdu_Ko,3840
 ragbits/evaluate/pipelines/question_answer.py,sha256=3CYVHDLnOy4z7kgYPMluiJ8POulHo-w3PEiqvqsF4Dc,2797
-ragbits_evaluate-0.20.1.dist-info/METADATA,sha256=HdTC5f4iph7a0DAcopzaI55KElixcr0zsAVtUi2S3wE,2300
-ragbits_evaluate-0.20.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-ragbits_evaluate-0.20.1.dist-info/RECORD,,
+ragbits_evaluate-1.1.0.dist-info/METADATA,sha256=dYK_B94gLLLfz3qPBEP5iF57nd9jFaOOijqkUGijqT8,2298
+ragbits_evaluate-1.1.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+ragbits_evaluate-1.1.0.dist-info/RECORD,,