PyPI - ragbits-evaluate - Versions diffs - 1.3.0__tar.gz → 1.4.0__tar.gz - Mend

ragbits-evaluate 1.3.0.tar.gz → 1.4.0.tar.gz

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (44)

{ragbits_evaluate-1.3.0 → ragbits_evaluate-1.4.0}/CHANGELOG.md RENAMED Viewed

@@ -2,6 +2,24 @@
 ## Unreleased
+## 1.4.0 (2026-02-04)
+### Changed
+- ragbits-core updated to version v1.4.0
+- Feat: introduce agent evaluation pipelines and metrics (HotpotQA, HumanEval, GAIA) (#829)
+- Feat: introduce agent simulation module with utilities for agent-to-agent conversation and evaluation scenarios (#857)
+- Feat: add structured results to agent simulation with `SimulationResult`, `TurnResult`, `TaskResult`, and `ConversationMetrics` models (#885)
+- Feat: add generic `DomainContext` and `DataSnapshot` for flexible agent simulation context (#884)
+- Feat: add metrics collection system for agent simulation (`MetricCollector` protocol, `LatencyMetricCollector`, `TokenUsageMetricCollector`, `ToolUsageMetricCollector`) (#882)
+- Fix: improve `continuous-eval` import compatibility for different package versions specified by constraints
 ## 1.3.0 (2025-09-11)
 ### Changed

{ragbits_evaluate-1.3.0 → ragbits_evaluate-1.4.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ragbits-evaluate
-Version: 1.3.0
+Version: 1.4.0
 Summary: Evaluation module for Ragbits components
 Project-URL: Homepage, https://github.com/deepsense-ai/ragbits
 Project-URL: Bug Reports, https://github.com/deepsense-ai/ragbits/issues
@@ -27,7 +27,7 @@ Requires-Dist: distilabel<2.0.0,>=1.5.0
 Requires-Dist: hydra-core<2.0.0,>=1.3.2
 Requires-Dist: neptune[optuna]<2.0.0,>=1.12.0
 Requires-Dist: optuna<5.0.0,>=4.0.0
-Requires-Dist: ragbits-core==1.3.0
+Requires-Dist: ragbits-core==1.4.0
 Provides-Extra: relari
 Requires-Dist: continuous-eval<1.0.0,>=0.3.12; extra == 'relari'
 Description-Content-Type: text/markdown

{ragbits_evaluate-1.3.0 → ragbits_evaluate-1.4.0}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "ragbits-evaluate"
-version = "1.3.0"
+version = "1.4.0"
 description = "Evaluation module for Ragbits components"
 readme = "README.md"
 requires-python = ">=3.10"
@@ -32,7 +32,7 @@ classifiers = [
     "Topic :: Scientific/Engineering :: Artificial Intelligence",
     "Topic :: Software Development :: Libraries :: Python Modules",
 ]
-dependencies = ["hydra-core>=1.3.2,<2.0.0", "neptune[optuna]>=1.12.0,<2.0.0", "optuna>=4.0.0,<5.0.0", "distilabel>=1.5.0,<2.0.0", "datasets>=3.0.1,<4.0.0", "ragbits-core==1.3.0"]
+dependencies = ["hydra-core>=1.3.2,<2.0.0", "neptune[optuna]>=1.12.0,<2.0.0", "optuna>=4.0.0,<5.0.0", "distilabel>=1.5.0,<2.0.0", "datasets>=3.0.1,<4.0.0", "ragbits-core==1.4.0"]
 [project.urls]
 "Homepage" = "https://github.com/deepsense-ai/ragbits"
@@ -45,15 +45,6 @@ relari = [
     "continuous-eval>=0.3.12,<1.0.0",
 ]
-[tool.uv]
-dev-dependencies = [
-    "pre-commit~=3.8.0",
-    "pytest~=8.3.3",
-    "pytest-cov~=5.0.0",
-    "pytest-asyncio~=0.24.0",
-    "pip-licenses>=4.0.0,<5.0.0"
-]
 [build-system]
 requires = ["hatchling"]
 build-backend = "hatchling.build"

{ragbits_evaluate-1.3.0 → ragbits_evaluate-1.4.0}/src/ragbits/evaluate/metrics/question_answer.py RENAMED Viewed

@@ -4,14 +4,6 @@ from asyncio import AbstractEventLoop
 from itertools import chain
 from typing import Generic, TypeVar
-from continuous_eval.llm_factory import LLMInterface
-from continuous_eval.metrics.base import LLMBasedMetric
-from continuous_eval.metrics.generation.text import (
-    LLMBasedAnswerCorrectness,
-    LLMBasedAnswerRelevance,
-    LLMBasedFaithfulness,
-    LLMBasedStyleConsistency,
-)
 from typing_extensions import Self
 from ragbits.agents.types import QuestionAnswerPromptOutputT
@@ -20,6 +12,31 @@ from ragbits.core.utils.helpers import batched
 from ragbits.evaluate.metrics.base import Metric
 from ragbits.evaluate.pipelines.question_answer import QuestionAnswerResult
+try:
+    from continuous_eval.llm_factory import LLMInterface
+    from continuous_eval.metrics.base import LLMBasedMetric
+    from continuous_eval.metrics.generation.text import (
+        LLMBasedAnswerCorrectness,
+        LLMBasedAnswerRelevance,
+        LLMBasedFaithfulness,
+        LLMBasedStyleConsistency,
+    )
+except ModuleNotFoundError:
+    from continuous_eval.llms.base import LLMInterface
+    from continuous_eval.metrics import Metric as LLMBasedMetric
+    from continuous_eval.metrics.generation.text import (
+        AnswerCorrectness as LLMBasedAnswerCorrectness,
+    )
+    from continuous_eval.metrics.generation.text import (
+        AnswerRelevance as LLMBasedAnswerRelevance,
+    )
+    from continuous_eval.metrics.generation.text import (
+        Faithfulness as LLMBasedFaithfulness,
+    )
+    from continuous_eval.metrics.generation.text import (
+        StyleConsistency as LLMBasedStyleConsistency,
+    )
 MetricT = TypeVar("MetricT", bound=LLMBasedMetric)