ragbits-evaluate 1.3.0__tar.gz → 1.4.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ragbits_evaluate-1.3.0 → ragbits_evaluate-1.4.0}/CHANGELOG.md +18 -0
- {ragbits_evaluate-1.3.0 → ragbits_evaluate-1.4.0}/PKG-INFO +2 -2
- {ragbits_evaluate-1.3.0 → ragbits_evaluate-1.4.0}/pyproject.toml +2 -11
- {ragbits_evaluate-1.3.0 → ragbits_evaluate-1.4.0}/src/ragbits/evaluate/metrics/question_answer.py +25 -8
- {ragbits_evaluate-1.3.0 → ragbits_evaluate-1.4.0}/.gitignore +0 -0
- {ragbits_evaluate-1.3.0 → ragbits_evaluate-1.4.0}/README.md +0 -0
- {ragbits_evaluate-1.3.0 → ragbits_evaluate-1.4.0}/src/ragbits/evaluate/__init__.py +0 -0
- {ragbits_evaluate-1.3.0 → ragbits_evaluate-1.4.0}/src/ragbits/evaluate/cli.py +0 -0
- {ragbits_evaluate-1.3.0 → ragbits_evaluate-1.4.0}/src/ragbits/evaluate/config.py +0 -0
- {ragbits_evaluate-1.3.0 → ragbits_evaluate-1.4.0}/src/ragbits/evaluate/dataloaders/__init__.py +0 -0
- {ragbits_evaluate-1.3.0 → ragbits_evaluate-1.4.0}/src/ragbits/evaluate/dataloaders/base.py +0 -0
- {ragbits_evaluate-1.3.0 → ragbits_evaluate-1.4.0}/src/ragbits/evaluate/dataloaders/document_search.py +0 -0
- {ragbits_evaluate-1.3.0 → ragbits_evaluate-1.4.0}/src/ragbits/evaluate/dataloaders/exceptions.py +0 -0
- {ragbits_evaluate-1.3.0 → ragbits_evaluate-1.4.0}/src/ragbits/evaluate/dataloaders/question_answer.py +0 -0
- {ragbits_evaluate-1.3.0 → ragbits_evaluate-1.4.0}/src/ragbits/evaluate/dataset_generator/__init__.py +0 -0
- {ragbits_evaluate-1.3.0 → ragbits_evaluate-1.4.0}/src/ragbits/evaluate/dataset_generator/pipeline.py +0 -0
- {ragbits_evaluate-1.3.0 → ragbits_evaluate-1.4.0}/src/ragbits/evaluate/dataset_generator/prompts/__init__.py +0 -0
- {ragbits_evaluate-1.3.0 → ragbits_evaluate-1.4.0}/src/ragbits/evaluate/dataset_generator/prompts/corpus_generation.py +0 -0
- {ragbits_evaluate-1.3.0 → ragbits_evaluate-1.4.0}/src/ragbits/evaluate/dataset_generator/prompts/qa.py +0 -0
- {ragbits_evaluate-1.3.0 → ragbits_evaluate-1.4.0}/src/ragbits/evaluate/dataset_generator/tasks/__init__.py +0 -0
- {ragbits_evaluate-1.3.0 → ragbits_evaluate-1.4.0}/src/ragbits/evaluate/dataset_generator/tasks/corpus_generation.py +0 -0
- {ragbits_evaluate-1.3.0 → ragbits_evaluate-1.4.0}/src/ragbits/evaluate/dataset_generator/tasks/filter/__init__.py +0 -0
- {ragbits_evaluate-1.3.0 → ragbits_evaluate-1.4.0}/src/ragbits/evaluate/dataset_generator/tasks/filter/base.py +0 -0
- {ragbits_evaluate-1.3.0 → ragbits_evaluate-1.4.0}/src/ragbits/evaluate/dataset_generator/tasks/filter/dont_know.py +0 -0
- {ragbits_evaluate-1.3.0 → ragbits_evaluate-1.4.0}/src/ragbits/evaluate/dataset_generator/tasks/text_generation/__init__.py +0 -0
- {ragbits_evaluate-1.3.0 → ragbits_evaluate-1.4.0}/src/ragbits/evaluate/dataset_generator/tasks/text_generation/base.py +0 -0
- {ragbits_evaluate-1.3.0 → ragbits_evaluate-1.4.0}/src/ragbits/evaluate/dataset_generator/tasks/text_generation/qa.py +0 -0
- {ragbits_evaluate-1.3.0 → ragbits_evaluate-1.4.0}/src/ragbits/evaluate/dataset_generator/utils.py +0 -0
- {ragbits_evaluate-1.3.0 → ragbits_evaluate-1.4.0}/src/ragbits/evaluate/evaluator.py +0 -0
- {ragbits_evaluate-1.3.0 → ragbits_evaluate-1.4.0}/src/ragbits/evaluate/factories/__init__.py +0 -0
- {ragbits_evaluate-1.3.0 → ragbits_evaluate-1.4.0}/src/ragbits/evaluate/metrics/__init__.py +0 -0
- {ragbits_evaluate-1.3.0 → ragbits_evaluate-1.4.0}/src/ragbits/evaluate/metrics/base.py +0 -0
- {ragbits_evaluate-1.3.0 → ragbits_evaluate-1.4.0}/src/ragbits/evaluate/metrics/document_search.py +0 -0
- {ragbits_evaluate-1.3.0 → ragbits_evaluate-1.4.0}/src/ragbits/evaluate/optimizer.py +0 -0
- {ragbits_evaluate-1.3.0 → ragbits_evaluate-1.4.0}/src/ragbits/evaluate/pipelines/__init__.py +0 -0
- {ragbits_evaluate-1.3.0 → ragbits_evaluate-1.4.0}/src/ragbits/evaluate/pipelines/base.py +0 -0
- {ragbits_evaluate-1.3.0 → ragbits_evaluate-1.4.0}/src/ragbits/evaluate/pipelines/document_search.py +0 -0
- {ragbits_evaluate-1.3.0 → ragbits_evaluate-1.4.0}/src/ragbits/evaluate/pipelines/question_answer.py +0 -0
- {ragbits_evaluate-1.3.0 → ragbits_evaluate-1.4.0}/src/ragbits/evaluate/py.typed +0 -0
- {ragbits_evaluate-1.3.0 → ragbits_evaluate-1.4.0}/src/ragbits/evaluate/utils.py +0 -0
- {ragbits_evaluate-1.3.0 → ragbits_evaluate-1.4.0}/tests/cli/test_run_evaluation.py +0 -0
- {ragbits_evaluate-1.3.0 → ragbits_evaluate-1.4.0}/tests/unit/test_evaluator.py +0 -0
- {ragbits_evaluate-1.3.0 → ragbits_evaluate-1.4.0}/tests/unit/test_metrics.py +0 -0
- {ragbits_evaluate-1.3.0 → ragbits_evaluate-1.4.0}/tests/unit/test_optimizer.py +0 -0
|
@@ -2,6 +2,24 @@
|
|
|
2
2
|
|
|
3
3
|
## Unreleased
|
|
4
4
|
|
|
5
|
+
## 1.4.0 (2026-02-04)
|
|
6
|
+
|
|
7
|
+
### Changed
|
|
8
|
+
|
|
9
|
+
- ragbits-core updated to version v1.4.0
|
|
10
|
+
|
|
11
|
+
- Feat: introduce agent evaluation pipelines and metrics (HotpotQA, HumanEval, GAIA) (#829)
|
|
12
|
+
|
|
13
|
+
- Feat: introduce agent simulation module with utilities for agent-to-agent conversation and evaluation scenarios (#857)
|
|
14
|
+
|
|
15
|
+
- Feat: add structured results to agent simulation with `SimulationResult`, `TurnResult`, `TaskResult`, and `ConversationMetrics` models (#885)
|
|
16
|
+
|
|
17
|
+
- Feat: add generic `DomainContext` and `DataSnapshot` for flexible agent simulation context (#884)
|
|
18
|
+
|
|
19
|
+
- Feat: add metrics collection system for agent simulation (`MetricCollector` protocol, `LatencyMetricCollector`, `TokenUsageMetricCollector`, `ToolUsageMetricCollector`) (#882)
|
|
20
|
+
|
|
21
|
+
- Fix: improve `continuous-eval` import compatibility for different package versions specified by constraints
|
|
22
|
+
|
|
5
23
|
## 1.3.0 (2025-09-11)
|
|
6
24
|
|
|
7
25
|
### Changed
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: ragbits-evaluate
|
|
3
|
-
Version: 1.3.0
|
|
3
|
+
Version: 1.4.0
|
|
4
4
|
Summary: Evaluation module for Ragbits components
|
|
5
5
|
Project-URL: Homepage, https://github.com/deepsense-ai/ragbits
|
|
6
6
|
Project-URL: Bug Reports, https://github.com/deepsense-ai/ragbits/issues
|
|
@@ -27,7 +27,7 @@ Requires-Dist: distilabel<2.0.0,>=1.5.0
|
|
|
27
27
|
Requires-Dist: hydra-core<2.0.0,>=1.3.2
|
|
28
28
|
Requires-Dist: neptune[optuna]<2.0.0,>=1.12.0
|
|
29
29
|
Requires-Dist: optuna<5.0.0,>=4.0.0
|
|
30
|
-
Requires-Dist: ragbits-core==1.3.0
|
|
30
|
+
Requires-Dist: ragbits-core==1.4.0
|
|
31
31
|
Provides-Extra: relari
|
|
32
32
|
Requires-Dist: continuous-eval<1.0.0,>=0.3.12; extra == 'relari'
|
|
33
33
|
Description-Content-Type: text/markdown
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "ragbits-evaluate"
|
|
3
|
-
version = "1.3.0"
|
|
3
|
+
version = "1.4.0"
|
|
4
4
|
description = "Evaluation module for Ragbits components"
|
|
5
5
|
readme = "README.md"
|
|
6
6
|
requires-python = ">=3.10"
|
|
@@ -32,7 +32,7 @@ classifiers = [
|
|
|
32
32
|
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
|
33
33
|
"Topic :: Software Development :: Libraries :: Python Modules",
|
|
34
34
|
]
|
|
35
|
-
dependencies = ["hydra-core>=1.3.2,<2.0.0", "neptune[optuna]>=1.12.0,<2.0.0", "optuna>=4.0.0,<5.0.0", "distilabel>=1.5.0,<2.0.0", "datasets>=3.0.1,<4.0.0", "ragbits-core==1.3.0"]
|
|
35
|
+
dependencies = ["hydra-core>=1.3.2,<2.0.0", "neptune[optuna]>=1.12.0,<2.0.0", "optuna>=4.0.0,<5.0.0", "distilabel>=1.5.0,<2.0.0", "datasets>=3.0.1,<4.0.0", "ragbits-core==1.4.0"]
|
|
36
36
|
|
|
37
37
|
[project.urls]
|
|
38
38
|
"Homepage" = "https://github.com/deepsense-ai/ragbits"
|
|
@@ -45,15 +45,6 @@ relari = [
|
|
|
45
45
|
"continuous-eval>=0.3.12,<1.0.0",
|
|
46
46
|
]
|
|
47
47
|
|
|
48
|
-
[tool.uv]
|
|
49
|
-
dev-dependencies = [
|
|
50
|
-
"pre-commit~=3.8.0",
|
|
51
|
-
"pytest~=8.3.3",
|
|
52
|
-
"pytest-cov~=5.0.0",
|
|
53
|
-
"pytest-asyncio~=0.24.0",
|
|
54
|
-
"pip-licenses>=4.0.0,<5.0.0"
|
|
55
|
-
]
|
|
56
|
-
|
|
57
48
|
[build-system]
|
|
58
49
|
requires = ["hatchling"]
|
|
59
50
|
build-backend = "hatchling.build"
|
{ragbits_evaluate-1.3.0 → ragbits_evaluate-1.4.0}/src/ragbits/evaluate/metrics/question_answer.py
RENAMED
|
@@ -4,14 +4,6 @@ from asyncio import AbstractEventLoop
|
|
|
4
4
|
from itertools import chain
|
|
5
5
|
from typing import Generic, TypeVar
|
|
6
6
|
|
|
7
|
-
from continuous_eval.llm_factory import LLMInterface
|
|
8
|
-
from continuous_eval.metrics.base import LLMBasedMetric
|
|
9
|
-
from continuous_eval.metrics.generation.text import (
|
|
10
|
-
LLMBasedAnswerCorrectness,
|
|
11
|
-
LLMBasedAnswerRelevance,
|
|
12
|
-
LLMBasedFaithfulness,
|
|
13
|
-
LLMBasedStyleConsistency,
|
|
14
|
-
)
|
|
15
7
|
from typing_extensions import Self
|
|
16
8
|
|
|
17
9
|
from ragbits.agents.types import QuestionAnswerPromptOutputT
|
|
@@ -20,6 +12,31 @@ from ragbits.core.utils.helpers import batched
|
|
|
20
12
|
from ragbits.evaluate.metrics.base import Metric
|
|
21
13
|
from ragbits.evaluate.pipelines.question_answer import QuestionAnswerResult
|
|
22
14
|
|
|
15
|
+
try:
|
|
16
|
+
from continuous_eval.llm_factory import LLMInterface
|
|
17
|
+
from continuous_eval.metrics.base import LLMBasedMetric
|
|
18
|
+
from continuous_eval.metrics.generation.text import (
|
|
19
|
+
LLMBasedAnswerCorrectness,
|
|
20
|
+
LLMBasedAnswerRelevance,
|
|
21
|
+
LLMBasedFaithfulness,
|
|
22
|
+
LLMBasedStyleConsistency,
|
|
23
|
+
)
|
|
24
|
+
except ModuleNotFoundError:
|
|
25
|
+
from continuous_eval.llms.base import LLMInterface
|
|
26
|
+
from continuous_eval.metrics import Metric as LLMBasedMetric
|
|
27
|
+
from continuous_eval.metrics.generation.text import (
|
|
28
|
+
AnswerCorrectness as LLMBasedAnswerCorrectness,
|
|
29
|
+
)
|
|
30
|
+
from continuous_eval.metrics.generation.text import (
|
|
31
|
+
AnswerRelevance as LLMBasedAnswerRelevance,
|
|
32
|
+
)
|
|
33
|
+
from continuous_eval.metrics.generation.text import (
|
|
34
|
+
Faithfulness as LLMBasedFaithfulness,
|
|
35
|
+
)
|
|
36
|
+
from continuous_eval.metrics.generation.text import (
|
|
37
|
+
StyleConsistency as LLMBasedStyleConsistency,
|
|
38
|
+
)
|
|
39
|
+
|
|
23
40
|
MetricT = TypeVar("MetricT", bound=LLMBasedMetric)
|
|
24
41
|
|
|
25
42
|
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{ragbits_evaluate-1.3.0 → ragbits_evaluate-1.4.0}/src/ragbits/evaluate/dataloaders/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{ragbits_evaluate-1.3.0 → ragbits_evaluate-1.4.0}/src/ragbits/evaluate/dataloaders/exceptions.py
RENAMED
|
File without changes
|
|
File without changes
|
{ragbits_evaluate-1.3.0 → ragbits_evaluate-1.4.0}/src/ragbits/evaluate/dataset_generator/__init__.py
RENAMED
|
File without changes
|
{ragbits_evaluate-1.3.0 → ragbits_evaluate-1.4.0}/src/ragbits/evaluate/dataset_generator/pipeline.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{ragbits_evaluate-1.3.0 → ragbits_evaluate-1.4.0}/src/ragbits/evaluate/dataset_generator/utils.py
RENAMED
|
File without changes
|
|
File without changes
|
{ragbits_evaluate-1.3.0 → ragbits_evaluate-1.4.0}/src/ragbits/evaluate/factories/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{ragbits_evaluate-1.3.0 → ragbits_evaluate-1.4.0}/src/ragbits/evaluate/metrics/document_search.py
RENAMED
|
File without changes
|
|
File without changes
|
{ragbits_evaluate-1.3.0 → ragbits_evaluate-1.4.0}/src/ragbits/evaluate/pipelines/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
{ragbits_evaluate-1.3.0 → ragbits_evaluate-1.4.0}/src/ragbits/evaluate/pipelines/document_search.py
RENAMED
|
File without changes
|
{ragbits_evaluate-1.3.0 → ragbits_evaluate-1.4.0}/src/ragbits/evaluate/pipelines/question_answer.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|