ragbits-evaluate 0.0.30rc1__py3-none-any.whl → 1.4.0.dev202602030301__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ragbits/evaluate/agent_simulation/__init__.py +4 -49
- ragbits/evaluate/agent_simulation/conversation.py +278 -663
- ragbits/evaluate/agent_simulation/logger.py +1 -1
- ragbits/evaluate/agent_simulation/metrics/__init__.py +0 -10
- ragbits/evaluate/agent_simulation/metrics/builtin.py +49 -59
- ragbits/evaluate/agent_simulation/metrics/collectors.py +17 -37
- ragbits/evaluate/agent_simulation/models.py +18 -198
- ragbits/evaluate/agent_simulation/results.py +49 -125
- ragbits/evaluate/agent_simulation/scenarios.py +19 -95
- ragbits/evaluate/agent_simulation/simulation.py +166 -72
- ragbits/evaluate/metrics/question_answer.py +25 -8
- {ragbits_evaluate-0.0.30rc1.dist-info → ragbits_evaluate-1.4.0.dev202602030301.dist-info}/METADATA +2 -6
- {ragbits_evaluate-0.0.30rc1.dist-info → ragbits_evaluate-1.4.0.dev202602030301.dist-info}/RECORD +14 -25
- ragbits/evaluate/agent_simulation/checkers.py +0 -591
- ragbits/evaluate/agent_simulation/display.py +0 -118
- ragbits/evaluate/agent_simulation/metrics/deepeval.py +0 -295
- ragbits/evaluate/agent_simulation/tracing.py +0 -233
- ragbits/evaluate/api.py +0 -603
- ragbits/evaluate/api_types.py +0 -343
- ragbits/evaluate/execution_manager.py +0 -451
- ragbits/evaluate/stores/__init__.py +0 -36
- ragbits/evaluate/stores/base.py +0 -98
- ragbits/evaluate/stores/file.py +0 -466
- ragbits/evaluate/stores/kv.py +0 -535
- {ragbits_evaluate-0.0.30rc1.dist-info → ragbits_evaluate-1.4.0.dev202602030301.dist-info}/WHEEL +0 -0
|
@@ -6,13 +6,10 @@ This module uses lazy imports for components that require optional dependencies
|
|
|
6
6
|
|
|
7
7
|
from typing import TYPE_CHECKING
|
|
8
8
|
|
|
9
|
+
# Import context, metrics, and result models eagerly - they have no external dependencies
|
|
9
10
|
from ragbits.evaluate.agent_simulation.context import DataSnapshot, DomainContext
|
|
10
11
|
from ragbits.evaluate.agent_simulation.metrics import (
|
|
11
12
|
CompositeMetricCollector,
|
|
12
|
-
DeepEvalAllMetricsCollector,
|
|
13
|
-
DeepEvalCompletenessMetricCollector,
|
|
14
|
-
DeepEvalKnowledgeRetentionMetricCollector,
|
|
15
|
-
DeepEvalRelevancyMetricCollector,
|
|
16
13
|
LatencyMetricCollector,
|
|
17
14
|
MetricCollector,
|
|
18
15
|
TokenUsageMetricCollector,
|
|
@@ -27,71 +24,41 @@ from ragbits.evaluate.agent_simulation.results import (
|
|
|
27
24
|
)
|
|
28
25
|
|
|
29
26
|
if TYPE_CHECKING:
|
|
30
|
-
from ragbits.agents.tool import ToolCallResult
|
|
31
|
-
from ragbits.core.llms.base import ToolCall, Usage, UsageItem
|
|
32
27
|
from ragbits.evaluate.agent_simulation.conversation import run_simulation
|
|
33
28
|
from ragbits.evaluate.agent_simulation.deepeval_evaluator import DeepEvalEvaluator
|
|
34
29
|
from ragbits.evaluate.agent_simulation.logger import ConversationLogger
|
|
35
|
-
from ragbits.evaluate.agent_simulation.models import Personality, Scenario, SimulationConfig, Task, Turn
|
|
30
|
+
from ragbits.evaluate.agent_simulation.models import Personality, Scenario, Task, Turn
|
|
36
31
|
from ragbits.evaluate.agent_simulation.scenarios import load_personalities, load_scenarios
|
|
37
32
|
from ragbits.evaluate.agent_simulation.simulation import GoalChecker, SimulatedUser
|
|
38
|
-
from ragbits.evaluate.agent_simulation.tracing import (
|
|
39
|
-
LLMCall,
|
|
40
|
-
MemoryTraceHandler,
|
|
41
|
-
TraceAnalyzer,
|
|
42
|
-
TraceSpan,
|
|
43
|
-
collect_traces,
|
|
44
|
-
)
|
|
45
33
|
|
|
46
34
|
__all__ = [
|
|
47
|
-
# Metrics
|
|
48
35
|
"CompositeMetricCollector",
|
|
49
|
-
# Components (lazy loaded)
|
|
50
36
|
"ConversationLogger",
|
|
51
|
-
# Results
|
|
52
37
|
"ConversationMetrics",
|
|
53
|
-
# Context
|
|
54
38
|
"DataSnapshot",
|
|
55
|
-
"DeepEvalAllMetricsCollector",
|
|
56
|
-
"DeepEvalCompletenessMetricCollector",
|
|
57
39
|
"DeepEvalEvaluator",
|
|
58
|
-
"DeepEvalKnowledgeRetentionMetricCollector",
|
|
59
|
-
"DeepEvalRelevancyMetricCollector",
|
|
60
40
|
"DomainContext",
|
|
61
41
|
"GoalChecker",
|
|
62
|
-
# Tracing (lazy loaded)
|
|
63
|
-
"LLMCall",
|
|
64
42
|
"LatencyMetricCollector",
|
|
65
|
-
"MemoryTraceHandler",
|
|
66
43
|
"MetricCollector",
|
|
67
44
|
"Personality",
|
|
68
45
|
"Scenario",
|
|
69
46
|
"SimulatedUser",
|
|
70
|
-
"SimulationConfig",
|
|
71
47
|
"SimulationResult",
|
|
72
48
|
"SimulationStatus",
|
|
73
49
|
"Task",
|
|
74
50
|
"TaskResult",
|
|
75
51
|
"TokenUsageMetricCollector",
|
|
76
|
-
# Re-exports from ragbits-core/agents
|
|
77
|
-
"ToolCall",
|
|
78
|
-
"ToolCallResult",
|
|
79
52
|
"ToolUsageMetricCollector",
|
|
80
|
-
"TraceAnalyzer",
|
|
81
|
-
"TraceSpan",
|
|
82
53
|
"Turn",
|
|
83
54
|
"TurnResult",
|
|
84
|
-
"Usage",
|
|
85
|
-
"UsageItem",
|
|
86
|
-
"collect_traces",
|
|
87
|
-
# Functions (lazy loaded)
|
|
88
55
|
"load_personalities",
|
|
89
56
|
"load_scenarios",
|
|
90
57
|
"run_simulation",
|
|
91
58
|
]
|
|
92
59
|
|
|
93
60
|
|
|
94
|
-
def __getattr__(name: str) -> object: # noqa: PLR0911
|
|
61
|
+
def __getattr__(name: str) -> object:
|
|
95
62
|
"""Lazy import for components with optional dependencies."""
|
|
96
63
|
if name == "run_simulation":
|
|
97
64
|
from ragbits.evaluate.agent_simulation.conversation import run_simulation
|
|
@@ -105,7 +72,7 @@ def __getattr__(name: str) -> object: # noqa: PLR0911
|
|
|
105
72
|
from ragbits.evaluate.agent_simulation.logger import ConversationLogger
|
|
106
73
|
|
|
107
74
|
return ConversationLogger
|
|
108
|
-
if name in ("Personality", "Scenario", "SimulationConfig", "Task", "Turn"):
|
|
75
|
+
if name in ("Personality", "Scenario", "Task", "Turn"):
|
|
109
76
|
from ragbits.evaluate.agent_simulation import models
|
|
110
77
|
|
|
111
78
|
return getattr(models, name)
|
|
@@ -117,16 +84,4 @@ def __getattr__(name: str) -> object: # noqa: PLR0911
|
|
|
117
84
|
from ragbits.evaluate.agent_simulation import simulation
|
|
118
85
|
|
|
119
86
|
return getattr(simulation, name)
|
|
120
|
-
if name in ("LLMCall", "MemoryTraceHandler", "TraceAnalyzer", "TraceSpan", "collect_traces"):
|
|
121
|
-
from ragbits.evaluate.agent_simulation import tracing
|
|
122
|
-
|
|
123
|
-
return getattr(tracing, name)
|
|
124
|
-
if name in ("ToolCall", "Usage", "UsageItem"):
|
|
125
|
-
from ragbits.core.llms import base
|
|
126
|
-
|
|
127
|
-
return getattr(base, name)
|
|
128
|
-
if name == "ToolCallResult":
|
|
129
|
-
from ragbits.agents.tool import ToolCallResult
|
|
130
|
-
|
|
131
|
-
return ToolCallResult
|
|
132
87
|
raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
|