deepeval 3.5.0__py3-none-any.whl → 3.5.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deepeval/_version.py +1 -1
- deepeval/confident/api.py +2 -0
- deepeval/integrations/langchain/__init__.py +2 -3
- deepeval/integrations/langchain/callback.py +126 -280
- deepeval/integrations/langchain/patch.py +24 -13
- deepeval/integrations/langchain/utils.py +203 -1
- deepeval/integrations/pydantic_ai/patcher.py +220 -185
- deepeval/integrations/pydantic_ai/utils.py +86 -0
- deepeval/metrics/__init__.py +1 -1
- deepeval/metrics/answer_relevancy/template.py +13 -38
- deepeval/metrics/conversational_g_eval/conversational_g_eval.py +1 -0
- deepeval/metrics/faithfulness/template.py +17 -27
- deepeval/models/embedding_models/local_embedding_model.py +2 -2
- deepeval/prompt/api.py +24 -2
- deepeval/prompt/prompt.py +141 -17
- deepeval/synthesizer/synthesizer.py +17 -9
- deepeval/tracing/api.py +3 -0
- deepeval/tracing/context.py +3 -1
- deepeval/tracing/tracing.py +12 -2
- deepeval/tracing/types.py +3 -0
- deepeval/tracing/utils.py +6 -2
- deepeval/utils.py +12 -0
- {deepeval-3.5.0.dist-info → deepeval-3.5.2.dist-info}/METADATA +1 -1
- {deepeval-3.5.0.dist-info → deepeval-3.5.2.dist-info}/RECORD +27 -26
- {deepeval-3.5.0.dist-info → deepeval-3.5.2.dist-info}/LICENSE.md +0 -0
- {deepeval-3.5.0.dist-info → deepeval-3.5.2.dist-info}/WHEEL +0 -0
- {deepeval-3.5.0.dist-info → deepeval-3.5.2.dist-info}/entry_points.txt +0 -0
deepeval/_version.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__: str = "3.5.0"
|
|
1
|
+
__version__: str = "3.5.2"
|
deepeval/confident/api.py
CHANGED
|
@@ -89,7 +89,9 @@ class Endpoints(Enum):
|
|
|
89
89
|
TEST_RUN_ENDPOINT = "/v1/test-run"
|
|
90
90
|
TRACES_ENDPOINT = "/v1/traces"
|
|
91
91
|
ANNOTATIONS_ENDPOINT = "/v1/annotations"
|
|
92
|
+
PROMPTS_VERSION_ID_ENDPOINT = "/v1/prompts/:alias/versions/:versionId"
|
|
92
93
|
PROMPTS_ENDPOINT = "/v1/prompts"
|
|
94
|
+
PROMPTS_VERSIONS_ENDPOINT = "/v1/prompts/:alias/versions"
|
|
93
95
|
SIMULATE_ENDPOINT = "/v1/simulate"
|
|
94
96
|
EVALUATE_ENDPOINT = "/v1/evaluate"
|
|
95
97
|
|
|
deepeval/integrations/langchain/callback.py
CHANGED
|
@@ -1,14 +1,12 @@
|
|
|
1
1
|
from typing import Any, Optional, List, Dict
|
|
2
2
|
from uuid import UUID
|
|
3
3
|
from time import perf_counter
|
|
4
|
+
from deepeval.tracing.context import current_trace_context
|
|
4
5
|
from deepeval.tracing.types import (
|
|
5
6
|
LlmOutput,
|
|
6
7
|
LlmToolCall,
|
|
7
|
-
TraceAttributes,
|
|
8
8
|
)
|
|
9
|
-
from deepeval.metrics import BaseMetric
|
|
10
|
-
from deepeval.test_case import LLMTestCase
|
|
11
|
-
from deepeval.test_run import global_test_run_manager
|
|
9
|
+
from deepeval.metrics import BaseMetric
|
|
12
10
|
|
|
13
11
|
try:
|
|
14
12
|
from langchain_core.callbacks.base import BaseCallbackHandler
|
|
@@ -19,11 +17,13 @@ try:
|
|
|
19
17
|
# contains langchain imports
|
|
20
18
|
from deepeval.integrations.langchain.utils import (
|
|
21
19
|
parse_prompts_to_messages,
|
|
22
|
-
prepare_dict,
|
|
23
20
|
extract_name,
|
|
24
21
|
safe_extract_model_name,
|
|
25
22
|
safe_extract_token_usage,
|
|
23
|
+
enter_current_context,
|
|
24
|
+
exit_current_context,
|
|
26
25
|
)
|
|
26
|
+
from deepeval.integrations.langchain.patch import tool
|
|
27
27
|
|
|
28
28
|
langchain_installed = True
|
|
29
29
|
except:
|
|
@@ -37,13 +37,8 @@ def is_langchain_installed():
|
|
|
37
37
|
)
|
|
38
38
|
|
|
39
39
|
|
|
40
|
-
# ASSUMPTIONS:
|
|
41
|
-
# cycle for a single invoke call
|
|
42
|
-
# one trace per cycle
|
|
43
|
-
|
|
44
40
|
from deepeval.tracing import trace_manager
|
|
45
41
|
from deepeval.tracing.types import (
|
|
46
|
-
BaseSpan,
|
|
47
42
|
LlmSpan,
|
|
48
43
|
RetrieverSpan,
|
|
49
44
|
TraceSpanStatus,
|
|
@@ -54,115 +49,32 @@ from deepeval.telemetry import capture_tracing_integration
|
|
|
54
49
|
|
|
55
50
|
class CallbackHandler(BaseCallbackHandler):
|
|
56
51
|
|
|
57
|
-
active_trace_id: Optional[str] = None
|
|
58
|
-
metrics: List[BaseMetric] = []
|
|
59
|
-
metric_collection: Optional[str] = None
|
|
60
|
-
|
|
61
52
|
def __init__(
|
|
62
53
|
self,
|
|
63
|
-
metrics: List[BaseMetric] = [],
|
|
64
|
-
metric_collection: Optional[str] = None,
|
|
65
54
|
name: Optional[str] = None,
|
|
66
55
|
tags: Optional[List[str]] = None,
|
|
67
56
|
metadata: Optional[Dict[str, Any]] = None,
|
|
68
57
|
thread_id: Optional[str] = None,
|
|
69
58
|
user_id: Optional[str] = None,
|
|
59
|
+
metrics: Optional[List[BaseMetric]] = None,
|
|
60
|
+
metric_collection: Optional[str] = None,
|
|
70
61
|
):
|
|
71
62
|
is_langchain_installed()
|
|
72
63
|
with capture_tracing_integration("langchain.callback.CallbackHandler"):
|
|
64
|
+
trace = trace_manager.start_new_trace()
|
|
65
|
+
|
|
66
|
+
self.trace_uuid = trace.uuid
|
|
67
|
+
|
|
68
|
+
trace.name = name
|
|
69
|
+
trace.tags = tags
|
|
70
|
+
trace.metadata = metadata
|
|
71
|
+
trace.thread_id = thread_id
|
|
72
|
+
trace.user_id = user_id
|
|
73
73
|
self.metrics = metrics
|
|
74
74
|
self.metric_collection = metric_collection
|
|
75
|
-
|
|
76
|
-
name=name,
|
|
77
|
-
tags=tags,
|
|
78
|
-
metadata=metadata,
|
|
79
|
-
thread_id=thread_id,
|
|
80
|
-
user_id=user_id,
|
|
81
|
-
)
|
|
75
|
+
current_trace_context.set(trace)
|
|
82
76
|
super().__init__()
|
|
83
77
|
|
|
84
|
-
def check_active_trace_id(self):
|
|
85
|
-
if self.active_trace_id is None:
|
|
86
|
-
self.active_trace_id = trace_manager.start_new_trace().uuid
|
|
87
|
-
|
|
88
|
-
def add_span_to_trace(self, span: BaseSpan):
|
|
89
|
-
trace_manager.add_span(span)
|
|
90
|
-
trace_manager.add_span_to_trace(span)
|
|
91
|
-
|
|
92
|
-
def end_span(self, span: BaseSpan):
|
|
93
|
-
span.end_time = perf_counter()
|
|
94
|
-
span.status = TraceSpanStatus.SUCCESS
|
|
95
|
-
trace_manager.remove_span(str(span.uuid))
|
|
96
|
-
|
|
97
|
-
######## Conditions to add metric_collection to span ########
|
|
98
|
-
if (
|
|
99
|
-
self.metric_collection and span.parent_uuid is None
|
|
100
|
-
): # if span is a root span
|
|
101
|
-
span.metric_collection = self.metric_collection
|
|
102
|
-
|
|
103
|
-
######## Conditions to add metrics to span ########
|
|
104
|
-
if self.metrics and span.parent_uuid is None: # if span is a root span
|
|
105
|
-
|
|
106
|
-
# prepare test_case for task_completion metric
|
|
107
|
-
for metric in self.metrics:
|
|
108
|
-
if isinstance(metric, TaskCompletionMetric):
|
|
109
|
-
self.prepare_span_metric_test_case(metric, span)
|
|
110
|
-
|
|
111
|
-
def end_trace(self, span: BaseSpan):
|
|
112
|
-
current_trace = trace_manager.get_trace_by_uuid(self.active_trace_id)
|
|
113
|
-
|
|
114
|
-
######## Conditions send the trace for evaluation ########
|
|
115
|
-
if self.metrics:
|
|
116
|
-
trace_manager.evaluating = (
|
|
117
|
-
True # to avoid posting the trace to the server
|
|
118
|
-
)
|
|
119
|
-
trace_manager.evaluation_loop = (
|
|
120
|
-
True # to avoid traces being evaluated twice
|
|
121
|
-
)
|
|
122
|
-
trace_manager.integration_traces_to_evaluate.append(current_trace)
|
|
123
|
-
|
|
124
|
-
if current_trace is not None:
|
|
125
|
-
current_trace.input = span.input
|
|
126
|
-
current_trace.output = span.output
|
|
127
|
-
|
|
128
|
-
# set trace attributes
|
|
129
|
-
if self.trace_attributes:
|
|
130
|
-
if self.trace_attributes.name:
|
|
131
|
-
current_trace.name = self.trace_attributes.name
|
|
132
|
-
if self.trace_attributes.tags:
|
|
133
|
-
current_trace.tags = self.trace_attributes.tags
|
|
134
|
-
if self.trace_attributes.metadata:
|
|
135
|
-
current_trace.metadata = self.trace_attributes.metadata
|
|
136
|
-
if self.trace_attributes.thread_id:
|
|
137
|
-
current_trace.thread_id = self.trace_attributes.thread_id
|
|
138
|
-
if self.trace_attributes.user_id:
|
|
139
|
-
current_trace.user_id = self.trace_attributes.user_id
|
|
140
|
-
|
|
141
|
-
trace_manager.end_trace(self.active_trace_id)
|
|
142
|
-
self.active_trace_id = None
|
|
143
|
-
|
|
144
|
-
def prepare_span_metric_test_case(
|
|
145
|
-
self, metric: TaskCompletionMetric, span: BaseSpan
|
|
146
|
-
):
|
|
147
|
-
task_completion_metric = TaskCompletionMetric(
|
|
148
|
-
threshold=metric.threshold,
|
|
149
|
-
model=metric.model,
|
|
150
|
-
include_reason=metric.include_reason,
|
|
151
|
-
async_mode=metric.async_mode,
|
|
152
|
-
strict_mode=metric.strict_mode,
|
|
153
|
-
verbose_mode=metric.verbose_mode,
|
|
154
|
-
)
|
|
155
|
-
task_completion_metric.evaluation_cost = 0
|
|
156
|
-
_llm_test_case = LLMTestCase(input="None", actual_output="None")
|
|
157
|
-
_llm_test_case._trace_dict = trace_manager.create_nested_spans_dict(
|
|
158
|
-
span
|
|
159
|
-
)
|
|
160
|
-
task, _ = task_completion_metric._extract_task_and_outcome(
|
|
161
|
-
_llm_test_case
|
|
162
|
-
)
|
|
163
|
-
task_completion_metric.task = task
|
|
164
|
-
span.metrics = [task_completion_metric]
|
|
165
|
-
|
|
166
78
|
def on_chain_start(
|
|
167
79
|
self,
|
|
168
80
|
serialized: dict[str, Any],
|
|
@@ -174,43 +86,32 @@ class CallbackHandler(BaseCallbackHandler):
|
|
|
174
86
|
metadata: Optional[dict[str, Any]] = None,
|
|
175
87
|
**kwargs: Any,
|
|
176
88
|
) -> Any:
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
metadata=prepare_dict(
|
|
189
|
-
serialized=serialized, tags=tags, metadata=metadata, **kwargs
|
|
190
|
-
),
|
|
191
|
-
# fallback for on_end callback
|
|
192
|
-
end_time=perf_counter(),
|
|
193
|
-
)
|
|
194
|
-
self.add_span_to_trace(base_span)
|
|
89
|
+
if parent_run_id is None:
|
|
90
|
+
uuid_str = str(run_id)
|
|
91
|
+
base_span = enter_current_context(
|
|
92
|
+
uuid_str=uuid_str,
|
|
93
|
+
span_type="custom",
|
|
94
|
+
func_name=extract_name(serialized, **kwargs),
|
|
95
|
+
)
|
|
96
|
+
base_span.input = inputs
|
|
97
|
+
current_trace_context.get().input = inputs
|
|
98
|
+
base_span.metrics = self.metrics
|
|
99
|
+
base_span.metric_collection = self.metric_collection
|
|
195
100
|
|
|
196
101
|
def on_chain_end(
|
|
197
102
|
self,
|
|
198
|
-
|
|
103
|
+
output: Any,
|
|
199
104
|
*,
|
|
200
105
|
run_id: UUID,
|
|
201
106
|
parent_run_id: Optional[UUID] = None,
|
|
202
|
-
**kwargs: Any,
|
|
107
|
+
**kwargs: Any,
|
|
203
108
|
) -> Any:
|
|
204
|
-
|
|
205
|
-
base_span = trace_manager.get_span_by_uuid(
|
|
206
|
-
if base_span
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
self.end_span(base_span)
|
|
211
|
-
|
|
212
|
-
if parent_run_id is None:
|
|
213
|
-
self.end_trace(base_span)
|
|
109
|
+
uuid_str = str(run_id)
|
|
110
|
+
base_span = trace_manager.get_span_by_uuid(uuid_str)
|
|
111
|
+
if base_span:
|
|
112
|
+
base_span.output = output
|
|
113
|
+
current_trace_context.get().output = output
|
|
114
|
+
exit_current_context(uuid_str=uuid_str)
|
|
214
115
|
|
|
215
116
|
def on_llm_start(
|
|
216
117
|
self,
|
|
@@ -223,36 +124,24 @@ class CallbackHandler(BaseCallbackHandler):
|
|
|
223
124
|
metadata: Optional[dict[str, Any]] = None,
|
|
224
125
|
**kwargs: Any,
|
|
225
126
|
) -> Any:
|
|
226
|
-
|
|
227
|
-
self.check_active_trace_id()
|
|
228
|
-
|
|
229
|
-
# extract input
|
|
127
|
+
uuid_str = str(run_id)
|
|
230
128
|
input_messages = parse_prompts_to_messages(prompts, **kwargs)
|
|
231
|
-
|
|
232
|
-
# extract model name
|
|
233
129
|
model = safe_extract_model_name(metadata, **kwargs)
|
|
234
130
|
|
|
235
|
-
llm_span =
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
trace_uuid=self.active_trace_id,
|
|
240
|
-
parent_uuid=str(parent_run_id) if parent_run_id else None,
|
|
241
|
-
start_time=perf_counter(),
|
|
242
|
-
name=extract_name(serialized, **kwargs),
|
|
243
|
-
input=input_messages,
|
|
244
|
-
output="",
|
|
245
|
-
metadata=prepare_dict(
|
|
246
|
-
serialized=serialized, tags=tags, metadata=metadata, **kwargs
|
|
247
|
-
),
|
|
248
|
-
model=model,
|
|
249
|
-
# fallback for on_end callback
|
|
250
|
-
end_time=perf_counter(),
|
|
251
|
-
metric_collection=metadata.get("metric_collection", None),
|
|
252
|
-
metrics=metadata.get("metrics", None),
|
|
131
|
+
llm_span: LlmSpan = enter_current_context(
|
|
132
|
+
uuid_str=uuid_str,
|
|
133
|
+
span_type="llm",
|
|
134
|
+
func_name=extract_name(serialized, **kwargs),
|
|
253
135
|
)
|
|
254
136
|
|
|
255
|
-
|
|
137
|
+
llm_span.input = input_messages
|
|
138
|
+
llm_span.model = model
|
|
139
|
+
metrics = metadata.pop("metrics", None)
|
|
140
|
+
metric_collection = metadata.pop("metric_collection", None)
|
|
141
|
+
prompt = metadata.pop("prompt", None)
|
|
142
|
+
llm_span.metrics = metrics
|
|
143
|
+
llm_span.metric_collection = metric_collection
|
|
144
|
+
llm_span.prompt = prompt
|
|
256
145
|
|
|
257
146
|
def on_llm_end(
|
|
258
147
|
self,
|
|
@@ -262,12 +151,8 @@ class CallbackHandler(BaseCallbackHandler):
|
|
|
262
151
|
parent_run_id: Optional[UUID] = None,
|
|
263
152
|
**kwargs: Any, # un-logged kwargs
|
|
264
153
|
) -> Any:
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
return
|
|
268
|
-
|
|
269
|
-
if not isinstance(llm_span, LlmSpan):
|
|
270
|
-
return
|
|
154
|
+
uuid_str = str(run_id)
|
|
155
|
+
llm_span: LlmSpan = trace_manager.get_span_by_uuid(uuid_str)
|
|
271
156
|
|
|
272
157
|
output = ""
|
|
273
158
|
total_input_tokens = 0
|
|
@@ -317,9 +202,38 @@ class CallbackHandler(BaseCallbackHandler):
|
|
|
317
202
|
total_output_tokens if total_output_tokens > 0 else None
|
|
318
203
|
)
|
|
319
204
|
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
205
|
+
exit_current_context(uuid_str=uuid_str)
|
|
206
|
+
|
|
207
|
+
def on_llm_error(
|
|
208
|
+
self,
|
|
209
|
+
error: BaseException,
|
|
210
|
+
*,
|
|
211
|
+
run_id: UUID,
|
|
212
|
+
parent_run_id: Optional[UUID] = None,
|
|
213
|
+
**kwargs: Any,
|
|
214
|
+
) -> Any:
|
|
215
|
+
uuid_str = str(run_id)
|
|
216
|
+
llm_span: LlmSpan = trace_manager.get_span_by_uuid(uuid_str)
|
|
217
|
+
llm_span.status = TraceSpanStatus.ERRORED
|
|
218
|
+
llm_span.error = str(error)
|
|
219
|
+
exit_current_context(uuid_str=uuid_str)
|
|
220
|
+
|
|
221
|
+
def on_llm_new_token(
|
|
222
|
+
self,
|
|
223
|
+
token: str,
|
|
224
|
+
*,
|
|
225
|
+
chunk,
|
|
226
|
+
run_id: UUID,
|
|
227
|
+
parent_run_id: Optional[UUID] = None,
|
|
228
|
+
tags: Optional[list[str]] = None,
|
|
229
|
+
**kwargs: Any,
|
|
230
|
+
):
|
|
231
|
+
uuid_str = str(run_id)
|
|
232
|
+
llm_span: LlmSpan = trace_manager.get_span_by_uuid(uuid_str)
|
|
233
|
+
if llm_span.token_intervals is None:
|
|
234
|
+
llm_span.token_intervals = {perf_counter(): token}
|
|
235
|
+
else:
|
|
236
|
+
llm_span.token_intervals[perf_counter()] = token
|
|
323
237
|
|
|
324
238
|
def on_tool_start(
|
|
325
239
|
self,
|
|
@@ -333,27 +247,16 @@ class CallbackHandler(BaseCallbackHandler):
|
|
|
333
247
|
inputs: Optional[dict[str, Any]] = None,
|
|
334
248
|
**kwargs: Any,
|
|
335
249
|
) -> Any:
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
parent_uuid=str(parent_run_id) if parent_run_id else None,
|
|
345
|
-
start_time=perf_counter(),
|
|
346
|
-
name=extract_name(serialized, **kwargs),
|
|
347
|
-
input=input_str,
|
|
348
|
-
metadata=prepare_dict(
|
|
349
|
-
serialized=serialized, tags=tags, metadata=metadata, **kwargs
|
|
350
|
-
),
|
|
351
|
-
# fallback for on_end callback
|
|
352
|
-
end_time=perf_counter(),
|
|
353
|
-
metric_collection=metadata.get("metric_collection", None),
|
|
354
|
-
metrics=metadata.get("metrics", None),
|
|
250
|
+
uuid_str = str(run_id)
|
|
251
|
+
|
|
252
|
+
tool_span = enter_current_context(
|
|
253
|
+
uuid_str=uuid_str,
|
|
254
|
+
span_type="tool",
|
|
255
|
+
func_name=extract_name(
|
|
256
|
+
serialized, **kwargs
|
|
257
|
+
), # ignored when setting the input
|
|
355
258
|
)
|
|
356
|
-
|
|
259
|
+
tool_span.input = inputs
|
|
357
260
|
|
|
358
261
|
def on_tool_end(
|
|
359
262
|
self,
|
|
@@ -364,16 +267,24 @@ class CallbackHandler(BaseCallbackHandler):
|
|
|
364
267
|
**kwargs: Any, # un-logged kwargs
|
|
365
268
|
) -> Any:
|
|
366
269
|
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
return
|
|
370
|
-
|
|
270
|
+
uuid_str = str(run_id)
|
|
271
|
+
tool_span: ToolSpan = trace_manager.get_span_by_uuid(uuid_str)
|
|
371
272
|
tool_span.output = output
|
|
273
|
+
exit_current_context(uuid_str=uuid_str)
|
|
372
274
|
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
275
|
+
def on_tool_error(
|
|
276
|
+
self,
|
|
277
|
+
error: BaseException,
|
|
278
|
+
*,
|
|
279
|
+
run_id: UUID,
|
|
280
|
+
parent_run_id: Optional[UUID] = None,
|
|
281
|
+
**kwargs: Any, # un-logged kwargs
|
|
282
|
+
) -> Any:
|
|
283
|
+
uuid_str = str(run_id)
|
|
284
|
+
tool_span: ToolSpan = trace_manager.get_span_by_uuid(uuid_str)
|
|
285
|
+
tool_span.status = TraceSpanStatus.ERRORED
|
|
286
|
+
tool_span.error = str(error)
|
|
287
|
+
exit_current_context(uuid_str=uuid_str)
|
|
377
288
|
|
|
378
289
|
def on_retriever_start(
|
|
379
290
|
self,
|
|
@@ -386,28 +297,16 @@ class CallbackHandler(BaseCallbackHandler):
|
|
|
386
297
|
metadata: Optional[dict[str, Any]] = None,
|
|
387
298
|
**kwargs: Any, # un-logged kwargs
|
|
388
299
|
) -> Any:
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
parent_uuid=str(parent_run_id) if parent_run_id else None,
|
|
398
|
-
start_time=perf_counter(),
|
|
399
|
-
name=extract_name(serialized, **kwargs),
|
|
400
|
-
embedder=metadata.get("ls_embedding_provider", "unknown"),
|
|
401
|
-
metadata=prepare_dict(
|
|
402
|
-
serialized=serialized, tags=tags, metadata=metadata, **kwargs
|
|
403
|
-
),
|
|
404
|
-
# fallback for on_end callback
|
|
405
|
-
end_time=perf_counter(),
|
|
300
|
+
uuid_str = str(run_id)
|
|
301
|
+
retriever_span = enter_current_context(
|
|
302
|
+
uuid_str=uuid_str,
|
|
303
|
+
span_type="retriever",
|
|
304
|
+
func_name=extract_name(serialized, **kwargs),
|
|
305
|
+
observe_kwargs={
|
|
306
|
+
"embedder": metadata.get("ls_embedding_provider", "unknown"),
|
|
307
|
+
},
|
|
406
308
|
)
|
|
407
309
|
retriever_span.input = query
|
|
408
|
-
retriever_span.retrieval_context = []
|
|
409
|
-
|
|
410
|
-
self.add_span_to_trace(retriever_span)
|
|
411
310
|
|
|
412
311
|
def on_retriever_end(
|
|
413
312
|
self,
|
|
@@ -417,11 +316,8 @@ class CallbackHandler(BaseCallbackHandler):
|
|
|
417
316
|
parent_run_id: Optional[UUID] = None,
|
|
418
317
|
**kwargs: Any, # un-logged kwargs
|
|
419
318
|
) -> Any:
|
|
420
|
-
|
|
421
|
-
retriever_span = trace_manager.get_span_by_uuid(
|
|
422
|
-
|
|
423
|
-
if retriever_span is None:
|
|
424
|
-
return
|
|
319
|
+
uuid_str = str(run_id)
|
|
320
|
+
retriever_span: RetrieverSpan = trace_manager.get_span_by_uuid(uuid_str)
|
|
425
321
|
|
|
426
322
|
# prepare output
|
|
427
323
|
output_list = []
|
|
@@ -431,58 +327,8 @@ class CallbackHandler(BaseCallbackHandler):
|
|
|
431
327
|
else:
|
|
432
328
|
output_list.append(str(output))
|
|
433
329
|
|
|
434
|
-
retriever_span.
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
self.end_span(retriever_span)
|
|
438
|
-
|
|
439
|
-
if parent_run_id is None:
|
|
440
|
-
self.end_trace(retriever_span)
|
|
441
|
-
|
|
442
|
-
################## on_error callbacks ###############
|
|
443
|
-
|
|
444
|
-
def on_chain_error(
|
|
445
|
-
self,
|
|
446
|
-
error: BaseException,
|
|
447
|
-
*,
|
|
448
|
-
run_id: UUID,
|
|
449
|
-
parent_run_id: Optional[UUID] = None,
|
|
450
|
-
**kwargs: Any,
|
|
451
|
-
) -> None:
|
|
452
|
-
base_span = trace_manager.get_span_by_uuid(str(run_id))
|
|
453
|
-
if base_span is None:
|
|
454
|
-
return
|
|
455
|
-
|
|
456
|
-
base_span.end_time = perf_counter()
|
|
457
|
-
|
|
458
|
-
def on_llm_error(
|
|
459
|
-
self,
|
|
460
|
-
error: BaseException,
|
|
461
|
-
*,
|
|
462
|
-
run_id: UUID,
|
|
463
|
-
parent_run_id: Optional[UUID] = None,
|
|
464
|
-
**kwargs: Any,
|
|
465
|
-
) -> Any:
|
|
466
|
-
|
|
467
|
-
llm_span = trace_manager.get_span_by_uuid(str(run_id))
|
|
468
|
-
if llm_span is None:
|
|
469
|
-
return
|
|
470
|
-
|
|
471
|
-
llm_span.end_time = perf_counter()
|
|
472
|
-
|
|
473
|
-
def on_tool_error(
|
|
474
|
-
self,
|
|
475
|
-
error: BaseException,
|
|
476
|
-
*,
|
|
477
|
-
run_id: UUID,
|
|
478
|
-
parent_run_id: Optional[UUID] = None,
|
|
479
|
-
**kwargs: Any,
|
|
480
|
-
) -> Any:
|
|
481
|
-
tool_span = trace_manager.get_span_by_uuid(str(run_id))
|
|
482
|
-
if tool_span is None:
|
|
483
|
-
return
|
|
484
|
-
|
|
485
|
-
tool_span.end_time = perf_counter()
|
|
330
|
+
retriever_span.output = output_list
|
|
331
|
+
exit_current_context(uuid_str=uuid_str)
|
|
486
332
|
|
|
487
333
|
def on_retriever_error(
|
|
488
334
|
self,
|
|
@@ -490,10 +336,10 @@ class CallbackHandler(BaseCallbackHandler):
|
|
|
490
336
|
*,
|
|
491
337
|
run_id: UUID,
|
|
492
338
|
parent_run_id: Optional[UUID] = None,
|
|
493
|
-
**kwargs: Any,
|
|
339
|
+
**kwargs: Any, # un-logged kwargs
|
|
494
340
|
) -> Any:
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
|
|
341
|
+
uuid_str = str(run_id)
|
|
342
|
+
retriever_span: RetrieverSpan = trace_manager.get_span_by_uuid(uuid_str)
|
|
343
|
+
retriever_span.status = TraceSpanStatus.ERRORED
|
|
344
|
+
retriever_span.error = str(error)
|
|
345
|
+
exit_current_context(uuid_str=uuid_str)
|
|
deepeval/integrations/langchain/patch.py
CHANGED
|
@@ -1,7 +1,8 @@
|
|
|
1
|
-
|
|
1
|
+
import functools
|
|
2
2
|
from deepeval.metrics import BaseMetric
|
|
3
|
-
from
|
|
4
|
-
from
|
|
3
|
+
from deepeval.tracing.context import current_span_context
|
|
4
|
+
from typing import List, Optional, Callable
|
|
5
|
+
from langchain_core.tools import tool as original_tool, BaseTool
|
|
5
6
|
|
|
6
7
|
|
|
7
8
|
def tool(
|
|
@@ -16,17 +17,27 @@ def tool(
|
|
|
16
17
|
|
|
17
18
|
# original_tool returns a decorator function, so we need to return a decorator
|
|
18
19
|
def decorator(func: Callable) -> BaseTool:
|
|
19
|
-
|
|
20
|
-
# Apply the original tool decorator to get the BaseTool
|
|
20
|
+
func = _patch_tool_decorator(func, metrics, metric_collection)
|
|
21
21
|
tool_instance = original_tool(*args, **kwargs)(func)
|
|
22
|
-
|
|
23
|
-
if isinstance(tool_instance, BaseTool):
|
|
24
|
-
if tool_instance.metadata is None:
|
|
25
|
-
tool_instance.metadata = {}
|
|
26
|
-
|
|
27
|
-
tool_instance.metadata["metric_collection"] = metric_collection
|
|
28
|
-
tool_instance.metadata["metrics"] = metrics
|
|
29
|
-
|
|
30
22
|
return tool_instance
|
|
31
23
|
|
|
32
24
|
return decorator
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def _patch_tool_decorator(
|
|
28
|
+
func: Callable,
|
|
29
|
+
metrics: Optional[List[BaseMetric]] = None,
|
|
30
|
+
metric_collection: Optional[str] = None,
|
|
31
|
+
):
|
|
32
|
+
original_func = func
|
|
33
|
+
|
|
34
|
+
@functools.wraps(original_func)
|
|
35
|
+
def wrapper(*args, **kwargs):
|
|
36
|
+
current_span = current_span_context.get()
|
|
37
|
+
current_span.metrics = metrics
|
|
38
|
+
current_span.metric_collection = metric_collection
|
|
39
|
+
res = original_func(*args, **kwargs)
|
|
40
|
+
return res
|
|
41
|
+
|
|
42
|
+
tool = wrapper
|
|
43
|
+
return tool
|