deepeval 3.7.9__py3-none-any.whl → 3.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deepeval/_version.py +1 -1
- deepeval/cli/main.py +168 -0
- deepeval/confident/api.py +2 -0
- deepeval/config/settings.py +10 -0
- deepeval/constants.py +1 -0
- deepeval/integrations/langchain/callback.py +330 -158
- deepeval/integrations/langchain/utils.py +31 -8
- deepeval/key_handler.py +8 -1
- deepeval/metrics/conversational_g_eval/conversational_g_eval.py +35 -0
- deepeval/metrics/g_eval/g_eval.py +35 -1
- deepeval/metrics/g_eval/utils.py +65 -0
- deepeval/models/__init__.py +2 -0
- deepeval/models/llms/__init__.py +2 -0
- deepeval/models/llms/constants.py +23 -0
- deepeval/models/llms/openai_model.py +5 -4
- deepeval/models/llms/openrouter_model.py +398 -0
- deepeval/models/retry_policy.py +3 -0
- deepeval/prompt/api.py +1 -0
- deepeval/tracing/tracing.py +6 -1
- deepeval/tracing/types.py +1 -1
- {deepeval-3.7.9.dist-info → deepeval-3.8.0.dist-info}/METADATA +3 -3
- {deepeval-3.7.9.dist-info → deepeval-3.8.0.dist-info}/RECORD +25 -24
- {deepeval-3.7.9.dist-info → deepeval-3.8.0.dist-info}/LICENSE.md +0 -0
- {deepeval-3.7.9.dist-info → deepeval-3.8.0.dist-info}/WHEEL +0 -0
- {deepeval-3.7.9.dist-info → deepeval-3.8.0.dist-info}/entry_points.txt +0 -0
|
@@ -1,15 +1,24 @@
|
|
|
1
1
|
from typing import Any, Optional, List, Dict
|
|
2
2
|
from uuid import UUID
|
|
3
3
|
from time import perf_counter
|
|
4
|
+
from contextlib import contextmanager
|
|
4
5
|
|
|
5
|
-
from deepeval.tracing.context import
|
|
6
|
+
from deepeval.tracing.context import current_trace_context, current_span_context
|
|
6
7
|
from deepeval.test_case.llm_test_case import ToolCall
|
|
7
8
|
from deepeval.tracing.types import (
|
|
8
9
|
LlmOutput,
|
|
9
10
|
LlmToolCall,
|
|
10
11
|
)
|
|
11
12
|
from deepeval.metrics import BaseMetric
|
|
13
|
+
from deepeval.tracing import trace_manager
|
|
12
14
|
from deepeval.tracing.utils import prepare_tool_call_input_parameters
|
|
15
|
+
from deepeval.tracing.types import (
|
|
16
|
+
LlmSpan,
|
|
17
|
+
RetrieverSpan,
|
|
18
|
+
TraceSpanStatus,
|
|
19
|
+
ToolSpan,
|
|
20
|
+
)
|
|
21
|
+
from deepeval.telemetry import capture_tracing_integration
|
|
13
22
|
|
|
14
23
|
try:
|
|
15
24
|
from langchain_core.callbacks.base import BaseCallbackHandler
|
|
@@ -26,10 +35,10 @@ try:
|
|
|
26
35
|
enter_current_context,
|
|
27
36
|
exit_current_context,
|
|
28
37
|
)
|
|
29
|
-
from deepeval.integrations.langchain.patch import tool
|
|
38
|
+
from deepeval.integrations.langchain.patch import tool # noqa: F401
|
|
30
39
|
|
|
31
40
|
langchain_installed = True
|
|
32
|
-
except:
|
|
41
|
+
except ImportError:
|
|
33
42
|
langchain_installed = False
|
|
34
43
|
|
|
35
44
|
|
|
@@ -40,16 +49,6 @@ def is_langchain_installed():
|
|
|
40
49
|
)
|
|
41
50
|
|
|
42
51
|
|
|
43
|
-
from deepeval.tracing import trace_manager
|
|
44
|
-
from deepeval.tracing.types import (
|
|
45
|
-
LlmSpan,
|
|
46
|
-
RetrieverSpan,
|
|
47
|
-
TraceSpanStatus,
|
|
48
|
-
ToolSpan,
|
|
49
|
-
)
|
|
50
|
-
from deepeval.telemetry import capture_tracing_integration
|
|
51
|
-
|
|
52
|
-
|
|
53
52
|
class CallbackHandler(BaseCallbackHandler):
|
|
54
53
|
|
|
55
54
|
def __init__(
|
|
@@ -64,20 +63,134 @@ class CallbackHandler(BaseCallbackHandler):
|
|
|
64
63
|
):
|
|
65
64
|
is_langchain_installed()
|
|
66
65
|
with capture_tracing_integration("langchain.callback.CallbackHandler"):
|
|
67
|
-
trace
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
66
|
+
# Do not create or set a trace in __init__.
|
|
67
|
+
# CallbackHandler instances are often constructed outside the async Task
|
|
68
|
+
# that actually runs LangGraph/LangChain. Creating a trace here can
|
|
69
|
+
# corrupt ContextVars and break @observe-wrapped async execution.
|
|
70
|
+
self._trace = None
|
|
71
|
+
self.trace_uuid = None
|
|
72
|
+
|
|
73
|
+
# Fallback parent span, captured lazily while callbacks execute.
|
|
74
|
+
self._parent_span = None
|
|
75
|
+
|
|
76
|
+
# Stash trace metadata to apply once we know which trace we are using.
|
|
77
|
+
self._trace_init_fields: Dict[str, Any] = {
|
|
78
|
+
"name": name,
|
|
79
|
+
"tags": tags,
|
|
80
|
+
"metadata": metadata,
|
|
81
|
+
"thread_id": thread_id,
|
|
82
|
+
"user_id": user_id,
|
|
83
|
+
}
|
|
84
|
+
|
|
85
|
+
# Map LangChain run_id -> our span uuid for parent span restoration
|
|
86
|
+
self._run_id_to_span_uuid: Dict[str, str] = {}
|
|
87
|
+
|
|
88
|
+
# Trace metadata is not set here; _ensure_trace() applies the stashed fields that are not None.
|
|
76
89
|
self.metrics = metrics
|
|
77
90
|
self.metric_collection = metric_collection
|
|
78
|
-
current_trace_context.set(trace)
|
|
79
91
|
super().__init__()
|
|
80
92
|
|
|
93
|
+
def _ensure_trace(self):
|
|
94
|
+
"""
|
|
95
|
+
Ensure there's an active trace in ContextVars for this callback invocation.
|
|
96
|
+
This is done lazily during actual callback execution to avoid context
|
|
97
|
+
corruption when the handler is constructed outside the async task/context.
|
|
98
|
+
"""
|
|
99
|
+
# Prefer current context trace if it is active.
|
|
100
|
+
ctx_trace = current_trace_context.get()
|
|
101
|
+
if ctx_trace and ctx_trace.uuid in trace_manager.active_traces:
|
|
102
|
+
trace = ctx_trace
|
|
103
|
+
else:
|
|
104
|
+
# Otherwise, restore our stored trace if still active.
|
|
105
|
+
if self._trace and self._trace.uuid in trace_manager.active_traces:
|
|
106
|
+
trace = self._trace
|
|
107
|
+
current_trace_context.set(trace)
|
|
108
|
+
else:
|
|
109
|
+
# Otherwise, create a fresh trace now (in the right context).
|
|
110
|
+
trace = trace_manager.start_new_trace()
|
|
111
|
+
current_trace_context.set(trace)
|
|
112
|
+
self._trace = trace
|
|
113
|
+
|
|
114
|
+
# Keep a copy for quick access.
|
|
115
|
+
self.trace_uuid = trace.uuid
|
|
116
|
+
|
|
117
|
+
# Apply stashed metadata once.
|
|
118
|
+
fields = getattr(self, "_trace_init_fields", None) or {}
|
|
119
|
+
if fields:
|
|
120
|
+
if fields.get("name") is not None:
|
|
121
|
+
trace.name = fields["name"]
|
|
122
|
+
if fields.get("tags") is not None:
|
|
123
|
+
trace.tags = fields["tags"]
|
|
124
|
+
if fields.get("metadata") is not None:
|
|
125
|
+
trace.metadata = fields["metadata"]
|
|
126
|
+
if fields.get("thread_id") is not None:
|
|
127
|
+
trace.thread_id = fields["thread_id"]
|
|
128
|
+
if fields.get("user_id") is not None:
|
|
129
|
+
trace.user_id = fields["user_id"]
|
|
130
|
+
# prevent re-applying on every callback
|
|
131
|
+
self._trace_init_fields = {}
|
|
132
|
+
|
|
133
|
+
# Lazily capture the observe parent span if present.
|
|
134
|
+
if self._parent_span is None:
|
|
135
|
+
self._parent_span = current_span_context.get()
|
|
136
|
+
|
|
137
|
+
return trace
|
|
138
|
+
|
|
139
|
+
@contextmanager
|
|
140
|
+
def _ctx(self, run_id: UUID, parent_run_id: Optional[UUID] = None):
|
|
141
|
+
"""
|
|
142
|
+
Context manager to restore trace and span context for callbacks running
|
|
143
|
+
in different async tasks. In async LangChain/LangGraph execution, ContextVar
|
|
144
|
+
values set inside one task do not flow back to its parent or to sibling tasks, so we explicitly restore them.
|
|
145
|
+
|
|
146
|
+
IMPORTANT: parent_run_id from LangChain is the source of truth for hierarchy.
|
|
147
|
+
We ALWAYS use it to set the correct parent span, not just when context is lost.
|
|
148
|
+
"""
|
|
149
|
+
span_token = None
|
|
150
|
+
|
|
151
|
+
try:
|
|
152
|
+
# Ensure we have a valid trace in this execution context.
|
|
153
|
+
# May start a trace here, or restore a stored one, or reuse an @observe trace.
|
|
154
|
+
self._ensure_trace()
|
|
155
|
+
|
|
156
|
+
# Set parent span based on LangChain's parent_run_id (source of truth for hierarchy)
|
|
157
|
+
# Priority order:
|
|
158
|
+
# 1. Parent span from run_id mapping (LangChain's parent_run_id)
|
|
159
|
+
# 2. Parent span captured lazily in _ensure_trace (from @observe wrapper)
|
|
160
|
+
# 3. Keep existing context
|
|
161
|
+
|
|
162
|
+
target_parent_span = None
|
|
163
|
+
|
|
164
|
+
# First, try to find parent from LangChain's parent_run_id
|
|
165
|
+
if parent_run_id is not None:
|
|
166
|
+
parent_run_id_str = str(parent_run_id)
|
|
167
|
+
if parent_run_id_str in self._run_id_to_span_uuid:
|
|
168
|
+
parent_span_uuid = self._run_id_to_span_uuid[
|
|
169
|
+
parent_run_id_str
|
|
170
|
+
]
|
|
171
|
+
target_parent_span = trace_manager.get_span_by_uuid(
|
|
172
|
+
parent_span_uuid
|
|
173
|
+
)
|
|
174
|
+
|
|
175
|
+
# Fall back to the span captured lazily in _ensure_trace (from @observe wrapper)
|
|
176
|
+
if target_parent_span is None and self._parent_span:
|
|
177
|
+
if trace_manager.get_span_by_uuid(self._parent_span.uuid):
|
|
178
|
+
target_parent_span = self._parent_span
|
|
179
|
+
|
|
180
|
+
# Set the parent span context if we found one and it's different from current
|
|
181
|
+
current_span = current_span_context.get()
|
|
182
|
+
if target_parent_span and (
|
|
183
|
+
current_span is None
|
|
184
|
+
or current_span.uuid != target_parent_span.uuid
|
|
185
|
+
):
|
|
186
|
+
span_token = current_span_context.set(target_parent_span)
|
|
187
|
+
|
|
188
|
+
yield
|
|
189
|
+
|
|
190
|
+
finally:
|
|
191
|
+
if span_token is not None:
|
|
192
|
+
current_span_context.reset(span_token)
|
|
193
|
+
|
|
81
194
|
def on_chain_start(
|
|
82
195
|
self,
|
|
83
196
|
serialized: dict[str, Any],
|
|
@@ -89,17 +202,27 @@ class CallbackHandler(BaseCallbackHandler):
|
|
|
89
202
|
metadata: Optional[dict[str, Any]] = None,
|
|
90
203
|
**kwargs: Any,
|
|
91
204
|
) -> Any:
|
|
92
|
-
|
|
205
|
+
# Create spans for all chains to establish proper parent-child hierarchy
|
|
206
|
+
# This is important for LangGraph where there are nested chains
|
|
207
|
+
with self._ctx(run_id=run_id, parent_run_id=parent_run_id):
|
|
93
208
|
uuid_str = str(run_id)
|
|
94
209
|
base_span = enter_current_context(
|
|
95
210
|
uuid_str=uuid_str,
|
|
96
211
|
span_type="custom",
|
|
97
212
|
func_name=extract_name(serialized, **kwargs),
|
|
98
213
|
)
|
|
214
|
+
# Register this run_id -> span mapping for child callbacks
|
|
215
|
+
self._run_id_to_span_uuid[str(run_id)] = uuid_str
|
|
216
|
+
|
|
99
217
|
base_span.input = inputs
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
218
|
+
|
|
219
|
+
# Only set trace-level input/metrics for root chain
|
|
220
|
+
if parent_run_id is None:
|
|
221
|
+
trace = trace_manager.get_trace_by_uuid(base_span.trace_uuid)
|
|
222
|
+
if trace:
|
|
223
|
+
trace.input = inputs
|
|
224
|
+
base_span.metrics = self.metrics
|
|
225
|
+
base_span.metric_collection = self.metric_collection
|
|
103
226
|
|
|
104
227
|
def on_chain_end(
|
|
105
228
|
self,
|
|
@@ -112,9 +235,16 @@ class CallbackHandler(BaseCallbackHandler):
|
|
|
112
235
|
uuid_str = str(run_id)
|
|
113
236
|
base_span = trace_manager.get_span_by_uuid(uuid_str)
|
|
114
237
|
if base_span:
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
238
|
+
with self._ctx(run_id=run_id, parent_run_id=parent_run_id):
|
|
239
|
+
base_span.output = output
|
|
240
|
+
# Only set trace-level output for root chain
|
|
241
|
+
if parent_run_id is None:
|
|
242
|
+
trace = trace_manager.get_trace_by_uuid(
|
|
243
|
+
base_span.trace_uuid
|
|
244
|
+
)
|
|
245
|
+
if trace:
|
|
246
|
+
trace.output = output
|
|
247
|
+
exit_current_context(uuid_str=uuid_str)
|
|
118
248
|
|
|
119
249
|
def on_llm_start(
|
|
120
250
|
self,
|
|
@@ -127,24 +257,27 @@ class CallbackHandler(BaseCallbackHandler):
|
|
|
127
257
|
metadata: Optional[dict[str, Any]] = None,
|
|
128
258
|
**kwargs: Any,
|
|
129
259
|
) -> Any:
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
llm_span: LlmSpan = enter_current_context(
|
|
135
|
-
uuid_str=uuid_str,
|
|
136
|
-
span_type="llm",
|
|
137
|
-
func_name=extract_name(serialized, **kwargs),
|
|
138
|
-
)
|
|
260
|
+
with self._ctx(run_id=run_id, parent_run_id=parent_run_id):
|
|
261
|
+
uuid_str = str(run_id)
|
|
262
|
+
input_messages = parse_prompts_to_messages(prompts, **kwargs)
|
|
263
|
+
model = safe_extract_model_name(metadata, **kwargs)
|
|
139
264
|
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
265
|
+
llm_span: LlmSpan = enter_current_context(
|
|
266
|
+
uuid_str=uuid_str,
|
|
267
|
+
span_type="llm",
|
|
268
|
+
func_name=extract_name(serialized, **kwargs),
|
|
269
|
+
)
|
|
270
|
+
# Register this run_id -> span mapping for child callbacks
|
|
271
|
+
self._run_id_to_span_uuid[str(run_id)] = uuid_str
|
|
272
|
+
|
|
273
|
+
llm_span.input = input_messages
|
|
274
|
+
llm_span.model = model
|
|
275
|
+
metrics = metadata.pop("metrics", None)
|
|
276
|
+
metric_collection = metadata.pop("metric_collection", None)
|
|
277
|
+
prompt = metadata.pop("prompt", None)
|
|
278
|
+
llm_span.metrics = metrics
|
|
279
|
+
llm_span.metric_collection = metric_collection
|
|
280
|
+
llm_span.prompt = prompt
|
|
148
281
|
|
|
149
282
|
def on_llm_end(
|
|
150
283
|
self,
|
|
@@ -156,56 +289,63 @@ class CallbackHandler(BaseCallbackHandler):
|
|
|
156
289
|
) -> Any:
|
|
157
290
|
uuid_str = str(run_id)
|
|
158
291
|
llm_span: LlmSpan = trace_manager.get_span_by_uuid(uuid_str)
|
|
292
|
+
if llm_span is None:
|
|
293
|
+
return
|
|
294
|
+
|
|
295
|
+
with self._ctx(run_id=run_id, parent_run_id=parent_run_id):
|
|
296
|
+
output = ""
|
|
297
|
+
total_input_tokens = 0
|
|
298
|
+
total_output_tokens = 0
|
|
299
|
+
model = None
|
|
300
|
+
|
|
301
|
+
for generation in response.generations:
|
|
302
|
+
for gen in generation:
|
|
303
|
+
if isinstance(gen, ChatGeneration):
|
|
304
|
+
if gen.message.response_metadata and isinstance(
|
|
305
|
+
gen.message.response_metadata, dict
|
|
306
|
+
):
|
|
307
|
+
# extract model name from response_metadata
|
|
308
|
+
model = gen.message.response_metadata.get(
|
|
309
|
+
"model_name"
|
|
310
|
+
)
|
|
159
311
|
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
for generation in response.generations:
|
|
166
|
-
for gen in generation:
|
|
167
|
-
if isinstance(gen, ChatGeneration):
|
|
168
|
-
if gen.message.response_metadata and isinstance(
|
|
169
|
-
gen.message.response_metadata, dict
|
|
170
|
-
):
|
|
171
|
-
# extract model name from response_metadata
|
|
172
|
-
model = gen.message.response_metadata.get("model_name")
|
|
173
|
-
|
|
174
|
-
# extract input and output token
|
|
175
|
-
input_tokens, output_tokens = safe_extract_token_usage(
|
|
176
|
-
gen.message.response_metadata
|
|
177
|
-
)
|
|
178
|
-
total_input_tokens += input_tokens
|
|
179
|
-
total_output_tokens += output_tokens
|
|
180
|
-
|
|
181
|
-
if isinstance(gen.message, AIMessage):
|
|
182
|
-
ai_message = gen.message
|
|
183
|
-
tool_calls = []
|
|
184
|
-
for tool_call in ai_message.tool_calls:
|
|
185
|
-
tool_calls.append(
|
|
186
|
-
LlmToolCall(
|
|
187
|
-
name=tool_call["name"],
|
|
188
|
-
args=tool_call["args"],
|
|
189
|
-
id=tool_call["id"],
|
|
312
|
+
# extract input and output token counts
|
|
313
|
+
input_tokens, output_tokens = (
|
|
314
|
+
safe_extract_token_usage(
|
|
315
|
+
gen.message.response_metadata
|
|
190
316
|
)
|
|
191
317
|
)
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
318
|
+
total_input_tokens += input_tokens
|
|
319
|
+
total_output_tokens += output_tokens
|
|
320
|
+
|
|
321
|
+
if isinstance(gen.message, AIMessage):
|
|
322
|
+
ai_message = gen.message
|
|
323
|
+
tool_calls = []
|
|
324
|
+
for tool_call in ai_message.tool_calls:
|
|
325
|
+
tool_calls.append(
|
|
326
|
+
LlmToolCall(
|
|
327
|
+
name=tool_call["name"],
|
|
328
|
+
args=tool_call["args"],
|
|
329
|
+
id=tool_call["id"],
|
|
330
|
+
)
|
|
331
|
+
)
|
|
332
|
+
output = LlmOutput(
|
|
333
|
+
role="AI",
|
|
334
|
+
content=ai_message.content,
|
|
335
|
+
tool_calls=tool_calls,
|
|
336
|
+
)
|
|
337
|
+
|
|
338
|
+
llm_span.model = model if model else llm_span.model
|
|
339
|
+
llm_span.input = llm_span.input
|
|
340
|
+
llm_span.output = output
|
|
341
|
+
llm_span.input_token_count = (
|
|
342
|
+
total_input_tokens if total_input_tokens > 0 else None
|
|
343
|
+
)
|
|
344
|
+
llm_span.output_token_count = (
|
|
345
|
+
total_output_tokens if total_output_tokens > 0 else None
|
|
346
|
+
)
|
|
207
347
|
|
|
208
|
-
|
|
348
|
+
exit_current_context(uuid_str=uuid_str)
|
|
209
349
|
|
|
210
350
|
def on_llm_error(
|
|
211
351
|
self,
|
|
@@ -217,9 +357,12 @@ class CallbackHandler(BaseCallbackHandler):
|
|
|
217
357
|
) -> Any:
|
|
218
358
|
uuid_str = str(run_id)
|
|
219
359
|
llm_span: LlmSpan = trace_manager.get_span_by_uuid(uuid_str)
|
|
220
|
-
llm_span
|
|
221
|
-
|
|
222
|
-
|
|
360
|
+
if llm_span is None:
|
|
361
|
+
return
|
|
362
|
+
with self._ctx(run_id=run_id, parent_run_id=parent_run_id):
|
|
363
|
+
llm_span.status = TraceSpanStatus.ERRORED
|
|
364
|
+
llm_span.error = str(error)
|
|
365
|
+
exit_current_context(uuid_str=uuid_str)
|
|
223
366
|
|
|
224
367
|
def on_llm_new_token(
|
|
225
368
|
self,
|
|
@@ -233,10 +376,13 @@ class CallbackHandler(BaseCallbackHandler):
|
|
|
233
376
|
):
|
|
234
377
|
uuid_str = str(run_id)
|
|
235
378
|
llm_span: LlmSpan = trace_manager.get_span_by_uuid(uuid_str)
|
|
236
|
-
if llm_span
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
llm_span.token_intervals
|
|
379
|
+
if llm_span is None:
|
|
380
|
+
return
|
|
381
|
+
with self._ctx(run_id=run_id, parent_run_id=parent_run_id):
|
|
382
|
+
if llm_span.token_intervals is None:
|
|
383
|
+
llm_span.token_intervals = {perf_counter(): token}
|
|
384
|
+
else:
|
|
385
|
+
llm_span.token_intervals[perf_counter()] = token
|
|
240
386
|
|
|
241
387
|
def on_tool_start(
|
|
242
388
|
self,
|
|
@@ -250,16 +396,19 @@ class CallbackHandler(BaseCallbackHandler):
|
|
|
250
396
|
inputs: Optional[dict[str, Any]] = None,
|
|
251
397
|
**kwargs: Any,
|
|
252
398
|
) -> Any:
|
|
253
|
-
|
|
399
|
+
with self._ctx(run_id=run_id, parent_run_id=parent_run_id):
|
|
400
|
+
uuid_str = str(run_id)
|
|
254
401
|
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
402
|
+
tool_span = enter_current_context(
|
|
403
|
+
uuid_str=uuid_str,
|
|
404
|
+
span_type="tool",
|
|
405
|
+
func_name=extract_name(
|
|
406
|
+
serialized, **kwargs
|
|
407
|
+
), # ignored when setting the input
|
|
408
|
+
)
|
|
409
|
+
# Register this run_id -> span mapping for child callbacks
|
|
410
|
+
self._run_id_to_span_uuid[str(run_id)] = uuid_str
|
|
411
|
+
tool_span.input = inputs
|
|
263
412
|
|
|
264
413
|
def on_tool_end(
|
|
265
414
|
self,
|
|
@@ -271,31 +420,40 @@ class CallbackHandler(BaseCallbackHandler):
|
|
|
271
420
|
) -> Any:
|
|
272
421
|
uuid_str = str(run_id)
|
|
273
422
|
tool_span: ToolSpan = trace_manager.get_span_by_uuid(uuid_str)
|
|
274
|
-
tool_span
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
# set the tools called in the parent span as well as on the trace level
|
|
278
|
-
tool_call = ToolCall(
|
|
279
|
-
name=tool_span.name,
|
|
280
|
-
description=tool_span.description,
|
|
281
|
-
output=output,
|
|
282
|
-
input_parameters=prepare_tool_call_input_parameters(
|
|
283
|
-
tool_span.input
|
|
284
|
-
),
|
|
285
|
-
)
|
|
286
|
-
parent_span = current_span_context.get()
|
|
287
|
-
if parent_span:
|
|
288
|
-
if parent_span.tools_called is None:
|
|
289
|
-
parent_span.tools_called = []
|
|
423
|
+
if tool_span is None:
|
|
424
|
+
return
|
|
290
425
|
|
|
291
|
-
|
|
426
|
+
with self._ctx(run_id=run_id, parent_run_id=parent_run_id):
|
|
427
|
+
tool_span.output = output
|
|
428
|
+
exit_current_context(uuid_str=uuid_str)
|
|
292
429
|
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
430
|
+
# set the tools called in the parent span as well as on the trace level
|
|
431
|
+
tool_call = ToolCall(
|
|
432
|
+
name=tool_span.name,
|
|
433
|
+
description=tool_span.description,
|
|
434
|
+
output=output,
|
|
435
|
+
input_parameters=prepare_tool_call_input_parameters(
|
|
436
|
+
tool_span.input
|
|
437
|
+
),
|
|
438
|
+
)
|
|
297
439
|
|
|
298
|
-
|
|
440
|
+
# Use span's stored trace_uuid and parent_uuid for reliable lookup
|
|
441
|
+
# These are always available regardless of context state
|
|
442
|
+
if tool_span.parent_uuid:
|
|
443
|
+
parent_span = trace_manager.get_span_by_uuid(
|
|
444
|
+
tool_span.parent_uuid
|
|
445
|
+
)
|
|
446
|
+
if parent_span:
|
|
447
|
+
if parent_span.tools_called is None:
|
|
448
|
+
parent_span.tools_called = []
|
|
449
|
+
parent_span.tools_called.append(tool_call)
|
|
450
|
+
|
|
451
|
+
if tool_span.trace_uuid:
|
|
452
|
+
trace = trace_manager.get_trace_by_uuid(tool_span.trace_uuid)
|
|
453
|
+
if trace:
|
|
454
|
+
if trace.tools_called is None:
|
|
455
|
+
trace.tools_called = []
|
|
456
|
+
trace.tools_called.append(tool_call)
|
|
299
457
|
|
|
300
458
|
def on_tool_error(
|
|
301
459
|
self,
|
|
@@ -307,9 +465,12 @@ class CallbackHandler(BaseCallbackHandler):
|
|
|
307
465
|
) -> Any:
|
|
308
466
|
uuid_str = str(run_id)
|
|
309
467
|
tool_span: ToolSpan = trace_manager.get_span_by_uuid(uuid_str)
|
|
310
|
-
tool_span
|
|
311
|
-
|
|
312
|
-
|
|
468
|
+
if tool_span is None:
|
|
469
|
+
return
|
|
470
|
+
with self._ctx(run_id=run_id, parent_run_id=parent_run_id):
|
|
471
|
+
tool_span.status = TraceSpanStatus.ERRORED
|
|
472
|
+
tool_span.error = str(error)
|
|
473
|
+
exit_current_context(uuid_str=uuid_str)
|
|
313
474
|
|
|
314
475
|
def on_retriever_start(
|
|
315
476
|
self,
|
|
@@ -322,16 +483,21 @@ class CallbackHandler(BaseCallbackHandler):
|
|
|
322
483
|
metadata: Optional[dict[str, Any]] = None,
|
|
323
484
|
**kwargs: Any, # un-logged kwargs
|
|
324
485
|
) -> Any:
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
486
|
+
with self._ctx(run_id=run_id, parent_run_id=parent_run_id):
|
|
487
|
+
uuid_str = str(run_id)
|
|
488
|
+
retriever_span = enter_current_context(
|
|
489
|
+
uuid_str=uuid_str,
|
|
490
|
+
span_type="retriever",
|
|
491
|
+
func_name=extract_name(serialized, **kwargs),
|
|
492
|
+
observe_kwargs={
|
|
493
|
+
"embedder": metadata.get(
|
|
494
|
+
"ls_embedding_provider", "unknown"
|
|
495
|
+
),
|
|
496
|
+
},
|
|
497
|
+
)
|
|
498
|
+
# Register this run_id -> span mapping for child callbacks
|
|
499
|
+
self._run_id_to_span_uuid[str(run_id)] = uuid_str
|
|
500
|
+
retriever_span.input = query
|
|
335
501
|
|
|
336
502
|
def on_retriever_end(
|
|
337
503
|
self,
|
|
@@ -343,17 +509,20 @@ class CallbackHandler(BaseCallbackHandler):
|
|
|
343
509
|
) -> Any:
|
|
344
510
|
uuid_str = str(run_id)
|
|
345
511
|
retriever_span: RetrieverSpan = trace_manager.get_span_by_uuid(uuid_str)
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
512
|
+
if retriever_span is None:
|
|
513
|
+
return
|
|
514
|
+
|
|
515
|
+
with self._ctx(run_id=run_id, parent_run_id=parent_run_id):
|
|
516
|
+
# prepare output
|
|
517
|
+
output_list = []
|
|
518
|
+
if isinstance(output, list):
|
|
519
|
+
for item in output:
|
|
520
|
+
output_list.append(str(item))
|
|
521
|
+
else:
|
|
522
|
+
output_list.append(str(output))
|
|
523
|
+
|
|
524
|
+
retriever_span.output = output_list
|
|
525
|
+
exit_current_context(uuid_str=uuid_str)
|
|
357
526
|
|
|
358
527
|
def on_retriever_error(
|
|
359
528
|
self,
|
|
@@ -365,6 +534,9 @@ class CallbackHandler(BaseCallbackHandler):
|
|
|
365
534
|
) -> Any:
|
|
366
535
|
uuid_str = str(run_id)
|
|
367
536
|
retriever_span: RetrieverSpan = trace_manager.get_span_by_uuid(uuid_str)
|
|
368
|
-
retriever_span
|
|
369
|
-
|
|
370
|
-
|
|
537
|
+
if retriever_span is None:
|
|
538
|
+
return
|
|
539
|
+
with self._ctx(run_id=run_id, parent_run_id=parent_run_id):
|
|
540
|
+
retriever_span.status = TraceSpanStatus.ERRORED
|
|
541
|
+
retriever_span.error = str(error)
|
|
542
|
+
exit_current_context(uuid_str=uuid_str)
|