deepeval 3.5.1__py3-none-any.whl → 3.5.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deepeval/_version.py +1 -1
- deepeval/config/settings.py +94 -2
- deepeval/config/utils.py +54 -1
- deepeval/constants.py +27 -0
- deepeval/integrations/langchain/__init__.py +2 -3
- deepeval/integrations/langchain/callback.py +126 -301
- deepeval/integrations/langchain/patch.py +24 -13
- deepeval/integrations/langchain/utils.py +203 -1
- deepeval/integrations/pydantic_ai/patcher.py +220 -185
- deepeval/integrations/pydantic_ai/utils.py +86 -0
- deepeval/metrics/conversational_g_eval/conversational_g_eval.py +1 -0
- deepeval/metrics/pii_leakage/pii_leakage.py +1 -1
- deepeval/models/embedding_models/azure_embedding_model.py +40 -9
- deepeval/models/embedding_models/local_embedding_model.py +54 -11
- deepeval/models/embedding_models/ollama_embedding_model.py +25 -7
- deepeval/models/embedding_models/openai_embedding_model.py +47 -5
- deepeval/models/llms/amazon_bedrock_model.py +31 -4
- deepeval/models/llms/anthropic_model.py +39 -13
- deepeval/models/llms/azure_model.py +37 -38
- deepeval/models/llms/deepseek_model.py +36 -7
- deepeval/models/llms/gemini_model.py +10 -0
- deepeval/models/llms/grok_model.py +50 -3
- deepeval/models/llms/kimi_model.py +37 -7
- deepeval/models/llms/local_model.py +38 -12
- deepeval/models/llms/ollama_model.py +15 -3
- deepeval/models/llms/openai_model.py +37 -44
- deepeval/models/mlllms/gemini_model.py +21 -3
- deepeval/models/mlllms/ollama_model.py +38 -13
- deepeval/models/mlllms/openai_model.py +18 -42
- deepeval/models/retry_policy.py +548 -64
- deepeval/prompt/api.py +13 -9
- deepeval/prompt/prompt.py +19 -9
- deepeval/tracing/tracing.py +87 -0
- deepeval/utils.py +12 -0
- {deepeval-3.5.1.dist-info → deepeval-3.5.3.dist-info}/METADATA +1 -1
- {deepeval-3.5.1.dist-info → deepeval-3.5.3.dist-info}/RECORD +39 -38
- {deepeval-3.5.1.dist-info → deepeval-3.5.3.dist-info}/LICENSE.md +0 -0
- {deepeval-3.5.1.dist-info → deepeval-3.5.3.dist-info}/WHEEL +0 -0
- {deepeval-3.5.1.dist-info → deepeval-3.5.3.dist-info}/entry_points.txt +0 -0
|
@@ -91,7 +91,7 @@ def extract_name(serialized: dict[str, Any], **kwargs: Any) -> str:
|
|
|
91
91
|
if "name" in serialized:
|
|
92
92
|
return serialized["name"]
|
|
93
93
|
|
|
94
|
-
return "
|
|
94
|
+
return "Agent"
|
|
95
95
|
|
|
96
96
|
|
|
97
97
|
def safe_extract_model_name(
|
|
@@ -110,3 +110,205 @@ def safe_extract_model_name(
|
|
|
110
110
|
return ls_model_name
|
|
111
111
|
|
|
112
112
|
return None
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
from typing import Any, List, Dict, Optional, Union, Literal, Callable
|
|
116
|
+
from langchain_core.outputs import ChatGeneration
|
|
117
|
+
from time import perf_counter
|
|
118
|
+
import uuid
|
|
119
|
+
from rich.progress import Progress
|
|
120
|
+
from deepeval.tracing.tracing import Observer
|
|
121
|
+
|
|
122
|
+
from deepeval.metrics import BaseMetric
|
|
123
|
+
from deepeval.tracing.context import current_span_context, current_trace_context
|
|
124
|
+
from deepeval.tracing.tracing import trace_manager
|
|
125
|
+
from deepeval.tracing.types import (
|
|
126
|
+
AgentSpan,
|
|
127
|
+
BaseSpan,
|
|
128
|
+
LlmSpan,
|
|
129
|
+
RetrieverSpan,
|
|
130
|
+
SpanType,
|
|
131
|
+
ToolSpan,
|
|
132
|
+
TraceSpanStatus,
|
|
133
|
+
)
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
def enter_current_context(
    span_type: Optional[
        Union[Literal["agent", "llm", "retriever", "tool"], str]
    ],
    func_name: str,
    metrics: Optional[Union[List[str], List[BaseMetric]]] = None,
    metric_collection: Optional[str] = None,
    observe_kwargs: Optional[Dict[str, Any]] = None,
    function_kwargs: Optional[Dict[str, Any]] = None,
    progress: Optional[Progress] = None,
    pbar_callback_id: Optional[int] = None,
    uuid_str: Optional[str] = None,
) -> BaseSpan:
    """Create a span, register it, and make it the current span.

    The span is attached to the span currently held in
    ``current_span_context`` when there is one. Otherwise it joins the trace
    held in ``current_trace_context``, or a new trace is started and stored
    there.

    Args:
        span_type: Selects the span class. Values equal to the ``SpanType``
            members AGENT, LLM, RETRIEVER and TOOL give the matching
            specialised span; anything else (including ``None``) gives a
            plain ``BaseSpan``.
        func_name: Span name used when ``observe_kwargs`` has no ``"name"``.
        metrics: Stored on the span unchanged.
        metric_collection: Stored on the span; also passed to
            ``trace_manager.start_new_trace`` when a new trace is started.
        observe_kwargs: Optional per-span settings. Keys read here are
            ``"name"`` and ``"prompt"``, plus, depending on ``span_type``,
            ``"available_tools"``, ``"agent_handoffs"``, ``"model"``,
            ``"cost_per_input_token"``, ``"cost_per_output_token"`` and
            ``"embedder"``. For tool spans the remaining keys are forwarded
            to ``ToolSpan``.
        function_kwargs: Recorded as the span input after passing through
            ``trace_manager.mask``.
        progress: Progress object to store on the span. Ignored when the
            parent span already carries both a progress object and a
            progress-bar id; the parent's pair is used instead.
        pbar_callback_id: Progress-bar id stored alongside ``progress``.
        uuid_str: UUID for the new span; a random UUID4 string is generated
            when omitted or empty.

    Returns:
        The newly created span, which is now the value of
        ``current_span_context``.
    """
    start_time = perf_counter()
    observe_kwargs = observe_kwargs or {}
    function_kwargs = function_kwargs or {}

    name = observe_kwargs.get("name", func_name)
    prompt = observe_kwargs.get("prompt", None)
    uuid_str = uuid_str or str(uuid.uuid4())

    parent_span = current_span_context.get()
    trace_uuid: Optional[str] = None
    parent_uuid: Optional[str] = None

    if parent_span:
        parent_uuid = parent_span.uuid
        trace_uuid = parent_span.trace_uuid
    else:
        current_trace = current_trace_context.get()
        if current_trace:
            trace_uuid = current_trace.uuid
        else:
            # No span and no trace in context: this span opens a new trace.
            trace = trace_manager.start_new_trace(
                metric_collection=metric_collection
            )
            trace_uuid = trace.uuid
            current_trace_context.set(trace)

    span_kwargs = {
        "uuid": uuid_str,
        "trace_uuid": trace_uuid,
        "parent_uuid": parent_uuid,
        "start_time": start_time,
        "end_time": None,
        "status": TraceSpanStatus.SUCCESS,
        "children": [],
        "name": name,
        "input": None,
        "output": None,
        "metrics": metrics,
        "metric_collection": metric_collection,
    }

    if span_type == SpanType.AGENT.value:
        available_tools = observe_kwargs.get("available_tools", [])
        agent_handoffs = observe_kwargs.get("agent_handoffs", [])
        span_instance = AgentSpan(
            **span_kwargs,
            available_tools=available_tools,
            agent_handoffs=agent_handoffs,
        )
    elif span_type == SpanType.LLM.value:
        model = observe_kwargs.get("model", None)
        c_in = observe_kwargs.get("cost_per_input_token", None)
        c_out = observe_kwargs.get("cost_per_output_token", None)
        span_instance = LlmSpan(
            **span_kwargs,
            model=model,
            cost_per_input_token=c_in,
            cost_per_output_token=c_out,
        )
    elif span_type == SpanType.RETRIEVER.value:
        embedder = observe_kwargs.get("embedder", None)
        span_instance = RetrieverSpan(**span_kwargs, embedder=embedder)
    elif span_type == SpanType.TOOL.value:
        # observe_kwargs may repeat keys already present in span_kwargs
        # (e.g. "name", which is read from observe_kwargs above). Unpacking
        # both mappings unfiltered would raise TypeError for the duplicated
        # keyword, so colliding keys are dropped; span_kwargs takes priority.
        extra_kwargs = {
            key: value
            for key, value in observe_kwargs.items()
            if key not in span_kwargs
        }
        span_instance = ToolSpan(**span_kwargs, **extra_kwargs)
    else:
        span_instance = BaseSpan(**span_kwargs)

    # Input and prompt are recorded at entry; output is filled in on exit.
    span_instance.input = trace_manager.mask(function_kwargs)
    if isinstance(span_instance, LlmSpan) and prompt:
        span_instance.prompt = prompt

    trace_manager.add_span(span_instance)
    trace_manager.add_span_to_trace(span_instance)

    # A parent that carries a complete progress pair overrides the arguments.
    if (
        parent_span
        and getattr(parent_span, "progress", None) is not None
        and getattr(parent_span, "pbar_callback_id", None) is not None
    ):
        progress = parent_span.progress
        pbar_callback_id = parent_span.pbar_callback_id

    if progress is not None and pbar_callback_id is not None:
        span_instance.progress = progress
        span_instance.pbar_callback_id = pbar_callback_id

    current_span_context.set(span_instance)

    return span_instance
|
|
245
|
+
|
|
246
|
+
|
|
247
|
+
def exit_current_context(
    uuid_str: str,
    result: Any = None,
    update_span_properties: Optional[Callable[[BaseSpan], None]] = None,
    progress: Optional[Progress] = None,
    pbar_callback_id: Optional[int] = None,
    exc_type: Optional[type] = None,
    exc_val: Optional[BaseException] = None,
    exc_tb: Optional[Any] = None,
) -> None:
    """Close the span identified by ``uuid_str`` and restore the context.

    If the span in ``current_span_context`` is missing or has a different
    UUID, an error line is printed and nothing else happens.

    Args:
        uuid_str: UUID of the span expected to be the current span.
        result: Recorded as the span output (after ``trace_manager.mask``)
            unless the span already has an output.
        update_span_properties: Optional callback invoked with the span after
            its end time and status are set, before the output is filled in.
        progress: Progress object to advance; falls back to the span's own
            ``progress`` attribute when ``None``.
        pbar_callback_id: Progress-bar id to advance; falls back to the
            span's own ``pbar_callback_id`` attribute when ``None``.
        exc_type: When not ``None`` the span is marked as errored.
        exc_val: Its string form is stored as the span error when
            ``exc_type`` is given.
        exc_tb: Accepted but not used by this function.
    """
    finished_at = perf_counter()
    active = current_span_context.get()

    # Guard: only the span that is currently in context may be exited.
    if not active or active.uuid != uuid_str:
        print(
            f"Error: Current span in context does not match the span being exited. Expected UUID: {uuid_str}, Got: {active.uuid if active else 'None'}"
        )
        return

    active.end_time = finished_at
    failed = exc_type is not None
    active.status = (
        TraceSpanStatus.ERRORED if failed else TraceSpanStatus.SUCCESS
    )
    if failed:
        active.error = str(exc_val)

    if update_span_properties is not None:
        update_span_properties(active)

    # The callback above may already have supplied an output; keep it if so.
    if active.output is None:
        active.output = trace_manager.mask(result)

    # Explicit arguments win; otherwise use what was stored on the span.
    if progress is None:
        progress = getattr(active, "progress", None)
    if pbar_callback_id is None:
        pbar_callback_id = getattr(active, "pbar_callback_id", None)

    trace_manager.remove_span(uuid_str)

    if active.parent_uuid:
        # Child span: hand the context back to the parent if it can be found.
        parent = trace_manager.get_span_by_uuid(active.parent_uuid)
        current_span_context.set(parent if parent else None)
    else:
        # Root span: propagate errors to the trace and end the trace once no
        # other span of the same trace is still active.
        trace = current_trace_context.get()
        if trace and active.status == TraceSpanStatus.ERRORED:
            trace.status = TraceSpanStatus.ERRORED
        if trace and trace.uuid == active.trace_uuid:
            trace_still_busy = any(
                other.trace_uuid == active.trace_uuid
                for other in trace_manager.active_spans.values()
            )
            if not trace_still_busy:
                trace_manager.end_trace(active.trace_uuid)
                current_trace_context.set(None)

        current_span_context.set(None)

    if progress is not None and pbar_callback_id is not None:
        progress.update(pbar_callback_id, advance=1)
|