deepeval 3.4.9__py3-none-any.whl → 3.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
deepeval/_version.py CHANGED
@@ -1 +1 @@
1
- __version__: str = "3.4.9"
1
+ __version__: str = "3.5.0"
@@ -1,6 +1,5 @@
1
- from typing import List, Optional, Dict
1
+ from typing import List, Optional, Dict, Union
2
2
  from tqdm import tqdm
3
- from typing import Union
4
3
 
5
4
  from deepeval.dataset import Golden
6
5
  from deepeval.benchmarks.base_benchmark import (
@@ -50,7 +49,7 @@ class DROP(DeepEvalBaseBenchmark):
50
49
  self,
51
50
  model: DeepEvalBaseLLM,
52
51
  *args,
53
- batch_size: int | None = None,
52
+ batch_size: Union[int, None] = None,
54
53
  **kwargs,
55
54
  ) -> DeepEvalBaseBenchmarkResult:
56
55
  import pandas as pd
@@ -1,4 +1,4 @@
1
- from typing import List, Dict, Optional
1
+ from typing import List, Dict, Optional, Union
2
2
  from tqdm import tqdm
3
3
 
4
4
  from deepeval.dataset import Golden
@@ -51,7 +51,7 @@ class HellaSwag(DeepEvalBaseBenchmark):
51
51
  self,
52
52
  model: DeepEvalBaseLLM,
53
53
  *args,
54
- batch_size: int | None = None,
54
+ batch_size: Union[int, None] = None,
55
55
  **kwargs,
56
56
  ) -> DeepEvalBaseBenchmarkResult:
57
57
  import pandas as pd
@@ -1,4 +1,4 @@
1
- from typing import List, Optional, Dict
1
+ from typing import List, Optional, Dict, Union
2
2
  from tqdm import tqdm
3
3
  import requests
4
4
  import json
@@ -52,7 +52,7 @@ class LogiQA(DeepEvalBaseBenchmark):
52
52
  self,
53
53
  model: DeepEvalBaseLLM,
54
54
  *args,
55
- batch_size: int | None = None,
55
+ batch_size: Union[int, None] = None,
56
56
  **kwargs,
57
57
  ) -> DeepEvalBaseBenchmarkResult:
58
58
  import pandas as pd
@@ -1,4 +1,4 @@
1
- from typing import List, Optional, Dict
1
+ from typing import List, Optional, Dict, Union
2
2
  from tqdm import tqdm
3
3
 
4
4
  from deepeval.dataset import Golden
@@ -50,7 +50,7 @@ class MathQA(DeepEvalBaseBenchmark):
50
50
  self,
51
51
  model: DeepEvalBaseLLM,
52
52
  *args,
53
- batch_size: int | None = None,
53
+ batch_size: Union[int, None] = None,
54
54
  **kwargs,
55
55
  ) -> DeepEvalBaseBenchmarkResult:
56
56
  import pandas as pd
@@ -1,4 +1,4 @@
1
- from typing import List, Optional, Dict
1
+ from typing import List, Optional, Dict, Union
2
2
  from tqdm import tqdm
3
3
 
4
4
  from deepeval.dataset import Golden
@@ -49,7 +49,7 @@ class MMLU(DeepEvalBaseBenchmark):
49
49
  self,
50
50
  model: DeepEvalBaseLLM,
51
51
  *args,
52
- batch_size: int | None = None,
52
+ batch_size: Union[int, None] = None,
53
53
  **kwargs,
54
54
  ) -> DeepEvalBaseBenchmarkResult:
55
55
  import pandas as pd
@@ -1,4 +1,4 @@
1
- from typing import List, Dict, Optional
1
+ from typing import List, Dict, Optional, Union
2
2
  from tqdm import tqdm
3
3
 
4
4
  from deepeval.dataset import Golden
@@ -59,7 +59,7 @@ class TruthfulQA(DeepEvalBaseBenchmark):
59
59
  self,
60
60
  model: DeepEvalBaseLLM,
61
61
  *args,
62
- batch_size: int | None = None,
62
+ batch_size: Union[int, None] = None,
63
63
  **kwargs,
64
64
  ) -> DeepEvalBaseBenchmarkResult:
65
65
  import pandas as pd
deepeval/confident/api.py CHANGED
@@ -10,6 +10,7 @@ from tenacity import (
10
10
  retry_if_exception_type,
11
11
  RetryCallState,
12
12
  )
13
+ from pydantic import SecretStr
13
14
 
14
15
  import deepeval
15
16
  from deepeval.key_handler import KEY_FILE_HANDLER, KeyValues
@@ -1,5 +1,3 @@
1
- from .agent import PydanticAIAgent as Agent
2
- from .setup import instrument_pydantic_ai
1
+ from .patcher import instrument as instrument_pydantic_ai
3
2
 
4
-
5
- __all__ = ["Agent", "instrument_pydantic_ai"]
3
+ __all__ = ["instrument_pydantic_ai"]
@@ -31,14 +31,6 @@ def instrument_pydantic_ai(api_key: Optional[str] = None):
31
31
  with capture_tracing_integration("pydantic_ai"):
32
32
  is_opentelemetry_available()
33
33
 
34
- if api_key:
35
- deepeval.login(api_key)
36
-
37
- api_key = get_confident_api_key()
38
-
39
- if not api_key:
40
- raise ValueError("No api key provided.")
41
-
42
34
  # create a new tracer provider
43
35
  tracer_provider = TracerProvider()
44
36
  tracer_provider.add_span_processor(
@@ -0,0 +1,376 @@
1
+ import functools
2
+ import deepeval
3
+ from deepeval.tracing.types import LlmOutput, LlmToolCall
4
+ from pydantic_ai.agent import AgentRunResult
5
+ from deepeval.tracing.context import current_trace_context
6
+ from deepeval.tracing.types import AgentSpan, LlmSpan
7
+ from deepeval.tracing.tracing import Observer
8
+ from typing import List, Callable, Optional, Any
9
+ from deepeval.test_case.llm_test_case import ToolCall
10
+ from deepeval.metrics.base_metric import BaseMetric
11
+ from deepeval.confident.api import get_confident_api_key
12
+ from deepeval.integrations.pydantic_ai.otel import instrument_pydantic_ai
13
+ from deepeval.telemetry import capture_tracing_integration
14
+ from deepeval.prompt import Prompt
15
+
16
+ try:
17
+ from pydantic_ai.agent import Agent
18
+ from pydantic_ai.models import Model
19
+ from pydantic_ai.messages import (
20
+ ModelResponse,
21
+ ModelRequest,
22
+ ModelResponsePart,
23
+ TextPart,
24
+ ToolCallPart,
25
+ SystemPromptPart,
26
+ ToolReturnPart,
27
+ UserPromptPart,
28
+ )
29
+
30
+ pydantic_ai_installed = True
31
+ except:
32
+ pydantic_ai_installed = False
33
+
34
+
35
+ def _patch_agent_tool_decorator():
36
+ original_tool = Agent.tool
37
+
38
+ @functools.wraps(original_tool)
39
+ def wrapper(
40
+ *args,
41
+ metrics: Optional[List[BaseMetric]] = None,
42
+ metric_collection: Optional[str] = None,
43
+ **kwargs
44
+ ):
45
+ # Case 1: Direct decoration - @agent.tool
46
+ if args and callable(args[0]):
47
+ patched_func = _create_patched_tool(
48
+ args[0], metrics, metric_collection
49
+ )
50
+ new_args = (patched_func,) + args[1:]
51
+ return original_tool(*new_args, **kwargs)
52
+
53
+ # Case 2: Decoration with arguments - @agent.tool(metrics=..., metric_collection=...)
54
+ else:
55
+ # Return a decorator function that will receive the actual function
56
+ def decorator(func):
57
+ patched_func = _create_patched_tool(
58
+ func, metrics, metric_collection
59
+ )
60
+ return original_tool(*args, **kwargs)(patched_func)
61
+
62
+ return decorator
63
+
64
+ Agent.tool = wrapper
65
+
66
+
67
+ def _create_patched_tool(
68
+ func: Callable,
69
+ metrics: Optional[List[BaseMetric]] = None,
70
+ metric_collection: Optional[str] = None,
71
+ ):
72
+ import asyncio
73
+
74
+ original_func = func
75
+
76
+ is_async = asyncio.iscoroutinefunction(original_func)
77
+
78
+ if is_async:
79
+
80
+ @functools.wraps(original_func)
81
+ async def async_wrapper(*args, **kwargs):
82
+ with Observer(
83
+ span_type="tool",
84
+ func_name=original_func.__name__,
85
+ metrics=metrics,
86
+ metric_collection=metric_collection,
87
+ function_kwargs={"args": args, **kwargs},
88
+ ) as observer:
89
+ result = await original_func(*args, **kwargs)
90
+ observer.result = result
91
+
92
+ return result
93
+
94
+ return async_wrapper
95
+ else:
96
+
97
+ @functools.wraps(original_func)
98
+ def sync_wrapper(*args, **kwargs):
99
+ with Observer(
100
+ span_type="tool",
101
+ func_name=original_func.__name__,
102
+ metrics=metrics,
103
+ metric_collection=metric_collection,
104
+ function_kwargs={"args": args, **kwargs},
105
+ ) as observer:
106
+ result = original_func(*args, **kwargs)
107
+ observer.result = result
108
+
109
+ return result
110
+
111
+ return sync_wrapper
112
+
113
+
114
+ def _patch_agent_init():
115
+ original_init = Agent.__init__
116
+
117
+ @functools.wraps(original_init)
118
+ def wrapper(
119
+ self,
120
+ *args,
121
+ llm_metric_collection: Optional[str] = None,
122
+ llm_metrics: Optional[List[BaseMetric]] = None,
123
+ llm_prompt: Optional[Prompt] = None,
124
+ agent_metric_collection: Optional[str] = None,
125
+ agent_metrics: Optional[List[BaseMetric]] = None,
126
+ **kwargs
127
+ ):
128
+ result = original_init(self, *args, **kwargs)
129
+ _patch_llm_model(
130
+ self._model, llm_metric_collection, llm_metrics, llm_prompt
131
+ ) # runtime patch of the model
132
+ _patch_agent_run(agent_metric_collection, agent_metrics)
133
+ return result
134
+
135
+ Agent.__init__ = wrapper
136
+
137
+
138
+ def _patch_agent_run(
139
+ agent_metric_collection: Optional[str] = None,
140
+ agent_metrics: Optional[List[BaseMetric]] = None,
141
+ ):
142
+ original_run = Agent.run
143
+
144
+ @functools.wraps(original_run)
145
+ async def wrapper(
146
+ *args,
147
+ trace_metric_collection: Optional[str] = None,
148
+ trace_metrics: Optional[List[BaseMetric]] = None,
149
+ trace_name: Optional[str] = None,
150
+ trace_tags: Optional[List[str]] = None,
151
+ trace_metadata: Optional[dict] = None,
152
+ trace_thread_id: Optional[str] = None,
153
+ trace_user_id: Optional[str] = None,
154
+ **kwargs
155
+ ):
156
+ with Observer(
157
+ span_type="agent",
158
+ func_name="Agent",
159
+ function_kwargs={"input": args[1]},
160
+ metrics=agent_metrics,
161
+ metric_collection=agent_metric_collection,
162
+ ) as observer:
163
+ result = await original_run(*args, **kwargs)
164
+ observer.update_span_properties = (
165
+ lambda agent_span: set_agent_span_attributes(agent_span, result)
166
+ )
167
+ observer.result = result.output
168
+
169
+ _update_trace_context(
170
+ trace_name=trace_name,
171
+ trace_tags=trace_tags,
172
+ trace_metadata=trace_metadata,
173
+ trace_thread_id=trace_thread_id,
174
+ trace_user_id=trace_user_id,
175
+ trace_metric_collection=trace_metric_collection,
176
+ trace_metrics=trace_metrics,
177
+ trace_input=args[1],
178
+ trace_output=result.output,
179
+ )
180
+
181
+ return result
182
+
183
+ Agent.run = wrapper
184
+
185
+
186
+ def _update_trace_context(
187
+ trace_name: Optional[str] = None,
188
+ trace_tags: Optional[List[str]] = None,
189
+ trace_metadata: Optional[dict] = None,
190
+ trace_thread_id: Optional[str] = None,
191
+ trace_user_id: Optional[str] = None,
192
+ trace_metric_collection: Optional[str] = None,
193
+ trace_metrics: Optional[List[BaseMetric]] = None,
194
+ trace_input: Optional[Any] = None,
195
+ trace_output: Optional[Any] = None,
196
+ ):
197
+
198
+ current_trace = current_trace_context.get()
199
+ current_trace.name = trace_name
200
+ current_trace.tags = trace_tags
201
+ current_trace.metadata = trace_metadata
202
+ current_trace.thread_id = trace_thread_id
203
+ current_trace.user_id = trace_user_id
204
+ current_trace.metric_collection = trace_metric_collection
205
+ current_trace.metrics = trace_metrics
206
+ current_trace.input = trace_input
207
+ current_trace.output = trace_output
208
+
209
+
210
+ def _patch_llm_model(
211
+ model: Model,
212
+ llm_metric_collection: Optional[str] = None,
213
+ llm_metrics: Optional[List[BaseMetric]] = None,
214
+ llm_prompt: Optional[Prompt] = None,
215
+ ):
216
+ original_func = model.request
217
+ try:
218
+ model_name = model.model_name
219
+ except Exception:
220
+ model_name = "unknown"
221
+
222
+ @functools.wraps(original_func)
223
+ async def wrapper(*args, **kwargs):
224
+ with Observer(
225
+ span_type="llm",
226
+ func_name="LLM",
227
+ observe_kwargs={"model": model_name},
228
+ metrics=llm_metrics,
229
+ metric_collection=llm_metric_collection,
230
+ ) as observer:
231
+ result = await original_func(*args, **kwargs)
232
+ request = kwargs.get("messages", [])
233
+ if not request:
234
+ request = args[0]
235
+ observer.update_span_properties = (
236
+ lambda llm_span: set_llm_span_attributes(
237
+ llm_span, args[0], result, llm_prompt
238
+ )
239
+ )
240
+ observer.result = result
241
+ return result
242
+
243
+ model.request = wrapper
244
+
245
+
246
+ def instrument(otel: Optional[bool] = False, api_key: Optional[str] = None):
247
+
248
+ if api_key:
249
+ deepeval.login(api_key)
250
+
251
+ api_key = get_confident_api_key()
252
+
253
+ if not api_key:
254
+ raise ValueError("No api key provided.")
255
+
256
+ if otel:
257
+ instrument_pydantic_ai(api_key)
258
+ else:
259
+ with capture_tracing_integration("pydantic_ai"):
260
+ _patch_agent_init()
261
+ _patch_agent_tool_decorator()
262
+
263
+
264
+ def set_llm_span_attributes(
265
+ llm_span: LlmSpan,
266
+ requests: List[ModelRequest],
267
+ result: ModelResponse,
268
+ llm_prompt: Optional[Prompt] = None,
269
+ ):
270
+ llm_span.prompt = llm_prompt
271
+
272
+ input = []
273
+ for request in requests:
274
+ for part in request.parts:
275
+ if isinstance(part, SystemPromptPart):
276
+ input.append({"role": "System", "content": part.content})
277
+ elif isinstance(part, UserPromptPart):
278
+ input.append({"role": "User", "content": part.content})
279
+ elif isinstance(part, ToolCallPart):
280
+ input.append(
281
+ {
282
+ "role": "Tool Call",
283
+ "name": part.tool_name,
284
+ "content": part.args_as_json_str(),
285
+ }
286
+ )
287
+ elif isinstance(part, ToolReturnPart):
288
+ input.append(
289
+ {
290
+ "role": "Tool Return",
291
+ "name": part.tool_name,
292
+ "content": part.model_response_str(),
293
+ }
294
+ )
295
+ llm_span.input = input
296
+
297
+ content = ""
298
+ tool_calls = []
299
+ for part in result.parts:
300
+ if isinstance(part, TextPart):
301
+ content += part.content + "\n"
302
+ elif isinstance(part, ToolCallPart):
303
+ tool_calls.append(
304
+ LlmToolCall(name=part.tool_name, args=part.args_as_dict())
305
+ )
306
+ llm_span.output = LlmOutput(
307
+ role="Assistant", content=content, tool_calls=tool_calls
308
+ )
309
+ llm_span.tools_called = _extract_tools_called_from_llm_response(
310
+ result.parts
311
+ )
312
+
313
+
314
+ def set_agent_span_attributes(agent_span: AgentSpan, result: AgentRunResult):
315
+ agent_span.tools_called = _extract_tools_called(result)
316
+
317
+
318
+ # llm tools called
319
+ def _extract_tools_called_from_llm_response(
320
+ result: List[ModelResponsePart],
321
+ ) -> List[ToolCall]:
322
+ tool_calls = []
323
+
324
+ # Loop through each ModelResponsePart
325
+ for part in result:
326
+ # Look for parts with part_kind="tool-call"
327
+ if hasattr(part, "part_kind") and part.part_kind == "tool-call":
328
+ # Extract tool name and args from the ToolCallPart
329
+ tool_name = part.tool_name
330
+ input_parameters = (
331
+ part.args_as_dict() if hasattr(part, "args_as_dict") else None
332
+ )
333
+
334
+ # Create and append ToolCall object
335
+ tool_call = ToolCall(
336
+ name=tool_name, input_parameters=input_parameters
337
+ )
338
+ tool_calls.append(tool_call)
339
+
340
+ return tool_calls
341
+
342
+
343
+ # TODO: llm tools called (response is present in next message)
344
+ def _extract_tools_called(result: AgentRunResult) -> List[ToolCall]:
345
+ tool_calls = []
346
+
347
+ # Access the message history from the _state
348
+ message_history = result._state.message_history
349
+
350
+ # Scan through all messages in the history
351
+ for message in message_history:
352
+ # Check if this is a ModelResponse (kind="response")
353
+ if hasattr(message, "kind") and message.kind == "response":
354
+ # For ModelResponse messages, check each part
355
+ if hasattr(message, "parts"):
356
+ for part in message.parts:
357
+ # Look for parts with part_kind="tool-call"
358
+ if (
359
+ hasattr(part, "part_kind")
360
+ and part.part_kind == "tool-call"
361
+ ):
362
+ # Extract tool name and args from the ToolCallPart
363
+ tool_name = part.tool_name
364
+ input_parameters = (
365
+ part.args_as_dict()
366
+ if hasattr(part, "args_as_dict")
367
+ else None
368
+ )
369
+
370
+ # Create and append ToolCall object
371
+ tool_call = ToolCall(
372
+ name=tool_name, input_parameters=input_parameters
373
+ )
374
+ tool_calls.append(tool_call)
375
+
376
+ return tool_calls
@@ -30,7 +30,7 @@ model_pricing = {
30
30
  },
31
31
  "grok-3-fast": {
32
32
  "input": 0.60 / 1e6,
33
- "output": 2.50 / 1e-6,
33
+ "output": 2.50 / 1e6,
34
34
  },
35
35
  "grok-3-mini-fast": {
36
36
  "input": 30 / 1e6,
@@ -30,7 +30,7 @@ model_pricing = {
30
30
  },
31
31
  "kimi-k2-0711-preview": {
32
32
  "input": 0.60 / 1e6,
33
- "output": 2.50 / 1e-6,
33
+ "output": 2.50 / 1e6,
34
34
  },
35
35
  "kimi-thinking-preview": {
36
36
  "input": 30 / 1e6,
@@ -15,12 +15,12 @@ Usage:
15
15
 
16
16
  from __future__ import annotations
17
17
  import time
18
- from typing import Final
18
+ from typing import Final, Union
19
19
 
20
20
  # Module globals are initialised exactly once.
21
- _anchor_perf_ns: int | None = None
22
- _anchor_wall_ns: int | None = None
23
- _offset_ns: int | None = None
21
+ _anchor_perf_ns: Union[int, None] = None
22
+ _anchor_wall_ns: Union[int, None] = None
23
+ _offset_ns: Union[int, None] = None
24
24
 
25
25
 
26
26
  def init_clock_bridge() -> None:
deepeval/utils.py CHANGED
@@ -516,7 +516,7 @@ def remove_pbars(
516
516
 
517
517
 
518
518
  def read_env_int(
519
- name: str, default: int, *, min_value: int | None = None
519
+ name: str, default: int, *, min_value: Union[int, None] = None
520
520
  ) -> int:
521
521
  """Read an integer from an environment variable with safe fallback.
522
522
 
@@ -545,7 +545,7 @@ def read_env_int(
545
545
 
546
546
 
547
547
  def read_env_float(
548
- name: str, default: float, *, min_value: float | None = None
548
+ name: str, default: float, *, min_value: Union[float, None] = None
549
549
  ) -> float:
550
550
  """Read a float from an environment variable with safe fallback.
551
551
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: deepeval
3
- Version: 3.4.9
3
+ Version: 3.5.0
4
4
  Summary: The LLM Evaluation Framework
5
5
  Home-page: https://github.com/confident-ai/deepeval
6
6
  License: Apache-2.0
@@ -189,16 +189,6 @@ Let's pretend your LLM application is a RAG based customer support chatbot; here
189
189
  ```
190
190
  pip install -U deepeval
191
191
  ```
192
- ### Environment variables (.env / .env.local)
193
-
194
- DeepEval auto-loads `.env.local` then `.env` from the current working directory **at import time**.
195
- **Precedence:** process env -> `.env.local` -> `.env`.
196
- Opt out with `DEEPEVAL_DISABLE_DOTENV=1`.
197
-
198
- ```bash
199
- cp .env.example .env.local
200
- # then edit .env.local (ignored by git)
201
- ```
202
192
 
203
193
  ## Create an account (highly recommended)
204
194
 
@@ -391,9 +381,20 @@ evaluate(dataset, [answer_relevancy_metric])
391
381
  dataset.evaluate([answer_relevancy_metric])
392
382
  ```
393
383
 
394
- # LLM Evaluation With Confident AI
384
+ ## A Note on Env Variables (.env / .env.local)
385
+
386
+ DeepEval auto-loads `.env.local` then `.env` from the current working directory **at import time**.
387
+ **Precedence:** process env -> `.env.local` -> `.env`.
388
+ Opt out with `DEEPEVAL_DISABLE_DOTENV=1`.
389
+
390
+ ```bash
391
+ cp .env.example .env.local
392
+ # then edit .env.local (ignored by git)
393
+ ```
394
+
395
+ # DeepEval With Confident AI
395
396
 
396
- The correct LLM evaluation lifecycle is only achievable with [the DeepEval platform](https://confident-ai.com?utm_source=Github). It allows you to:
397
+ DeepEval's cloud platform, [Confident AI](https://confident-ai.com?utm_source=Github), allows you to:
397
398
 
398
399
  1. Curate/annotate evaluation datasets on the cloud
399
400
  2. Benchmark LLM app using dataset, and compare with previous iterations to experiment which models/prompts works best
@@ -1,5 +1,5 @@
1
1
  deepeval/__init__.py,sha256=6fsb813LD_jNhqR-xZnSdE5E-KsBbC3tc4oIg5ZMgTw,2115
2
- deepeval/_version.py,sha256=C51bPZRI51sTy21Ve1RwtpAOCNmb0BUammiT884H3Ow,27
2
+ deepeval/_version.py,sha256=xgoMNdDXsY3c4GfV1_DVK-xGdMOp5KCDaKln5j0PJdY,27
3
3
  deepeval/annotation/__init__.py,sha256=ZFhUVNNuH_YgQSZJ-m5E9iUb9TkAkEV33a6ouMDZ8EI,111
4
4
  deepeval/annotation/annotation.py,sha256=3j3-syeJepAcEj3u3e4T_BeRDzNr7yXGDIoNQGMKpwQ,2298
5
5
  deepeval/annotation/api.py,sha256=EYN33ACVzVxsFleRYm60KB4Exvff3rPJKt1VBuuX970,2147
@@ -77,7 +77,7 @@ deepeval/benchmarks/bool_q/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJ
77
77
  deepeval/benchmarks/bool_q/bool_q.py,sha256=wJM4-wSybT8EwgDJVB4p3QYXGNzLD3tdrpGE1cNEz_E,5507
78
78
  deepeval/benchmarks/bool_q/template.py,sha256=pgNj4RR6-4VJDDySwnKt-MpghBCjVlZ7fPKY6PltllQ,4055
79
79
  deepeval/benchmarks/drop/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
80
- deepeval/benchmarks/drop/drop.py,sha256=Qo74clH8ERf1Zutcz76wrTbR-fbR2qHdsZP6fJ1W-LQ,12054
80
+ deepeval/benchmarks/drop/drop.py,sha256=rGcqd79-IfQ2tvPuAL6wrON4R0hBiVGBy1OtDRmertE,12042
81
81
  deepeval/benchmarks/drop/task.py,sha256=RV7DEXF192IOsY-yIVdlGb_y-A_sS5APPn8PGOPn5yU,17950
82
82
  deepeval/benchmarks/drop/template.py,sha256=1P0mx_71Bxr9juIA8nGpVRIrP8NSoDILkIicjWvqE94,1376
83
83
  deepeval/benchmarks/equity_med_qa/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -88,7 +88,7 @@ deepeval/benchmarks/gsm8k/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJW
88
88
  deepeval/benchmarks/gsm8k/gsm8k.py,sha256=LyJQBskKuqp013LLO3539RJiRXqCmlGl12BPXvQ8P88,6974
89
89
  deepeval/benchmarks/gsm8k/template.py,sha256=3F7DwQwhJwKxtlbaO6TNvBBRaDEUBEp58JwirSjxtR0,1626
90
90
  deepeval/benchmarks/hellaswag/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
91
- deepeval/benchmarks/hellaswag/hellaswag.py,sha256=ESX4A8WRxSeErPC34QuMM6WVEeDeyiCmNq10twZJxgw,11959
91
+ deepeval/benchmarks/hellaswag/hellaswag.py,sha256=_3felzBwQUhhRXk4D9NbcY8dme_qUQcwUjKGw9OtDJg,11972
92
92
  deepeval/benchmarks/hellaswag/task.py,sha256=LfO8T6bpNiwdM8VdubKrup7qje3-rHgu69iB6Sdsc6I,7323
93
93
  deepeval/benchmarks/hellaswag/template.py,sha256=TcCu25hkl89qbRwcEyRVGTGp7DU_5Eph754W2znk5QY,1279
94
94
  deepeval/benchmarks/human_eval/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -102,15 +102,15 @@ deepeval/benchmarks/lambada/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMp
102
102
  deepeval/benchmarks/lambada/lambada.py,sha256=FExZLpDBgQfYe9o-MBS0LEy0-i4jHGeFHo8XCbMW_io,5556
103
103
  deepeval/benchmarks/lambada/template.py,sha256=mSn0Elvp34wTnvaAm3IENz0mfGSNM_iRx50hIouk4t0,3776
104
104
  deepeval/benchmarks/logi_qa/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
105
- deepeval/benchmarks/logi_qa/logi_qa.py,sha256=zN7KTa77d1WkM_m9V-Z0AfHBLFZ70s89yMaQ3_CNV84,11130
105
+ deepeval/benchmarks/logi_qa/logi_qa.py,sha256=VNZGASigEDlJjzwGZtWG3OUs3v3P733GD84-h3TaxjU,11143
106
106
  deepeval/benchmarks/logi_qa/task.py,sha256=pVMLVHPyDFSyoIsnckBNRDt8FK0J317PiGT-0dpr7rs,350
107
107
  deepeval/benchmarks/logi_qa/template.py,sha256=EddGd2s3u2bPejogTcM50SDS7ynHnMhHaKuqQjjZoLk,4354
108
108
  deepeval/benchmarks/math_qa/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
109
- deepeval/benchmarks/math_qa/math_qa.py,sha256=yRlVkUb_L4fPJIvaDo-5EB0XeVId9CxrIZUX7wLQJh4,10638
109
+ deepeval/benchmarks/math_qa/math_qa.py,sha256=_eP-yocJom9r91qmAUBbIH4hrWazEHLV2lDKu0yMfEI,10651
110
110
  deepeval/benchmarks/math_qa/task.py,sha256=3q_jlK5kIl5Zs0mQwuzxyvmPP6ncLZwszn7gtl1GfZs,192
111
111
  deepeval/benchmarks/math_qa/template.py,sha256=pC3PB2GGU5TQ81I7E76RJh0xlu7xiF6d4SK3T_Nksh8,4468
112
112
  deepeval/benchmarks/mmlu/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
113
- deepeval/benchmarks/mmlu/mmlu.py,sha256=KEiiRSfQQQSX51dcnrWPkQcud1WvcKe1v0T82P1NowY,11418
113
+ deepeval/benchmarks/mmlu/mmlu.py,sha256=flg3tb052DVo7wnfAHkW9n07tEEhHrkT2C0d5-UMBoQ,11431
114
114
  deepeval/benchmarks/mmlu/task.py,sha256=HnhnuD4Xjur9GlrBtswaR7ZPouGx4NTgbcFZu_oIzXw,2580
115
115
  deepeval/benchmarks/mmlu/template.py,sha256=MsdcrZWVkyZpEw--Kj6W7vjOJgig-ABiz9B3WtZz1MQ,1303
116
116
  deepeval/benchmarks/modes/__init__.py,sha256=IGhZp0-nmvVsZWBnTuBvKhdGiy4TJZShFSjYAeBZdbo,135
@@ -125,7 +125,7 @@ deepeval/benchmarks/truthful_qa/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm
125
125
  deepeval/benchmarks/truthful_qa/mode.py,sha256=66aXCTXGTbAprA33M3TT15OhpJAqxLPDzJuShKxiFwY,84
126
126
  deepeval/benchmarks/truthful_qa/task.py,sha256=PmfPbqINd9wizq8Tpk8pwms9TersoGlMGBqxpTmZhcc,1360
127
127
  deepeval/benchmarks/truthful_qa/template.py,sha256=5y6mfJm9AXnQL7xwrfsZjH080GwO1kd_1GdTzDCoYgo,4465
128
- deepeval/benchmarks/truthful_qa/truthful_qa.py,sha256=j8RzrAwgKVt2oWNf1bz9cPIk6b-I7WsjiPElJlQ21Xs,13768
128
+ deepeval/benchmarks/truthful_qa/truthful_qa.py,sha256=2r-xcFnzSSJds3ZGxYogBzjFFrCfJxYpXzKrpE8cC_c,13781
129
129
  deepeval/benchmarks/utils.py,sha256=NHImqH22mv108_CKM7ajTpu4hOeUhr5xPicbf0i2qGk,287
130
130
  deepeval/benchmarks/winogrande/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
131
131
  deepeval/benchmarks/winogrande/template.py,sha256=tDwH8NpNF9x7FbDmQw45XaW1LNqGBV6zP5pwV1uOlwM,2089
@@ -138,7 +138,7 @@ deepeval/cli/test.py,sha256=kSIFMRTAfVzBJ4OitwvT829-ylV7UzPMP57P2DePS-Q,5482
138
138
  deepeval/cli/types.py,sha256=_7KdthstHNc-JKCWrfpDQCf_j8h9PMxh0qJCHmVXJr0,310
139
139
  deepeval/cli/utils.py,sha256=F4-yuONzk4ojDoSLjI9RYERB7HOD412iZ2lNlSCq4wk,5601
140
140
  deepeval/confident/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
141
- deepeval/confident/api.py,sha256=tUv5P5nSdY-RHjH1FQ9AlJ36Ut8QAxCHWeARks64-Vo,8287
141
+ deepeval/confident/api.py,sha256=-2i3IBLtj5bUIImwOF6ltGVR3ZyViIbIC38XxwWvf54,8318
142
142
  deepeval/confident/types.py,sha256=-slFhDof_1maMgpLxqDRZv6kz6ZVY2hP_0uj_aveJKU,533
143
143
  deepeval/config/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
144
144
  deepeval/config/settings.py,sha256=e7sk6_3I14hG457e75DoJd9Ojo3rOkpBZzsMYlj4gKQ,18139
@@ -179,9 +179,9 @@ deepeval/integrations/llama_index/__init__.py,sha256=zBwUFQXDp6QFtp1cfANy8ucV08r
179
179
  deepeval/integrations/llama_index/agent/patched.py,sha256=4JbH0WQmt4lct7xxIH0phj8_Y-V35dgVv7DEDXK0jZI,2149
180
180
  deepeval/integrations/llama_index/handler.py,sha256=eqI1n8E4MsvfKoFs5Zrm9IdCR7g9eBgNedISs7UkU_I,8947
181
181
  deepeval/integrations/llama_index/utils.py,sha256=mxW71-3PjvBvJpLIU0kNWuTzCidy5l_-roLt8ZyWYA0,2599
182
- deepeval/integrations/pydantic_ai/__init__.py,sha256=04qKTNftGVzso3n7F1hvKxXI-NG3Enx8TP-EdpJykUc,134
183
- deepeval/integrations/pydantic_ai/agent.py,sha256=6PB_-zWaivQR65xi2F_WqBNtOcUwRpvI8wyCfpGhJJQ,13549
184
- deepeval/integrations/pydantic_ai/setup.py,sha256=-G15PEHZpLUsGNa7yP1IP2t3_6aKt5ac38930VnUYQY,1839
182
+ deepeval/integrations/pydantic_ai/__init__.py,sha256=36fBKBLRo1y5jFlj0Y4xhDJsiq4ZnqtmFO32R90Azo4,96
183
+ deepeval/integrations/pydantic_ai/otel.py,sha256=2DpO3RapdztXPlT9BWhQfF4dJDMyp2X7YvuplJ0SwC8,1661
184
+ deepeval/integrations/pydantic_ai/patcher.py,sha256=wszU2YROZAQovyz1ZNRvTtsuJ5By_x4SF6yjtmItcNk,12210
185
185
  deepeval/key_handler.py,sha256=damdQEBLGy4IVk5DR5-E3blIZdLbcMtyeGAFn_4_SG4,6505
186
186
  deepeval/metrics/__init__.py,sha256=xofaK_bJq0QCSerSWYjHYRXXch9YQwZHxIfVAv1G7fo,4012
187
187
  deepeval/metrics/answer_relevancy/__init__.py,sha256=WbZUpoSg2GQoqJ4VIRirVVQ1JDx5xwT-RskwqNKfWGM,46
@@ -373,8 +373,8 @@ deepeval/models/llms/anthropic_model.py,sha256=T55-jKRbM3_B3Db9M3ruklm2cVVU1JDGA
373
373
  deepeval/models/llms/azure_model.py,sha256=MG6sVGUgIy2RURwFWvRP7O_RF6QAg2dpqXIJhIsgY60,10994
374
374
  deepeval/models/llms/deepseek_model.py,sha256=mz0U0uqazAVr8vv8SF74GRTr4ZEVc3Q1v9o5TVbmz_8,5440
375
375
  deepeval/models/llms/gemini_model.py,sha256=QXf9mjopfWwJxpm0gbkXo6op_Wtu1GaIt1BfzS3OU8Q,8174
376
- deepeval/models/llms/grok_model.py,sha256=S_ROLB3NWWyyY5tzezln1xifjWRgwdVGOraIpQPjF2Q,5987
377
- deepeval/models/llms/kimi_model.py,sha256=1axzi_DgtVDno8tb9U8FrRbqKK6LSeL7sQEuQqEsM5A,6727
376
+ deepeval/models/llms/grok_model.py,sha256=zPBmPnNCRGrtg_709gFv5A4iz7WilTmDpAyOpjXTa_M,5986
377
+ deepeval/models/llms/kimi_model.py,sha256=ZcvEwWgnv1dtmbq7LgMQJAjpkjxZr-l5eBi9KGqRbb0,6726
378
378
  deepeval/models/llms/litellm_model.py,sha256=iu4-_JCpd9LdEa-eCWseD2iLTA-r7OSgYGWQ0IxB4eA,11527
379
379
  deepeval/models/llms/local_model.py,sha256=PeF6ofMR8jBMTLzkCkgmkBJix9kHbWV5vTKGx8nehFs,3605
380
380
  deepeval/models/llms/ollama_model.py,sha256=foL6sMza37Z0HH8qPStyIr1g-xEaD6Ce53L2C8Er-P8,3055
@@ -454,13 +454,13 @@ deepeval/tracing/otel/__init__.py,sha256=HQsaF5yLPwyW5qg8AOV81_nG_7pFHnatOTHi9Wx
454
454
  deepeval/tracing/otel/exporter.py,sha256=dXQd834zm5rm1ss9pWkBBlk-JSdtiw7aFLso2hM53XY,26372
455
455
  deepeval/tracing/otel/utils.py,sha256=g8yAzhqbPh1fOKCWkfNekC6AVotLfu1SUcfNMo6zii8,9786
456
456
  deepeval/tracing/patchers.py,sha256=DAPNkhrDtoeyJIVeQDUMhTz-xGcXu00eqjQZmov8FiU,3096
457
- deepeval/tracing/perf_epoch_bridge.py,sha256=9g5bdQMKrhXtlS5LgC-oDNE7foe307DE07NqkusW6GU,1800
457
+ deepeval/tracing/perf_epoch_bridge.py,sha256=iyAPddB6Op7NpMtPHJ29lDm53Btz9yLaN6xSCfTRQm4,1825
458
458
  deepeval/tracing/tracing.py,sha256=StvwFEG3MG67n7PBEyDDycdj0myMbP3LMB_FBhaZH-Y,38741
459
459
  deepeval/tracing/types.py,sha256=3w5HEI6y4zuzVr8xGEEzDviLZCX_s_pK85qbwnyf1aY,5196
460
460
  deepeval/tracing/utils.py,sha256=eTEickbDvRiOu1twNolh4sHnjZF49vqdLgI74BudeTw,6357
461
- deepeval/utils.py,sha256=SSCk7LLZQG-phJG8cuVPX-C21ZDCa_D2jWw0uJb6Ov8,16845
462
- deepeval-3.4.9.dist-info/LICENSE.md,sha256=0ATkuLv6QgsJTBODUHC5Rak_PArA6gv2t7inJzNTP38,11352
463
- deepeval-3.4.9.dist-info/METADATA,sha256=_iUKm_ZCpwntM0u23zLZBQeTjPV4ogVwr5zX-K4BpN4,18732
464
- deepeval-3.4.9.dist-info/WHEEL,sha256=d2fvjOD7sXsVzChCqf0Ty0JbHKBaLYwDbGQDwQTnJ50,88
465
- deepeval-3.4.9.dist-info/entry_points.txt,sha256=fVr8UphXTfJe9I2rObmUtfU3gkSrYeM0pLy-NbJYg10,94
466
- deepeval-3.4.9.dist-info/RECORD,,
461
+ deepeval/utils.py,sha256=EimWDwI1pKCE8vl6kuTnGbGT6ep9zHL5sZ0o-gj49XI,16857
462
+ deepeval-3.5.0.dist-info/LICENSE.md,sha256=0ATkuLv6QgsJTBODUHC5Rak_PArA6gv2t7inJzNTP38,11352
463
+ deepeval-3.5.0.dist-info/METADATA,sha256=KBAB5m11q4GAhVwCJBmXZDtaYtKoAO3sQ0vg-ajFRLg,18682
464
+ deepeval-3.5.0.dist-info/WHEEL,sha256=d2fvjOD7sXsVzChCqf0Ty0JbHKBaLYwDbGQDwQTnJ50,88
465
+ deepeval-3.5.0.dist-info/entry_points.txt,sha256=fVr8UphXTfJe9I2rObmUtfU3gkSrYeM0pLy-NbJYg10,94
466
+ deepeval-3.5.0.dist-info/RECORD,,
@@ -1,364 +0,0 @@
1
- from deepeval.telemetry import capture_tracing_integration
2
- from deepeval.metrics import BaseMetric
3
- from typing import List, Optional
4
- import functools
5
- import inspect
6
- import json
7
- from deepeval.test_case import LLMTestCase
8
- from deepeval.tracing.types import TestCaseMetricPair
9
- from deepeval.tracing.tracing import trace_manager
10
- from deepeval.tracing.otel.utils import parse_string, parse_list_of_strings
11
- from opentelemetry import trace
12
-
13
- try:
14
- from opentelemetry.trace import NoOpTracer
15
-
16
- opentelemetry_installed = True
17
- except:
18
- opentelemetry_installed = False
19
-
20
-
21
- def is_opentelemetry_available():
22
- if not opentelemetry_installed:
23
- raise ImportError(
24
- "OpenTelemetry SDK is not available. Please install it with `pip install opentelemetry-sdk`."
25
- )
26
- return True
27
-
28
-
29
- try:
30
- from pydantic_ai.agent import Agent
31
- from pydantic_ai.models.instrumented import InstrumentedModel
32
-
33
- pydantic_ai_installed = True
34
- except:
35
- pydantic_ai_installed = False
36
-
37
-
38
- def is_pydantic_ai_installed():
39
- if not pydantic_ai_installed:
40
- raise ImportError(
41
- "Pydantic AI is not installed. Please install it with `pip install pydantic-ai`."
42
- )
43
-
44
-
45
- class PydanticAIAgent(Agent):
46
- def __init__(self, *args, **kwargs):
47
- with capture_tracing_integration("pydantic_ai.agent.PydanticAIAgent"):
48
- is_pydantic_ai_installed()
49
- is_opentelemetry_available()
50
-
51
- super().__init__(*args, **kwargs)
52
-
53
- # attributes to be set if ran synchronously
54
- self.metric_collection: str = None
55
- self.metrics: list[BaseMetric] = None
56
-
57
- # trace attributes to be set if ran synchronously
58
- self._trace_name: str = None
59
- self._trace_tags: list[str] = None
60
- self._trace_metadata: dict = None
61
- self._trace_thread_id: str = None
62
- self._trace_user_id: str = None
63
-
64
- # Patch the run method only for this instance
65
- self._patch_run_method()
66
- self._patch_run_method_sync()
67
- self._patch_tool_decorator()
68
-
69
- def _patch_tool_decorator(self):
70
- """Patch the tool decorator to print input and output"""
71
- original_tool = self.tool
72
-
73
- @functools.wraps(original_tool)
74
- def patched_tool(
75
- *args,
76
- metric_collection: Optional[str] = None,
77
- metrics: Optional[List[BaseMetric]] = None,
78
- **kwargs
79
- ):
80
-
81
- # Check if function is in args (direct decoration: @agent.tool)
82
- if args and callable(args[0]):
83
- original_func = args[0]
84
- patched_func = self._create_patched_function(
85
- original_func, metric_collection, metrics
86
- )
87
- new_args = (patched_func,) + args[1:]
88
- result = original_tool(*new_args, **kwargs)
89
- return result
90
- else:
91
- # Decorator called with parameters: @agent.tool(metric_collection="...")
92
- # Return a decorator that will receive the function
93
- def decorator_with_params(func):
94
- patched_func = self._create_patched_function(
95
- func, metric_collection, metrics
96
- )
97
- return original_tool(patched_func, **kwargs)
98
-
99
- return decorator_with_params
100
-
101
- # Replace the tool method for this instance
102
- self.tool = patched_tool
103
-
104
- def _create_patched_function(
105
- self, original_func, metric_collection, metrics
106
- ):
107
- """Create a patched version of the function that adds tracing"""
108
- if inspect.iscoroutinefunction(original_func):
109
-
110
- @functools.wraps(original_func)
111
- async def patched_async_func(*func_args, **func_kwargs):
112
- result = await original_func(*func_args, **func_kwargs)
113
-
114
- current_span = trace.get_current_span()
115
- if current_span.is_recording():
116
- try:
117
- result_str = str(result)
118
- except Exception:
119
- result_str = ""
120
- current_span.set_attribute(
121
- "confident.span.output", result_str
122
- )
123
- if metric_collection:
124
- current_span.set_attribute(
125
- "confident.span.metric_collection",
126
- metric_collection,
127
- )
128
- # TODO: add metrics in component level evals
129
- return result
130
-
131
- return patched_async_func
132
- else:
133
-
134
- @functools.wraps(original_func)
135
- def patched_sync_func(*func_args, **func_kwargs):
136
- result = original_func(*func_args, **func_kwargs)
137
-
138
- current_span = trace.get_current_span()
139
- if current_span.is_recording():
140
- try:
141
- result_str = str(result)
142
- except Exception:
143
- result_str = ""
144
- current_span.set_attribute(
145
- "confident.span.output", result_str
146
- )
147
- if metric_collection:
148
- current_span.set_attribute(
149
- "confident.span.metric_collection",
150
- metric_collection,
151
- )
152
- # TODO: add metrics in component level evals
153
- return result
154
-
155
- return patched_sync_func
156
-
157
- def _patch_run_method(self):
158
- """Patch the Agent.run method only for this PydanticAIAgent instance"""
159
- original_run = self.run
160
-
161
- @functools.wraps(original_run)
162
- async def patched_run(
163
- *args,
164
- metric_collection=None,
165
- metrics=None,
166
- trace_name=None,
167
- trace_tags=None,
168
- trace_metadata=None,
169
- trace_thread_id=None,
170
- trace_user_id=None,
171
- **kwargs
172
- ):
173
- # extract and validate flattened arguments - use safe pop with defaults
174
- metric_collection = parse_string(metric_collection)
175
- trace_name = parse_string(trace_name)
176
- trace_tags = parse_list_of_strings(trace_tags)
177
- trace_thread_id = parse_string(trace_thread_id)
178
- trace_user_id = parse_string(trace_user_id)
179
-
180
- if metrics is not None and not (
181
- isinstance(metrics, list)
182
- and all(isinstance(m, BaseMetric) for m in metrics)
183
- ):
184
- raise TypeError(
185
- "metrics must be a list of BaseMetric instances"
186
- )
187
-
188
- if trace_metadata is not None and not isinstance(
189
- trace_metadata, dict
190
- ):
191
- raise TypeError("trace_metadata must be a dictionary")
192
-
193
- model = kwargs.get("model", None)
194
- infer_name = kwargs.get("infer_name", True)
195
-
196
- if infer_name and self.name is None:
197
- self._infer_name(inspect.currentframe())
198
- model_used = self._get_model(model)
199
- del model
200
-
201
- if isinstance(model_used, InstrumentedModel):
202
- tracer = model_used.instrumentation_settings.tracer
203
- else:
204
- tracer = NoOpTracer()
205
-
206
- with tracer.start_as_current_span("agent") as run_span:
207
- result = await original_run(*args, **kwargs)
208
-
209
- name = "agent"
210
- if self.name:
211
- name = str(self.name)
212
-
213
- input = ""
214
- if isinstance(args[0], str):
215
- input = args[0]
216
- elif isinstance(args[0], list) and all(
217
- isinstance(i, str) for i in args[0]
218
- ):
219
- input = args[0]
220
-
221
- output = ""
222
- try:
223
- output = str(result.output)
224
- except Exception:
225
- pass
226
-
227
- # set agent span attributes
228
- run_span.set_attribute("confident.span.type", "agent")
229
- run_span.set_attribute("confident.agent.name", name)
230
- run_span.set_attribute("confident.agent.input", input)
231
- run_span.set_attribute("confident.agent.output", output)
232
-
233
- # fallback for input and output not being set
234
- run_span.set_attribute("confident.span.input", input)
235
- run_span.set_attribute("confident.span.output", output)
236
-
237
- if metric_collection: # flattened argument to be replaced
238
- run_span.set_attribute(
239
- "confident.span.metric_collection", metric_collection
240
- )
241
- elif self.metric_collection: # for run_sync
242
- run_span.set_attribute(
243
- "confident.span.metric_collection",
244
- self.metric_collection,
245
- )
246
-
247
- # set the flattened trace attributes
248
- if trace_name:
249
- run_span.set_attribute("confident.trace.name", trace_name)
250
- if trace_tags:
251
- run_span.set_attribute("confident.trace.tags", trace_tags)
252
- if trace_metadata:
253
- run_span.set_attribute(
254
- "confident.trace.metadata", json.dumps(trace_metadata)
255
- )
256
- if trace_thread_id:
257
- run_span.set_attribute(
258
- "confident.trace.thread_id", trace_thread_id
259
- )
260
- if trace_user_id:
261
- run_span.set_attribute(
262
- "confident.trace.user_id", trace_user_id
263
- )
264
-
265
- # for run_sync
266
- if self._trace_name:
267
- run_span.set_attribute(
268
- "confident.trace.name", self._trace_name
269
- )
270
- if self._trace_tags:
271
- run_span.set_attribute(
272
- "confident.trace.tags", self._trace_tags
273
- )
274
- if self._trace_metadata:
275
- run_span.set_attribute(
276
- "confident.trace.metadata",
277
- json.dumps(self._trace_metadata),
278
- )
279
- if self._trace_thread_id:
280
- run_span.set_attribute(
281
- "confident.trace.thread_id", self._trace_thread_id
282
- )
283
- if self._trace_user_id:
284
- run_span.set_attribute(
285
- "confident.trace.user_id", self._trace_user_id
286
- )
287
-
288
- if metrics: # flattened argument to be replaced
289
- trace_manager.test_case_metrics.append(
290
- TestCaseMetricPair(
291
- test_case=LLMTestCase(
292
- input=input, actual_output=output
293
- ),
294
- metrics=metrics,
295
- )
296
- )
297
- elif self.metrics: # for run_sync
298
- trace_manager.test_case_metrics.append(
299
- TestCaseMetricPair(
300
- test_case=LLMTestCase(
301
- input=input, actual_output=output
302
- ),
303
- metrics=self.metrics,
304
- )
305
- )
306
-
307
- return result
308
-
309
- # Replace the method only for this instance
310
- self.run = patched_run
311
-
312
- def _patch_run_method_sync(self):
313
- """Patch the Agent.run method only for this PydanticAIAgent instance"""
314
- original_run = self.run_sync
315
-
316
- @functools.wraps(original_run)
317
- def patched_run(
318
- *args,
319
- metric_collection=None,
320
- metrics=None,
321
- trace_name=None,
322
- trace_tags=None,
323
- trace_metadata=None,
324
- trace_thread_id=None,
325
- trace_user_id=None,
326
- **kwargs
327
- ):
328
- metric_collection = parse_string(metric_collection)
329
- trace_name = parse_string(trace_name)
330
- trace_tags = parse_list_of_strings(trace_tags)
331
- trace_thread_id = parse_string(trace_thread_id)
332
- trace_user_id = parse_string(trace_user_id)
333
-
334
- if metrics is not None and not (
335
- isinstance(metrics, list)
336
- and all(isinstance(m, BaseMetric) for m in metrics)
337
- ):
338
- raise TypeError(
339
- "metrics must be a list of BaseMetric instances"
340
- )
341
-
342
- if trace_metadata is not None and not isinstance(
343
- trace_metadata, dict
344
- ):
345
- raise TypeError("trace_metadata must be a dictionary")
346
-
347
- # attributes to be set if ran synchronously
348
- if metric_collection:
349
- self.metric_collection = metric_collection
350
- if metrics:
351
- self.metrics = metrics
352
-
353
- self._trace_name = trace_name
354
- self._trace_tags = trace_tags
355
- self._trace_metadata = trace_metadata
356
- self._trace_thread_id = trace_thread_id
357
- self._trace_user_id = trace_user_id
358
-
359
- result = original_run(*args, **kwargs)
360
-
361
- return result
362
-
363
- # Replace the method only for this instance
364
- self.run_sync = patched_run