deepeval-3.6.9-py3-none-any.whl → deepeval-3.7.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68)
  1. deepeval/__init__.py +0 -4
  2. deepeval/_version.py +1 -1
  3. deepeval/anthropic/__init__.py +19 -0
  4. deepeval/anthropic/extractors.py +94 -0
  5. deepeval/anthropic/patch.py +169 -0
  6. deepeval/anthropic/utils.py +225 -0
  7. deepeval/benchmarks/drop/drop.py +40 -14
  8. deepeval/benchmarks/ifeval/ifeval.py +2 -2
  9. deepeval/cli/main.py +7 -0
  10. deepeval/confident/api.py +6 -1
  11. deepeval/confident/types.py +4 -2
  12. deepeval/config/settings.py +159 -11
  13. deepeval/config/settings_manager.py +4 -0
  14. deepeval/evaluate/compare.py +215 -4
  15. deepeval/evaluate/types.py +6 -0
  16. deepeval/evaluate/utils.py +30 -0
  17. deepeval/integrations/crewai/handler.py +36 -0
  18. deepeval/integrations/langchain/callback.py +27 -2
  19. deepeval/integrations/llama_index/handler.py +58 -4
  20. deepeval/integrations/llama_index/utils.py +24 -0
  21. deepeval/key_handler.py +1 -0
  22. deepeval/metrics/__init__.py +5 -0
  23. deepeval/metrics/arena_g_eval/arena_g_eval.py +5 -1
  24. deepeval/metrics/arena_g_eval/utils.py +5 -5
  25. deepeval/metrics/conversational_g_eval/conversational_g_eval.py +9 -18
  26. deepeval/metrics/exact_match/__init__.py +0 -0
  27. deepeval/metrics/exact_match/exact_match.py +94 -0
  28. deepeval/metrics/g_eval/g_eval.py +5 -1
  29. deepeval/metrics/g_eval/utils.py +1 -1
  30. deepeval/metrics/multimodal_metrics/multimodal_g_eval/multimodal_g_eval.py +5 -1
  31. deepeval/metrics/pattern_match/__init__.py +0 -0
  32. deepeval/metrics/pattern_match/pattern_match.py +103 -0
  33. deepeval/metrics/task_completion/task_completion.py +9 -2
  34. deepeval/metrics/utils.py +1 -1
  35. deepeval/model_integrations/__init__.py +0 -0
  36. deepeval/model_integrations/utils.py +116 -0
  37. deepeval/models/base_model.py +3 -1
  38. deepeval/models/llms/gemini_model.py +27 -5
  39. deepeval/openai/__init__.py +3 -1
  40. deepeval/openai/extractors.py +2 -2
  41. deepeval/openai/utils.py +7 -31
  42. deepeval/openai_agents/callback_handler.py +12 -3
  43. deepeval/prompt/api.py +11 -10
  44. deepeval/prompt/prompt.py +27 -15
  45. deepeval/simulator/template.py +1 -1
  46. deepeval/telemetry.py +3 -3
  47. deepeval/test_case/__init__.py +2 -1
  48. deepeval/test_case/arena_test_case.py +15 -4
  49. deepeval/test_case/llm_test_case.py +3 -2
  50. deepeval/test_case/mllm_test_case.py +45 -22
  51. deepeval/test_run/api.py +3 -2
  52. deepeval/test_run/cache.py +35 -13
  53. deepeval/test_run/hyperparameters.py +5 -1
  54. deepeval/test_run/test_run.py +52 -14
  55. deepeval/tracing/api.py +11 -10
  56. deepeval/tracing/otel/exporter.py +11 -0
  57. deepeval/tracing/patchers.py +102 -1
  58. deepeval/tracing/trace_context.py +13 -4
  59. deepeval/tracing/tracing.py +11 -2
  60. deepeval/tracing/types.py +8 -8
  61. deepeval/tracing/utils.py +9 -0
  62. deepeval/utils.py +48 -2
  63. {deepeval-3.6.9.dist-info → deepeval-3.7.1.dist-info}/METADATA +3 -3
  64. {deepeval-3.6.9.dist-info → deepeval-3.7.1.dist-info}/RECORD +68 -58
  65. /deepeval/{openai → model_integrations}/types.py +0 -0
  66. {deepeval-3.6.9.dist-info → deepeval-3.7.1.dist-info}/LICENSE.md +0 -0
  67. {deepeval-3.6.9.dist-info → deepeval-3.7.1.dist-info}/WHEEL +0 -0
  68. {deepeval-3.6.9.dist-info → deepeval-3.7.1.dist-info}/entry_points.txt +0 -0
@@ -11,33 +11,50 @@ from deepeval.test_case import ToolCall
11
11
 
12
12
  @dataclass
13
13
  class MLLMImage:
14
- url: str
14
+ dataBase64: Optional[str] = None
15
+ mimeType: Optional[str] = None
16
+ url: Optional[str] = None
15
17
  local: Optional[bool] = None
16
- filename: Optional[str] = field(default=None, init=False, repr=False)
17
- mimeType: Optional[str] = field(default=None, init=False, repr=False)
18
- dataBase64: Optional[str] = field(default=None, init=False, repr=False)
18
+ filename: Optional[str] = None
19
19
 
20
20
  def __post_init__(self):
21
- is_local = self.is_local_path(self.url)
22
- if self.local is not None:
23
- assert self.local == is_local, "Local path mismatch"
24
- else:
25
- self.local = is_local
26
-
27
- # compute filename, mime_type, and Base64 data
28
- if self.local:
29
- path = self.process_url(self.url)
30
- self.filename = os.path.basename(path)
31
- self.mimeType = (
32
- mimetypes.guess_type(path)[0] or "application/octet-stream"
21
+
22
+ if self.url and self.dataBase64:
23
+ raise ValueError(
24
+ "You cannot provide both 'url' and 'dataBase64' at the same time when creating an MLLMImage."
25
+ )
26
+
27
+ if not self.url and not self.dataBase64:
28
+ raise ValueError(
29
+ "You must provide either a 'url' or both 'dataBase64' and 'mimeType' to create an MLLMImage."
33
30
  )
34
- with open(path, "rb") as f:
35
- raw = f.read()
36
- self.dataBase64 = base64.b64encode(raw).decode("ascii")
31
+
32
+ if self.dataBase64 is not None:
33
+ if self.mimeType is None:
34
+ raise ValueError(
35
+ "mimeType must be provided when initializing from Base64 data."
36
+ )
37
37
  else:
38
- self.filename = None
39
- self.mimeType = None
40
- self.dataBase64 = None
38
+ is_local = self.is_local_path(self.url)
39
+ if self.local is not None:
40
+ assert self.local == is_local, "Local path mismatch"
41
+ else:
42
+ self.local = is_local
43
+
44
+ # compute filename, mime_type, and Base64 data
45
+ if self.local:
46
+ path = self.process_url(self.url)
47
+ self.filename = os.path.basename(path)
48
+ self.mimeType = (
49
+ mimetypes.guess_type(path)[0] or "application/octet-stream"
50
+ )
51
+ with open(path, "rb") as f:
52
+ raw = f.read()
53
+ self.dataBase64 = base64.b64encode(raw).decode("ascii")
54
+ else:
55
+ self.filename = None
56
+ self.mimeType = None
57
+ self.dataBase64 = None
41
58
 
42
59
  @staticmethod
43
60
  def process_url(url: str) -> str:
@@ -69,6 +86,12 @@ class MLLMImage:
69
86
  return os.path.exists(path)
70
87
  return False
71
88
 
89
+ def as_data_uri(self) -> Optional[str]:
90
+ """Return the image as a data URI string, if Base64 data is available."""
91
+ if not self.dataBase64 or not self.mimeType:
92
+ return None
93
+ return f"data:{self.mimeType};base64,{self.dataBase64}"
94
+
72
95
 
73
96
  class MLLMTestCaseParams(Enum):
74
97
  INPUT = "input"
deepeval/test_run/api.py CHANGED
@@ -1,8 +1,9 @@
1
- from pydantic import BaseModel, Field, ConfigDict
1
+ from pydantic import BaseModel, Field
2
2
  from typing import Optional, List, Union, Dict
3
3
 
4
4
  from deepeval.test_case import MLLMImage, ToolCall
5
5
  from deepeval.tracing.api import TraceApi, MetricData
6
+ from deepeval.utils import make_model_config
6
7
 
7
8
 
8
9
  class LLMApiTestCase(BaseModel):
@@ -49,7 +50,7 @@ class LLMApiTestCase(BaseModel):
49
50
  comments: Optional[str] = Field(None)
50
51
  trace: Optional[TraceApi] = Field(None)
51
52
 
52
- model_config = ConfigDict(arbitrary_types_allowed=True)
53
+ model_config = make_model_config(arbitrary_types_allowed=True)
53
54
  # metric_collection: Optional[str] = Field(None, alias="metricCollection")
54
55
 
55
56
  def update_metric_data(self, metric_data: MetricData):
@@ -1,25 +1,44 @@
1
- import portalocker
1
+ import logging
2
2
  import sys
3
3
  import json
4
4
  import os
5
- from typing import List, Optional, Union, Dict, Union
5
+ from typing import List, Optional, Dict, Union
6
6
  from enum import Enum
7
7
  from pydantic import BaseModel, Field
8
8
 
9
+ from deepeval.utils import make_model_config
10
+
9
11
  from deepeval.test_case import LLMTestCaseParams, LLMTestCase, ToolCallParams
10
12
  from deepeval.test_run.api import MetricData
11
13
  from deepeval.utils import (
12
14
  delete_file_if_exists,
15
+ is_read_only_env,
13
16
  serialize,
14
17
  )
15
18
  from deepeval.metrics import BaseMetric
16
19
  from deepeval.constants import HIDDEN_DIR
17
20
 
21
+
22
+ logger = logging.getLogger(__name__)
23
+
24
+
25
+ portalocker = None
26
+ if not is_read_only_env():
27
+ try:
28
+ import portalocker
29
+ except Exception as e:
30
+ logger.warning("failed to import portalocker: %s", e)
31
+ else:
32
+ logger.warning("READ_ONLY filesystem: skipping disk cache for test runs.")
33
+
34
+
18
35
  CACHE_FILE_NAME = f"{HIDDEN_DIR}/.deepeval-cache.json"
19
36
  TEMP_CACHE_FILE_NAME = f"{HIDDEN_DIR}/.temp-deepeval-cache.json"
20
37
 
21
38
 
22
39
  class MetricConfiguration(BaseModel):
40
+ model_config = make_model_config(arbitrary_types_allowed=True)
41
+
23
42
  ##### Required fields #####
24
43
  threshold: float
25
44
  evaluation_model: Optional[str] = None
@@ -36,9 +55,6 @@ class MetricConfiguration(BaseModel):
36
55
  Union[List[LLMTestCaseParams], List[ToolCallParams]]
37
56
  ] = None
38
57
 
39
- class Config:
40
- arbitrary_types_allowed = True
41
-
42
58
 
43
59
  class CachedMetricData(BaseModel):
44
60
  metric_data: MetricData
@@ -96,7 +112,7 @@ class TestRunCacheManager:
96
112
  def get_cached_test_case(
97
113
  self, test_case: LLMTestCase, hyperparameters: Union[Dict, None]
98
114
  ) -> Union[CachedTestCase, None]:
99
- if self.disable_write_cache:
115
+ if self.disable_write_cache or portalocker is None:
100
116
  return None
101
117
 
102
118
  cached_test_run = self.get_cached_test_run()
@@ -121,7 +137,7 @@ class TestRunCacheManager:
121
137
  hyperparameters: Union[Dict, None],
122
138
  to_temp: bool = False,
123
139
  ):
124
- if self.disable_write_cache:
140
+ if self.disable_write_cache or portalocker is None:
125
141
  return
126
142
  cache_dict = {
127
143
  LLMTestCaseParams.INPUT.value: test_case.input,
@@ -141,7 +157,7 @@ class TestRunCacheManager:
141
157
  def set_cached_test_run(
142
158
  self, cached_test_run: CachedTestRun, temp: bool = False
143
159
  ):
144
- if self.disable_write_cache:
160
+ if self.disable_write_cache or portalocker is None:
145
161
  return
146
162
 
147
163
  if temp:
@@ -150,7 +166,7 @@ class TestRunCacheManager:
150
166
  self.cached_test_run = cached_test_run
151
167
 
152
168
  def save_cached_test_run(self, to_temp: bool = False):
153
- if self.disable_write_cache:
169
+ if self.disable_write_cache or portalocker is None:
154
170
  return
155
171
 
156
172
  if to_temp:
@@ -177,7 +193,7 @@ class TestRunCacheManager:
177
193
  )
178
194
 
179
195
  def create_cached_test_run(self, temp: bool = False):
180
- if self.disable_write_cache:
196
+ if self.disable_write_cache or portalocker is None:
181
197
  return
182
198
 
183
199
  cached_test_run = CachedTestRun()
@@ -187,7 +203,7 @@ class TestRunCacheManager:
187
203
  def get_cached_test_run(
188
204
  self, from_temp: bool = False
189
205
  ) -> Union[CachedTestRun, None]:
190
- if self.disable_write_cache:
206
+ if self.disable_write_cache or portalocker is None:
191
207
  return
192
208
 
193
209
  should_create_cached_test_run = False
@@ -208,7 +224,7 @@ class TestRunCacheManager:
208
224
  try:
209
225
  data = json.loads(content)
210
226
  self.temp_cached_test_run = CachedTestRun.load(data)
211
- except Exception as e:
227
+ except Exception:
212
228
  should_create_cached_test_run = True
213
229
  except portalocker.exceptions.LockException as e:
214
230
  print(
@@ -216,6 +232,9 @@ class TestRunCacheManager:
216
232
  file=sys.stderr,
217
233
  )
218
234
 
235
+ if should_create_cached_test_run:
236
+ self.create_cached_test_run(temp=from_temp)
237
+
219
238
  return self.temp_cached_test_run
220
239
  else:
221
240
  if self.cached_test_run:
@@ -249,6 +268,9 @@ class TestRunCacheManager:
249
268
  return self.cached_test_run
250
269
 
251
270
  def wrap_up_cached_test_run(self):
271
+ if portalocker is None:
272
+ return
273
+
252
274
  if self.disable_write_cache:
253
275
  # Clear cache if write cache is disabled
254
276
  delete_file_if_exists(self.cache_file_name)
@@ -329,7 +351,7 @@ class Cache:
329
351
  if criteria_value != cached_criteria_value:
330
352
  return False
331
353
  continue
332
- except:
354
+ except Exception:
333
355
  # For non-GEval
334
356
  continue
335
357
 
@@ -33,7 +33,11 @@ def process_hyperparameters(
33
33
  )
34
34
 
35
35
  if isinstance(value, Prompt):
36
- prompt_key = f"{value.alias}_{value.version}"
36
+ try:
37
+ prompt_key = f"{value.alias}_{value.version}"
38
+ except AttributeError:
39
+ prompt_key = f"{value.alias}_00.00.01"
40
+
37
41
  if value._prompt_version_id is not None and value.type is not None:
38
42
  processed_hyperparameters[key] = PromptApi(
39
43
  id=value._prompt_version_id,
@@ -6,11 +6,11 @@ from typing import Any, Optional, List, Dict, Union, Tuple
6
6
  import shutil
7
7
  import sys
8
8
  import datetime
9
- import portalocker
10
9
  from rich.table import Table
11
10
  from rich.console import Console
12
11
  from rich import print
13
12
 
13
+
14
14
  from deepeval.metrics import BaseMetric
15
15
  from deepeval.confident.api import Api, Endpoints, HttpMethods, is_confident
16
16
  from deepeval.test_run.api import (
@@ -25,6 +25,7 @@ from deepeval.test_case import LLMTestCase, ConversationalTestCase, MLLMTestCase
25
25
  from deepeval.utils import (
26
26
  delete_file_if_exists,
27
27
  get_is_running_deepeval,
28
+ is_read_only_env,
28
29
  open_browser,
29
30
  shorten,
30
31
  format_turn,
@@ -42,6 +43,21 @@ from rich.panel import Panel
42
43
  from rich.columns import Columns
43
44
 
44
45
 
46
+ portalocker = None
47
+ if not is_read_only_env():
48
+ try:
49
+ import portalocker
50
+ except Exception as e:
51
+ print(
52
+ f"Warning: failed to import portalocker: {e}",
53
+ file=sys.stderr,
54
+ )
55
+ else:
56
+ print(
57
+ "Warning: DeepEval is configured for read only environment. Test runs will not be written to disk."
58
+ )
59
+
60
+
45
61
  TEMP_FILE_PATH = f"{HIDDEN_DIR}/.temp_test_run_data.json"
46
62
  LATEST_TEST_RUN_FILE_PATH = f"{HIDDEN_DIR}/.latest_test_run.json"
47
63
  LATEST_TEST_RUN_DATA_KEY = "testRunData"
@@ -456,26 +472,36 @@ class TestRunManager:
456
472
  if self.test_run is None:
457
473
  self.create_test_run(identifier=identifier)
458
474
 
459
- if self.save_to_disk:
475
+ if portalocker and self.save_to_disk:
460
476
  try:
461
477
  with portalocker.Lock(
462
478
  self.temp_file_path,
463
479
  mode="r",
464
480
  flags=portalocker.LOCK_SH | portalocker.LOCK_NB,
465
481
  ) as file:
466
- self.test_run = self.test_run.load(file)
482
+ loaded = self.test_run.load(file)
483
+ # only overwrite if loading actually worked
484
+ self.test_run = loaded
467
485
  except (
468
486
  FileNotFoundError,
487
+ json.JSONDecodeError,
469
488
  portalocker.exceptions.LockException,
470
489
  ) as e:
471
- print(f"Error loading test run from disk: {e}", file=sys.stderr)
472
- self.test_run = None
490
+ print(
491
+ f"Warning: Could not load test run from disk: {e}",
492
+ file=sys.stderr,
493
+ )
473
494
 
474
495
  return self.test_run
475
496
 
476
497
  def save_test_run(self, path: str, save_under_key: Optional[str] = None):
477
- if self.save_to_disk:
498
+ if portalocker and self.save_to_disk:
478
499
  try:
500
+ # ensure parent directory exists
501
+ parent = os.path.dirname(path)
502
+ if parent:
503
+ os.makedirs(parent, exist_ok=True)
504
+
479
505
  with portalocker.Lock(path, mode="w") as file:
480
506
  if save_under_key:
481
507
  try:
@@ -495,11 +521,14 @@ class TestRunManager:
495
521
  pass
496
522
 
497
523
  def save_final_test_run_link(self, link: str):
498
- try:
499
- with portalocker.Lock(LATEST_TEST_RUN_FILE_PATH, mode="w") as file:
500
- json.dump({LATEST_TEST_RUN_LINK_KEY: link}, file)
501
- except portalocker.exceptions.LockException:
502
- pass
524
+ if portalocker:
525
+ try:
526
+ with portalocker.Lock(
527
+ LATEST_TEST_RUN_FILE_PATH, mode="w"
528
+ ) as file:
529
+ json.dump({LATEST_TEST_RUN_LINK_KEY: link}, file)
530
+ except portalocker.exceptions.LockException:
531
+ pass
503
532
 
504
533
  def update_test_run(
505
534
  self,
@@ -513,7 +542,7 @@ class TestRunManager:
513
542
  ):
514
543
  return
515
544
 
516
- if self.save_to_disk:
545
+ if portalocker and self.save_to_disk:
517
546
  try:
518
547
  with portalocker.Lock(
519
548
  self.temp_file_path,
@@ -533,10 +562,19 @@ class TestRunManager:
533
562
  self.test_run.save(file)
534
563
  except (
535
564
  FileNotFoundError,
565
+ json.JSONDecodeError,
536
566
  portalocker.exceptions.LockException,
537
567
  ) as e:
538
- print(f"Error updating test run to disk: {e}", file=sys.stderr)
539
- self.test_run = None
568
+ print(
569
+ f"Warning: Could not update test run on disk: {e}",
570
+ file=sys.stderr,
571
+ )
572
+ if self.test_run is None:
573
+ # guarantee a valid in-memory run so the update can proceed.
574
+ # never destroy in-memory state on I/O failure.
575
+ self.create_test_run()
576
+ self.test_run.add_test_case(api_test_case)
577
+ self.test_run.set_dataset_properties(test_case)
540
578
  else:
541
579
  if self.test_run is None:
542
580
  self.create_test_run()
deepeval/tracing/api.py CHANGED
@@ -1,8 +1,9 @@
1
1
  from enum import Enum
2
2
  from typing import Dict, List, Optional, Union, Literal, Any
3
- from pydantic import BaseModel, ConfigDict, Field
3
+ from pydantic import BaseModel, Field
4
4
 
5
5
  from deepeval.test_case import ToolCall
6
+ from deepeval.utils import make_model_config
6
7
 
7
8
 
8
9
  class SpanApiType(Enum):
@@ -27,7 +28,7 @@ class PromptApi(BaseModel):
27
28
 
28
29
 
29
30
  class MetricData(BaseModel):
30
- model_config = ConfigDict(extra="ignore")
31
+ model_config = make_model_config(extra="ignore")
31
32
 
32
33
  name: str
33
34
  threshold: float
@@ -42,6 +43,10 @@ class MetricData(BaseModel):
42
43
 
43
44
 
44
45
  class BaseApiSpan(BaseModel):
46
+ model_config = make_model_config(
47
+ use_enum_values=True, validate_assignment=True
48
+ )
49
+
45
50
  uuid: str
46
51
  name: str = None
47
52
  status: TraceSpanApiStatus
@@ -96,12 +101,12 @@ class BaseApiSpan(BaseModel):
96
101
  metric_collection: Optional[str] = Field(None, alias="metricCollection")
97
102
  metrics_data: Optional[List[MetricData]] = Field(None, alias="metricsData")
98
103
 
99
- class Config:
100
- use_enum_values = True
101
- validate_assignment = True
102
-
103
104
 
104
105
  class TraceApi(BaseModel):
106
+ model_config = make_model_config(
107
+ use_enum_values=True, validate_assignment=True
108
+ )
109
+
105
110
  uuid: str
106
111
  base_spans: Optional[List[BaseApiSpan]] = Field(None, alias="baseSpans")
107
112
  agent_spans: Optional[List[BaseApiSpan]] = Field(None, alias="agentSpans")
@@ -139,7 +144,3 @@ class TraceApi(BaseModel):
139
144
 
140
145
  # Don't serialize these
141
146
  confident_api_key: Optional[str] = Field(None, exclude=True)
142
-
143
- class Config:
144
- use_enum_values = True
145
- validate_assignment = True
@@ -493,6 +493,17 @@ class ConfidentSpanExporter(SpanExporter):
493
493
  output_token_count = span.attributes.get(
494
494
  "confident.llm.output_token_count"
495
495
  )
496
+
497
+ # fallback to gen ai attributes if not found in confident attributes
498
+ if not input_token_count:
499
+ input_token_count = span.attributes.get(
500
+ "gen_ai.usage.input_tokens"
501
+ )
502
+ if not output_token_count:
503
+ output_token_count = span.attributes.get(
504
+ "gen_ai.usage.output_tokens"
505
+ )
506
+
496
507
  cost_per_input_token = span.attributes.get(
497
508
  "confident.llm.cost_per_input_token"
498
509
  )
@@ -1,6 +1,8 @@
1
- from openai import OpenAI
2
1
  import functools
3
2
 
3
+ from anthropic import Anthropic
4
+ from openai import OpenAI
5
+
4
6
  from deepeval.tracing.context import update_current_span, update_llm_span
5
7
  from deepeval.tracing.context import current_span_context
6
8
  from deepeval.tracing.types import LlmSpan
@@ -82,3 +84,102 @@ def patch_openai_client(client: OpenAI):
82
84
  return response
83
85
 
84
86
  setattr(current_obj, method_name, wrapped_method)
87
+
88
+
89
+ def patch_anthropic_client(client: Anthropic):
90
+ """
91
+ Patch an Anthropic client instance to add tracing capabilities.
92
+
93
+ Args:
94
+ client: An instance of Anthropic client to patch
95
+ """
96
+ original_methods = {}
97
+
98
+ methods_to_patch = [
99
+ "messages.create",
100
+ ]
101
+
102
+ for method_path in methods_to_patch:
103
+ parts = method_path.split(".")
104
+ current_obj = client
105
+
106
+ for part in parts[:-1]:
107
+ if not hasattr(current_obj, part):
108
+ print(f"Warning: Cannot find {part} in the path {method_path}")
109
+ continue
110
+ current_obj = getattr(current_obj, part)
111
+
112
+ method_name = parts[-1]
113
+ if not hasattr(current_obj, method_name):
114
+ print(
115
+ f"Warning: Cannot find method {method_name} in the path {method_path}"
116
+ )
117
+ continue
118
+
119
+ method = getattr(current_obj, method_name)
120
+
121
+ if callable(method) and not isinstance(method, type):
122
+ original_methods[method_path] = method
123
+
124
+ @functools.wraps(method)
125
+ def wrapped_method(*args, original_method=method, **kwargs):
126
+ current_span = current_span_context.get()
127
+ response = original_method(*args, **kwargs)
128
+
129
+ if isinstance(current_span, LlmSpan):
130
+ model = kwargs.get("model", None)
131
+ if model is None:
132
+ raise ValueError("model not found in client")
133
+
134
+ current_span.model = model
135
+
136
+ output = None
137
+ try:
138
+ if (
139
+ hasattr(response, "content")
140
+ and response.content
141
+ and len(response.content) > 0
142
+ ):
143
+ for block in response.content:
144
+ if hasattr(block, "text"):
145
+ output = block.text
146
+ break
147
+ except Exception:
148
+ pass
149
+
150
+ input_token_count = None
151
+ output_token_count = None
152
+ try:
153
+ if hasattr(response, "usage"):
154
+ usage = response.usage
155
+ # usage can be a dict or an object with attributes
156
+ if isinstance(usage, dict):
157
+ input_token_count = usage.get(
158
+ "input_tokens", None
159
+ )
160
+ output_token_count = usage.get(
161
+ "output_tokens", None
162
+ )
163
+ else:
164
+ input_token_count = getattr(
165
+ usage, "input_tokens", None
166
+ )
167
+ output_token_count = getattr(
168
+ usage, "output_tokens", None
169
+ )
170
+ except Exception:
171
+ pass
172
+
173
+ update_current_span(
174
+ input=kwargs.get("messages", "INPUT_MESSAGE_NOT_FOUND"),
175
+ output=output if output else "OUTPUT_MESSAGE_NOT_FOUND",
176
+ )
177
+ update_llm_span(
178
+ input_token_count=input_token_count,
179
+ output_token_count=output_token_count,
180
+ )
181
+ return response
182
+
183
+ setattr(current_obj, method_name, wrapped_method)
184
+
185
+ return original_methods
@@ -1,13 +1,15 @@
1
- from typing import Optional, List, Dict, Any
2
1
  from contextvars import ContextVar
3
2
  from contextlib import contextmanager
4
3
  from dataclasses import dataclass
4
+ from typing import Optional, List, Dict, Any
5
5
 
6
- from .tracing import trace_manager
7
- from .context import current_trace_context, update_current_trace
8
- from deepeval.prompt import Prompt
9
6
  from deepeval.metrics import BaseMetric
7
+ from deepeval.prompt import Prompt
10
8
  from deepeval.test_case.llm_test_case import ToolCall
9
+ from deepeval.tracing.context import current_trace_context, update_current_trace
10
+ from deepeval.tracing.tracing import trace_manager
11
+ from deepeval.tracing.types import TraceWorkerStatus
12
+ from deepeval.tracing.utils import is_async_context
11
13
 
12
14
 
13
15
  @dataclass
@@ -59,6 +61,13 @@ def trace(
59
61
  metrics: Optional[List[BaseMetric]] = None,
60
62
  metric_collection: Optional[str] = None,
61
63
  ):
64
+ if is_async_context():
65
+ trace_manager._print_trace_status(
66
+ message="Warning: Detected use of the synchronous 'trace' context manager within an async method",
67
+ trace_worker_status=TraceWorkerStatus.WARNING,
68
+ description="Wrapping an async method with the synchronous 'trace' context manager may lead to unexpected behavior.",
69
+ )
70
+
62
71
  current_trace = current_trace_context.get()
63
72
 
64
73
  if not current_trace:
@@ -19,6 +19,7 @@ import random
19
19
  import atexit
20
20
  import queue
21
21
  import uuid
22
+ from anthropic import Anthropic
22
23
  from openai import OpenAI
23
24
  from rich.console import Console
24
25
  from rich.progress import Progress
@@ -38,7 +39,10 @@ from deepeval.tracing.api import (
38
39
  TraceSpanApiStatus,
39
40
  )
40
41
  from deepeval.telemetry import capture_send_trace
41
- from deepeval.tracing.patchers import patch_openai_client
42
+ from deepeval.tracing.patchers import (
43
+ patch_anthropic_client,
44
+ patch_openai_client,
45
+ )
42
46
  from deepeval.tracing.types import (
43
47
  AgentSpan,
44
48
  BaseSpan,
@@ -111,6 +115,7 @@ class TraceManager:
111
115
 
112
116
  self.sampling_rate = settings.CONFIDENT_TRACE_SAMPLE_RATE
113
117
  validate_sampling_rate(self.sampling_rate)
118
+ self.anthropic_client = None
114
119
  self.openai_client = None
115
120
  self.tracing_enabled = True
116
121
 
@@ -139,7 +144,7 @@ class TraceManager:
139
144
 
140
145
  def mask(self, data: Any):
141
146
  if self.custom_mask_fn is not None:
142
- self.custom_mask_fn(data)
147
+ return self.custom_mask_fn(data)
143
148
  else:
144
149
  return data
145
150
 
@@ -149,6 +154,7 @@ class TraceManager:
149
154
  environment: Optional[str] = None,
150
155
  sampling_rate: Optional[float] = None,
151
156
  confident_api_key: Optional[str] = None,
157
+ anthropic_client: Optional[Anthropic] = None,
152
158
  openai_client: Optional[OpenAI] = None,
153
159
  tracing_enabled: Optional[bool] = None,
154
160
  ) -> None:
@@ -165,6 +171,9 @@ class TraceManager:
165
171
  if openai_client is not None:
166
172
  self.openai_client = openai_client
167
173
  patch_openai_client(openai_client)
174
+ if anthropic_client is not None:
175
+ self.anthropic_client = anthropic_client
176
+ patch_anthropic_client(anthropic_client)
168
177
  if tracing_enabled is not None:
169
178
  self.tracing_enabled = tracing_enabled
170
179