deepeval 3.6.7__py3-none-any.whl → 3.6.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80)
  1. deepeval/_version.py +1 -1
  2. deepeval/errors.py +20 -2
  3. deepeval/evaluate/execute.py +725 -217
  4. deepeval/evaluate/types.py +1 -0
  5. deepeval/evaluate/utils.py +13 -3
  6. deepeval/integrations/crewai/__init__.py +2 -1
  7. deepeval/integrations/crewai/tool.py +71 -0
  8. deepeval/integrations/llama_index/__init__.py +0 -4
  9. deepeval/integrations/llama_index/handler.py +20 -21
  10. deepeval/integrations/pydantic_ai/instrumentator.py +125 -76
  11. deepeval/metrics/__init__.py +13 -0
  12. deepeval/metrics/base_metric.py +1 -0
  13. deepeval/metrics/contextual_precision/contextual_precision.py +27 -21
  14. deepeval/metrics/conversational_g_eval/__init__.py +3 -0
  15. deepeval/metrics/conversational_g_eval/conversational_g_eval.py +11 -7
  16. deepeval/metrics/dag/schema.py +1 -1
  17. deepeval/metrics/dag/templates.py +2 -2
  18. deepeval/metrics/goal_accuracy/__init__.py +1 -0
  19. deepeval/metrics/goal_accuracy/goal_accuracy.py +349 -0
  20. deepeval/metrics/goal_accuracy/schema.py +17 -0
  21. deepeval/metrics/goal_accuracy/template.py +235 -0
  22. deepeval/metrics/hallucination/hallucination.py +8 -8
  23. deepeval/metrics/mcp/mcp_task_completion.py +7 -2
  24. deepeval/metrics/mcp/multi_turn_mcp_use_metric.py +16 -6
  25. deepeval/metrics/mcp_use_metric/mcp_use_metric.py +2 -1
  26. deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/multimodal_contextual_precision.py +32 -24
  27. deepeval/metrics/plan_adherence/__init__.py +1 -0
  28. deepeval/metrics/plan_adherence/plan_adherence.py +292 -0
  29. deepeval/metrics/plan_adherence/schema.py +11 -0
  30. deepeval/metrics/plan_adherence/template.py +170 -0
  31. deepeval/metrics/plan_quality/__init__.py +1 -0
  32. deepeval/metrics/plan_quality/plan_quality.py +292 -0
  33. deepeval/metrics/plan_quality/schema.py +11 -0
  34. deepeval/metrics/plan_quality/template.py +101 -0
  35. deepeval/metrics/step_efficiency/__init__.py +1 -0
  36. deepeval/metrics/step_efficiency/schema.py +11 -0
  37. deepeval/metrics/step_efficiency/step_efficiency.py +234 -0
  38. deepeval/metrics/step_efficiency/template.py +256 -0
  39. deepeval/metrics/task_completion/task_completion.py +1 -0
  40. deepeval/metrics/tool_correctness/schema.py +6 -0
  41. deepeval/metrics/tool_correctness/template.py +88 -0
  42. deepeval/metrics/tool_correctness/tool_correctness.py +226 -22
  43. deepeval/metrics/tool_use/__init__.py +1 -0
  44. deepeval/metrics/tool_use/schema.py +19 -0
  45. deepeval/metrics/tool_use/template.py +220 -0
  46. deepeval/metrics/tool_use/tool_use.py +458 -0
  47. deepeval/metrics/topic_adherence/__init__.py +1 -0
  48. deepeval/metrics/topic_adherence/schema.py +16 -0
  49. deepeval/metrics/topic_adherence/template.py +162 -0
  50. deepeval/metrics/topic_adherence/topic_adherence.py +355 -0
  51. deepeval/models/embedding_models/azure_embedding_model.py +37 -36
  52. deepeval/models/embedding_models/local_embedding_model.py +30 -32
  53. deepeval/models/embedding_models/ollama_embedding_model.py +18 -20
  54. deepeval/models/embedding_models/openai_embedding_model.py +22 -31
  55. deepeval/openai/extractors.py +61 -16
  56. deepeval/openai/patch.py +8 -12
  57. deepeval/openai/types.py +1 -1
  58. deepeval/openai/utils.py +108 -1
  59. deepeval/prompt/prompt.py +1 -0
  60. deepeval/prompt/utils.py +43 -14
  61. deepeval/synthesizer/synthesizer.py +11 -10
  62. deepeval/test_case/llm_test_case.py +6 -2
  63. deepeval/test_run/test_run.py +190 -207
  64. deepeval/tracing/__init__.py +2 -1
  65. deepeval/tracing/otel/exporter.py +3 -4
  66. deepeval/tracing/otel/utils.py +23 -4
  67. deepeval/tracing/trace_context.py +53 -38
  68. deepeval/tracing/tracing.py +23 -0
  69. deepeval/tracing/types.py +16 -14
  70. deepeval/utils.py +21 -0
  71. {deepeval-3.6.7.dist-info → deepeval-3.6.8.dist-info}/METADATA +1 -1
  72. {deepeval-3.6.7.dist-info → deepeval-3.6.8.dist-info}/RECORD +75 -53
  73. deepeval/integrations/llama_index/agent/patched.py +0 -68
  74. deepeval/tracing/message_types/__init__.py +0 -10
  75. deepeval/tracing/message_types/base.py +0 -6
  76. deepeval/tracing/message_types/messages.py +0 -14
  77. deepeval/tracing/message_types/tools.py +0 -18
  78. {deepeval-3.6.7.dist-info → deepeval-3.6.8.dist-info}/LICENSE.md +0 -0
  79. {deepeval-3.6.7.dist-info → deepeval-3.6.8.dist-info}/WHEEL +0 -0
  80. {deepeval-3.6.7.dist-info → deepeval-3.6.8.dist-info}/entry_points.txt +0 -0
deepeval/models/embedding_models/local_embedding_model.py CHANGED
@@ -1,5 +1,5 @@
 from openai import OpenAI, AsyncOpenAI
-from typing import Dict, List
+from typing import Dict, List, Optional
 
 from deepeval.key_handler import EmbeddingKeyValues, KEY_FILE_HANDLER
 from deepeval.models import DeepEvalBaseEmbeddingModel
@@ -15,25 +15,32 @@ retry_local = create_retry_decorator(PS.LOCAL)
 
 
 class LocalEmbeddingModel(DeepEvalBaseEmbeddingModel):
-    def __init__(self, **kwargs):
-        self.base_url = KEY_FILE_HANDLER.fetch_data(
+    def __init__(
+        self,
+        api_key: Optional[str] = None,
+        base_url: Optional[str] = None,
+        model: Optional[str] = None,
+        generation_kwargs: Optional[Dict] = None,
+        **client_kwargs,
+    ):
+        self.api_key = api_key or KEY_FILE_HANDLER.fetch_data(
+            EmbeddingKeyValues.LOCAL_EMBEDDING_API_KEY
+        )
+        self.base_url = base_url or KEY_FILE_HANDLER.fetch_data(
             EmbeddingKeyValues.LOCAL_EMBEDDING_BASE_URL
         )
-        model_name = KEY_FILE_HANDLER.fetch_data(
+        self.model_name = model or KEY_FILE_HANDLER.fetch_data(
             EmbeddingKeyValues.LOCAL_EMBEDDING_MODEL_NAME
         )
-        self.api_key = KEY_FILE_HANDLER.fetch_data(
-            EmbeddingKeyValues.LOCAL_EMBEDDING_API_KEY
-        )
-        self.kwargs = kwargs
-        super().__init__(model_name)
+        self.client_kwargs = client_kwargs or {}
+        self.generation_kwargs = generation_kwargs or {}
+        super().__init__(self.model_name)
 
     @retry_local
     def embed_text(self, text: str) -> List[float]:
         embedding_model = self.load_model()
         response = embedding_model.embeddings.create(
-            model=self.model_name,
-            input=[text],
+            model=self.model_name, input=[text], **self.generation_kwargs
         )
         return response.data[0].embedding
 
@@ -41,8 +48,7 @@ class LocalEmbeddingModel(DeepEvalBaseEmbeddingModel):
     def embed_texts(self, texts: List[str]) -> List[List[float]]:
         embedding_model = self.load_model()
         response = embedding_model.embeddings.create(
-            model=self.model_name,
-            input=texts,
+            model=self.model_name, input=texts, **self.generation_kwargs
         )
         return [data.embedding for data in response.data]
 
@@ -50,8 +56,7 @@ class LocalEmbeddingModel(DeepEvalBaseEmbeddingModel):
     async def a_embed_text(self, text: str) -> List[float]:
         embedding_model = self.load_model(async_mode=True)
         response = await embedding_model.embeddings.create(
-            model=self.model_name,
-            input=[text],
+            model=self.model_name, input=[text], **self.generation_kwargs
        )
         return response.data[0].embedding
 
@@ -59,8 +64,7 @@ class LocalEmbeddingModel(DeepEvalBaseEmbeddingModel):
     async def a_embed_texts(self, texts: List[str]) -> List[List[float]]:
         embedding_model = self.load_model(async_mode=True)
         response = await embedding_model.embeddings.create(
-            model=self.model_name,
-            input=texts,
+            model=self.model_name, input=texts, **self.generation_kwargs
         )
         return [data.embedding for data in response.data]
 
@@ -76,27 +80,21 @@ class LocalEmbeddingModel(DeepEvalBaseEmbeddingModel):
             return self._build_client(OpenAI)
         return self._build_client(AsyncOpenAI)
 
-    def _client_kwargs(self) -> Dict:
-        """
-        If Tenacity manages retries, turn off OpenAI SDK retries to avoid double retrying.
-        If users opt into SDK retries via DEEPEVAL_SDK_RETRY_PROVIDERS=local, leave them enabled.
-        """
-        kwargs = dict(self.kwargs or {})
+    def _build_client(self, cls):
+        client_kwargs = self.client_kwargs.copy()
         if not sdk_retries_for(PS.LOCAL):
-            kwargs["max_retries"] = 0
-        return kwargs
+            client_kwargs["max_retries"] = 0
 
-    def _build_client(self, cls):
-        kw = dict(
+        client_init_kwargs = dict(
             api_key=self.api_key,
             base_url=self.base_url,
-            **self._client_kwargs(),
+            **client_kwargs,
         )
         try:
-            return cls(**kw)
+            return cls(**client_init_kwargs)
         except TypeError as e:
-            # Older OpenAI SDKs may not accept max_retries; drop and retry once.
+            # older OpenAI SDKs may not accept max_retries, in that case remove and retry once
            if "max_retries" in str(e):
-                kw.pop("max_retries", None)
-                return cls(**kw)
+                client_init_kwargs.pop("max_retries", None)
+                return cls(**client_init_kwargs)
             raise
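
For callers, the LocalEmbeddingModel constructor now takes explicit api_key, base_url, and model arguments (each still falling back to the key file), a generation_kwargs dict forwarded to embeddings.create, and any remaining keyword arguments forwarded to the OpenAI client constructor. A minimal usage sketch of the new signature; the endpoint, key, and model values below are placeholders:

from deepeval.models.embedding_models.local_embedding_model import (
    LocalEmbeddingModel,
)

embedder = LocalEmbeddingModel(
    api_key="sk-anything",                # placeholder; key file used if omitted
    base_url="http://localhost:8080/v1",  # placeholder OpenAI-compatible endpoint
    model="nomic-embed-text",             # placeholder model name
    generation_kwargs={"encoding_format": "float"},  # forwarded to embeddings.create
    timeout=30,                           # leftover kwargs reach the OpenAI client
)
vector = embedder.embed_text("hello world")

Splitting generation_kwargs out of the old catch-all **kwargs keeps request-time options separate from client-construction options.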
deepeval/models/embedding_models/ollama_embedding_model.py CHANGED
@@ -1,5 +1,5 @@
 from ollama import Client, AsyncClient
-from typing import List
+from typing import List, Optional, Dict
 
 from deepeval.key_handler import EmbeddingKeyValues, KEY_FILE_HANDLER
 from deepeval.models import DeepEvalBaseEmbeddingModel
@@ -13,27 +13,28 @@ retry_ollama = create_retry_decorator(PS.OLLAMA)
 
 
 class OllamaEmbeddingModel(DeepEvalBaseEmbeddingModel):
-    def __init__(self, *args, **kwargs):
-        self.base_url = KEY_FILE_HANDLER.fetch_data(
+    def __init__(
+        self,
+        model: Optional[str] = None,
+        host: Optional[str] = None,
+        generation_kwargs: Optional[Dict] = None,
+        **client_kwargs,
+    ):
+        self.host = host or KEY_FILE_HANDLER.fetch_data(
             EmbeddingKeyValues.LOCAL_EMBEDDING_BASE_URL
         )
-        model_name = KEY_FILE_HANDLER.fetch_data(
+        self.model_name = model or KEY_FILE_HANDLER.fetch_data(
             EmbeddingKeyValues.LOCAL_EMBEDDING_MODEL_NAME
         )
-        # TODO: This is not being used. Clean it up in consistency PR
-        self.api_key = KEY_FILE_HANDLER.fetch_data(
-            EmbeddingKeyValues.LOCAL_EMBEDDING_API_KEY
-        )
-        self.args = args
-        self.kwargs = kwargs
-        super().__init__(model_name)
+        self.client_kwargs = client_kwargs or {}
+        self.generation_kwargs = generation_kwargs or {}
+        super().__init__(self.model_name)
 
     @retry_ollama
     def embed_text(self, text: str) -> List[float]:
         embedding_model = self.load_model()
         response = embedding_model.embed(
-            model=self.model_name,
-            input=text,
+            model=self.model_name, input=text, **self.generation_kwargs
         )
         return response["embeddings"][0]
 
@@ -41,8 +42,7 @@ class OllamaEmbeddingModel(DeepEvalBaseEmbeddingModel):
     def embed_texts(self, texts: List[str]) -> List[List[float]]:
         embedding_model = self.load_model()
         response = embedding_model.embed(
-            model=self.model_name,
-            input=texts,
+            model=self.model_name, input=texts, **self.generation_kwargs
         )
         return response["embeddings"]
 
@@ -50,8 +50,7 @@ class OllamaEmbeddingModel(DeepEvalBaseEmbeddingModel):
     async def a_embed_text(self, text: str) -> List[float]:
         embedding_model = self.load_model(async_mode=True)
         response = await embedding_model.embed(
-            model=self.model_name,
-            input=text,
+            model=self.model_name, input=text, **self.generation_kwargs
         )
         return response["embeddings"][0]
 
@@ -59,8 +58,7 @@ class OllamaEmbeddingModel(DeepEvalBaseEmbeddingModel):
     async def a_embed_texts(self, texts: List[str]) -> List[List[float]]:
         embedding_model = self.load_model(async_mode=True)
         response = await embedding_model.embed(
-            model=self.model_name,
-            input=texts,
+            model=self.model_name, input=texts, **self.generation_kwargs
         )
         return response["embeddings"]
 
@@ -74,7 +72,7 @@ class OllamaEmbeddingModel(DeepEvalBaseEmbeddingModel):
         return self._build_client(AsyncClient)
 
     def _build_client(self, cls):
-        return cls(host=self.base_url, **self.kwargs)
+        return cls(host=self.host, **self.client_kwargs)
 
     def get_model_name(self):
         return f"{self.model_name} (Ollama)"
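
OllamaEmbeddingModel follows the same pattern: model and host can be passed directly (with key-file fallbacks), generation_kwargs is forwarded to Client.embed, and leftover kwargs reach the ollama Client constructor. A sketch with placeholder values:

from deepeval.models.embedding_models.ollama_embedding_model import (
    OllamaEmbeddingModel,
)

embedder = OllamaEmbeddingModel(
    model="nomic-embed-text",              # placeholder model name
    host="http://localhost:11434",         # default Ollama host, shown for clarity
    generation_kwargs={"truncate": True},  # forwarded to Client.embed
)
vectors = embedder.embed_texts(["first doc", "second doc"])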
deepeval/models/embedding_models/openai_embedding_model.py CHANGED
@@ -19,27 +19,28 @@ default_openai_embedding_model = "text-embedding-3-small"
 
 
 class OpenAIEmbeddingModel(DeepEvalBaseEmbeddingModel):
+
     def __init__(
         self,
         model: Optional[str] = None,
-        _openai_api_key: Optional[str] = None,
-        **kwargs,
+        openai_api_key: Optional[str] = None,
+        generation_kwargs: Optional[Dict] = None,
+        **client_kwargs,
     ):
-        model_name = model if model else default_openai_embedding_model
-        if model_name not in valid_openai_embedding_models:
+        self.openai_api_key = openai_api_key
+        self.model_name = model if model else default_openai_embedding_model
+        if self.model_name not in valid_openai_embedding_models:
             raise ValueError(
                 f"Invalid model. Available OpenAI Embedding models: {', '.join(valid_openai_embedding_models)}"
             )
-        self._openai_api_key = _openai_api_key
-        self.model_name = model_name
-        self.kwargs = kwargs
+        self.client_kwargs = client_kwargs or {}
+        self.generation_kwargs = generation_kwargs or {}
 
     @retry_openai
     def embed_text(self, text: str) -> List[float]:
         client = self.load_model(async_mode=False)
         response = client.embeddings.create(
-            input=text,
-            model=self.model_name,
+            input=text, model=self.model_name, **self.generation_kwargs
         )
         return response.data[0].embedding
 
@@ -47,8 +48,7 @@ class OpenAIEmbeddingModel(DeepEvalBaseEmbeddingModel):
     def embed_texts(self, texts: List[str]) -> List[List[float]]:
         client = self.load_model(async_mode=False)
         response = client.embeddings.create(
-            input=texts,
-            model=self.model_name,
+            input=texts, model=self.model_name, **self.generation_kwargs
         )
         return [item.embedding for item in response.data]
 
@@ -56,8 +56,7 @@ class OpenAIEmbeddingModel(DeepEvalBaseEmbeddingModel):
     async def a_embed_text(self, text: str) -> List[float]:
         client = self.load_model(async_mode=True)
         response = await client.embeddings.create(
-            input=text,
-            model=self.model_name,
+            input=text, model=self.model_name, **self.generation_kwargs
         )
         return response.data[0].embedding
 
@@ -65,8 +64,7 @@ class OpenAIEmbeddingModel(DeepEvalBaseEmbeddingModel):
     async def a_embed_texts(self, texts: List[str]) -> List[List[float]]:
         client = self.load_model(async_mode=True)
         response = await client.embeddings.create(
-            input=texts,
-            model=self.model_name,
+            input=texts, model=self.model_name, **self.generation_kwargs
         )
         return [item.embedding for item in response.data]
 
@@ -82,27 +80,20 @@ class OpenAIEmbeddingModel(DeepEvalBaseEmbeddingModel):
             return self._build_client(OpenAI)
         return self._build_client(AsyncOpenAI)
 
-    def _client_kwargs(self) -> Dict:
-        """
-        If Tenacity is managing retries, force OpenAI SDK retries off to avoid double retries.
-        If the user opts into SDK retries for 'openai' via DEEPEVAL_SDK_RETRY_PROVIDERS,
-        leave their retry settings as is.
-        """
-        kwargs = dict(self.kwargs or {})
+    def _build_client(self, cls):
+        client_kwargs = self.client_kwargs.copy()
         if not sdk_retries_for(PS.OPENAI):
-            kwargs["max_retries"] = 0
-        return kwargs
+            client_kwargs["max_retries"] = 0
 
-    def _build_client(self, cls):
-        kw = dict(
-            api_key=self._openai_api_key,
-            **self._client_kwargs(),
+        client_init_kwargs = dict(
+            api_key=self.openai_api_key,
+            **client_kwargs,
         )
         try:
-            return cls(**kw)
+            return cls(**client_init_kwargs)
         except TypeError as e:
             # older OpenAI SDKs may not accept max_retries, in that case remove and retry once
             if "max_retries" in str(e):
-                kw.pop("max_retries", None)
-                return cls(**kw)
+                client_init_kwargs.pop("max_retries", None)
+                return cls(**client_init_kwargs)
             raise
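
The notable API change here is the rename of the private-looking _openai_api_key parameter to openai_api_key, alongside the same generation_kwargs/client_kwargs split as the other embedding models. A sketch of the updated call; the key value is a placeholder:

from deepeval.models.embedding_models.openai_embedding_model import (
    OpenAIEmbeddingModel,
)

embedder = OpenAIEmbeddingModel(
    model="text-embedding-3-small",
    openai_api_key="sk-...",                # placeholder
    generation_kwargs={"dimensions": 256},  # forwarded to embeddings.create
)
vector = embedder.embed_text("hello world")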
deepeval/openai/extractors.py CHANGED
@@ -4,17 +4,26 @@ from typing import Any, Union, Dict
 from openai.types.responses import Response
 
 from deepeval.test_case.llm_test_case import ToolCall
-from deepeval.openai.utils import stringify_multimodal_content
+from deepeval.openai.utils import (
+    render_response_input,
+    stringify_multimodal_content,
+    render_messages,
+)
 from deepeval.openai.types import InputParameters, OutputParameters
+from deepeval.tracing.types import Message
 
 
-def extract_input_parameters(
+# guarding against errors to be compatible with legacy APIs
+def safe_extract_input_parameters(
     is_completion: bool, kwargs: Dict[str, Any]
 ) -> InputParameters:
-    if is_completion:
-        return extract_input_parameters_from_completion(kwargs)
-    else:
-        return extract_input_parameters_from_response(kwargs)
+    try:
+        if is_completion:
+            return extract_input_parameters_from_completion(kwargs)
+        else:
+            return extract_input_parameters_from_response(kwargs)
+    except:
+        return InputParameters(model="NA")
 
 
 def extract_input_parameters_from_completion(
@@ -43,6 +52,9 @@ def extract_input_parameters_from_completion(
     if len(user_messages) > 0:
         input_arg = user_messages[0]
 
+    # render messages
+    messages = render_messages(messages)
+
     return InputParameters(
         model=model,
         input=stringify_multimodal_content(input_arg),
@@ -64,7 +76,24 @@ def extract_input_parameters_from_response(
         if tools is not None
         else None
     )
-    messages = input_payload if isinstance(input_payload, list) else None
+    messages = []
+    if isinstance(input_payload, list):
+        messages = render_response_input(input_payload)
+    elif isinstance(input_payload, str):
+        messages = [
+            {
+                "role": "user",
+                "content": input_payload,
+            }
+        ]
+    if instructions:
+        messages.insert(
+            0,
+            {
+                "role": "system",
+                "content": instructions,
+            },
+        )
     return InputParameters(
         model=model,
         input=stringify_multimodal_content(input_payload),
@@ -75,19 +104,24 @@
     )
 
 
-def extract_output_parameters(
+def safe_extract_output_parameters(
     is_completion: bool,
     response: Union[ChatCompletion, ParsedChatCompletion, Response],
     input_parameters: InputParameters,
 ) -> OutputParameters:
-    if is_completion:
-        return extract_output_parameters_from_completion(
-            response, input_parameters
-        )
-    else:
-        return extract_output_parameters_from_response(
-            response, input_parameters
-        )
+
+    # guarding against errors to be compatible with legacy APIs
+    try:
+        if is_completion:
+            return extract_output_parameters_from_completion(
+                response, input_parameters
+            )
+        else:
+            return extract_output_parameters_from_response(
+                response, input_parameters
+            )
+    except:
+        return OutputParameters()
 
 
 def extract_output_parameters_from_completion(
@@ -113,6 +147,12 @@
             )
         )
 
+    if not output and tools_called:
+        tool_calls = []
+        for tool_call in tools_called:
+            tool_calls.append(tool_call)
+        output = tool_calls
+
     return OutputParameters(
         output=output,
         prompt_tokens=prompt_tokens,
@@ -144,6 +184,11 @@
                 description=tool_descriptions.get(tool_call.name),
             )
         )
+    if not output and tools_called:
+        tool_calls = []
+        for tool_call in tools_called:
+            tool_calls.append(tool_call)
+        output = tool_calls
 
     return OutputParameters(
         output=output,
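
Both renamed wrappers use the same guard: if extraction of tracing metadata fails (for example against an older API shape), they return a neutral sentinel instead of letting the error propagate into the user's OpenAI call. A standalone sketch of that pattern; the names here are illustrative, not deepeval's:

from dataclasses import dataclass, field
from typing import Any, Dict

@dataclass
class Extracted:
    model: str = "NA"
    fields: Dict[str, Any] = field(default_factory=dict)

def safe_extract(kwargs: Dict[str, Any]) -> Extracted:
    # never let tracing-side parsing break the actual API call
    try:
        return Extracted(model=kwargs["model"], fields=dict(kwargs))
    except Exception:
        return Extracted()  # neutral fallback, like InputParameters(model="NA")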
deepeval/openai/patch.py CHANGED
@@ -3,8 +3,8 @@ from functools import wraps
 
 
 from deepeval.openai.extractors import (
-    extract_output_parameters,
-    extract_input_parameters,
+    safe_extract_output_parameters,
+    safe_extract_input_parameters,
     InputParameters,
     OutputParameters,
 )
@@ -16,7 +16,6 @@ from deepeval.tracing.context import (
 )
 from deepeval.tracing import observe
 from deepeval.tracing.trace_context import current_llm_context
-from deepeval.openai.utils import create_child_tool_spans
 
 # Store original methods for safety and potential unpatching
 _ORIGINAL_METHODS = {}
@@ -123,7 +122,7 @@
 ):
     @wraps(orig_method)
     async def patched_async_openai_method(*args, **kwargs):
-        input_parameters: InputParameters = extract_input_parameters(
+        input_parameters: InputParameters = safe_extract_input_parameters(
             is_completion_method, kwargs
         )
 
@@ -137,7 +136,7 @@
         )
         async def llm_generation(*args, **kwargs):
             response = await orig_method(*args, **kwargs)
-            output_parameters = extract_output_parameters(
+            output_parameters = safe_extract_output_parameters(
                 is_completion_method, response, input_parameters
             )
             _update_all_attributes(
@@ -162,7 +161,7 @@
 ):
     @wraps(orig_method)
     def patched_sync_openai_method(*args, **kwargs):
-        input_parameters: InputParameters = extract_input_parameters(
+        input_parameters: InputParameters = safe_extract_input_parameters(
            is_completion_method, kwargs
         )
 
@@ -176,7 +175,7 @@
         )
         def llm_generation(*args, **kwargs):
             response = orig_method(*args, **kwargs)
-            output_parameters = extract_output_parameters(
+            output_parameters = safe_extract_output_parameters(
                 is_completion_method, response, input_parameters
             )
             _update_all_attributes(
@@ -205,8 +204,8 @@
 ):
     """Update span and trace attributes with input/output parameters."""
     update_current_span(
-        input=input_parameters.input or input_parameters.messages or "NA",
-        output=output_parameters.output or "NA",
+        input=input_parameters.messages,
+        output=output_parameters.output or output_parameters.tools_called,
         tools_called=output_parameters.tools_called,
         # attributes to be added
         expected_output=expected_output,
@@ -223,9 +222,6 @@
         prompt=llm_context.prompt,
     )
 
-    if output_parameters.tools_called:
-        create_child_tool_spans(output_parameters)
-
    __update_input_and_output_of_current_trace(
         input_parameters, output_parameters
     )
deepeval/openai/types.py CHANGED
@@ -14,7 +14,7 @@ class InputParameters(BaseModel):
 
 
 class OutputParameters(BaseModel):
-    output: Optional[str] = None
+    output: Optional[Any] = None
     prompt_tokens: Optional[int] = None
     completion_tokens: Optional[int] = None
     tools_called: Optional[List[ToolCall]] = None
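
Widening output from Optional[str] to Optional[Any] is what lets the extractor changes above store a list of ToolCall objects when a completion returns tool calls but no text. Illustrative only, and assuming ToolCall can be constructed from just a name:

from deepeval.test_case.llm_test_case import ToolCall
from deepeval.openai.types import OutputParameters

calls = [ToolCall(name="get_weather")]
params = OutputParameters(output=calls, tools_called=calls)  # previously output had to be a str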
deepeval/openai/utils.py CHANGED
@@ -1,6 +1,10 @@
 import json
 import uuid
-from typing import Any, List, Optional
+from typing import Any, Dict, List, Optional, Iterable
+
+from openai.types.chat.chat_completion_message_param import (
+    ChatCompletionMessageParam,
+)
 
 from deepeval.tracing.types import ToolSpan, TraceSpanStatus
 from deepeval.tracing.context import current_span_context
@@ -126,3 +130,106 @@ def stringify_multimodal_content(content: Any) -> str:
 
     # unknown dicts and types returned as shortened JSON
     return _compact_dump(content)
+
+
+def render_messages(
+    messages: Iterable[ChatCompletionMessageParam],
+) -> List[Dict[str, Any]]:
+
+    messages_list = []
+
+    for message in messages:
+        role = message.get("role")
+        content = message.get("content")
+        if role == "assistant" and message.get("tool_calls"):
+            tool_calls = message.get("tool_calls")
+            if isinstance(tool_calls, list):
+                for tool_call in tool_calls:
+                    # Extract type - either "function" or "custom"
+                    tool_type = tool_call.get("type", "function")
+
+                    # Extract name and arguments based on type
+                    if tool_type == "function":
+                        function_data = tool_call.get("function", {})
+                        name = function_data.get("name", "")
+                        arguments = function_data.get("arguments", "")
+                    elif tool_type == "custom":
+                        custom_data = tool_call.get("custom", {})
+                        name = custom_data.get("name", "")
+                        arguments = custom_data.get("input", "")
+                    else:
+                        name = ""
+                        arguments = ""
+
+                    messages_list.append(
+                        {
+                            "id": tool_call.get("id", ""),
+                            "call_id": tool_call.get(
+                                "id", ""
+                            ),  # OpenAI uses 'id', not 'call_id'
+                            "name": name,
+                            "type": tool_type,
+                            "arguments": json.loads(arguments),
+                        }
+                    )
+
+        elif role == "tool":
+            messages_list.append(
+                {
+                    "call_id": message.get("tool_call_id", ""),
+                    "type": role,  # "tool"
+                    "output": message.get("content", {}),
+                }
+            )
+        else:
+            messages_list.append(
+                {
+                    "role": role,
+                    "content": content,
+                }
+            )
+
+    return messages_list
+
+
+def render_response_input(input: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+
+    messages_list = []
+
+    for item in input:
+        type = item.get("type")
+        role = item.get("role")
+
+        if type == "message":
+            messages_list.append(
+                {
+                    "role": role,
+                    "content": item.get("content"),
+                }
+            )
+        else:
+            messages_list.append(item)
+
+    return messages_list
+
+
+def _render_content(content: Dict[str, Any], indent: int = 0) -> str:
+    """
+    Renders a dictionary as a formatted string with indentation for nested structures.
+    """
+    if not content:
+        return ""
+
+    lines = []
+    prefix = " " * indent
+
+    for key, value in content.items():
+        if isinstance(value, dict):
+            lines.append(f"{prefix}{key}:")
+            lines.append(_render_content(value, indent + 1))
+        elif isinstance(value, list):
+            lines.append(f"{prefix}{key}: {_compact_dump(value)}")
+        else:
+            lines.append(f"{prefix}{key}: {value}")
+
+    return "\n".join(lines)
deepeval/prompt/prompt.py CHANGED
@@ -202,6 +202,7 @@ class Prompt:
                 "Unable to interpolate empty prompt template. Please pull a prompt from Confident AI or set template manually to continue."
             )
 
+        print("@@@@@")
         return interpolate_text(interpolation_type, text_template, **kwargs)
 
     elif prompt_type == PromptType.LIST: