deepeval 3.7.2__py3-none-any.whl → 3.7.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
deepeval/_version.py CHANGED
@@ -1 +1 @@
- __version__: str = "3.7.2"
+ __version__: str = "3.7.3"
deepeval/benchmarks/human_eval/human_eval.py CHANGED
@@ -92,7 +92,7 @@ class HumanEval(DeepEvalBaseBenchmark):
          self.predictions: Optional[pd.DataFrame] = None
          self.task_scores: Optional[pd.DataFrame] = None
          self.overall_score: Optional[float] = None
-         self.verbose_mode: bool = (False,)
+         self.verbose_mode: bool = verbose_mode

      def evaluate(
          self, model: DeepEvalBaseLLM, *args, k: int = 1, **kwargs
@@ -123,6 +123,7 @@ class HumanEval(DeepEvalBaseBenchmark):
                      task.value,
                      golden.input,
                      prediction,
+                     task_correct,
                      golden.expected_output,
                      score,
                  )
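Note on the first hunk: the old assignment created a one-element tuple, not a boolean, because of the stray trailing comma, so `HumanEval` silently ignored its `verbose_mode` constructor argument and the attribute was always truthy. A minimal illustration of why (pure Python, no deepeval imports needed):

```python
# A trailing comma makes a tuple, and any non-empty tuple is truthy,
# so verbose output could never actually be switched off:
old_value = (False,)
assert isinstance(old_value, tuple)
assert bool(old_value) is True

# After the fix the attribute simply mirrors the constructor argument:
# self.verbose_mode: bool = verbose_mode
```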
deepeval/dataset/dataset.py CHANGED
@@ -189,17 +189,35 @@ class EvaluationDataset:
          test_case._dataset_alias = self._alias
          test_case._dataset_id = self._id
          if isinstance(test_case, LLMTestCase):
+             if self._conversational_goldens or self._conversational_test_cases:
+                 raise TypeError(
+                     "You cannot add 'LLMTestCase' to a multi-turn dataset."
+                 )
              test_case._dataset_rank = len(self._llm_test_cases)
              self._llm_test_cases.append(test_case)
          elif isinstance(test_case, ConversationalTestCase):
+             if self._goldens or self._llm_test_cases:
+                 raise TypeError(
+                     "You cannot add 'ConversationalTestCase' to a single-turn dataset."
+                 )
+             self._multi_turn = True
              test_case._dataset_rank = len(self._conversational_test_cases)
              self._conversational_test_cases.append(test_case)

      def add_golden(self, golden: Union[Golden, ConversationalGolden]):
-         if self._multi_turn:
-             self._add_conversational_golden(golden)
-         else:
+         if isinstance(golden, Golden):
+             if self._conversational_goldens or self._conversational_test_cases:
+                 raise TypeError(
+                     "You cannot add 'Golden' to a multi-turn dataset."
+                 )
              self._add_golden(golden)
+         else:
+             if self._goldens or self._llm_test_cases:
+                 raise TypeError(
+                     "You cannot add 'ConversationalGolden' to a single-turn dataset."
+                 )
+             self._multi_turn = True
+             self._add_conversational_golden(golden)

      def _add_golden(self, golden: Union[Golden, ConversationalGolden]):
          if isinstance(golden, Golden):
@@ -224,16 +242,16 @@ class EvaluationDataset:
          file_path: str,
          input_col_name: str,
          actual_output_col_name: str,
-         expected_output_col_name: Optional[str] = None,
-         context_col_name: Optional[str] = None,
+         expected_output_col_name: Optional[str] = "expected_output",
+         context_col_name: Optional[str] = "context",
          context_col_delimiter: str = ";",
-         retrieval_context_col_name: Optional[str] = None,
+         retrieval_context_col_name: Optional[str] = "retrieval_context",
          retrieval_context_col_delimiter: str = ";",
-         tools_called_col_name: Optional[str] = None,
+         tools_called_col_name: Optional[str] = "tools_called",
          tools_called_col_delimiter: str = ";",
-         expected_tools_col_name: Optional[str] = None,
+         expected_tools_col_name: Optional[str] = "expected_tools",
          expected_tools_col_delimiter: str = ";",
-         additional_metadata_col_name: Optional[str] = None,
+         additional_metadata_col_name: Optional[str] = "additional_metadata",
      ):
          """
          Load test cases from a CSV file.
@@ -379,6 +397,7 @@ class EvaluationDataset:
          retrieval_context_key_name: Optional[str] = None,
          tools_called_key_name: Optional[str] = None,
          expected_tools_key_name: Optional[str] = None,
+         addtional_metadata_key_name: Optional[str] = None,
          encoding_type: str = "utf-8",
      ):
          """
@@ -431,6 +450,7 @@ class EvaluationDataset:
              tools_called = [ToolCall(**tool) for tool in tools_called_data]
              expected_tools_data = json_obj.get(expected_tools_key_name, [])
              expected_tools = [ToolCall(**tool) for tool in expected_tools_data]
+             # additional_metadata = json_obj.get(addtional_metadata_key_name)

              self.add_test_case(
                  LLMTestCase(
@@ -441,6 +461,7 @@ class EvaluationDataset:
                      retrieval_context=retrieval_context,
                      tools_called=tools_called,
                      expected_tools=expected_tools,
+                     # additional_metadata=additional_metadata,
                  )
              )

@@ -460,8 +481,8 @@ class EvaluationDataset:
          expected_tools_col_delimiter: str = ";",
          comments_key_name: str = "comments",
          name_key_name: str = "name",
-         source_file_col_name: Optional[str] = None,
-         additional_metadata_col_name: Optional[str] = None,
+         source_file_col_name: Optional[str] = "source_file",
+         additional_metadata_col_name: Optional[str] = "additional_metadata",
          scenario_col_name: Optional[str] = "scenario",
          turns_col_name: Optional[str] = "turns",
          expected_outcome_col_name: Optional[str] = "expected_outcome",
@@ -587,6 +608,7 @@ class EvaluationDataset:
                      context=context,
                      comments=comments,
                      name=name,
+                     additional_metadata=additional_metadata,
                  )
              )
          else:
@@ -645,6 +667,7 @@ class EvaluationDataset:
              comments = json_obj.get(comments_key_name)
              name = json_obj.get(name_key_name)
              parsed_turns = parse_turns(turns) if turns else []
+             additional_metadata = json_obj.get(additional_metadata_key_name)

              self._multi_turn = True
              self.goldens.append(
@@ -656,6 +679,7 @@ class EvaluationDataset:
                      context=context,
                      comments=comments,
                      name=name,
+                     additional_metadata=additional_metadata,
                  )
              )
          else:
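Taken together, the `add_test_case` and `add_golden` hunks above make a dataset lock into single-turn or multi-turn mode based on the first item added; mixing the two now raises a `TypeError` instead of mis-filing the item. A hedged sketch of the new behavior (import paths follow deepeval's public API; the constructor fields shown are illustrative):

```python
from deepeval.dataset import ConversationalGolden, EvaluationDataset, Golden

dataset = EvaluationDataset()
dataset.add_golden(Golden(input="What is DeepEval?"))  # dataset is now single-turn

# Adding a multi-turn golden to a single-turn dataset now fails fast:
try:
    dataset.add_golden(ConversationalGolden(scenario="User asks for a refund."))
except TypeError as e:
    print(e)  # You cannot add 'ConversationalGolden' to a single-turn dataset.
```

The loader hunks that follow are independent quality-of-life changes: optional column names now default to conventional names such as "expected_output" and "additional_metadata" instead of None, and additional_metadata is carried through when goldens are loaded from JSON.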
deepeval/dataset/utils.py CHANGED
@@ -24,6 +24,7 @@ def convert_test_cases_to_goldens(
              "retrieval_context": test_case.retrieval_context,
              "tools_called": test_case.tools_called,
              "expected_tools": test_case.expected_tools,
+             "additional_metadata": test_case.additional_metadata,
          }
          goldens.append(Golden(**golden))
      return goldens
@@ -70,6 +71,7 @@ def convert_convo_test_cases_to_convo_goldens(
              "expected_outcome": test_case.expected_outcome,
              "user_description": test_case.user_description,
              "context": test_case.context,
+             "additional_metadata": test_case.additional_metadata,
          }
          goldens.append(ConversationalGolden(**golden))
      return goldens
deepeval/metrics/utils.py CHANGED
@@ -25,6 +25,7 @@ from deepeval.models import (
      MultimodalOpenAIModel,
      MultimodalGeminiModel,
      MultimodalOllamaModel,
+     MultimodalAzureOpenAIMLLMModel,
      AmazonBedrockModel,
      LiteLLMModel,
      KimiModel,
@@ -514,6 +515,8 @@ def initialize_multimodal_model(
          return MultimodalGeminiModel(), True
      if should_use_ollama_model():
          return MultimodalOllamaModel(), True
+     elif should_use_azure_openai():
+         return MultimodalAzureOpenAIMLLMModel(model_name=model), True
      elif isinstance(model, str) or model is None:
          return MultimodalOpenAIModel(model=model), True
      raise TypeError(
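The two hunks above wire the new Azure multimodal model into automatic model selection: when Azure OpenAI settings are active, a string model name (or `None`) now resolves to Azure before falling back to OpenAI. A simplified restatement of the resolution order, using only names that appear in the hunk (the real function also accepts explicit model instances):

```python
# Sketch of initialize_multimodal_model's provider resolution after 3.7.3:
def resolve_multimodal_model(model=None):
    if should_use_gemini_model():
        return MultimodalGeminiModel(), True
    if should_use_ollama_model():
        return MultimodalOllamaModel(), True
    if should_use_azure_openai():  # new branch in 3.7.3
        return MultimodalAzureOpenAIMLLMModel(model_name=model), True
    if isinstance(model, str) or model is None:
        return MultimodalOpenAIModel(model=model), True
    raise TypeError("unsupported model argument")  # illustrative message
```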
deepeval/models/__init__.py CHANGED
@@ -21,6 +21,7 @@ from deepeval.models.mlllms import (
      MultimodalOpenAIModel,
      MultimodalOllamaModel,
      MultimodalGeminiModel,
+     MultimodalAzureOpenAIMLLMModel,
  )
  from deepeval.models.embedding_models import (
      OpenAIEmbeddingModel,
@@ -48,6 +49,7 @@ __all__ = [
      "MultimodalOpenAIModel",
      "MultimodalOllamaModel",
      "MultimodalGeminiModel",
+     "MultimodalAzureOpenAIMLLMModel",
      "OpenAIEmbeddingModel",
      "AzureOpenAIEmbeddingModel",
      "LocalEmbeddingModel",
deepeval/models/mlllms/__init__.py CHANGED
@@ -1,3 +1,4 @@
  from .openai_model import MultimodalOpenAIModel
  from .ollama_model import MultimodalOllamaModel
  from .gemini_model import MultimodalGeminiModel
+ from .azure_model import MultimodalAzureOpenAIMLLMModel
deepeval/models/mlllms/azure_model.py ADDED
@@ -0,0 +1,334 @@
+ from openai.types.chat.chat_completion import ChatCompletion
+ from openai import AzureOpenAI, AsyncAzureOpenAI
+ from typing import Optional, Tuple, Union, Dict, List
+ from pydantic import BaseModel
+ from io import BytesIO
+ import base64
+
+ from deepeval.models import DeepEvalBaseMLLM
+ from deepeval.key_handler import ModelKeyValues, KEY_FILE_HANDLER
+ from deepeval.test_case import MLLMImage
+ from deepeval.models.llms.openai_model import (
+     structured_outputs_models,
+     json_mode_models,
+     model_pricing,
+ )
+ from deepeval.models.retry_policy import (
+     create_retry_decorator,
+     sdk_retries_for,
+ )
+
+ from deepeval.models.llms.utils import trim_and_load_json
+ from deepeval.models.utils import parse_model_name
+ from deepeval.constants import ProviderSlug as PS
+
+
+ retry_azure = create_retry_decorator(PS.AZURE)
+
+
+ class MultimodalAzureOpenAIMLLMModel(DeepEvalBaseMLLM):
+     def __init__(
+         self,
+         deployment_name: Optional[str] = None,
+         model_name: Optional[str] = None,
+         azure_openai_api_key: Optional[str] = None,
+         openai_api_version: Optional[str] = None,
+         azure_endpoint: Optional[str] = None,
+         temperature: float = 0,
+         generation_kwargs: Optional[Dict] = None,
+         **kwargs,
+     ):
+         # fetch Azure deployment parameters
+         model_name = model_name or KEY_FILE_HANDLER.fetch_data(
+             ModelKeyValues.AZURE_MODEL_NAME
+         )
+         self.deployment_name = deployment_name or KEY_FILE_HANDLER.fetch_data(
+             ModelKeyValues.AZURE_DEPLOYMENT_NAME
+         )
+         self.azure_openai_api_key = (
+             azure_openai_api_key
+             or KEY_FILE_HANDLER.fetch_data(ModelKeyValues.AZURE_OPENAI_API_KEY)
+         )
+         self.openai_api_version = (
+             openai_api_version
+             or KEY_FILE_HANDLER.fetch_data(ModelKeyValues.OPENAI_API_VERSION)
+         )
+         self.azure_endpoint = azure_endpoint or KEY_FILE_HANDLER.fetch_data(
+             ModelKeyValues.AZURE_OPENAI_ENDPOINT
+         )
+         if temperature < 0:
+             raise ValueError("Temperature must be >= 0.")
+         self.temperature = temperature
+
+         # args and kwargs will be passed to the underlying model, in load_model function
+         self.kwargs = kwargs
+         self.generation_kwargs = generation_kwargs or {}
+         super().__init__(parse_model_name(model_name))
+
+     ###############################################
+     # Generate functions
+     ###############################################
+
+     @retry_azure
+     def generate(
+         self,
+         multimodal_input: List[Union[str, MLLMImage]],
+         schema: Optional[BaseModel] = None,
+     ) -> Tuple[Union[str, BaseModel], float]:
+         client = self.load_model(async_mode=False)
+         prompt = self.generate_prompt(multimodal_input)
+
+         if schema:
+             if self.model_name in structured_outputs_models:
+                 messages = [{"role": "user", "content": prompt}]
+                 completion = client.beta.chat.completions.parse(
+                     model=self.deployment_name,
+                     messages=messages,
+                     response_format=schema,
+                     temperature=self.temperature,
+                 )
+                 structured_output: BaseModel = completion.choices[
+                     0
+                 ].message.parsed
+                 cost = self.calculate_cost(
+                     completion.usage.prompt_tokens,
+                     completion.usage.completion_tokens,
+                 )
+                 return structured_output, cost
+             if self.model_name in json_mode_models:
+                 messages = [{"role": "user", "content": prompt}]
+                 completion = client.beta.chat.completions.parse(
+                     model=self.deployment_name,
+                     messages=messages,
+                     response_format={"type": "json_object"},
+                     temperature=self.temperature,
+                 )
+                 json_output = trim_and_load_json(
+                     completion.choices[0].message.content
+                 )
+                 cost = self.calculate_cost(
+                     completion.usage.prompt_tokens,
+                     completion.usage.completion_tokens,
+                 )
+                 return schema.model_validate(json_output), cost
+         print("Loading model client:")
+         print(client.base_url)
+         completion = client.chat.completions.create(
+             model=self.deployment_name,
+             messages=[{"role": "user", "content": prompt}],
+             temperature=self.temperature,
+             **self.generation_kwargs,
+         )
+         output = completion.choices[0].message.content
+         cost = self.calculate_cost(
+             completion.usage.prompt_tokens, completion.usage.completion_tokens
+         )
+         if schema:
+             json_output = trim_and_load_json(output)
+             return schema.model_validate(json_output), cost
+         else:
+             return output, cost
+
+     @retry_azure
+     async def a_generate(
+         self,
+         multimodal_input: List[Union[str, MLLMImage]],
+         schema: Optional[BaseModel] = None,
+     ) -> Tuple[Union[str, BaseModel], float]:
+         client = self.load_model(async_mode=True)
+         prompt = self.generate_prompt(multimodal_input)
+
+         if schema:
+             if self.model_name in structured_outputs_models:
+                 messages = [{"role": "user", "content": prompt}]
+                 completion = await client.beta.chat.completions.parse(
+                     model=self.deployment_name,
+                     messages=messages,
+                     response_format=schema,
+                     temperature=self.temperature,
+                 )
+                 structured_output: BaseModel = completion.choices[
+                     0
+                 ].message.parsed
+                 cost = self.calculate_cost(
+                     completion.usage.prompt_tokens,
+                     completion.usage.completion_tokens,
+                 )
+                 return structured_output, cost
+             if self.model_name in json_mode_models:
+                 messages = [{"role": "user", "content": prompt}]
+                 completion = await client.beta.chat.completions.parse(
+                     model=self.deployment_name,
+                     messages=messages,
+                     response_format={"type": "json_object"},
+                     temperature=self.temperature,
+                     **self.generation_kwargs,
+                 )
+                 json_output = trim_and_load_json(
+                     completion.choices[0].message.content
+                 )
+                 cost = self.calculate_cost(
+                     completion.usage.prompt_tokens,
+                     completion.usage.completion_tokens,
+                 )
+                 return schema.model_validate(json_output), cost
+
+         completion = await client.chat.completions.create(
+             model=self.deployment_name,
+             messages=[{"role": "user", "content": prompt}],
+             temperature=self.temperature,
+             **self.generation_kwargs,
+         )
+         output = completion.choices[0].message.content
+         cost = self.calculate_cost(
+             completion.usage.prompt_tokens,
+             completion.usage.completion_tokens,
+         )
+         if schema:
+             json_output = trim_and_load_json(output)
+             return schema.model_validate(json_output), cost
+         else:
+             return output, cost
+
+     ###############################################
+     # Other generate functions
+     ###############################################
+
+     @retry_azure
+     def generate_raw_response(
+         self,
+         multimodal_input: List[Union[str, MLLMImage]],
+         top_logprobs: int = 5,
+     ) -> Tuple[ChatCompletion, float]:
+         client = self.load_model(async_mode=False)
+         prompt = self.generate_prompt(multimodal_input)
+         messages = [{"role": "user", "content": prompt}]
+
+         # Generate completion
+         completion = client.chat.completions.create(
+             model=self.deployment_name,
+             messages=messages,
+             temperature=self.temperature,
+             logprobs=True,
+             top_logprobs=top_logprobs,
+             **self.generation_kwargs,
+         )
+         # Cost calculation
+         input_tokens = completion.usage.prompt_tokens
+         output_tokens = completion.usage.completion_tokens
+         cost = self.calculate_cost(input_tokens, output_tokens)
+
+         return completion, cost
+
+     @retry_azure
+     async def a_generate_raw_response(
+         self,
+         multimodal_input: List[Union[str, MLLMImage]],
+         top_logprobs: int = 5,
+     ) -> Tuple[ChatCompletion, float]:
+         client = self.load_model(async_mode=True)
+         prompt = self.generate_prompt(multimodal_input)
+         messages = [{"role": "user", "content": prompt}]
+
+         # Generate completion
+         completion = await client.chat.completions.create(
+             model=self.deployment_name,
+             messages=messages,
+             temperature=self.temperature,
+             logprobs=True,
+             top_logprobs=top_logprobs,
+             **self.generation_kwargs,
+         )
+         # Cost calculation
+         input_tokens = completion.usage.prompt_tokens
+         output_tokens = completion.usage.completion_tokens
+         cost = self.calculate_cost(input_tokens, output_tokens)
+
+         return completion, cost
+
+     ###############################################
+     # Utilities
+     ###############################################
+
+     def generate_prompt(
+         self, multimodal_input: List[Union[str, MLLMImage]] = []
+     ):
+         """Convert multimodal input into the proper message format for Azure OpenAI."""
+         prompt = []
+         for ele in multimodal_input:
+             if isinstance(ele, str):
+                 prompt.append({"type": "text", "text": ele})
+             elif isinstance(ele, MLLMImage):
+                 if ele.local:
+                     import PIL.Image
+
+                     image = PIL.Image.open(ele.url)
+                     visual_dict = {
+                         "type": "image_url",
+                         "image_url": {
+                             "url": f"data:image/jpeg;base64,{self.encode_pil_image(image)}"
+                         },
+                     }
+                 else:
+                     visual_dict = {
+                         "type": "image_url",
+                         "image_url": {"url": ele.url},
+                     }
+                 prompt.append(visual_dict)
+         return prompt
+
+     def encode_pil_image(self, pil_image):
+         """Encode a PIL image to base64 string."""
+         image_buffer = BytesIO()
+         if pil_image.mode in ("RGBA", "LA", "P"):
+             pil_image = pil_image.convert("RGB")
+         pil_image.save(image_buffer, format="JPEG")
+         image_bytes = image_buffer.getvalue()
+         base64_encoded_image = base64.b64encode(image_bytes).decode("utf-8")
+         return base64_encoded_image
+
+     def calculate_cost(self, input_tokens: int, output_tokens: int) -> float:
+         pricing = model_pricing.get(self.model_name, model_pricing["gpt-4.1"])
+         input_cost = input_tokens * pricing["input"]
+         output_cost = output_tokens * pricing["output"]
+         return input_cost + output_cost
+
+     ###############################################
+     # Model
+     ###############################################
+
+     def get_model_name(self):
+         return f"Azure OpenAI ({self.model_name})"
+
+     def load_model(self, async_mode: bool = False):
+         if not async_mode:
+             return self._build_client(AzureOpenAI)
+         return self._build_client(AsyncAzureOpenAI)
+
+     def _client_kwargs(self) -> Dict:
+         """
+         If Tenacity is managing retries, force OpenAI SDK retries off to avoid double retries.
+         If the user opts into SDK retries for 'azure' via DEEPEVAL_SDK_RETRY_PROVIDERS,
+         leave their retry settings as is.
+         """
+         kwargs = dict(self.kwargs or {})
+         if not sdk_retries_for(PS.AZURE):
+             kwargs["max_retries"] = 0
+         return kwargs
+
+     def _build_client(self, cls):
+         kw = dict(
+             api_key=self.azure_openai_api_key,
+             api_version=self.openai_api_version,
+             azure_endpoint=self.azure_endpoint,
+             azure_deployment=self.deployment_name,
+             **self._client_kwargs(),
+         )
+         try:
+             return cls(**kw)
+         except TypeError as e:
+             # older OpenAI SDKs may not accept max_retries, in that case remove and retry once
+             if "max_retries" in str(e):
+                 kw.pop("max_retries", None)
+                 return cls(**kw)
+             raise
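A hedged usage sketch of the new class: the constructor arguments mirror `__init__` above, and every value below is a placeholder. If Azure credentials were stored via `deepeval set-azure-openai`, the arguments can be omitted and are read from the key file instead.

```python
from deepeval.models import MultimodalAzureOpenAIMLLMModel
from deepeval.test_case import MLLMImage

model = MultimodalAzureOpenAIMLLMModel(
    deployment_name="my-gpt-4o-deployment",  # placeholder Azure deployment
    model_name="gpt-4o",
    azure_openai_api_key="<azure-api-key>",
    openai_api_version="2024-06-01",         # placeholder API version
    azure_endpoint="https://my-resource.openai.azure.com/",
)

# generate() takes interleaved text and images and returns (output, cost):
output, cost = model.generate(
    ["Describe this image:", MLLMImage(url="./photo.jpg", local=True)]
)
```

One thing reviewers may want to flag upstream: `generate` still contains two leftover debug `print` calls that emit the client base URL on every unstructured call.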
deepeval/tracing/context.py CHANGED
@@ -73,6 +73,7 @@ def update_current_trace(
      tools_called: Optional[List[ToolCall]] = None,
      expected_tools: Optional[List[ToolCall]] = None,
      test_case: Optional[LLMTestCase] = None,
+     confident_api_key: Optional[str] = None,
  ):
      current_trace = current_trace_context.get()
      if not current_trace:
@@ -109,6 +110,8 @@ def update_current_trace(
          current_trace.tools_called = tools_called
      if expected_tools:
          current_trace.expected_tools = expected_tools
+     if confident_api_key:
+         current_trace.confident_api_key = confident_api_key


  def update_llm_span(
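The new `confident_api_key` parameter lets an individual trace be posted with its own Confident AI key, overriding the process-wide key at flush time (see the `tracing.py` hunks below). A minimal sketch, assuming the `observe` decorator from deepeval's tracing API wraps the call site:

```python
from deepeval.tracing import observe, update_current_trace

@observe()
def handle_request(user_query: str) -> str:
    answer = "..."  # your LLM call here
    # Route just this trace to a specific project (placeholder key):
    update_current_trace(confident_api_key="<project-api-key>")
    return answer
```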
deepeval/tracing/tracing.py CHANGED
@@ -441,11 +441,11 @@ class TraceManager:
          loop = asyncio.new_event_loop()
          asyncio.set_event_loop(loop)

-         # buffer for payloads that need to be sent after main exits
-         remaining_trace_request_bodies: List[Dict[str, Any]] = []
+         # buffer for traces that need to be sent after main exits
+         remaining_traces: List[TraceApi] = []

          async def _a_send_trace(trace_obj):
-             nonlocal remaining_trace_request_bodies
+             nonlocal remaining_traces
              try:
                  # Build API object & payload
                  if isinstance(trace_obj, TraceApi):
@@ -486,7 +486,7 @@ class TraceManager:
                      )
                  elif self._flush_enabled:
                      # Main thread gone → to be flushed
-                     remaining_trace_request_bodies.append(body)
+                     remaining_traces.append(trace_api)

              except Exception as e:
                  queue_size = self._trace_queue.qsize()
@@ -544,24 +544,35 @@ class TraceManager:
          loop.run_until_complete(
              asyncio.gather(*pending, return_exceptions=True)
          )
-         self.flush_traces(remaining_trace_request_bodies)
+         self.flush_traces(remaining_traces)
          loop.run_until_complete(loop.shutdown_asyncgens())
          loop.close()

-     def flush_traces(
-         self, remaining_trace_request_bodies: List[Dict[str, Any]]
-     ):
+     def flush_traces(self, remaining_traces: List[TraceApi]):
          if not tracing_enabled() or not self.tracing_enabled:
              return

          self._print_trace_status(
              TraceWorkerStatus.WARNING,
-             message=f"Flushing {len(remaining_trace_request_bodies)} remaining trace(s)",
+             message=f"Flushing {len(remaining_traces)} remaining trace(s)",
          )
-         for body in remaining_trace_request_bodies:
+         for trace_api in remaining_traces:
              with capture_send_trace():
                  try:
-                     api = Api(api_key=self.confident_api_key)
+                     try:
+                         body = trace_api.model_dump(
+                             by_alias=True,
+                             exclude_none=True,
+                         )
+                     except AttributeError:
+                         # Pydantic version below 2.0
+                         body = trace_api.dict(by_alias=True, exclude_none=True)
+
+                     body = make_json_serializable(body)
+                     if trace_api.confident_api_key:
+                         api = Api(api_key=trace_api.confident_api_key)
+                     else:
+                         api = Api(api_key=self.confident_api_key)

                      _, link = api.send_request(
                          method=HttpMethods.POST,
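Two related changes in the flush path: traces awaiting flush are now buffered as `TraceApi` objects rather than pre-serialized dicts, so serialization happens at flush time, and each trace's own `confident_api_key` (set via `update_current_trace` above) takes precedence over the manager-level key. The Pydantic compatibility fallback, shown in isolation:

```python
# Pydantic v2 exposes model_dump(); v1 only has dict(). Trying v2 first
# keeps the payload identical across both major versions:
try:
    body = trace_api.model_dump(by_alias=True, exclude_none=True)  # Pydantic >= 2
except AttributeError:
    body = trace_api.dict(by_alias=True, exclude_none=True)  # Pydantic < 2
```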
deepeval-3.7.2.dist-info/METADATA → deepeval-3.7.3.dist-info/METADATA RENAMED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: deepeval
- Version: 3.7.2
+ Version: 3.7.3
  Summary: The LLM Evaluation Framework
  Home-page: https://github.com/confident-ai/deepeval
  License: Apache-2.0
deepeval-3.7.2.dist-info/RECORD → deepeval-3.7.3.dist-info/RECORD RENAMED
@@ -1,5 +1,5 @@
  deepeval/__init__.py,sha256=tle4lT4FONApg3OeztGPEdrpGMEGLWajyGTu7bEd3s0,2976
- deepeval/_version.py,sha256=uv4OEocrxlEkjp4YMUEXl0-kwRJuwkdtGlNshton0Mg,27
+ deepeval/_version.py,sha256=neKNug0TPAnGHtzNXdePXNla9tw6mxKTmj9WJu2trY8,27
  deepeval/annotation/__init__.py,sha256=ZFhUVNNuH_YgQSZJ-m5E9iUb9TkAkEV33a6ouMDZ8EI,111
  deepeval/annotation/annotation.py,sha256=3j3-syeJepAcEj3u3e4T_BeRDzNr7yXGDIoNQGMKpwQ,2298
  deepeval/annotation/api.py,sha256=EYN33ACVzVxsFleRYm60KB4Exvff3rPJKt1VBuuX970,2147
@@ -96,7 +96,7 @@ deepeval/benchmarks/hellaswag/hellaswag.py,sha256=_3felzBwQUhhRXk4D9NbcY8dme_qUQ
  deepeval/benchmarks/hellaswag/task.py,sha256=LfO8T6bpNiwdM8VdubKrup7qje3-rHgu69iB6Sdsc6I,7323
  deepeval/benchmarks/hellaswag/template.py,sha256=TcCu25hkl89qbRwcEyRVGTGp7DU_5Eph754W2znk5QY,1279
  deepeval/benchmarks/human_eval/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- deepeval/benchmarks/human_eval/human_eval.py,sha256=cx4x5OAeCrTjuUdQI7gjeSY_pUL3crfzKmieL1yKXMY,7714
+ deepeval/benchmarks/human_eval/human_eval.py,sha256=1xMVLQ1rQccY9Ac7BB6_1dFjo1QLK7DEMg0QXY2ybRM,7756
  deepeval/benchmarks/human_eval/task.py,sha256=lEHJpEiRbw5cXUKA_id0J5gQwae1G1T1JCJAeeTpXGg,5412
  deepeval/benchmarks/human_eval/template.py,sha256=rcCHSb0wP_FS9DQPaoBn-iwgicI1OyEdFCkZLQ1vxPk,647
  deepeval/benchmarks/ifeval/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
@@ -153,11 +153,11 @@ deepeval/constants.py,sha256=J5rNXGsMKTFYJ_9Wi49qchZXuUityZjnvuy3I3TO5zk,1667
  deepeval/contextvars.py,sha256=oqXtuYiKd4Zvc1rNoR1gcRBxzZYCGTMVn7XostwvkRI,524
  deepeval/dataset/__init__.py,sha256=N2c-rkuxWYiiJSOZArw0H02Cwo7cnfzFuNYJlvsIBEg,249
  deepeval/dataset/api.py,sha256=ZxkEqAF4nZH_Ys_1f5r9N2LFI_vBcAJxt8eJm7Mplpw,831
- deepeval/dataset/dataset.py,sha256=Nx0Nr12_AGjOOOmmAMaC6YIX62HgK8T86FtcL9IrsF4,57798
+ deepeval/dataset/dataset.py,sha256=Y9U-hVoa5BbnlzwJiFiDTkDcp9E6VmKOd7NtyLmdpHY,59182
  deepeval/dataset/golden.py,sha256=T-rTk4Hw1tANx_Iimv977F6Y4QK3s5OIB4PecU5FJDM,2338
  deepeval/dataset/test_run_tracer.py,sha256=5CdpDvhzkEEBRyqWi6egocaxiN6IRS3XfbACxEQZQeM,2544
  deepeval/dataset/types.py,sha256=CWeOIBPK2WdmRUqjFa9gfN-w2da0r8Ilzl3ToDpJQoQ,558
- deepeval/dataset/utils.py,sha256=MRiqwt-3E5WNCHtP2kY7P1PeRtFMRpGoy3r75tJ2QFg,7910
+ deepeval/dataset/utils.py,sha256=nWCNmD1kyLwvlCXlN-7XiqN2W7IUOkDckc1xl32MF-U,8042
  deepeval/errors.py,sha256=FfhtULNIQqHpKVqCr-xlvTtLxkNj40qVU89sXYKuDrA,754
  deepeval/evaluate/__init__.py,sha256=315IaMiYEz7oJhZ4kPTBfeCNd1xF-wWVU6KOQnrKQpE,291
  deepeval/evaluate/api.py,sha256=rkblH0ZFAAdyuF0Ymh7JE1pIJPR9yFuPrn9SQaCEQp4,435
@@ -394,11 +394,11 @@ deepeval/metrics/turn_relevancy/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm
  deepeval/metrics/turn_relevancy/schema.py,sha256=om0zFJcM6qu2GWS9aJTP3lUmuEXX8KpoACEvCsJqfq4,234
  deepeval/metrics/turn_relevancy/template.py,sha256=klZ10QI8jo4ekf-KgcWgRxS9E3AK4vgKDNzjwAYGl48,2797
  deepeval/metrics/turn_relevancy/turn_relevancy.py,sha256=cgMt0toBIwzDc8lE8Q3YztzQA_DqR4GfdDrlyX7ya6w,10385
- deepeval/metrics/utils.py,sha256=iSzb8mOpqT5Ciceix761zjlDUm0eMU-L4V6PyetIIeg,18778
+ deepeval/metrics/utils.py,sha256=gEEETXaug997fqrVW2Suceuaw1MgZAYMF4wT15Mu2Z8,18920
  deepeval/model_integrations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  deepeval/model_integrations/types.py,sha256=rbVMhC_2yWwD6JqzkRO9D7aMVC_KtXN686G_S7de7S8,630
  deepeval/model_integrations/utils.py,sha256=Zt9SYPgTxlGsQFZgpZvh_a5fWuL8mmIFVSe6uoQywZ4,3562
- deepeval/models/__init__.py,sha256=0x4EsoqtSf7sLOg28DoOoInL_D5fKPWCakkE2gJa2pM,1195
+ deepeval/models/__init__.py,sha256=7vANBeNkDUADNzP8cyZUoQVmExEEODlHWGcj_9ik9A8,1269
  deepeval/models/_summac_model.py,sha256=xflanxl_IBuzuainlYCVX7UvjHCnAckKSvNR2NwZI6k,19750
  deepeval/models/answer_relevancy_model.py,sha256=SLOA6uUImNOuxpPGfTg2AH7MIkf9QsotYixvI1jcVC8,2197
  deepeval/models/base_model.py,sha256=owmHhVBppPe5Zt6GK9p87dE31hoqtn5_8F9TWr97aRE,4112
@@ -422,7 +422,8 @@ deepeval/models/llms/local_model.py,sha256=hEyKVA6pkQm9dICUKsMNgjVI3w6gnyMdmBt_E
  deepeval/models/llms/ollama_model.py,sha256=xPO4d4jMY-cQAyHAcMuFvWS8JMWwCUbKP9CMi838Nuc,3307
  deepeval/models/llms/openai_model.py,sha256=1rjwbyt87fK03pw7r5tq3PjUVfl2EWllAssGyy6Dt2A,17494
  deepeval/models/llms/utils.py,sha256=gFM_8eIvdSwN_D4Yqp-j7PkfoiRn_bgu7tlCHol3A6c,1324
- deepeval/models/mlllms/__init__.py,sha256=19nN6kUB5XI0nUWUQX0aD9GBUMM8WWGvsDgKjuT4EF4,144
+ deepeval/models/mlllms/__init__.py,sha256=EgFWQcqVPhIqb90QCtIH2Np0bLSRj_beaymbuRN7Ds8,200
+ deepeval/models/mlllms/azure_model.py,sha256=pMpLAKZypVKMOAtyHToX8WgCcbbA8hXa2sd16DhCKEM,12696
  deepeval/models/mlllms/gemini_model.py,sha256=7tHIWD4w_fBz3L7jkKWygn1QpBPk9nl2Kw-yb0Jc3PI,10167
  deepeval/models/mlllms/ollama_model.py,sha256=_YtYtw8oIMVVI-CFsDicsdeEJUPhw_9ArPxB_1olsJA,4798
  deepeval/models/mlllms/openai_model.py,sha256=KgvYgQwWZ1A_Gcl6-4-W7IMqbUF9K8sNY37j5Ag7kQQ,9014
@@ -487,7 +488,7 @@ deepeval/test_run/hyperparameters.py,sha256=4yJkNgwL2y6eyWDTmUV62f5RUlfOui4R22ws
  deepeval/test_run/test_run.py,sha256=mStiBQZkgktwF99FRqkT_-UFrMT0m06X20TEEO6V2Bc,41278
  deepeval/tracing/__init__.py,sha256=aSOk_ZgL-K7CZzcyiaIa5peAiaPViDST5GhpHA3Adc8,614
  deepeval/tracing/api.py,sha256=GbtpUDdGpchl6rPXtZT6IBKjAhwux6qOlKLdP3dRVHU,4996
- deepeval/tracing/context.py,sha256=rzm42zYzP7jmQJO08AV-Qmw86ik45qRfF4UQNpGcmJw,5338
+ deepeval/tracing/context.py,sha256=v4uzd0N2H8mNntPwZvL49ya2kW9FvjqQqWXJFgE5d0c,5469
  deepeval/tracing/offline_evals/__init__.py,sha256=bEniJAl7PmS9u2ksiOTfHtlCPJ9_CJV5R6umrUOX5MM,102
  deepeval/tracing/offline_evals/api.py,sha256=eBfqh2uWyeRkIeGhjrN1bTQzAEow-XPubs-42WEZ2QQ,510
  deepeval/tracing/offline_evals/span.py,sha256=pXqTVXs-WnjRVpCYYEbNe0zSM6Wz9GsKHsM5ZcWxrmM,1802
@@ -501,12 +502,12 @@ deepeval/tracing/patchers.py,sha256=c-8Fjc5VIWB5VD9ONKq735ypW6O1pZIFQWsHR3lRh0E,
  deepeval/tracing/perf_epoch_bridge.py,sha256=iyAPddB6Op7NpMtPHJ29lDm53Btz9yLaN6xSCfTRQm4,1825
  deepeval/tracing/trace_context.py,sha256=Z0n0Cu1A5g9dXiZnzTFO5TzeOYHKeNuO6v3_EU_Gi_c,3568
  deepeval/tracing/trace_test_manager.py,sha256=wt4y7EWTRc4Bw938-UFFtXHkdFFOrnx6JaIk7J5Iulw,555
- deepeval/tracing/tracing.py,sha256=VWu5z6fxWP6KfDaw5ZmIkN-0yNDqe1CVHa0e9_Wjrgg,45995
+ deepeval/tracing/tracing.py,sha256=dx4JpiixkscEaYBR0LFpYCjeGgfZqiVlRCD-HblMo6g,46459
  deepeval/tracing/types.py,sha256=WhnxefUc5I8jcAOBQ-tsZ8_zVZfGqSvCWHD5XUN6Ggw,6040
  deepeval/tracing/utils.py,sha256=mdvhYAxDNsdnusaEXJd-c-_O2Jn6S3xSuzRvLO1Jz4U,5684
  deepeval/utils.py,sha256=zy9RR0bt3YMzWVlJc5Rl6eU5RyeW2uEjMfwD1sdgPr4,23234
- deepeval-3.7.2.dist-info/LICENSE.md,sha256=0ATkuLv6QgsJTBODUHC5Rak_PArA6gv2t7inJzNTP38,11352
- deepeval-3.7.2.dist-info/METADATA,sha256=-p5kW-oBcfsEi5_uiLnCn_godaoKAeRDCgzRdpOsWUI,18743
- deepeval-3.7.2.dist-info/WHEEL,sha256=d2fvjOD7sXsVzChCqf0Ty0JbHKBaLYwDbGQDwQTnJ50,88
- deepeval-3.7.2.dist-info/entry_points.txt,sha256=fVr8UphXTfJe9I2rObmUtfU3gkSrYeM0pLy-NbJYg10,94
- deepeval-3.7.2.dist-info/RECORD,,
+ deepeval-3.7.3.dist-info/LICENSE.md,sha256=0ATkuLv6QgsJTBODUHC5Rak_PArA6gv2t7inJzNTP38,11352
+ deepeval-3.7.3.dist-info/METADATA,sha256=LvW_cE214Ta8lMgI-eQC3cnlO0c6yHyYw1L4B4AHxaM,18743
+ deepeval-3.7.3.dist-info/WHEEL,sha256=d2fvjOD7sXsVzChCqf0Ty0JbHKBaLYwDbGQDwQTnJ50,88
+ deepeval-3.7.3.dist-info/entry_points.txt,sha256=NoismUQfwLOojSGZmBrdcpwfaoFRAzUhBvZD3UwOKog,95
+ deepeval-3.7.3.dist-info/RECORD,,
deepeval-3.7.2.dist-info/entry_points.txt → deepeval-3.7.3.dist-info/entry_points.txt RENAMED
@@ -2,5 +2,5 @@
  deepeval=deepeval.cli.main:app

  [pytest11]
- plugins=deepeval.plugins.plugin
+ deepeval=deepeval.plugins.plugin
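The `[pytest11]` change renames the entry point under which pytest discovers deepeval's plugin from the generic `plugins` to `deepeval`; the target module is unchanged. The left-hand name is what pytest reports in `--trace-config` and what `-p no:<name>` matches, so the plugin can now be disabled with `-p no:deepeval`. A quick way to inspect the registration (standard library, Python 3.10+):

```python
from importlib.metadata import entry_points

# List pytest plugins registered under the pytest11 entry-point group:
for ep in entry_points(group="pytest11"):
    if "deepeval" in ep.value:
        print(ep.name, "->", ep.value)  # expected: deepeval -> deepeval.plugins.plugin
```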